gg

2026-05-10 10:37:45 +03:00
parent 4f7090e2d9
commit c525b12dfd
32 changed files with 2374 additions and 209 deletions
@@ -1,17 +1,19 @@
 import os
+import json
 import yaml
 from typing import Dict, Any, Optional

+
 class EnsembleConfig:
    _instance: Optional['EnsembleConfig'] = None
    _config: Dict[str, Any] = {}
-    
+
    def __new__(cls):
        if cls._instance is None:
            cls._instance = super(EnsembleConfig, cls).__new__(cls)
            cls._instance._load_config()
        return cls._instance
-    
+
    def _load_config(self):
        """Load configuration from YAML file."""
        config_path = os.path.join(os.path.dirname(__file__), 'ensemble_config.yaml')
@@ -22,12 +24,12 @@ class EnsembleConfig:
        except Exception as e:
            print(f"❌ Failed to load ensemble config: {e}")
            self._config = {}
-    
+
    def get(self, key: str, default: Any = None) -> Any:
        """Get configuration value by key (supports dot notation for nested keys)."""
        keys = key.split('.')
        value = self._config
-        
+
        try:
            for k in keys:
                value = value[k]
@@ -35,12 +37,79 @@ class EnsembleConfig:
        except (KeyError, TypeError):
            return default

+
 # Singleton accessor
 def get_config() -> EnsembleConfig:
    return EnsembleConfig()

+
+# ── Market Thresholds Loader ────────────────────────────────────────────
+
+_market_thresholds_cache: Optional[Dict[str, Any]] = None
+
+
+def load_market_thresholds() -> Dict[str, Any]:
+    """
+    Load market thresholds from JSON config file.
+    Returns the full config dict with 'markets' and 'defaults' keys.
+    Caches after first load for performance.
+    """
+    global _market_thresholds_cache
+    if _market_thresholds_cache is not None:
+        return _market_thresholds_cache
+
+    config_path = os.path.join(os.path.dirname(__file__), 'market_thresholds.json')
+    try:
+        with open(config_path, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+        _market_thresholds_cache = data
+        print(f"✅ Market thresholds loaded: {len(data.get('markets', {}))} markets (v={data.get('_meta', {}).get('version', '?')})")
+        return data
+    except Exception as e:
+        print(f"❌ Failed to load market thresholds: {e} — using built-in defaults")
+        _market_thresholds_cache = {"markets": {}, "defaults": {
+            "calibration": 0.55,
+            "min_conf": 55.0,
+            "min_play_score": 68.0,
+            "min_edge": 0.02,
+            "odds_band_min_sample": 0.0,
+            "odds_band_min_edge": 0.0,
+        }}
+        return _market_thresholds_cache
+
+
+def build_threshold_dict(field: str) -> Dict[str, float]:
+    """
+    Build a flat {market: value} dict for a specific threshold field.
+
+    Usage:
+        calibration_map = build_threshold_dict("calibration")
+        # → {"MS": 0.62, "DC": 0.82, ...}
+    """
+    data = load_market_thresholds()
+    markets = data.get("markets", {})
+    result: Dict[str, float] = {}
+    for market, cfg in markets.items():
+        if field in cfg:
+            result[market] = float(cfg[field])
+    return result
+
+
+def get_threshold_default(field: str) -> float:
+    """Get the default fallback value for a threshold field."""
+    data = load_market_thresholds()
+    defaults = data.get("defaults", {})
+    return float(defaults.get(field, 0.0))
+
+
 if __name__ == "__main__":
    # Test
    cfg = get_config()
    print(f"Weights: {cfg.get('engine_weights')}")
    print(f"Team Weight: {cfg.get('engine_weights.team')}")
+    print()
+    print("--- Market Thresholds ---")
+    for field in ["calibration", "min_conf", "min_play_score", "min_edge"]:
+        d = build_threshold_dict(field)
+        print(f"{field}: {d}")
+    print(f"Default calibration: {get_threshold_default('calibration')}")
@@ -0,0 +1,115 @@
+{
+  "_meta": {
+    "version": "v34",
+    "description": "Market-specific thresholds for the betting engine pipeline — V34 odds-aware gate fix",
+    "rule": "max_reachable (100 × calibration) MUST be > min_conf + 8",
+    "updated_at": "2026-05-10",
+    "changelog": "V34: Reduced min_edge to realistic levels for odds-aware V25 model. Model output ≈ market-implied, so large EV edges are mathematically impossible."
+  },
+  "markets": {
+    "MS": {
+      "calibration": 0.62,
+      "min_conf": 20.0,
+      "min_play_score": 28.0,
+      "min_edge": 0.005,
+      "odds_band_min_sample": 8.0,
+      "odds_band_min_edge": 0.005
+    },
+    "DC": {
+      "calibration": 0.82,
+      "min_conf": 40.0,
+      "min_play_score": 50.0,
+      "min_edge": 0.003,
+      "odds_band_min_sample": 8.0,
+      "odds_band_min_edge": 0.005
+    },
+    "OU15": {
+      "calibration": 0.84,
+      "min_conf": 45.0,
+      "min_play_score": 50.0,
+      "min_edge": 0.003,
+      "odds_band_min_sample": 8.0,
+      "odds_band_min_edge": 0.005
+    },
+    "OU25": {
+      "calibration": 0.68,
+      "min_conf": 30.0,
+      "min_play_score": 40.0,
+      "min_edge": 0.005,
+      "odds_band_min_sample": 8.0,
+      "odds_band_min_edge": 0.005
+    },
+    "OU35": {
+      "calibration": 0.60,
+      "min_conf": 20.0,
+      "min_play_score": 30.0,
+      "min_edge": 0.008,
+      "odds_band_min_sample": 8.0,
+      "odds_band_min_edge": 0.008
+    },
+    "BTTS": {
+      "calibration": 0.65,
+      "min_conf": 30.0,
+      "min_play_score": 40.0,
+      "min_edge": 0.005,
+      "odds_band_min_sample": 8.0,
+      "odds_band_min_edge": 0.005
+    },
+    "HT": {
+      "calibration": 0.58,
+      "min_conf": 20.0,
+      "min_play_score": 28.0,
+      "min_edge": 0.01,
+      "odds_band_min_sample": 8.0,
+      "odds_band_min_edge": 0.008
+    },
+    "HT_OU05": {
+      "calibration": 0.68,
+      "min_conf": 35.0,
+      "min_play_score": 42.0,
+      "min_edge": 0.005,
+      "odds_band_min_sample": 8.0,
+      "odds_band_min_edge": 0.005
+    },
+    "HT_OU15": {
+      "calibration": 0.60,
+      "min_conf": 25.0,
+      "min_play_score": 32.0,
+      "min_edge": 0.008,
+      "odds_band_min_sample": 8.0,
+      "odds_band_min_edge": 0.008
+    },
+    "OE": {
+      "calibration": 0.62,
+      "min_conf": 35.0,
+      "min_play_score": 32.0,
+      "min_edge": 0.005
+    },
+    "CARDS": {
+      "calibration": 0.58,
+      "min_conf": 30.0,
+      "min_play_score": 35.0,
+      "min_edge": 0.008
+    },
+    "HCAP": {
+      "calibration": 0.56,
+      "min_conf": 25.0,
+      "min_play_score": 30.0,
+      "min_edge": 0.015
+    },
+    "HTFT": {
+      "calibration": 0.45,
+      "min_conf": 10.0,
+      "min_play_score": 18.0,
+      "min_edge": 0.02
+    }
+  },
+  "defaults": {
+    "calibration": 0.55,
+    "min_conf": 55.0,
+    "min_play_score": 60.0,
+    "min_edge": 0.008,
+    "odds_band_min_sample": 0.0,
+    "odds_band_min_edge": 0.0
+  }
+}
@@ -59,7 +59,7 @@ def fetch_matches(conn, sport: str):


 def flush_features_batch(conn, rows, dry_run: bool, sport: str = 'football'):
-    """Bulk upsert a batch of (match_id, home_elo, away_elo) into sport-partitioned ai_features table."""
+    """Bulk upsert ELO features into sport-partitioned ai_features table."""
    if not rows or dry_run:
        return

@@ -70,19 +70,27 @@ def flush_features_batch(conn, rows, dry_run: bool, sport: str = 'football'):
            f"""
            INSERT INTO {table_name}
                (match_id, home_elo, away_elo,
+                 home_home_elo, away_away_elo,
+                 home_form_elo, away_form_elo,
+                 elo_diff,
                 home_form_score, away_form_score,
                 missing_players_impact, calculator_ver, updated_at)
            VALUES %s
            ON CONFLICT (match_id) DO UPDATE SET
                home_elo             = EXCLUDED.home_elo,
                away_elo             = EXCLUDED.away_elo,
+                home_home_elo        = EXCLUDED.home_home_elo,
+                away_away_elo        = EXCLUDED.away_away_elo,
+                home_form_elo        = EXCLUDED.home_form_elo,
+                away_form_elo        = EXCLUDED.away_form_elo,
+                elo_diff             = EXCLUDED.elo_diff,
                home_form_score      = EXCLUDED.home_form_score,
                away_form_score      = EXCLUDED.away_form_score,
                calculator_ver       = EXCLUDED.calculator_ver,
                updated_at           = EXCLUDED.updated_at
            """,
            rows,
-            template="(%s, %s, %s, %s, %s, 0.0, %s, NOW())",
+            template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, 0.0, %s, NOW())",
            page_size=500,
        )
    conn.commit()
@@ -136,16 +144,24 @@ def backfill(sport: str, batch_size: int, dry_run: bool):
        if not home_id or not away_id:
            continue

-        # Snapshot PRE-match ELO
+        # Snapshot PRE-match ELO (all dimensions)
        home_rating = elo.get_or_create_rating(home_id, h_name or "")
        away_rating = elo.get_or_create_rating(away_id, a_name or "")

+        h_overall = round(home_rating.overall_elo, 2)
+        a_overall = round(away_rating.overall_elo, 2)
+
        feature_buf.append((
            match_id,
-            round(home_rating.overall_elo, 2),
-            round(away_rating.overall_elo, 2),
-            round(form_to_score(home_rating.recent_form), 2),
-            round(form_to_score(away_rating.recent_form), 2),
+            h_overall,                                          # home_elo
+            a_overall,                                          # away_elo
+            round(home_rating.home_elo, 2),                     # home_home_elo
+            round(away_rating.away_elo, 2),                     # away_away_elo
+            round(home_rating.form_elo, 2),                     # home_form_elo
+            round(away_rating.form_elo, 2),                     # away_form_elo
+            round(h_overall - a_overall, 2),                    # elo_diff
+            round(form_to_score(home_rating.recent_form), 2),   # home_form_score
+            round(form_to_score(away_rating.recent_form), 2),   # away_form_score
            CALCULATOR_VER,
        ))

@@ -0,0 +1,507 @@
+"""
+V25 Pro Model Trainer — Optuna + Isotonic Calibration
+=====================================================
+Combines V25's 83 features + 12 markets + temporal split
+with Optuna hyperparameter tuning and Isotonic Regression calibration.
+
+Usage:
+  python scripts/train_v25_pro.py
+  python scripts/train_v25_pro.py --markets MS,OU25,BTTS  # specific markets
+  python scripts/train_v25_pro.py --trials 30              # fewer trials
+"""
+
+import os
+import sys
+import json
+import pickle
+import argparse
+import numpy as np
+import pandas as pd
+import xgboost as xgb
+import lightgbm as lgb
+import optuna
+from optuna.samplers import TPESampler
+from datetime import datetime
+from sklearn.metrics import accuracy_score, log_loss, classification_report
+from sklearn.calibration import CalibratedClassifierCV
+from sklearn.base import BaseEstimator, ClassifierMixin
+
+optuna.logging.set_verbosity(optuna.logging.WARNING)
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+DATA_PATH = os.path.join(AI_ENGINE_DIR, "data", "training_data.csv")
+MODELS_DIR = os.path.join(AI_ENGINE_DIR, "models", "v25")
+REPORTS_DIR = os.path.join(AI_ENGINE_DIR, "reports", "training_v25")
+
+os.makedirs(MODELS_DIR, exist_ok=True)
+os.makedirs(REPORTS_DIR, exist_ok=True)
+
+# ─── Feature Columns (83 features, NO target leakage) ───────────────
+FEATURES = [
+    # ELO (8)
+    "home_overall_elo", "away_overall_elo", "elo_diff",
+    "home_home_elo", "away_away_elo",
+    "home_form_elo", "away_form_elo", "form_elo_diff",
+    # Form (12)
+    "home_goals_avg", "home_conceded_avg",
+    "away_goals_avg", "away_conceded_avg",
+    "home_clean_sheet_rate", "away_clean_sheet_rate",
+    "home_scoring_rate", "away_scoring_rate",
+    "home_winning_streak", "away_winning_streak",
+    "home_unbeaten_streak", "away_unbeaten_streak",
+    # H2H (6)
+    "h2h_total_matches", "h2h_home_win_rate", "h2h_draw_rate",
+    "h2h_avg_goals", "h2h_btts_rate", "h2h_over25_rate",
+    # Team Stats (8)
+    "home_avg_possession", "away_avg_possession",
+    "home_avg_shots_on_target", "away_avg_shots_on_target",
+    "home_shot_conversion", "away_shot_conversion",
+    "home_avg_corners", "away_avg_corners",
+    # Odds (24 + 20 presence flags)
+    "odds_ms_h", "odds_ms_d", "odds_ms_a",
+    "implied_home", "implied_draw", "implied_away",
+    "odds_ht_ms_h", "odds_ht_ms_d", "odds_ht_ms_a",
+    "odds_ou05_o", "odds_ou05_u",
+    "odds_ou15_o", "odds_ou15_u",
+    "odds_ou25_o", "odds_ou25_u",
+    "odds_ou35_o", "odds_ou35_u",
+    "odds_ht_ou05_o", "odds_ht_ou05_u",
+    "odds_ht_ou15_o", "odds_ht_ou15_u",
+    "odds_btts_y", "odds_btts_n",
+    "odds_ms_h_present", "odds_ms_d_present", "odds_ms_a_present",
+    "odds_ht_ms_h_present", "odds_ht_ms_d_present", "odds_ht_ms_a_present",
+    "odds_ou05_o_present", "odds_ou05_u_present",
+    "odds_ou15_o_present", "odds_ou15_u_present",
+    "odds_ou25_o_present", "odds_ou25_u_present",
+    "odds_ou35_o_present", "odds_ou35_u_present",
+    "odds_ht_ou05_o_present", "odds_ht_ou05_u_present",
+    "odds_ht_ou15_o_present", "odds_ht_ou15_u_present",
+    "odds_btts_y_present", "odds_btts_n_present",
+    # League (4)
+    "home_xga", "away_xga",
+    "league_avg_goals", "league_zero_goal_rate",
+    # Upset Engine (4)
+    "upset_atmosphere", "upset_motivation", "upset_fatigue", "upset_potential",
+    # Referee Engine (5)
+    "referee_home_bias", "referee_avg_goals", "referee_cards_total",
+    "referee_avg_yellow", "referee_experience",
+    # Momentum (3)
+    "home_momentum_score", "away_momentum_score", "momentum_diff",
+    # Squad (9)
+    "home_squad_quality", "away_squad_quality", "squad_diff",
+    "home_key_players", "away_key_players",
+    "home_missing_impact", "away_missing_impact",
+    "home_goals_form", "away_goals_form",
+]
+
+MARKET_CONFIGS = [
+    {"target": "label_ms", "name": "MS", "num_class": 3},
+    {"target": "label_ou15", "name": "OU15", "num_class": 2},
+    {"target": "label_ou25", "name": "OU25", "num_class": 2},
+    {"target": "label_ou35", "name": "OU35", "num_class": 2},
+    {"target": "label_btts", "name": "BTTS", "num_class": 2},
+    {"target": "label_ht_result", "name": "HT_RESULT", "num_class": 3},
+    {"target": "label_ht_ou05", "name": "HT_OU05", "num_class": 2},
+    {"target": "label_ht_ou15", "name": "HT_OU15", "num_class": 2},
+    {"target": "label_ht_ft", "name": "HTFT", "num_class": 9},
+    {"target": "label_odd_even", "name": "ODD_EVEN", "num_class": 2},
+    {"target": "label_cards_ou45", "name": "CARDS_OU45", "num_class": 2},
+    {"target": "label_handicap_ms", "name": "HANDICAP_MS", "num_class": 3},
+]
+
+
+def load_data():
+    """Load and prepare training data."""
+    if not os.path.exists(DATA_PATH):
+        print(f"[ERROR] Data not found: {DATA_PATH}")
+        sys.exit(1)
+
+    print(f"[INFO] Loading {DATA_PATH}...")
+    df = pd.read_csv(DATA_PATH)
+
+    for col in FEATURES:
+        if col in df.columns:
+            df[col] = df[col].fillna(0)
+
+    # Derive odds presence flags for older CSVs
+    odds_flag_sources = {
+        "odds_ms_h_present": "odds_ms_h", "odds_ms_d_present": "odds_ms_d",
+        "odds_ms_a_present": "odds_ms_a", "odds_ht_ms_h_present": "odds_ht_ms_h",
+        "odds_ht_ms_d_present": "odds_ht_ms_d", "odds_ht_ms_a_present": "odds_ht_ms_a",
+        "odds_ou05_o_present": "odds_ou05_o", "odds_ou05_u_present": "odds_ou05_u",
+        "odds_ou15_o_present": "odds_ou15_o", "odds_ou15_u_present": "odds_ou15_u",
+        "odds_ou25_o_present": "odds_ou25_o", "odds_ou25_u_present": "odds_ou25_u",
+        "odds_ou35_o_present": "odds_ou35_o", "odds_ou35_u_present": "odds_ou35_u",
+        "odds_ht_ou05_o_present": "odds_ht_ou05_o", "odds_ht_ou05_u_present": "odds_ht_ou05_u",
+        "odds_ht_ou15_o_present": "odds_ht_ou15_o", "odds_ht_ou15_u_present": "odds_ht_ou15_u",
+        "odds_btts_y_present": "odds_btts_y", "odds_btts_n_present": "odds_btts_n",
+    }
+    for flag_col, odds_col in odds_flag_sources.items():
+        if flag_col not in df.columns:
+            df[flag_col] = (
+                pd.to_numeric(df.get(odds_col, 0), errors="coerce").fillna(0) > 1.01
+            ).astype(float)
+
+    print(f"[INFO] Shape: {df.shape}, Features: {len(FEATURES)}")
+    return df
+
+
+def temporal_split_4way(valid_df: pd.DataFrame):
+    """Chronological 60/15/10/15 split: train/val/cal/test."""
+    ordered = valid_df.sort_values("mst_utc").reset_index(drop=True)
+    n = len(ordered)
+    i1 = int(n * 0.60)
+    i2 = int(n * 0.75)
+    i3 = int(n * 0.85)
+
+    train = ordered.iloc[:i1].copy()
+    val = ordered.iloc[i1:i2].copy()
+    cal = ordered.iloc[i2:i3].copy()
+    test = ordered.iloc[i3:].copy()
+
+    return train, val, cal, test
+
+
+# ─── XGBoost Wrapper for sklearn CalibratedClassifierCV ─────────────
+class XGBWrapper(BaseEstimator, ClassifierMixin):
+    """Thin sklearn-compatible wrapper around xgb.train for Isotonic calibration."""
+
+    def __init__(self, params, num_boost_round=500):
+        self.params = params
+        self.num_boost_round = num_boost_round
+        self.model_ = None
+        self.classes_ = None
+
+    def fit(self, X, y, **kwargs):
+        self.classes_ = np.unique(y)
+        dtrain = xgb.DMatrix(X, label=y)
+        self.model_ = xgb.train(self.params, dtrain, num_boost_round=self.num_boost_round)
+        return self
+
+    def predict_proba(self, X):
+        dm = xgb.DMatrix(X)
+        probs = self.model_.predict(dm)
+        if len(probs.shape) == 1:
+            probs = np.column_stack([1 - probs, probs])
+        return probs
+
+    def predict(self, X):
+        return np.argmax(self.predict_proba(X), axis=1)
+
+
+# ─── Optuna Objectives ──────────────────────────────────────────────
+def xgb_objective(trial, X_train, y_train, X_val, y_val, num_class):
+    params = {
+        "objective": "multi:softprob" if num_class > 2 else "binary:logistic",
+        "eval_metric": "mlogloss" if num_class > 2 else "logloss",
+        "max_depth": trial.suggest_int("max_depth", 3, 8),
+        "eta": trial.suggest_float("eta", 0.01, 0.15, log=True),
+        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
+        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
+        "min_child_weight": trial.suggest_int("min_child_weight", 1, 10),
+        "gamma": trial.suggest_float("gamma", 1e-8, 1.0, log=True),
+        "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 10.0, log=True),
+        "reg_alpha": trial.suggest_float("reg_alpha", 1e-8, 1.0, log=True),
+        "n_jobs": 4,
+        "random_state": 42,
+    }
+    if num_class > 2:
+        params["num_class"] = num_class
+
+    dtrain = xgb.DMatrix(X_train, label=y_train)
+    dval = xgb.DMatrix(X_val, label=y_val)
+
+    model = xgb.train(
+        params, dtrain, num_boost_round=1000,
+        evals=[(dval, "val")], early_stopping_rounds=50, verbose_eval=False,
+    )
+
+    preds = model.predict(dval)
+    if len(preds.shape) == 1:
+        preds = np.column_stack([1 - preds, preds])
+
+    return log_loss(y_val, preds)
+
+
+def lgb_objective(trial, X_train, y_train, X_val, y_val, num_class):
+    params = {
+        "objective": "multiclass" if num_class > 2 else "binary",
+        "metric": "multi_logloss" if num_class > 2 else "binary_logloss",
+        "max_depth": trial.suggest_int("max_depth", 3, 8),
+        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.15, log=True),
+        "feature_fraction": trial.suggest_float("feature_fraction", 0.5, 1.0),
+        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.6, 1.0),
+        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
+        "min_child_samples": trial.suggest_int("min_child_samples", 5, 50),
+        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 1.0, log=True),
+        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
+        "n_jobs": 4, "random_state": 42, "verbose": -1,
+    }
+    if num_class > 2:
+        params["num_class"] = num_class
+
+    train_data = lgb.Dataset(X_train, label=y_train)
+    val_data = lgb.Dataset(X_val, label=y_val, reference=train_data)
+
+    model = lgb.train(
+        params, train_data, num_boost_round=1000,
+        valid_sets=[val_data], valid_names=["val"],
+        callbacks=[lgb.early_stopping(50), lgb.log_evaluation(0)],
+    )
+
+    preds = model.predict(X_val, num_iteration=model.best_iteration)
+    if len(preds.shape) == 1:
+        preds = np.column_stack([1 - preds, preds])
+
+    return log_loss(y_val, preds)
+
+
+# ─── Main Training Pipeline ─────────────────────────────────────────
+def train_market(df, target_col, market_name, num_class, n_trials):
+    """Full pipeline for one market: Optuna → Train → Calibrate → Evaluate."""
+    print(f"\n{'='*60}")
+    print(f"[MARKET] {market_name} (classes={num_class})")
+    print(f"{'='*60}")
+
+    valid_df = df[df[target_col].notna()].copy()
+    valid_df = valid_df[valid_df[target_col].astype(str) != ""].copy()
+    print(f"[INFO] Valid samples: {len(valid_df)}")
+
+    if len(valid_df) < 500:
+        print(f"[SKIP] Not enough data for {market_name}")
+        return None
+
+    available_features = [f for f in FEATURES if f in valid_df.columns]
+    print(f"[INFO] Features: {len(available_features)}/{len(FEATURES)}")
+
+    train_df, val_df, cal_df, test_df = temporal_split_4way(valid_df)
+    X_train = train_df[available_features].values
+    X_val = val_df[available_features].values
+    X_cal = cal_df[available_features].values
+    X_test = test_df[available_features].values
+    y_train = train_df[target_col].astype(int).values
+    y_val = val_df[target_col].astype(int).values
+    y_cal = cal_df[target_col].astype(int).values
+    y_test = test_df[target_col].astype(int).values
+
+    print(f"[INFO] Split: train={len(X_train)} val={len(X_val)} cal={len(X_cal)} test={len(X_test)}")
+
+    # ── Phase 1: Optuna XGBoost ──────────────────────────────────
+    print(f"\n[OPTUNA] XGBoost tuning ({n_trials} trials)...")
+    xgb_study = optuna.create_study(direction="minimize", sampler=TPESampler(seed=42))
+    xgb_study.optimize(
+        lambda trial: xgb_objective(trial, X_train, y_train, X_val, y_val, num_class),
+        n_trials=n_trials,
+    )
+    xgb_best = xgb_study.best_params
+    print(f"[OK] XGB best logloss: {xgb_study.best_value:.4f}")
+
+    # ── Phase 2: Optuna LightGBM ─────────────────────────────────
+    print(f"[OPTUNA] LightGBM tuning ({n_trials} trials)...")
+    lgb_study = optuna.create_study(direction="minimize", sampler=TPESampler(seed=42))
+    lgb_study.optimize(
+        lambda trial: lgb_objective(trial, X_train, y_train, X_val, y_val, num_class),
+        n_trials=n_trials,
+    )
+    lgb_best = lgb_study.best_params
+    print(f"[OK] LGB best logloss: {lgb_study.best_value:.4f}")
+
+    # ── Phase 3: Train final models with best params ─────────────
+    # XGBoost final
+    xgb_params = {
+        "objective": "multi:softprob" if num_class > 2 else "binary:logistic",
+        "eval_metric": "mlogloss" if num_class > 2 else "logloss",
+        "n_jobs": 4, "random_state": 42,
+        **{k: v for k, v in xgb_best.items()},
+    }
+    if num_class > 2:
+        xgb_params["num_class"] = num_class
+
+    dtrain = xgb.DMatrix(X_train, label=y_train)
+    dval = xgb.DMatrix(X_val, label=y_val)
+    xgb_model = xgb.train(
+        xgb_params, dtrain, num_boost_round=1500,
+        evals=[(dtrain, "train"), (dval, "val")],
+        early_stopping_rounds=80, verbose_eval=200,
+    )
+    print(f"[OK] XGB final: iter={xgb_model.best_iteration}, score={xgb_model.best_score:.4f}")
+
+    # LightGBM final
+    lgb_params = {
+        "objective": "multiclass" if num_class > 2 else "binary",
+        "metric": "multi_logloss" if num_class > 2 else "binary_logloss",
+        "n_jobs": 4, "random_state": 42, "verbose": -1,
+        **{k: v for k, v in lgb_best.items()},
+    }
+    if num_class > 2:
+        lgb_params["num_class"] = num_class
+
+    lgb_train_data = lgb.Dataset(X_train, label=y_train)
+    lgb_val_data = lgb.Dataset(X_val, label=y_val, reference=lgb_train_data)
+    lgb_model = lgb.train(
+        lgb_params, lgb_train_data, num_boost_round=1500,
+        valid_sets=[lgb_train_data, lgb_val_data],
+        valid_names=["train", "val"],
+        callbacks=[lgb.early_stopping(80), lgb.log_evaluation(200)],
+    )
+    print(f"[OK] LGB final: iter={lgb_model.best_iteration}")
+
+    # ── Phase 4: Isotonic Calibration on cal set ─────────────────
+    print("[CAL] Fitting Isotonic Regression...")
+
+    # XGB calibration
+    xgb_wrapper = XGBWrapper(xgb_params, num_boost_round=xgb_model.best_iteration)
+    xgb_calibrated = CalibratedClassifierCV(xgb_wrapper, method="isotonic", cv="prefit")
+    xgb_wrapper.fit(X_train, y_train)
+    xgb_calibrated.fit(X_cal, y_cal)
+
+    # LGB calibration — use raw predictions approach
+    lgb_cal_preds = lgb_model.predict(X_cal, num_iteration=lgb_model.best_iteration)
+    if len(lgb_cal_preds.shape) == 1:
+        lgb_cal_preds = np.column_stack([1 - lgb_cal_preds, lgb_cal_preds])
+
+    # ── Phase 5: Evaluate on test set ────────────────────────────
+    print("\n[EVAL] Test set evaluation...")
+    dtest = xgb.DMatrix(X_test)
+
+    # Raw XGB
+    xgb_raw_probs = xgb_model.predict(dtest)
+    if len(xgb_raw_probs.shape) == 1:
+        xgb_raw_probs = np.column_stack([1 - xgb_raw_probs, xgb_raw_probs])
+
+    # Calibrated XGB
+    xgb_cal_probs = xgb_calibrated.predict_proba(X_test)
+
+    # Raw LGB
+    lgb_raw_probs = lgb_model.predict(X_test, num_iteration=lgb_model.best_iteration)
+    if len(lgb_raw_probs.shape) == 1:
+        lgb_raw_probs = np.column_stack([1 - lgb_raw_probs, lgb_raw_probs])
+
+    # Ensemble (raw)
+    raw_ensemble = (xgb_raw_probs + lgb_raw_probs) / 2
+
+    def _eval(probs, label):
+        preds = np.argmax(probs, axis=1)
+        acc = accuracy_score(y_test, preds)
+        ll = log_loss(y_test, probs)
+        print(f"  {label}: Acc={acc:.4f} LogLoss={ll:.4f}")
+        return {"accuracy": round(float(acc), 4), "logloss": round(float(ll), 4)}
+
+    m_xgb_raw = _eval(xgb_raw_probs, "XGB Raw")
+    m_xgb_cal = _eval(xgb_cal_probs, "XGB Calibrated")
+    m_lgb_raw = _eval(lgb_raw_probs, "LGB Raw")
+    m_ensemble = _eval(raw_ensemble, "Ensemble Raw")
+
+    # Classification report for ensemble
+    ens_preds = np.argmax(raw_ensemble, axis=1)
+    print(f"\n[REPORT] Ensemble Classification Report:")
+    print(classification_report(y_test, ens_preds))
+
+    # ── Phase 6: Save models ─────────────────────────────────────
+    # Raw models (orchestrator compatible)
+    xgb_path = os.path.join(MODELS_DIR, f"xgb_v25_{market_name.lower()}.json")
+    xgb_model.save_model(xgb_path)
+    print(f"[SAVE] {xgb_path}")
+
+    lgb_path = os.path.join(MODELS_DIR, f"lgb_v25_{market_name.lower()}.txt")
+    lgb_model.save_model(lgb_path)
+    print(f"[SAVE] {lgb_path}")
+
+    # Calibrated model
+    cal_path = os.path.join(MODELS_DIR, f"cal_xgb_v25_{market_name.lower()}.pkl")
+    with open(cal_path, "wb") as f:
+        pickle.dump(xgb_calibrated, f)
+    print(f"[SAVE] {cal_path}")
+
+    return {
+        "market": market_name,
+        "samples": int(len(valid_df)),
+        "train": int(len(X_train)),
+        "val": int(len(X_val)),
+        "cal": int(len(X_cal)),
+        "test": int(len(X_test)),
+        "features_used": len(available_features),
+        "xgb_best_params": xgb_best,
+        "lgb_best_params": lgb_best,
+        "xgb_best_iteration": int(xgb_model.best_iteration),
+        "lgb_best_iteration": int(lgb_model.best_iteration),
+        "xgb_optuna_best_logloss": round(float(xgb_study.best_value), 4),
+        "lgb_optuna_best_logloss": round(float(lgb_study.best_value), 4),
+        "test_xgb_raw": m_xgb_raw,
+        "test_xgb_calibrated": m_xgb_cal,
+        "test_lgb_raw": m_lgb_raw,
+        "test_ensemble_raw": m_ensemble,
+    }
+
+
+def main():
+    parser = argparse.ArgumentParser(description="V25 Pro Trainer")
+    parser.add_argument("--markets", type=str, default=None,
+                        help="Comma-separated market names (e.g., MS,OU25,BTTS)")
+    parser.add_argument("--trials", type=int, default=50,
+                        help="Optuna trials per model per market")
+    args = parser.parse_args()
+
+    print("=" * 60)
+    print("V25 PRO — Optuna + Isotonic Calibration")
+    print("=" * 60)
+    print(f"[INFO] Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    print(f"[INFO] Trials per model: {args.trials}")
+    print(f"[INFO] Total features: {len(FEATURES)}")
+
+    df = load_data()
+
+    configs = MARKET_CONFIGS
+    if args.markets:
+        selected = [m.strip().upper() for m in args.markets.split(",")]
+        configs = [c for c in configs if c["name"] in selected]
+        print(f"[INFO] Selected markets: {[c['name'] for c in configs]}")
+
+    all_metrics = {
+        "trained_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+        "trainer": "v25_pro",
+        "optuna_trials": args.trials,
+        "total_features": len(FEATURES),
+        "markets": {},
+    }
+
+    for config in configs:
+        target = config["target"]
+        if target not in df.columns:
+            print(f"[SKIP] {config['name']}: missing target {target}")
+            continue
+
+        metrics = train_market(
+            df, target, config["name"], config["num_class"], args.trials,
+        )
+        if metrics:
+            all_metrics["markets"][config["name"]] = metrics
+
+    # Save feature list
+    feature_path = os.path.join(MODELS_DIR, "feature_cols.json")
+    with open(feature_path, "w") as f:
+        json.dump(FEATURES, f, indent=2)
+
+    # Save full report
+    report_path = os.path.join(REPORTS_DIR, "v25_pro_metrics.json")
+    with open(report_path, "w") as f:
+        json.dump(all_metrics, f, indent=2, default=str)
+    print(f"\n[SAVE] Report: {report_path}")
+
+    # Summary
+    print("\n" + "=" * 60)
+    print("[SUMMARY]")
+    print("=" * 60)
+    for name, m in all_metrics["markets"].items():
+        ens = m.get("test_ensemble_raw", {})
+        print(f"  {name:12s} | Acc={ens.get('accuracy','?'):>6s} | LL={ens.get('logloss','?'):>6s} | "
+              f"XGB_iter={m.get('xgb_best_iteration','?')} LGB_iter={m.get('lgb_best_iteration','?')}")
+
+    print(f"\n[INFO] Completed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    print("[OK] V25 PRO Training Complete!")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,367 @@
+"""
+Match Commentary Generator
+===========================
+Generates human-readable Turkish commentary from the analysis package.
+Reads all engine signals (model, odds band, betting brain, triple value)
+and produces a clear, actionable summary for end users.
+
+No LLM required — fully template-based.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Dict, List, Optional
+
+
+def generate_match_commentary(package: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Main entry point. Takes a full analysis package and returns a commentary dict.
+
+    Returns:
+        {
+            "action": "BET" | "WATCH" | "SKIP",
+            "headline": "...",
+            "summary": "...",
+            "notes": ["...", "..."],
+            "contradictions": ["...", "..."],
+            "confidence_label": "YÜKSEK" | "ORTA" | "DÜŞÜK" | "ÇOK DÜŞÜK"
+        }
+    """
+    match_info = package.get("match_info") or {}
+    home = match_info.get("home_team", "Ev Sahibi")
+    away = match_info.get("away_team", "Deplasman")
+    main_pick = package.get("main_pick") or {}
+    betting_brain = package.get("betting_brain") or {}
+    v27_engine = package.get("v27_engine") or {}
+    market_board = package.get("market_board") or {}
+    score_pred = package.get("score_prediction") or {}
+    risk = package.get("risk") or {}
+    data_quality = package.get("data_quality") or {}
+
+    # ── Determine action ──────────────────────────────────────────
+    brain_decision = str(betting_brain.get("decision") or "NO_BET").upper()
+    main_playable = bool(main_pick.get("playable"))
+    main_vetoed = bool((main_pick.get("upper_brain") or {}).get("veto"))
+    approved_count = int(betting_brain.get("approved_count", 0) or 0)
+
+    if main_playable and not main_vetoed and approved_count > 0:
+        action = "BET"
+    elif approved_count == 0 and brain_decision == "NO_BET":
+        action = "SKIP"
+    else:
+        action = "WATCH"
+
+    # ── Headline ──────────────────────────────────────────────────
+    headline = _build_headline(action, main_pick, home, away)
+
+    # ── Summary paragraph ─────────────────────────────────────────
+    summary = _build_summary(
+        action, main_pick, market_board, v27_engine,
+        score_pred, risk, data_quality, home, away,
+    )
+
+    # ── Quick notes ───────────────────────────────────────────────
+    notes = _build_notes(market_board, v27_engine, score_pred, risk, home, away)
+
+    # ── Contradiction detection ───────────────────────────────────
+    contradictions = _detect_contradictions(market_board, v27_engine, package)
+
+    # ── Overall confidence label ──────────────────────────────────
+    confidence_label = _overall_confidence_label(main_pick, data_quality)
+
+    return {
+        "action": action,
+        "headline": headline,
+        "summary": summary,
+        "notes": notes[:6],
+        "contradictions": contradictions[:4],
+        "confidence_label": confidence_label,
+    }
+
+
+# ═══════════════════════════════════════════════════════════════════════
+#  Headline
+# ═══════════════════════════════════════════════════════════════════════
+
+def _build_headline(
+    action: str,
+    main_pick: Dict[str, Any],
+    home: str,
+    away: str,
+) -> str:
+    if action == "BET":
+        market = main_pick.get("market", "")
+        pick = main_pick.get("pick", "")
+        odds = main_pick.get("odds", 0.0)
+        conf = main_pick.get("calibrated_confidence", main_pick.get("confidence", 0))
+        market_tr = _market_to_turkish(market, pick)
+        return f"🎯 {market_tr} önerisi — Oran: {odds}, Güven: %{conf:.0f}"
+
+    if action == "WATCH":
+        return f"👀 {home} vs {away} — İzlemeye değer sinyaller var"
+
+    return f"⚠️ {home} vs {away} — Şu an net bir fırsat görülmüyor"
+
+
+# ═══════════════════════════════════════════════════════════════════════
+#  Summary
+# ═══════════════════════════════════════════════════════════════════════
+
+def _build_summary(
+    action: str,
+    main_pick: Dict[str, Any],
+    market_board: Dict[str, Any],
+    v27_engine: Dict[str, Any],
+    score_pred: Dict[str, Any],
+    risk: Dict[str, Any],
+    data_quality: Dict[str, Any],
+    home: str,
+    away: str,
+) -> str:
+    parts: List[str] = []
+
+    # Who is the favourite?
+    ms_board = market_board.get("MS") or {}
+    ms_pick = ms_board.get("pick", "")
+    ms_conf = float(ms_board.get("confidence", 50) or 50)
+
+    if ms_pick == "1" and ms_conf > 45:
+        parts.append(f"{home} hafif favori görünüyor")
+    elif ms_pick == "1" and ms_conf > 55:
+        parts.append(f"{home} net favori")
+    elif ms_pick == "2" and ms_conf > 45:
+        parts.append(f"{away} hafif favori görünüyor")
+    elif ms_pick == "2" and ms_conf > 55:
+        parts.append(f"{away} net favori")
+    else:
+        parts.append("İki takım da birbirine yakın güçte")
+
+    # xG expectation
+    xg_home = float(score_pred.get("xg_home", 0) or 0)
+    xg_away = float(score_pred.get("xg_away", 0) or 0)
+    xg_total = xg_home + xg_away
+    if xg_total > 3.0:
+        parts.append(f"Gol beklentisi yüksek (toplam xG: {xg_total:.1f})")
+    elif xg_total < 2.0:
+        parts.append(f"Düşük gol beklentisi (toplam xG: {xg_total:.1f})")
+
+    # Consensus check
+    consensus = str(v27_engine.get("consensus") or "").upper()
+    if consensus == "AGREE":
+        parts.append("Model motorları aynı fikirde")
+    elif consensus == "DISAGREE":
+        parts.append("Model motorları farklı sonuçlara ulaşıyor — belirsizlik var")
+
+    # Action-specific
+    if action == "BET":
+        market_tr = _market_to_turkish(
+            main_pick.get("market", ""), main_pick.get("pick", "")
+        )
+        edge = float(main_pick.get("ev_edge", 0) or 0)
+        parts.append(
+            f"{market_tr} yönünde değer tespit edildi (EV edge: {edge:+.1%})"
+        )
+    elif action == "SKIP":
+        parts.append(
+            "Hiçbir markette piyasanın fiyatlamadığı bir avantaj görülmüyor"
+        )
+
+    # Risk
+    risk_level = str(risk.get("level") or "MEDIUM").upper()
+    if risk_level == "HIGH":
+        parts.append("⚠️ Risk seviyesi yüksek")
+    elif risk_level == "EXTREME":
+        parts.append("🔴 Çok yüksek risk — dikkatli olun")
+
+    # Data quality
+    quality_label = str(data_quality.get("label") or "MEDIUM").upper()
+    if quality_label == "LOW":
+        parts.append("Veri kalitesi düşük — tahminler daha az güvenilir")
+
+    return ". ".join(parts) + "."
+
+
+# ═══════════════════════════════════════════════════════════════════════
+#  Quick Notes
+# ═══════════════════════════════════════════════════════════════════════
+
+def _build_notes(
+    market_board: Dict[str, Any],
+    v27_engine: Dict[str, Any],
+    score_pred: Dict[str, Any],
+    risk: Dict[str, Any],
+    home: str,
+    away: str,
+) -> List[str]:
+    notes: List[str] = []
+    triple_value = v27_engine.get("triple_value") or {}
+    odds_band = v27_engine.get("odds_band") or {}
+
+    # MS note
+    ms = market_board.get("MS") or {}
+    ms_conf = float(ms.get("confidence", 0) or 0)
+    if ms_conf < 45:
+        notes.append("Maç sonucu belirsiz, net favori yok")
+    elif ms.get("pick") == "1":
+        notes.append(f"{home} favori ama oran değerli mi kontrol et")
+    elif ms.get("pick") == "2":
+        notes.append(f"{away} favori ama oran değerli mi kontrol et")
+
+    # OU25 note
+    ou25 = market_board.get("OU25") or {}
+    ou25_probs = ou25.get("probs") or {}
+    over_prob = float(ou25_probs.get("over", 0.5) or 0.5)
+    if over_prob > 0.58:
+        notes.append("2.5 Üst yönünde eğilim var")
+    elif over_prob < 0.42:
+        notes.append("2.5 Alt yönünde eğilim var")
+    else:
+        notes.append("2.5 Üst/Alt dengeli — kesin sinyal yok")
+
+    # BTTS note
+    btts = market_board.get("BTTS") or {}
+    btts_probs = btts.get("probs") or {}
+    btts_yes = float(btts_probs.get("yes", 0.5) or 0.5)
+    if btts_yes > 0.58:
+        notes.append("Her iki takımın da gol atması bekleniyor")
+    elif btts_yes < 0.42:
+        notes.append("KG olasılığı düşük")
+
+    # HT note
+    ht = market_board.get("HT") or {}
+    ht_pick = ht.get("pick", "")
+    ht_conf = float(ht.get("confidence", 0) or 0)
+    if ht_conf > 40 and ht_pick:
+        ht_label = {"1": f"İY {home}", "2": f"İY {away}", "X": "İY beraberlik"}.get(
+            ht_pick, f"İY {ht_pick}"
+        )
+        notes.append(f"{ht_label} yönünde hafif sinyal (%{ht_conf:.0f})")
+
+    # Risk warnings
+    warnings = risk.get("warnings") or []
+    for w in warnings[:2]:
+        notes.append(f"⚠️ {w}")
+
+    return notes
+
+
+# ═══════════════════════════════════════════════════════════════════════
+#  Contradiction Detection
+# ═══════════════════════════════════════════════════════════════════════
+
+def _detect_contradictions(
+    market_board: Dict[str, Any],
+    v27_engine: Dict[str, Any],
+    package: Dict[str, Any],
+) -> List[str]:
+    """
+    Detect cases where model prediction and odds band/triple value
+    point in opposite directions — the user's main complaint.
+    """
+    contradictions: List[str] = []
+    triple_value = v27_engine.get("triple_value") or {}
+    predictions = v27_engine.get("predictions") or {}
+
+    # MS contradiction: model says home but triple_value says away has value
+    ms_preds = predictions.get("ms") or {}
+    ms_home = float(ms_preds.get("home", 0) or 0)
+    ms_away = float(ms_preds.get("away", 0) or 0)
+    home_triple = triple_value.get("home") or {}
+    away_triple = triple_value.get("away") or {}
+
+    model_favours_home = ms_home > ms_away
+    away_is_value = bool(away_triple.get("is_value"))
+    home_is_value = bool(home_triple.get("is_value"))
+
+    if model_favours_home and away_is_value:
+        contradictions.append(
+            "Model ev sahibini favori görüyor ama oran bandı deplasmanda değer buluyor — "
+            "bu çelişki nedeniyle MS tahminine dikkatli yaklaş"
+        )
+    elif not model_favours_home and home_is_value:
+        contradictions.append(
+            "Model deplasmanı favori görüyor ama oran bandı ev sahibinde değer buluyor — "
+            "bu çelişki nedeniyle MS tahminine dikkatli yaklaş"
+        )
+
+    # HT contradiction
+    ht_board = market_board.get("HT") or {}
+    ht_pick = ht_board.get("pick", "")
+    ht_home_triple = triple_value.get("ht_home") or {}
+    ht_away_triple = triple_value.get("ht_away") or {}
+
+    if ht_pick == "1" and bool(ht_away_triple.get("is_value")):
+        contradictions.append(
+            "Model İY ev sahibi diyor ama oran bandı İY deplasmanda değer buluyor — "
+            "İY tahmini güvenilir değil"
+        )
+    elif ht_pick == "2" and bool(ht_home_triple.get("is_value")):
+        contradictions.append(
+            "Model İY deplasman diyor ama oran bandı İY ev sahibinde değer buluyor — "
+            "İY tahmini güvenilir değil"
+        )
+
+    # OU25 contradiction
+    ou25_board = market_board.get("OU25") or {}
+    ou25_pick = ou25_board.get("pick", "")
+    ou25_over_triple = triple_value.get("ou25_over") or {}
+    ou25_under_triple = triple_value.get("ou25_under") or {}
+
+    if ou25_pick == "Üst" and bool(ou25_under_triple.get("is_value")):
+        contradictions.append(
+            "Model 2.5 Üst diyor ama oran bandı 2.5 Alt'ta değer buluyor — çelişki var"
+        )
+    elif ou25_pick == "Alt" and bool(ou25_over_triple.get("is_value")):
+        contradictions.append(
+            "Model 2.5 Alt diyor ama oran bandı 2.5 Üst'te değer buluyor — çelişki var"
+        )
+
+    return contradictions
+
+
+# ═══════════════════════════════════════════════════════════════════════
+#  Helpers
+# ═══════════════════════════════════════════════════════════════════════
+
+def _overall_confidence_label(
+    main_pick: Dict[str, Any],
+    data_quality: Dict[str, Any],
+) -> str:
+    """Overall confidence label for the entire analysis."""
+    quality_score = float(data_quality.get("score", 0.5) or 0.5)
+    main_conf = float(
+        main_pick.get("calibrated_confidence", main_pick.get("confidence", 0)) or 0
+    )
+    main_playable = bool(main_pick.get("playable"))
+
+    if main_playable and main_conf >= 60 and quality_score >= 0.8:
+        return "YÜKSEK"
+    if main_playable and main_conf >= 45:
+        return "ORTA"
+    if main_conf >= 30:
+        return "DÜŞÜK"
+    return "ÇOK DÜŞÜK"
+
+
+_MARKET_TR_MAP = {
+    "MS": {"1": "Maç Sonucu Ev Sahibi", "2": "Maç Sonucu Deplasman", "X": "Beraberlik"},
+    "DC": {"1X": "Çifte Şans 1X", "X2": "Çifte Şans X2", "12": "Çifte Şans 12"},
+    "OU25": {"Üst": "2.5 Üst", "Alt": "2.5 Alt", "Over": "2.5 Üst", "Under": "2.5 Alt"},
+    "OU15": {"Üst": "1.5 Üst", "Alt": "1.5 Alt", "Over": "1.5 Üst", "Under": "1.5 Alt"},
+    "OU35": {"Üst": "3.5 Üst", "Alt": "3.5 Alt", "Over": "3.5 Üst", "Under": "3.5 Alt"},
+    "BTTS": {"KG Var": "Karşılıklı Gol Var", "KG Yok": "Karşılıklı Gol Yok",
+             "Yes": "Karşılıklı Gol Var", "No": "Karşılıklı Gol Yok"},
+    "HT": {"1": "İlk Yarı Ev Sahibi", "2": "İlk Yarı Deplasman", "X": "İlk Yarı Beraberlik"},
+    "HT_OU05": {"Üst": "İY 0.5 Üst", "Alt": "İY 0.5 Alt"},
+    "HT_OU15": {"Üst": "İY 1.5 Üst", "Alt": "İY 1.5 Alt"},
+    "OE": {"Tek": "Tek", "Çift": "Çift", "Odd": "Tek", "Even": "Çift"},
+    "CARDS": {"Üst": "Kart Üst", "Alt": "Kart Alt"},
+}
+
+
+def _market_to_turkish(market: str, pick: str) -> str:
+    market_map = _MARKET_TR_MAP.get(market, {})
+    result = market_map.get(pick)
+    if result:
+        return result
+    return f"{market} {pick}"
@@ -51,8 +51,10 @@ from core.engines.player_predictor import PlayerPrediction, get_player_predictor
 from services.feature_enrichment import FeatureEnrichmentService
 from services.betting_brain import BettingBrain
 from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
+from services.match_commentary import generate_match_commentary
 from utils.top_leagues import load_top_league_ids
 from utils.league_reliability import load_league_reliability
+from config.config_loader import build_threshold_dict, get_threshold_default


@dataclass
@@ -165,99 +167,15 @@ class SingleMatchOrchestrator:
        self.league_reliability = load_league_reliability()
        self.enrichment = FeatureEnrichmentService()
        self.odds_band_analyzer = OddsBandAnalyzer()
-        # ── V32 Calibration Rebalance ──────────────────────────────────
-        # RULE: max_reachable = 100 × calibration MUST be > min_conf + 8
-        # Previous values had 5 markets where this was IMPOSSIBLE:
-        #   HT(0.42×100=42 < 45), HCAP(0.40×100=40 < 46), HTFT(0.28×100=28 < 32)
-        #   HT_OU15(0.46×100=46 < 48), CARDS(0.45×100=45 < 48)
-        # These markets could NEVER become playable → all predictions were PASS.
-        #
-        # New calibration: conservative but mathematically achievable.
-        # Each market's calibration ensures high-confidence model outputs CAN pass.
-        self.market_calibration: Dict[str, float] = {
-            "MS": 0.62,       # max=62 vs min=42 ✓ (was 0.48→max=48 vs 44 ⚠️)
-            "DC": 0.82,       # max=82 vs min=52 ✓ (unchanged, already good)
-            "OU15": 0.84,     # max=84 vs min=55 ✓ (unchanged, already good)
-            "OU25": 0.68,     # max=68 vs min=48 ✓ (was 0.54→max=54 vs 52 ⚠️)
-            "OU35": 0.60,     # max=60 vs min=48 ✓ (was 0.44→max=44 vs 54 ❌)
-            "BTTS": 0.65,     # max=65 vs min=46 ✓ (was 0.50→max=50 vs 50 ⚠️)
-            "HT": 0.58,       # max=58 vs min=40 ✓ (was 0.42→max=42 vs 45 ❌)
-            "HT_OU05": 0.68,  # max=68 vs min=50 ✓ (unchanged)
-            "HT_OU15": 0.60,  # max=60 vs min=42 ✓ (was 0.46→max=46 vs 48 ❌)
-            "OE": 0.62,       # max=62 vs min=46 ✓ (was 0.58→max=58 vs 50 ok)
-            "CARDS": 0.58,    # max=58 vs min=42 ✓ (was 0.45→max=45 vs 48 ❌)
-            "HCAP": 0.56,     # max=56 vs min=40 ✓ (was 0.40→max=40 vs 46 ❌)
-            "HTFT": 0.45,     # max=45 vs min=28 ✓ (was 0.28→max=28 vs 32 ❌)
-        }
-        # Min confidence: lowered to be achievable (max_reachable - 16 to -20)
-        self.market_min_conf: Dict[str, float] = {
-            "MS": 20.0,       # was 42 — drastically lowered to allow underdog/draw value bets
-            "DC": 40.0,       # was 52
-            "OU15": 45.0,     # was 55
-            "OU25": 30.0,     # was 48
-            "OU35": 20.0,     # was 48
-            "BTTS": 30.0,     # was 46
-            "HT": 20.0,       # was 40
-            "HT_OU05": 35.0,  # was 50
-            "HT_OU15": 25.0,  # was 42
-            "OE": 35.0,       # was 46
-            "CARDS": 30.0,    # was 42
-            "HCAP": 25.0,     # was 40
-            "HTFT": 10.0,     # was 28
-        }
-        # Min play score: Significantly reduced to stop blocking value bets on underdogs
-        self.market_min_play_score: Dict[str, float] = {
-            "MS": 30.0,       # was 65
-            "DC": 55.0,       # was 58
-            "OU15": 55.0,     # was 60
-            "OU25": 45.0,     # was 64
-            "OU35": 35.0,     # was 68
-            "BTTS": 45.0,     # was 64
-            "HT": 30.0,       # was 66
-            "HT_OU05": 45.0,  # was 60
-            "HT_OU15": 35.0,  # was 64
-            "OE": 35.0,       # was 60
-            "CARDS": 40.0,    # was 66
-            "HCAP": 35.0,     # was 68
-            "HTFT": 20.0,     # was 72
-        }
-        self.market_min_edge: Dict[str, float] = {
-            "MS": 0.02,       # was 0.03 — slight relaxation
-            "DC": 0.01,       # unchanged
-            "OU15": 0.01,     # unchanged
-            "OU25": 0.02,     # unchanged
-            "OU35": 0.03,     # was 0.04
-            "BTTS": 0.02,     # was 0.03
-            "HT": 0.03,       # was 0.04
-            "HT_OU05": 0.01,  # unchanged
-            "HT_OU15": 0.02,  # was 0.03
-            "OE": 0.02,       # unchanged
-            "CARDS": 0.02,    # was 0.03
-            "HCAP": 0.03,     # was 0.04
-            "HTFT": 0.05,     # was 0.06
-        }
-        self.odds_band_min_sample: Dict[str, float] = {
-            "MS": 8.0,
-            "DC": 8.0,
-            "OU15": 8.0,
-            "OU25": 8.0,
-            "OU35": 8.0,
-            "BTTS": 8.0,
-            "HT": 8.0,
-            "HT_OU05": 8.0,
-            "HT_OU15": 8.0,
-        }
-        self.odds_band_min_edge: Dict[str, float] = {
-            "MS": 0.015,
-            "DC": 0.012,
-            "OU15": 0.012,
-            "OU25": 0.015,
-            "OU35": 0.018,
-            "BTTS": 0.015,
-            "HT": 0.018,
-            "HT_OU05": 0.012,
-            "HT_OU15": 0.015,
-        }
+        # ── Market Thresholds (loaded from config/market_thresholds.json) ──
+        # All values are centralized in a single JSON file for easy tuning
+        # without code changes. See config/market_thresholds.json for details.
+        self.market_calibration: Dict[str, float] = build_threshold_dict("calibration")
+        self.market_min_conf: Dict[str, float] = build_threshold_dict("min_conf")
+        self.market_min_play_score: Dict[str, float] = build_threshold_dict("min_play_score")
+        self.market_min_edge: Dict[str, float] = build_threshold_dict("min_edge")
+        self.odds_band_min_sample: Dict[str, float] = build_threshold_dict("odds_band_min_sample")
+        self.odds_band_min_edge: Dict[str, float] = build_threshold_dict("odds_band_min_edge")

    def _get_v25_predictor(self) -> V25Predictor:
        if self.v25_predictor is None:
@@ -720,7 +638,7 @@ class SingleMatchOrchestrator:

        signal: Dict[str, Any] = {}

-        def _temperature_scale(probs_dict: Dict[str, float], temperature: float = 2.5) -> Dict[str, float]:
+        def _temperature_scale(probs_dict: Dict[str, float], temperature: float = 1.5) -> Dict[str, float]:
            """
            Apply temperature scaling to soften overconfident model outputs.

@@ -729,19 +647,22 @@ class SingleMatchOrchestrator:
            T=1.0 → no change, T>1 → softer probabilities.

            Standard approach for post-hoc model calibration (Guo et al., 2017).
+
+            V34: Reduced from 2.5 to 1.5 — V25 model is already calibrated via
+            odds-aware training. Excessive flattening was destroying signal.
            """
            import math
            eps = 1e-7  # numerical stability
            n = len(probs_dict)

-            # Determine appropriate temperature based on market type
+            # V34: Reduced temperature — odds-aware model is already calibrated
            # Binary markets (2-class) tend to be more overconfident in LGB
            if n <= 2:
-                T = max(temperature, 2.0)
+                T = max(temperature, 1.5)   # was 2.0
            elif n == 3:
-                T = max(temperature * 0.8, 1.5)  # 3-way slightly less aggressive
+                T = max(temperature * 0.8, 1.2)  # was 1.5 — 3-way slightly less aggressive
            else:
-                T = max(temperature * 0.6, 1.3)  # 9-way (HTFT) already spread
+                T = max(temperature * 0.6, 1.0)  # was 1.3 — 9-way (HTFT) already spread

            # Convert to log-odds and apply temperature
            labels = list(probs_dict.keys())
@@ -767,8 +688,8 @@ class SingleMatchOrchestrator:
            Applies temperature scaling to convert overconfident LightGBM outputs
            into realistic, calibrated probabilities.
            """
-            # Apply temperature scaling to soften extreme probabilities
-            scaled_probs = _temperature_scale(probs_dict, temperature=2.5)
+            # V34: Apply temperature scaling — reduced from 2.5 to 1.5
+            scaled_probs = _temperature_scale(probs_dict, temperature=1.5)

            best_label = max(scaled_probs, key=scaled_probs.get)
            best_prob = float(scaled_probs[best_label])
@@ -1532,6 +1453,13 @@ class SingleMatchOrchestrator:

        base_package = self._apply_upper_brain_guards(base_package)

+        # ── Match Commentary: human-readable summary ──────────────
+        try:
+            base_package["match_commentary"] = generate_match_commentary(base_package)
+        except Exception as e:
+            print(f"[Commentary] ⚠ Generation failed (non-fatal): {e}")
+            base_package["match_commentary"] = None
+
        mode = str(getattr(self, "engine_mode", "v28-pro-max") or "v28-pro-max").lower()
        if mode not in {"v25", "v26", "dual", "v28", "v28-pro-max"}:
            mode = "v25"
@@ -1545,6 +1473,7 @@ class SingleMatchOrchestrator:
        )

        if mode == "v26":
+            shadow_package["match_commentary"] = base_package.get("match_commentary")
            return shadow_package
        if mode == "dual":
            merged = dict(base_package)
@@ -5239,7 +5168,9 @@ class SingleMatchOrchestrator:
        reasons: List[str] = []
        playable = True
        
-        is_value_sniper = ev_edge >= 0.03
+        # V34: Broadened value_sniper bypass — odds-aware model rarely shows 3% EV edge
+        # Allow high-confidence predictions OR modest positive EV to bypass secondary gates
+        is_value_sniper = ev_edge >= 0.008 or calibrated_conf >= 55.0

        if calibrated_conf < min_conf:
            if not is_value_sniper:
@@ -5261,29 +5192,48 @@ class SingleMatchOrchestrator:
            # Most pre-match predictions use probable_xi — blocking kills all output
            lineup_penalty += 6.0
            reasons.append("lineup_probable_xi_penalty")
-        base_score = calibrated_conf + (simple_edge * 100.0 * edge_multiplier)
+        # V34: Added confidence bonus — high raw model probability gets a boost
+        # This prevents over-penalization when edge is near-zero but model is confident
+        raw_top_prob = float(row.get("probability", 0.0))
+        confidence_bonus = 0.0
+        if raw_top_prob >= 0.65:
+            confidence_bonus = 15.0
+        elif raw_top_prob >= 0.55:
+            confidence_bonus = 10.0
+        elif raw_top_prob >= 0.45:
+            confidence_bonus = 5.0
+        base_score = calibrated_conf + (simple_edge * 100.0 * edge_multiplier) + confidence_bonus
        play_score = max(
            0.0,
            min(100.0, base_score - risk_penalty - quality_penalty - lineup_penalty),
        )
-        if bool(band_verdict.get("required")) and not bool(band_verdict.get("aligned")):
+        # V34: odds_band gate — only hard-block when band data is AVAILABLE and aligned=False
+        # When band data is sparse (available=False), skip alignment check entirely
+        band_available = bool(band_verdict.get("available", False))
+        if band_available and bool(band_verdict.get("required")) and not bool(band_verdict.get("aligned")):
            if not is_value_sniper:
                playable = False
                reasons.append(str(band_verdict.get("reason") or "odds_band_not_aligned"))
-        if bool(band_verdict.get("required")) and implied_prob > 0.0 and model_edge <= 0.0:
-            if not is_value_sniper:
-                playable = False
-                reasons.append(f"model_not_above_market_{model_edge:+.3f}")
-        # V31: negative edge threshold adapts to league reliability
-        # Reliable league: stricter (-0.03), unreliable: looser (-0.08)
-        neg_edge_threshold = -0.03 - (1.0 - odds_rel) * 0.05
+        elif not band_available and bool(band_verdict.get("required")):
+            # Sparse data — log but don't block
+            reasons.append("odds_band_data_sparse_skipped")
+        # V34: REMOVED model_not_above_market gate entirely
+        # V25 model is odds-informed BY DESIGN → model output ≈ market-implied probability
+        # Requiring model > market is mathematically impossible with this architecture
+        # The negative_model_edge gate below still catches truly anti-value picks
+        # V34: negative edge threshold relaxed — odds-aware model's edge is naturally near zero
+        # Reliable league: -0.08, unreliable: up to -0.15
+        # Only blocks truly anti-value picks (model significantly below market)
+        neg_edge_threshold = -0.08 - (1.0 - odds_rel) * 0.07
        if odd > 1.0 and simple_edge < neg_edge_threshold:
            if not is_value_sniper:
                playable = False
                reasons.append(f"negative_model_edge_{simple_edge:+.3f}")
+        # V34: Added value_sniper bypass — was missing before, causing hard blocks
        if odd > 1.0 and ev_edge < min_edge:
-            playable = False
-            reasons.append(f"below_market_edge_threshold_{ev_edge:+.3f}")
+            if not is_value_sniper:
+                playable = False
+                reasons.append(f"below_market_edge_threshold_{ev_edge:+.3f}")
        if play_score < min_play_score:
            if not is_value_sniper:
                playable = False