feat(ai-engine): relax value sniper thresholds and logic

2026-05-06 17:44:45 +03:00
parent 5b5f83c8cf
commit 4f7090e2d9
13 changed files with 2040 additions and 382 deletions
@@ -20,7 +20,7 @@ from sklearn.isotonic import IsotonicRegression
 warnings.filterwarnings("ignore")

 AI_DIR = Path(__file__).resolve().parent.parent
-DATA_CSV = AI_DIR / "data" / "training_data_v27.csv"
+DATA_CSV = AI_DIR / "data" / "training_data.csv"
 MODELS_DIR = AI_DIR / "models" / "v27"
 MODELS_DIR.mkdir(parents=True, exist_ok=True)

@@ -373,15 +373,52 @@ def main():
        print("\n" + "─"*65)
        print("  STAGE A.2: Fundamentals-Only O/U 2.5 Model")
        print("─"*65)
-        y_tr_ou = tr["label_ou25"].values
-        y_va_ou = va["label_ou25"].values
+        y_tr_ou = tr['label_ou25'].values
+        y_va_ou = va['label_ou25'].values
        mask_tr = ~np.isnan(y_tr_ou)
        mask_va = ~np.isnan(y_va_ou)
        if mask_tr.sum() > 1000:
            ou_models = train_fundamentals_model(
                X_tr[mask_tr], y_tr_ou[mask_tr].astype(int),
                X_va[mask_va], y_va_ou[mask_va].astype(int),
-                clean_feats, "ou25")
+                clean_feats, 'ou25')
+
+    # ── STAGE A.3: BTTS Model ──
+    btts_models = None
+    if 'label_btts' in tr.columns:
+        print('\n' + '─' * 65)
+        print('  STAGE A.3: Fundamentals-Only BTTS Model')
+        print('─' * 65)
+        y_tr_btts = tr['label_btts'].values
+        y_va_btts = va['label_btts'].values
+        mask_tr_btts = ~np.isnan(y_tr_btts)
+        mask_va_btts = ~np.isnan(y_va_btts)
+        if mask_tr_btts.sum() > 1000:
+            btts_models = train_fundamentals_model(
+                X_tr[mask_tr_btts], y_tr_btts[mask_tr_btts].astype(int),
+                X_va[mask_va_btts], y_va_btts[mask_va_btts].astype(int),
+                clean_feats, 'btts')
+
+            # Quick validation check: ensemble accuracy and log-loss
+            btts_probs = ensemble_predict(
+                btts_models,
+                X_va[mask_va_btts],
+                clean_feats,
+                n_class=2,
+            )
+            btts_acc = accuracy_score(
+                y_va_btts[mask_va_btts].astype(int),
+                btts_probs.argmax(1),
+            )
+            btts_ll = log_loss(
+                y_va_btts[mask_va_btts].astype(int),
+                btts_probs,
+            )
+            print(f'\n  BTTS Ensemble Val: acc={btts_acc:.4f}, logloss={btts_ll:.4f}')
+            # Compare with naive baseline (always predict majority class)
+            btts_majority = y_va_btts[mask_va_btts].astype(int).mean()
+            print(f'  BTTS baseline: {max(btts_majority, 1-btts_majority):.4f} (majority class)')
+            print(f'  Model vs baseline: {btts_acc - max(btts_majority, 1-btts_majority):+.4f}')

    # ── STAGE C: Backtest ──
    print("\n" + "─"*65)
@@ -422,13 +459,58 @@ def main():

    # OU25 backtest
    if ou_models:
-        print("\n  --- O/U 2.5 Backtest ---")
+        print('\n  --- O/U 2.5 Backtest ---')
        for edge in [0.05, 0.07, 0.10]:
-            r = backtest_value(ou_models, te, clean_feats, "ou25",
+            r = backtest_value(ou_models, te, clean_feats, 'ou25',
                               min_edge=edge, min_odds=1.50, max_odds=3.0,
                               use_kelly=True)
-            if r.get("total", 0) > 0:
-                print_backtest(r, f"OU25 edge>{edge}")
+            if r.get('total', 0) > 0:
+                print_backtest(r, f'OU25 edge>{edge}')
+
+    # BTTS backtest
+    if btts_models and 'label_btts' in te.columns:
+        print('\n  --- BTTS Backtest ---')
+        # Coerce BTTS odds to numeric; missing or unparseable odds default to 1.85
+        if 'odds_btts_y' in te.columns and 'odds_btts_n' in te.columns:
+            te_btts = te.copy()
+            te_btts['odds_btts_y'] = pd.to_numeric(
+                te_btts['odds_btts_y'], errors='coerce',
+            ).fillna(1.85)
+            te_btts['odds_btts_n'] = pd.to_numeric(
+                te_btts['odds_btts_n'], errors='coerce',
+            ).fillna(1.85)
+
+            for edge in [0.05, 0.07, 0.10]:
+                X_test = te_btts[clean_feats].values
+                probs = ensemble_predict(btts_models, X_test, clean_feats, 2)
+                y_btts = te_btts['label_btts'].values.astype(int)
+                odds_arr = te_btts[['odds_btts_n', 'odds_btts_y']].values
+                m_arr = 1 / odds_arr
+                impl = m_arr / m_arr.sum(axis=1, keepdims=True)
+
+                total_bets = 0
+                wins = 0
+                pnl = 0.0
+                for i in range(len(y_btts)):
+                    for cls in range(2):
+                        e = probs[i, cls] - impl[i, cls]
+                        o = odds_arr[i, cls]
+                        if e < edge or o < 1.50 or o > 3.0:
+                            continue
+                        total_bets += 1
+                        won = (y_btts[i] == cls)
+                        if won:
+                            wins += 1
+                            pnl += 10 * (o - 1)
+                        else:
+                            pnl -= 10
+                if total_bets > 0:
+                    roi = pnl / (total_bets * 10) * 100
+                    hit = wins / total_bets * 100
+                    print(
+                        f'    Edge>{edge:.2f}: {total_bets} bets, '
+                        f'hit={hit:.1f}%, ROI={roi:+.1f}%'
+                    )

    # ── Feature importance ──
    if "lgb" in ms_models:
@@ -452,25 +534,40 @@ def main():

    if ou_models:
        for name, m in ou_models.items():
-            p = MODELS_DIR / f"v27_ou25_{name}.pkl"
-            with open(p, "wb") as f:
+            p = MODELS_DIR / f'v27_ou25_{name}.pkl'
+            with open(p, 'wb') as f:
                pickle.dump(m, f)
-            print(f"  ✓ {p.name}")
+            print(f'  ✓ {p.name}')
+
+    if btts_models:
+        for name, m in btts_models.items():
+            p = MODELS_DIR / f'v27_btts_{name}.pkl'
+            with open(p, 'wb') as f:
+                pickle.dump(m, f)
+            print(f'  ✓ {p.name}')

    meta = {
-        "version": "v27-pro", "trained_at": time.strftime("%Y-%m-%d %H:%M:%S"),
-        "approach": "odds-free fundamentals + value edge detection",
-        "feature_count": len(clean_feats),
-        "total_samples": len(df),
-        "val_acc": round(val_acc, 4), "val_ll": round(val_ll, 4),
-        "best_config": {k: v for k, v in best_cfg.items() if k != "result"} if best_cfg else {},
-        "markets": ["ms"] + (["ou25"] if ou_models else []),
+        'version': 'v27-pro',
+        'trained_at': time.strftime('%Y-%m-%d %H:%M:%S'),
+        'approach': 'odds-free fundamentals + value edge detection',
+        'feature_count': len(clean_feats),
+        'total_samples': len(df),
+        'val_acc': round(val_acc, 4),
+        'val_ll': round(val_ll, 4),
+        'best_config': {
+            k: v for k, v in best_cfg.items() if k != 'result'
+        } if best_cfg else {},
+        'markets': (
+            ['ms']
+            + (['ou25'] if ou_models else [])
+            + (['btts'] if btts_models else [])
+        ),
    }
-    with open(MODELS_DIR / "v27_metadata.json", "w") as f:
+    with open(MODELS_DIR / 'v27_metadata.json', 'w') as f:
        json.dump(meta, f, indent=2, default=str)
-    with open(MODELS_DIR / "v27_feature_cols.json", "w") as f:
+    with open(MODELS_DIR / 'v27_feature_cols.json', 'w') as f:
        json.dump(clean_feats, f, indent=2)
-    print(f"  ✓ metadata + feature_cols")
+    print(f'  ✓ metadata + feature_cols')

    print(f"\n  Total time: {(time.time()-t0)/60:.1f} min")
    print("  DONE!")