feat(ai-engine): relax value sniper thresholds and logic
This commit is contained in:
@@ -20,7 +20,7 @@ from sklearn.isotonic import IsotonicRegression
|
||||
warnings.filterwarnings("ignore")
|
||||
|
||||
AI_DIR = Path(__file__).resolve().parent.parent
|
||||
DATA_CSV = AI_DIR / "data" / "training_data_v27.csv"
|
||||
DATA_CSV = AI_DIR / "data" / "training_data.csv"
|
||||
MODELS_DIR = AI_DIR / "models" / "v27"
|
||||
MODELS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
@@ -373,15 +373,52 @@ def main():
|
||||
print("\n" + "─"*65)
|
||||
print(" STAGE A.2: Fundamentals-Only O/U 2.5 Model")
|
||||
print("─"*65)
|
||||
y_tr_ou = tr["label_ou25"].values
|
||||
y_va_ou = va["label_ou25"].values
|
||||
y_tr_ou = tr['label_ou25'].values
|
||||
y_va_ou = va['label_ou25'].values
|
||||
mask_tr = ~np.isnan(y_tr_ou)
|
||||
mask_va = ~np.isnan(y_va_ou)
|
||||
if mask_tr.sum() > 1000:
|
||||
ou_models = train_fundamentals_model(
|
||||
X_tr[mask_tr], y_tr_ou[mask_tr].astype(int),
|
||||
X_va[mask_va], y_va_ou[mask_va].astype(int),
|
||||
clean_feats, "ou25")
|
||||
clean_feats, 'ou25')
|
||||
|
||||
# ── STAGE A.3: BTTS Model ──
|
||||
btts_models = None
|
||||
if 'label_btts' in tr.columns:
|
||||
print('\n' + '─' * 65)
|
||||
print(' STAGE A.3: Fundamentals-Only BTTS Model')
|
||||
print('─' * 65)
|
||||
y_tr_btts = tr['label_btts'].values
|
||||
y_va_btts = va['label_btts'].values
|
||||
mask_tr_btts = ~np.isnan(y_tr_btts)
|
||||
mask_va_btts = ~np.isnan(y_va_btts)
|
||||
if mask_tr_btts.sum() > 1000:
|
||||
btts_models = train_fundamentals_model(
|
||||
X_tr[mask_tr_btts], y_tr_btts[mask_tr_btts].astype(int),
|
||||
X_va[mask_va_btts], y_va_btts[mask_va_btts].astype(int),
|
||||
clean_feats, 'btts')
|
||||
|
||||
# Quick val accuracy
|
||||
btts_probs = ensemble_predict(
|
||||
btts_models,
|
||||
X_va[mask_va_btts],
|
||||
clean_feats,
|
||||
n_class=2,
|
||||
)
|
||||
btts_acc = accuracy_score(
|
||||
y_va_btts[mask_va_btts].astype(int),
|
||||
btts_probs.argmax(1),
|
||||
)
|
||||
btts_ll = log_loss(
|
||||
y_va_btts[mask_va_btts].astype(int),
|
||||
btts_probs,
|
||||
)
|
||||
print(f'\n BTTS Ensemble Val: acc={btts_acc:.4f}, logloss={btts_ll:.4f}')
|
||||
# Compare with naive baseline (always predict majority class)
|
||||
btts_majority = y_va_btts[mask_va_btts].astype(int).mean()
|
||||
print(f' BTTS baseline: {max(btts_majority, 1-btts_majority):.4f} (majority class)')
|
||||
print(f' Model vs baseline: {btts_acc - max(btts_majority, 1-btts_majority):+.4f}')
|
||||
|
||||
# ── STAGE C: Backtest ──
|
||||
print("\n" + "─"*65)
|
||||
@@ -422,13 +459,58 @@ def main():
|
||||
|
||||
# OU25 backtest
|
||||
if ou_models:
|
||||
print("\n --- O/U 2.5 Backtest ---")
|
||||
print('\n --- O/U 2.5 Backtest ---')
|
||||
for edge in [0.05, 0.07, 0.10]:
|
||||
r = backtest_value(ou_models, te, clean_feats, "ou25",
|
||||
r = backtest_value(ou_models, te, clean_feats, 'ou25',
|
||||
min_edge=edge, min_odds=1.50, max_odds=3.0,
|
||||
use_kelly=True)
|
||||
if r.get("total", 0) > 0:
|
||||
print_backtest(r, f"OU25 edge>{edge}")
|
||||
if r.get('total', 0) > 0:
|
||||
print_backtest(r, f'OU25 edge>{edge}')
|
||||
|
||||
# BTTS backtest
|
||||
if btts_models and 'label_btts' in te.columns:
|
||||
print('\n --- BTTS Backtest ---')
|
||||
# Build BTTS odds for backtest
|
||||
if 'odds_btts_y' in te.columns and 'odds_btts_n' in te.columns:
|
||||
te_btts = te.copy()
|
||||
te_btts['odds_btts_y'] = pd.to_numeric(
|
||||
te_btts['odds_btts_y'], errors='coerce',
|
||||
).fillna(1.85)
|
||||
te_btts['odds_btts_n'] = pd.to_numeric(
|
||||
te_btts['odds_btts_n'], errors='coerce',
|
||||
).fillna(1.85)
|
||||
|
||||
for edge in [0.05, 0.07, 0.10]:
|
||||
X_test = te_btts[clean_feats].values
|
||||
probs = ensemble_predict(btts_models, X_test, clean_feats, 2)
|
||||
y_btts = te_btts['label_btts'].values.astype(int)
|
||||
odds_arr = te_btts[['odds_btts_n', 'odds_btts_y']].values
|
||||
m_arr = 1 / odds_arr
|
||||
impl = m_arr / m_arr.sum(axis=1, keepdims=True)
|
||||
|
||||
total_bets = 0
|
||||
wins = 0
|
||||
pnl = 0.0
|
||||
for i in range(len(y_btts)):
|
||||
for cls in range(2):
|
||||
e = probs[i, cls] - impl[i, cls]
|
||||
o = odds_arr[i, cls]
|
||||
if e < edge or o < 1.50 or o > 3.0:
|
||||
continue
|
||||
total_bets += 1
|
||||
won = (y_btts[i] == cls)
|
||||
if won:
|
||||
wins += 1
|
||||
pnl += 10 * (o - 1)
|
||||
else:
|
||||
pnl -= 10
|
||||
if total_bets > 0:
|
||||
roi = pnl / (total_bets * 10) * 100
|
||||
hit = wins / total_bets * 100
|
||||
print(
|
||||
f' Edge>{edge:.2f}: {total_bets} bets, '
|
||||
f'hit={hit:.1f}%, ROI={roi:+.1f}%'
|
||||
)
|
||||
|
||||
# ── Feature importance ──
|
||||
if "lgb" in ms_models:
|
||||
@@ -452,25 +534,40 @@ def main():
|
||||
|
||||
if ou_models:
|
||||
for name, m in ou_models.items():
|
||||
p = MODELS_DIR / f"v27_ou25_{name}.pkl"
|
||||
with open(p, "wb") as f:
|
||||
p = MODELS_DIR / f'v27_ou25_{name}.pkl'
|
||||
with open(p, 'wb') as f:
|
||||
pickle.dump(m, f)
|
||||
print(f" ✓ {p.name}")
|
||||
print(f' ✓ {p.name}')
|
||||
|
||||
if btts_models:
|
||||
for name, m in btts_models.items():
|
||||
p = MODELS_DIR / f'v27_btts_{name}.pkl'
|
||||
with open(p, 'wb') as f:
|
||||
pickle.dump(m, f)
|
||||
print(f' ✓ {p.name}')
|
||||
|
||||
meta = {
|
||||
"version": "v27-pro", "trained_at": time.strftime("%Y-%m-%d %H:%M:%S"),
|
||||
"approach": "odds-free fundamentals + value edge detection",
|
||||
"feature_count": len(clean_feats),
|
||||
"total_samples": len(df),
|
||||
"val_acc": round(val_acc, 4), "val_ll": round(val_ll, 4),
|
||||
"best_config": {k: v for k, v in best_cfg.items() if k != "result"} if best_cfg else {},
|
||||
"markets": ["ms"] + (["ou25"] if ou_models else []),
|
||||
'version': 'v27-pro',
|
||||
'trained_at': time.strftime('%Y-%m-%d %H:%M:%S'),
|
||||
'approach': 'odds-free fundamentals + value edge detection',
|
||||
'feature_count': len(clean_feats),
|
||||
'total_samples': len(df),
|
||||
'val_acc': round(val_acc, 4),
|
||||
'val_ll': round(val_ll, 4),
|
||||
'best_config': {
|
||||
k: v for k, v in best_cfg.items() if k != 'result'
|
||||
} if best_cfg else {},
|
||||
'markets': (
|
||||
['ms']
|
||||
+ (['ou25'] if ou_models else [])
|
||||
+ (['btts'] if btts_models else [])
|
||||
),
|
||||
}
|
||||
with open(MODELS_DIR / "v27_metadata.json", "w") as f:
|
||||
with open(MODELS_DIR / 'v27_metadata.json', 'w') as f:
|
||||
json.dump(meta, f, indent=2, default=str)
|
||||
with open(MODELS_DIR / "v27_feature_cols.json", "w") as f:
|
||||
with open(MODELS_DIR / 'v27_feature_cols.json', 'w') as f:
|
||||
json.dump(clean_feats, f, indent=2)
|
||||
print(f" ✓ metadata + feature_cols")
|
||||
print(f' ✓ metadata + feature_cols')
|
||||
|
||||
print(f"\n Total time: {(time.time()-t0)/60:.1f} min")
|
||||
print(" DONE!")
|
||||
|
||||
Reference in New Issue
Block a user