+113
-200
@@ -1,223 +1,136 @@
|
||||
"""
|
||||
Real AI Engine Backtest Script
|
||||
==============================
|
||||
Uses the ACTUAL models (V20/V25 Ensemble) to predict historical matches.
|
||||
|
||||
Usage:
|
||||
python ai-engine/scripts/backtest_real.py
|
||||
Gerçek Odds Bazlı Backtest
|
||||
============================
|
||||
Model olasılığı vs gerçek bookmaker odds karşılaştırır.
|
||||
Edge varsa bahis açıldığı varsayılır, gerçek ROI hesaplanır.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import time
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
from datetime import datetime
|
||||
import os, sys, json
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import xgboost as xgb
|
||||
|
||||
# Add paths
|
||||
AI_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
ROOT_DIR = os.path.dirname(AI_DIR)
|
||||
sys.path.insert(0, ROOT_DIR)
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
# Fix for Windows path issues in scripts
|
||||
if "scripts" in os.path.basename(AI_DIR):
|
||||
ROOT_DIR = os.path.dirname(ROOT_DIR) # One level up if inside scripts folder
|
||||
# Parent of the directory that contains this script; the training data,
# the saved models and the generated reports are all resolved relative to it.
_BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
DATA_PATH = os.path.join(_BASE_DIR, 'data', 'training_data.csv')
MODELS_DIR = os.path.join(_BASE_DIR, 'models', 'v25')
REPORT_DIR = os.path.join(_BASE_DIR, 'reports')
|
||||
|
||||
from services.single_match_orchestrator import get_single_match_orchestrator, MatchData
|
||||
# Columns of the training CSV that are left out when the feature list is
# built (every other column is fed to the models): identifiers, `mst_utc`,
# score columns and all `label_*` target columns.
SKIP_COLS = {
    'match_id', 'home_team_id', 'away_team_id', 'league_id', 'mst_utc',
    'score_home', 'score_away', 'total_goals',
    'ht_score_home', 'ht_score_away', 'ht_total_goals',
    'label_ms', 'label_ou05', 'label_ou15', 'label_ou25', 'label_ou35', 'label_btts',
    'label_ht_result', 'label_ht_ou05', 'label_ht_ou15', 'label_ht_ft',
    'label_odd_even', 'label_yellow_cards', 'label_cards_ou45', 'label_handicap_ms',
}
|
||||
|
||||
def get_clean_dsn() -> str:
    """Return the PostgreSQL connection string used by the backtest.

    The ``BACKTEST_DSN`` environment variable takes precedence when it is set
    to a non-blank value. Otherwise the original built-in local default is
    returned, so existing invocations keep working unchanged.

    Returns:
        A ``postgresql://`` URI that is passed to ``psycopg2.connect``.
    """
    import os  # local import keeps the block self-contained

    # NOTE(review): this fallback embeds a password in source control.
    # Rotate the credential and remove the default once every environment
    # provides BACKTEST_DSN.
    default_dsn = "postgresql://suggestbet:SuGGesT2026SecuRe@localhost:15432/boilerplate_db"
    override = os.environ.get("BACKTEST_DSN", "").strip()
    return override or default_dsn
|
||||
# Each entry: (model_key, n_class, pred_class, label_col, odds_col, display_name)
#   model_key    - suffix of the saved model file (xgb_v25_<model_key>.json)
#   n_class      - number of classes the model predicts (3 for match result,
#                  2 for the binary markets)
#   pred_class   - class index whose probability is backed; a bet wins when
#                  the label column equals this value
#   label_col    - data column holding the actual outcome
#   odds_col     - data column holding the bookmaker odds for that outcome
#   display_name - label printed in the report
MARKETS = [
    ('ms', 3, 0, 'label_ms', 'odds_ms_h', 'MS-Ev'),
    ('ms', 3, 1, 'label_ms', 'odds_ms_d', 'MS-Ber'),
    ('ms', 3, 2, 'label_ms', 'odds_ms_a', 'MS-Dep'),
    ('ou15', 2, 1, 'label_ou15', 'odds_ou15_o', 'OU15-Ust'),
    ('ou15', 2, 0, 'label_ou15', 'odds_ou15_u', 'OU15-Alt'),
    ('ou25', 2, 1, 'label_ou25', 'odds_ou25_o', 'OU25-Ust'),
    ('ou25', 2, 0, 'label_ou25', 'odds_ou25_u', 'OU25-Alt'),
    ('ou35', 2, 1, 'label_ou35', 'odds_ou35_o', 'OU35-Ust'),
    ('ou35', 2, 0, 'label_ou35', 'odds_ou35_u', 'OU35-Alt'),
    ('btts', 2, 1, 'label_btts', 'odds_btts_y', 'BTTS-Var'),
    ('btts', 2, 0, 'label_btts', 'odds_btts_n', 'BTTS-Yok'),
]
|
||||
|
||||
def run_backtest():
|
||||
print("🚀 REAL AI BACKTEST: Sept 13, 2024 - Top Leagues")
|
||||
print("🧠 Engine: V30 Ensemble (V20+V25)")
|
||||
print("="*60)
|
||||
MIN_ODDS = 1.10
|
||||
MAX_ODDS = 10.0
|
||||
|
||||
# Load Top Leagues
|
||||
leagues_path = os.path.join(ROOT_DIR, "top_leagues.json")
|
||||
try:
|
||||
with open(leagues_path, 'r') as f:
|
||||
top_leagues = json.load(f)
|
||||
league_ids = tuple(str(lid) for lid in top_leagues)
|
||||
print(f"📋 Loaded {len(top_leagues)} top leagues.")
|
||||
except Exception as e:
|
||||
print(f"❌ Error loading top_leagues.json: {e}")
|
||||
return
|
||||
|
||||
# Date Range (Sept 13, 2024)
|
||||
start_dt = datetime(2024, 9, 13, 0, 0, 0)
|
||||
end_dt = datetime(2024, 9, 13, 23, 59, 59)
|
||||
start_ts = int(start_dt.timestamp() * 1000)
|
||||
end_ts = int(end_dt.timestamp() * 1000)
|
||||
def load_model(market):
    """Load the saved XGBoost booster for one market.

    Args:
        market: Model key used in the file name ``xgb_v25_<market>.json``
            inside ``MODELS_DIR``.

    Returns:
        The loaded ``xgb.Booster``, or ``None`` when no model file exists
        for that market.
    """
    model_file = os.path.join(MODELS_DIR, f'xgb_v25_{market}.json')
    if not os.path.exists(model_file):
        # A missing model is not an error: the caller only keeps the
        # markets for which a booster was returned.
        return None
    booster = xgb.Booster()
    booster.load_model(model_file)
    return booster
|
||||
|
||||
dsn = get_clean_dsn()
|
||||
conn = psycopg2.connect(dsn)
|
||||
cur = conn.cursor(cursor_factory=RealDictCursor)
|
||||
|
||||
# Fetch Matches
|
||||
cur.execute("""
|
||||
SELECT m.id, m.match_name, m.home_team_id, m.away_team_id,
|
||||
m.mst_utc, m.league_id, m.status, m.score_home, m.score_away,
|
||||
t1.name as home_team, t2.name as away_team,
|
||||
l.name as league_name
|
||||
FROM matches m
|
||||
LEFT JOIN teams t1 ON m.home_team_id = t1.id
|
||||
LEFT JOIN teams t2 ON m.away_team_id = t2.id
|
||||
LEFT JOIN leagues l ON m.league_id = l.id
|
||||
WHERE m.mst_utc BETWEEN %s AND %s
|
||||
AND m.league_id IN %s
|
||||
AND m.status = 'FT'
|
||||
ORDER BY m.mst_utc ASC
|
||||
LIMIT 20 -- Limit to 20 matches to avoid running for hours on a single backtest
|
||||
""", (start_ts, end_ts, league_ids))
|
||||
|
||||
rows = cur.fetchall()
|
||||
print(f"📊 Found {len(rows)} finished matches. Starting AI Analysis...")
|
||||
def main():
|
||||
print('Veri yukleniyor...')
|
||||
df = pd.read_csv(DATA_PATH, low_memory=False)
|
||||
df = df.sort_values('mst_utc')
|
||||
n_test = int(len(df) * 0.20)
|
||||
df_test = df.tail(n_test).copy().reset_index(drop=True)
|
||||
print(f'Test seti: {len(df_test):,} mac')
|
||||
|
||||
if not rows:
|
||||
print("⚠️ No matches found for this date.")
|
||||
cur.close()
|
||||
conn.close()
|
||||
return
|
||||
feature_cols = [c for c in df.columns if c not in SKIP_COLS]
|
||||
X = df_test[feature_cols].fillna(0).values
|
||||
|
||||
# Initialize AI Engine
|
||||
try:
|
||||
orchestrator = get_single_match_orchestrator()
|
||||
print("✅ AI Engine (SingleMatchOrchestrator) Loaded.")
|
||||
except Exception as e:
|
||||
print(f"❌ Failed to load AI Engine: {e}")
|
||||
print("💡 Make sure models are trained/present in ai-engine/models/")
|
||||
cur.close()
|
||||
conn.close()
|
||||
return
|
||||
# Modelleri yukle
|
||||
loaded = {}
|
||||
for mkey, n_class, *_ in MARKETS:
|
||||
if mkey not in loaded:
|
||||
m = load_model(mkey)
|
||||
if m:
|
||||
loaded[mkey] = (m, n_class)
|
||||
print(f'Modeller: {list(loaded.keys())}')
|
||||
|
||||
# ─── Backtest Loop ───
|
||||
total_matches_analyzed = 0
|
||||
bets_skipped = 0
|
||||
bets_played = 0
|
||||
bets_won = 0
|
||||
total_profit = 0.0
|
||||
|
||||
# Thresholds matching the NEW Skip Logic
|
||||
MIN_CONF = 45.0
|
||||
# Toplu tahmin
|
||||
raw_preds = {}
|
||||
for mkey, (model, n_class) in loaded.items():
|
||||
dmat = xgb.DMatrix(pd.DataFrame(X, columns=feature_cols))
|
||||
raw = model.predict(dmat)
|
||||
raw_preds[mkey] = raw.reshape(-1, n_class) if n_class > 2 else np.column_stack([1-raw, raw])
|
||||
|
||||
start_time = time.time()
|
||||
# Backtest
|
||||
all_results = []
|
||||
print(f'\n{"Market":<12} {"Edge>=":>7} {"Bahis":>7} {"Hit%":>7} {"AvgOdds":>9} {"ROI/b":>8} {"Toplam":>10}')
|
||||
print('-' * 65)
|
||||
|
||||
for i, row in enumerate(rows):
|
||||
match_id = str(row['id'])
|
||||
home_team = row['home_team']
|
||||
away_team = row['away_team']
|
||||
home_score = row['score_home']
|
||||
away_score = row['score_away']
|
||||
|
||||
print(f"\n[{i+1}/{len(rows)}] Analyzing: {home_team} vs {away_team} ...")
|
||||
for mkey, n_class, pred_cls, label_col, odds_col, isim in MARKETS:
|
||||
if mkey not in raw_preds or label_col not in df_test.columns or odds_col not in df_test.columns:
|
||||
continue
|
||||
|
||||
try:
|
||||
# 1. AI PREDICTION (Actual Model Call)
|
||||
prediction = orchestrator.analyze_match(match_id)
|
||||
|
||||
if not prediction:
|
||||
print(f" ⚠️ AI returned no prediction.")
|
||||
mp = raw_preds[mkey][:, pred_cls]
|
||||
act = pd.to_numeric(df_test[label_col], errors='coerce').values
|
||||
bko = pd.to_numeric(df_test[odds_col], errors='coerce').values
|
||||
|
||||
valid = (~np.isnan(act) & ~np.isnan(bko) &
|
||||
(bko >= MIN_ODDS) & (bko <= MAX_ODDS))
|
||||
mp, act, bko = mp[valid], act[valid].astype(int), bko[valid]
|
||||
implied = 1.0 / bko
|
||||
edge = mp - implied
|
||||
|
||||
print(f'\n{isim}:')
|
||||
for min_e in [0.02, 0.03, 0.05, 0.07, 0.10]:
|
||||
mask = edge >= min_e
|
||||
n = mask.sum()
|
||||
if n < 20:
|
||||
continue
|
||||
won = (act[mask] == pred_cls).astype(int)
|
||||
roi = (bko[mask] - 1) * won - (1 - won)
|
||||
hit = won.mean()
|
||||
avg_roi = roi.mean()
|
||||
total = roi.sum()
|
||||
avg_odds = bko[mask].mean()
|
||||
sign = '+' if total > 0 else ''
|
||||
print(f' edge>={min_e:+.0%} n={n:>5,} hit={hit:.1%} odds={avg_odds:.2f} roi/b={avg_roi:+.3f} toplam={sign}{total:.1f}')
|
||||
all_results.append({'market': isim, 'min_edge': min_e, 'n': n,
|
||||
'hit': round(hit, 4), 'avg_odds': round(avg_odds, 3),
|
||||
'avg_roi': round(avg_roi, 4), 'total_roi': round(total, 2)})
|
||||
|
||||
total_matches_analyzed += 1
|
||||
|
||||
# 2. Extract Main Pick
|
||||
main_pick = prediction.get("main_pick") or {}
|
||||
pick_name = main_pick.get("pick")
|
||||
confidence = main_pick.get("confidence", 0)
|
||||
odds = main_pick.get("odds", 0)
|
||||
# En iyi
|
||||
winners = sorted([r for r in all_results if r['total_roi'] > 0],
|
||||
key=lambda x: x['avg_roi'], reverse=True)
|
||||
print(f'\n{"="*65}')
|
||||
print('KAZANCLI KOMBINASYONLAR (total_roi > 0):')
|
||||
print(f'{"="*65}')
|
||||
for r in winners[:20]:
|
||||
print(f' {r["market"]:<12} edge>={r["min_edge"]:+.0%} | n={r["n"]:>5,} | '
|
||||
f'hit={r["hit"]:.0%} | roi/b={r["avg_roi"]:+.3f} | toplam={r["total_roi"]:+.1f}')
|
||||
|
||||
if not pick_name or not confidence:
|
||||
print(f" ⚠️ No main pick found in prediction.")
|
||||
continue
|
||||
os.makedirs(REPORT_DIR, exist_ok=True)
|
||||
with open(os.path.join(REPORT_DIR, 'backtest_real_odds.json'), 'w') as f:
|
||||
json.dump(all_results, f, indent=2)
|
||||
print(f'\nRapor kaydedildi.')
|
||||
|
||||
print(f" 🤖 Pick: {pick_name} | Conf: {confidence}% | Odds: {odds}")
|
||||
|
||||
# 3. Apply Skip Logic (New Backtest Logic)
|
||||
if confidence < MIN_CONF:
|
||||
print(f" 🚫 SKIPPED (Confidence {confidence}% < {MIN_CONF}%)")
|
||||
bets_skipped += 1
|
||||
continue
|
||||
|
||||
if odds > 0:
|
||||
implied_prob = 1.0 / odds
|
||||
my_prob = confidence / 100.0
|
||||
if my_prob - implied_prob < -0.03: # Negative edge
|
||||
print(f" 🚫 SKIPPED (Negative Edge)")
|
||||
bets_skipped += 1
|
||||
continue
|
||||
|
||||
# 4. Bet Played
|
||||
bets_played += 1
|
||||
print(f" 🎲 BET PLAYED: {pick_name} @ {odds}")
|
||||
|
||||
# 5. Resolve Bet
|
||||
won = False
|
||||
# Basic resolution logic (Need to parse pick_name like "1", "X", "2", "2.5 Üst", etc.)
|
||||
pick_clean = str(pick_name).upper()
|
||||
|
||||
# MS
|
||||
if pick_clean in ["1", "MS 1"] and home_score > away_score: won = True
|
||||
elif pick_clean in ["X", "MS X"] and home_score == away_score: won = True
|
||||
elif pick_clean in ["2", "MS 2"] and away_score > home_score: won = True
|
||||
|
||||
# OU25
|
||||
elif "ÜST" in pick_clean or "OVER" in pick_clean:
|
||||
if (home_score + away_score) > 2.5: won = True
|
||||
elif "ALT" in pick_clean or "UNDER" in pick_clean:
|
||||
if (home_score + away_score) < 2.5: won = True
|
||||
|
||||
# BTTS
|
||||
elif "VAR" in pick_clean and home_score > 0 and away_score > 0: won = True
|
||||
elif "YOK" in pick_clean and (home_score == 0 or away_score == 0): won = True
|
||||
|
||||
if won:
|
||||
bets_won += 1
|
||||
profit = odds - 1.0
|
||||
print(f" ✅ WON! (+{profit:.2f} units)")
|
||||
else:
|
||||
profit = -1.0
|
||||
print(f" ❌ LOST! (-1.00 units)")
|
||||
|
||||
total_profit += profit
|
||||
|
||||
except Exception as e:
|
||||
print(f" 💥 Error during analysis: {e}")
|
||||
|
||||
elapsed = time.time() - start_time
|
||||
|
||||
# ─── FINAL REPORT ───
|
||||
print("\n" + "="*60)
|
||||
print("📈 REAL AI BACKTEST RESULTS")
|
||||
print(f"🕒 Time taken: {elapsed:.1f} seconds")
|
||||
print("="*60)
|
||||
print(f"📊 Matches Analyzed: {total_matches_analyzed}")
|
||||
print(f"🚫 Bets SKIPPED: {bets_skipped}")
|
||||
print(f"✅ Bets PLAYED: {bets_played}")
|
||||
|
||||
if bets_played > 0:
|
||||
win_rate = (bets_won / bets_played) * 100
|
||||
roi = (total_profit / bets_played) * 100
|
||||
yield_val = total_profit # Net Units
|
||||
|
||||
print(f"🏆 Bets Won: {bets_won}")
|
||||
print(f"💀 Bets Lost: {bets_played - bets_won}")
|
||||
print("-" * 40)
|
||||
print(f" Win Rate: {win_rate:.2f}%")
|
||||
print(f"💰 Total Profit (Units): {total_profit:.2f}")
|
||||
print(f"📊 ROI: {roi:.2f}%")
|
||||
|
||||
if roi > 0:
|
||||
print("🟢 STRATEGY IS PROFITABLE!")
|
||||
else:
|
||||
print("🔴 STRATEGY IS LOSING")
|
||||
else:
|
||||
print("⚠️ No bets were played. All were skipped or failed.")
|
||||
|
||||
cur.close()
|
||||
conn.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
run_backtest()
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user