main

2026-05-17 02:17:22 +03:00
parent 17ace9bd12
commit 94c7a4481a
53 changed files with 29602 additions and 7832 deletions
@@ -1,223 +1,136 @@
 """
-Real AI Engine Backtest Script
-==============================
-Uses the ACTUAL models (V20/V25 Ensemble) to predict historical matches.
-
-Usage:
-    python ai-engine/scripts/backtest_real.py
+Gerçek Odds Bazlı Backtest
+============================
+Model olasılığı vs gerçek bookmaker odds karşılaştırır.
+Edge varsa bahis açıldığı varsayılır, gerçek ROI hesaplanır.
 """

-import os
-import sys
-import json
-import time
-import psycopg2
-from psycopg2.extras import RealDictCursor
-from datetime import datetime
+import os, sys, json
+import numpy as np
+import pandas as pd
+import xgboost as xgb

-# Add paths
-AI_DIR = os.path.dirname(os.path.abspath(__file__))
-ROOT_DIR = os.path.dirname(AI_DIR)
-sys.path.insert(0, ROOT_DIR)
+# Put the parent of this script's directory at the front of the import search path.
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-# Fix for Windows path issues in scripts
-if "scripts" in os.path.basename(AI_DIR):
-    ROOT_DIR = os.path.dirname(ROOT_DIR) # One level up if inside scripts folder
+# Parent of this script's directory; the data, model and report locations all hang off it.
+_BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+DATA_PATH = os.path.join(_BASE_DIR, 'data', 'training_data.csv')
+MODELS_DIR = os.path.join(_BASE_DIR, 'models', 'v25')
+REPORT_DIR = os.path.join(_BASE_DIR, 'reports')

-from services.single_match_orchestrator import get_single_match_orchestrator, MatchData
+# Columns of the training CSV that are excluded from the model feature matrix.
+SKIP_COLS = {
+    # identifiers and the column the test split is sorted by
+    'match_id', 'home_team_id', 'away_team_id', 'league_id', 'mst_utc',
+    # full-time score columns
+    'score_home', 'score_away', 'total_goals',
+    # half-time score columns
+    'ht_score_home', 'ht_score_away', 'ht_total_goals',
+    # label_* columns for full-time markets
+    'label_ms', 'label_ou05', 'label_ou15', 'label_ou25', 'label_ou35',
+    'label_btts',
+    # label_* columns for half-time markets
+    'label_ht_result', 'label_ht_ou05', 'label_ht_ou15', 'label_ht_ft',
+    # remaining label_* columns
+    'label_odd_even', 'label_yellow_cards', 'label_cards_ou45',
+    'label_handicap_ms',
+}

-def get_clean_dsn() -> str:
-    return "postgresql://suggestbet:SuGGesT2026SecuRe@localhost:15432/boilerplate_db"
+# Each entry: (model_key, n_class, pred_class, label_col, odds_col, display_name).
+# model_key selects the model file loaded by load_model, n_class is that model's
+# class count, pred_class is the class index whose predicted probability is compared
+# with the bookmaker odds in odds_col, and a bet counts as won when label_col equals
+# pred_class.
+MARKETS = [
+    ('ms',   3, 0, 'label_ms',   'odds_ms_h',   'MS-Ev'),
+    ('ms',   3, 1, 'label_ms',   'odds_ms_d',   'MS-Ber'),
+    ('ms',   3, 2, 'label_ms',   'odds_ms_a',   'MS-Dep'),
+    ('ou15', 2, 1, 'label_ou15', 'odds_ou15_o', 'OU15-Ust'),
+    ('ou15', 2, 0, 'label_ou15', 'odds_ou15_u', 'OU15-Alt'),
+    ('ou25', 2, 1, 'label_ou25', 'odds_ou25_o', 'OU25-Ust'),
+    ('ou25', 2, 0, 'label_ou25', 'odds_ou25_u', 'OU25-Alt'),
+    ('ou35', 2, 1, 'label_ou35', 'odds_ou35_o', 'OU35-Ust'),
+    ('ou35', 2, 0, 'label_ou35', 'odds_ou35_u', 'OU35-Alt'),
+    ('btts', 2, 1, 'label_btts', 'odds_btts_y', 'BTTS-Var'),
+    ('btts', 2, 0, 'label_btts', 'odds_btts_n', 'BTTS-Yok'),
+]

-def run_backtest():
-    print("🚀 REAL AI BACKTEST: Sept 13, 2024 - Top Leagues")
-    print("🧠 Engine: V30 Ensemble (V20+V25)")
-    print("="*60)
+# Inclusive bookmaker-odds range; rows whose odds fall outside it are left out of the backtest.
+MIN_ODDS = 1.10
+MAX_ODDS = 10.0

-    # Load Top Leagues
-    leagues_path = os.path.join(ROOT_DIR, "top_leagues.json")
-    try:
-        with open(leagues_path, 'r') as f:
-            top_leagues = json.load(f)
-        league_ids = tuple(str(lid) for lid in top_leagues)
-        print(f"📋 Loaded {len(top_leagues)} top leagues.")
-    except Exception as e:
-        print(f"❌ Error loading top_leagues.json: {e}")
-        return

-    # Date Range (Sept 13, 2024)
-    start_dt = datetime(2024, 9, 13, 0, 0, 0)
-    end_dt = datetime(2024, 9, 13, 23, 59, 59)
-    start_ts = int(start_dt.timestamp() * 1000)
-    end_ts = int(end_dt.timestamp() * 1000)
+def load_model(market):
+    """Load the XGBoost booster stored for *market*.
+
+    The model is read from ``xgb_v25_<market>.json`` inside MODELS_DIR.
+
+    Returns:
+        The loaded ``xgb.Booster``, or ``None`` when that file does not exist.
+    """
+    model_file = os.path.join(MODELS_DIR, f'xgb_v25_{market}.json')
+    booster = None
+    if os.path.exists(model_file):
+        booster = xgb.Booster()
+        booster.load_model(model_file)
+    return booster

-    dsn = get_clean_dsn()
-    conn = psycopg2.connect(dsn)
-    cur = conn.cursor(cursor_factory=RealDictCursor)

-    # Fetch Matches
-    cur.execute("""
-        SELECT m.id, m.match_name, m.home_team_id, m.away_team_id, 
-               m.mst_utc, m.league_id, m.status, m.score_home, m.score_away,
-               t1.name as home_team, t2.name as away_team,
-               l.name as league_name
-        FROM matches m
-        LEFT JOIN teams t1 ON m.home_team_id = t1.id
-        LEFT JOIN teams t2 ON m.away_team_id = t2.id
-        LEFT JOIN leagues l ON m.league_id = l.id
-        WHERE m.mst_utc BETWEEN %s AND %s
-          AND m.league_id IN %s
-          AND m.status = 'FT'
-        ORDER BY m.mst_utc ASC
-        LIMIT 20  -- Limit to 20 matches to avoid running for hours on a single backtest
-    """, (start_ts, end_ts, league_ids))
-    
-    rows = cur.fetchall()
-    print(f"📊 Found {len(rows)} finished matches. Starting AI Analysis...")
+def main():
+    """Backtest the loaded models against recorded bookmaker odds.
+
+    Reads DATA_PATH, sorts it by ``mst_utc`` and keeps the last 20% of rows as
+    the test set.  For every MARKETS entry the model probability of the
+    predicted class is compared with the implied probability ``1 / odds``;
+    a one-unit flat stake is assumed on every row whose difference (edge)
+    reaches a threshold.  Hit rate and profit per threshold are printed and
+    written to ``backtest_real_odds.json`` inside REPORT_DIR.
+    """
+    print('Veri yukleniyor...')
+    df = pd.read_csv(DATA_PATH, low_memory=False)
+    df = df.sort_values('mst_utc')
+    n_test = int(len(df) * 0.20)
+    df_test = df.tail(n_test).copy().reset_index(drop=True)
+    print(f'Test seti: {len(df_test):,} mac')

-    if not rows:
-        print("⚠️ No matches found for this date.")
-        cur.close()
-        conn.close()
-        return
+    # Every column outside SKIP_COLS is fed to the models.  The odds_* columns
+    # are not in SKIP_COLS, so they are part of the feature matrix as well.
+    feature_cols = [c for c in df.columns if c not in SKIP_COLS]
+    X = df_test[feature_cols].fillna(0).values

-    # Initialize AI Engine
-    try:
-        orchestrator = get_single_match_orchestrator()
-        print("✅ AI Engine (SingleMatchOrchestrator) Loaded.")
-    except Exception as e:
-        print(f"❌ Failed to load AI Engine: {e}")
-        print("💡 Make sure models are trained/present in ai-engine/models/")
-        cur.close()
-        conn.close()
-        return
+    # Load each distinct model once; markets whose model file is missing are
+    # skipped later because they never get an entry in raw_preds.
+    loaded = {}
+    for mkey, n_class, *_ in MARKETS:
+        if mkey not in loaded:
+            m = load_model(mkey)
+            if m is not None:
+                loaded[mkey] = (m, n_class)
+    print(f'Modeller: {list(loaded.keys())}')

-    # ─── Backtest Loop ───
-    total_matches_analyzed = 0
-    bets_skipped = 0
-    bets_played = 0
-    bets_won = 0
-    total_profit = 0.0
-    
-    # Thresholds matching the NEW Skip Logic
-    MIN_CONF = 45.0 
+    # Batch prediction: the feature matrix is the same for every model, so the
+    # DMatrix is built once and shared.
+    raw_preds = {}
+    if loaded:
+        dmat = xgb.DMatrix(pd.DataFrame(X, columns=feature_cols))
+        for mkey, (model, n_class) in loaded.items():
+            raw = model.predict(dmat)
+            # Binary models yield one score per row, used here as P(class 1)
+            # NOTE(review): assumes the boosters output probabilities - confirm.
+            raw_preds[mkey] = raw.reshape(-1, n_class) if n_class > 2 else np.column_stack([1-raw, raw])

-    start_time = time.time()
+    # Backtest
+    all_results = []
+    print(f'\n{"Market":<12} {"Edge>=":>7} {"Bahis":>7} {"Hit%":>7} {"AvgOdds":>9} {"ROI/b":>8} {"Toplam":>10}')
+    print('-' * 65)

-    for i, row in enumerate(rows):
-        match_id = str(row['id'])
-        home_team = row['home_team']
-        away_team = row['away_team']
-        home_score = row['score_home']
-        away_score = row['score_away']
-        
-        print(f"\n[{i+1}/{len(rows)}] Analyzing: {home_team} vs {away_team} ...")
+    for mkey, n_class, pred_cls, label_col, odds_col, isim in MARKETS:
+        if mkey not in raw_preds or label_col not in df_test.columns or odds_col not in df_test.columns:
+            continue

-        try:
-            # 1. AI PREDICTION (Actual Model Call)
-            prediction = orchestrator.analyze_match(match_id)
-            
-            if not prediction:
-                print(f"   ⚠️ AI returned no prediction.")
+        mp  = raw_preds[mkey][:, pred_cls]
+        act = pd.to_numeric(df_test[label_col], errors='coerce').values
+        bko = pd.to_numeric(df_test[odds_col],  errors='coerce').values
+
+        # Keep rows that have both a label and odds inside [MIN_ODDS, MAX_ODDS].
+        valid = (~np.isnan(act) & ~np.isnan(bko) &
+                 (bko >= MIN_ODDS) & (bko <= MAX_ODDS))
+        mp, act, bko = mp[valid], act[valid].astype(int), bko[valid]
+        # Implied probability is taken as 1/odds; no bookmaker margin is removed.
+        implied = 1.0 / bko
+        edge = mp - implied
+
+        print(f'\n{isim}:')
+        for min_e in [0.02, 0.03, 0.05, 0.07, 0.10]:
+            mask = edge >= min_e
+            # Plain int: a numpy integer here would make json.dump raise TypeError.
+            n = int(mask.sum())
+            if n < 20:
                 continue
+            won = (act[mask] == pred_cls).astype(int)
+            # Profit per one-unit stake: odds - 1 on a win, -1 on a loss.
+            roi = (bko[mask] - 1) * won - (1 - won)
+            hit = float(won.mean())
+            avg_roi = float(roi.mean())
+            total = float(roi.sum())
+            avg_odds = float(bko[mask].mean())
+            sign = '+' if total > 0 else ''
+            print(f'  edge>={min_e:+.0%}  n={n:>5,}  hit={hit:.1%}  odds={avg_odds:.2f}  roi/b={avg_roi:+.3f}  toplam={sign}{total:.1f}')
+            all_results.append({'market': isim, 'min_edge': min_e, 'n': n,
+                                 'hit': round(hit, 4), 'avg_odds': round(avg_odds, 3),
+                                 'avg_roi': round(avg_roi, 4), 'total_roi': round(total, 2)})

-            total_matches_analyzed += 1
-            
-            # 2. Extract Main Pick
-            main_pick = prediction.get("main_pick") or {}
-            pick_name = main_pick.get("pick")
-            confidence = main_pick.get("confidence", 0)
-            odds = main_pick.get("odds", 0)
+    # Best combinations: positive total profit, ranked by average profit per bet.
+    winners = sorted([r for r in all_results if r['total_roi'] > 0],
+                     key=lambda x: x['avg_roi'], reverse=True)
+    print(f'\n{"="*65}')
+    print('KAZANCLI KOMBINASYONLAR (total_roi > 0):')
+    print(f'{"="*65}')
+    for r in winners[:20]:
+        print(f'  {r["market"]:<12} edge>={r["min_edge"]:+.0%} | n={r["n"]:>5,} | '
+              f'hit={r["hit"]:.0%} | roi/b={r["avg_roi"]:+.3f} | toplam={r["total_roi"]:+.1f}')

-            if not pick_name or not confidence:
-                print(f"   ⚠️ No main pick found in prediction.")
-                continue
+    os.makedirs(REPORT_DIR, exist_ok=True)
+    with open(os.path.join(REPORT_DIR, 'backtest_real_odds.json'), 'w', encoding='utf-8') as f:
+        json.dump(all_results, f, indent=2)
+    print(f'\nRapor kaydedildi.')

-            print(f"   🤖 Pick: {pick_name} | Conf: {confidence}% | Odds: {odds}")

-            # 3. Apply Skip Logic (New Backtest Logic)
-            if confidence < MIN_CONF:
-                print(f"   🚫 SKIPPED (Confidence {confidence}% < {MIN_CONF}%)")
-                bets_skipped += 1
-                continue
-
-            if odds > 0:
-                implied_prob = 1.0 / odds
-                my_prob = confidence / 100.0
-                if my_prob - implied_prob < -0.03: # Negative edge
-                    print(f"   🚫 SKIPPED (Negative Edge)")
-                    bets_skipped += 1
-                    continue
-
-            # 4. Bet Played
-            bets_played += 1
-            print(f"   🎲 BET PLAYED: {pick_name} @ {odds}")
-
-            # 5. Resolve Bet
-            won = False
-            # Basic resolution logic (Need to parse pick_name like "1", "X", "2", "2.5 Üst", etc.)
-            pick_clean = str(pick_name).upper()
-            
-            # MS
-            if pick_clean in ["1", "MS 1"] and home_score > away_score: won = True
-            elif pick_clean in ["X", "MS X"] and home_score == away_score: won = True
-            elif pick_clean in ["2", "MS 2"] and away_score > home_score: won = True
-            
-            # OU25
-            elif "ÜST" in pick_clean or "OVER" in pick_clean:
-                if (home_score + away_score) > 2.5: won = True
-            elif "ALT" in pick_clean or "UNDER" in pick_clean:
-                if (home_score + away_score) < 2.5: won = True
-            
-            # BTTS
-            elif "VAR" in pick_clean and home_score > 0 and away_score > 0: won = True
-            elif "YOK" in pick_clean and (home_score == 0 or away_score == 0): won = True
-
-            if won:
-                bets_won += 1
-                profit = odds - 1.0
-                print(f"   ✅ WON! (+{profit:.2f} units)")
-            else:
-                profit = -1.0
-                print(f"   ❌ LOST! (-1.00 units)")
-            
-            total_profit += profit
-
-        except Exception as e:
-            print(f"   💥 Error during analysis: {e}")
-
-    elapsed = time.time() - start_time
-
-    # ─── FINAL REPORT ───
-    print("\n" + "="*60)
-    print("📈 REAL AI BACKTEST RESULTS")
-    print(f"🕒 Time taken: {elapsed:.1f} seconds")
-    print("="*60)
-    print(f"📊 Matches Analyzed: {total_matches_analyzed}")
-    print(f"🚫 Bets SKIPPED: {bets_skipped}")
-    print(f"✅ Bets PLAYED: {bets_played}")
-    
-    if bets_played > 0:
-        win_rate = (bets_won / bets_played) * 100
-        roi = (total_profit / bets_played) * 100
-        yield_val = total_profit  # Net Units
-        
-        print(f"🏆 Bets Won: {bets_won}")
-        print(f"💀 Bets Lost: {bets_played - bets_won}")
-        print("-" * 40)
-        print(f" Win Rate: {win_rate:.2f}%")
-        print(f"💰 Total Profit (Units): {total_profit:.2f}")
-        print(f"📊 ROI: {roi:.2f}%")
-        
-        if roi > 0:
-            print("🟢 STRATEGY IS PROFITABLE!")
-        else:
-            print("🔴 STRATEGY IS LOSING")
-    else:
-        print("⚠️ No bets were played. All were skipped or failed.")
-
-    cur.close()
-    conn.close()
-
-if __name__ == "__main__":
-    run_backtest()
+# Run the backtest only when this file is executed as a script.
+if __name__ == '__main__':
+    main()