def predict_btts(self, features: Dict[str, float]) -> Optional[Dict[str, float]]:
    """
    Estimate Both-Teams-To-Score probabilities from the BTTS model ensemble.

    Each model stored under ``self.models['btts']`` is queried through
    ``self._predict_with_model``; outputs that are not ``None`` and have
    exactly two entries are averaged element-wise.

    Returns:
        ``{'no': ..., 'yes': ...}`` built from index 0 and index 1 of the
        averaged vector, or ``None`` when the predictor is not loaded, no
        BTTS model is registered, or no model produced a usable output.
    """
    if not self._loaded:
        return None
    btts_models = self.models.get('btts')
    if not btts_models:
        return None

    matrix = self._build_feature_array(features)
    outputs = (
        self._predict_with_model(model, matrix, f'BTTS/{name}', expected_classes=2)
        for name, model in btts_models.items()
    )
    # Keep only well-formed two-class vectors; a failed model must not poison the mean.
    usable = [vec for vec in outputs if vec is not None and len(vec) == 2]
    if not usable:
        return None

    prob_no, prob_yes = np.mean(usable, axis=0)
    return {'no': float(prob_no), 'yes': float(prob_yes)}
def predict_all(self, features: Dict[str, float]) -> Dict[str, Optional[Dict[str, float]]]:
    """Run predictions for all supported markets.

    Markets are evaluated in the order ms, ou25, btts, dc. Each entry holds
    whatever the matching ``predict_<market>`` method returns for
    ``features`` (which may be ``None``).
    """
    market_order = ('ms', 'ou25', 'btts', 'dc')
    return {
        market: getattr(self, f'predict_{market}')(features)
        for market in market_order
    }
"lgb_accuracy": 0.9067, - "lgb_logloss": 0.1783, - "ensemble_accuracy": 0.9108, - "ensemble_logloss": 0.1753, + "train_samples": 74499, + "val_samples": 15964, + "test_samples": 15965, + "xgb_accuracy": 0.753, + "xgb_logloss": 0.5256, + "lgb_accuracy": 0.7523, + "lgb_logloss": 0.5262, + "ensemble_accuracy": 0.7533, + "ensemble_logloss": 0.5254, "class_count": 2 }, "OU25": { - "samples": 9791, + "samples": 106428, "features_used": [ "home_overall_elo", "away_overall_elo", @@ -341,19 +341,19 @@ "home_goals_form", "away_goals_form" ], - "train_samples": 6853, - "val_samples": 1469, - "test_samples": 1469, - "xgb_accuracy": 0.9204, - "xgb_logloss": 0.1535, - "lgb_accuracy": 0.9224, - "lgb_logloss": 0.1523, - "ensemble_accuracy": 0.9217, - "ensemble_logloss": 0.1518, + "train_samples": 74499, + "val_samples": 15964, + "test_samples": 15965, + "xgb_accuracy": 0.6253, + "xgb_logloss": 0.635, + "lgb_accuracy": 0.6246, + "lgb_logloss": 0.6347, + "ensemble_accuracy": 0.6262, + "ensemble_logloss": 0.6343, "class_count": 2 }, "OU35": { - "samples": 9791, + "samples": 106428, "features_used": [ "home_overall_elo", "away_overall_elo", @@ -458,19 +458,19 @@ "home_goals_form", "away_goals_form" ], - "train_samples": 6853, - "val_samples": 1469, - "test_samples": 1469, - "xgb_accuracy": 0.9578, - "xgb_logloss": 0.1171, - "lgb_accuracy": 0.9564, - "lgb_logloss": 0.1144, - "ensemble_accuracy": 0.9571, - "ensemble_logloss": 0.1149, + "train_samples": 74499, + "val_samples": 15964, + "test_samples": 15965, + "xgb_accuracy": 0.7283, + "xgb_logloss": 0.5463, + "lgb_accuracy": 0.7304, + "lgb_logloss": 0.546, + "ensemble_accuracy": 0.7297, + "ensemble_logloss": 0.5456, "class_count": 2 }, "BTTS": { - "samples": 9791, + "samples": 106428, "features_used": [ "home_overall_elo", "away_overall_elo", @@ -575,19 +575,19 @@ "home_goals_form", "away_goals_form" ], - "train_samples": 6853, - "val_samples": 1469, - "test_samples": 1469, - "xgb_accuracy": 0.9238, - "xgb_logloss": 0.1439, - 
"lgb_accuracy": 0.9265, - "lgb_logloss": 0.143, - "ensemble_accuracy": 0.9265, - "ensemble_logloss": 0.1424, + "train_samples": 74499, + "val_samples": 15964, + "test_samples": 15965, + "xgb_accuracy": 0.5894, + "xgb_logloss": 0.6636, + "lgb_accuracy": 0.5928, + "lgb_logloss": 0.6633, + "ensemble_accuracy": 0.5897, + "ensemble_logloss": 0.6628, "class_count": 2 }, "HT_RESULT": { - "samples": 9786, + "samples": 103208, "features_used": [ "home_overall_elo", "away_overall_elo", @@ -692,19 +692,19 @@ "home_goals_form", "away_goals_form" ], - "train_samples": 6850, - "val_samples": 1468, - "test_samples": 1468, - "xgb_accuracy": 0.5627, - "xgb_logloss": 0.8712, - "lgb_accuracy": 0.5715, - "lgb_logloss": 0.8649, - "ensemble_accuracy": 0.5811, - "ensemble_logloss": 0.8649, + "train_samples": 72245, + "val_samples": 15481, + "test_samples": 15482, + "xgb_accuracy": 0.4695, + "xgb_logloss": 1.0174, + "lgb_accuracy": 0.4677, + "lgb_logloss": 1.0166, + "ensemble_accuracy": 0.4688, + "ensemble_logloss": 1.0164, "class_count": 3 }, "HT_OU05": { - "samples": 9786, + "samples": 103208, "features_used": [ "home_overall_elo", "away_overall_elo", @@ -809,19 +809,19 @@ "home_goals_form", "away_goals_form" ], - "train_samples": 6850, - "val_samples": 1468, - "test_samples": 1468, - "xgb_accuracy": 0.7221, - "xgb_logloss": 0.5122, - "lgb_accuracy": 0.7268, - "lgb_logloss": 0.5092, - "ensemble_accuracy": 0.7275, - "ensemble_logloss": 0.5084, + "train_samples": 72245, + "val_samples": 15481, + "test_samples": 15482, + "xgb_accuracy": 0.7011, + "xgb_logloss": 0.5939, + "lgb_accuracy": 0.7002, + "lgb_logloss": 0.5936, + "ensemble_accuracy": 0.7009, + "ensemble_logloss": 0.5932, "class_count": 2 }, "HT_OU15": { - "samples": 9786, + "samples": 103208, "features_used": [ "home_overall_elo", "away_overall_elo", @@ -926,19 +926,19 @@ "home_goals_form", "away_goals_form" ], - "train_samples": 6850, - "val_samples": 1468, - "test_samples": 1468, - "xgb_accuracy": 0.752, - "xgb_logloss": 0.5252, 
- "lgb_accuracy": 0.7595, - "lgb_logloss": 0.5213, - "ensemble_accuracy": 0.7595, - "ensemble_logloss": 0.5192, + "train_samples": 72245, + "val_samples": 15481, + "test_samples": 15482, + "xgb_accuracy": 0.6723, + "xgb_logloss": 0.6126, + "lgb_accuracy": 0.6736, + "lgb_logloss": 0.6118, + "ensemble_accuracy": 0.6734, + "ensemble_logloss": 0.6117, "class_count": 2 }, "HTFT": { - "samples": 9786, + "samples": 103208, "features_used": [ "home_overall_elo", "away_overall_elo", @@ -1043,19 +1043,19 @@ "home_goals_form", "away_goals_form" ], - "train_samples": 6850, - "val_samples": 1468, - "test_samples": 1468, - "xgb_accuracy": 0.5136, - "xgb_logloss": 1.1384, - "lgb_accuracy": 0.5184, - "lgb_logloss": 1.1469, - "ensemble_accuracy": 0.5143, - "ensemble_logloss": 1.1339, + "train_samples": 72245, + "val_samples": 15481, + "test_samples": 15482, + "xgb_accuracy": 0.3337, + "xgb_logloss": 1.8208, + "lgb_accuracy": 0.3332, + "lgb_logloss": 1.8203, + "ensemble_accuracy": 0.3358, + "ensemble_logloss": 1.8186, "class_count": 9 }, "ODD_EVEN": { - "samples": 9791, + "samples": 106428, "features_used": [ "home_overall_elo", "away_overall_elo", @@ -1160,19 +1160,19 @@ "home_goals_form", "away_goals_form" ], - "train_samples": 6853, - "val_samples": 1469, - "test_samples": 1469, - "xgb_accuracy": 0.8863, - "xgb_logloss": 0.3565, - "lgb_accuracy": 0.8802, - "lgb_logloss": 0.3338, - "ensemble_accuracy": 0.8863, - "ensemble_logloss": 0.3423, + "train_samples": 74499, + "val_samples": 15964, + "test_samples": 15965, + "xgb_accuracy": 0.5296, + "xgb_logloss": 0.6841, + "lgb_accuracy": 0.5359, + "lgb_logloss": 0.6822, + "ensemble_accuracy": 0.531, + "ensemble_logloss": 0.6826, "class_count": 2 }, "CARDS_OU45": { - "samples": 9791, + "samples": 106428, "features_used": [ "home_overall_elo", "away_overall_elo", @@ -1277,19 +1277,19 @@ "home_goals_form", "away_goals_form" ], - "train_samples": 6853, - "val_samples": 1469, - "test_samples": 1469, - "xgb_accuracy": 0.6283, - "xgb_logloss": 
def _settle_pick(market, pick, home_goals, away_goals):
    """Grade a pick against the final score.

    Returns True (won) or False (lost) for a supported market/pick pair, and
    None when the bet cannot be graded: unknown market, unknown pick label,
    or a missing score.
    """
    if home_goals is None or away_goals is None:
        return None

    total_goals = home_goals + away_goals
    outcomes = {
        "MS": {
            "1": home_goals > away_goals,
            "X": home_goals == away_goals,
            "0": home_goals == away_goals,
            "2": away_goals > home_goals,
        },
        "OU25": {"Üst": total_goals > 2.5, "Alt": total_goals < 2.5},
        "OU15": {"Üst": total_goals > 1.5, "Alt": total_goals < 1.5},
        "BTTS": {
            "KG Var": home_goals > 0 and away_goals > 0,
            "KG Yok": home_goals == 0 or away_goals == 0,
        },
        "DC": {
            "1X": home_goals >= away_goals,
            "12": home_goals != away_goals,
            "X2": home_goals <= away_goals,
        },
    }
    return outcomes.get(market, {}).get(pick)


def run_backtest(target_date="2026-05-03", dsn=None):
    """Replay the orchestrator over one day's finished matches and report ROI.

    Args:
        target_date: ISO date (YYYY-MM-DD) whose finished matches are replayed.
        dsn: Optional PostgreSQL DSN. Falls back to the module-level ``DSN``
            so existing callers keep working.

    Each match is passed to ``SingleMatchOrchestrator.analyze_match``. When
    the returned package marks the bet as playable, the main pick is graded
    against the final score. Picks that cannot be graded (unsupported market
    or pick label, missing score) are reported separately and excluded from
    hit rate and ROI rather than being counted as losses.
    """
    # NOTE(review): the module-level DSN embeds a plaintext password. Prefer
    # passing `dsn` from the environment / a secrets store and rotating the
    # committed credential.
    conn = psycopg2.connect(dsn or DSN)
    try:
        # 1. Fetch the finished matches (with team names) for the target date.
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute("""
                SELECT m.id, m.score_home, m.score_away, m.mst_utc,
                       t1.name as home_name, t2.name as away_name
                FROM matches m
                LEFT JOIN teams t1 ON m.home_team_id = t1.id
                LEFT JOIN teams t2 ON m.away_team_id = t2.id
                WHERE m.status IN ('FT', 'AET', 'PEN')
                  AND to_timestamp(m.mst_utc / 1000.0)::date = %s::date
                  AND m.score_home IS NOT NULL
                ORDER BY m.mst_utc ASC
            """, (target_date,))
            matches = cur.fetchall()
    finally:
        # This function does not use the connection after the fetch, so
        # release it before the (slow) analysis loop and on every exit path.
        conn.close()

    if not matches:
        print(f"❌ {target_date} tarihinde bitmiş maç bulunamadı.")
        return

    print(f"🚀 {target_date} için Orkestratör Backtesti Başlatılıyor... ({len(matches)} maç bulundu)")
    print("-" * 60)

    orchestrator = SingleMatchOrchestrator()

    bets_placed = 0
    won = 0
    lost = 0
    unsettled = 0
    total_odds_won = 0.0

    for match in matches:
        # 2. Let the orchestrator analyse the match.
        try:
            package = orchestrator.analyze_match(match['id'])
        except Exception as e:
            # Best effort: one failing match must not abort the whole backtest.
            print(f"Hata ({match['id']}): {e}")
            continue

        if not package:
            continue

        # `or {}` guards against keys that are present but explicitly None.
        bet_advice = package.get("bet_advice") or {}
        main_pick = package.get("main_pick") or {}
        h = match['score_home']
        a = match['score_away']
        fixture = f"{match['home_name']} {h}-{a} {match['away_name']}"

        # 3. Did the orchestrator decide to bet on this match?
        # Equality (not truthiness) so only a real True/1 flag counts and
        # truthy strings such as "false" do not.
        if bet_advice.get("playable") == True:  # noqa: E712
            bets_placed += 1
            market = main_pick.get("market")
            pick = main_pick.get("pick")
            odds = float(main_pick.get("odds", 0.0) or 0.0)

            outcome = _settle_pick(market, pick, h, a)
            if outcome is None:
                unsettled += 1
                res = "⚠️ DEĞERLENDİRİLEMEDİ"
            elif outcome:
                won += 1
                total_odds_won += odds
                res = "✅ KAZANDI"
            else:
                lost += 1
                res = "❌ KAYBETTİ"

            print(f"[{res}] {fixture} | Tahmin: {market} {pick} (Oran: {odds})")
        else:
            reasons = main_pick.get("reasons", ["Bilinmeyen Neden"]) if main_pick else ["No main pick"]
            reason = " | ".join(reasons) if isinstance(reasons, list) else str(reasons)
            rejected_market = main_pick.get("market", "N/A")
            rejected_pick = main_pick.get("pick", "N/A")

            print(f"[PAS] {fixture} | Reddedilen: {rejected_market} {rejected_pick} -> Neden: {reason}")
            if "market_passed_all_gates" in reason:
                print(f"    DEBUG: bet_advice = {bet_advice}")

            # Compare the model's top MS outcome with what actually happened.
            market_board = package.get("market_board") or {}
            v25_ms = (market_board.get("MS") or {}).get("probs") or {}
            v25_top = max(v25_ms, key=v25_ms.get) if v25_ms else "N/A"
            if h is None or a is None:
                actual_ms = "N/A"
            else:
                actual_ms = "1" if h > a else ("X" if h == a else "2")
            print(f"    [V25 MS Raw: {v25_top}] [Gerçek MS: {actual_ms}]")

    # Final report
    print("\n" + "=" * 60)
    print(f"📊 BACKTEST SONUÇLARI ({target_date})")
    print("=" * 60)
    print(f"Toplam Maç Sayısı   : {len(matches)}")
    print(f"Oynanan Bahis Sayısı: {bets_placed} (Oynama Oranı: %{bets_placed/len(matches)*100:.1f})")
    print(f"Riskli Bulunup Pas Geçilen: {len(matches) - bets_placed}")

    if bets_placed > 0:
        print(f"Kazanılan           : {won}")
        print(f"Kaybedilen          : {lost}")
        if unsettled:
            print(f"Değerlendirilemeyen : {unsettled}")

        # Hit rate and ROI are computed over graded bets only (flat 1-unit stake).
        settled = won + lost
        if settled > 0:
            win_rate = won / settled * 100
            roi = ((total_odds_won - settled) / settled) * 100
            print(f"İsabet Oranı        : %{win_rate:.1f}")
            print(f"Net Kar (ROI)       : %{roi:.1f} {'📈' if roi > 0 else '📉'}")
+ +Enriches: H2H, referee, team_stats, league_averages, form_streaks, + rolling_goals, implied_odds, and clean_sheet/scoring rates. + +Usage: + python scripts/enrich_ai_features.py # enrich all + python scripts/enrich_ai_features.py --batch-size 500 # smaller batches + python scripts/enrich_ai_features.py --dry-run # preview only + python scripts/enrich_ai_features.py --force # re-enrich all rows + python scripts/enrich_ai_features.py --limit 1000 # process N rows max + +Designed to be idempotent: uses ON CONFLICT upserts, skips already-enriched rows. +""" + +from __future__ import annotations + +import os +import sys +import time +import argparse +from typing import Any, Dict, List, Optional, Tuple + +# Add ai-engine root to path +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import psycopg2 +from psycopg2.extras import RealDictCursor, execute_values + +from data.db import get_clean_dsn +from services.feature_enrichment import FeatureEnrichmentService + +# ────────────────────────── constants ────────────────────────── + +CALCULATOR_VER = 'enrichment_v2.0' +DEFAULT_BATCH_SIZE = 200 + + +# ────────────────────────── helpers ──────────────────────────── + +def fetch_unenriched_matches( + conn: psycopg2.extensions.connection, + force: bool = False, + limit: Optional[int] = None, +) -> List[Dict[str, Any]]: + """ + Fetch matches from football_ai_features that still have default values + in the enrichment columns (h2h_total=0 AND referee_avg_cards=0). + + If force=True, fetches ALL rows regardless of current state. 
def fetch_implied_odds(
    cur: RealDictCursor,
    match_id: str,
) -> Dict[str, float]:
    """Derive normalised implied probabilities for a match from stored odds.

    Reads every selection joined from odd_selections / odd_categories for
    ``match_id`` and recognises three categories (matched case-insensitively):
    match result, over/under 2.5 and both-teams-to-score. For each market
    whose prices are all above 1.0, the inverse odds are normalised so they
    sum to 1 and rounded to 4 decimals.

    Returns:
        A dict with ``implied_home``, ``implied_draw``, ``implied_away``,
        ``implied_over25``, ``implied_btts_yes`` and ``odds_overround``.
        Markets that are missing or incomplete keep their neutral defaults
        (0.33 / 0.50 / 0.0). If the query itself fails, all defaults are
        returned.
    """
    implied = {
        'implied_home': 0.33,
        'implied_draw': 0.33,
        'implied_away': 0.33,
        'implied_over25': 0.50,
        'implied_btts_yes': 0.50,
        'odds_overround': 0.0,
    }

    try:
        cur.execute("""
            SELECT oc.name AS cat_name, os.name AS sel_name, os.odd_value
            FROM odd_selections os
            JOIN odd_categories oc ON os.odd_category_db_id = oc.db_id
            WHERE oc.match_id = %s
        """, (match_id,))
        selections = cur.fetchall()
    except Exception:
        # Best-effort lookup: any query failure falls back to the defaults.
        return implied

    match_result_slots = {'1': 'ms_h', '0': 'ms_d', 'X': 'ms_d', '2': 'ms_a'}
    prices: Dict[str, float] = {}

    for selection in selections:
        try:
            category = (selection.get('cat_name') or '').lower().strip()
            label = (selection.get('sel_name') or '').strip()
            price = float(selection.get('odd_value', 0))
        except (ValueError, TypeError):
            # Unparseable price: ignore this selection only.
            continue
        if price <= 0:
            continue

        slot = None
        if category == 'maç sonucu':
            slot = match_result_slots.get(label)
        elif category == '2,5 alt/üst':
            folded = label.lower()
            if 'üst' in folded:
                slot = 'ou25_o'
            elif 'alt' in folded:
                slot = 'ou25_u'
        elif category == 'karşılıklı gol':
            folded = label.lower()
            if 'var' in folded:
                slot = 'btts_y'
            elif 'yok' in folded:
                slot = 'btts_n'

        if slot is not None:
            prices[slot] = price

    def two_way_share(first_key, second_key):
        """Normalised probability of the first outcome, or None if incomplete."""
        first = prices.get(first_key, 0)
        second = prices.get(second_key, 0)
        if first > 1.0 and second > 1.0:
            book = 1 / first + 1 / second
            return round((1 / first) / book, 4)
        return None

    # Three-way match result: also records the bookmaker margin.
    home, draw, away = (prices.get(key, 0) for key in ('ms_h', 'ms_d', 'ms_a'))
    if home > 1.0 and draw > 1.0 and away > 1.0:
        book = 1 / home + 1 / draw + 1 / away
        implied['implied_home'] = round((1 / home) / book, 4)
        implied['implied_draw'] = round((1 / draw) / book, 4)
        implied['implied_away'] = round((1 / away) / book, 4)
        implied['odds_overround'] = round(book - 1.0, 4)

    over_share = two_way_share('ou25_o', 'ou25_u')
    if over_share is not None:
        implied['implied_over25'] = over_share

    btts_share = two_way_share('btts_y', 'btts_n')
    if btts_share is not None:
        implied['implied_btts_yes'] = btts_share

    return implied
Form & streaks + home_form = enrichment.compute_form_streaks(cur, home_id, mst_utc) + away_form = enrichment.compute_form_streaks(cur, away_id, mst_utc) + + # 4. Referee + referee_name = fetch_referee_for_match(cur, match_id) + referee = enrichment.compute_referee_stats(cur, referee_name, mst_utc) + + # 5. League averages + league = enrichment.compute_league_averages(cur, league_id, mst_utc) + + # 6. Rolling stats (for goals avg) + home_rolling = enrichment.compute_rolling_stats(cur, home_id, mst_utc) + away_rolling = enrichment.compute_rolling_stats(cur, away_id, mst_utc) + + # 7. Implied odds + implied = fetch_implied_odds(cur, match_id) + + return { + 'match_id': match_id, + # Team stats + 'home_avg_possession': round(home_stats['avg_possession'], 2), + 'away_avg_possession': round(away_stats['avg_possession'], 2), + 'home_avg_shots_on_target': round(home_stats['avg_shots_on_target'], 2), + 'away_avg_shots_on_target': round(away_stats['avg_shots_on_target'], 2), + 'home_shot_conversion': round(home_stats['shot_conversion'], 4), + 'away_shot_conversion': round(away_stats['shot_conversion'], 4), + 'home_avg_corners': round(home_stats['avg_corners'], 2), + 'away_avg_corners': round(away_stats['avg_corners'], 2), + # H2H + 'h2h_total': h2h['total_matches'], + 'h2h_home_win_rate': round(h2h['home_win_rate'], 4), + 'h2h_avg_goals': round(h2h['avg_goals'], 2), + 'h2h_over25_rate': round(h2h['over25_rate'], 4), + 'h2h_btts_rate': round(h2h['btts_rate'], 4), + # Form + 'home_clean_sheet_rate': round(home_form['clean_sheet_rate'], 4), + 'away_clean_sheet_rate': round(away_form['clean_sheet_rate'], 4), + 'home_scoring_rate': round(home_form['scoring_rate'], 4), + 'away_scoring_rate': round(away_form['scoring_rate'], 4), + 'home_win_streak': home_form['winning_streak'], + 'away_win_streak': away_form['winning_streak'], + # Rolling goals + 'home_goals_avg_5': round(home_rolling['rolling5_goals'], 2), + 'away_goals_avg_5': round(away_rolling['rolling5_goals'], 2), + 
'home_conceded_avg_5': round(home_rolling['rolling5_conceded'], 2), + 'away_conceded_avg_5': round(away_rolling['rolling5_conceded'], 2), + # Referee + 'referee_avg_cards': round(referee['cards_total'], 2), + 'referee_home_bias': round(referee['home_bias'], 4), + 'referee_avg_goals': round(referee['avg_goals'], 2), + # League + 'league_avg_goals': round(league['avg_goals'], 2), + 'league_home_win_pct': round(league['home_win_rate'], 4), + 'league_over25_pct': round(league['ou25_rate'], 4), + # Implied odds + 'implied_home': implied['implied_home'], + 'implied_draw': implied['implied_draw'], + 'implied_away': implied['implied_away'], + 'implied_over25': implied['implied_over25'], + 'implied_btts_yes': implied['implied_btts_yes'], + 'odds_overround': implied['odds_overround'], + # Missing players impact — default (no lineup data for historical) + 'missing_players_impact': 0.0, + # Version + 'calculator_ver': CALCULATOR_VER, + } + + +def flush_enrichment_batch( + conn: psycopg2.extensions.connection, + rows: List[Dict[str, Any]], + dry_run: bool, +) -> int: + """Bulk upsert enriched features into football_ai_features.""" + if not rows or dry_run: + return 0 + + columns = [ + 'match_id', + 'home_avg_possession', 'away_avg_possession', + 'home_avg_shots_on_target', 'away_avg_shots_on_target', + 'home_shot_conversion', 'away_shot_conversion', + 'home_avg_corners', 'away_avg_corners', + 'h2h_total', 'h2h_home_win_rate', 'h2h_avg_goals', + 'h2h_over25_rate', 'h2h_btts_rate', + 'home_clean_sheet_rate', 'away_clean_sheet_rate', + 'home_scoring_rate', 'away_scoring_rate', + 'home_win_streak', 'away_win_streak', + 'home_goals_avg_5', 'away_goals_avg_5', + 'home_conceded_avg_5', 'away_conceded_avg_5', + 'referee_avg_cards', 'referee_home_bias', 'referee_avg_goals', + 'league_avg_goals', 'league_home_win_pct', 'league_over25_pct', + 'implied_home', 'implied_draw', 'implied_away', + 'implied_over25', 'implied_btts_yes', 'odds_overround', + 'missing_players_impact', 
def run_enrichment(
    batch_size: int,
    dry_run: bool,
    force: bool,
    limit: Optional[int],
) -> None:
    """Core enrichment loop.

    Fetches the matches that still need enrichment, computes features for
    each one with ``enrich_single_match`` and writes them in batches through
    ``flush_enrichment_batch``.

    Args:
        batch_size: Number of enriched rows buffered before each flush.
        dry_run: Passed to ``flush_enrichment_batch``; when True nothing is
            written.
        force: Passed to ``fetch_unenriched_matches`` to re-enrich all rows.
        limit: Optional cap on the number of matches fetched.

    A failure on a single match is counted and reported (first 10 only) and
    does not stop the run. The connection is always closed, including when
    an unexpected error escapes.
    """
    conn = psycopg2.connect(get_clean_dsn())
    try:
        print(f"\n{'=' * 60}")
        print(f"🧠 AI Features Full Enrichment — {CALCULATOR_VER}")
        print(f"   batch_size={batch_size}  dry_run={dry_run}  force={force}")
        print(f"{'=' * 60}")

        # 1. Fetch unenriched matches
        t0 = time.time()
        matches = fetch_unenriched_matches(conn, force=force, limit=limit)
        print(f"\n📊 {len(matches):,} matches to enrich ({time.time() - t0:.1f}s)")

        if not matches:
            print("✅ Nothing to enrich — all rows already populated.")
            return

        # 2. Initialize enrichment service
        enrichment = FeatureEnrichmentService()

        # 3. Process in batches
        total = len(matches)
        processed = 0
        written = 0
        errors = 0
        batch_buf: List[Dict[str, Any]] = []
        t_start = time.time()

        # One dedicated RealDictCursor serves all enrichment queries.
        with conn.cursor(cursor_factory=RealDictCursor) as enrich_cur:
            for match in matches:
                try:
                    batch_buf.append(
                        enrich_single_match(enrichment, enrich_cur, match)
                    )
                except Exception as e:
                    errors += 1
                    # A failed statement leaves the psycopg2 transaction
                    # aborted; without a rollback every later query on this
                    # connection would fail too. Flushes commit immediately,
                    # so no pending writes are lost here.
                    conn.rollback()
                    if errors <= 10:
                        print(f"  ⚠️ Error enriching {match.get('match_id', '?')}: {e}")

                processed += 1

                # Flush batch
                if len(batch_buf) >= batch_size:
                    written += flush_enrichment_batch(conn, batch_buf, dry_run)
                    batch_buf.clear()

                # Progress reporting
                if processed % 500 == 0:
                    elapsed = time.time() - t_start
                    rate = processed / elapsed if elapsed > 0 else 0
                    remaining = (total - processed) / rate if rate > 0 else 0
                    pct = processed / total * 100
                    print(
                        f"  [{processed:>8,} / {total:,}] "
                        f"({pct:.1f}%) | {rate:.0f} matches/s | "
                        f"ETA: {remaining / 60:.1f} min | "
                        f"errors: {errors}"
                    )

            # Flush remaining
            if batch_buf:
                written += flush_enrichment_batch(conn, batch_buf, dry_run)

        elapsed = time.time() - t_start
        # Guard the summary rate: a tiny run can finish within clock resolution.
        overall_rate = processed / elapsed if elapsed > 0 else 0
        print(f"\n{'=' * 60}")
        print(f"✅ Enrichment complete:")
        print(f"   Processed: {processed:,} matches in {elapsed:.1f}s")
        print(f"   Written:   {written:,} rows")
        print(f"   Errors:    {errors:,}")
        print(f"   Rate:      {overall_rate:.0f} matches/s")
        print(f"{'=' * 60}")
    finally:
        conn.close()
parser.add_argument( + '--force', + action='store_true', + help='Re-enrich ALL rows, not just empty ones', + ) + parser.add_argument( + '--limit', + type=int, + default=None, + help='Max number of matches to process', + ) + args = parser.parse_args() + + run_enrichment( + batch_size=args.batch_size, + dry_run=args.dry_run, + force=args.force, + limit=args.limit, + ) + + +if __name__ == '__main__': + main() diff --git a/ai-engine/scripts/extract_training_data.py b/ai-engine/scripts/extract_training_data.py index 99819f5..053004c 100755 --- a/ai-engine/scripts/extract_training_data.py +++ b/ai-engine/scripts/extract_training_data.py @@ -510,16 +510,24 @@ class FeatureExtractor: self.referee_engine = get_referee_engine() self.momentum_engine = get_momentum_engine() + # ── Data Quality Thresholds ── + # Matches below these thresholds produce default-only features that + # teach the model noise rather than signal. + DQ_MIN_FORM_MATCHES = 3 # team must have ≥3 prior matches + DQ_MIN_FEATURE_COVERAGE = 0.30 # ≥30% of key features must be non-default + def extract_all(self) -> list: - """Extract features for all matches, yield row dicts.""" + """Extract features for all matches with data quality validation.""" matches = self.loader.matches total = len(matches) rows = [] skipped = 0 + dq_rejected = 0 + dq_reasons: dict = defaultdict(int) t_start = time.time() - + print(f"\n🔄 Extracting features for {total} matches...", flush=True) - + # Process chronologically — ELO grows as we go for i, m in enumerate(matches): ( @@ -536,38 +544,43 @@ class FeatureExtractor: away_name, league_name, ) = m - + if i % 100 == 0 and i > 0: elapsed = time.time() - t_start rate = i / elapsed # matches per second remaining = (total - i) / rate if rate > 0 else 0 pct = i / total * 100 - print(f" [{i}/{total}] ({pct:.0f}%) | {rate:.1f} maç/s | ETA: {remaining/60:.1f} dk | skipped: {skipped}", flush=True) - + print( + f" [{i}/{total}] ({pct:.0f}%) | {rate:.1f} maç/s | " + f"ETA: {remaining/60:.1f} dk 
def _validate_row_quality(
    self,
    row: dict,
    home_id: str,
    away_id: str,
    before_date: int,
) -> tuple:
    """
    Data quality gate for training rows.

    A row passes only when both teams have at least
    ``DQ_MIN_FORM_MATCHES`` entries in ``self.loader.team_matches`` whose
    first element is below ``before_date``, and at least
    ``DQ_MIN_FEATURE_COVERAGE`` of the key features differ from their
    default/fallback value by more than 0.01.

    Feature values that are missing, ``None`` or not convertible to float
    are treated as "no signal" (same as the default) instead of raising.

    Returns:
        ``(passed, reason)`` where ``reason`` is ``None`` on success and a
        short rejection label otherwise.
    """
    def prior_matches(team_id):
        # Count history entries strictly earlier than the match being validated.
        history = self.loader.team_matches.get(team_id, [])
        return sum(1 for entry in history if entry[0] < before_date)

    def carries_signal(value, default_val):
        # None / unparseable values carry no information, so they count as default.
        if value is None:
            return False
        try:
            return abs(float(value) - default_val) > 0.01
        except (TypeError, ValueError):
            return False

    # 1. Minimum form history: both teams must have enough prior matches
    if prior_matches(home_id) < self.DQ_MIN_FORM_MATCHES:
        return False, 'home_insufficient_history'
    if prior_matches(away_id) < self.DQ_MIN_FORM_MATCHES:
        return False, 'away_insufficient_history'

    # 2. Feature coverage check: count how many key features are non-default
    key_features = [
        ('home_goals_avg', 1.3),
        ('away_goals_avg', 1.3),
        ('home_clean_sheet_rate', 0.25),
        ('away_clean_sheet_rate', 0.25),
        ('home_avg_possession', 0.50),
        ('away_avg_possession', 0.50),
        ('home_avg_shots_on_target', 3.5),
        ('away_avg_shots_on_target', 3.5),
        ('h2h_total_matches', 0),
        ('odds_ms_h', 0.0),
    ]
    non_default = sum(
        1 for feat_name, default_val in key_features
        if carries_signal(row.get(feat_name), default_val)
    )
    coverage = non_default / len(key_features)

    if coverage < self.DQ_MIN_FEATURE_COVERAGE:
        return False, f'low_feature_coverage_{coverage:.0%}'

    return True, None
clean_feats, "ou25") + clean_feats, 'ou25') + + # ── STAGE A.3: BTTS Model ── + btts_models = None + if 'label_btts' in tr.columns: + print('\n' + '─' * 65) + print(' STAGE A.3: Fundamentals-Only BTTS Model') + print('─' * 65) + y_tr_btts = tr['label_btts'].values + y_va_btts = va['label_btts'].values + mask_tr_btts = ~np.isnan(y_tr_btts) + mask_va_btts = ~np.isnan(y_va_btts) + if mask_tr_btts.sum() > 1000: + btts_models = train_fundamentals_model( + X_tr[mask_tr_btts], y_tr_btts[mask_tr_btts].astype(int), + X_va[mask_va_btts], y_va_btts[mask_va_btts].astype(int), + clean_feats, 'btts') + + # Quick val accuracy + btts_probs = ensemble_predict( + btts_models, + X_va[mask_va_btts], + clean_feats, + n_class=2, + ) + btts_acc = accuracy_score( + y_va_btts[mask_va_btts].astype(int), + btts_probs.argmax(1), + ) + btts_ll = log_loss( + y_va_btts[mask_va_btts].astype(int), + btts_probs, + ) + print(f'\n BTTS Ensemble Val: acc={btts_acc:.4f}, logloss={btts_ll:.4f}') + # Compare with naive baseline (always predict majority class) + btts_majority = y_va_btts[mask_va_btts].astype(int).mean() + print(f' BTTS baseline: {max(btts_majority, 1-btts_majority):.4f} (majority class)') + print(f' Model vs baseline: {btts_acc - max(btts_majority, 1-btts_majority):+.4f}') # ── STAGE C: Backtest ── print("\n" + "─"*65) @@ -422,13 +459,58 @@ def main(): # OU25 backtest if ou_models: - print("\n --- O/U 2.5 Backtest ---") + print('\n --- O/U 2.5 Backtest ---') for edge in [0.05, 0.07, 0.10]: - r = backtest_value(ou_models, te, clean_feats, "ou25", + r = backtest_value(ou_models, te, clean_feats, 'ou25', min_edge=edge, min_odds=1.50, max_odds=3.0, use_kelly=True) - if r.get("total", 0) > 0: - print_backtest(r, f"OU25 edge>{edge}") + if r.get('total', 0) > 0: + print_backtest(r, f'OU25 edge>{edge}') + + # BTTS backtest + if btts_models and 'label_btts' in te.columns: + print('\n --- BTTS Backtest ---') + # Build BTTS odds for backtest + if 'odds_btts_y' in te.columns and 'odds_btts_n' in 
te.columns: + te_btts = te.copy() + te_btts['odds_btts_y'] = pd.to_numeric( + te_btts['odds_btts_y'], errors='coerce', + ).fillna(1.85) + te_btts['odds_btts_n'] = pd.to_numeric( + te_btts['odds_btts_n'], errors='coerce', + ).fillna(1.85) + + for edge in [0.05, 0.07, 0.10]: + X_test = te_btts[clean_feats].values + probs = ensemble_predict(btts_models, X_test, clean_feats, 2) + y_btts = te_btts['label_btts'].values.astype(int) + odds_arr = te_btts[['odds_btts_n', 'odds_btts_y']].values + m_arr = 1 / odds_arr + impl = m_arr / m_arr.sum(axis=1, keepdims=True) + + total_bets = 0 + wins = 0 + pnl = 0.0 + for i in range(len(y_btts)): + for cls in range(2): + e = probs[i, cls] - impl[i, cls] + o = odds_arr[i, cls] + if e < edge or o < 1.50 or o > 3.0: + continue + total_bets += 1 + won = (y_btts[i] == cls) + if won: + wins += 1 + pnl += 10 * (o - 1) + else: + pnl -= 10 + if total_bets > 0: + roi = pnl / (total_bets * 10) * 100 + hit = wins / total_bets * 100 + print( + f' Edge>{edge:.2f}: {total_bets} bets, ' + f'hit={hit:.1f}%, ROI={roi:+.1f}%' + ) # ── Feature importance ── if "lgb" in ms_models: @@ -452,25 +534,40 @@ def main(): if ou_models: for name, m in ou_models.items(): - p = MODELS_DIR / f"v27_ou25_{name}.pkl" - with open(p, "wb") as f: + p = MODELS_DIR / f'v27_ou25_{name}.pkl' + with open(p, 'wb') as f: pickle.dump(m, f) - print(f" ✓ {p.name}") + print(f' ✓ {p.name}') + + if btts_models: + for name, m in btts_models.items(): + p = MODELS_DIR / f'v27_btts_{name}.pkl' + with open(p, 'wb') as f: + pickle.dump(m, f) + print(f' ✓ {p.name}') meta = { - "version": "v27-pro", "trained_at": time.strftime("%Y-%m-%d %H:%M:%S"), - "approach": "odds-free fundamentals + value edge detection", - "feature_count": len(clean_feats), - "total_samples": len(df), - "val_acc": round(val_acc, 4), "val_ll": round(val_ll, 4), - "best_config": {k: v for k, v in best_cfg.items() if k != "result"} if best_cfg else {}, - "markets": ["ms"] + (["ou25"] if ou_models else []), + 'version': 
'v27-pro', + 'trained_at': time.strftime('%Y-%m-%d %H:%M:%S'), + 'approach': 'odds-free fundamentals + value edge detection', + 'feature_count': len(clean_feats), + 'total_samples': len(df), + 'val_acc': round(val_acc, 4), + 'val_ll': round(val_ll, 4), + 'best_config': { + k: v for k, v in best_cfg.items() if k != 'result' + } if best_cfg else {}, + 'markets': ( + ['ms'] + + (['ou25'] if ou_models else []) + + (['btts'] if btts_models else []) + ), } - with open(MODELS_DIR / "v27_metadata.json", "w") as f: + with open(MODELS_DIR / 'v27_metadata.json', 'w') as f: json.dump(meta, f, indent=2, default=str) - with open(MODELS_DIR / "v27_feature_cols.json", "w") as f: + with open(MODELS_DIR / 'v27_feature_cols.json', 'w') as f: json.dump(clean_feats, f, indent=2) - print(f" ✓ metadata + feature_cols") + print(f' ✓ metadata + feature_cols') print(f"\n Total time: {(time.time()-t0)/60:.1f} min") print(" DONE!") diff --git a/ai-engine/services/betting_brain.py b/ai-engine/services/betting_brain.py index 3fd294d..61e7774 100644 --- a/ai-engine/services/betting_brain.py +++ b/ai-engine/services/betting_brain.py @@ -165,6 +165,11 @@ class BettingBrain: score -= 18.0 issues.append("base_model_not_playable") + is_value_sniper = bool(row.get("is_value_sniper")) + if is_value_sniper: + score += 35.0 + positives.append("value_sniper_override") + score += max(0.0, min(20.0, calibrated_conf * 0.22)) score += max(-8.0, min(16.0, ev_edge * 45.0)) score += max(0.0, min(14.0, play_score * 0.12)) @@ -178,13 +183,13 @@ class BettingBrain: if odds < self.MIN_ODDS: vetoes.append("odds_below_minimum") - if calibrated_conf < 38.0: + if calibrated_conf < 38.0 and not is_value_sniper: vetoes.append("calibrated_confidence_too_low") - if play_score < 50.0: + if play_score < 50.0 and not is_value_sniper: vetoes.append("play_score_too_low") if divergence is not None: - if divergence >= self.HARD_DIVERGENCE: + if divergence >= self.HARD_DIVERGENCE and not is_value_sniper: score -= 42.0 
vetoes.append("v25_v27_hard_disagreement") elif divergence >= self.SOFT_DIVERGENCE: @@ -211,7 +216,7 @@ class BettingBrain: else: score -= 16.0 issues.append("historical_sample_too_low") - if market == "DC": + if market == "DC" and not is_value_sniper: vetoes.append("dc_without_historical_sample") elif market in {"MS", "DC", "OU25"}: score -= 10.0 @@ -227,20 +232,21 @@ class BettingBrain: and model_prob >= self.EXTREME_MODEL_PROB and model_gap >= self.EXTREME_GAP and not triple_is_value + and not is_value_sniper ): score -= 24.0 vetoes.append("extreme_probability_without_evidence") - if market in {"HT", "HTFT", "OE"} and score < 86.0: + if market in {"HT", "HTFT", "OE"} and score < 86.0 and not is_value_sniper: vetoes.append("volatile_market_requires_exceptional_evidence") score = max(0.0, min(100.0, score)) action = "BET" if vetoes: action = "REJECT" - elif score < self.MIN_WATCH_SCORE: + elif score < self.MIN_WATCH_SCORE and not is_value_sniper: action = "REJECT" - elif score < self.MIN_BET_SCORE: + elif score < self.MIN_BET_SCORE and not is_value_sniper: action = "WATCH" row["betting_brain"] = { @@ -276,6 +282,7 @@ class BettingBrain: for source in ("main_pick", "value_pick"): item = package.get(source) if isinstance(item, dict) and item.get("market"): + # print(f"DEBUG: {source} is_value_sniper: {item.get('is_value_sniper')}") rows[self._row_key(item)] = dict(item) for source in ("supporting_picks", "bet_summary"): @@ -283,6 +290,7 @@ class BettingBrain: if isinstance(item, dict) and item.get("market"): key = self._row_key(item) rows[key] = self._merge_row(rows.get(key), item) + return list(rows.values()) @staticmethod diff --git a/ai-engine/services/feature_enrichment.py b/ai-engine/services/feature_enrichment.py index 45ebe1a..153b94f 100644 --- a/ai-engine/services/feature_enrichment.py +++ b/ai-engine/services/feature_enrichment.py @@ -14,11 +14,40 @@ is missing or queries fail. 
from __future__ import annotations +import unicodedata from typing import Any, Dict, Optional, Tuple from psycopg2.extras import RealDictCursor +# ─── Turkish Name Normalization ────────────────────────────────── + +_TR_CHAR_MAP = str.maketrans( + 'çÇğĞıİöÖşŞüÜâÂîÎûÛ', + 'cCgGiIoOsSuUaAiIuU', +) + + +def _normalize_name(name: str) -> str: + """ + Normalize a Turkish referee name for fuzzy matching. + + Strips accents, lowercases, removes extra whitespace, and maps + Turkish-specific characters to their ASCII equivalents. + """ + if not name: + return '' + # 1. Turkish-specific character mapping + normalized = name.translate(_TR_CHAR_MAP) + # 2. Unicode NFKD decomposition → strip combining marks + normalized = unicodedata.normalize('NFKD', normalized) + normalized = ''.join( + c for c in normalized if not unicodedata.combining(c) + ) + # 3. Lowercase + collapse whitespace + return ' '.join(normalized.lower().split()) + + class FeatureEnrichmentService: """Stateless service — all state comes from DB via cursor.""" @@ -380,34 +409,20 @@ class FeatureEnrichmentService: """ Referee tendencies: home win bias, avg goals, card rates. Matches referee by name in match_officials (role_id=1 = Orta Hakem). + + Uses Turkish-aware fuzzy matching as a fallback when exact name + lookup returns zero results. 
""" if not referee_name: return dict(self._DEFAULT_REFEREE) - try: - # Get match IDs officiated by this referee - cur.execute( - """ - SELECT - m.home_team_id, - m.score_home, - m.score_away, - m.id AS match_id - FROM match_officials mo - JOIN matches m ON m.id = mo.match_id - WHERE mo.name = %s - AND mo.role_id = 1 - AND m.status = 'FT' - AND m.score_home IS NOT NULL - AND m.score_away IS NOT NULL - AND m.mst_utc < %s - ORDER BY m.mst_utc DESC - LIMIT %s - """, - (referee_name, before_date_ms, limit), + + rows = self._query_referee_matches(cur, referee_name, before_date_ms, limit) + + # Fuzzy fallback: if exact match fails, try normalized name search + if not rows: + rows = self._fuzzy_referee_lookup( + cur, referee_name, before_date_ms, limit, ) - rows = cur.fetchall() - except Exception: - return dict(self._DEFAULT_REFEREE) if not rows: return dict(self._DEFAULT_REFEREE) @@ -459,6 +474,118 @@ class FeatureEnrichmentService: 'experience': total, } + def _query_referee_matches( + self, + cur: RealDictCursor, + referee_name: str, + before_date_ms: int, + limit: int, + ) -> list: + """Exact-match referee lookup in match_officials.""" + try: + cur.execute( + """ + SELECT + m.home_team_id, + m.score_home, + m.score_away, + m.id AS match_id + FROM match_officials mo + JOIN matches m ON m.id = mo.match_id + WHERE mo.name = %s + AND mo.role_id = 1 + AND m.status = 'FT' + AND m.score_home IS NOT NULL + AND m.score_away IS NOT NULL + AND m.mst_utc < %s + ORDER BY m.mst_utc DESC + LIMIT %s + """, + (referee_name, before_date_ms, limit), + ) + return cur.fetchall() + except Exception: + return [] + + def _fuzzy_referee_lookup( + self, + cur: RealDictCursor, + referee_name: str, + before_date_ms: int, + limit: int, + ) -> list: + """ + Fuzzy referee lookup using Turkish name normalization. + + Strategy: fetch recent distinct referee names from match_officials, + normalize both the query name and each candidate, and pick the + best match. 
This handles common mismatches like: + - 'Hüseyin Göçek' vs 'Huseyin Gocek' + - 'Ali Palabıyık' vs 'Ali Palabiyik' + - Extra/missing middle initials + """ + normalized_query = _normalize_name(referee_name) + if not normalized_query: + return [] + + try: + # Fetch candidate referee names (distinct, recent, role=1) + cur.execute( + """ + SELECT DISTINCT mo.name + FROM match_officials mo + JOIN matches m ON m.id = mo.match_id + WHERE mo.role_id = 1 + AND m.status = 'FT' + AND m.mst_utc < %s + ORDER BY mo.name + LIMIT 2000 + """, + (before_date_ms,), + ) + candidates = cur.fetchall() + except Exception: + return [] + + if not candidates: + return [] + + # Find best match by normalized name comparison + best_match: Optional[str] = None + best_score = 0.0 + + for cand_row in candidates: + cand_name = cand_row.get('name', '') + if not cand_name: + continue + normalized_cand = _normalize_name(cand_name) + + # Exact normalized match + if normalized_cand == normalized_query: + best_match = cand_name + best_score = 1.0 + break + + # Substring containment (handles "First Last" vs "First M. Last") + if ( + normalized_query in normalized_cand + or normalized_cand in normalized_query + ): + containment_score = min( + len(normalized_query), len(normalized_cand) + ) / max(len(normalized_query), len(normalized_cand)) + if containment_score > best_score and containment_score > 0.6: + best_match = cand_name + best_score = containment_score + + if not best_match: + return [] + + # Re-query with the resolved name + return self._query_referee_matches( + cur, best_match, before_date_ms, limit, + ) + # ─── 5. 
League Averages ───────────────────────────────────────── def compute_league_averages( diff --git a/ai-engine/services/single_match_orchestrator.py b/ai-engine/services/single_match_orchestrator.py index 3c6ec36..9c16bd0 100755 --- a/ai-engine/services/single_match_orchestrator.py +++ b/ai-engine/services/single_match_orchestrator.py @@ -84,6 +84,7 @@ class MatchData: current_score_home: Optional[int] = None current_score_away: Optional[int] = None lineup_confidence: float = 0.0 + source_table: str = "matches" class SingleMatchOrchestrator: @@ -190,35 +191,35 @@ class SingleMatchOrchestrator: } # Min confidence: lowered to be achievable (max_reachable - 16 to -20) self.market_min_conf: Dict[str, float] = { - "MS": 42.0, # was 44 — 3-way market, hard to get high conf - "DC": 52.0, # was 55 — double chance is easier - "OU15": 55.0, # was 58 — binary + usually high conf - "OU25": 48.0, # was 52 — core market, allow more through - "OU35": 48.0, # was 54 — lowered to let signals pass - "BTTS": 46.0, # was 50 — binary market - "HT": 40.0, # was 45 — was ❌ impossible, now achievable - "HT_OU05": 50.0, # was 54 — binary HT market - "HT_OU15": 42.0, # was 48 — was ❌ impossible, now achievable - "OE": 46.0, # was 50 — coin-flip market, lower bar - "CARDS": 42.0, # was 48 — was ❌ impossible, now achievable - "HCAP": 40.0, # was 46 — was ❌ impossible, now achievable - "HTFT": 28.0, # was 32 — was ❌ impossible, 9-way market + "MS": 20.0, # was 42 — drastically lowered to allow underdog/draw value bets + "DC": 40.0, # was 52 + "OU15": 45.0, # was 55 + "OU25": 30.0, # was 48 + "OU35": 20.0, # was 48 + "BTTS": 30.0, # was 46 + "HT": 20.0, # was 40 + "HT_OU05": 35.0, # was 50 + "HT_OU15": 25.0, # was 42 + "OE": 35.0, # was 46 + "CARDS": 30.0, # was 42 + "HCAP": 25.0, # was 40 + "HTFT": 10.0, # was 28 } - # Min play score: moderate reduction to allow more C-grade bets + # Min play score: Significantly reduced to stop blocking value bets on underdogs self.market_min_play_score: 
Dict[str, float] = { - "MS": 65.0, # was 72 — let more MS through for tracking - "DC": 58.0, # was 62 — DC is high accuracy - "OU15": 60.0, # was 64 — strong market per backtest - "OU25": 64.0, # was 70 — core market - "OU35": 68.0, # was 76 — riskier market - "BTTS": 64.0, # was 70 — allow more signals - "HT": 66.0, # was 74 — was never reachable anyway - "HT_OU05": 60.0, # was 64 — strong backtest market - "HT_OU15": 64.0, # was 72 — moderate - "OE": 60.0, # was 66 — low priority market - "CARDS": 66.0, # was 74 — niche market - "HCAP": 68.0, # was 76 — risky - "HTFT": 72.0, # was 82 — 9-way, very risky + "MS": 30.0, # was 65 + "DC": 55.0, # was 58 + "OU15": 55.0, # was 60 + "OU25": 45.0, # was 64 + "OU35": 35.0, # was 68 + "BTTS": 45.0, # was 64 + "HT": 30.0, # was 66 + "HT_OU05": 45.0, # was 60 + "HT_OU15": 35.0, # was 64 + "OE": 35.0, # was 60 + "CARDS": 40.0, # was 66 + "HCAP": 35.0, # was 68 + "HTFT": 20.0, # was 72 } self.market_min_edge: Dict[str, float] = { "MS": 0.02, # was 0.03 — slight relaxation @@ -235,6 +236,28 @@ class SingleMatchOrchestrator: "HCAP": 0.03, # was 0.04 "HTFT": 0.05, # was 0.06 } + self.odds_band_min_sample: Dict[str, float] = { + "MS": 8.0, + "DC": 8.0, + "OU15": 8.0, + "OU25": 8.0, + "OU35": 8.0, + "BTTS": 8.0, + "HT": 8.0, + "HT_OU05": 8.0, + "HT_OU15": 8.0, + } + self.odds_band_min_edge: Dict[str, float] = { + "MS": 0.015, + "DC": 0.012, + "OU15": 0.012, + "OU25": 0.015, + "OU35": 0.018, + "BTTS": 0.015, + "HT": 0.018, + "HT_OU05": 0.012, + "HT_OU15": 0.015, + } def _get_v25_predictor(self) -> V25Predictor: if self.v25_predictor is None: @@ -362,6 +385,32 @@ class SingleMatchOrchestrator: away_venue_elo = float(elo_row.get('away_away_elo') or away_elo) home_form_elo_val = float(elo_row.get('home_form_elo') or home_elo) away_form_elo_val = float(elo_row.get('away_form_elo') or away_elo) + else: + cur.execute( + """ + SELECT + team_id, + overall_elo, + home_elo, + away_elo, + form_elo + FROM team_elo_ratings + WHERE team_id IN (%s, 
%s) + """, + (data.home_team_id, data.away_team_id), + ) + elo_rows = cur.fetchall() + by_team = {str(r.get("team_id")): r for r in elo_rows} + home_row = by_team.get(str(data.home_team_id)) + away_row = by_team.get(str(data.away_team_id)) + if home_row: + home_elo = float(home_row.get("overall_elo") or 1500.0) + home_venue_elo = float(home_row.get("home_elo") or home_elo) + home_form_elo_val = float(home_row.get("form_elo") or home_elo) + if away_row: + away_elo = float(away_row.get("overall_elo") or 1500.0) + away_venue_elo = float(away_row.get("away_elo") or away_elo) + away_form_elo_val = float(away_row.get("form_elo") or away_elo) # Enrichment queries home_stats = enr.compute_team_stats(cur, data.home_team_id, data.match_date_ms) @@ -390,6 +439,8 @@ class SingleMatchOrchestrator: before_ts=data.match_date_ms, referee_name=data.referee_name, ) + setattr(data, "odds_band_features", odds_band_features) + setattr(data, "feature_source", "football_ai_features" if elo_row else "live_prematch_enrichment") except Exception: # Full fallback — use all defaults home_stats = dict(enr._DEFAULT_TEAM_STATS) @@ -409,6 +460,8 @@ class SingleMatchOrchestrator: home_rest = 7.0 away_rest = 7.0 odds_band_features = {} # V28 fallback + setattr(data, "odds_band_features", odds_band_features) + setattr(data, "feature_source", "fallback_defaults") odds_presence = { 'odds_ms_h_present': 1.0 if ms_h > 1.01 else 0.0, @@ -1290,25 +1343,72 @@ class SingleMatchOrchestrator: ), } - # BTTS triple value - btts_yes_odds = float((data.odds_data or {}).get("btts_y", 0)) + # BTTS triple value — now with V27 BTTS model + btts_yes_odds = float((data.odds_data or {}).get('btts_y', 0)) btts_implied = (1.0 / btts_yes_odds) if btts_yes_odds > 1.0 else 0.50 - btts_band_rate = odds_band_btts["yes_rate"] - btts_combined = btts_band_rate + btts_band_rate = odds_band_btts['yes_rate'] + + # V27 BTTS model prediction (if available) + v27_btts = v27_preds.get('btts') + v27_btts_yes = (v27_btts or {}).get('yes', 
0) if v27_btts else 0 + + if v27_btts_yes > 0: + btts_combined = (v27_btts_yes + btts_band_rate) / 2.0 + else: + btts_combined = btts_band_rate btts_edge = btts_combined - btts_implied btts_band_confirms = btts_band_rate > btts_implied + btts_v27_confirms = v27_btts_yes > btts_implied if v27_btts_yes > 0 else False + btts_conf_count = sum([btts_v27_confirms, btts_band_confirms]) - triple_value["btts_yes"] = { - "band_rate": round(btts_band_rate, 4), - "implied_prob": round(btts_implied, 4), - "combined_prob": round(btts_combined, 4), - "edge": round(btts_edge, 4), - "band_sample": odds_band_btts["sample"], - "confirmations": 1 if btts_band_confirms else 0, - "is_value": ( + # BTTS divergence (V25 vs V27) + v25_btts_probs = { + 'no': 1.0 - prediction.btts_yes_prob, + 'yes': prediction.btts_yes_prob, + } + btts_divergence = compute_divergence(v25_btts_probs, v27_btts) if v27_btts else {} + btts_odds = { + 'yes': float((data.odds_data or {}).get('btts_y', 0)), + 'no': float((data.odds_data or {}).get('btts_n', 0)), + } + btts_value_edge = compute_value_edge( + v25_btts_probs, v27_btts, btts_odds, + ) if v27_btts else {} + + # DC divergence (derived from V27 MS probs) + v27_dc = v27_preds.get('dc') + dc_divergence = {} + dc_value_edge = {} + if v27_dc: + v25_dc_probs = { + '1x': prediction.ms_home_prob + prediction.ms_draw_prob, + 'x2': prediction.ms_draw_prob + prediction.ms_away_prob, + '12': prediction.ms_home_prob + prediction.ms_away_prob, + } + dc_divergence = compute_divergence(v25_dc_probs, v27_dc) + dc_odds = { + '1x': float((data.odds_data or {}).get('dc_1x', 0)), + 'x2': float((data.odds_data or {}).get('dc_x2', 0)), + '12': float((data.odds_data or {}).get('dc_12', 0)), + } + dc_value_edge = compute_value_edge(v25_dc_probs, v27_dc, dc_odds) + + triple_value['btts_yes'] = { + 'v27_prob': round(v27_btts_yes, 4), + 'band_rate': round(btts_band_rate, 4), + 'implied_prob': round(btts_implied, 4), + 'combined_prob': round(btts_combined, 4), + 'edge': 
round(btts_edge, 4), + 'band_sample': odds_band_btts['sample'], + 'confirmations': btts_conf_count, + 'is_value': ( + btts_conf_count >= 2 + and btts_edge > 0.05 + and odds_band_btts['sample'] >= 8 + ) if v27_btts_yes > 0 else ( btts_band_confirms and btts_edge > 0.05 - and odds_band_btts["sample"] >= 8 + and odds_band_btts['sample'] >= 8 ), } @@ -1366,14 +1466,20 @@ class SingleMatchOrchestrator: "predictions": { "ms": v27_ms or {}, "ou25": v27_ou25 or {}, + "btts": v27_btts or {}, + "dc": v27_dc or {}, }, "divergence": { "ms": ms_divergence, "ou25": ou25_divergence, + "btts": btts_divergence, + "dc": dc_divergence, }, "value_edge": { "ms": ms_value, "ou25": ou25_value, + "btts": btts_value_edge, + "dc": dc_value_edge, }, "odds_band": { "ms_home": odds_band_ms_home, @@ -2670,6 +2776,13 @@ class SingleMatchOrchestrator: # Hard gate: predictions with unknown teams are noisy and misleading. return None + status, state, substate = self._normalize_match_status( + row.get("status"), + row.get("state"), + row.get("substate"), + row.get("score_home"), + row.get("score_away"), + ) odds_data = self._extract_odds(cur, row) home_lineup, away_lineup, lineup_source, lineup_confidence = self._extract_lineups(cur, row) sidelined = self._parse_json_dict(row.get("sidelined")) @@ -2723,10 +2836,11 @@ class SingleMatchOrchestrator: home_position=home_position, away_position=away_position, lineup_source=lineup_source, - status=str(row.get("status") or ""), - state=row.get("state"), - substate=row.get("substate"), + status=status, + state=state, + substate=substate, lineup_confidence=lineup_confidence, + source_table=str(row.get("source_table") or "matches"), current_score_home=( int(row.get("score_home")) if row.get("score_home") is not None @@ -2760,7 +2874,8 @@ class SingleMatchOrchestrator: lm.referee_name, ht.name as home_team_name, at.name as away_team_name, - l.name as league_name + l.name as league_name, + 'live_matches'::text as source_table FROM live_matches lm LEFT JOIN 
teams ht ON ht.id = lm.home_team_id LEFT JOIN teams at ON at.id = lm.away_team_id @@ -2772,6 +2887,37 @@ class SingleMatchOrchestrator: ) return cur.fetchone() + @staticmethod + def _normalize_match_status( + status: Any, + state: Any, + substate: Any, + score_home: Any, + score_away: Any, + ) -> Tuple[str, Optional[str], Optional[str]]: + state_text = str(state or "").strip() + status_text = str(status or "").strip() + substate_text = str(substate or "").strip() + + state_key = state_text.lower().replace("_", "").replace(" ", "") + status_key = status_text.lower().replace("_", "").replace(" ", "") + substate_key = substate_text.lower().replace("_", "").replace(" ", "") + + live_tokens = {"live", "livegame", "firsthalf", "secondhalf", "halftime", "1h", "2h", "ht", "1q", "2q", "3q", "4q"} + finished_tokens = {"post", "postgame", "finished", "played", "ft", "ended", "aet", "pen", "penalties", "afterpenalties"} + pre_tokens = {"pre", "pregame", "scheduled", "ns", "notstarted", "timestamp"} + + if state_key in live_tokens or status_key in live_tokens or substate_key in live_tokens: + return "LIVE", state_text or "live", substate_text or None + if state_key in finished_tokens or status_key in finished_tokens or substate_key in finished_tokens: + return "FT", state_text or "post", substate_text or None + if score_home is not None and score_away is not None and status_key not in pre_tokens: + return "FT", state_text or "post", substate_text or None + if state_key in pre_tokens or status_key in pre_tokens or substate_key in pre_tokens: + return "NS", state_text or "pre", substate_text or None + + return status_text or "NS", state_text or None, substate_text or None + def _fetch_hist_match(self, cur: RealDictCursor, match_id: str) -> Optional[Dict[str, Any]]: cur.execute( """ @@ -2793,7 +2939,8 @@ class SingleMatchOrchestrator: ref.name as referee_name, ht.name as home_team_name, at.name as away_team_name, - l.name as league_name + l.name as league_name, + 'matches'::text 
as source_table FROM matches m LEFT JOIN teams ht ON ht.id = m.home_team_id LEFT JOIN teams at ON at.id = m.away_team_id @@ -3668,66 +3815,33 @@ class SingleMatchOrchestrator: playable_rows = [row for row in market_rows if row.get("playable")] - # GUARANTEED PICK LOGIC (V32 - Calibration-aware): - # Runtime replay insights: - # - Trust only markets that remain robust after pre-match replay. - # - Current strongest football markets: DC, OU15, HT_OU05. - # - # Priority 1: High-accuracy market (DC/OU15/HT_OU05/OU25) + Odds >= 1.30 + Conf >= 44% - # Priority 2: Any playable + Odds >= 1.30 + Conf >= 44% - # Priority 3: Playable + Odds >= 1.30 - # Priority 4: Best non-playable (fallback) MIN_ODDS = 1.30 - MIN_CONFIDENCE = 44.0 # V32: lowered from 52 to match new calibration - - # High-accuracy markets from backtest (prioritize these) - HIGH_ACCURACY_MARKETS = {"DC", "OU15", "HT_OU05"} - - # Priority 1: High-accuracy markets with good odds and confidence - high_accuracy_picks = [ + playable_with_odds = [ row for row in playable_rows - if row.get("market") in HIGH_ACCURACY_MARKETS - and float(row.get("odds", 0.0)) >= MIN_ODDS - and float(row.get("calibrated_confidence", 0.0)) >= MIN_CONFIDENCE + if float(row.get("odds", 0.0)) >= MIN_ODDS ] - - if high_accuracy_picks: - # Sort by play_score, pick the best - high_accuracy_picks.sort(key=lambda r: float(r.get("play_score", 0.0)), reverse=True) - main_pick = high_accuracy_picks[0] - main_pick["is_guaranteed"] = True - main_pick["pick_reason"] = "high_accuracy_market" + + if playable_with_odds: + playable_with_odds.sort( + key=lambda r: ( + float(r.get("ev_edge", 0.0)), + float(r.get("play_score", 0.0)), + ), + reverse=True, + ) + main_pick = playable_with_odds[0] + main_pick["is_guaranteed"] = False + main_pick["pick_reason"] = "positive_ev_after_odds_band_gate" else: - # Priority 2: Any playable with odds >= 1.30 and confidence >= 40% - guaranteed_picks = [ - row for row in playable_rows - if float(row.get("odds", 0.0)) >= 
MIN_ODDS - and float(row.get("calibrated_confidence", 0.0)) >= MIN_CONFIDENCE - ] - - if guaranteed_picks: - guaranteed_picks.sort(key=lambda r: float(r.get("play_score", 0.0)), reverse=True) - main_pick = guaranteed_picks[0] - main_pick["is_guaranteed"] = True - main_pick["pick_reason"] = "confidence_threshold_met" - else: - # Priority 3: Fallback - playable with odds >= 1.30 - playable_with_odds = [ - row for row in playable_rows - if float(row.get("odds", 0.0)) >= MIN_ODDS - ] - if playable_with_odds: - playable_with_odds.sort(key=lambda r: float(r.get("play_score", 0.0)), reverse=True) - main_pick = playable_with_odds[0] - main_pick["is_guaranteed"] = False - main_pick["pick_reason"] = "odds_only_fallback" - else: - # Priority 4: Last resort - any playable or first market WITH ODDS > 0 - fallback_with_odds = [r for r in market_rows if float(r.get("odds", 0.0)) > 1.0] - main_pick = playable_rows[0] if playable_rows else (fallback_with_odds[0] if fallback_with_odds else (market_rows[0] if market_rows else None)) - if main_pick: - main_pick["is_guaranteed"] = False - main_pick["pick_reason"] = "last_resort" + fallback_with_odds = [r for r in market_rows if float(r.get("odds", 0.0)) > 1.0] + fallback_with_odds.sort(key=lambda r: float(r.get("play_score", 0.0)), reverse=True) + main_pick = fallback_with_odds[0] if fallback_with_odds else (market_rows[0] if market_rows else None) + if main_pick: + main_pick["is_guaranteed"] = False + main_pick["playable"] = False + main_pick["stake_units"] = 0.0 + main_pick["bet_grade"] = "PASS" + main_pick["pick_reason"] = "no_playable_value_after_odds_band_gate" aggressive_pick = None htft_probs = prediction.ht_ft_probs or {} @@ -3756,11 +3870,13 @@ class SingleMatchOrchestrator: value_candidates = [ row for row in playable_rows if float(row.get("odds", 0.0)) >= 1.60 - and float(row.get("calibrated_confidence", 0.0)) >= 40.0 + # V34: Lowered min calibrated_confidence for value candidates from 40.0 to 25.0 + # to allow high-odds 
value bets (which naturally have lower probabilities). + and float(row.get("calibrated_confidence", 0.0)) >= 25.0 ] if value_candidates: - # Score them by (play_score * odds) to reward higher odds - value_candidates.sort(key=lambda r: float(r.get("play_score", 0.0)) * float(r.get("odds", 1.0)), reverse=True) + # Score them by (ev_edge) to reward actual mathematical value + value_candidates.sort(key=lambda r: float(r.get("ev_edge", 0.0)), reverse=True) for v_cand in value_candidates: if not main_pick or (v_cand["market"] != main_pick["market"] or v_cand["pick"] != main_pick["pick"]): value_pick = v_cand @@ -3982,51 +4098,33 @@ class SingleMatchOrchestrator: playable_rows = [row for row in market_rows if row.get("playable")] - # GUARANTEED PICK LOGIC (Optimized - same as football) MIN_ODDS = 1.30 - MIN_CONFIDENCE = 40.0 - HIGH_ACCURACY_MARKETS = {"ML", "TOT", "SPREAD"} - - high_accuracy_picks = [ + playable_with_odds = [ row for row in playable_rows - if row.get("market_type") in HIGH_ACCURACY_MARKETS - and float(row.get("odds", 0.0)) >= MIN_ODDS - and float(row.get("calibrated_confidence", 0.0)) >= MIN_CONFIDENCE + if float(row.get("odds", 0.0)) >= MIN_ODDS ] - - if high_accuracy_picks: - high_accuracy_picks.sort(key=lambda r: float(r.get("play_score", 0.0)), reverse=True) - main_pick = high_accuracy_picks[0] - main_pick["is_guaranteed"] = True - main_pick["pick_reason"] = "high_accuracy_market" + + if playable_with_odds: + playable_with_odds.sort( + key=lambda r: ( + float(r.get("ev_edge", 0.0)), + float(r.get("play_score", 0.0)), + ), + reverse=True, + ) + main_pick = playable_with_odds[0] + main_pick["is_guaranteed"] = False + main_pick["pick_reason"] = "positive_ev_pick" else: - guaranteed_picks = [ - row for row in playable_rows - if float(row.get("odds", 0.0)) >= MIN_ODDS - and float(row.get("calibrated_confidence", 0.0)) >= MIN_CONFIDENCE - ] - - if guaranteed_picks: - guaranteed_picks.sort(key=lambda r: float(r.get("play_score", 0.0)), reverse=True) - 
main_pick = guaranteed_picks[0] - main_pick["is_guaranteed"] = True - main_pick["pick_reason"] = "confidence_threshold_met" - else: - playable_with_odds = [ - row for row in playable_rows - if float(row.get("odds", 0.0)) >= MIN_ODDS - ] - if playable_with_odds: - playable_with_odds.sort(key=lambda r: float(r.get("play_score", 0.0)), reverse=True) - main_pick = playable_with_odds[0] - main_pick["is_guaranteed"] = False - main_pick["pick_reason"] = "odds_only_fallback" - else: - fallback_with_odds = [r for r in market_rows if float(r.get("odds", 0.0)) > 1.0] - main_pick = playable_rows[0] if playable_rows else (fallback_with_odds[0] if fallback_with_odds else (market_rows[0] if market_rows else None)) - if main_pick: - main_pick["is_guaranteed"] = False - main_pick["pick_reason"] = "last_resort" + fallback_with_odds = [r for r in market_rows if float(r.get("odds", 0.0)) > 1.0] + fallback_with_odds.sort(key=lambda r: float(r.get("play_score", 0.0)), reverse=True) + main_pick = fallback_with_odds[0] if fallback_with_odds else (market_rows[0] if market_rows else None) + if main_pick: + main_pick["is_guaranteed"] = False + main_pick["playable"] = False + main_pick["stake_units"] = 0.0 + main_pick["bet_grade"] = "PASS" + main_pick["pick_reason"] = "no_playable_value_found" supporting: List[Dict[str, Any]] = [] for row in market_rows: @@ -4518,6 +4616,121 @@ class SingleMatchOrchestrator: return True return self._v25_market_odds(odds, market, pick) > 1.01 + def _odds_band_verdict( + self, + data: MatchData, + market: str, + pick: str, + implied_prob: float, + ) -> Dict[str, Any]: + features = getattr(data, "odds_band_features", {}) or {} + market_key = str(market or "").upper() + if not isinstance(features, dict) or implied_prob <= 0.0: + return { + "required": market_key in self.odds_band_min_sample, + "available": False, + "band_prob": 0.0, + "band_sample": 0.0, + "band_edge": 0.0, + "aligned": False, + "reason": "odds_band_unavailable", + } + + pick_key = 
self._normalize_pick_token(pick) + band_prob = 0.0 + sample = 0.0 + + if market_key == "MS": + if pick_key == "1": + band_prob = float(features.get("home_band_ms_win_rate", 0.0) or 0.0) + sample = float(features.get("home_band_ms_sample", 0.0) or 0.0) + elif pick_key == "2": + band_prob = float(features.get("away_band_ms_win_rate", 0.0) or 0.0) + sample = float(features.get("away_band_ms_sample", 0.0) or 0.0) + elif pick_key in {"X", "0"}: + home_draw = float(features.get("home_band_ms_draw_rate", 0.0) or 0.0) + away_draw = float(features.get("away_band_ms_draw_rate", 0.0) or 0.0) + band_prob = (home_draw + away_draw) / 2.0 if home_draw and away_draw else max(home_draw, away_draw) + sample = max( + float(features.get("home_band_ms_sample", 0.0) or 0.0), + float(features.get("away_band_ms_sample", 0.0) or 0.0), + ) + elif market_key == "DC": + dc_key = pick_key.replace("-", "").lower() + band_prob = float(features.get(f"band_dc_{dc_key}_rate", 0.0) or 0.0) + sample = float(features.get(f"band_dc_{dc_key}_sample", 0.0) or 0.0) + elif market_key in {"OU15", "OU25", "OU35"}: + suffix = {"OU15": "ou15", "OU25": "ou25", "OU35": "ou35"}[market_key] + rate_key = "over_rate" if self._pick_is_over(pick_key) else "under_rate" + band_prob = float(features.get(f"band_{suffix}_{rate_key}", 0.0) or 0.0) + sample = float(features.get(f"band_{suffix}_sample", 0.0) or 0.0) + elif market_key == "BTTS": + is_yes = "VAR" in pick_key or "YES" in pick_key or pick_key == "Y" + band_prob = float(features.get(f"band_btts_{'yes' if is_yes else 'no'}_rate", 0.0) or 0.0) + sample = float(features.get("band_btts_sample", 0.0) or 0.0) + elif market_key == "HT": + if pick_key == "1": + band_prob = float(features.get("home_band_ht_win_rate", 0.0) or 0.0) + sample = float(features.get("home_band_ht_sample", 0.0) or 0.0) + elif pick_key == "2": + band_prob = float(features.get("away_band_ht_win_rate", 0.0) or 0.0) + sample = float(features.get("away_band_ht_sample", 0.0) or 0.0) + elif pick_key in 
{"X", "0"}: + home_draw = float(features.get("home_band_ht_draw_rate", 0.0) or 0.0) + away_draw = float(features.get("away_band_ht_draw_rate", 0.0) or 0.0) + band_prob = (home_draw + away_draw) / 2.0 if home_draw and away_draw else max(home_draw, away_draw) + sample = max( + float(features.get("home_band_ht_sample", 0.0) or 0.0), + float(features.get("away_band_ht_sample", 0.0) or 0.0), + ) + elif market_key in {"HT_OU05", "HT_OU15"}: + suffix = "ht_ou05" if market_key == "HT_OU05" else "ht_ou15" + rate_key = "over_rate" if self._pick_is_over(pick_key) else "under_rate" + band_prob = float(features.get(f"band_{suffix}_{rate_key}", 0.0) or 0.0) + sample = float(features.get(f"band_{suffix}_sample", 0.0) or 0.0) + + band_edge = band_prob - implied_prob if band_prob > 0.0 else 0.0 + required_sample = float(self.odds_band_min_sample.get(market_key, 0.0)) + required_edge = float(self.odds_band_min_edge.get(market_key, 0.0)) + available = band_prob > 0.0 and sample >= required_sample + aligned = available and band_edge >= required_edge + + reason = "odds_band_confirms_value" + if required_sample > 0.0 and sample < required_sample: + reason = "odds_band_sample_too_low" + elif band_prob <= 0.0: + reason = "odds_band_missing_probability" + elif band_edge < required_edge: + reason = f"odds_band_no_value_{band_edge:+.3f}" + + return { + "required": market_key in self.odds_band_min_sample, + "available": available, + "band_prob": band_prob, + "band_sample": sample, + "band_edge": band_edge, + "aligned": aligned, + "reason": reason, + } + + @staticmethod + def _normalize_pick_token(pick: str) -> str: + return ( + str(pick or "") + .strip() + .upper() + .replace("İ", "I") + .replace("Ü", "U") + .replace("Ş", "S") + .replace("Ğ", "G") + .replace("Ö", "O") + .replace("Ç", "C") + ) + + @staticmethod + def _pick_is_over(pick_key: str) -> bool: + return "UST" in pick_key or "OVER" in pick_key + @staticmethod def _goal_line_for_market(market: str) -> Optional[float]: return { @@ 
-4968,12 +5181,8 @@ class SingleMatchOrchestrator: calibrated_conf = max(1.0, min(99.0, raw_conf * calibration)) min_conf = self.market_min_conf.get(market, 55.0) - # ── V2 Quant: EV Edge formula ────────────────────────────────── - # Old: edge = prob - (1/odd) ← simple probability difference - # New: edge = (prob × odd) - 1 ← Expected Value (what a quant uses) implied_prob = (1.0 / odd) if odd > 1.0 else 0.0 - ev_edge = (prob * odd) - 1.0 if odd > 1.0 else 0.0 - simple_edge = prob - implied_prob if implied_prob > 0 else 0.0 + band_verdict = self._odds_band_verdict(data, market, str(row.get("pick") or ""), implied_prob) # ── V31: League-specific odds reliability ────────────────────── # Higher reliability → trust odds-based edge more in play_score @@ -4995,6 +5204,25 @@ class SingleMatchOrchestrator: quality_label, 5.0, ) + # V33: Removed probability deflation. Deflating probability breaks normalization + # (probs no longer sum to 1) and mathematically guarantees negative EV edge. + # Data quality and confidence penalties are already applied to play_score. 
+ model_calibrated_prob = prob + band_prob = float(band_verdict.get("band_prob", 0.0) or 0.0) + if bool(band_verdict.get("available")): + calibrated_probability = ( + (model_calibrated_prob * 0.45) + + (band_prob * 0.35) + + (implied_prob * 0.20) + ) + elif implied_prob > 0.0: + calibrated_probability = (model_calibrated_prob * 0.65) + (implied_prob * 0.35) + else: + calibrated_probability = model_calibrated_prob + calibrated_probability = max(0.0, min(0.99, calibrated_probability)) + model_edge = model_calibrated_prob - implied_prob if implied_prob > 0 else 0.0 + ev_edge = (calibrated_probability * odd) - 1.0 if odd > 1.0 else 0.0 + simple_edge = calibrated_probability - implied_prob if implied_prob > 0 else 0.0 home_n = len(data.home_lineup or []) away_n = len(data.away_lineup or []) @@ -5005,22 +5233,18 @@ class SingleMatchOrchestrator: lineup_conf = max(0.0, min(1.0, float(getattr(data, "lineup_confidence", 0.0) or 0.0))) lineup_penalty += max(1.0, (1.0 - lineup_conf) * 5.0) - # V31: edge contribution weighted by league odds reliability - base_score = calibrated_conf + (simple_edge * 100.0 * edge_multiplier) - play_score = max( - 0.0, - min(100.0, base_score - risk_penalty - quality_penalty - lineup_penalty), - ) - # ── V20+ Safety gates (PRESERVED) ───────────────────────────── min_play_score = self.market_min_play_score.get(market, 68.0) min_edge = self.market_min_edge.get(market, 0.02) reasons: List[str] = [] playable = True + + is_value_sniper = ev_edge >= 0.03 if calibrated_conf < min_conf: - playable = False - reasons.append("below_calibrated_conf_threshold") + if not is_value_sniper: + playable = False + reasons.append("below_calibrated_conf_threshold") if market in self.ODDS_REQUIRED_MARKETS and odd <= 1.01: playable = False reasons.append("market_odds_missing") @@ -5037,18 +5261,33 @@ class SingleMatchOrchestrator: # Most pre-match predictions use probable_xi — blocking kills all output lineup_penalty += 6.0 reasons.append("lineup_probable_xi_penalty") 
+ base_score = calibrated_conf + (simple_edge * 100.0 * edge_multiplier) + play_score = max( + 0.0, + min(100.0, base_score - risk_penalty - quality_penalty - lineup_penalty), + ) + if bool(band_verdict.get("required")) and not bool(band_verdict.get("aligned")): + if not is_value_sniper: + playable = False + reasons.append(str(band_verdict.get("reason") or "odds_band_not_aligned")) + if bool(band_verdict.get("required")) and implied_prob > 0.0 and model_edge <= 0.0: + if not is_value_sniper: + playable = False + reasons.append(f"model_not_above_market_{model_edge:+.3f}") # V31: negative edge threshold adapts to league reliability # Reliable league: stricter (-0.03), unreliable: looser (-0.08) neg_edge_threshold = -0.03 - (1.0 - odds_rel) * 0.05 if odd > 1.0 and simple_edge < neg_edge_threshold: - playable = False - reasons.append(f"negative_model_edge_{simple_edge:+.3f}") + if not is_value_sniper: + playable = False + reasons.append(f"negative_model_edge_{simple_edge:+.3f}") if odd > 1.0 and ev_edge < min_edge: playable = False reasons.append(f"below_market_edge_threshold_{ev_edge:+.3f}") if play_score < min_play_score: - playable = False - reasons.append("insufficient_play_score") + if not is_value_sniper: + playable = False + reasons.append("insufficient_play_score") if not reasons: reasons.append("market_passed_all_gates") @@ -5068,15 +5307,15 @@ class SingleMatchOrchestrator: elif ev_edge > 0.10: grade = "A" # V2 Quant: Fractional Kelly Criterion (¼ Kelly, 10-unit bankroll) - stake_units = self._kelly_stake(prob, odd) + stake_units = self._kelly_stake(calibrated_probability, odd) reasons.append(f"ev_edge_{ev_edge:+.1%}_grade_A") elif ev_edge > 0.05: grade = "B" - stake_units = self._kelly_stake(prob, odd) + stake_units = self._kelly_stake(calibrated_probability, odd) reasons.append(f"ev_edge_{ev_edge:+.1%}_grade_B") elif ev_edge > 0.02: grade = "C" - stake_units = self._kelly_stake(prob, odd) + stake_units = self._kelly_stake(calibrated_probability, odd) 
reasons.append(f"ev_edge_{ev_edge:+.1%}_grade_C") else: # Passes all V20+ gates but no mathematical edge over bookie @@ -5093,8 +5332,16 @@ class SingleMatchOrchestrator: "min_required_play_score": round(min_play_score, 1), "min_required_edge": round(min_edge, 4), "edge": round(ev_edge, 4), + "model_probability": round(prob, 4), + "model_edge": round(model_edge, 4), + "calibrated_probability": round(calibrated_probability, 4), "implied_prob": round(implied_prob, 4), "ev_edge": round(ev_edge, 4), + "is_value_sniper": is_value_sniper, + "odds_band_probability": round(float(band_verdict.get("band_prob", 0.0) or 0.0), 4), + "odds_band_sample": round(float(band_verdict.get("band_sample", 0.0) or 0.0), 1), + "odds_band_edge": round(float(band_verdict.get("band_edge", 0.0) or 0.0), 4), + "odds_band_aligned": bool(band_verdict.get("aligned")), "odds_reliability": round(odds_rel, 4), "play_score": round(play_score, 1), "playable": playable, @@ -5145,7 +5392,15 @@ class SingleMatchOrchestrator: "stake_units": float(row.get("stake_units", 0.0)), "play_score": row.get("play_score", 0.0), "ev_edge": row.get("ev_edge", row.get("edge", 0.0)), + "is_value_sniper": bool(row.get("is_value_sniper")), + "model_probability": row.get("model_probability", row.get("probability", 0.0)), + "model_edge": row.get("model_edge", 0.0), + "calibrated_probability": row.get("calibrated_probability", row.get("probability", 0.0)), "implied_prob": row.get("implied_prob", 0.0), + "odds_band_probability": row.get("odds_band_probability", 0.0), + "odds_band_sample": row.get("odds_band_sample", 0.0), + "odds_band_edge": row.get("odds_band_edge", 0.0), + "odds_band_aligned": bool(row.get("odds_band_aligned")), "odds_reliability": row.get("odds_reliability", 0.35), "odds": row.get("odds", 0.0), "reasons": row.get("decision_reasons", []), @@ -5187,6 +5442,11 @@ class SingleMatchOrchestrator: ref_score = 1.0 if data.referee_name else 0.6 if not data.referee_name: flags.append("missing_referee") + if 
data.source_table == "live_matches": + flags.append("live_match_pre_match_features") + feature_source = str(getattr(data, "feature_source", "") or "") + if feature_source == "live_prematch_enrichment": + flags.append("ai_features_inferred_from_history") total_score = (odds_score * 0.45) + (lineup_score * 0.45) + (ref_score * 0.10) @@ -5196,6 +5456,10 @@ class SingleMatchOrchestrator: label = "MEDIUM" else: label = "LOW" + if label == "HIGH" and ( + data.lineup_source == "probable_xi" or not data.referee_name + ): + label = "MEDIUM" return { "label": label, @@ -5204,6 +5468,7 @@ class SingleMatchOrchestrator: "away_lineup_count": away_n, "lineup_source": data.lineup_source, "lineup_confidence": round(float(getattr(data, "lineup_confidence", 0.0) or 0.0), 3), + "feature_source": feature_source or "unknown", "flags": flags, } diff --git a/src/common/utils/match-status.util.ts b/src/common/utils/match-status.util.ts index 53a20a3..1db15fc 100644 --- a/src/common/utils/match-status.util.ts +++ b/src/common/utils/match-status.util.ts @@ -81,7 +81,6 @@ export const LIVE_STATUS_VALUES_FOR_DB = [ "Playing", "Half Time", "liveGame", - "minutes", ]; export const LIVE_STATE_VALUES_FOR_DB = [ @@ -110,7 +109,6 @@ export const FINISHED_STATUS_VALUES_FOR_DB = [ "postGame", "posted", "Posted", - "state", ]; export const FINISHED_STATE_VALUES_FOR_DB = [ diff --git a/src/modules/predictions/dto/index.ts b/src/modules/predictions/dto/index.ts index ed7bcaf..5c94b61 100755 --- a/src/modules/predictions/dto/index.ts +++ b/src/modules/predictions/dto/index.ts @@ -148,6 +148,27 @@ export class MatchPickDto { @ApiProperty({ required: false, default: 0 }) implied_prob?: number; + @ApiProperty({ required: false, default: 0 }) + model_probability?: number; + + @ApiProperty({ required: false, default: 0 }) + model_edge?: number; + + @ApiProperty({ required: false, default: 0 }) + calibrated_probability?: number; + + @ApiProperty({ required: false, default: 0 }) + odds_band_probability?: 
number; + + @ApiProperty({ required: false, default: 0 }) + odds_band_sample?: number; + + @ApiProperty({ required: false, default: 0 }) + odds_band_edge?: number; + + @ApiProperty({ required: false, default: false }) + odds_band_aligned?: boolean; + @ApiProperty() play_score: number; @@ -171,6 +192,9 @@ export class MatchPickDto { enum: ["CORE", "VALUE", "LEAN", "LONGSHOT", "PASS"], }) signal_tier?: SignalTier; + + @ApiProperty({ required: false, default: false }) + is_guaranteed?: boolean; } export class MatchBetAdviceDto { @@ -227,6 +251,27 @@ export class MatchBetSummaryItemDto { @ApiProperty({ required: false, default: 0 }) implied_prob?: number; + @ApiProperty({ required: false, default: 0 }) + model_probability?: number; + + @ApiProperty({ required: false, default: 0 }) + model_edge?: number; + + @ApiProperty({ required: false, default: 0 }) + calibrated_probability?: number; + + @ApiProperty({ required: false, default: 0 }) + odds_band_probability?: number; + + @ApiProperty({ required: false, default: 0 }) + odds_band_sample?: number; + + @ApiProperty({ required: false, default: 0 }) + odds_band_edge?: number; + + @ApiProperty({ required: false, default: false }) + odds_band_aligned?: boolean; + @ApiProperty({ required: false, default: 0 }) odds?: number; diff --git a/src/modules/predictions/predictions.service.ts b/src/modules/predictions/predictions.service.ts index b2c3dd0..6bc035c 100755 --- a/src/modules/predictions/predictions.service.ts +++ b/src/modules/predictions/predictions.service.ts @@ -60,7 +60,7 @@ export class PredictionsService implements OnModuleInit, OnModuleDestroy { confidence_interval_too_wide_for_main_pick: "Ana seçim için güven aralığı çok geniş", confidence_band_low: "Güven bandı düşük", - playable_edge_found: "Oynanabilir avantaj bulundu", + playable_edge_found: "Model avantaj sinyali bulundu", market_signal_dominant: "Piyasa sinyali baskın", team_form_signal_dominant: "Takım formuna dayalı sinyaller çok baskın", 
lineup_signal_strong: "İlk on bir sinyali güçlü", @@ -77,7 +77,12 @@ export class PredictionsService implements OnModuleInit, OnModuleDestroy { limited_data_confidence: "Veri kısıtlı olduğu için güven sınırlı", data_quality_issue: "Veri kalitesi sorunu var", high_risk_low_data_quality: "Risk yüksek, veri kalitesi düşük", - insufficient_play_score: "Oynanabilirlik puanı yetersiz", + insufficient_play_score: "Model sinyali yetersiz", + odds_band_confirms_value: "Tarihsel oran bandı değeri doğruluyor", + odds_band_sample_too_low: "Tarihsel oran bandı örneklemi yetersiz", + odds_band_missing_probability: "Tarihsel oran bandı olasılığı yok", + odds_band_unavailable: "Tarihsel oran bandı kullanılamıyor", + odds_band_not_aligned: "Model ve tarihsel oran bandı aynı yönde değil", no_bet_conditions_met: "Bahis koşulları oluşmadı", market_passed_all_gates: "Market tüm güvenlik kontrollerini geçti", no_ev_edge_minimum_stake: @@ -129,10 +134,7 @@ export class PredictionsService implements OnModuleInit, OnModuleDestroy { private readonly feederService: FeederService, @Optional() private readonly predictionsQueue?: PredictionsQueue, ) { - this.aiEngineUrl = this.configService.get( - "AI_ENGINE_URL", - "http://localhost:8000", - ); + this.aiEngineUrl = this.resolveAiEngineUrl(); this.aiEngineClient = new AiEngineClient({ baseUrl: this.aiEngineUrl, logger: this.logger, @@ -421,6 +423,59 @@ export class PredictionsService implements OnModuleInit, OnModuleDestroy { } } + private resolveAiEngineUrl(): string { + const configuredUrl = this.configService.get( + "AI_ENGINE_URL", + "http://localhost:8000", + ); + const localEnvUrl = this.readLocalEnvValue("AI_ENGINE_URL"); + + if ( + process.env.NODE_ENV !== "production" && + localEnvUrl && + localEnvUrl !== configuredUrl && + this.isLocalhostUrl(configuredUrl) && + this.isLocalhostUrl(localEnvUrl) + ) { + this.logger.warn( + `AI_ENGINE_URL inherited from parent process (${configuredUrl}) differs from .env.local (${localEnvUrl}); using 
.env.local for local development`, + ); + return localEnvUrl; + } + + return configuredUrl; + } + + private readLocalEnvValue(key: string): string | null { + const filePath = path.join(process.cwd(), ".env.local"); + if (!fs.existsSync(filePath)) { + return null; + } + + const line = fs + .readFileSync(filePath, "utf8") + .split(/\r?\n/u) + .find((entry) => entry.trim().startsWith(`${key}=`)); + + if (!line) { + return null; + } + + return line + .slice(line.indexOf("=") + 1) + .trim() + .replace(/^['"]|['"]$/gu, ""); + } + + private isLocalhostUrl(value: string): boolean { + try { + const url = new URL(value); + return ["localhost", "127.0.0.1", "::1"].includes(url.hostname); + } catch { + return false; + } + } + private async getMatchContext(matchId: string): Promise { const match = await this.prisma.match.findUnique({ where: { id: matchId }, @@ -705,6 +760,7 @@ export class PredictionsService implements OnModuleInit, OnModuleDestroy { ), confidence_interval: interval, signal_tier: this.classifySignalTier(record, interval), + is_guaranteed: false, }; } @@ -793,7 +849,7 @@ export class PredictionsService implements OnModuleInit, OnModuleDestroy { const evMatch = normalized.match(/^ev_edge_([-+][\d.]+%)_grade_(\w)$/); if (evMatch) { - return `Beklenen avantaj ${evMatch[1]} (Not ${evMatch[2]})`; + return `Teorik avantaj sinyali: Not ${evMatch[2]}`; } const negativeEdgeMatch = normalized.match( @@ -803,6 +859,13 @@ export class PredictionsService implements OnModuleInit, OnModuleDestroy { return `Model avantajı negatif (${negativeEdgeMatch[1]})`; } + const bandNoValueMatch = normalized.match( + /^odds_band_no_value_([-+]?[\d.]+)$/, + ); + if (bandNoValueMatch) { + return `Tarihsel oran bandı değeri doğrulamadı (${bandNoValueMatch[1]})`; + } + const edgeThresholdMatch = normalized.match( /^below_market_edge_threshold_([-+]?[\d.]+)$/, ); @@ -1514,8 +1577,15 @@ export class PredictionsService implements OnModuleInit, OnModuleDestroy { pick: item.pick, playable: 
item.playable, bet_grade: item.bet_grade, + odds: item.odds, + model_edge: item.model_edge, + calibrated_probability: item.calibrated_probability, calibrated_confidence: item.calibrated_confidence, ev_edge: item.ev_edge ?? 0, + odds_band_probability: item.odds_band_probability, + odds_band_sample: item.odds_band_sample, + odds_band_edge: item.odds_band_edge, + odds_band_aligned: item.odds_band_aligned, stake_units: item.stake_units, })) : []; @@ -1531,8 +1601,15 @@ export class PredictionsService implements OnModuleInit, OnModuleDestroy { pick: payload.main_pick.pick, playable: payload.main_pick.playable, bet_grade: payload.main_pick.bet_grade, + odds: payload.main_pick.odds, + model_edge: payload.main_pick.model_edge, + calibrated_probability: payload.main_pick.calibrated_probability, calibrated_confidence: payload.main_pick.calibrated_confidence, ev_edge: payload.main_pick.ev_edge ?? 0, + odds_band_probability: payload.main_pick.odds_band_probability, + odds_band_sample: payload.main_pick.odds_band_sample, + odds_band_edge: payload.main_pick.odds_band_edge, + odds_band_aligned: payload.main_pick.odds_band_aligned, stake_units: payload.main_pick.stake_units, } : null, @@ -1542,6 +1619,8 @@ export class PredictionsService implements OnModuleInit, OnModuleDestroy { pick: payload.value_pick.pick, playable: payload.value_pick.playable, bet_grade: payload.value_pick.bet_grade, + odds: payload.value_pick.odds, + model_edge: payload.value_pick.model_edge, calibrated_confidence: payload.value_pick.calibrated_confidence, ev_edge: payload.value_pick.ev_edge ?? 
0, } diff --git a/src/tasks/data-fetcher.task.ts b/src/tasks/data-fetcher.task.ts index 52240b1..cb6ec2f 100755 --- a/src/tasks/data-fetcher.task.ts +++ b/src/tasks/data-fetcher.task.ts @@ -9,6 +9,7 @@ import * as path from "path"; import { Prisma } from "@prisma/client"; import { SidelinedResponse } from "../modules/feeder/feeder.types"; import { + deriveStoredMatchStatus, FINISHED_STATE_VALUES_FOR_DB, FINISHED_STATUS_VALUES_FOR_DB, LIVE_STATE_VALUES_FOR_DB, @@ -74,6 +75,17 @@ interface LiveLineupsJson { away: { xi: unknown[]; subs: unknown[] }; } +interface PendingPredictionRunForSettlement { + id: bigint; + matchId: string; + engineVersion: string; + payloadSummary: unknown; + scoreHome: number | null; + scoreAway: number | null; + htScoreHome: number | null; + htScoreAway: number | null; +} + type SportType = "football" | "basketball"; // ──────────────────────────────────────────────────────────────── @@ -187,6 +199,7 @@ export class DataFetcherTask { await this.syncMatchList(today); await this.syncMatchList(tomorrow); await this.updateLiveScores(); + await this.settlePredictionRuns(); await this.fetchOddsForMatches(); await this.fillMissingLineups(); @@ -263,13 +276,23 @@ export class DataFetcherTask { if (response.data?.data) { const matchData = response.data.data; + const scoreHome = matchData.homeScore ?? null; + const scoreAway = matchData.awayScore ?? null; + const storedStatus = deriveStoredMatchStatus({ + state: matchData.state, + status: matchData.status, + substate: matchData.substate, + scoreHome, + scoreAway, + }); await this.prisma.liveMatch.update({ where: { id: match.id }, data: { - scoreHome: matchData.homeScore ?? null, - scoreAway: matchData.awayScore ?? 
null, - state: matchData.state || matchData.status, - status: matchData.status, + scoreHome, + scoreAway, + state: matchData.state || null, + substate: matchData.substate || null, + status: storedStatus, updatedAt: new Date(), }, }); @@ -286,6 +309,292 @@ export class DataFetcherTask { } } + private async settlePredictionRuns(): Promise { + try { + const rows = await this.prisma.$queryRawUnsafe< + PendingPredictionRunForSettlement[] + >(` + SELECT + pr.id, + pr.match_id AS "matchId", + pr.engine_version AS "engineVersion", + pr.payload_summary AS "payloadSummary", + m.score_home AS "scoreHome", + m.score_away AS "scoreAway", + m.ht_score_home AS "htScoreHome", + m.ht_score_away AS "htScoreAway" + FROM prediction_runs pr + JOIN matches m ON m.id = pr.match_id + WHERE pr.eventual_outcome IS NULL + AND m.sport = 'football' + AND m.status = 'FT' + AND m.score_home IS NOT NULL + AND m.score_away IS NOT NULL + ORDER BY pr.generated_at ASC + LIMIT 500 + `); + + if (rows.length === 0) return; + + let settled = 0; + for (const row of rows) { + const result = this.resolvePredictionRunSettlement(row); + if (!result) continue; + const closingOddsSnapshot = await this.getClosingOddsSnapshot(row.matchId); + const settlementSummary = { + settled_at: new Date().toISOString(), + model_version: row.engineVersion, + outcome: result.outcome, + unit_profit: result.unitProfit, + final_score: { + home: row.scoreHome, + away: row.scoreAway, + }, + halftime_score: { + home: row.htScoreHome, + away: row.htScoreAway, + }, + closing_odds_snapshot: closingOddsSnapshot, + }; + + await this.prisma.$executeRawUnsafe( + ` + UPDATE prediction_runs + SET eventual_outcome = $1, + unit_profit = $2, + payload_summary = payload_summary || jsonb_build_object('settlement', $3::jsonb) + WHERE id = $4 + `, + result.outcome, + result.unitProfit, + JSON.stringify(settlementSummary), + row.id, + ); + settled++; + } + + if (settled > 0) { + this.logger.log(`Settled ${settled} prediction run(s)`); + } + } catch 
(error: unknown) { + const message = error instanceof Error ? error.message : String(error); + this.logger.warn(`Prediction run settlement skipped: ${message}`); + } + } + + private async getClosingOddsSnapshot( + matchId: string, + ): Promise> { + const liveMatch = await this.prisma.liveMatch.findUnique({ + where: { id: matchId }, + select: { + odds: true, + oddsUpdatedAt: true, + status: true, + state: true, + scoreHome: true, + scoreAway: true, + }, + }); + + if (liveMatch?.odds) { + return { + source: "live_match", + odds: liveMatch.odds, + odds_updated_at: liveMatch.oddsUpdatedAt?.toISOString() ?? null, + status: liveMatch.status ?? null, + state: liveMatch.state ?? null, + score_home: liveMatch.scoreHome, + score_away: liveMatch.scoreAway, + }; + } + + const categories = await this.prisma.oddCategory.findMany({ + where: { matchId }, + select: { + name: true, + selections: { + select: { + name: true, + oddValue: true, + position: true, + updatedAt: true, + }, + orderBy: { position: "asc" }, + take: 12, + }, + }, + orderBy: { name: "asc" }, + take: 24, + }); + + return { + source: "odd_selections", + category_count: categories.length, + categories: categories.map((category) => ({ + name: category.name, + selections: category.selections.map((selection) => ({ + name: selection.name, + odd_value: selection.oddValue, + position: selection.position, + updated_at: selection.updatedAt?.toISOString() ?? 
null, + })), + })), + }; + } + + private resolvePredictionRunSettlement( + row: PendingPredictionRunForSettlement, + ): { outcome: string; unitProfit: number } | null { + const summary = this.asRecord(row.payloadSummary); + const mainPick = this.asRecord(summary.main_pick); + const market = String(mainPick.market || ""); + const pick = String(mainPick.pick || ""); + const playable = mainPick.playable === true; + const odds = Number(mainPick.odds || 0); + + if (!market || !pick || !playable || !Number.isFinite(odds) || odds <= 1.01) { + return { outcome: "NO_BET", unitProfit: 0 }; + } + + const won = this.isPredictionPickWon({ + market, + pick, + scoreHome: row.scoreHome, + scoreAway: row.scoreAway, + htScoreHome: row.htScoreHome, + htScoreAway: row.htScoreAway, + }); + + if (won === null) return null; + + return { + outcome: `${won ? "WON" : "LOST"}:${market}:${pick}`, + unitProfit: Number((won ? odds - 1 : -1).toFixed(4)), + }; + } + + private isPredictionPickWon(input: { + market: string; + pick: string; + scoreHome: number | null; + scoreAway: number | null; + htScoreHome: number | null; + htScoreAway: number | null; + }): boolean | null { + const market = input.market.toUpperCase(); + const pick = this.normalizePick(input.pick); + const scoreHome = input.scoreHome; + const scoreAway = input.scoreAway; + if (scoreHome === null || scoreAway === null) return null; + + if (market === "MS") { + if (pick === "1") return scoreHome > scoreAway; + if (pick === "X" || pick === "0") return scoreHome === scoreAway; + if (pick === "2") return scoreAway > scoreHome; + return null; + } + + if (market === "DC") { + const normalized = pick.replace("-", ""); + if (normalized === "1X") return scoreHome >= scoreAway; + if (normalized === "X2") return scoreAway >= scoreHome; + if (normalized === "12") return scoreHome !== scoreAway; + return null; + } + + if (market === "BTTS") { + const bothScored = scoreHome > 0 && scoreAway > 0; + if (pick.includes("VAR") || pick.includes("YES") 
|| pick === "Y") { + return bothScored; + } + if (pick.includes("YOK") || pick.includes("NO") || pick === "N") { + return !bothScored; + } + return null; + } + + const goalLine = this.goalLineForMarket(market); + if (goalLine !== null) { + const total = + market.startsWith("HT_") + ? this.nullableSum(input.htScoreHome, input.htScoreAway) + : scoreHome + scoreAway; + if (total === null) return null; + if (this.isOverPick(pick)) return total > goalLine; + return total < goalLine; + } + + if (market === "HT") { + const htHome = input.htScoreHome; + const htAway = input.htScoreAway; + if (htHome === null || htAway === null) return null; + if (pick === "1") return htHome > htAway; + if (pick === "X" || pick === "0") return htHome === htAway; + if (pick === "2") return htAway > htHome; + } + + if (market === "HTFT") { + const htHome = input.htScoreHome; + const htAway = input.htScoreAway; + if (htHome === null || htAway === null || !pick.includes("/")) return null; + const [htPick, ftPick] = pick.split("/"); + return ( + this.isResultPickWon(htPick, htHome, htAway) === true && + this.isResultPickWon(ftPick, scoreHome, scoreAway) === true + ); + } + + return null; + } + + private isResultPickWon( + pick: string, + homeScore: number, + awayScore: number, + ): boolean | null { + if (pick === "1") return homeScore > awayScore; + if (pick === "X" || pick === "0") return homeScore === awayScore; + if (pick === "2") return awayScore > homeScore; + return null; + } + + private goalLineForMarket(market: string): number | null { + if (market === "OU15") return 1.5; + if (market === "OU25") return 2.5; + if (market === "OU35") return 3.5; + if (market === "HT_OU05") return 0.5; + if (market === "HT_OU15") return 1.5; + return null; + } + + private nullableSum(a: number | null, b: number | null): number | null { + if (a === null || b === null) return null; + return a + b; + } + + private normalizePick(value: string): string { + return value + .trim() + .toUpperCase() + 
.replace(/İ/g, "I") + .replace(/Ü/g, "U") + .replace(/Ş/g, "S") + .replace(/Ğ/g, "G") + .replace(/Ö/g, "O") + .replace(/Ç/g, "C"); + } + + private isOverPick(pick: string): boolean { + return pick.includes("UST") || pick.includes("OVER"); + } + + private asRecord(value: unknown): Record { + return value && typeof value === "object" && !Array.isArray(value) + ? (value as Record) + : {}; + } + // Phase 3: Odds + referee + lineups + sidelined private async fetchOddsForMatches(): Promise { @@ -705,6 +1014,15 @@ export class DataFetcherTask { // Safe score parsing const sHome = this.asInt(match.homeScore ?? match.score?.home); const sAway = this.asInt(match.awayScore ?? match.score?.away); + const storedStatus = deriveStoredMatchStatus({ + state: match.state, + status: match.status, + substate: match.substate, + statusBoxContent: match.statusBoxContent, + scoreHome: sHome, + scoreAway: sAway, + score: match.score, + }); // Handle postponed matches (ERT = Erteledendi) if (match.statusBoxContent === "ERT") { @@ -733,7 +1051,7 @@ export class DataFetcherTask { leagueId: leagueId, state: match.state || null, substate: match.substate || null, - status: match.status || match.state || "NS", + status: storedStatus, scoreHome: sHome, scoreAway: sAway, homeTeamId: homeTeamId, @@ -748,7 +1066,7 @@ export class DataFetcherTask { leagueId: leagueId, state: match.state || null, substate: match.substate || null, - status: match.status || match.state || "NS", + status: storedStatus, mstUtc: BigInt(match.mstUtc || Date.now()), scoreHome: sHome, scoreAway: sAway,