""" Update Implied Odds in football_ai_features ============================================= Populates implied_home, implied_draw, implied_away, implied_over25, implied_btts from real odds data in odd_categories + odd_selections tables. Also backfills form-based features (home_goals_avg_5, away_goals_avg_5, etc.) from recent match history. Usage: python3 scripts/update_implied_odds.py """ import os import sys import time import psycopg2 from dotenv import load_dotenv load_dotenv() def get_conn(): db_url = os.getenv("DATABASE_URL", "").split("?schema=")[0] return psycopg2.connect(db_url) def update_implied_odds(conn): """Update implied probabilities from real odds data.""" cur = conn.cursor() print("📊 Phase 1: Updating implied odds from real market data...") t0 = time.time() # Step 1: Build odds lookup from odd_categories + odd_selections print(" Loading odds data...") cur.execute(""" SELECT oc.match_id, oc.name AS cat_name, os.name AS sel_name, os.odd_value FROM odd_selections os JOIN odd_categories oc ON os.odd_category_db_id = oc.db_id WHERE os.odd_value IS NOT NULL AND CAST(os.odd_value AS FLOAT) > 1.0 """) odds_by_match = {} row_count = 0 for match_id, cat_name, sel_name, odd_val in cur.fetchall(): try: v = float(odd_val) if v <= 1.0: continue except (ValueError, TypeError): continue if match_id not in odds_by_match: odds_by_match[match_id] = {} cat_lower = (cat_name or "").lower().strip() sel_lower = (sel_name or "").lower().strip() # Match Result (1X2) if cat_lower == 'maç sonucu': if sel_name == '1': odds_by_match[match_id]['ms_h'] = v elif sel_name in ('0', 'X'): odds_by_match[match_id]['ms_d'] = v elif sel_name == '2': odds_by_match[match_id]['ms_a'] = v # Over/Under 2.5 elif cat_lower == '2,5 alt/üst': if 'üst' in sel_lower: odds_by_match[match_id]['ou25_o'] = v elif 'alt' in sel_lower: odds_by_match[match_id]['ou25_u'] = v # BTTS elif cat_lower == 'karşılıklı gol': if 'var' in sel_lower: odds_by_match[match_id]['btts_y'] = v elif 'yok' in sel_lower: odds_by_match[match_id]['btts_n'] = v row_count += 1 print(f" Loaded odds for {len(odds_by_match)} matches ({row_count} selections) in {time.time()-t0:.1f}s") # Step 2: Calculate implied probabilities and update print(" Calculating implied probabilities...") # Get all match_ids in football_ai_features cur.execute("SELECT match_id FROM football_ai_features") feature_match_ids = {row[0] for row in cur.fetchall()} updated = 0 batch_size = 500 updates = [] for match_id in feature_match_ids: odds = odds_by_match.get(match_id, {}) if not odds: continue # Implied MS probabilities (vig-free normalization) ms_h = odds.get('ms_h', 0) ms_d = odds.get('ms_d', 0) ms_a = odds.get('ms_a', 0) implied_home = 0.33 implied_draw = 0.33 implied_away = 0.33 if ms_h > 1.0 and ms_d > 1.0 and ms_a > 1.0: raw_sum = (1 / ms_h) + (1 / ms_d) + (1 / ms_a) if raw_sum > 0: implied_home = round((1 / ms_h) / raw_sum, 4) implied_draw = round((1 / ms_d) / raw_sum, 4) implied_away = round((1 / ms_a) / raw_sum, 4) # Implied OU25 ou25_o = odds.get('ou25_o', 0) ou25_u = odds.get('ou25_u', 0) implied_over25 = 0.50 if ou25_o > 1.0 and ou25_u > 1.0: raw_sum = (1 / ou25_o) + (1 / ou25_u) if raw_sum > 0: implied_over25 = round((1 / ou25_o) / raw_sum, 4) # Implied BTTS btts_y = odds.get('btts_y', 0) btts_n = odds.get('btts_n', 0) implied_btts = 0.50 if btts_y > 1.0 and btts_n > 1.0: raw_sum = (1 / btts_y) + (1 / btts_n) if raw_sum > 0: implied_btts = round((1 / btts_y) / raw_sum, 4) # Only update if we have real data (not all defaults) has_real_data = (ms_h > 1.0 or ou25_o > 1.0 or btts_y > 1.0) if not has_real_data: continue updates.append(( implied_home, implied_draw, implied_away, implied_over25, implied_btts, match_id )) if len(updates) >= batch_size: cur.executemany(""" UPDATE football_ai_features SET implied_home = %s, implied_draw = %s, implied_away = %s, implied_over25 = %s, implied_btts_yes = %s WHERE match_id = %s """, updates) updated += len(updates) updates = [] # Final batch if updates: cur.executemany(""" UPDATE football_ai_features SET implied_home = %s, implied_draw = %s, implied_away = %s, implied_over25 = %s, implied_btts_yes = %s WHERE match_id = %s """, updates) updated += len(updates) conn.commit() print(f" ✅ Updated implied odds for {updated} matches in {time.time()-t0:.1f}s") return updated def update_form_features(conn): """Backfill form-based features (goals avg, clean sheet rate) from match history.""" cur = conn.cursor() print("\n📊 Phase 2: Updating form-based features...") t0 = time.time() # Load all finished football matches ordered by time print(" Loading match history...") cur.execute(""" SELECT id, home_team_id, away_team_id, score_home, score_away, mst_utc FROM matches WHERE status = 'FT' AND score_home IS NOT NULL AND sport = 'football' ORDER BY mst_utc ASC """) matches = cur.fetchall() print(f" Loaded {len(matches)} finished matches") # Build team history incrementally from collections import defaultdict team_history = defaultdict(list) # team_id -> [(goals_scored, goals_conceded)] # Get all feature match IDs cur.execute("SELECT match_id FROM football_ai_features") feature_match_ids = {row[0] for row in cur.fetchall()} updated = 0 batch_size = 500 updates = [] for match_id, home_id, away_id, score_home, score_away, mst_utc in matches: # Calculate features BEFORE updating history (pre-match features) if match_id in feature_match_ids: h_hist = team_history[home_id][-5:] # last 5 a_hist = team_history[away_id][-5:] # Home team form if h_hist: h_goals_avg = sum(g for g, _ in h_hist) / len(h_hist) h_conceded_avg = sum(c for _, c in h_hist) / len(h_hist) h_cs_rate = sum(1 for _, c in h_hist if c == 0) / len(h_hist) h_scoring_rate = sum(1 for g, _ in h_hist if g > 0) / len(h_hist) else: h_goals_avg, h_conceded_avg = 1.3, 1.2 h_cs_rate, h_scoring_rate = 0.25, 0.75 # Away team form if a_hist: a_goals_avg = sum(g for g, _ in a_hist) / len(a_hist) a_conceded_avg = sum(c for _, c in a_hist) / len(a_hist) a_cs_rate = sum(1 for _, c in a_hist if c == 0) / len(a_hist) a_scoring_rate = sum(1 for g, _ in a_hist if g > 0) / len(a_hist) else: a_goals_avg, a_conceded_avg = 1.3, 1.2 a_cs_rate, a_scoring_rate = 0.25, 0.75 updates.append(( round(h_goals_avg, 3), round(h_conceded_avg, 3), round(h_cs_rate, 3), round(h_scoring_rate, 3), round(a_goals_avg, 3), round(a_conceded_avg, 3), round(a_cs_rate, 3), round(a_scoring_rate, 3), match_id )) if len(updates) >= batch_size: cur.executemany(""" UPDATE football_ai_features SET home_goals_avg_5 = %s, home_conceded_avg_5 = %s, home_clean_sheet_rate = %s, home_scoring_rate = %s, away_goals_avg_5 = %s, away_conceded_avg_5 = %s, away_clean_sheet_rate = %s, away_scoring_rate = %s WHERE match_id = %s """, updates) updated += len(updates) updates = [] # Update history AFTER feature extraction (maintains pre-match invariant) team_history[home_id].append((score_home, score_away)) team_history[away_id].append((score_away, score_home)) # Final batch if updates: cur.executemany(""" UPDATE football_ai_features SET home_goals_avg_5 = %s, home_conceded_avg_5 = %s, home_clean_sheet_rate = %s, home_scoring_rate = %s, away_goals_avg_5 = %s, away_conceded_avg_5 = %s, away_clean_sheet_rate = %s, away_scoring_rate = %s WHERE match_id = %s """, updates) updated += len(updates) conn.commit() print(f" ✅ Updated form features for {updated} matches in {time.time()-t0:.1f}s") return updated def main(): print("🚀 Football AI Features — Implied Odds & Form Backfill") print("=" * 60) conn = get_conn() try: odds_updated = update_implied_odds(conn) form_updated = update_form_features(conn) print(f"\n✅ DONE!") print(f" Implied odds updated: {odds_updated} matches") print(f" Form features updated: {form_updated} matches") finally: conn.close() if __name__ == "__main__": main()