244d8f5366
Deploy Iddaai Backend / build-and-deploy (push) Successful in 6s
- extract_training_data.py: switch from top_leagues.json (23) to qualified_leagues.json (265) - update_implied_odds.py: new script to backfill implied odds from real market data - train_score_model.py: rewrite with v25 102-feature set + temporal split - single_match_orchestrator.py: integrate ML score model with heuristic fallback
308 lines
10 KiB
Python
308 lines
10 KiB
Python
"""
|
||
Update Implied Odds in football_ai_features
|
||
=============================================
|
||
Populates implied_home, implied_draw, implied_away, implied_over25, implied_btts
|
||
from real odds data in odd_categories + odd_selections tables.
|
||
|
||
Also backfills form-based features (home_goals_avg_5, away_goals_avg_5, etc.)
|
||
from recent match history.
|
||
|
||
Usage:
|
||
python3 scripts/update_implied_odds.py
|
||
"""
|
||
|
||
import os
|
||
import sys
|
||
import time
|
||
import psycopg2
|
||
from dotenv import load_dotenv
|
||
|
||
# Load settings from a .env file (python-dotenv) at import time so that
# DATABASE_URL is present in the environment before get_conn() reads it.
load_dotenv()
|
||
|
||
|
||
def _drop_schema_param(db_url):
    """Return *db_url* with any ``schema=...`` query parameter removed.

    Only the ``schema`` pair is dropped; every other query parameter (for
    example ``sslmode``) is kept in its original order and spelling. A URL
    without a query string is returned unchanged.

    NOTE(review): ``schema`` looks like an ORM-specific extension that libpq
    does not accept in a connection URI -- confirm against the deployment's
    DATABASE_URL format.
    """
    base, has_query, query = db_url.partition("?")
    if not has_query:
        return db_url
    kept = [
        pair
        for pair in query.split("&")
        if pair and pair.partition("=")[0] != "schema"
    ]
    if not kept:
        return base
    return base + "?" + "&".join(kept)


def get_conn():
    """Open a psycopg2 connection using the ``DATABASE_URL`` environment variable.

    The ``schema`` query parameter is stripped wherever it appears in the
    query string, and any other parameters are preserved. If ``DATABASE_URL``
    is unset, an empty DSN is passed to ``psycopg2.connect`` exactly as
    before.

    Returns:
        The connection object returned by ``psycopg2.connect``.
    """
    db_url = _drop_schema_param(os.getenv("DATABASE_URL", ""))
    return psycopg2.connect(db_url)
|
||
|
||
|
||
# Parameterised UPDATE shared by every batch flush in update_implied_odds().
_IMPLIED_ODDS_UPDATE_SQL = """
    UPDATE football_ai_features
    SET implied_home = %s,
        implied_draw = %s,
        implied_away = %s,
        implied_over25 = %s,
        implied_btts = %s
    WHERE match_id = %s
"""

# Neutral values written for a market whose prices are missing or incomplete.
_DEFAULT_MS_PROBS = (0.33, 0.33, 0.33)
_DEFAULT_BINARY_PROB = 0.50

# 1X2 selection label -> key in the per-match odds dict (draw is '0' or 'X').
_MS_SELECTION_KEYS = {'1': 'ms_h', '0': 'ms_d', 'x': 'ms_d', '2': 'ms_a'}


def _odds_key(cat_name, sel_name):
    """Map a (category, selection) name pair to an odds-dict key, or None."""
    category = (cat_name or "").lower().strip()
    selection = (sel_name or "").lower().strip()

    if category == 'maç sonucu':  # match result (1X2)
        return _MS_SELECTION_KEYS.get(selection)
    if category == '2,5 alt/üst':  # over/under 2.5 goals
        if 'üst' in selection:
            return 'ou25_o'
        if 'alt' in selection:
            return 'ou25_u'
        return None
    if category == 'karşılıklı gol':  # both teams to score
        if 'var' in selection:
            return 'btts_y'
        if 'yok' in selection:
            return 'btts_n'
    return None


def _implied_probabilities(*decimal_odds):
    """Return normalised (vig-free) probabilities for one complete market.

    Returns None unless every price is strictly greater than 1.0, so a
    missing (0) or NaN price marks the whole market as unavailable.
    """
    if not decimal_odds or not all(price > 1.0 for price in decimal_odds):
        return None
    inverse = [1 / price for price in decimal_odds]
    total = sum(inverse)
    return tuple(round(share / total, 4) for share in inverse)


def _flush_implied_odds(cur, rows):
    """Send *rows* through the shared UPDATE and return how many were sent."""
    if rows:
        cur.executemany(_IMPLIED_ODDS_UPDATE_SQL, rows)
    return len(rows)


def update_implied_odds(conn, batch_size=500):
    """Update implied probabilities from real odds data.

    Reads every selection priced above 1.0 from odd_selections joined to
    odd_categories, keeps the match-result, over/under 2.5 and
    both-teams-to-score prices per match, and writes normalised probabilities
    to football_ai_features for each match that has at least one *complete*
    market. Markets that are missing or incomplete for such a match are
    written with neutral defaults (0.33 / 0.50).

    Args:
        conn: Open DB-API connection (psycopg2 in this script).
        batch_size: Number of rows sent per ``executemany`` call.

    Returns:
        Number of rows submitted for update.
    """
    cur = conn.cursor()
    try:
        print("📊 Phase 1: Updating implied odds from real market data...")
        t0 = time.time()

        # Step 1: Build odds lookup from odd_categories + odd_selections
        # NOTE(review): the CAST runs inside SQL, so a non-numeric odd_value
        # would fail the query before the Python-side float() guard below can
        # skip it -- confirm the column type.
        print(" Loading odds data...")
        cur.execute("""
            SELECT oc.match_id, oc.name AS cat_name, os.name AS sel_name, os.odd_value
            FROM odd_selections os
            JOIN odd_categories oc ON os.odd_category_db_id = oc.db_id
            WHERE os.odd_value IS NOT NULL
              AND CAST(os.odd_value AS FLOAT) > 1.0
        """)

        odds_by_match = {}
        row_count = 0
        for match_id, cat_name, sel_name, odd_val in cur.fetchall():
            try:
                price = float(odd_val)
            except (ValueError, TypeError):
                continue
            if price <= 1.0:
                continue

            match_odds = odds_by_match.setdefault(match_id, {})
            key = _odds_key(cat_name, sel_name)
            if key is not None:
                # A later row for the same market/selection overwrites an earlier one.
                match_odds[key] = price
            row_count += 1

        print(f" Loaded odds for {len(odds_by_match)} matches ({row_count} selections) in {time.time()-t0:.1f}s")

        # Step 2: Calculate implied probabilities and update
        print(" Calculating implied probabilities...")

        cur.execute("SELECT match_id FROM football_ai_features")
        feature_match_ids = {row[0] for row in cur.fetchall()}

        updated = 0
        pending = []
        for match_id in feature_match_ids:
            odds = odds_by_match.get(match_id)
            if not odds:
                continue

            match_result = _implied_probabilities(
                odds.get('ms_h', 0), odds.get('ms_d', 0), odds.get('ms_a', 0)
            )
            over_under = _implied_probabilities(
                odds.get('ou25_o', 0), odds.get('ou25_u', 0)
            )
            btts = _implied_probabilities(
                odds.get('btts_y', 0), odds.get('btts_n', 0)
            )

            # Only update when at least one market is complete; a lone price
            # (e.g. home win without draw/away) would otherwise overwrite the
            # row with nothing but defaults.
            if match_result is None and over_under is None and btts is None:
                continue

            implied_home, implied_draw, implied_away = match_result or _DEFAULT_MS_PROBS
            implied_over25 = over_under[0] if over_under else _DEFAULT_BINARY_PROB
            implied_btts = btts[0] if btts else _DEFAULT_BINARY_PROB

            pending.append((
                implied_home, implied_draw, implied_away,
                implied_over25, implied_btts, match_id,
            ))
            if len(pending) >= batch_size:
                updated += _flush_implied_odds(cur, pending)
                pending = []

        # Final (possibly partial) batch
        updated += _flush_implied_odds(cur, pending)

        conn.commit()
        print(f" ✅ Updated implied odds for {updated} matches in {time.time()-t0:.1f}s")
        return updated
    finally:
        # Release the cursor even when a query fails.
        cur.close()
|
||
|
||
|
||
# Parameterised UPDATE shared by every batch flush in update_form_features().
_FORM_FEATURES_UPDATE_SQL = """
    UPDATE football_ai_features
    SET home_goals_avg_5 = %s,
        home_conceded_avg_5 = %s,
        home_clean_sheet_rate = %s,
        home_scoring_rate = %s,
        away_goals_avg_5 = %s,
        away_conceded_avg_5 = %s,
        away_clean_sheet_rate = %s,
        away_scoring_rate = %s
    WHERE match_id = %s
"""

# (goals_avg, conceded_avg, clean_sheet_rate, scoring_rate) used for a team
# that has no earlier finished match in the loaded history.
_DEFAULT_FORM = (1.3, 1.2, 0.25, 0.75)


def _form_stats(history):
    """Summarise a team's recent (goals_scored, goals_conceded) results.

    Returns (goals_avg, conceded_avg, clean_sheet_rate, scoring_rate), each
    rounded to 3 decimals, or the defaults when *history* is empty.
    """
    games = len(history)
    if not games:
        return _DEFAULT_FORM
    scored = sum(goals for goals, _ in history)
    conceded = sum(against for _, against in history)
    clean_sheets = sum(1 for _, against in history if against == 0)
    scoring_games = sum(1 for goals, _ in history if goals > 0)
    return (
        round(scored / games, 3),
        round(conceded / games, 3),
        round(clean_sheets / games, 3),
        round(scoring_games / games, 3),
    )


def _flush_form_features(cur, rows):
    """Send *rows* through the shared UPDATE and return how many were sent."""
    if rows:
        cur.executemany(_FORM_FEATURES_UPDATE_SQL, rows)
    return len(rows)


def update_form_features(conn, batch_size=500):
    """Backfill form-based features (goals avg, clean sheet rate) from match history.

    Walks all finished football matches in kick-off order, and for every
    match that has a football_ai_features row writes the home and away
    teams' form over their previous (at most five) matches. Form is computed
    before the current match is added to history, so the stored values are
    pre-match features.

    Args:
        conn: Open DB-API connection (psycopg2 in this script).
        batch_size: Number of rows sent per ``executemany`` call.

    Returns:
        Number of rows submitted for update.
    """
    from collections import defaultdict, deque

    cur = conn.cursor()
    try:
        print("\n📊 Phase 2: Updating form-based features...")
        t0 = time.time()

        # Load all finished football matches ordered by time
        print(" Loading match history...")
        cur.execute("""
            SELECT id, home_team_id, away_team_id, score_home, score_away, mst_utc
            FROM matches
            WHERE status = 'FT'
              AND score_home IS NOT NULL
              AND sport = 'football'
            ORDER BY mst_utc ASC
        """)
        matches = cur.fetchall()
        print(f" Loaded {len(matches)} finished matches")

        # team_id -> last five (goals_scored, goals_conceded); the bounded
        # deque drops older results that are never read.
        team_history = defaultdict(lambda: deque(maxlen=5))

        cur.execute("SELECT match_id FROM football_ai_features")
        feature_match_ids = {row[0] for row in cur.fetchall()}

        updated = 0
        pending = []
        for match_id, home_id, away_id, score_home, score_away, _kickoff in matches:
            # Calculate features BEFORE updating history (pre-match features)
            if match_id in feature_match_ids:
                pending.append(
                    _form_stats(team_history[home_id])
                    + _form_stats(team_history[away_id])
                    + (match_id,)
                )
                if len(pending) >= batch_size:
                    updated += _flush_form_features(cur, pending)
                    pending = []

            # The query only guarantees score_home; an incomplete score must
            # not enter history or later averages would fail on None.
            if score_home is None or score_away is None:
                continue

            # Update history AFTER feature extraction (maintains pre-match invariant)
            team_history[home_id].append((score_home, score_away))
            team_history[away_id].append((score_away, score_home))

        # Final (possibly partial) batch
        updated += _flush_form_features(cur, pending)

        conn.commit()
        print(f" ✅ Updated form features for {updated} matches in {time.time()-t0:.1f}s")
        return updated
    finally:
        # Release the cursor even when a query fails.
        cur.close()
|
||
|
||
|
||
def main():
    """Run both backfill phases against one connection and print a summary.

    The connection from get_conn() is closed in all cases, including when a
    phase raises.
    """
    banner = "🚀 Football AI Features — Implied Odds & Form Backfill"
    print(banner)
    print("=" * 60)

    connection = get_conn()
    try:
        # Phase order matters only for the printed output; each phase commits
        # its own work.
        odds_count = update_implied_odds(connection)
        form_count = update_form_features(connection)

        summary = (
            "\n✅ DONE!",
            f" Implied odds updated: {odds_count} matches",
            f" Form features updated: {form_count} matches",
        )
        for line in summary:
            print(line)
    finally:
        connection.close()
|
||
|
||
|
||
if __name__ == "__main__":
|
||
main()
|