feat(ai): expand training to 68K+ matches, add score model, backfill implied odds
Deploy Iddaai Backend / build-and-deploy (push) Successful in 6s

- extract_training_data.py: switch from top_leagues.json (23) to qualified_leagues.json (265)
- update_implied_odds.py: new script to backfill implied odds from real market data
- train_score_model.py: rewrite with v25 102-feature set + temporal split
- single_match_orchestrator.py: integrate ML score model with heuristic fallback
This commit is contained in:
2026-05-05 16:04:00 +03:00
parent 9bb8f39bca
commit 244d8f5366
4 changed files with 626 additions and 173 deletions
+307
View File
@@ -0,0 +1,307 @@
"""
Update Implied Odds in football_ai_features
=============================================
Populates implied_home, implied_draw, implied_away, implied_over25, implied_btts
from real odds data in odd_categories + odd_selections tables.
Also backfills form-based features (home_goals_avg_5, away_goals_avg_5, etc.)
from recent match history.
Usage:
python3 scripts/update_implied_odds.py
"""
import os
import sys
import time
import psycopg2
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment at import
# time, so that DATABASE_URL can be read via os.getenv() in get_conn().
load_dotenv()
def get_conn():
    """Open a psycopg2 connection using the DATABASE_URL environment variable.

    Everything from the first "?schema=" onwards is cut off the URL before it
    is handed to psycopg2. An unset variable results in an empty DSN string.
    """
    raw_url = os.getenv("DATABASE_URL", "")
    dsn, _, _ = raw_url.partition("?schema=")
    return psycopg2.connect(dsn)
# Single source of truth for the write-back statement (used for every batch).
_IMPLIED_ODDS_UPDATE_SQL = """
    UPDATE football_ai_features
    SET implied_home = %s,
        implied_draw = %s,
        implied_away = %s,
        implied_over25 = %s,
        implied_btts = %s
    WHERE match_id = %s
"""

# Number of rows sent per executemany() call.
_IMPLIED_ODDS_BATCH_SIZE = 500

# Selection labels of the 1X2 market (already lower-cased) -> odds slot.
_MATCH_RESULT_SLOTS = {'1': 'ms_h', '0': 'ms_d', 'x': 'ms_d', '2': 'ms_a'}


def _odds_slot(category, selection):
    """Map a normalized (category, selection) pair to an odds slot key.

    Both arguments must already be lower-cased and stripped. Returns None for
    categories or selections this script does not use.
    """
    if category == 'maç sonucu':
        return _MATCH_RESULT_SLOTS.get(selection)
    if category == '2,5 alt/üst':
        if 'üst' in selection:
            return 'ou25_o'
        if 'alt' in selection:
            return 'ou25_u'
        return None
    if category == 'karşılıklı gol':
        if 'var' in selection:
            return 'btts_y'
        if 'yok' in selection:
            return 'btts_n'
    return None


def _fair_probabilities(*prices):
    """Return vig-free probabilities for one market, rounded to 4 decimals.

    Each decimal price is inverted and the inverses are normalized to sum to 1.
    Returns None when any leg is missing or not above 1.0, i.e. when the market
    is incomplete and no probability can be derived.
    """
    if not all(price > 1.0 for price in prices):
        return None
    inverse = [1 / price for price in prices]
    overround = sum(inverse)
    return [round(value / overround, 4) for value in inverse]


def update_implied_odds(conn):
    """Update implied probabilities from real odds data.

    Reads every selection from odd_selections joined to odd_categories, keeps
    the 1X2, over/under 2.5 and BTTS prices per match, converts each complete
    market to vig-free probabilities and writes them to football_ai_features.

    A match is only written when at least one market is complete; rows for
    which nothing could be computed are left untouched.

    Args:
        conn: an open DB-API connection (psycopg2); committed once at the end.

    Returns:
        Number of football_ai_features rows sent to the UPDATE statement.
    """
    print("📊 Phase 1: Updating implied odds from real market data...")
    t0 = time.time()

    # The cursor is closed when the block exits, even on error.
    with conn.cursor() as cur:
        # Step 1: Build odds lookup from odd_categories + odd_selections
        print(" Loading odds data...")
        cur.execute("""
            SELECT oc.match_id, oc.name AS cat_name, os.name AS sel_name, os.odd_value
            FROM odd_selections os
            JOIN odd_categories oc ON os.odd_category_db_id = oc.db_id
            WHERE os.odd_value IS NOT NULL
              AND CAST(os.odd_value AS FLOAT) > 1.0
        """)

        odds_by_match = {}
        row_count = 0
        for match_id, cat_name, sel_name, odd_val in cur.fetchall():
            try:
                price = float(odd_val)
            except (ValueError, TypeError):
                continue
            if price <= 1.0:
                continue

            match_odds = odds_by_match.setdefault(match_id, {})
            # Normalize BOTH labels; comparing the raw selection name would
            # miss values such as 'x' or ' 1 '.
            slot = _odds_slot(
                (cat_name or "").lower().strip(),
                (sel_name or "").lower().strip(),
            )
            if slot is not None:
                match_odds[slot] = price
            row_count += 1

        print(f" Loaded odds for {len(odds_by_match)} matches ({row_count} selections) in {time.time()-t0:.1f}s")

        # Step 2: Calculate implied probabilities and update
        print(" Calculating implied probabilities...")
        cur.execute("SELECT match_id FROM football_ai_features")
        feature_match_ids = {row[0] for row in cur.fetchall()}

        updated = 0
        pending = []
        for match_id in feature_match_ids:
            odds = odds_by_match.get(match_id)
            if not odds:
                continue

            result_probs = _fair_probabilities(
                odds.get('ms_h', 0), odds.get('ms_d', 0), odds.get('ms_a', 0)
            )
            goals_probs = _fair_probabilities(
                odds.get('ou25_o', 0), odds.get('ou25_u', 0)
            )
            btts_probs = _fair_probabilities(
                odds.get('btts_y', 0), odds.get('btts_n', 0)
            )

            # Only write when at least one COMPLETE market exists. Checking a
            # single leg (e.g. only the home price) would write a row made up
            # entirely of placeholder values.
            if result_probs is None and goals_probs is None and btts_probs is None:
                continue

            # NOTE(review): markets that are missing for this match are still
            # written with neutral placeholders (0.33 / 0.50), overwriting
            # whatever the row held before — confirm this is intended.
            if result_probs is None:
                implied_home = implied_draw = implied_away = 0.33
            else:
                implied_home, implied_draw, implied_away = result_probs
            implied_over25 = 0.50 if goals_probs is None else goals_probs[0]
            implied_btts = 0.50 if btts_probs is None else btts_probs[0]

            pending.append((
                implied_home, implied_draw, implied_away,
                implied_over25, implied_btts, match_id,
            ))

            if len(pending) >= _IMPLIED_ODDS_BATCH_SIZE:
                cur.executemany(_IMPLIED_ODDS_UPDATE_SQL, pending)
                updated += len(pending)
                pending = []

        # Final (partial) batch
        if pending:
            cur.executemany(_IMPLIED_ODDS_UPDATE_SQL, pending)
            updated += len(pending)

    conn.commit()
    print(f" ✅ Updated implied odds for {updated} matches in {time.time()-t0:.1f}s")
    return updated
# Single source of truth for the write-back statement (used for every batch).
_FORM_FEATURES_UPDATE_SQL = """
    UPDATE football_ai_features
    SET home_goals_avg_5 = %s,
        home_conceded_avg_5 = %s,
        home_clean_sheet_rate = %s,
        home_scoring_rate = %s,
        away_goals_avg_5 = %s,
        away_conceded_avg_5 = %s,
        away_clean_sheet_rate = %s,
        away_scoring_rate = %s
    WHERE match_id = %s
"""

# Number of rows sent per executemany() call.
_FORM_BATCH_SIZE = 500

# How many most recent matches per team feed the form features.
_FORM_WINDOW = 5

# (goals_avg, conceded_avg, clean_sheet_rate, scoring_rate) used when a team
# has no earlier match in the loaded history.
_NO_HISTORY_FORM = (1.3, 1.2, 0.25, 0.75)


def _form_snapshot(history):
    """Summarize a team's recent (scored, conceded) pairs.

    Returns (goals_avg, conceded_avg, clean_sheet_rate, scoring_rate), each
    rounded to 3 decimals, or the fixed fallback tuple for an empty history.
    """
    if not history:
        return _NO_HISTORY_FORM
    played = len(history)
    return (
        round(sum(scored for scored, _ in history) / played, 3),
        round(sum(conceded for _, conceded in history) / played, 3),
        round(sum(1 for _, conceded in history if conceded == 0) / played, 3),
        round(sum(1 for scored, _ in history if scored > 0) / played, 3),
    )


def update_form_features(conn):
    """Backfill form-based features (goals avg, clean sheet rate) from match history.

    Walks all finished football matches in kick-off order. For every match that
    has a football_ai_features row, the features are computed from each team's
    previous (up to 5) matches BEFORE the current result is added to the
    history, so the stored values never include the match itself.

    Args:
        conn: an open DB-API connection (psycopg2); committed once at the end.

    Returns:
        Number of football_ai_features rows sent to the UPDATE statement.
    """
    from collections import defaultdict, deque

    print("\n📊 Phase 2: Updating form-based features...")
    t0 = time.time()

    # The cursor is closed when the block exits, even on error.
    with conn.cursor() as cur:
        # Load all finished football matches ordered by time. Both scores must
        # be present: a NULL away score would otherwise enter the history and
        # break the averages of a later match.
        print(" Loading match history...")
        cur.execute("""
            SELECT id, home_team_id, away_team_id, score_home, score_away, mst_utc
            FROM matches
            WHERE status = 'FT'
              AND score_home IS NOT NULL
              AND score_away IS NOT NULL
              AND sport = 'football'
            ORDER BY mst_utc ASC
        """)
        matches = cur.fetchall()
        print(f" Loaded {len(matches)} finished matches")

        # Get all feature match IDs
        cur.execute("SELECT match_id FROM football_ai_features")
        feature_match_ids = {row[0] for row in cur.fetchall()}

        # team_id -> last _FORM_WINDOW (goals_scored, goals_conceded) pairs.
        # A bounded deque keeps memory constant per team; older results are
        # never read.
        team_history = defaultdict(lambda: deque(maxlen=_FORM_WINDOW))

        updated = 0
        pending = []
        for match_id, home_id, away_id, score_home, score_away, _kickoff in matches:
            # Defensive mirror of the SQL filter for incomplete results.
            if score_home is None or score_away is None:
                continue

            # Calculate features BEFORE updating history (pre-match features)
            if match_id in feature_match_ids:
                pending.append((
                    *_form_snapshot(team_history[home_id]),
                    *_form_snapshot(team_history[away_id]),
                    match_id,
                ))
                if len(pending) >= _FORM_BATCH_SIZE:
                    cur.executemany(_FORM_FEATURES_UPDATE_SQL, pending)
                    updated += len(pending)
                    pending = []

            # Update history AFTER feature extraction (maintains pre-match invariant)
            team_history[home_id].append((score_home, score_away))
            team_history[away_id].append((score_away, score_home))

        # Final (partial) batch
        if pending:
            cur.executemany(_FORM_FEATURES_UPDATE_SQL, pending)
            updated += len(pending)

    conn.commit()
    print(f" ✅ Updated form features for {updated} matches in {time.time()-t0:.1f}s")
    return updated
def main():
    """Run both backfill phases on one connection and print a summary.

    The connection is closed in all cases, including when a phase raises.
    """
    print("🚀 Football AI Features — Implied Odds & Form Backfill")
    print("=" * 60)

    connection = get_conn()
    try:
        implied_count = update_implied_odds(connection)
        form_count = update_form_features(connection)

        print("\n✅ DONE!")
        print(f" Implied odds updated: {implied_count} matches")
        print(f" Form features updated: {form_count} matches")
    finally:
        connection.close()


# Run the backfill only when executed as a script, not when imported.
if __name__ == "__main__":
    main()