feat(ai): expand training to 68K+ matches, add score model, backfill implied odds
Deploy Iddaai Backend / build-and-deploy (push) Successful in 6s
Deploy Iddaai Backend / build-and-deploy (push) Successful in 6s
- extract_training_data.py: switch from top_leagues.json (23) to qualified_leagues.json (265) - update_implied_odds.py: new script to backfill implied odds from real market data - train_score_model.py: rewrite with v25 102-feature set + temporal split - single_match_orchestrator.py: integrate ML score model with heuristic fallback
This commit is contained in:
@@ -0,0 +1,307 @@
|
||||
"""
|
||||
Update Implied Odds in football_ai_features
|
||||
=============================================
|
||||
Populates implied_home, implied_draw, implied_away, implied_over25, implied_btts
|
||||
from real odds data in odd_categories + odd_selections tables.
|
||||
|
||||
Also backfills form-based features (home_goals_avg_5, away_goals_avg_5, etc.)
|
||||
from recent match history.
|
||||
|
||||
Usage:
|
||||
python3 scripts/update_implied_odds.py
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import psycopg2
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
def get_conn():
|
||||
db_url = os.getenv("DATABASE_URL", "").split("?schema=")[0]
|
||||
return psycopg2.connect(db_url)
|
||||
|
||||
|
||||
def update_implied_odds(conn):
|
||||
"""Update implied probabilities from real odds data."""
|
||||
cur = conn.cursor()
|
||||
|
||||
print("📊 Phase 1: Updating implied odds from real market data...")
|
||||
t0 = time.time()
|
||||
|
||||
# Step 1: Build odds lookup from odd_categories + odd_selections
|
||||
print(" Loading odds data...")
|
||||
cur.execute("""
|
||||
SELECT oc.match_id, oc.name AS cat_name, os.name AS sel_name, os.odd_value
|
||||
FROM odd_selections os
|
||||
JOIN odd_categories oc ON os.odd_category_db_id = oc.db_id
|
||||
WHERE os.odd_value IS NOT NULL
|
||||
AND CAST(os.odd_value AS FLOAT) > 1.0
|
||||
""")
|
||||
|
||||
odds_by_match = {}
|
||||
row_count = 0
|
||||
for match_id, cat_name, sel_name, odd_val in cur.fetchall():
|
||||
try:
|
||||
v = float(odd_val)
|
||||
if v <= 1.0:
|
||||
continue
|
||||
except (ValueError, TypeError):
|
||||
continue
|
||||
|
||||
if match_id not in odds_by_match:
|
||||
odds_by_match[match_id] = {}
|
||||
|
||||
cat_lower = (cat_name or "").lower().strip()
|
||||
sel_lower = (sel_name or "").lower().strip()
|
||||
|
||||
# Match Result (1X2)
|
||||
if cat_lower == 'maç sonucu':
|
||||
if sel_name == '1':
|
||||
odds_by_match[match_id]['ms_h'] = v
|
||||
elif sel_name in ('0', 'X'):
|
||||
odds_by_match[match_id]['ms_d'] = v
|
||||
elif sel_name == '2':
|
||||
odds_by_match[match_id]['ms_a'] = v
|
||||
|
||||
# Over/Under 2.5
|
||||
elif cat_lower == '2,5 alt/üst':
|
||||
if 'üst' in sel_lower:
|
||||
odds_by_match[match_id]['ou25_o'] = v
|
||||
elif 'alt' in sel_lower:
|
||||
odds_by_match[match_id]['ou25_u'] = v
|
||||
|
||||
# BTTS
|
||||
elif cat_lower == 'karşılıklı gol':
|
||||
if 'var' in sel_lower:
|
||||
odds_by_match[match_id]['btts_y'] = v
|
||||
elif 'yok' in sel_lower:
|
||||
odds_by_match[match_id]['btts_n'] = v
|
||||
|
||||
row_count += 1
|
||||
|
||||
print(f" Loaded odds for {len(odds_by_match)} matches ({row_count} selections) in {time.time()-t0:.1f}s")
|
||||
|
||||
# Step 2: Calculate implied probabilities and update
|
||||
print(" Calculating implied probabilities...")
|
||||
|
||||
# Get all match_ids in football_ai_features
|
||||
cur.execute("SELECT match_id FROM football_ai_features")
|
||||
feature_match_ids = {row[0] for row in cur.fetchall()}
|
||||
|
||||
updated = 0
|
||||
batch_size = 500
|
||||
updates = []
|
||||
|
||||
for match_id in feature_match_ids:
|
||||
odds = odds_by_match.get(match_id, {})
|
||||
if not odds:
|
||||
continue
|
||||
|
||||
# Implied MS probabilities (vig-free normalization)
|
||||
ms_h = odds.get('ms_h', 0)
|
||||
ms_d = odds.get('ms_d', 0)
|
||||
ms_a = odds.get('ms_a', 0)
|
||||
|
||||
implied_home = 0.33
|
||||
implied_draw = 0.33
|
||||
implied_away = 0.33
|
||||
|
||||
if ms_h > 1.0 and ms_d > 1.0 and ms_a > 1.0:
|
||||
raw_sum = (1 / ms_h) + (1 / ms_d) + (1 / ms_a)
|
||||
if raw_sum > 0:
|
||||
implied_home = round((1 / ms_h) / raw_sum, 4)
|
||||
implied_draw = round((1 / ms_d) / raw_sum, 4)
|
||||
implied_away = round((1 / ms_a) / raw_sum, 4)
|
||||
|
||||
# Implied OU25
|
||||
ou25_o = odds.get('ou25_o', 0)
|
||||
ou25_u = odds.get('ou25_u', 0)
|
||||
implied_over25 = 0.50
|
||||
|
||||
if ou25_o > 1.0 and ou25_u > 1.0:
|
||||
raw_sum = (1 / ou25_o) + (1 / ou25_u)
|
||||
if raw_sum > 0:
|
||||
implied_over25 = round((1 / ou25_o) / raw_sum, 4)
|
||||
|
||||
# Implied BTTS
|
||||
btts_y = odds.get('btts_y', 0)
|
||||
btts_n = odds.get('btts_n', 0)
|
||||
implied_btts = 0.50
|
||||
|
||||
if btts_y > 1.0 and btts_n > 1.0:
|
||||
raw_sum = (1 / btts_y) + (1 / btts_n)
|
||||
if raw_sum > 0:
|
||||
implied_btts = round((1 / btts_y) / raw_sum, 4)
|
||||
|
||||
# Only update if we have real data (not all defaults)
|
||||
has_real_data = (ms_h > 1.0 or ou25_o > 1.0 or btts_y > 1.0)
|
||||
if not has_real_data:
|
||||
continue
|
||||
|
||||
updates.append((
|
||||
implied_home, implied_draw, implied_away,
|
||||
implied_over25, implied_btts, match_id
|
||||
))
|
||||
|
||||
if len(updates) >= batch_size:
|
||||
cur.executemany("""
|
||||
UPDATE football_ai_features
|
||||
SET implied_home = %s,
|
||||
implied_draw = %s,
|
||||
implied_away = %s,
|
||||
implied_over25 = %s,
|
||||
implied_btts = %s
|
||||
WHERE match_id = %s
|
||||
""", updates)
|
||||
updated += len(updates)
|
||||
updates = []
|
||||
|
||||
# Final batch
|
||||
if updates:
|
||||
cur.executemany("""
|
||||
UPDATE football_ai_features
|
||||
SET implied_home = %s,
|
||||
implied_draw = %s,
|
||||
implied_away = %s,
|
||||
implied_over25 = %s,
|
||||
implied_btts = %s
|
||||
WHERE match_id = %s
|
||||
""", updates)
|
||||
updated += len(updates)
|
||||
|
||||
conn.commit()
|
||||
print(f" ✅ Updated implied odds for {updated} matches in {time.time()-t0:.1f}s")
|
||||
return updated
|
||||
|
||||
|
||||
def update_form_features(conn):
|
||||
"""Backfill form-based features (goals avg, clean sheet rate) from match history."""
|
||||
cur = conn.cursor()
|
||||
|
||||
print("\n📊 Phase 2: Updating form-based features...")
|
||||
t0 = time.time()
|
||||
|
||||
# Load all finished football matches ordered by time
|
||||
print(" Loading match history...")
|
||||
cur.execute("""
|
||||
SELECT id, home_team_id, away_team_id, score_home, score_away, mst_utc
|
||||
FROM matches
|
||||
WHERE status = 'FT'
|
||||
AND score_home IS NOT NULL
|
||||
AND sport = 'football'
|
||||
ORDER BY mst_utc ASC
|
||||
""")
|
||||
|
||||
matches = cur.fetchall()
|
||||
print(f" Loaded {len(matches)} finished matches")
|
||||
|
||||
# Build team history incrementally
|
||||
from collections import defaultdict
|
||||
team_history = defaultdict(list) # team_id -> [(goals_scored, goals_conceded)]
|
||||
|
||||
# Get all feature match IDs
|
||||
cur.execute("SELECT match_id FROM football_ai_features")
|
||||
feature_match_ids = {row[0] for row in cur.fetchall()}
|
||||
|
||||
updated = 0
|
||||
batch_size = 500
|
||||
updates = []
|
||||
|
||||
for match_id, home_id, away_id, score_home, score_away, mst_utc in matches:
|
||||
# Calculate features BEFORE updating history (pre-match features)
|
||||
if match_id in feature_match_ids:
|
||||
h_hist = team_history[home_id][-5:] # last 5
|
||||
a_hist = team_history[away_id][-5:]
|
||||
|
||||
# Home team form
|
||||
if h_hist:
|
||||
h_goals_avg = sum(g for g, _ in h_hist) / len(h_hist)
|
||||
h_conceded_avg = sum(c for _, c in h_hist) / len(h_hist)
|
||||
h_cs_rate = sum(1 for _, c in h_hist if c == 0) / len(h_hist)
|
||||
h_scoring_rate = sum(1 for g, _ in h_hist if g > 0) / len(h_hist)
|
||||
else:
|
||||
h_goals_avg, h_conceded_avg = 1.3, 1.2
|
||||
h_cs_rate, h_scoring_rate = 0.25, 0.75
|
||||
|
||||
# Away team form
|
||||
if a_hist:
|
||||
a_goals_avg = sum(g for g, _ in a_hist) / len(a_hist)
|
||||
a_conceded_avg = sum(c for _, c in a_hist) / len(a_hist)
|
||||
a_cs_rate = sum(1 for _, c in a_hist if c == 0) / len(a_hist)
|
||||
a_scoring_rate = sum(1 for g, _ in a_hist if g > 0) / len(a_hist)
|
||||
else:
|
||||
a_goals_avg, a_conceded_avg = 1.3, 1.2
|
||||
a_cs_rate, a_scoring_rate = 0.25, 0.75
|
||||
|
||||
updates.append((
|
||||
round(h_goals_avg, 3), round(h_conceded_avg, 3),
|
||||
round(h_cs_rate, 3), round(h_scoring_rate, 3),
|
||||
round(a_goals_avg, 3), round(a_conceded_avg, 3),
|
||||
round(a_cs_rate, 3), round(a_scoring_rate, 3),
|
||||
match_id
|
||||
))
|
||||
|
||||
if len(updates) >= batch_size:
|
||||
cur.executemany("""
|
||||
UPDATE football_ai_features
|
||||
SET home_goals_avg_5 = %s,
|
||||
home_conceded_avg_5 = %s,
|
||||
home_clean_sheet_rate = %s,
|
||||
home_scoring_rate = %s,
|
||||
away_goals_avg_5 = %s,
|
||||
away_conceded_avg_5 = %s,
|
||||
away_clean_sheet_rate = %s,
|
||||
away_scoring_rate = %s
|
||||
WHERE match_id = %s
|
||||
""", updates)
|
||||
updated += len(updates)
|
||||
updates = []
|
||||
|
||||
# Update history AFTER feature extraction (maintains pre-match invariant)
|
||||
team_history[home_id].append((score_home, score_away))
|
||||
team_history[away_id].append((score_away, score_home))
|
||||
|
||||
# Final batch
|
||||
if updates:
|
||||
cur.executemany("""
|
||||
UPDATE football_ai_features
|
||||
SET home_goals_avg_5 = %s,
|
||||
home_conceded_avg_5 = %s,
|
||||
home_clean_sheet_rate = %s,
|
||||
home_scoring_rate = %s,
|
||||
away_goals_avg_5 = %s,
|
||||
away_conceded_avg_5 = %s,
|
||||
away_clean_sheet_rate = %s,
|
||||
away_scoring_rate = %s
|
||||
WHERE match_id = %s
|
||||
""", updates)
|
||||
updated += len(updates)
|
||||
|
||||
conn.commit()
|
||||
print(f" ✅ Updated form features for {updated} matches in {time.time()-t0:.1f}s")
|
||||
return updated
|
||||
|
||||
|
||||
def main():
|
||||
print("🚀 Football AI Features — Implied Odds & Form Backfill")
|
||||
print("=" * 60)
|
||||
|
||||
conn = get_conn()
|
||||
|
||||
try:
|
||||
odds_updated = update_implied_odds(conn)
|
||||
form_updated = update_form_features(conn)
|
||||
|
||||
print(f"\n✅ DONE!")
|
||||
print(f" Implied odds updated: {odds_updated} matches")
|
||||
print(f" Form features updated: {form_updated} matches")
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user