Files
fahricansecer 5645b38f20
Deploy Iddaai Backend / build-and-deploy (push) Successful in 32s
main
2026-05-05 17:09:11 +03:00

308 lines
10 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Update Implied Odds in football_ai_features
=============================================
Populates implied_home, implied_draw, implied_away, implied_over25, implied_btts
from real odds data in odd_categories + odd_selections tables.
Also backfills form-based features (home_goals_avg_5, away_goals_avg_5, etc.)
from recent match history.
Usage:
python3 scripts/update_implied_odds.py
"""
import os
import sys
import time
import psycopg2
from dotenv import load_dotenv
load_dotenv()
def get_conn():
db_url = os.getenv("DATABASE_URL", "").split("?schema=")[0]
return psycopg2.connect(db_url)
def update_implied_odds(conn):
"""Update implied probabilities from real odds data."""
cur = conn.cursor()
print("📊 Phase 1: Updating implied odds from real market data...")
t0 = time.time()
# Step 1: Build odds lookup from odd_categories + odd_selections
print(" Loading odds data...")
cur.execute("""
SELECT oc.match_id, oc.name AS cat_name, os.name AS sel_name, os.odd_value
FROM odd_selections os
JOIN odd_categories oc ON os.odd_category_db_id = oc.db_id
WHERE os.odd_value IS NOT NULL
AND CAST(os.odd_value AS FLOAT) > 1.0
""")
odds_by_match = {}
row_count = 0
for match_id, cat_name, sel_name, odd_val in cur.fetchall():
try:
v = float(odd_val)
if v <= 1.0:
continue
except (ValueError, TypeError):
continue
if match_id not in odds_by_match:
odds_by_match[match_id] = {}
cat_lower = (cat_name or "").lower().strip()
sel_lower = (sel_name or "").lower().strip()
# Match Result (1X2)
if cat_lower == 'maç sonucu':
if sel_name == '1':
odds_by_match[match_id]['ms_h'] = v
elif sel_name in ('0', 'X'):
odds_by_match[match_id]['ms_d'] = v
elif sel_name == '2':
odds_by_match[match_id]['ms_a'] = v
# Over/Under 2.5
elif cat_lower == '2,5 alt/üst':
if 'üst' in sel_lower:
odds_by_match[match_id]['ou25_o'] = v
elif 'alt' in sel_lower:
odds_by_match[match_id]['ou25_u'] = v
# BTTS
elif cat_lower == 'karşılıklı gol':
if 'var' in sel_lower:
odds_by_match[match_id]['btts_y'] = v
elif 'yok' in sel_lower:
odds_by_match[match_id]['btts_n'] = v
row_count += 1
print(f" Loaded odds for {len(odds_by_match)} matches ({row_count} selections) in {time.time()-t0:.1f}s")
# Step 2: Calculate implied probabilities and update
print(" Calculating implied probabilities...")
# Get all match_ids in football_ai_features
cur.execute("SELECT match_id FROM football_ai_features")
feature_match_ids = {row[0] for row in cur.fetchall()}
updated = 0
batch_size = 500
updates = []
for match_id in feature_match_ids:
odds = odds_by_match.get(match_id, {})
if not odds:
continue
# Implied MS probabilities (vig-free normalization)
ms_h = odds.get('ms_h', 0)
ms_d = odds.get('ms_d', 0)
ms_a = odds.get('ms_a', 0)
implied_home = 0.33
implied_draw = 0.33
implied_away = 0.33
if ms_h > 1.0 and ms_d > 1.0 and ms_a > 1.0:
raw_sum = (1 / ms_h) + (1 / ms_d) + (1 / ms_a)
if raw_sum > 0:
implied_home = round((1 / ms_h) / raw_sum, 4)
implied_draw = round((1 / ms_d) / raw_sum, 4)
implied_away = round((1 / ms_a) / raw_sum, 4)
# Implied OU25
ou25_o = odds.get('ou25_o', 0)
ou25_u = odds.get('ou25_u', 0)
implied_over25 = 0.50
if ou25_o > 1.0 and ou25_u > 1.0:
raw_sum = (1 / ou25_o) + (1 / ou25_u)
if raw_sum > 0:
implied_over25 = round((1 / ou25_o) / raw_sum, 4)
# Implied BTTS
btts_y = odds.get('btts_y', 0)
btts_n = odds.get('btts_n', 0)
implied_btts = 0.50
if btts_y > 1.0 and btts_n > 1.0:
raw_sum = (1 / btts_y) + (1 / btts_n)
if raw_sum > 0:
implied_btts = round((1 / btts_y) / raw_sum, 4)
# Only update if we have real data (not all defaults)
has_real_data = (ms_h > 1.0 or ou25_o > 1.0 or btts_y > 1.0)
if not has_real_data:
continue
updates.append((
implied_home, implied_draw, implied_away,
implied_over25, implied_btts, match_id
))
if len(updates) >= batch_size:
cur.executemany("""
UPDATE football_ai_features
SET implied_home = %s,
implied_draw = %s,
implied_away = %s,
implied_over25 = %s,
implied_btts_yes = %s
WHERE match_id = %s
""", updates)
updated += len(updates)
updates = []
# Final batch
if updates:
cur.executemany("""
UPDATE football_ai_features
SET implied_home = %s,
implied_draw = %s,
implied_away = %s,
implied_over25 = %s,
implied_btts_yes = %s
WHERE match_id = %s
""", updates)
updated += len(updates)
conn.commit()
print(f" ✅ Updated implied odds for {updated} matches in {time.time()-t0:.1f}s")
return updated
def update_form_features(conn):
"""Backfill form-based features (goals avg, clean sheet rate) from match history."""
cur = conn.cursor()
print("\n📊 Phase 2: Updating form-based features...")
t0 = time.time()
# Load all finished football matches ordered by time
print(" Loading match history...")
cur.execute("""
SELECT id, home_team_id, away_team_id, score_home, score_away, mst_utc
FROM matches
WHERE status = 'FT'
AND score_home IS NOT NULL
AND sport = 'football'
ORDER BY mst_utc ASC
""")
matches = cur.fetchall()
print(f" Loaded {len(matches)} finished matches")
# Build team history incrementally
from collections import defaultdict
team_history = defaultdict(list) # team_id -> [(goals_scored, goals_conceded)]
# Get all feature match IDs
cur.execute("SELECT match_id FROM football_ai_features")
feature_match_ids = {row[0] for row in cur.fetchall()}
updated = 0
batch_size = 500
updates = []
for match_id, home_id, away_id, score_home, score_away, mst_utc in matches:
# Calculate features BEFORE updating history (pre-match features)
if match_id in feature_match_ids:
h_hist = team_history[home_id][-5:] # last 5
a_hist = team_history[away_id][-5:]
# Home team form
if h_hist:
h_goals_avg = sum(g for g, _ in h_hist) / len(h_hist)
h_conceded_avg = sum(c for _, c in h_hist) / len(h_hist)
h_cs_rate = sum(1 for _, c in h_hist if c == 0) / len(h_hist)
h_scoring_rate = sum(1 for g, _ in h_hist if g > 0) / len(h_hist)
else:
h_goals_avg, h_conceded_avg = 1.3, 1.2
h_cs_rate, h_scoring_rate = 0.25, 0.75
# Away team form
if a_hist:
a_goals_avg = sum(g for g, _ in a_hist) / len(a_hist)
a_conceded_avg = sum(c for _, c in a_hist) / len(a_hist)
a_cs_rate = sum(1 for _, c in a_hist if c == 0) / len(a_hist)
a_scoring_rate = sum(1 for g, _ in a_hist if g > 0) / len(a_hist)
else:
a_goals_avg, a_conceded_avg = 1.3, 1.2
a_cs_rate, a_scoring_rate = 0.25, 0.75
updates.append((
round(h_goals_avg, 3), round(h_conceded_avg, 3),
round(h_cs_rate, 3), round(h_scoring_rate, 3),
round(a_goals_avg, 3), round(a_conceded_avg, 3),
round(a_cs_rate, 3), round(a_scoring_rate, 3),
match_id
))
if len(updates) >= batch_size:
cur.executemany("""
UPDATE football_ai_features
SET home_goals_avg_5 = %s,
home_conceded_avg_5 = %s,
home_clean_sheet_rate = %s,
home_scoring_rate = %s,
away_goals_avg_5 = %s,
away_conceded_avg_5 = %s,
away_clean_sheet_rate = %s,
away_scoring_rate = %s
WHERE match_id = %s
""", updates)
updated += len(updates)
updates = []
# Update history AFTER feature extraction (maintains pre-match invariant)
team_history[home_id].append((score_home, score_away))
team_history[away_id].append((score_away, score_home))
# Final batch
if updates:
cur.executemany("""
UPDATE football_ai_features
SET home_goals_avg_5 = %s,
home_conceded_avg_5 = %s,
home_clean_sheet_rate = %s,
home_scoring_rate = %s,
away_goals_avg_5 = %s,
away_conceded_avg_5 = %s,
away_clean_sheet_rate = %s,
away_scoring_rate = %s
WHERE match_id = %s
""", updates)
updated += len(updates)
conn.commit()
print(f" ✅ Updated form features for {updated} matches in {time.time()-t0:.1f}s")
return updated
def main():
print("🚀 Football AI Features — Implied Odds & Form Backfill")
print("=" * 60)
conn = get_conn()
try:
odds_updated = update_implied_odds(conn)
form_updated = update_form_features(conn)
print(f"\n✅ DONE!")
print(f" Implied odds updated: {odds_updated} matches")
print(f" Form features updated: {form_updated} matches")
finally:
conn.close()
if __name__ == "__main__":
main()