main
Deploy Iddaai Backend / build-and-deploy (push) Successful in 37s

This commit is contained in:
2026-05-17 02:17:22 +03:00
parent 17ace9bd12
commit 94c7a4481a
53 changed files with 29602 additions and 7832 deletions
+176 -15
View File
@@ -128,7 +128,40 @@ FEATURE_COLS = [
"home_top_scorer_form", "away_top_scorer_form",
"home_avg_player_exp", "away_avg_player_exp",
"home_goals_diversity", "away_goals_diversity",
# V27 H2H Expanded (4)
"h2h_home_goals_avg", "h2h_away_goals_avg",
"h2h_recent_trend", "h2h_venue_advantage",
# V27 Rolling Stats (13)
"home_rolling5_goals", "home_rolling5_conceded",
"home_rolling10_goals", "home_rolling10_conceded",
"home_rolling20_goals", "home_rolling20_conceded",
"away_rolling5_goals", "away_rolling5_conceded",
"away_rolling10_goals", "away_rolling10_conceded",
"home_rolling5_cs", "away_rolling5_cs",
# V27 Venue Stats (4)
"home_venue_goals", "home_venue_conceded",
"away_venue_goals", "away_venue_conceded",
# V27 Goal Trend (2)
"home_goal_trend", "away_goal_trend",
# V27 Calendar (5)
"home_days_rest", "away_days_rest",
"match_month", "is_season_start", "is_season_end",
# V27 Interaction (6)
"attack_vs_defense_home", "attack_vs_defense_away",
"xg_diff", "form_momentum_interaction",
"elo_form_consistency", "upset_x_elo_gap",
# V27 League Expanded (5)
"league_home_win_rate", "league_draw_rate",
"league_btts_rate", "league_ou25_rate",
"league_reliability_score",
# Labels
"score_home", "score_away", "total_goals",
"ht_score_home", "ht_score_away", "ht_total_goals",
@@ -296,6 +329,10 @@ class BatchDataLoader:
SELECT league_id,
AVG(score_home + score_away) as avg_goals,
AVG(CASE WHEN score_home = 0 AND score_away = 0 THEN 1.0 ELSE 0.0 END) as zero_rate,
AVG(CASE WHEN score_home > score_away THEN 1.0 ELSE 0.0 END) as home_win_rate,
AVG(CASE WHEN score_home = score_away THEN 1.0 ELSE 0.0 END) as draw_rate,
AVG(CASE WHEN score_home > 0 AND score_away > 0 THEN 1.0 ELSE 0.0 END) as btts_rate,
AVG(CASE WHEN score_home + score_away > 2.5 THEN 1.0 ELSE 0.0 END) as ou25_rate,
COUNT(*) as match_count
FROM matches
WHERE status = 'FT'
@@ -304,12 +341,17 @@ class BatchDataLoader:
AND league_id IN ({ph})
GROUP BY league_id
""", self.top_league_ids)
for league_id, avg_goals, zero_rate, cnt in self.cur.fetchall():
for row in self.cur.fetchall():
league_id, avg_goals, zero_rate, home_win_rate, draw_rate, btts_rate, ou25_rate, cnt = row
self.league_stats_cache[league_id] = {
"avg_goals": float(avg_goals) if avg_goals else 2.5,
"zero_rate": float(zero_rate) if zero_rate else 0.07,
"match_count": cnt
"home_win_rate": float(home_win_rate) if home_win_rate else 0.45,
"draw_rate": float(draw_rate) if draw_rate else 0.25,
"btts_rate": float(btts_rate) if btts_rate else 0.50,
"ou25_rate": float(ou25_rate) if ou25_rate else 0.50,
"match_count": cnt,
}
def _load_team_history(self):
@@ -666,6 +708,9 @@ class FeatureExtractor:
print(f"\n🔄 Extracting features for {total} matches...", flush=True)
_last_print = t_start
_PRINT_INTERVAL = 60 # her dakika bir ilerleme
# Process chronologically — ELO grows as we go
for i, m in enumerate(matches):
(
@@ -683,17 +728,25 @@ class FeatureExtractor:
league_name,
) = m
if i % 100 == 0 and i > 0:
elapsed = time.time() - t_start
rate = i / elapsed # matches per second
now = time.time()
if now - _last_print >= _PRINT_INTERVAL and i > 0:
elapsed = now - t_start
rate = i / elapsed
remaining = (total - i) / rate if rate > 0 else 0
pct = i / total * 100
pct = i / total * 100
eta_h = int(remaining // 3600)
eta_m = int((remaining % 3600) // 60)
eta_s = int(remaining % 60)
eta_str = (f"{eta_h}s {eta_m}dk" if eta_h else f"{eta_m}dk {eta_s}s")
print(
f" [{i}/{total}] ({pct:.0f}%) | {rate:.1f} maç/s | "
f"ETA: {remaining/60:.1f} dk | skipped: {skipped} | "
f"dq_rejected: {dq_rejected}",
f" [{i:>6}/{total}] %{pct:>4.1f} | "
f"{rate:.1f} maç/s | "
f"bitti: {len(rows):,} | "
f"atlanan: {skipped+dq_rejected} | "
f"ETA: {eta_str}",
flush=True,
)
_last_print = now
row = self._extract_one(
mid, hid, aid, sh, sa, hth, hta, mst, lid,
@@ -882,7 +935,10 @@ class FeatureExtractor:
}
# === LEAGUE FEATURES ===
league = self.loader.league_stats_cache.get(lid, {"avg_goals": 2.5, "zero_rate": 0.07})
league = self.loader.league_stats_cache.get(lid, {
"avg_goals": 2.5, "zero_rate": 0.07, "home_win_rate": 0.45,
"draw_rate": 0.25, "btts_rate": 0.50, "ou25_rate": 0.50, "match_count": 0,
})
league_features = {
"league_avg_goals": league["avg_goals"],
"league_zero_goal_rate": league["zero_rate"],
@@ -953,6 +1009,11 @@ class FeatureExtractor:
home_goals_form = home_sq.get('goals_form', 0)
away_goals_form = away_sq.get('goals_form', 0)
# === V27 ROLLING / VENUE / CALENDAR FEATURES ===
v27 = self._compute_v27_features(hid, aid, mst, elo_features, form_features,
home_momentum_score, away_momentum_score,
upset_feats, h2h_features, league)
# === ASSEMBLE ROW ===
row = {
"match_id": mid,
@@ -960,13 +1021,13 @@ class FeatureExtractor:
"away_team_id": aid,
"league_id": lid,
"mst_utc": mst,
**elo_features,
**form_features,
**h2h_features,
**stats_features,
**odds_features,
"home_xga": form_features["home_conceded_avg"],
"away_xga": form_features["away_conceded_avg"],
**league_features,
@@ -1007,7 +1068,10 @@ class FeatureExtractor:
"away_avg_player_exp": away_sq.get('avg_player_exp', 0.0),
"home_goals_diversity": home_sq.get('goals_diversity', 0.0),
"away_goals_diversity": away_sq.get('goals_diversity', 0.0),
# V27 Features
**v27,
# Labels
"score_home": sh,
"score_away": sa,
@@ -1033,6 +1097,103 @@ class FeatureExtractor:
return row
def _compute_v27_features(self, hid, aid, mst, elo_features, form_features,
home_momentum, away_momentum, upset_feats, h2h_features, league):
"""Compute V27 rolling, venue, calendar, interaction features from pre-loaded data."""
home_history = self.loader.team_matches.get(hid, [])
away_history = self.loader.team_matches.get(aid, [])
def _rolling(history, n):
recent = [m for m in history if m[0] < mst][-n:]
if not recent:
return 1.3, 1.1, 0.0
goals = sum(m[2] for m in recent) / len(recent)
conceded = sum(m[3] for m in recent) / len(recent)
cs = sum(1 for m in recent if m[3] == 0) / len(recent)
return round(goals, 3), round(conceded, 3), round(cs, 3)
def _venue(history, is_home):
recent = [m for m in history if m[0] < mst and m[1] == is_home][-10:]
if not recent:
return 1.3, 1.1
goals = sum(m[2] for m in recent) / len(recent)
conceded = sum(m[3] for m in recent) / len(recent)
return round(goals, 3), round(conceded, 3)
def _days_rest(history):
prior = [m[0] for m in history if m[0] < mst]
if not prior:
return 7.0
last = prior[-1]
return round(min((mst - last) / 86400000.0, 30.0), 1)
h5g, h5c, h5cs = _rolling(home_history, 5)
h10g, h10c, _ = _rolling(home_history, 10)
h20g, h20c, _ = _rolling(home_history, 20)
a5g, a5c, a5cs = _rolling(away_history, 5)
a10g, a10c, _ = _rolling(away_history, 10)
hvg, hvc = _venue(home_history, True)
avg, avc = _venue(away_history, False)
home_rest = _days_rest(home_history)
away_rest = _days_rest(away_history)
import datetime
match_dt = datetime.datetime.utcfromtimestamp(mst / 1000)
match_month = match_dt.month
elo_diff = elo_features["elo_diff"]
form_elo_diff = elo_features["form_elo_diff"]
mom_diff = home_momentum - away_momentum
home_conceded = form_features["home_conceded_avg"]
away_conceded = form_features["away_conceded_avg"]
home_goals = form_features["home_goals_avg"]
away_goals = form_features["away_goals_avg"]
upset_potential = upset_feats.get("upset_potential", 0.0)
h2h_prior = [m for m in home_history if m[0] < mst and m[4] == aid]
h2h_home_goals_avg = sum(m[2] for m in h2h_prior) / len(h2h_prior) if h2h_prior else 1.3
h2h_away_goals_avg = sum(m[3] for m in h2h_prior) / len(h2h_prior) if h2h_prior else 1.1
recent_h2h = h2h_prior[-3:]
h2h_recent_trend = sum(1 if m[2] > m[3] else -1 if m[2] < m[3] else 0 for m in recent_h2h) / max(len(recent_h2h), 1)
venue_h2h = [m for m in h2h_prior if m[1]]
h2h_venue_advantage = sum(1 if m[2] > m[3] else 0 for m in venue_h2h) / max(len(venue_h2h), 1) if venue_h2h else 0.5
league_count = league.get("match_count", 0)
return {
"h2h_home_goals_avg": round(h2h_home_goals_avg, 3),
"h2h_away_goals_avg": round(h2h_away_goals_avg, 3),
"h2h_recent_trend": round(h2h_recent_trend, 3),
"h2h_venue_advantage": round(h2h_venue_advantage, 3),
"home_rolling5_goals": h5g, "home_rolling5_conceded": h5c,
"home_rolling10_goals": h10g, "home_rolling10_conceded": h10c,
"home_rolling20_goals": h20g, "home_rolling20_conceded": h20c,
"away_rolling5_goals": a5g, "away_rolling5_conceded": a5c,
"away_rolling10_goals": a10g, "away_rolling10_conceded": a10c,
"home_rolling5_cs": h5cs, "away_rolling5_cs": a5cs,
"home_venue_goals": hvg, "home_venue_conceded": hvc,
"away_venue_goals": avg, "away_venue_conceded": avc,
"home_goal_trend": round(h5g - h10g, 3),
"away_goal_trend": round(a5g - a10g, 3),
"home_days_rest": home_rest, "away_days_rest": away_rest,
"match_month": float(match_month),
"is_season_start": 1.0 if match_month in (7, 8, 9) else 0.0,
"is_season_end": 1.0 if match_month in (5, 6) else 0.0,
"attack_vs_defense_home": round(home_goals - away_conceded, 3),
"attack_vs_defense_away": round(away_goals - home_conceded, 3),
"xg_diff": round(home_conceded - away_conceded, 3),
"form_momentum_interaction": round(mom_diff * form_elo_diff / 1000.0, 4),
"elo_form_consistency": round(1.0 - abs(elo_diff - form_elo_diff) / max(abs(elo_diff), 100.0), 4),
"upset_x_elo_gap": round(upset_potential * abs(elo_diff) / 500.0, 4),
"league_home_win_rate": league.get("home_win_rate", 0.45),
"league_draw_rate": league.get("draw_rate", 0.25),
"league_btts_rate": league.get("btts_rate", 0.50),
"league_ou25_rate": league.get("ou25_rate", 0.50),
"league_reliability_score": min(1.0, league_count / 500.0) if league_count else 0.3,
}
def _validate_row_quality(
self,
row: dict,