This commit is contained in:
2026-04-23 22:22:59 +03:00
parent df428ed1e8
commit 634204acf0
6 changed files with 2064 additions and 90 deletions
+240
View File
@@ -36,6 +36,11 @@ class FeatureEnrichmentService:
'avg_goals': 2.5,
'btts_rate': 0.5,
'over25_rate': 0.5,
# V27 expanded
'home_goals_avg': 1.3,
'away_goals_avg': 1.1,
'recent_trend': 0.0,
'venue_advantage': 0.0,
}
_DEFAULT_FORM = {
'clean_sheet_rate': 0.2,
@@ -53,6 +58,25 @@ class FeatureEnrichmentService:
_DEFAULT_LEAGUE = {
'avg_goals': 2.7,
'zero_goal_rate': 0.07,
# V27 expanded
'home_win_rate': 0.46,
'draw_rate': 0.26,
'btts_rate': 0.50,
'ou25_rate': 0.50,
'reliability_score': 0.0,
}
_DEFAULT_ROLLING = {
'rolling5_goals': 1.3,
'rolling5_conceded': 1.2,
'rolling10_goals': 1.3,
'rolling10_conceded': 1.2,
'rolling20_goals': 1.3,
'rolling20_conceded': 1.2,
'rolling5_cs': 0.2,
}
_DEFAULT_VENUE = {
'venue_goals': 1.4,
'venue_conceded': 1.1,
}
# ─── 1. Team Stats ──────────────────────────────────────────────
@@ -186,6 +210,13 @@ class FeatureEnrichmentService:
total_goals = 0
btts_count = 0
over25_count = 0
# V27 expanded trackers
home_team_goals_list = []
away_team_goals_list = []
home_team_venue_wins = 0
home_team_venue_total = 0
away_team_venue_wins = 0
away_team_venue_total = 0
for row in rows:
sh = int(row['score_home'])
@@ -195,14 +226,22 @@ class FeatureEnrichmentService:
# Normalise: who is "home team" in THIS prediction context
if str(row['home_team_id']) == home_team_id:
home_team_goals_list.append(sh)
away_team_goals_list.append(sa)
home_team_venue_total += 1
if sh > sa:
home_wins += 1
home_team_venue_wins += 1
elif sh == sa:
draws += 1
else:
# Reversed fixture: away_team was at home
home_team_goals_list.append(sa)
away_team_goals_list.append(sh)
away_team_venue_total += 1
if sa > sh:
home_wins += 1
away_team_venue_wins += 1
elif sh == sa:
draws += 1
@@ -211,6 +250,29 @@ class FeatureEnrichmentService:
if match_goals > 2:
over25_count += 1
# V27: recent_trend = last-5 home_win_rate - first-5 home_win_rate
recent_trend = 0.0
if total >= 6:
recent_5_wins = sum(
1 for r in rows[:5]
if (str(r['home_team_id']) == home_team_id and int(r['score_home']) > int(r['score_away']))
or (str(r['home_team_id']) != home_team_id and int(r['score_away']) > int(r['score_home']))
)
older_5_wins = sum(
1 for r in rows[-5:]
if (str(r['home_team_id']) == home_team_id and int(r['score_home']) > int(r['score_away']))
or (str(r['home_team_id']) != home_team_id and int(r['score_away']) > int(r['score_home']))
)
recent_trend = (recent_5_wins - older_5_wins) / 5.0
# V27: venue_advantage = home_win_rate_at_home - home_win_rate_away
venue_advantage = 0.0
if home_team_venue_total > 0 and away_team_venue_total > 0:
venue_advantage = (
home_team_venue_wins / home_team_venue_total
- away_team_venue_wins / away_team_venue_total
)
return {
'total_matches': total,
'home_win_rate': home_wins / total,
@@ -218,6 +280,11 @@ class FeatureEnrichmentService:
'avg_goals': total_goals / total,
'btts_rate': btts_count / total,
'over25_rate': over25_count / total,
# V27 expanded
'home_goals_avg': _safe_avg(home_team_goals_list, 1.3),
'away_goals_avg': _safe_avg(away_team_goals_list, 1.1),
'recent_trend': round(recent_trend, 4),
'venue_advantage': round(venue_advantage, 4),
}
# ─── 3. Form & Streaks ──────────────────────────────────────────
@@ -433,6 +500,10 @@ class FeatureEnrichmentService:
total = len(rows)
total_goals = 0
zero_goal_matches = 0
home_wins = 0
draw_count = 0
btts_count = 0
over25_count = 0
for row in rows:
sh = int(row['score_home'])
@@ -441,10 +512,24 @@ class FeatureEnrichmentService:
total_goals += match_goals
if match_goals == 0:
zero_goal_matches += 1
if sh > sa:
home_wins += 1
elif sh == sa:
draw_count += 1
if sh > 0 and sa > 0:
btts_count += 1
if match_goals > 2:
over25_count += 1
return {
'avg_goals': total_goals / total,
'zero_goal_rate': zero_goal_matches / total,
# V27 expanded
'home_win_rate': home_wins / total,
'draw_rate': draw_count / total,
'btts_rate': btts_count / total,
'ou25_rate': over25_count / total,
'reliability_score': min(total / 50.0, 1.0),
}
# ─── 6. Momentum ───────────────────────────────────────────────
@@ -514,6 +599,161 @@ class FeatureEnrichmentService:
return round(weighted_score / max_possible, 4)
# ─── 7. Rolling Stats (V27) ─────────────────────────────────────
def compute_rolling_stats(
    self,
    cur: RealDictCursor,
    team_id: str,
    before_date_ms: int,
) -> Dict[str, float]:
    """
    Rolling goal averages and clean-sheet rate over the team's latest matches.

    A single query fetches up to 20 finished matches (status 'FT', both
    scores present) with ``mst_utc`` strictly below ``before_date_ms``,
    newest first. The 5/10/20 windows are slices of that one result set, so
    a team with fewer matches simply gets shorter windows.

    Args:
        cur: Cursor whose rows support access by column name.
        team_id: Team identifier, matched against both the home and the
            away column of ``matches``.
        before_date_ms: Exclusive upper bound compared against ``mst_utc``
            (presumably epoch milliseconds, going by the name).

    Returns:
        Dict with the same keys as ``_DEFAULT_ROLLING``. A copy of those
        defaults is returned when ``team_id`` is empty, the query fails,
        or no rows match.
    """
    if not team_id:
        return dict(self._DEFAULT_ROLLING)

    try:
        cur.execute(
            """
            SELECT
                m.home_team_id,
                m.score_home,
                m.score_away
            FROM matches m
            WHERE (m.home_team_id = %s OR m.away_team_id = %s)
              AND m.status = 'FT'
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
              AND m.mst_utc < %s
            ORDER BY m.mst_utc DESC
            LIMIT 20
            """,
            (team_id, team_id, before_date_ms),
        )
        rows = cur.fetchall()
    except Exception:
        # Best-effort feature: keep falling back to defaults, but leave a
        # trace so a broken query or connection is not silently masked.
        import logging

        logging.getLogger(__name__).warning(
            "compute_rolling_stats: query failed for team_id=%s; using defaults",
            team_id,
            exc_info=True,
        )
        return dict(self._DEFAULT_ROLLING)

    if not rows:
        return dict(self._DEFAULT_ROLLING)

    # Compare as strings on both sides: the DB value is stringified, so a
    # non-str team_id would otherwise never match and every home game would
    # be counted as an away game.
    team_key = str(team_id)

    goals = []
    conceded = []
    clean_sheets = []
    for row in rows:
        is_home = str(row['home_team_id']) == team_key
        gf = int(row['score_home'] if is_home else row['score_away'])
        ga = int(row['score_away'] if is_home else row['score_home'])
        goals.append(gf)
        conceded.append(ga)
        clean_sheets.append(1 if ga == 0 else 0)

    # Slicing past the end of a list is safe, so no explicit length clamp
    # is needed for the shorter-history case.
    return {
        'rolling5_goals': _safe_avg(goals[:5], 1.3),
        'rolling5_conceded': _safe_avg(conceded[:5], 1.2),
        'rolling10_goals': _safe_avg(goals[:10], 1.3),
        'rolling10_conceded': _safe_avg(conceded[:10], 1.2),
        'rolling20_goals': _safe_avg(goals[:20], 1.3),
        'rolling20_conceded': _safe_avg(conceded[:20], 1.2),
        'rolling5_cs': _safe_avg(clean_sheets[:5], 0.2),
    }
# ─── 8. Venue Stats (V27) ──────────────────────────────────────
def compute_venue_stats(
    self,
    cur: RealDictCursor,
    team_id: str,
    before_date_ms: int,
    is_home: bool = True,
) -> Dict[str, float]:
    """
    Team goals scored/conceded at one venue type (home only or away only).

    Looks at up to 20 finished matches (status 'FT', both scores present)
    with ``mst_utc`` strictly below ``before_date_ms``, newest first, in
    which the team played on the requested side.

    Args:
        cur: Cursor whose rows support access by column name.
        team_id: Team identifier matched against the selected venue column.
        before_date_ms: Exclusive upper bound compared against ``mst_utc``
            (presumably epoch milliseconds, going by the name).
        is_home: ``True`` to use matches where the team was the home side,
            ``False`` for matches where it was the away side.

    Returns:
        Dict with ``venue_goals`` and ``venue_conceded``. A copy of
        ``_DEFAULT_VENUE`` is returned when ``team_id`` is empty, the query
        fails, or no rows match.
    """
    if not team_id:
        return dict(self._DEFAULT_VENUE)

    # The column name is one of two fixed literals selected by a boolean,
    # never caller-supplied text, so interpolating it into the SQL is safe.
    venue_col = 'home_team_id' if is_home else 'away_team_id'

    try:
        cur.execute(
            f"""
            SELECT m.score_home, m.score_away
            FROM matches m
            WHERE m.{venue_col} = %s
              AND m.status = 'FT'
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
              AND m.mst_utc < %s
            ORDER BY m.mst_utc DESC
            LIMIT 20
            """,
            (team_id, before_date_ms),
        )
        rows = cur.fetchall()
    except Exception:
        # Best-effort feature: keep falling back to defaults, but leave a
        # trace so a broken query or connection is not silently masked.
        import logging

        logging.getLogger(__name__).warning(
            "compute_venue_stats: query failed for team_id=%s (is_home=%s); using defaults",
            team_id,
            is_home,
            exc_info=True,
        )
        return dict(self._DEFAULT_VENUE)

    if not rows:
        return dict(self._DEFAULT_VENUE)

    # Which score column belongs to the team depends on the side it played.
    if is_home:
        own_col, opp_col = 'score_home', 'score_away'
    else:
        own_col, opp_col = 'score_away', 'score_home'

    goals = [int(row[own_col]) for row in rows]
    conceded_list = [int(row[opp_col]) for row in rows]

    return {
        'venue_goals': _safe_avg(goals, 1.4),
        'venue_conceded': _safe_avg(conceded_list, 1.1),
    }
# ─── 9. Days Rest (V27) ────────────────────────────────────────
def compute_days_rest(
    self,
    cur: RealDictCursor,
    team_id: str,
    before_date_ms: int,
) -> float:
    """
    Return the number of days since the team's last finished match.

    The most recent match with status 'FT' and ``mst_utc`` strictly below
    ``before_date_ms`` is looked up; the gap is converted from milliseconds
    to days, clamped to the range 0.0-30.0 and rounded to one decimal.

    Args:
        cur: Cursor whose row supports ``.get`` and key access by column name.
        team_id: Team identifier, matched against both the home and the
            away column of ``matches``.
        before_date_ms: Reference timestamp in milliseconds; also the
            exclusive upper bound for the lookup.

    Returns:
        Days of rest as a float. Falls back to 7.0 (one-week rest) when
        ``team_id`` is empty, the query fails, no match is found, or the
        stored timestamp is missing/falsy.
    """
    default_rest = 7.0
    max_rest = 30.0
    ms_per_day = 1000 * 86400

    if not team_id:
        return default_rest

    try:
        cur.execute(
            """
            SELECT m.mst_utc
            FROM matches m
            WHERE (m.home_team_id = %s OR m.away_team_id = %s)
              AND m.status = 'FT'
              AND m.mst_utc < %s
            ORDER BY m.mst_utc DESC
            LIMIT 1
            """,
            (team_id, team_id, before_date_ms),
        )
        row = cur.fetchone()
    except Exception:
        # Best-effort feature: keep falling back to the default, but leave a
        # trace so a broken query or connection is not silently masked.
        import logging

        logging.getLogger(__name__).warning(
            "compute_days_rest: query failed for team_id=%s; using default",
            team_id,
            exc_info=True,
        )
        return default_rest

    if not row or not row.get('mst_utc'):
        return default_rest

    last_match_ms = int(row['mst_utc'])
    diff_days = (before_date_ms - last_match_ms) / ms_per_day
    # Clamp: negative gaps become 0 and long breaks are capped at 30 days.
    return round(max(0.0, min(diff_days, max_rest)), 1)
# ─── Utility ────────────────────────────────────────────────────────
def _safe_avg(values: list, default: float) -> float: