first (part 2: other directories)
Deploy Iddaai Backend / build-and-deploy (push) Failing after 18s

This commit is contained in:
2026-04-16 15:11:25 +03:00
parent 7814e0bc6b
commit 2f0b85a0c7
203 changed files with 59989 additions and 0 deletions
+3
View File
@@ -0,0 +1,3 @@
from .single_match_orchestrator import get_single_match_orchestrator
# Public API of this package: only the orchestrator accessor is re-exported.
__all__ = ["get_single_match_orchestrator"]
+523
View File
@@ -0,0 +1,523 @@
"""
Feature Enrichment Service
===========================
Computes real statistical features from DB for V25 model input.
Replaces hardcoded defaults in `_build_v25_features()` with rolling
averages from football_team_stats, matches, match_officials, and
match_player_events tables.
Each method receives a psycopg2 cursor + params and returns a dict
(compute_momentum returns a single float).
All methods are fail-safe: they return sensible defaults when data
is missing or queries fail.
"""
from __future__ import annotations
from typing import Any, Dict, Optional, Tuple
from psycopg2.extras import RealDictCursor
class FeatureEnrichmentService:
    """Stateless service — all state comes from DB via cursor.

    Every ``compute_*`` method runs its read-only queries through the cursor
    it is handed and folds the returned rows into model features.  The
    methods are best-effort: a missing id, an empty result set or a failing
    query yields the matching ``_DEFAULT_*`` values (``0.0`` for momentum)
    instead of raising.
    """

    # ─── Default fallback values ─────────────────────────────────────
    _DEFAULT_TEAM_STATS = {
        'avg_possession': 50.0,
        'avg_shots_on_target': 4.0,
        'shot_conversion': 0.1,
        'avg_corners': 5.0,
    }
    _DEFAULT_H2H = {
        'total_matches': 0,
        'home_win_rate': 0.33,
        'draw_rate': 0.33,
        'avg_goals': 2.5,
        'btts_rate': 0.5,
        'over25_rate': 0.5,
    }
    _DEFAULT_FORM = {
        'clean_sheet_rate': 0.2,
        'scoring_rate': 0.8,
        'winning_streak': 0,
        'unbeaten_streak': 0,
    }
    _DEFAULT_REFEREE = {
        'home_bias': 0.0,
        'avg_goals': 2.5,
        'cards_total': 4.0,
        'avg_yellow': 3.0,
        'experience': 0,
    }
    _DEFAULT_LEAGUE = {
        'avg_goals': 2.7,
        'zero_goal_rate': 0.07,
    }
    # Home-win rate that a referee's home bias is measured against (~46%).
    _BASELINE_HOME_WIN_RATE = 0.46

    # ─── Query helper ───────────────────────────────────────────────
    @staticmethod
    def _fetch_rows(cur: RealDictCursor, sql: str, params: Tuple[Any, ...]) -> Optional[list]:
        """Run *sql* with *params* and return all rows, or ``None`` on failure.

        Any exception raised by ``execute``/``fetchall`` is swallowed on
        purpose: feature enrichment must never break a prediction.
        NOTE(review): after a failed statement the DB transaction may stay
        aborted until the caller rolls back — confirm callers handle that.
        """
        try:
            cur.execute(sql, params)
            return cur.fetchall()
        except Exception:
            return None

    # ─── 1. Team Stats ──────────────────────────────────────────────
    def compute_team_stats(
        self,
        cur: RealDictCursor,
        team_id: str,
        before_date_ms: int,
        limit: int = 10,
    ) -> Dict[str, float]:
        """
        Rolling averages from football_team_stats for a team's last N matches.

        Only finished ('FT') football matches that kicked off before
        ``before_date_ms`` and have a positive possession value are used.

        Returns avg_possession, avg_shots_on_target, shot_conversion and
        avg_corners.  ``shot_conversion`` is computed as shots on target
        divided by total shots, averaged over the matches where total shots
        is positive.  A metric without any usable sample falls back to its
        ``_DEFAULT_TEAM_STATS`` value; a missing ``team_id``, an empty result
        or a failed query returns a copy of ``_DEFAULT_TEAM_STATS``.
        """
        if not team_id:
            return dict(self._DEFAULT_TEAM_STATS)

        rows = self._fetch_rows(
            cur,
            """
            SELECT
                mts.possession_percentage,
                mts.shots_on_target,
                mts.total_shots,
                mts.corners
            FROM football_team_stats mts
            JOIN matches m ON m.id = mts.match_id
            WHERE mts.team_id = %s
              AND m.status = 'FT'
              AND m.mst_utc < %s
              AND m.sport = 'football'
              AND mts.possession_percentage IS NOT NULL
              AND mts.possession_percentage > 0
            ORDER BY m.mst_utc DESC
            LIMIT %s
            """,
            (team_id, before_date_ms, limit),
        )
        if not rows:
            return dict(self._DEFAULT_TEAM_STATS)

        possession_vals = []
        sot_vals = []
        conversion_vals = []
        corner_vals = []
        for row in rows:
            poss = row.get('possession_percentage')
            if poss is not None:
                possession_vals.append(float(poss))

            sot = row.get('shots_on_target')
            if sot is not None:
                sot_vals.append(float(sot))

            total_shots = row.get('total_shots')
            # Test ``sot`` against None, not truthiness: a match with shots
            # but none on target is a genuine 0.0 sample, and dropping it
            # would bias the average upwards.
            if sot is not None and total_shots is not None and float(total_shots) > 0:
                conversion_vals.append(float(sot) / float(total_shots))

            corners = row.get('corners')
            if corners is not None:
                corner_vals.append(float(corners))

        defaults = self._DEFAULT_TEAM_STATS
        return {
            'avg_possession': _safe_avg(possession_vals, defaults['avg_possession']),
            'avg_shots_on_target': _safe_avg(sot_vals, defaults['avg_shots_on_target']),
            'shot_conversion': _safe_avg(conversion_vals, defaults['shot_conversion']),
            'avg_corners': _safe_avg(corner_vals, defaults['avg_corners']),
        }

    # ─── 2. Head-to-Head ────────────────────────────────────────────
    def compute_h2h(
        self,
        cur: RealDictCursor,
        home_team_id: str,
        away_team_id: str,
        before_date_ms: int,
        limit: int = 20,
    ) -> Dict[str, float]:
        """
        Historical head-to-head between two teams (both directions).

        ``home_win_rate`` is counted from the point of view of
        ``home_team_id`` in the upcoming fixture, wherever each past match
        was played.

        Returns total_matches, home_win_rate, draw_rate, avg_goals,
        btts_rate, over25_rate; a copy of ``_DEFAULT_H2H`` when either id is
        missing, no match is found or the query fails.
        """
        if not home_team_id or not away_team_id:
            return dict(self._DEFAULT_H2H)

        rows = self._fetch_rows(
            cur,
            """
            SELECT
                m.home_team_id,
                m.away_team_id,
                m.score_home,
                m.score_away
            FROM matches m
            WHERE m.status = 'FT'
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
              AND m.mst_utc < %s
              AND (
                (m.home_team_id = %s AND m.away_team_id = %s) OR
                (m.home_team_id = %s AND m.away_team_id = %s)
              )
            ORDER BY m.mst_utc DESC
            LIMIT %s
            """,
            (
                before_date_ms,
                home_team_id, away_team_id,
                away_team_id, home_team_id,
                limit,
            ),
        )
        if not rows:
            return dict(self._DEFAULT_H2H)

        # Compare ids as strings on both sides so a non-str id (int, UUID)
        # passed by the caller still matches the DB value.
        home_key = str(home_team_id)
        total = len(rows)
        home_wins = 0
        draws = 0
        total_goals = 0
        btts_count = 0
        over25_count = 0
        for row in rows:
            sh = int(row['score_home'])
            sa = int(row['score_away'])
            match_goals = sh + sa
            total_goals += match_goals

            if sh == sa:
                draws += 1
            elif str(row['home_team_id']) == home_key:
                # Same venue as the upcoming fixture.
                if sh > sa:
                    home_wins += 1
            elif sa > sh:
                # Reversed fixture: our "home" team was the visitor and won.
                home_wins += 1

            if sh > 0 and sa > 0:
                btts_count += 1
            if match_goals > 2:
                over25_count += 1

        return {
            'total_matches': total,
            'home_win_rate': home_wins / total,
            'draw_rate': draws / total,
            'avg_goals': total_goals / total,
            'btts_rate': btts_count / total,
            'over25_rate': over25_count / total,
        }

    # ─── 3. Form & Streaks ──────────────────────────────────────────
    def compute_form_streaks(
        self,
        cur: RealDictCursor,
        team_id: str,
        before_date_ms: int,
        limit: int = 10,
    ) -> Dict[str, float]:
        """
        Clean sheet rate, scoring rate, and current streaks.

        Looks at the team's last ``limit`` finished matches (home or away)
        before ``before_date_ms``.  Streaks are counted from the most recent
        match backwards and stop at the first match that breaks them.
        Returns a copy of ``_DEFAULT_FORM`` when ``team_id`` is missing, no
        match is found or the query fails.
        """
        if not team_id:
            return dict(self._DEFAULT_FORM)

        rows = self._fetch_rows(
            cur,
            """
            SELECT
                m.home_team_id,
                m.away_team_id,
                m.score_home,
                m.score_away
            FROM matches m
            WHERE (m.home_team_id = %s OR m.away_team_id = %s)
              AND m.status = 'FT'
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
              AND m.mst_utc < %s
            ORDER BY m.mst_utc DESC
            LIMIT %s
            """,
            (team_id, team_id, before_date_ms, limit),
        )
        if not rows:
            return dict(self._DEFAULT_FORM)

        team_key = str(team_id)
        total = len(rows)
        clean_sheets = 0
        scored_count = 0
        winning_streak = 0
        unbeaten_streak = 0
        streak_broken_w = False
        streak_broken_u = False
        for row in rows:
            is_home = str(row['home_team_id']) == team_key
            goals_for = int(row['score_home'] if is_home else row['score_away'])
            goals_against = int(row['score_away'] if is_home else row['score_home'])

            if goals_against == 0:
                clean_sheets += 1
            if goals_for > 0:
                scored_count += 1

            # Streak counting (rows are ordered most recent first).
            if not streak_broken_w:
                if goals_for > goals_against:
                    winning_streak += 1
                else:
                    streak_broken_w = True
            if not streak_broken_u:
                if goals_for >= goals_against:
                    unbeaten_streak += 1
                else:
                    streak_broken_u = True

        return {
            'clean_sheet_rate': clean_sheets / total,
            'scoring_rate': scored_count / total,
            'winning_streak': winning_streak,
            'unbeaten_streak': unbeaten_streak,
        }

    # ─── 4. Referee Stats ───────────────────────────────────────────
    def compute_referee_stats(
        self,
        cur: RealDictCursor,
        referee_name: Optional[str],
        before_date_ms: int,
        limit: int = 30,
    ) -> Dict[str, float]:
        """
        Referee tendencies: home win bias, avg goals, card rates.

        Matches the referee by name in match_officials, restricted to
        role_id = 1 (the main referee, "Orta Hakem").  Card counts come from
        a second query on match_player_events; if that query fails or yields
        no row, ``cards_total`` and ``avg_yellow`` fall back to their
        ``_DEFAULT_REFEREE`` values instead of being reported as zero.
        Returns a copy of ``_DEFAULT_REFEREE`` when the name is missing, no
        match is found or the first query fails.
        """
        if not referee_name:
            return dict(self._DEFAULT_REFEREE)

        rows = self._fetch_rows(
            cur,
            """
            SELECT
                m.home_team_id,
                m.score_home,
                m.score_away,
                m.id AS match_id
            FROM match_officials mo
            JOIN matches m ON m.id = mo.match_id
            WHERE mo.name = %s
              AND mo.role_id = 1
              AND m.status = 'FT'
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
              AND m.mst_utc < %s
            ORDER BY m.mst_utc DESC
            LIMIT %s
            """,
            (referee_name, before_date_ms, limit),
        )
        if not rows:
            return dict(self._DEFAULT_REFEREE)

        total = len(rows)
        home_wins = 0
        total_goals = 0
        match_ids = []
        for row in rows:
            sh = int(row['score_home'])
            sa = int(row['score_away'])
            total_goals += sh + sa
            if sh > sa:
                home_wins += 1
            match_ids.append(row['match_id'])

        # Card stats from match_player_events (best-effort second query).
        try:
            cur.execute(
                """
                SELECT
                    COUNT(*) FILTER (WHERE event_subtype = 'yc') AS yellows,
                    COUNT(*) AS total_cards
                FROM match_player_events
                WHERE match_id = ANY(%s)
                  AND event_type = 'card'
                """,
                (match_ids,),
            )
            card_row = cur.fetchone()
        except Exception:
            card_row = None

        if card_row:
            cards_total = float(card_row.get('total_cards') or 0) / total
            avg_yellow = float(card_row.get('yellows') or 0) / total
        else:
            # Unknown card data must not look like "this referee never books".
            cards_total = self._DEFAULT_REFEREE['cards_total']
            avg_yellow = self._DEFAULT_REFEREE['avg_yellow']

        # home_bias: observed home win rate minus the ~46% baseline.
        home_bias = (home_wins / total) - self._BASELINE_HOME_WIN_RATE
        return {
            'home_bias': round(home_bias, 4),
            'avg_goals': total_goals / total,
            'cards_total': cards_total,
            'avg_yellow': avg_yellow,
            'experience': total,
        }

    # ─── 5. League Averages ─────────────────────────────────────────
    def compute_league_averages(
        self,
        cur: RealDictCursor,
        league_id: Optional[str],
        before_date_ms: int,
        limit: int = 100,
    ) -> Dict[str, float]:
        """
        League-wide scoring tendencies.

        Uses the league's last ``limit`` finished matches before
        ``before_date_ms`` and returns avg_goals (goals per match) and
        zero_goal_rate (share of 0-0 matches).  Returns a copy of
        ``_DEFAULT_LEAGUE`` when ``league_id`` is missing, no match is found
        or the query fails.
        """
        if not league_id:
            return dict(self._DEFAULT_LEAGUE)

        rows = self._fetch_rows(
            cur,
            """
            SELECT
                m.score_home,
                m.score_away
            FROM matches m
            WHERE m.league_id = %s
              AND m.status = 'FT'
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
              AND m.mst_utc < %s
            ORDER BY m.mst_utc DESC
            LIMIT %s
            """,
            (league_id, before_date_ms, limit),
        )
        if not rows:
            return dict(self._DEFAULT_LEAGUE)

        total = len(rows)
        total_goals = 0
        zero_goal_matches = 0
        for row in rows:
            match_goals = int(row['score_home']) + int(row['score_away'])
            total_goals += match_goals
            if match_goals == 0:
                zero_goal_matches += 1

        return {
            'avg_goals': total_goals / total,
            'zero_goal_rate': zero_goal_matches / total,
        }

    # ─── 6. Momentum ───────────────────────────────────────────────
    def compute_momentum(
        self,
        cur: RealDictCursor,
        team_id: str,
        before_date_ms: int,
        limit: int = 5,
    ) -> float:
        """
        Recency-weighted momentum score: W=3, D=1, L=-1.

        Each of the last ``limit`` results is weighted linearly (the most
        recent match has the highest weight) and the weighted sum is divided
        by the score of an all-wins run.  Because a loss is worth -1 while
        the normaliser uses 3 per match, the result lies in [-1/3, 1.0]:
        1.0 for all wins, about -0.3333 for all losses.  Returns 0.0 when
        ``team_id`` is missing, no match is found or the query fails.
        """
        if not team_id:
            return 0.0

        rows = self._fetch_rows(
            cur,
            """
            SELECT
                m.home_team_id,
                m.score_home,
                m.score_away
            FROM matches m
            WHERE (m.home_team_id = %s OR m.away_team_id = %s)
              AND m.status = 'FT'
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
              AND m.mst_utc < %s
            ORDER BY m.mst_utc DESC
            LIMIT %s
            """,
            (team_id, team_id, before_date_ms, limit),
        )
        if not rows:
            return 0.0

        team_key = str(team_id)
        total_count = len(rows)
        weighted_score = 0.0
        max_possible = 0.0
        for idx, row in enumerate(rows):
            weight = float(total_count - idx)  # most recent = highest weight
            is_home = str(row['home_team_id']) == team_key
            gf = int(row['score_home'] if is_home else row['score_away'])
            ga = int(row['score_away'] if is_home else row['score_home'])
            if gf > ga:
                result_score = 3.0
            elif gf == ga:
                result_score = 1.0
            else:
                result_score = -1.0
            weighted_score += result_score * weight
            max_possible += 3.0 * weight  # max = all wins

        if max_possible <= 0:
            return 0.0
        return round(weighted_score / max_possible, 4)
# ─── Utility ────────────────────────────────────────────────────────
def _safe_avg(values: list, default: float) -> float:
    """Return the arithmetic mean of *values*, or *default* when it is empty."""
    return sum(values) / len(values) if values else default
File diff suppressed because it is too large Load Diff
+282
View File
@@ -0,0 +1,282 @@
"""
V2 Betting Engine — FastAPI Router
Async endpoint that orchestrates: DB → Features → Model → Quant → Response.
Mounted as a sub-router on the existing main.py app, so both V20+ (legacy)
and V2 endpoints coexist.
"""
from __future__ import annotations
import logging
import time
from typing import Any
from fastapi import APIRouter, HTTPException
from core.quant import (
MarketPick,
RiskResult,
analyze_market,
assess_risk,
)
from data.database import get_session
from features.extractor import MatchFeatures, extract_features
from models.betting_engine import get_predictor
from schemas.response import (
BetAdvice,
BetSummaryRow,
DataQuality,
EngineBreakdown,
MarketProbs,
MatchInfo,
PickDetail,
PredictionResponse,
RiskAssessment,
)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Router holding the V2 endpoints; every route declared on it is served under
# the "/v2" prefix.
router = APIRouter(prefix="/v2", tags=["V2 Betting Engine"])
# ═══════════════════════════════════════════════════════════════════════════
# Endpoints
# ═══════════════════════════════════════════════════════════════════════════
@router.post("/analyze/{match_id}", response_model=PredictionResponse)
async def analyze_match_v2(match_id: str) -> PredictionResponse:
    """
    Full single-match analysis pipeline:
    1. Extract features for the match through a DB session
    2. Run the predictor for the MS, OU25 and BTTS markets
    3. Analyse each market against its bookmaker odds
    4. Select main / supporting / value / aggressive picks
    5. Assess risk
    6. Build and return the PredictionResponse

    Raises:
        HTTPException: 404 when feature extraction returns ``None`` (match
            not found or insufficient data).
    """
    started_at = time.perf_counter()

    # ─── Step 1: Feature extraction ───────────────────────────────────
    async with get_session() as session:
        feats = await extract_features(session, match_id)
    if feats is None:
        raise HTTPException(
            status_code=404,
            detail=f"Match {match_id} not found or insufficient data.",
        )

    # ─── Step 2: Model predictions ────────────────────────────────────
    predictor = get_predictor()
    X = feats.to_model_array()
    all_probs = predictor.predict_all(X, feats)

    # ─── Step 3: Quantitative analysis per market ─────────────────────
    ms_odds_map = {"1": feats.odds_home, "X": feats.odds_draw, "2": feats.odds_away}
    ou25_odds_map = {"Under": feats.odds_under25, "Over": feats.odds_over25}
    btts_odds_map = {"No": feats.odds_btts_no, "Yes": feats.odds_btts_yes}
    ms_pick = analyze_market("MS", all_probs["MS"], ms_odds_map, feats.data_quality_score)
    ou25_pick = analyze_market("OU25", all_probs["OU25"], ou25_odds_map, feats.data_quality_score)
    btts_pick = analyze_market("BTTS", all_probs["BTTS"], btts_odds_map, feats.data_quality_score)
    all_picks = [ms_pick, ou25_pick, btts_pick]

    # ─── Step 4: Select main pick (highest play_score among playable) ─
    playable_picks = sorted(
        (p for p in all_picks if p.playable),
        key=lambda p: p.play_score,
        reverse=True,
    )
    main_pick: MarketPick | None = playable_picks[0] if playable_picks else None
    supporting = playable_picks[1:]  # empty when there are 0 or 1 playable picks

    # Value pick: best playable with odds >= 1.60
    value_candidates = [p for p in playable_picks if p.odds >= 1.60]
    value_pick: MarketPick | None = value_candidates[0] if value_candidates else None
    # If value_pick IS the main_pick, try the next candidate
    if value_pick and main_pick and value_pick.market == main_pick.market:
        value_pick = value_candidates[1] if len(value_candidates) > 1 else None

    # Aggressive pick: highest edge regardless of playability, only if positive.
    # max() returns the first pick on ties, like a stable descending sort.
    top_edge_pick = max(all_picks, key=lambda p: p.edge)
    aggressive = top_edge_pick if top_edge_pick.edge > 0 else None

    # ─── Step 5: Risk assessment ──────────────────────────────────────
    implied_prob_fav = max(feats.implied_prob_home, feats.implied_prob_away)
    risk = assess_risk(
        missing_players_impact=feats.missing_players_impact,
        data_quality_score=feats.data_quality_score,
        elo_diff=feats.elo_diff,
        implied_prob_fav=implied_prob_fav,
    )

    # ─── Step 6: Build response ───────────────────────────────────────
    # Measured before the response is assembled, so it covers steps 1-5 only.
    elapsed_ms = int((time.perf_counter() - started_at) * 1000)
    response = PredictionResponse(
        model_version="v2.betting_engine",
        match_info=MatchInfo(
            match_id=match_id,
            match_name=feats.match_name,
            home_team=feats.home_team_name,
            away_team=feats.away_team_name,
            league=feats.league_name,
            match_date_ms=feats.match_date_ms,
        ),
        data_quality=DataQuality(
            label=_quality_label(feats.data_quality_score),
            score=feats.data_quality_score,
            flags=feats.data_quality_flags,
        ),
        risk=RiskAssessment(
            level=risk.level,
            score=risk.score,
            is_surprise_risk=risk.is_surprise_risk,
            surprise_type=risk.surprise_type,
            warnings=risk.warnings,
        ),
        engine_breakdown=EngineBreakdown(
            team=round(feats.elo_diff / 100.0, 2),
            player=round(-feats.missing_players_impact, 2),
            odds=round(implied_prob_fav, 2),
            referee=0.0,
        ),
        main_pick=_pick_to_detail(main_pick, feats) if main_pick else None,
        value_pick=_pick_to_detail(value_pick, feats) if value_pick else None,
        bet_advice=BetAdvice(
            playable=main_pick is not None,
            suggested_stake_units=main_pick.stake_units if main_pick else 0.0,
            reason=(
                f"Best value: {main_pick.market} {main_pick.pick} "
                f"(edge {main_pick.edge:.1%}, grade {main_pick.bet_grade})"
                if main_pick
                else "no_playable_edge_found"
            ),
        ),
        bet_summary=[_pick_to_summary(p) for p in all_picks],
        supporting_picks=[_pick_to_detail(p, feats) for p in supporting],
        aggressive_pick=_pick_to_detail(aggressive, feats) if aggressive else None,
        market_board={
            "MS": MarketProbs(
                pick=ms_pick.pick,
                confidence=round(ms_pick.probability * 100, 1),
                probs=all_probs["MS"],
            ).model_dump(),
            "OU25": MarketProbs(
                pick=ou25_pick.pick,
                confidence=round(ou25_pick.probability * 100, 1),
                probs=all_probs["OU25"],
            ).model_dump(),
            "BTTS": MarketProbs(
                pick=btts_pick.pick,
                confidence=round(btts_pick.probability * 100, 1),
                probs=all_probs["BTTS"],
            ).model_dump(),
        },
        reasoning_factors=_build_reasoning(feats, main_pick, risk, elapsed_ms),
    )

    # The match id and the advice text are separated explicitly; without a
    # separator they run together in the log line.
    logger.info(
        "V2 analyze %s -> %s in %dms (main: %s %s, edge: %s)",
        match_id,
        response.bet_advice.reason,
        elapsed_ms,
        main_pick.market if main_pick else "NONE",
        main_pick.pick if main_pick else "",
        f"{main_pick.edge:.1%}" if main_pick else "N/A",
    )
    return response
@router.get("/health")
async def v2_health():
    """Report that the V2 engine is up and whether its predictor is ready."""
    models_loaded = get_predictor().is_ready
    payload = {
        "status": "healthy",
        "engine": "v2.betting_engine",
        "models_loaded": models_loaded,
    }
    return payload
# ═══════════════════════════════════════════════════════════════════════════
# Helpers
# ═══════════════════════════════════════════════════════════════════════════
def _quality_label(score: float) -> str:
    """Map a data-quality score to "HIGH" (>= 0.8), "MEDIUM" (>= 0.5) or "LOW"."""
    # Thresholds are checked from the highest down; the first one met wins.
    for threshold, label in ((0.8, "HIGH"), (0.5, "MEDIUM")):
        if score >= threshold:
            return label
    return "LOW"
def _pick_to_detail(pick: MarketPick, feats: MatchFeatures) -> PickDetail:
    """Convert a MarketPick into the PickDetail response model.

    The pick's probability is reported as a percentage in ``confidence``,
    ``raw_confidence`` and ``calibrated_confidence`` (all three carry the
    same value).  ``min_required_confidence`` is the implied probability of
    the chosen outcome as a percentage; 0.33 is used when the market or the
    outcome is not in the lookup table.
    """
    implied_by_market = {
        "MS": {"1": feats.implied_prob_home, "X": feats.implied_prob_draw, "2": feats.implied_prob_away},
        "OU25": {"Over": feats.implied_prob_over25, "Under": feats.implied_prob_under25},
        "BTTS": {"Yes": feats.implied_prob_btts_yes, "No": feats.implied_prob_btts_no},
    }
    confidence_pct = round(pick.probability * 100.0, 1)
    outcome_probs = implied_by_market.get(pick.market, {})
    break_even = outcome_probs.get(pick.pick, 0.33)

    return PickDetail(
        market=pick.market,
        pick=pick.pick,
        probability=pick.probability,
        confidence=confidence_pct,
        odds=pick.odds,
        raw_confidence=confidence_pct,
        calibrated_confidence=confidence_pct,
        min_required_confidence=round(break_even * 100, 1),
        edge=pick.edge,
        play_score=pick.play_score,
        playable=pick.playable,
        bet_grade=pick.bet_grade,
        stake_units=pick.stake_units,
        decision_reasons=pick.decision_reasons,
    )
def _pick_to_summary(pick: MarketPick) -> BetSummaryRow:
    """Condense a MarketPick into one BetSummaryRow.

    Raw and calibrated confidence both carry the pick's probability as a
    percentage rounded to one decimal.
    """
    confidence_pct = round(pick.probability * 100, 1)
    return BetSummaryRow(
        market=pick.market,
        pick=pick.pick,
        raw_confidence=confidence_pct,
        calibrated_confidence=confidence_pct,
        bet_grade=pick.bet_grade,
        playable=pick.playable,
        stake_units=pick.stake_units,
        play_score=pick.play_score,
        reasons=pick.decision_reasons,
    )
def _build_reasoning(
    feats: MatchFeatures,
    main_pick: MarketPick | None,
    risk: RiskResult,
    elapsed_ms: int,
) -> list[str]:
    """Assemble the human-readable reasoning lines for a prediction.

    Always emits the ELO, form, implied-probability, risk, data-quality and
    inference-time lines; the missing-player line appears only when the
    impact is positive and the best-edge line only when there is a main pick.
    """
    lines: list[str] = [
        f"ELO: {feats.home_elo:.0f} vs {feats.away_elo:.0f} (diff: {feats.elo_diff:+.0f})",
        (
            f"Form (last 5): Home {feats.home_avg_goals_scored:.1f}GF/{feats.home_avg_goals_conceded:.1f}GA "
            f"— Away {feats.away_avg_goals_scored:.1f}GF/{feats.away_avg_goals_conceded:.1f}GA"
        ),
        (
            f"Implied probs: H={feats.implied_prob_home:.0%} D={feats.implied_prob_draw:.0%} "
            f"A={feats.implied_prob_away:.0%}"
        ),
    ]

    # Optional lines sit between the fixed header and the fixed footer.
    if feats.missing_players_impact > 0:
        lines.append(f"Missing player impact: {feats.missing_players_impact:.2f}")
    if main_pick:
        lines.append(
            f"Best edge: {main_pick.market} {main_pick.pick} "
            f"{main_pick.edge:+.1%} (grade {main_pick.bet_grade})"
        )

    lines.append(f"Risk: {risk.level} (score {risk.score:.2f})")
    lines.append(f"Data quality: {feats.data_quality_score:.0%}")
    lines.append(f"Inference time: {elapsed_ms}ms")
    return lines