Add backtest pipeline, betting_brain filters, score coherence + social v3
betting_brain.py:
- HARD_MIN_SAMPLES=50 floor for calibrator bypass
- ev_edge < 0 + >= 0.20 hard vetoes
- BTTS muted (grid search found no profitable config)
- Per-market optimal envelopes (MS, OU25)
- Score coherence filter: main_pick must agree with score prediction
- HTFT reversal cross-check for MS picks

feature_builder.py / data_loader.py:
- Real home/away_position from data (was hardcoded 10)
- Cup detection wired into UpsetEngine
- _estimate_league_position with 300-day season filter

New scripts:
- diagnostic_backtest.py: per-bet diagnostic backtest with loss patterns
- optimize_filters.py: grid search per-market optimal thresholds
- analyze_backtest_csv.py: root-cause hypothesis testing on CSV
- compare_backtests.py: side-by-side validation with verdict
- test_score_coherence.py: smoke test for coherence filter (20/20 pass)

Reports:
- diagnostic_backtest_20260525_024437 (50-match smoke)
- diagnostic_backtest_20260525_035649 (1000-match in-sample)
- filter_optimization_patch.json (grid search winners per market)

Social poster v3:
- satori + resvg HTML/CSS rendering pipeline
- Twemoji football/basketball + flag SVGs
- caption SEO: 12 curated hashtags per post
- image SEO: descriptive filenames + .json metadata sidecar
- /health, /preview-png, /run-now endpoints

Docs:
- mds/SESSION_HANDOFF.md: full session state for cross-machine continuity
- mds/SOCIAL_POSTER_SETUP.md: API keys + test commands

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -225,20 +225,43 @@ class FeatureBuilderMixin:
|
||||
if enrichment_failures:
|
||||
print(f"⚠️ Enrichment partial failures for {data.match_id}: {', '.join(enrichment_failures)}")
|
||||
|
||||
# ── Cup game detection (used by upset engine + elo dampening below) ──
|
||||
_league_name_lower = (getattr(data, 'league_name', '') or '').lower()
|
||||
_cup_keywords = ("kupa", "cup", "coupe", "copa", "coppa", "pokal",
|
||||
"trophy", "shield", "ziraat", "süper kupa", "super cup",
|
||||
"beker", "taça", "taca")
|
||||
_is_cup_match = any(kw in _league_name_lower for kw in _cup_keywords)
|
||||
|
||||
# ── League size hint: top European leagues 18-20 teams, lower 16-24 ──
|
||||
# We don't have a per-league team count, so fall back to 20 (standard).
|
||||
# When standings infra lands this should pull from seasons table.
|
||||
_league_total_teams = 20
|
||||
|
||||
# Upset engine features
|
||||
upset_atmosphere, upset_motivation, upset_fatigue = 0.0, 0.0, 0.0
|
||||
try:
|
||||
upset_engine = get_upset_engine()
|
||||
# Use the real position estimates from data_loader; fall back to mid-
|
||||
# table (10) only when the loader couldn't compute one. Hardcoding 10
|
||||
# for every team made motivation_score collapse to 0 for everyone.
|
||||
_home_pos = getattr(data, 'home_position', None)
|
||||
_away_pos = getattr(data, 'away_position', None)
|
||||
if _home_pos is None or _home_pos <= 0:
|
||||
_home_pos = 10
|
||||
if _away_pos is None or _away_pos <= 0:
|
||||
_away_pos = 10
|
||||
upset_feats = upset_engine.get_features(
|
||||
home_team_name=getattr(data, 'home_team_name', '') or '',
|
||||
home_team_id=data.home_team_id,
|
||||
away_team_name=getattr(data, 'away_team_name', '') or '',
|
||||
league_name=getattr(data, 'league_name', '') or '',
|
||||
home_position=10,
|
||||
away_position=10,
|
||||
home_position=_home_pos,
|
||||
away_position=_away_pos,
|
||||
match_date_ms=data.match_date_ms,
|
||||
is_cup_match=_is_cup_match,
|
||||
home_days_rest=int(home_rest),
|
||||
away_days_rest=int(away_rest),
|
||||
total_teams=_league_total_teams,
|
||||
)
|
||||
upset_atmosphere = upset_feats.get('upset_atmosphere', 0.0)
|
||||
upset_motivation = upset_feats.get('upset_motivation', 0.0)
|
||||
@@ -276,15 +299,10 @@ class FeatureBuilderMixin:
|
||||
is_season_start = 1.0 if match_month in (7, 8, 9) else 0.0
|
||||
is_season_end = 1.0 if match_month in (5, 6) else 0.0
|
||||
|
||||
# ── Cup game detection: dampen home advantage in feature space ──
|
||||
_league_name = (getattr(data, 'league_name', '') or '').lower()
|
||||
_cup_keywords = ("kupa", "cup", "coupe", "copa", "coppa", "pokal",
|
||||
"trophy", "shield", "ziraat", "süper kupa", "super cup")
|
||||
_is_cup = any(kw in _league_name for kw in _cup_keywords)
|
||||
|
||||
# ── Derived / Interaction features (V27) ──
|
||||
# Cup games: home ELO advantage is ~30% weaker (rotation, lower motivation)
|
||||
elo_diff = (home_elo - away_elo) * (0.70 if _is_cup else 1.0)
|
||||
# Uses _is_cup_match computed earlier (before upset engine call).
|
||||
elo_diff = (home_elo - away_elo) * (0.70 if _is_cup_match else 1.0)
|
||||
form_elo_diff = home_form_elo_val - away_form_elo_val
|
||||
attack_vs_defense_home = data.home_goals_avg - data.away_conceded_avg
|
||||
attack_vs_defense_away = data.away_goals_avg - data.home_conceded_avg
|
||||
|
||||
Reference in New Issue
Block a user