Add backtest pipeline, betting_brain filters, score coherence + social v3

betting_brain.py:
- HARD_MIN_SAMPLES=50 floor for calibrator bypass
- Hard vetoes for ev_edge < 0 (negative EV) and ev_edge >= 0.20 (trap range)
- BTTS muted (grid search found no profitable config)
- Per-market optimal envelopes (MS, OU25)
- Score coherence filter: main_pick must agree with score prediction
- HTFT reversal cross-check for MS picks

feature_builder.py / data_loader.py:
- Real home/away_position from data (was hardcoded 10)
- Cup detection wired into UpsetEngine
- _estimate_league_position with 300-day season filter

New scripts:
- diagnostic_backtest.py: per-bet diagnostic backtest with loss patterns
- optimize_filters.py: grid search per-market optimal thresholds
- analyze_backtest_csv.py: root-cause hypothesis testing on CSV
- compare_backtests.py: side-by-side validation with verdict
- test_score_coherence.py: smoke test for coherence filter (20/20 pass)

Reports:
- diagnostic_backtest_20260525_024437 (50-match smoke)
- diagnostic_backtest_20260525_035649 (1000-match in-sample)
- filter_optimization_patch.json (grid search winners per market)

Social poster v3:
- satori + resvg HTML/CSS rendering pipeline
- Twemoji football/basketball + flag SVGs
- caption SEO: 12 curated hashtags per post
- image SEO: descriptive filenames + .json metadata sidecar
- /health, /preview-png, /run-now endpoints

Docs:
- mds/SESSION_HANDOFF.md: full session state for cross-machine continuity
- mds/SOCIAL_POSTER_SETUP.md: API keys + test commands

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-25 20:43:28 +03:00
parent b619c2454a
commit 988ee2f50d
36 changed files with 5268 additions and 46 deletions
+201 -1
View File
@@ -39,6 +39,31 @@ class BettingBrain:
SNIPER_BLOCKED_MARKETS = {"HT", "HTFT", "OE", "CARDS", "HT_OU05", "HT_OU15"}
# Markets that lose money under every filter combination per the
# diagnostic backtest (1000 matches). Until calibration is rebuilt for
# these specifically, force NO_BET. Re-evaluate after each backtest run.
MUTED_MARKETS = {"BTTS"}
# Per-market optimal filter envelopes derived from the diagnostic
# backtest grid search (reports/filter_optimization_patch.json). Any
# pick falling OUTSIDE this envelope is vetoed. Tightens the playable
# band to the ROI-positive zone identified empirically.
#
# Each entry: {min_edge, max_edge, min_odds, max_odds,
# min_reliability, require_v27_agree}
MARKET_OPTIMAL_FILTERS = {
"MS": {
"min_edge": -0.05, "max_edge": 0.15,
"min_odds": 1.20, "max_odds": 10.0,
"min_reliability": 0.0, "require_v27_agree": True,
},
"OU25": {
"min_edge": -1.0, "max_edge": 0.15,
"min_odds": 1.80, "max_odds": 10.0,
"min_reliability": 0.0, "require_v27_agree": False,
},
}
MARKET_PRIORS = {
"DC": 4.0,
"OU15": 3.0,
@@ -86,6 +111,36 @@ class BettingBrain:
watchlist.sort(key=self._candidate_sort_key, reverse=True)
no_value.sort(key=self._candidate_sort_key, reverse=True)
# ── SCORE COHERENCE FILTER ──────────────────────────────────────
# If the model also produced a score prediction (e.g. 1-0), pick
# main_pick from the subset of candidates that would WIN at that
# score. Stops the system from recommending OU25 Üst while also
# predicting 1-0 (only 1 goal). Falls back to original list if no
# coherent candidate exists.
coherent_set = self._score_consistent_markets(guarded)
coherent_flag = False
if coherent_set:
def is_coherent(row: Dict[str, Any]) -> bool:
m = str(row.get("market") or "")
p = str(row.get("pick") or "")
return (m, p) in coherent_set
approved_coh = [r for r in approved if is_coherent(r)]
watchlist_coh = [r for r in watchlist if is_coherent(r)]
if approved_coh:
approved = approved_coh
coherent_flag = True
elif watchlist_coh:
# No coherent BET candidates — at least promote a coherent
# watch over an incoherent BET.
watchlist = watchlist_coh + [r for r in watchlist if not is_coherent(r)]
coherent_flag = True
# Tag every row so the UI/diagnostics can see what happened
for row in judged_rows.values():
row.setdefault("betting_brain", {})
row["betting_brain"]["score_coherent"] = is_coherent(row)
original_main = guarded.get("main_pick") or {}
main_pick = None
decision = "NO_BET"
@@ -142,10 +197,11 @@ class BettingBrain:
rejected = [d for d in decisions if d.get("action") == "REJECT"]
guarded["betting_brain"] = {
"version": "judge-v1",
"version": "judge-v2-score-coherent",
"decision": decision,
"reason": decision_reason,
"main_pick_key": main_key or None,
"score_coherent_filter_applied": coherent_flag,
"approved_count": len(approved),
"watchlist_count": len(watchlist),
"rejected_count": len(rejected),
@@ -243,6 +299,44 @@ class BettingBrain:
if play_score < 50.0 and not is_value_sniper:
vetoes.append("play_score_too_low")
# ── HARD EV-EDGE VETO ───────────────────────────────────────────
# The diagnostic backtest (1000 matches, 524 settled bets) showed that
# bets with ev_edge < 0 made up 76% of all picks, with an ROI of roughly -16%.
# ev_edge < 0 means "the model assigns a lower probability than the market",
# i.e. a negative-EV bet that cannot overcome the vig. Hard veto: do not play.
# The sniper override still passes (high-conviction alternative picks).
if ev_edge < 0.0 and not is_value_sniper:
vetoes.append("negative_ev_edge")
issues.append(f"ev_edge={ev_edge:.3f}_below_zero")
# Trap edge: in our diagnostic backtest every bet with ev_edge >= 0.20 was
# losing (n=10, all with an ROI of -25% or worse). If the model thinks the
# market is this wrong, it is most likely the model itself that is wrong
# (missing info, unusual match, etc.) — do not play.
if ev_edge >= 0.20 and not is_value_sniper:
vetoes.append("ev_edge_too_high_trap")
issues.append(f"ev_edge={ev_edge:.3f}_trap_range")
# ── MUTED MARKETS (grid search showed no profitable config) ──
if market in self.MUTED_MARKETS and not is_value_sniper:
vetoes.append("market_muted_by_backtest")
issues.append(f"market_{market}_muted")
# ── PER-MARKET OPTIMAL ENVELOPE (from grid search) ──
envelope = self.MARKET_OPTIMAL_FILTERS.get(market)
if envelope and not is_value_sniper:
if ev_edge < envelope["min_edge"]:
vetoes.append("outside_envelope_edge_low")
if ev_edge > envelope["max_edge"]:
vetoes.append("outside_envelope_edge_high")
if odds and odds < envelope["min_odds"]:
vetoes.append("outside_envelope_odds_low")
if odds and odds > envelope["max_odds"]:
vetoes.append("outside_envelope_odds_high")
if odds_rel < envelope["min_reliability"]:
vetoes.append("outside_envelope_reliability_low")
if envelope["require_v27_agree"] and consensus != "AGREE":
vetoes.append("outside_envelope_v27_must_agree")
if divergence is not None:
if divergence >= self.HARD_DIVERGENCE and not is_value_sniper:
score -= 42.0
@@ -635,6 +729,112 @@ class BettingBrain:
return self._safe_float(ou25.get(key)) if key else None
return None
def _score_consistent_markets(self, package: Dict[str, Any]) -> Optional[set]:
"""Build the set of (market, pick) tuples that WOULD WIN if the
model's own score prediction came true. We use this as a coherence
gate: if the model is confident about a 1-0 outcome but also wants
to play OU25 Üst, those two beliefs contradict each other — and the
score prediction is the more informative one because it aggregates
all market signals into a single most-likely scenario.
Returns None if the score prediction is missing or malformed; in
that case we skip the coherence check.
"""
score_pred = package.get("score_prediction") or {}
ft_raw = str(score_pred.get("ft") or score_pred.get("full_time") or "").strip()
ht_raw = str(score_pred.get("ht") or score_pred.get("half_time") or "").strip()
def parse(s: str) -> Optional[Tuple[int, int]]:
for sep in ("-", ":", ""):
if sep in s:
parts = s.split(sep, 1)
try:
return int(parts[0].strip()), int(parts[1].strip())
except (ValueError, IndexError):
return None
return None
ft = parse(ft_raw)
if ft is None:
return None
ht = parse(ht_raw)
fh, fa = ft
total = fh + fa
consistent: set = set()
# MS / 1X2 — single outcome
if fh > fa:
consistent.add(("MS", "1"))
consistent.add(("ML", "1"))
elif fh < fa:
consistent.add(("MS", "2"))
consistent.add(("ML", "2"))
else:
consistent.add(("MS", "X"))
consistent.add(("ML", "X"))
# DC — two of three legs win at any score
if fh >= fa:
consistent.add(("DC", "1X"))
if fh <= fa:
consistent.add(("DC", "X2"))
if fh != fa:
consistent.add(("DC", "12"))
# Over/Under main lines
for line, market in ((0.5, "OU05"), (1.5, "OU15"),
(2.5, "OU25"), (3.5, "OU35"), (4.5, "OU45")):
if total > line:
for p in ("Üst", "Ust", "Over", "OVER"):
consistent.add((market, p))
elif total < line:
for p in ("Alt", "Under", "UNDER"):
consistent.add((market, p))
# total == line → push, neither side wins → don't add
# BTTS — both teams score
if fh > 0 and fa > 0:
for p in ("Var", "KG Var", "Yes", "YES"):
consistent.add(("BTTS", p))
else:
for p in ("Yok", "KG Yok", "No", "NO"):
consistent.add(("BTTS", p))
# OE — total goals odd/even
if total % 2 == 1:
for p in ("Tek", "Odd", "ODD"):
consistent.add(("OE", p))
else:
for p in ("Çift", "Cift", "Even", "EVEN"):
consistent.add(("OE", p))
# HT-only markets (need HT score)
if ht is not None:
hh, ha = ht
ht_total = hh + ha
if hh > ha:
consistent.add(("HT", "1"))
elif hh < ha:
consistent.add(("HT", "2"))
else:
consistent.add(("HT", "X"))
for line, market in ((0.5, "HT_OU05"), (1.5, "HT_OU15"), (2.5, "HT_OU25")):
if ht_total > line:
for p in ("Üst", "Ust", "Over"):
consistent.add((market, p))
elif ht_total < line:
for p in ("Alt", "Under"):
consistent.add((market, p))
# HTFT — single combo
ht_o = "1" if hh > ha else "2" if hh < ha else "X"
ft_o = "1" if fh > fa else "2" if fh < fa else "X"
consistent.add(("HTFT", f"{ht_o}/{ft_o}"))
consistent.add(("HTFT", f"{ht_o}{ft_o}"))
return consistent
def _triple_value(self, package: Dict[str, Any], key: Optional[str]) -> Optional[Dict[str, Any]]:
if not key:
return None
+13 -1
View File
@@ -449,6 +449,12 @@ class DataLoaderMixin:
return 1.5, 1.2
return weighted_for / total_weight, weighted_against / total_weight
# Approximate European season window — Eredivisie/PL/La Liga start late
# July / mid-August, end May. Using 300 days as a buffer covers most
# competitions while excluding "career points" from previous seasons.
# When a proper seasons table lands this should query season boundaries.
_SEASON_LOOKBACK_MS = 300 * 24 * 60 * 60 * 1000
def _estimate_league_position(
self,
cur: RealDictCursor,
@@ -458,6 +464,7 @@ class DataLoaderMixin:
) -> int:
if not team_id or not league_id:
return 10
season_start_ms = before_date_ms - self._SEASON_LOOKBACK_MS
try:
cur.execute(
"""
@@ -478,6 +485,7 @@ class DataLoaderMixin:
AND m.score_home IS NOT NULL
AND m.score_away IS NOT NULL
AND m.mst_utc < %s
AND m.mst_utc >= %s
UNION ALL
SELECT
m.away_team_id AS team_id,
@@ -492,11 +500,15 @@ class DataLoaderMixin:
AND m.score_home IS NOT NULL
AND m.score_away IS NOT NULL
AND m.mst_utc < %s
AND m.mst_utc >= %s
) tm
GROUP BY tm.team_id
ORDER BY points DESC
""",
(league_id, before_date_ms, league_id, before_date_ms),
(
league_id, before_date_ms, season_start_ms,
league_id, before_date_ms, season_start_ms,
),
)
rows = cur.fetchall()
if not rows:
@@ -225,20 +225,43 @@ class FeatureBuilderMixin:
if enrichment_failures:
print(f"⚠️ Enrichment partial failures for {data.match_id}: {', '.join(enrichment_failures)}")
# ── Cup game detection (used by upset engine + elo dampening below) ──
_league_name_lower = (getattr(data, 'league_name', '') or '').lower()
_cup_keywords = ("kupa", "cup", "coupe", "copa", "coppa", "pokal",
"trophy", "shield", "ziraat", "süper kupa", "super cup",
"beker", "taça", "taca")
_is_cup_match = any(kw in _league_name_lower for kw in _cup_keywords)
# ── League size hint: top European leagues 18-20 teams, lower 16-24 ──
# We don't have a per-league team count, so fall back to 20 (standard).
# When standings infra lands this should pull from seasons table.
_league_total_teams = 20
# Upset engine features
upset_atmosphere, upset_motivation, upset_fatigue = 0.0, 0.0, 0.0
try:
upset_engine = get_upset_engine()
# Use the real position estimates from data_loader; fall back to mid-
# table (10) only when the loader couldn't compute one. Hardcoding 10
# for every team made motivation_score collapse to 0 for everyone.
_home_pos = getattr(data, 'home_position', None)
_away_pos = getattr(data, 'away_position', None)
if _home_pos is None or _home_pos <= 0:
_home_pos = 10
if _away_pos is None or _away_pos <= 0:
_away_pos = 10
upset_feats = upset_engine.get_features(
home_team_name=getattr(data, 'home_team_name', '') or '',
home_team_id=data.home_team_id,
away_team_name=getattr(data, 'away_team_name', '') or '',
league_name=getattr(data, 'league_name', '') or '',
home_position=10,
away_position=10,
home_position=_home_pos,
away_position=_away_pos,
match_date_ms=data.match_date_ms,
is_cup_match=_is_cup_match,
home_days_rest=int(home_rest),
away_days_rest=int(away_rest),
total_teams=_league_total_teams,
)
upset_atmosphere = upset_feats.get('upset_atmosphere', 0.0)
upset_motivation = upset_feats.get('upset_motivation', 0.0)
@@ -276,15 +299,10 @@ class FeatureBuilderMixin:
is_season_start = 1.0 if match_month in (7, 8, 9) else 0.0
is_season_end = 1.0 if match_month in (5, 6) else 0.0
# ── Cup game detection: dampen home advantage in feature space ──
_league_name = (getattr(data, 'league_name', '') or '').lower()
_cup_keywords = ("kupa", "cup", "coupe", "copa", "coppa", "pokal",
"trophy", "shield", "ziraat", "süper kupa", "super cup")
_is_cup = any(kw in _league_name for kw in _cup_keywords)
# ── Derived / Interaction features (V27) ──
# Cup games: home ELO advantage is ~30% weaker (rotation, lower motivation)
elo_diff = (home_elo - away_elo) * (0.70 if _is_cup else 1.0)
# Uses _is_cup_match computed earlier (before upset engine call).
elo_diff = (home_elo - away_elo) * (0.70 if _is_cup_match else 1.0)
form_elo_diff = home_form_elo_val - away_form_elo_val
attack_vs_defense_home = data.home_goals_avg - data.away_conceded_avg
attack_vs_defense_away = data.away_goals_avg - data.home_conceded_avg