Add backtest pipeline, betting_brain filters, score coherence + social v3

betting_brain.py:
- HARD_MIN_SAMPLES=50 floor for calibrator bypass
- Hard vetoes for ev_edge < 0 and ev_edge >= 0.20
- BTTS muted (grid search found no profitable config)
- Per-market optimal envelopes (MS, OU25)
- Score coherence filter: main_pick must agree with score prediction
- HTFT reversal cross-check for MS picks

feature_builder.py / data_loader.py:
- Real home/away_position from data (was hardcoded 10)
- Cup detection wired into UpsetEngine
- _estimate_league_position with 300-day season filter

New scripts:
- diagnostic_backtest.py: per-bet diagnostic backtest with loss patterns
- optimize_filters.py: grid search per-market optimal thresholds
- analyze_backtest_csv.py: root-cause hypothesis testing on CSV
- compare_backtests.py: side-by-side validation with verdict
- test_score_coherence.py: smoke test for coherence filter (20/20 pass)

Reports:
- diagnostic_backtest_20260525_024437 (50-match smoke)
- diagnostic_backtest_20260525_035649 (1000-match in-sample)
- filter_optimization_patch.json (grid search winners per market)

Social poster v3:
- satori + resvg HTML/CSS rendering pipeline
- Twemoji football/basketball + flag SVGs
- caption SEO: 12 curated hashtags per post
- image SEO: descriptive filenames + .json metadata sidecar
- /health, /preview-png, /run-now endpoints

Docs:
- mds/SESSION_HANDOFF.md: full session state for cross-machine continuity
- mds/SOCIAL_POSTER_SETUP.md: API keys + test commands

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-25 20:43:28 +03:00
parent b619c2454a
commit 988ee2f50d
36 changed files with 5268 additions and 46 deletions
@@ -39,6 +39,31 @@ class BettingBrain:

    SNIPER_BLOCKED_MARKETS = {"HT", "HTFT", "OE", "CARDS", "HT_OU05", "HT_OU15"}

+    # Markets that lose money under every filter combination per the
+    # diagnostic backtest (1000 matches). Until calibration is rebuilt for
+    # these specifically, force NO_BET. Re-evaluate after each backtest run.
+    MUTED_MARKETS = {"BTTS"}
+
+    # Per-market optimal filter envelopes derived from the diagnostic
+    # backtest grid search (reports/filter_optimization_patch.json). Any
+    # pick falling OUTSIDE this envelope is vetoed. Tightens the playable
+    # band to the ROI-positive zone identified empirically.
+    #
+    # Each entry: {min_edge, max_edge, min_odds, max_odds,
+    #              min_reliability, require_v27_agree}
+    MARKET_OPTIMAL_FILTERS = {
+        "MS": {
+            "min_edge": -0.05, "max_edge": 0.15,
+            "min_odds": 1.20,  "max_odds": 10.0,
+            "min_reliability": 0.0, "require_v27_agree": True,
+        },
+        "OU25": {
+            "min_edge": -1.0,  "max_edge": 0.15,
+            "min_odds": 1.80,  "max_odds": 10.0,
+            "min_reliability": 0.0, "require_v27_agree": False,
+        },
+    }
+
    MARKET_PRIORS = {
        "DC": 4.0,
        "OU15": 3.0,
@@ -86,6 +111,36 @@ class BettingBrain:
        watchlist.sort(key=self._candidate_sort_key, reverse=True)
        no_value.sort(key=self._candidate_sort_key, reverse=True)

+        # ── SCORE COHERENCE FILTER ──────────────────────────────────────
+        # If the model also produced a score prediction (e.g. 1-0), pick
+        # main_pick from the subset of candidates that would WIN at that
+        # score. Stops the system from recommending OU25 Üst while also
+        # predicting 1-0 (only 1 goal). Falls back to original list if no
+        # coherent candidate exists.
+        coherent_set = self._score_consistent_markets(guarded)
+        coherent_flag = False
+        if coherent_set:
+            def is_coherent(row: Dict[str, Any]) -> bool:
+                m = str(row.get("market") or "")
+                p = str(row.get("pick") or "")
+                return (m, p) in coherent_set
+
+            approved_coh = [r for r in approved if is_coherent(r)]
+            watchlist_coh = [r for r in watchlist if is_coherent(r)]
+
+            if approved_coh:
+                approved = approved_coh
+                coherent_flag = True
+            elif watchlist_coh:
+                # No coherent BET candidates — move coherent watchlist rows
+                # to the front of the watchlist; `approved` is left unchanged.
+                watchlist = watchlist_coh + [r for r in watchlist if not is_coherent(r)]
+                coherent_flag = True
+            # Tag every row so the UI/diagnostics can see what happened
+            for row in judged_rows.values():
+                row.setdefault("betting_brain", {})
+                row["betting_brain"]["score_coherent"] = is_coherent(row)
+
        original_main = guarded.get("main_pick") or {}
        main_pick = None
        decision = "NO_BET"
@@ -142,10 +197,11 @@ class BettingBrain:

        rejected = [d for d in decisions if d.get("action") == "REJECT"]
        guarded["betting_brain"] = {
-            "version": "judge-v1",
+            "version": "judge-v2-score-coherent",
            "decision": decision,
            "reason": decision_reason,
            "main_pick_key": main_key or None,
+            "score_coherent_filter_applied": coherent_flag,
            "approved_count": len(approved),
            "watchlist_count": len(watchlist),
            "rejected_count": len(rejected),
@@ -243,6 +299,44 @@ class BettingBrain:
        if play_score < 50.0 and not is_value_sniper:
            vetoes.append("play_score_too_low")

+        # ── HARD EV-EDGE VETO ───────────────────────────────────────────
+        # Diagnostic backtest (1000 matches, 524 settled bets) showed that
+        # bets with ev_edge < 0 made up 76% of all picks with ROI around -16%.
+        # ev_edge < 0 = "the model assigns a lower probability than the
+        # market" = a negative-EV bet that cannot overcome the vig. Hard veto.
+        # The sniper override still passes (high-conviction alternative picks).
+        if ev_edge < 0.0 and not is_value_sniper:
+            vetoes.append("negative_ev_edge")
+            issues.append(f"ev_edge={ev_edge:.3f}_below_zero")
+        # Trap edge: in our diagnostic backtest every bet with ev_edge >= 0.20
+        # lost (n=10, all at -25% ROI or worse). If the model thinks the market
+        # is this wrong, the model itself is probably the one that is wrong
+        # (missing info, unusual match, etc.) — do not play.
+        if ev_edge >= 0.20 and not is_value_sniper:
+            vetoes.append("ev_edge_too_high_trap")
+            issues.append(f"ev_edge={ev_edge:.3f}_trap_range")
+
+        # ── MUTED MARKETS (grid search showed no profitable config) ──
+        if market in self.MUTED_MARKETS and not is_value_sniper:
+            vetoes.append("market_muted_by_backtest")
+            issues.append(f"market_{market}_muted")
+
+        # ── PER-MARKET OPTIMAL ENVELOPE (from grid search) ──
+        envelope = self.MARKET_OPTIMAL_FILTERS.get(market)
+        if envelope and not is_value_sniper:
+            if ev_edge < envelope["min_edge"]:
+                vetoes.append("outside_envelope_edge_low")
+            if ev_edge > envelope["max_edge"]:
+                vetoes.append("outside_envelope_edge_high")
+            if odds and odds < envelope["min_odds"]:
+                vetoes.append("outside_envelope_odds_low")
+            if odds and odds > envelope["max_odds"]:
+                vetoes.append("outside_envelope_odds_high")
+            if odds_rel < envelope["min_reliability"]:
+                vetoes.append("outside_envelope_reliability_low")
+            if envelope["require_v27_agree"] and consensus != "AGREE":
+                vetoes.append("outside_envelope_v27_must_agree")
+
        if divergence is not None:
            if divergence >= self.HARD_DIVERGENCE and not is_value_sniper:
                score -= 42.0
@@ -635,6 +729,112 @@ class BettingBrain:
            return self._safe_float(ou25.get(key)) if key else None
        return None

+    def _score_consistent_markets(self, package: Dict[str, Any]) -> Optional[set]:
+        """Build the set of (market, pick) tuples that WOULD WIN if the
+        model's own score prediction came true. We use this as a coherence
+        gate: if the model is confident about a 1-0 outcome but also wants
+        to play OU25 Üst, those two beliefs contradict each other — and the
+        score prediction is the more informative one because it aggregates
+        all market signals into a single most-likely scenario.
+
+        Returns None if the score prediction is missing or malformed; in
+        that case we skip the coherence check.
+        """
+        score_pred = package.get("score_prediction") or {}
+        ft_raw = str(score_pred.get("ft") or score_pred.get("full_time") or "").strip()
+        ht_raw = str(score_pred.get("ht") or score_pred.get("half_time") or "").strip()
+
+        def parse(s: str) -> Optional[Tuple[int, int]]:
+            for sep in ("-", ":", "–"):
+                if sep in s:
+                    parts = s.split(sep, 1)
+                    try:
+                        return int(parts[0].strip()), int(parts[1].strip())
+                    except (ValueError, IndexError):
+                        return None
+            return None
+
+        ft = parse(ft_raw)
+        if ft is None:
+            return None
+        ht = parse(ht_raw)
+
+        fh, fa = ft
+        total = fh + fa
+        consistent: set = set()
+
+        # MS / 1X2 — single outcome
+        if fh > fa:
+            consistent.add(("MS", "1"))
+            consistent.add(("ML", "1"))
+        elif fh < fa:
+            consistent.add(("MS", "2"))
+            consistent.add(("ML", "2"))
+        else:
+            consistent.add(("MS", "X"))
+            consistent.add(("ML", "X"))
+
+        # DC — two of three legs win at any score
+        if fh >= fa:
+            consistent.add(("DC", "1X"))
+        if fh <= fa:
+            consistent.add(("DC", "X2"))
+        if fh != fa:
+            consistent.add(("DC", "12"))
+
+        # Over/Under main lines
+        for line, market in ((0.5, "OU05"), (1.5, "OU15"),
+                              (2.5, "OU25"), (3.5, "OU35"), (4.5, "OU45")):
+            if total > line:
+                for p in ("Üst", "Ust", "Over", "OVER"):
+                    consistent.add((market, p))
+            elif total < line:
+                for p in ("Alt", "Under", "UNDER"):
+                    consistent.add((market, p))
+            # total == line cannot occur (integer total, half-goal lines) → nothing to add
+
+        # BTTS — both teams score
+        if fh > 0 and fa > 0:
+            for p in ("Var", "KG Var", "Yes", "YES"):
+                consistent.add(("BTTS", p))
+        else:
+            for p in ("Yok", "KG Yok", "No", "NO"):
+                consistent.add(("BTTS", p))
+
+        # OE — total goals odd/even
+        if total % 2 == 1:
+            for p in ("Tek", "Odd", "ODD"):
+                consistent.add(("OE", p))
+        else:
+            for p in ("Çift", "Cift", "Even", "EVEN"):
+                consistent.add(("OE", p))
+
+        # HT-only markets (need HT score)
+        if ht is not None:
+            hh, ha = ht
+            ht_total = hh + ha
+            if hh > ha:
+                consistent.add(("HT", "1"))
+            elif hh < ha:
+                consistent.add(("HT", "2"))
+            else:
+                consistent.add(("HT", "X"))
+            for line, market in ((0.5, "HT_OU05"), (1.5, "HT_OU15"), (2.5, "HT_OU25")):
+                if ht_total > line:
+                    for p in ("Üst", "Ust", "Over"):
+                        consistent.add((market, p))
+                elif ht_total < line:
+                    for p in ("Alt", "Under"):
+                        consistent.add((market, p))
+
+            # HTFT — single combo
+            ht_o = "1" if hh > ha else "2" if hh < ha else "X"
+            ft_o = "1" if fh > fa else "2" if fh < fa else "X"
+            consistent.add(("HTFT", f"{ht_o}/{ft_o}"))
+            consistent.add(("HTFT", f"{ht_o}{ft_o}"))
+
+        return consistent
+
    def _triple_value(self, package: Dict[str, Any], key: Optional[str]) -> Optional[Dict[str, Any]]:
        if not key:
            return None
@@ -449,6 +449,12 @@ class DataLoaderMixin:
            return 1.5, 1.2
        return weighted_for / total_weight, weighted_against / total_weight

+    # Approximate European season window — Eredivisie/PL/La Liga start late
+    # July / mid-August, end May. Using 300 days as a buffer covers most
+    # competitions while excluding "career points" from previous seasons.
+    # When a proper seasons table lands this should query season boundaries.
+    _SEASON_LOOKBACK_MS = 300 * 24 * 60 * 60 * 1000
+
    def _estimate_league_position(
        self,
        cur: RealDictCursor,
@@ -458,6 +464,7 @@ class DataLoaderMixin:
    ) -> int:
        if not team_id or not league_id:
            return 10
+        season_start_ms = before_date_ms - self._SEASON_LOOKBACK_MS
        try:
            cur.execute(
                """
@@ -478,6 +485,7 @@ class DataLoaderMixin:
                      AND m.score_home IS NOT NULL
                      AND m.score_away IS NOT NULL
                      AND m.mst_utc < %s
+                      AND m.mst_utc >= %s
                    UNION ALL
                    SELECT
                        m.away_team_id AS team_id,
@@ -492,11 +500,15 @@ class DataLoaderMixin:
                      AND m.score_home IS NOT NULL
                      AND m.score_away IS NOT NULL
                      AND m.mst_utc < %s
+                      AND m.mst_utc >= %s
                ) tm
                GROUP BY tm.team_id
                ORDER BY points DESC
                """,
-                (league_id, before_date_ms, league_id, before_date_ms),
+                (
+                    league_id, before_date_ms, season_start_ms,
+                    league_id, before_date_ms, season_start_ms,
+                ),
            )
            rows = cur.fetchall()
            if not rows:
@@ -225,20 +225,43 @@ class FeatureBuilderMixin:
        if enrichment_failures:
            print(f"⚠️ Enrichment partial failures for {data.match_id}: {', '.join(enrichment_failures)}")

+        # ── Cup game detection (used by upset engine + elo dampening below) ──
+        _league_name_lower = (getattr(data, 'league_name', '') or '').lower()
+        _cup_keywords = ("kupa", "cup", "coupe", "copa", "coppa", "pokal",
+                         "trophy", "shield", "ziraat", "süper kupa", "super cup",
+                         "beker", "taça", "taca")
+        _is_cup_match = any(kw in _league_name_lower for kw in _cup_keywords)
+
+        # ── League size hint: top European leagues 18-20 teams, lower 16-24 ──
+        # We don't have a per-league team count, so fall back to 20 (standard).
+        # When standings infra lands this should pull from seasons table.
+        _league_total_teams = 20
+
        # Upset engine features
        upset_atmosphere, upset_motivation, upset_fatigue = 0.0, 0.0, 0.0
        try:
            upset_engine = get_upset_engine()
+            # Use the real position estimates from data_loader; fall back to mid-
+            # table (10) only when the loader couldn't compute one. Hardcoding 10
+            # for every team made motivation_score collapse to 0 for everyone.
+            _home_pos = getattr(data, 'home_position', None)
+            _away_pos = getattr(data, 'away_position', None)
+            if _home_pos is None or _home_pos <= 0:
+                _home_pos = 10
+            if _away_pos is None or _away_pos <= 0:
+                _away_pos = 10
            upset_feats = upset_engine.get_features(
                home_team_name=getattr(data, 'home_team_name', '') or '',
                home_team_id=data.home_team_id,
                away_team_name=getattr(data, 'away_team_name', '') or '',
                league_name=getattr(data, 'league_name', '') or '',
-                home_position=10,
-                away_position=10,
+                home_position=_home_pos,
+                away_position=_away_pos,
                match_date_ms=data.match_date_ms,
+                is_cup_match=_is_cup_match,
                home_days_rest=int(home_rest),
                away_days_rest=int(away_rest),
+                total_teams=_league_total_teams,
            )
            upset_atmosphere = upset_feats.get('upset_atmosphere', 0.0)
            upset_motivation = upset_feats.get('upset_motivation', 0.0)
@@ -276,15 +299,10 @@ class FeatureBuilderMixin:
        is_season_start = 1.0 if match_month in (7, 8, 9) else 0.0
        is_season_end = 1.0 if match_month in (5, 6) else 0.0

-        # ── Cup game detection: dampen home advantage in feature space ──
-        _league_name = (getattr(data, 'league_name', '') or '').lower()
-        _cup_keywords = ("kupa", "cup", "coupe", "copa", "coppa", "pokal",
-                         "trophy", "shield", "ziraat", "süper kupa", "super cup")
-        _is_cup = any(kw in _league_name for kw in _cup_keywords)
-
        # ── Derived / Interaction features (V27) ──
        # Cup games: home ELO advantage is ~30% weaker (rotation, lower motivation)
-        elo_diff = (home_elo - away_elo) * (0.70 if _is_cup else 1.0)
+        # Uses _is_cup_match computed earlier (before upset engine call).
+        elo_diff = (home_elo - away_elo) * (0.70 if _is_cup_match else 1.0)
        form_elo_diff = home_form_elo_val - away_form_elo_val
        attack_vs_defense_home = data.home_goals_avg - data.away_conceded_avg
        attack_vs_defense_away = data.away_goals_avg - data.home_conceded_avg