Add backtest pipeline, betting_brain filters, score coherence + social v3

betting_brain.py: - HARD_MIN_SAMPLES=50 floor for calibrator bypass - ev_edge < 0 + >= 0.20 hard vetoes - BTTS muted (grid search found no profitable config) - Per-market optimal envelopes (MS, OU25) - Score coherence filter: main_pick must agree with score prediction - HTFT reversal cross-check for MS picks feature_builder.py / data_loader.py: - Real home/away_position from data (was hardcoded 10) - Cup detection wired into UpsetEngine - _estimate_league_position with 300-day season filter New scripts: - diagnostic_backtest.py: per-bet diagnostic backtest with loss patterns - optimize_filters.py: grid search per-market optimal thresholds - analyze_backtest_csv.py: root-cause hypothesis testing on CSV - compare_backtests.py: side-by-side validation with verdict - test_score_coherence.py: smoke test for coherence filter (20/20 pass) Reports: - diagnostic_backtest_20260525_024437 (50-match smoke) - diagnostic_backtest_20260525_035649 (1000-match in-sample) - filter_optimization_patch.json (grid search winners per market) Social poster v3: - satori + resvg HTML/CSS rendering pipeline - Twemoji football/basketball + flag SVGs - caption SEO: 12 curated hashtags per post - image SEO: descriptive filenames + .json metadata sidecar - /health, /preview-png, /run-now endpoints Docs: - mds/SESSION_HANDOFF.md: full session state for cross-machine continuity - mds/SOCIAL_POSTER_SETUP.md: API keys + test commands Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-25 20:43:28 +03:00
parent b619c2454a
commit 988ee2f50d
36 changed files with 5268 additions and 46 deletions
@@ -39,6 +39,31 @@ class BettingBrain:

    SNIPER_BLOCKED_MARKETS = {"HT", "HTFT", "OE", "CARDS", "HT_OU05", "HT_OU15"}

+    # Markets that lose money under every filter combination per the
+    # diagnostic backtest (1000 matches). Until calibration is rebuilt for
+    # these specifically, force NO_BET. Re-evaluate after each backtest run.
+    MUTED_MARKETS = {"BTTS"}
+
+    # Per-market optimal filter envelopes derived from the diagnostic
+    # backtest grid search (reports/filter_optimization_patch.json). Any
+    # pick falling OUTSIDE this envelope is vetoed. Tightens the playable
+    # band to the ROI-positive zone identified empirically.
+    #
+    # Each entry: {min_edge, max_edge, min_odds, max_odds,
+    #              min_reliability, require_v27_agree}
+    MARKET_OPTIMAL_FILTERS = {
+        "MS": {
+            "min_edge": -0.05, "max_edge": 0.15,
+            "min_odds": 1.20,  "max_odds": 10.0,
+            "min_reliability": 0.0, "require_v27_agree": True,
+        },
+        "OU25": {
+            "min_edge": -1.0,  "max_edge": 0.15,
+            "min_odds": 1.80,  "max_odds": 10.0,
+            "min_reliability": 0.0, "require_v27_agree": False,
+        },
+    }
+
    MARKET_PRIORS = {
        "DC": 4.0,
        "OU15": 3.0,
@@ -86,6 +111,36 @@ class BettingBrain:
        watchlist.sort(key=self._candidate_sort_key, reverse=True)
        no_value.sort(key=self._candidate_sort_key, reverse=True)

+        # ── SCORE COHERENCE FILTER ──────────────────────────────────────
+        # If the model also produced a score prediction (e.g. 1-0), pick
+        # main_pick from the subset of candidates that would WIN at that
+        # score. Stops the system from recommending OU25 Üst while also
+        # predicting 1-0 (only 1 goal). Falls back to original list if no
+        # coherent candidate exists.
+        coherent_set = self._score_consistent_markets(guarded)
+        coherent_flag = False
+        if coherent_set:
+            def is_coherent(row: Dict[str, Any]) -> bool:
+                m = str(row.get("market") or "")
+                p = str(row.get("pick") or "")
+                return (m, p) in coherent_set
+
+            approved_coh = [r for r in approved if is_coherent(r)]
+            watchlist_coh = [r for r in watchlist if is_coherent(r)]
+
+            if approved_coh:
+                approved = approved_coh
+                coherent_flag = True
+            elif watchlist_coh:
+                # No coherent BET candidates — at least promote a coherent
+                # watch over an incoherent BET.
+                watchlist = watchlist_coh + [r for r in watchlist if not is_coherent(r)]
+                coherent_flag = True
+            # Tag every row so the UI/diagnostics can see what happened
+            for row in judged_rows.values():
+                row.setdefault("betting_brain", {})
+                row["betting_brain"]["score_coherent"] = is_coherent(row)
+
        original_main = guarded.get("main_pick") or {}
        main_pick = None
        decision = "NO_BET"
@@ -142,10 +197,11 @@ class BettingBrain:

        rejected = [d for d in decisions if d.get("action") == "REJECT"]
        guarded["betting_brain"] = {
-            "version": "judge-v1",
+            "version": "judge-v2-score-coherent",
            "decision": decision,
            "reason": decision_reason,
            "main_pick_key": main_key or None,
+            "score_coherent_filter_applied": coherent_flag,
            "approved_count": len(approved),
            "watchlist_count": len(watchlist),
            "rejected_count": len(rejected),
@@ -243,6 +299,44 @@ class BettingBrain:
        if play_score < 50.0 and not is_value_sniper:
            vetoes.append("play_score_too_low")

+        # ── HARD EV-EDGE VETO ───────────────────────────────────────────
+        # The diagnostic backtest (1000 matches, 524 settled bets) showed
+        # that bets with ev_edge < 0 made up 76% of all picks, at roughly
+        # -16% ROI. ev_edge < 0 means the model's probability is below the
+        # market's: a negative-EV bet that cannot beat the vig. Hard veto.
+        # Value-sniper picks (high-conviction alternatives) are still exempt.
+        if ev_edge < 0.0 and not is_value_sniper:
+            vetoes.append("negative_ev_edge")
+            issues.append(f"ev_edge={ev_edge:.3f}_below_zero")
+        # Trap edge: in our diagnostic backtest every bet with ev_edge >= 0.20
+        # was losing (n=10, all at -25% ROI or worse). When the model thinks
+        # the market is this far off, the model itself is probably the one
+        # that is wrong (missing info, odd match, etc.), so do not play.
+        if ev_edge >= 0.20 and not is_value_sniper:
+            vetoes.append("ev_edge_too_high_trap")
+            issues.append(f"ev_edge={ev_edge:.3f}_trap_range")
+
+        # ── MUTED MARKETS (grid search showed no profitable config) ──
+        if market in self.MUTED_MARKETS and not is_value_sniper:
+            vetoes.append("market_muted_by_backtest")
+            issues.append(f"market_{market}_muted")
+
+        # ── PER-MARKET OPTIMAL ENVELOPE (from grid search) ──
+        envelope = self.MARKET_OPTIMAL_FILTERS.get(market)
+        if envelope and not is_value_sniper:
+            if ev_edge < envelope["min_edge"]:
+                vetoes.append("outside_envelope_edge_low")
+            if ev_edge > envelope["max_edge"]:
+                vetoes.append("outside_envelope_edge_high")
+            if odds and odds < envelope["min_odds"]:
+                vetoes.append("outside_envelope_odds_low")
+            if odds and odds > envelope["max_odds"]:
+                vetoes.append("outside_envelope_odds_high")
+            if odds_rel < envelope["min_reliability"]:
+                vetoes.append("outside_envelope_reliability_low")
+            if envelope["require_v27_agree"] and consensus != "AGREE":
+                vetoes.append("outside_envelope_v27_must_agree")
+
        if divergence is not None:
            if divergence >= self.HARD_DIVERGENCE and not is_value_sniper:
                score -= 42.0
@@ -635,6 +729,112 @@ class BettingBrain:
            return self._safe_float(ou25.get(key)) if key else None
        return None

+    def _score_consistent_markets(self, package: Dict[str, Any]) -> Optional[set]:
+        """Return the (market, pick) pairs that win at the predicted score.
+
+        Acts as a coherence gate: a pick that would lose at the model's own
+        predicted score (e.g. OU25 Üst next to a 1-0 prediction) contradicts
+        that prediction. Every winning pick is emitted under each spelling
+        listed below (Turkish, English, upper-case) so that an exact,
+        case-sensitive ``(market, pick)`` membership test can match it.
+
+        Args:
+            package: Prediction package. Its ``score_prediction`` mapping is
+                read via the ``ft`` / ``full_time`` and ``ht`` / ``half_time``
+                keys; scores look like "2-1" ("-", ":" or "–" as separator).
+
+        Returns:
+            A set of ``(market, pick)`` tuples, or ``None`` when the
+            full-time score is missing or malformed, in which case the
+            coherence check should be skipped. HT, HT_OU* and HTFT pairs are
+            only present when the half-time score parses as well.
+        """
+        # Accepted spellings per pick. The same over/under aliases serve the
+        # full-time and half-time lines so both accept identical picks.
+        over = ("Üst", "Ust", "Over", "OVER")
+        under = ("Alt", "Under", "UNDER")
+        btts_yes = ("Var", "KG Var", "Yes", "YES")
+        btts_no = ("Yok", "KG Yok", "No", "NO")
+        odd = ("Tek", "Odd", "ODD")
+        even = ("Çift", "Cift", "Even", "EVEN")
+        ft_lines = ((0.5, "OU05"), (1.5, "OU15"), (2.5, "OU25"),
+                    (3.5, "OU35"), (4.5, "OU45"))
+        ht_lines = ((0.5, "HT_OU05"), (1.5, "HT_OU15"), (2.5, "HT_OU25"))
+
+        def parse(s: str) -> Optional[Tuple[int, int]]:
+            """Parse "H<sep>A" into goal counts; None if not a valid score."""
+            for sep in ("-", ":", "–"):
+                if sep in s:
+                    left, right = s.split(sep, 1)
+                    try:
+                        goals = int(left.strip()), int(right.strip())
+                    except ValueError:
+                        return None
+                    # "1--2" would otherwise parse as (1, -2); goal counts
+                    # can never be negative.
+                    return goals if min(goals) >= 0 else None
+            return None
+
+        def outcome(home: int, away: int) -> str:
+            """Return the 1 / X / 2 result label for a score."""
+            return "1" if home > away else "2" if home < away else "X"
+
+        def total_pairs(goals: int, lines) -> set:
+            """Return the winning over/under pairs for each goal line."""
+            pairs = set()
+            for line, market in lines:
+                # goals == line would be a push: neither side wins. It
+                # cannot occur with the half-goal lines used here.
+                if goals != line:
+                    picks = over if goals > line else under
+                    pairs.update((market, p) for p in picks)
+            return pairs
+
+        score_pred = package.get("score_prediction") or {}
+        if not hasattr(score_pred, "get"):
+            # A bare string/list has no .get(); treat it as malformed input.
+            return None
+        ft_raw = str(score_pred.get("ft") or score_pred.get("full_time") or "").strip()
+        ht_raw = str(score_pred.get("ht") or score_pred.get("half_time") or "").strip()
+
+        ft = parse(ft_raw)
+        if ft is None:
+            return None
+        ht = parse(ht_raw)
+
+        fh, fa = ft
+        total = fh + fa
+        ft_o = outcome(fh, fa)
+
+        # MS / ML (1X2) — exactly one outcome wins
+        consistent: set = {("MS", ft_o), ("ML", ft_o)}
+
+        # DC — the two double-chance legs containing the outcome win
+        consistent.update(("DC", leg) for leg in ("1X", "X2", "12") if ft_o in leg)
+
+        # Over/Under main lines
+        consistent.update(total_pairs(total, ft_lines))
+
+        # BTTS — both teams score
+        btts = btts_yes if fh > 0 and fa > 0 else btts_no
+        consistent.update(("BTTS", p) for p in btts)
+
+        # OE — total goals odd/even
+        parity = odd if total % 2 == 1 else even
+        consistent.update(("OE", p) for p in parity)
+
+        # HT-only markets (need an HT score)
+        if ht is not None:
+            hh, ha = ht
+            ht_o = outcome(hh, ha)
+            consistent.add(("HT", ht_o))
+            consistent.update(total_pairs(hh + ha, ht_lines))
+            # HTFT — single combo, in both "1/X" and "1X" spellings
+            consistent.add(("HTFT", f"{ht_o}/{ft_o}"))
+            consistent.add(("HTFT", f"{ht_o}{ft_o}"))
+
+        return consistent
+
    def _triple_value(self, package: Dict[str, Any], key: Optional[str]) -> Optional[Dict[str, Any]]:
        if not key:
            return None