From 1c03fa5e1c5c5e97e1b5176b4a161e36ad926c66 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fahri=20Can=20Se=C3=A7er?= <fahricansecer@gmail.com>
Date: Sat, 6 Jun 2026 14:08:30 +0300
Subject: [PATCH] Add market calibration scan and odds movement monitor scripts

---
 ai-engine/scripts/market_calibration.py    | 162 ++++++++++++++++++
 ai-engine/scripts/monitor_odds_movement.py | 183 +++++++++++++++++++++
 2 files changed, 345 insertions(+)
 create mode 100644 ai-engine/scripts/market_calibration.py
 create mode 100644 ai-engine/scripts/monitor_odds_movement.py

diff --git a/ai-engine/scripts/market_calibration.py b/ai-engine/scripts/market_calibration.py
new file mode 100644
index 0000000..e3859c8
--- /dev/null
+++ b/ai-engine/scripts/market_calibration.py
@@ -0,0 +1,162 @@
+"""
+Market Calibration Scan — find where the ODDS THEMSELVES are systematically wrong.
+=================================================================================
+The legit, measurable version of "odds şike": pockets (leagues / teams / bands)
+where the market's implied probability does NOT match realized frequency, so a
+SIMPLE rule (no model) is +EV. This is pure market inefficiency — soft pricing
+in obscure leagues, persistent team bias, etc.
+
+Discipline against false 'rigged' pockets (the multiple-comparison trap):
+  * split history by time into HALF-1 (discover) and HALF-2 (validate)
+  * a pocket counts ONLY if it is +EV in BOTH halves with enough bets each
+  * report realized-vs-implied gap (the miscalibration) + ROI
+
+No model. Just odds vs outcomes. Read-only on the training CSV (104k matches
+with odds). Forward 'suspicious line movement' detection needs odds_history
+(currently empty) — separate, forward-only.
+
+Usage: python scripts/market_calibration.py --min-bets 120 --fav-max 2.5
+"""
+from __future__ import annotations
+import argparse, os, sys
+import numpy as np, pandas as pd
+
+if sys.stdout and hasattr(sys.stdout, "reconfigure"):
+    try: sys.stdout.reconfigure(encoding="utf-8")  # the report prints non-ASCII glyphs such as "✓"
+    except Exception: pass  # best effort: on failure the stream keeps its current encoding
+# AI_DIR is the parent of this script's directory; the training CSV is read from its data/ folder.
+AI_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+CSV = os.path.join(AI_DIR, "data", "training_data_v27.csv")
+
+
+def league_names(ids):
+    """Best-effort {league id (str): name} lookup; returns {} when the DB or its driver is unavailable."""
+    try:
+        sys.path.insert(0, AI_DIR)  # so that "data.db" can be imported from AI_DIR
+        from contextlib import closing  # psycopg2's own "with conn" only ends the transaction, never closes
+        from data.db import get_clean_dsn
+        import psycopg2
+        from psycopg2.extras import RealDictCursor
+        ids = [str(i) for i in ids if i is not None]
+        for _ in range(3):  # up to three attempts, pausing 1 s after each failure
+            try:
+                with closing(psycopg2.connect(get_clean_dsn())) as c, c.cursor(cursor_factory=RealDictCursor) as cur:
+                    cur.execute("SELECT id,name FROM leagues WHERE id = ANY(%s)", (ids,))
+                    return {str(r["id"]): r["name"] for r in cur.fetchall()}
+            except Exception:
+                import time; time.sleep(1)
+    except Exception: pass  # names are cosmetic: callers fall back to the raw id
+    return {}
+
+
+def team_names(ids):
+    """Best-effort {team id (str): name} lookup; returns {} when the DB or its driver is unavailable."""
+    try:
+        sys.path.insert(0, AI_DIR)  # so that "data.db" can be imported from AI_DIR
+        from contextlib import closing  # psycopg2's own "with conn" only ends the transaction, never closes
+        from data.db import get_clean_dsn
+        import psycopg2
+        from psycopg2.extras import RealDictCursor
+        ids = [str(i) for i in ids if i is not None]
+        for _ in range(3):  # up to three attempts, pausing 1 s after each failure
+            try:
+                with closing(psycopg2.connect(get_clean_dsn())) as c, c.cursor(cursor_factory=RealDictCursor) as cur:
+                    cur.execute("SELECT id,name FROM teams WHERE id = ANY(%s)", (ids,))
+                    return {str(r["id"]): r["name"] for r in cur.fetchall()}
+            except Exception:
+                import time; time.sleep(1)
+    except Exception: pass  # names are cosmetic: callers fall back to the raw id
+    return {}
+
+
+def main():
+    """Scan favourite flat bets by league and by home team; print pockets that are +EV in both time halves."""
+    ap = argparse.ArgumentParser(description=__doc__)
+    ap.add_argument("--min-bets", type=int, default=120, help="min bets PER HALF")
+    ap.add_argument("--fav-max", type=float, default=2.5, help="only count favourites below this odds")
+    args = ap.parse_args()
+
+    df = pd.read_csv(CSV, low_memory=False,
+                     usecols=["match_id","league_id","home_team_id","away_team_id","mst_utc",
+                              "odds_ms_h","odds_ms_d","odds_ms_a","score_home","score_away"])
+    df = df.sort_values("mst_utc").reset_index(drop=True)
+    sh = pd.to_numeric(df["score_home"],errors="coerce"); sa = pd.to_numeric(df["score_away"],errors="coerce")
+    ok = sh.notna()&sa.notna()
+    df = df[ok].reset_index(drop=True); sh=sh[ok.values].values; sa=sa[ok.values].values
+    O = df[["odds_ms_h","odds_ms_d","odds_ms_a"]].apply(pd.to_numeric,errors="coerce").fillna(0.0).values
+    valid = (O>1.0).all(1)
+    outcome = np.where(sh>sa,0,np.where(sh==sa,1,2))   # 0 home,1 draw,2 away
+    fav = O.argmin(1); fav_odds = O[np.arange(len(O)),fav]
+    fav_won = (fav==outcome).astype(float)
+    with np.errstate(divide="ignore"): fav_implied = 1.0/fav_odds   # missing odds are 0.0 here; "use" drops those rows
+    pnl = np.where(fav_won, fav_odds-1.0, -1.0)
+    half = (np.arange(len(df)) >= len(df)//2).astype(int)   # 0=first half,1=second
+    use = valid & (fav_odds <= args.fav_max)
+
+    base = pd.DataFrame({
+        "league": df["league_id"].astype(str).values,
+        "home": df["home_team_id"].astype(str).values,
+        "fav_is_home": (fav==0),
+        "won": fav_won, "implied": fav_implied, "pnl": pnl, "half": half, "use": use,
+        "fav_odds": fav_odds,
+    })
+    b = base[base["use"]].copy()
+    print(f"{len(b):,} favourite bets (odds<= {args.fav_max}); split into 2 time halves\n")
+    print(f"GLOBAL favourite: realized={100*b['won'].mean():.1f}% implied={100*b['implied'].mean():.1f}% "
+          f"ROI={100*b['pnl'].mean():+.2f}%  (negative = vig; market roughly right)")
+
+    def scan(groupcol, label, namefn, min_bets):  # groups b by groupcol, prints a table, returns the ✓ rows
+        rows=[]
+        for key,d in b.groupby(groupcol):
+            h0=d[d["half"]==0]; h1=d[d["half"]==1]
+            if len(h0)<min_bets or len(h1)<min_bets: continue
+            r0=100*h0["pnl"].mean(); r1=100*h1["pnl"].mean()
+            # miscalibration gap: realized - implied (positive = market underprices the favourite)
+            gap=100*(d["won"].mean()-d["implied"].mean())
+            both_pos = r0>0 and r1>0
+            rows.append((min(r0,r1), key, len(d), 100*d["pnl"].mean(), r0, r1, gap, both_pos))
+        rows.sort(reverse=True)   # best worst-half ROI first
+        names = namefn([r[1] for r in rows[:40]])
+        print(f"\n{'='*82}\n{label}  (✓ = +EV in BOTH halves, the only trustworthy ones)\n{'='*82}")
+        print(f"  {'name':<30}{'n':>6}{'ROI%':>7}{'H1%':>7}{'H2%':>7}{'gap%':>7}  ✓")
+        print("  "+"-"*72)
+        shown=0
+        for mn,key,n,roi,r0,r1,gap,both in rows:
+            if shown>=20 and not both: continue   # past 20 rows only ✓ pockets are still listed
+            nm=(names.get(key,key) or key)[:28]
+            mark = "✓" if both else ""
+            print(f"  {nm:<30}{n:>6}{roi:>+7.1f}{r0:>+7.1f}{r1:>+7.1f}{gap:>+7.1f}  {mark}")
+            shown+=1
+            if shown>=25: break
+        good=[r for r in rows if r[7]]
+        print(f"\n  -> {len(good)} {label.split()[0].lower()} pockets are +EV in BOTH halves "
+              f"(out of {len(rows)} with enough data)")
+        return good
+
+    scan("league", "BY LEAGUE (favourite flat bet)", league_names, args.min_bets)
+    # team: only when the team is the home favourite (cleanest, most samples)
+    bt = b[b["fav_is_home"]]
+    # inline team scan (scan() is bound to b, so the home-favourite subset is looped over here)
+    rows=[]
+    for key,d in bt.groupby("home"):
+        h0=d[d["half"]==0]; h1=d[d["half"]==1]
+        if len(h0)<max(25,args.min_bets//3) or len(h1)<max(25,args.min_bets//3): continue
+        r0=100*h0["pnl"].mean(); r1=100*h1["pnl"].mean()
+        gap=100*(d["won"].mean()-d["implied"].mean())
+        rows.append((min(r0,r1), key, len(d), 100*d["pnl"].mean(), r0, r1, gap, r0>0 and r1>0))
+    rows.sort(reverse=True)
+    tn=team_names([r[1] for r in rows[:40]])
+    print(f"\n{'='*82}\nBY TEAM as HOME FAVOURITE  (✓ = +EV both halves)\n{'='*82}")
+    print(f"  {'team':<30}{'n':>6}{'ROI%':>7}{'H1%':>7}{'H2%':>7}{'gap%':>7}  ✓")
+    print("  "+"-"*72)
+    for mn,key,n,roi,r0,r1,gap,both in rows[:22]:
+        nm=(tn.get(key,key) or key)[:28]; mark="✓" if both else ""
+        print(f"  {nm:<30}{n:>6}{roi:>+7.1f}{r0:>+7.1f}{r1:>+7.1f}{gap:>+7.1f}  {mark}")
+    good=[r for r in rows if r[7]]
+    print(f"\n  -> {len(good)} teams +EV in BOTH halves (out of {len(rows)})")
+    print("\nREAD: ✓ pockets survived a time-split = candidate real inefficiencies (not noise).")
+    print("Still forward-validate with CLV. No ✓ = market is efficient there; don't bet.")
+
+if __name__ == "__main__":  # run the scan only when executed as a script, not on import
+    main()
diff --git a/ai-engine/scripts/monitor_odds_movement.py b/ai-engine/scripts/monitor_odds_movement.py
new file mode 100644
index 0000000..28eeb21
--- /dev/null
+++ b/ai-engine/scripts/monitor_odds_movement.py
@@ -0,0 +1,183 @@
+"""
+Odds Movement Monitor — forward steam / odds-anomaly ("şike" signal) detector.
+=============================================================================
+The only viable version of "detect odds manipulation": capture upcoming-match
+odds PERIODICALLY and flag abnormal moves (steam = a price shortening fast =
+money/information arriving, sometimes a fixed match). Retrospective detection is
+impossible here (odds_history empty); this builds the time-series going forward.
+
+No schema change: snapshots append to data/odds_snapshots.jsonl (reads
+live_matches.odds, which the feeder refreshes every 15 min).
+
+Run --snapshot every ~15-20 min (scheduler). Run --report anytime to see the
+current movement watchlist.
+
+For a CLOSING-time bettor the use is mainly a RISK FILTER: a match with heavy
+unexplained late steam against your pick = the market knows something you don't
+→ skip it. (Profiting from steam needs betting BEFORE it, i.e. early.)
+
+Usage:
+  python scripts/monitor_odds_movement.py --snapshot     # capture now (cron this)
+  python scripts/monitor_odds_movement.py --report       # show movement watchlist
+  python scripts/monitor_odds_movement.py --report --min-move 0.10
+"""
+from __future__ import annotations
+import argparse, json, os, sys, time, datetime
+from collections import defaultdict
+
+if sys.stdout and hasattr(sys.stdout, "reconfigure"):
+    try: sys.stdout.reconfigure(encoding="utf-8")  # the report prints non-ASCII glyphs such as "↓"
+    except Exception: pass  # best effort: on failure the stream keeps its current encoding
+
+AI_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))  # parent of this script's directory
+sys.path.insert(0, AI_DIR)  # so the function-level "from data.db import ..." can resolve
+SNAP = os.path.join(AI_DIR, "data", "odds_snapshots.jsonl")  # JSON-lines file: snapshot() appends, report() reads
+
+# markets tracked for steam (Turkish keys as stored in live_matches.odds)
+TRACK = {"Maç Sonucu": ["1", "X", "2"],
+         "2,5 Alt/Üst": ["Üst", "Alt"],
+         "Karşılıklı Gol": ["Var", "Yok"]}
+
+
+def _conn():
+    """Return a new psycopg2 connection, trying up to 3 times and re-raising the last error."""
+    from data.db import get_clean_dsn
+    import psycopg2
+    failure = None
+    for _attempt in range(3):
+        try: return psycopg2.connect(get_clean_dsn())
+        except Exception as exc:
+            failure = exc; time.sleep(1.2)  # brief pause before the next attempt
+    raise failure
+
+
+def _f(x):  # float(x), or None when x cannot be converted
+    try: return float(x)
+    except (TypeError, ValueError): return None  # e.g. None or a non-numeric string
+
+
+def snapshot():
+    """Append one JSON line per live match (tracked markets only) to SNAP and print how many were written."""
+    from contextlib import closing  # psycopg2's own "with conn" only ends the transaction, never closes
+    from psycopg2.extras import RealDictCursor
+    now_ms = int(time.time() * 1000)
+    n = 0
+    with closing(_conn()) as c, c.cursor(cursor_factory=RealDictCursor) as cur:
+        cur.execute("""SELECT id, mst_utc, odds FROM live_matches
+                           WHERE odds IS NOT NULL AND mst_utc > %s
+                           ORDER BY mst_utc ASC""", (now_ms - 2*3600*1000,))  # mst_utc at most 2 h in the past
+        rows = cur.fetchall()
+    os.makedirs(os.path.dirname(SNAP), exist_ok=True)
+    with open(SNAP, "a", encoding="utf-8") as f:
+        for r in rows:
+            odds = r["odds"]
+            if isinstance(odds, str):
+                try: odds = json.loads(odds)
+                except Exception: continue
+            if not isinstance(odds, dict): continue
+            compact = {}
+            for cat, sels in TRACK.items():
+                cm = odds.get(cat)
+                if isinstance(cm, dict):
+                    vals = {s: v for s, v in ((s, _f(cm.get(s))) for s in sels) if v}  # parse each price once
+                    if vals: compact[cat] = vals
+            if not compact: continue   # none of the tracked markets carried a usable price
+            rec = {"ts": now_ms, "match_id": r["id"], "mst_utc": r["mst_utc"], "odds": compact}
+            f.write(json.dumps(rec, ensure_ascii=False) + "\n")
+            n += 1
+    print(f"[snapshot] {datetime.datetime.now():%Y-%m-%d %H:%M} captured {n} upcoming matches -> {SNAP}")
+
+
+def _names(ids):
+    """Best-effort {match id (str): "home v away"} lookup; returns {} on any failure or for no ids."""
+    try:
+        from contextlib import closing  # psycopg2's own "with conn" only ends the transaction, never closes
+        from psycopg2.extras import RealDictCursor
+        ids = [str(i) for i in ids]
+        if not ids: return {}
+        with closing(_conn()) as c, c.cursor(cursor_factory=RealDictCursor) as cur:
+            cur.execute("""SELECT m.id, ht.name h, at.name a
+                               FROM matches m JOIN teams ht ON ht.id=m.home_team_id
+                               JOIN teams at ON at.id=m.away_team_id WHERE m.id = ANY(%s)""", (ids,))
+            return {str(r["id"]): f"{r['h']} v {r['a']}" for r in cur.fetchall()}
+    except Exception: return {}  # names are cosmetic: the report falls back to the raw id
+
+
+def report(min_move):
+    """Print the watchlist of matches whose "Maç Sonucu" odds moved by >= min_move (fraction of the open)."""
+    if not os.path.exists(SNAP):
+        print("No snapshots yet. Schedule '--snapshot' every ~15-20 min first."); return
+    series = defaultdict(list)   # match_id -> [(ts, mst, odds_compact), ...]
+    with open(SNAP, encoding="utf-8") as f:
+        for line in f:
+            try:
+                d = json.loads(line)
+                series[d["match_id"]].append((d["ts"], d.get("mst_utc"), d["odds"]))
+            except (ValueError, KeyError, TypeError): continue   # blank, truncated or foreign line: skip it
+
+    now_ms = int(time.time()*1000)
+    flagged = []
+    for mid, snaps in series.items():
+        if len(snaps) < 2: continue
+        snaps.sort(key=lambda x: x[0])
+        mst = snaps[-1][1]
+        # focus on MS market
+        def ms(snap): return snap[2].get("Maç Sonucu", {})
+        op, la = ms(snaps[0]), ms(snaps[-1])
+        best = None  # most-SHORTENED side = the steam (money/info) signal
+        for sel in ("1", "X", "2"):
+            o0, o1 = op.get(sel), la.get(sel)
+            if o0 and o1 and o0 > 1.0 and o1 > 1.0:
+                drift = (o1 - o0) / o0          # negative = shortened = steam
+                if best is None or drift < best[4]:
+                    best = (abs(drift), sel, o0, o1, drift)
+        if best and abs(best[4]) >= min_move:
+            # velocity: biggest single-step move on that selection
+            steps = [x for x in (ms(s).get(best[1]) for s in snaps) if x]   # snapshots lacking it are skipped
+            vmax = max((abs(b - a) / a for a, b in zip(steps, steps[1:])), default=0.0)
+            flagged.append((best[0], mid, best[1], best[2], best[3], best[4], vmax,
+                            len(snaps), mst))
+    # largest absolute move first
+    flagged.sort(reverse=True)
+    names = _names([f[1] for f in flagged[:30]])
+
+    print("="*84)
+    print("ODDS MOVEMENT WATCHLIST  (MS market; drift = (last-open)/open; ↓ = shortened = steam)")
+    print("="*84)
+    if not flagged:
+        print(f"  No matches moved >= {min_move:.0%} yet. (Need more snapshots over time;")
+        print("   monitor only sees movement once it has captured several snapshots.)")
+        # still show coverage
+        multi = sum(1 for s in series.values() if len(s) >= 2)
+        print(f"\n  coverage: {len(series)} matches tracked, {multi} with >=2 snapshots.")
+        return
+    print(f"  {'match':<34}{'side':>5}{'open':>7}{'last':>7}{'drift':>8}{'maxStep':>8}{'snaps':>6}")
+    print("  "+"-"*78)
+    for ab, mid, sel, o0, o1, drift, vmax, ns, mst in flagged[:25]:
+        # names is keyed by str(id) while mid keeps its JSON type (may be an int): normalise both
+        nm = str(names.get(str(mid)) or mid)[:32]
+        arrow = "↓steam" if drift < 0 else "↑drift"
+        ko = ""
+        if mst:
+            mins = (mst - now_ms)/60000
+            ko = f" KO~{mins/60:.1f}h" if mins > 0 else " (started)"
+        print(f"  {nm:<34}{sel:>5}{o0:>7.2f}{o1:>7.2f}{100*drift:>+7.1f}%{100*vmax:>+7.1f}%{ns:>6}  {arrow}{ko}")
+    print(f"\n  {len(flagged)} matches flagged (moved >= {min_move:.0%}).")
+    print("  ↓steam on a side = market backing it hard (info/possible fix). As a closing")
+    print("  bettor: treat heavy late steam AGAINST your pick as a reason to SKIP.")
+
+
+def main():
+    """CLI entry point: --snapshot captures, --report prints; with neither flag it reports."""
+    parser = argparse.ArgumentParser(description=__doc__)
+    for flag in ("--snapshot", "--report"):
+        parser.add_argument(flag, action="store_true")
+    parser.add_argument("--min-move", type=float, default=0.08, help="flag drift >= this fraction (default 0.08)")
+    opts = parser.parse_args()
+    if opts.snapshot: snapshot()
+    # report when asked for explicitly, and also when no action flag was given at all
+    if opts.report or not opts.snapshot: report(opts.min_move)
+
+
+if __name__ == "__main__":  # run the CLI only when executed as a script, not on import
+    main()