From 1c03fa5e1c5c5e97e1b5176b4a161e36ad926c66 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fahri=20Can=20Se=C3=A7er?=
Date: Sat, 6 Jun 2026 14:08:30 +0300
Subject: [PATCH] gg

---
 ai-engine/scripts/market_calibration.py    | 195 ++++++++++++++++++
 ai-engine/scripts/monitor_odds_movement.py | 264 +++++++++++++++++++++++++
 2 files changed, 459 insertions(+)
 create mode 100644 ai-engine/scripts/market_calibration.py
 create mode 100644 ai-engine/scripts/monitor_odds_movement.py

diff --git a/ai-engine/scripts/market_calibration.py b/ai-engine/scripts/market_calibration.py
new file mode 100644
index 0000000..e3859c8
--- /dev/null
+++ b/ai-engine/scripts/market_calibration.py
@@ -0,0 +1,195 @@
+"""
+Market Calibration Scan — find where the ODDS THEMSELVES are systematically wrong.
+=================================================================================
+The legit, measurable version of "odds şike": pockets (leagues / teams / bands)
+where the market's implied probability does NOT match realized frequency, so a
+SIMPLE rule (no model) is +EV. This is pure market inefficiency — soft pricing
+in obscure leagues, persistent team bias, etc.
+
+Discipline against false 'rigged' pockets (the multiple-comparison trap):
+  * split history by time into HALF-1 (discover) and HALF-2 (validate)
+  * a pocket counts ONLY if it is +EV in BOTH halves with enough bets each
+  * report realized-vs-implied gap (the miscalibration) + ROI
+
+No model. Just odds vs outcomes. Read-only on the training CSV (104k matches
+with odds). Forward 'suspicious line movement' detection needs odds_history
+(currently empty) — separate, forward-only.
+
+Usage: python scripts/market_calibration.py --min-bets 120 --fav-max 2.5
+"""
+from __future__ import annotations
+
+import argparse
+import os
+import sys
+import time
+from contextlib import closing
+
+import numpy as np
+import pandas as pd
+
+if sys.stdout and hasattr(sys.stdout, "reconfigure"):
+    try:
+        sys.stdout.reconfigure(encoding="utf-8")
+    except Exception:
+        pass  # best effort: keep the default encoding if the stream refuses
+
+AI_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+CSV = os.path.join(AI_DIR, "data", "training_data_v27.csv")
+
+
+def _lookup_names(table, ids, attempts=3):
+    """Return ``{str(id): name}`` for ``ids`` found in ``table`` (best effort).
+
+    Names are cosmetic, so every failure (missing driver, unreachable DB, bad
+    query) degrades to an empty dict and callers fall back to the raw ids.
+    ``table`` must be a trusted identifier: it is interpolated into the SQL.
+    """
+    wanted = [str(i) for i in ids if i is not None]
+    if not wanted:
+        return {}
+    try:
+        sys.path.insert(0, AI_DIR)
+        from data.db import get_clean_dsn
+        import psycopg2
+        from psycopg2.extras import RealDictCursor
+    except Exception:
+        return {}
+    for _ in range(attempts):
+        try:
+            # psycopg2's own connection context manager only ends the
+            # transaction; closing() is what actually releases the connection.
+            with closing(psycopg2.connect(get_clean_dsn())) as conn:
+                with conn.cursor(cursor_factory=RealDictCursor) as cur:
+                    cur.execute(f"SELECT id,name FROM {table} WHERE id = ANY(%s)", (wanted,))
+                    return {str(r["id"]): r["name"] for r in cur.fetchall()}
+        except Exception:
+            time.sleep(1)
+    return {}
+
+
+def league_names(ids):
+    """Map league ids to league names; ``{}`` if the lookup fails."""
+    return _lookup_names("leagues", ids)
+
+
+def team_names(ids):
+    """Map team ids to team names; ``{}`` if the lookup fails."""
+    return _lookup_names("teams", ids)
+
+
+def _pocket_rows(bets, groupcol, min_bets):
+    """Score every ``groupcol`` pocket of ``bets`` on both time halves.
+
+    Returns tuples ``(min(H1, H2), key, n, ROI%, H1 ROI%, H2 ROI%, gap%, both)``
+    sorted best-first, where ``both`` is True when both halves are +EV.
+    Pockets with fewer than ``min_bets`` bets in either half are dropped.
+    """
+    rows = []
+    for key, d in bets.groupby(groupcol):
+        h0 = d[d["half"] == 0]
+        h1 = d[d["half"] == 1]
+        # NOTE(review): this guard and the r0/r1/gap formulas were lost from the
+        # patch text and are reconstructed from the docstring and the column
+        # headers (H1%, H2%, gap%) - confirm against the original commit.
+        if len(h0) < min_bets or len(h1) < min_bets:
+            continue
+        r0 = 100 * h0["pnl"].mean()
+        r1 = 100 * h1["pnl"].mean()
+        gap = 100 * (d["won"].mean() - d["implied"].mean())
+        rows.append((min(r0, r1), key, len(d), 100 * d["pnl"].mean(),
+                     r0, r1, gap, r0 > 0 and r1 > 0))
+    rows.sort(reverse=True)
+    return rows
+
+
+def _print_header(title, first_col):
+    """Print the banner and column headings shared by both tables."""
+    rule = "=" * 82
+    print(f"\n{rule}\n{title}\n{rule}")
+    print(f" {first_col:<30}{'n':>6}{'ROI%':>7}{'H1%':>7}{'H2%':>7}{'gap%':>7} ✓")
+    print(" " + "-" * 72)
+
+
+def _print_row(names, row, width=28):
+    """Print one pocket row, preferring the human name over the raw id."""
+    _, key, n, roi, r0, r1, gap, both = row
+    nm = (names.get(key, key) or key)[:width]
+    mark = "✓" if both else ""
+    print(f" {nm:<30}{n:>6}{roi:>+7.1f}{r0:>+7.1f}{r1:>+7.1f}{gap:>+7.1f} {mark}")
+
+
+def main():
+    """Load the training CSV, flat-bet every favourite and print the pockets."""
+    ap = argparse.ArgumentParser(description=__doc__)
+    ap.add_argument("--min-bets", type=int, default=120, help="min bets PER HALF")
+    ap.add_argument("--fav-max", type=float, default=2.5, help="only count favourites below this odds")
+    args = ap.parse_args()
+
+    df = pd.read_csv(CSV, low_memory=False,
+                     usecols=["match_id", "league_id", "home_team_id", "away_team_id", "mst_utc",
+                              "odds_ms_h", "odds_ms_d", "odds_ms_a", "score_home", "score_away"])
+    df = df.sort_values("mst_utc").reset_index(drop=True)
+    sh = pd.to_numeric(df["score_home"], errors="coerce")
+    sa = pd.to_numeric(df["score_away"], errors="coerce")
+    ok = sh.notna() & sa.notna()
+    df = df[ok].reset_index(drop=True)
+    sh = sh[ok.values].values
+    sa = sa[ok.values].values
+    odds = (df[["odds_ms_h", "odds_ms_d", "odds_ms_a"]]
+            .apply(pd.to_numeric, errors="coerce").fillna(0.0).values)
+    valid = (odds > 1.0).all(1)
+    outcome = np.where(sh > sa, 0, np.where(sh == sa, 1, 2))  # 0 home,1 draw,2 away
+    fav = odds.argmin(1)
+    fav_odds = odds[np.arange(len(odds)), fav]
+    fav_won = (fav == outcome).astype(float)
+    # Rows with missing odds were filled with 0.0; they are excluded by `valid`
+    # below, so just silence the divide-by-zero warning they would raise here.
+    with np.errstate(divide="ignore"):
+        fav_implied = 1.0 / fav_odds
+    pnl = np.where(fav_won > 0, fav_odds - 1.0, -1.0)
+    half = (np.arange(len(df)) >= len(df) // 2).astype(int)  # 0=first half,1=second
+    use = valid & (fav_odds <= args.fav_max)
+
+    base = pd.DataFrame({
+        "league": df["league_id"].astype(str).values,
+        "home": df["home_team_id"].astype(str).values,
+        "fav_is_home": (fav == 0),
+        "won": fav_won, "implied": fav_implied, "pnl": pnl, "half": half, "use": use,
+        "fav_odds": fav_odds,
+    })
+    b = base[base["use"]].copy()
+    print(f"{len(b):,} favourite bets (odds<= {args.fav_max}); split into 2 time halves\n")
+    print(f"GLOBAL favourite: realized={100*b['won'].mean():.1f}% implied={100*b['implied'].mean():.1f}% "
+          f"ROI={100*b['pnl'].mean():+.2f}% (negative = vig; market roughly right)")
+
+    # --- by league -------------------------------------------------------
+    rows = _pocket_rows(b, "league", args.min_bets)
+    names = league_names([r[1] for r in rows[:40]])
+    _print_header("BY LEAGUE (favourite flat bet) (✓ = +EV in BOTH halves, the only trustworthy ones)", "name")
+    shown = 0
+    for row in rows:
+        if shown >= 20 and not row[7]:
+            continue  # past the top 20, only validated pockets are worth a line
+        _print_row(names, row)
+        shown += 1
+        if shown >= 25:
+            break
+    good = [r for r in rows if r[7]]
+    print(f"\n -> {len(good)} league pockets are +EV in BOTH halves "
+          f"(out of {len(rows)} with enough data)")
+
+    # --- by team, only as home favourite (cleanest, most samples) --------
+    rows = _pocket_rows(b[b["fav_is_home"]], "home", args.min_bets)
+    names = team_names([r[1] for r in rows[:40]])
+    _print_header("BY TEAM as HOME FAVOURITE (✓ = +EV both halves)", "team")
+    for row in rows[:22]:
+        _print_row(names, row)
+    good = [r for r in rows if r[7]]
+    print(f"\n -> {len(good)} teams +EV in BOTH halves (out of {len(rows)})")
+    print("\nREAD: ✓ pockets survived a time-split = candidate real inefficiencies (not noise).")
+    print("Still forward-validate with CLV. No ✓ = market is efficient there; don't bet.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ai-engine/scripts/monitor_odds_movement.py b/ai-engine/scripts/monitor_odds_movement.py
new file mode 100644
index 0000000..28eeb21
--- /dev/null
+++ b/ai-engine/scripts/monitor_odds_movement.py
@@ -0,0 +1,264 @@
+"""
+Odds Movement Monitor — forward steam / odds-anomaly ("şike" signal) detector.
+=============================================================================
+The only viable version of "detect odds manipulation": capture upcoming-match
+odds PERIODICALLY and flag abnormal moves (steam = a price shortening fast =
+money/information arriving, sometimes a fixed match). Retrospective detection is
+impossible here (odds_history empty); this builds the time-series going forward.
+
+No schema change: snapshots append to data/odds_snapshots.jsonl (reads
+live_matches.odds, which the feeder refreshes every 15 min).
+
+Run --snapshot every ~15-20 min (scheduler). Run --report anytime to see the
+current movement watchlist.
+
+For a CLOSING-time bettor the use is mainly a RISK FILTER: a match with heavy
+unexplained late steam against your pick = the market knows something you don't
+→ skip it. (Profiting from steam needs betting BEFORE it, i.e. early.)
+
+Usage:
+  python scripts/monitor_odds_movement.py --snapshot  # capture now (cron this)
+  python scripts/monitor_odds_movement.py --report  # show movement watchlist
+  python scripts/monitor_odds_movement.py --report --min-move 0.10
+"""
+from __future__ import annotations
+
+import argparse
+import datetime
+import json
+import os
+import sys
+import time
+from collections import defaultdict
+from contextlib import closing
+
+if sys.stdout and hasattr(sys.stdout, "reconfigure"):
+    try:
+        sys.stdout.reconfigure(encoding="utf-8")
+    except Exception:
+        pass  # best effort: keep the default encoding if the stream refuses
+
+AI_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.insert(0, AI_DIR)
+SNAP = os.path.join(AI_DIR, "data", "odds_snapshots.jsonl")
+
+# markets tracked for steam (Turkish keys as stored in live_matches.odds)
+TRACK = {"Maç Sonucu": ["1", "X", "2"],
+         "2,5 Alt/Üst": ["Üst", "Alt"],
+         "Karşılıklı Gol": ["Var", "Yok"]}
+MS = "Maç Sonucu"  # the market the report analyses
+
+
+def _conn():
+    """Open a psycopg2 connection, retrying up to three times.
+
+    Raises the last connection error when every attempt fails. The caller owns
+    the connection and must close it.
+    """
+    from data.db import get_clean_dsn
+    import psycopg2
+    last = None
+    for _ in range(3):
+        try:
+            return psycopg2.connect(get_clean_dsn())
+        except Exception as e:
+            last = e
+            time.sleep(1.2)
+    raise last
+
+
+def _f(x):
+    """Return ``x`` as a float, or None when it cannot be converted."""
+    try:
+        return float(x)
+    except (TypeError, ValueError):
+        return None
+
+
+def _compact_odds(odds):
+    """Keep only the TRACK markets/selections of ``odds`` that parse to a non-zero price."""
+    compact = {}
+    for cat, sels in TRACK.items():
+        market = odds.get(cat)
+        if not isinstance(market, dict):
+            continue
+        vals = {}
+        for sel in sels:
+            price = _f(market.get(sel))
+            if price:
+                vals[sel] = price
+        if vals:
+            compact[cat] = vals
+    return compact
+
+
+def snapshot():
+    """Append one JSON line per match with tracked odds to SNAP.
+
+    Reads live_matches rows whose mst_utc (compared against epoch
+    milliseconds) is later than two hours ago.
+    """
+    from psycopg2.extras import RealDictCursor
+    now_ms = int(time.time() * 1000)
+    n = 0
+    # closing(): psycopg2's own `with connection` only ends the transaction and
+    # would leave the connection open.
+    with closing(_conn()) as c:
+        with c.cursor(cursor_factory=RealDictCursor) as cur:
+            cur.execute("""SELECT id, mst_utc, odds FROM live_matches
+                           WHERE odds IS NOT NULL AND mst_utc > %s
+                           ORDER BY mst_utc ASC""", (now_ms - 2*3600*1000,))
+            rows = cur.fetchall()
+    os.makedirs(os.path.dirname(SNAP), exist_ok=True)
+    with open(SNAP, "a", encoding="utf-8") as f:
+        for r in rows:
+            odds = r["odds"]
+            if isinstance(odds, str):
+                try:
+                    odds = json.loads(odds)
+                except Exception:
+                    continue
+            if not isinstance(odds, dict):
+                continue
+            compact = _compact_odds(odds)
+            if not compact:
+                continue
+            f.write(json.dumps({"ts": now_ms, "match_id": r["id"],
+                                "mst_utc": r["mst_utc"], "odds": compact},
+                               ensure_ascii=False) + "\n")
+            n += 1
+    print(f"[snapshot] {datetime.datetime.now():%Y-%m-%d %H:%M} captured {n} upcoming matches -> {SNAP}")
+
+
+def _names(ids):
+    """Return ``{str(match_id): "Home v Away"}``; ``{}`` when the lookup fails."""
+    try:
+        from psycopg2.extras import RealDictCursor
+        ids = [str(i) for i in ids]
+        if not ids:
+            return {}
+        with closing(_conn()) as c:
+            with c.cursor(cursor_factory=RealDictCursor) as cur:
+                cur.execute("""SELECT m.id, ht.name h, at.name a
+                               FROM matches m JOIN teams ht ON ht.id=m.home_team_id
+                               JOIN teams at ON at.id=m.away_team_id WHERE m.id = ANY(%s)""", (ids,))
+                return {str(r["id"]): f"{r['h']} v {r['a']}" for r in cur.fetchall()}
+    except Exception:
+        return {}
+
+
+def _load_series():
+    """Group the snapshot file by match: ``{match_id: [(ts, mst_utc, odds), ...]}``.
+
+    Lines that are not valid JSON or lack the expected fields are skipped so
+    one damaged line cannot abort the whole report.
+    """
+    series = defaultdict(list)
+    with open(SNAP, encoding="utf-8") as f:
+        for line in f:
+            try:
+                d = json.loads(line)
+                odds = d["odds"]
+                if isinstance(odds, dict):
+                    series[d["match_id"]].append((d["ts"], d.get("mst_utc"), odds))
+            except (ValueError, KeyError, TypeError, AttributeError):
+                continue
+    return series
+
+
+def _ms(snap):
+    """Return the match-result prices of one snapshot tuple ({} when absent)."""
+    market = snap[2].get(MS)
+    return market if isinstance(market, dict) else {}
+
+
+def _steam(snaps):
+    """Find the most-shortened match-result selection between first and last snapshot.
+
+    Returns ``(sel, open, last, drift, max_step)`` or None when no selection has
+    usable prices (> 1.0) at both ends. ``drift`` is (last-open)/open, negative
+    when the price shortened; ``max_step`` is the largest relative move between
+    two consecutive snapshots of that selection.
+    """
+    first, last = _ms(snaps[0]), _ms(snaps[-1])
+    best = None
+    for sel in ("1", "X", "2"):
+        o0, o1 = first.get(sel), last.get(sel)
+        if o0 and o1 and o0 > 1.0 and o1 > 1.0:
+            drift = (o1 - o0) / o0
+            if best is None or drift < best[3]:
+                best = (sel, o0, o1, drift)
+    if best is None:
+        return None
+    prices = [p for p in (_ms(s).get(best[0]) for s in snaps) if p]
+    max_step = max((abs(cur - prev) / prev for prev, cur in zip(prices, prices[1:])), default=0.0)
+    return best + (max_step,)
+
+
+def report(min_move):
+    """Print matches whose most-shortened match-result price moved by >= ``min_move``.
+
+    ``min_move`` is a fraction of the opening price (0.08 = 8%).
+    """
+    if not os.path.exists(SNAP):
+        print("No snapshots yet. Schedule '--snapshot' every ~15-20 min first.")
+        return
+    series = _load_series()
+
+    now_ms = int(time.time() * 1000)
+    flagged = []
+    for mid, snaps in series.items():
+        if len(snaps) < 2:
+            continue
+        snaps.sort(key=lambda s: s[0])
+        steam = _steam(snaps)
+        if steam and abs(steam[3]) >= min_move:
+            sel, o0, o1, drift, vmax = steam
+            flagged.append((abs(drift), mid, sel, o0, o1, drift, vmax, len(snaps), snaps[-1][1]))
+    # Sort on the move size only: comparing whole tuples could end up comparing
+    # a None kick-off time with an int when the earlier fields tie.
+    flagged.sort(key=lambda f: f[0], reverse=True)
+    names = _names([f[1] for f in flagged[:30]])
+
+    print("=" * 84)
+    print("ODDS MOVEMENT WATCHLIST (MS market; drift = (last-open)/open; ↓ = shortened = steam)")
+    print("=" * 84)
+    if not flagged:
+        print(f" No matches moved >= {min_move:.0%} yet. (Need more snapshots over time;")
+        print(" monitor only sees movement once it has captured several snapshots.)")
+        # still show coverage
+        multi = sum(1 for s in series.values() if len(s) >= 2)
+        print(f"\n coverage: {len(series)} matches tracked, {multi} with >=2 snapshots.")
+        return
+    print(f" {'match':<34}{'side':>5}{'open':>7}{'last':>7}{'drift':>8}{'maxStep':>8}{'snaps':>6}")
+    print(" " + "-" * 78)
+    for _, mid, sel, o0, o1, drift, vmax, ns, mst in flagged[:25]:
+        # _names() keys are strings while match_id comes back from JSON with its
+        # original type; normalise so the lookup hits and the slice never sees an int.
+        nm = (names.get(str(mid)) or str(mid))[:32]
+        arrow = "↓steam" if drift < 0 else "↑drift"
+        ko = ""
+        if mst:
+            mins = (mst - now_ms) / 60000
+            ko = f" KO~{mins/60:.1f}h" if mins > 0 else " (started)"
+        print(f" {nm:<34}{sel:>5}{o0:>7.2f}{o1:>7.2f}{100*drift:>+7.1f}%{100*vmax:>+7.1f}%{ns:>6} {arrow}{ko}")
+    print(f"\n {len(flagged)} matches flagged (moved >= {min_move:.0%}).")
+    print(" ↓steam on a side = market backing it hard (info/possible fix). As a closing")
+    print(" bettor: treat heavy late steam AGAINST your pick as a reason to SKIP.")
+
+
+def main():
+    """Parse the CLI flags; with no flags (or with --report) print the report."""
+    ap = argparse.ArgumentParser(description=__doc__)
+    ap.add_argument("--snapshot", action="store_true")
+    ap.add_argument("--report", action="store_true")
+    ap.add_argument("--min-move", type=float, default=0.08, help="flag drift >= this fraction (default 0.08)")
+    args = ap.parse_args()
+    if args.snapshot:
+        snapshot()
+    if args.report or not args.snapshot:
+        report(args.min_move)
+
+
+if __name__ == "__main__":
+    main()