gg
Deploy Iddaai Backend / build-and-deploy (push) Successful in 33s

This commit is contained in:
2026-06-06 14:08:30 +03:00
parent 9e41407cb5
commit 1c03fa5e1c
2 changed files with 345 additions and 0 deletions
+162
View File
@@ -0,0 +1,162 @@
"""
Market Calibration Scan — find where the ODDS THEMSELVES are systematically wrong.
=================================================================================
The legit, measurable version of "odds şike": pockets (leagues / teams / bands)
where the market's implied probability does NOT match realized frequency, so a
SIMPLE rule (no model) is +EV. This is pure market inefficiency — soft pricing
in obscure leagues, persistent team bias, etc.
Discipline against false 'rigged' pockets (the multiple-comparison trap):
* split history by time into HALF-1 (discover) and HALF-2 (validate)
* a pocket counts ONLY if it is +EV in BOTH halves with enough bets each
* report realized-vs-implied gap (the miscalibration) + ROI
No model. Just odds vs outcomes. Read-only on the training CSV (104k matches
with odds). Forward 'suspicious line movement' detection needs odds_history
(currently empty) — separate, forward-only.
Usage: python scripts/market_calibration.py --min-bets 120 --fav-max 2.5
"""
from __future__ import annotations
import argparse, os, sys
import numpy as np, pandas as pd
# Best-effort switch of stdout to UTF-8 so the non-ASCII characters in the
# report (e.g. the check mark) do not fail on consoles with another encoding.
# Any failure is ignored on purpose: the script should still run.
if sys.stdout and hasattr(sys.stdout, "reconfigure"):
    try:
        sys.stdout.reconfigure(encoding="utf-8")
    except Exception:
        pass

# AI_DIR is the directory two levels above this file (the parent of the
# directory that contains the script).
_script_dir = os.path.dirname(os.path.abspath(__file__))
AI_DIR = os.path.dirname(_script_dir)

# Training CSV that main() reads.
CSV = os.path.join(AI_DIR, "data", "training_data_v27.csv")
def league_names(ids):
    """Look up league names for the given league ids.

    Best-effort by design: callers fall back to the raw id when a name is
    missing, so every failure (driver not installed, DB unreachable, bad
    input) results in an empty mapping instead of an exception.

    Args:
        ids: iterable of league ids. ``None`` entries are dropped and the
            remaining ids are stringified before being sent to the query.

    Returns:
        dict mapping ``str(id)`` to the ``name`` column of ``leagues`` for
        every row returned, or ``{}`` when there is nothing to look up or
        all attempts failed.
    """
    try:
        wanted = [str(i) for i in ids if i is not None]
        if not wanted:
            # Nothing to resolve: skip the driver import and the DB round-trip.
            return {}

        import time
        from contextlib import closing

        # Make the project package importable without growing sys.path on
        # every call.
        if AI_DIR not in sys.path:
            sys.path.insert(0, AI_DIR)
        from data.db import get_clean_dsn
        import psycopg2
        from psycopg2.extras import RealDictCursor
    except Exception:
        return {}

    attempts = 3
    for attempt in range(attempts):
        try:
            # psycopg2's own `with connection:` only ends the transaction;
            # closing() makes sure the connection itself is released.
            with closing(psycopg2.connect(get_clean_dsn())) as conn:
                with conn.cursor(cursor_factory=RealDictCursor) as cur:
                    cur.execute("SELECT id,name FROM leagues WHERE id = ANY(%s)", (wanted,))
                    return {str(r["id"]): r["name"] for r in cur.fetchall()}
        except Exception:
            # Pause before retrying, but not after the final attempt.
            if attempt < attempts - 1:
                time.sleep(1)
    return {}
def team_names(ids):
    """Look up team names for the given team ids.

    Best-effort by design: callers fall back to the raw id when a name is
    missing, so every failure (driver not installed, DB unreachable, bad
    input) results in an empty mapping instead of an exception.

    Args:
        ids: iterable of team ids. ``None`` entries are dropped and the
            remaining ids are stringified before being sent to the query.

    Returns:
        dict mapping ``str(id)`` to the ``name`` column of ``teams`` for
        every row returned, or ``{}`` when there is nothing to look up or
        all attempts failed.
    """
    try:
        wanted = [str(i) for i in ids if i is not None]
        if not wanted:
            # Nothing to resolve: skip the driver import and the DB round-trip.
            return {}

        import time
        from contextlib import closing

        # Make the project package importable without growing sys.path on
        # every call.
        if AI_DIR not in sys.path:
            sys.path.insert(0, AI_DIR)
        from data.db import get_clean_dsn
        import psycopg2
        from psycopg2.extras import RealDictCursor
    except Exception:
        return {}

    attempts = 3
    for attempt in range(attempts):
        try:
            # psycopg2's own `with connection:` only ends the transaction;
            # closing() makes sure the connection itself is released.
            with closing(psycopg2.connect(get_clean_dsn())) as conn:
                with conn.cursor(cursor_factory=RealDictCursor) as cur:
                    cur.execute("SELECT id,name FROM teams WHERE id = ANY(%s)", (wanted,))
                    return {str(r["id"]): r["name"] for r in cur.fetchall()}
        except Exception:
            # Pause before retrying, but not after the final attempt.
            if attempt < attempts - 1:
                time.sleep(1)
    return {}
def _pocket_rows(bets, groupcol, min_bets):
    """Summarise flat-bet results per group of ``groupcol``.

    Only groups with at least ``min_bets`` bets in EACH time half are kept.

    Returns:
        list of tuples ``(min_half_roi, key, n, roi, roi_half0, roi_half1,
        gap, both_pos)`` sorted best-first. ROI values are percentages,
        ``gap`` is realized win rate minus implied probability (in points)
        and ``both_pos`` is true when both halves have positive ROI.
    """
    rows = []
    for key, grp in bets.groupby(groupcol):
        first = grp[grp["half"] == 0]
        second = grp[grp["half"] == 1]
        if len(first) < min_bets or len(second) < min_bets:
            continue
        r0 = 100 * first["pnl"].mean()
        r1 = 100 * second["pnl"].mean()
        # miscalibration gap: realized - implied (positive = market underprices the favourite)
        gap = 100 * (grp["won"].mean() - grp["implied"].mean())
        rows.append((min(r0, r1), key, len(grp), 100 * grp["pnl"].mean(), r0, r1, gap, r0 > 0 and r1 > 0))
    rows.sort(reverse=True)
    return rows


def _print_pocket_row(names, key, n, roi, r0, r1, gap, both):
    """Print one table row; the check mark flags pockets that are +EV in both halves."""
    nm = (names.get(key, key) or key)[:28]
    mark = "✓" if both else ""
    print(f" {nm:<30}{n:>6}{roi:>+7.1f}{r0:>+7.1f}{r1:>+7.1f}{gap:>+7.1f} {mark}")


def main():
    """Run the calibration scan and print the league and team tables.

    Reads the training CSV, keeps matches with numeric scores, bets one unit
    on the lowest-priced 1X2 outcome of every match whose three odds are all
    above 1.0 and whose favourite price is <= ``--fav-max``, splits the
    time-sorted matches into two halves by row position, and reports groups
    whose flat-bet ROI is positive in both halves.
    """
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument("--min-bets", type=int, default=120, help="min bets PER HALF")
    ap.add_argument("--fav-max", type=float, default=2.5, help="only count favourites below this odds")
    args = ap.parse_args()

    df = pd.read_csv(CSV, low_memory=False,
                     usecols=["match_id", "league_id", "home_team_id", "away_team_id", "mst_utc",
                              "odds_ms_h", "odds_ms_d", "odds_ms_a", "score_home", "score_away"])
    df = df.sort_values("mst_utc").reset_index(drop=True)

    # Keep only matches whose both scores parse as numbers.
    sh = pd.to_numeric(df["score_home"], errors="coerce")
    sa = pd.to_numeric(df["score_away"], errors="coerce")
    ok = sh.notna() & sa.notna()
    df = df[ok].reset_index(drop=True)
    sh = sh[ok.values].values
    sa = sa[ok.values].values

    # Missing/unparseable odds become 0.0 and are rejected by `valid` below.
    odds = df[["odds_ms_h", "odds_ms_d", "odds_ms_a"]].apply(pd.to_numeric, errors="coerce").fillna(0.0).values
    valid = (odds > 1.0).all(1)
    outcome = np.where(sh > sa, 0, np.where(sh == sa, 1, 2))  # 0 home,1 draw,2 away
    fav = odds.argmin(1)
    fav_odds = odds[np.arange(len(odds)), fav]
    fav_won = (fav == outcome).astype(float)
    # Rows with missing odds have fav_odds == 0; they are filtered out by
    # `use`, so silence the divide-by-zero warning instead of letting it leak.
    with np.errstate(divide="ignore"):
        fav_implied = 1.0 / fav_odds
    pnl = np.where(fav_won > 0, fav_odds - 1.0, -1.0)
    half = (np.arange(len(df)) >= len(df) // 2).astype(int)  # 0=first half,1=second
    use = valid & (fav_odds <= args.fav_max)

    base = pd.DataFrame({
        "league": df["league_id"].astype(str).values,
        "home": df["home_team_id"].astype(str).values,
        "fav_is_home": (fav == 0),
        "won": fav_won, "implied": fav_implied, "pnl": pnl, "half": half, "use": use,
        "fav_odds": fav_odds,
    })
    b = base[base["use"]].copy()
    print(f"{len(b):,} favourite bets (odds<= {args.fav_max}); split into 2 time halves\n")
    print(f"GLOBAL favourite: realized={100*b['won'].mean():.1f}% implied={100*b['implied'].mean():.1f}% "
          f"ROI={100*b['pnl'].mean():+.2f}% (negative = vig; market roughly right)")

    def scan(groupcol, label, namefn, min_bets):
        """Print the table for one grouping of ``b`` and return the both-halves-positive rows."""
        rows = _pocket_rows(b, groupcol, min_bets)
        names = namefn([r[1] for r in rows[:40]])
        print(f"\n{'='*82}\n{label} (✓ = +EV in BOTH halves, the only trustworthy ones)\n{'='*82}")
        print(f" {'name':<30}{'n':>6}{'ROI%':>7}{'H1%':>7}{'H2%':>7}{'gap%':>7}")
        print(" " + "-" * 72)
        shown = 0
        for _, key, n, roi, r0, r1, gap, both in rows:
            # After 20 rows only pockets that are positive in both halves are listed.
            if shown >= 20 and not both:
                continue
            _print_pocket_row(names, key, n, roi, r0, r1, gap, both)
            shown += 1
            if shown >= 25:
                break
        good = [r for r in rows if r[7]]
        print(f"\n -> {len(good)} {groupcol} pockets are +EV in BOTH halves "
              f"(out of {len(rows)} with enough data)")
        return good

    scan("league", "BY LEAGUE (favourite flat bet)", league_names, args.min_bets)

    # team: only when the team is the home favourite (cleanest, most samples)
    bt = b[b["fav_is_home"]]
    team_min = max(25, args.min_bets // 3)
    rows = _pocket_rows(bt, "home", team_min)
    tn = team_names([r[1] for r in rows[:40]])
    print(f"\n{'='*82}\nBY TEAM as HOME FAVOURITE (✓ = +EV both halves)\n{'='*82}")
    print(f" {'team':<30}{'n':>6}{'ROI%':>7}{'H1%':>7}{'H2%':>7}{'gap%':>7}")
    print(" " + "-" * 72)
    for _, key, n, roi, r0, r1, gap, both in rows[:22]:
        _print_pocket_row(tn, key, n, roi, r0, r1, gap, both)
    good = [r for r in rows if r[7]]
    print(f"\n -> {len(good)} teams +EV in BOTH halves (out of {len(rows)})")
    print("\nREAD: ✓ pockets survived a time-split = candidate real inefficiencies (not noise).")
    print("Still forward-validate with CLV. No ✓ = market is efficient there; don't bet.")


if __name__ == "__main__":
    main()
+183
View File
@@ -0,0 +1,183 @@
"""
Odds Movement Monitor — forward steam / odds-anomaly ("şike" signal) detector.
=============================================================================
The only viable version of "detect odds manipulation": capture upcoming-match
odds PERIODICALLY and flag abnormal moves (steam = a price shortening fast =
money/information arriving, sometimes a fixed match). Retrospective detection is
impossible here (odds_history empty); this builds the time-series going forward.
No schema change: snapshots append to data/odds_snapshots.jsonl (reads
live_matches.odds, which the feeder refreshes every 15 min).
Run --snapshot every ~15-20 min (scheduler). Run --report anytime to see the
current movement watchlist.
For a CLOSING-time bettor the use is mainly a RISK FILTER: a match with heavy
unexplained late steam against your pick = the market knows something you don't
→ skip it. (Profiting from steam needs betting BEFORE it, i.e. early.)
Usage:
python scripts/monitor_odds_movement.py --snapshot # capture now (cron this)
python scripts/monitor_odds_movement.py --report # show movement watchlist
python scripts/monitor_odds_movement.py --report --min-move 0.10
"""
from __future__ import annotations
import argparse, json, os, sys, time, datetime
from collections import defaultdict
# Best-effort switch of stdout to UTF-8 so the arrows and Turkish market names
# print on consoles with another default encoding; failures are ignored.
if sys.stdout and hasattr(sys.stdout, "reconfigure"):
    try:
        sys.stdout.reconfigure(encoding="utf-8")
    except Exception:
        pass

# AI_DIR is the directory two levels above this file; it is put first on
# sys.path so that `data.db` can be imported by the functions below.
_script_dir = os.path.dirname(os.path.abspath(__file__))
AI_DIR = os.path.dirname(_script_dir)
sys.path.insert(0, AI_DIR)

# Append-only JSONL file holding the captured snapshots.
SNAP = os.path.join(AI_DIR, "data", "odds_snapshots.jsonl")

# markets tracked for steam (Turkish keys as stored in live_matches.odds)
TRACK = {
    "Maç Sonucu": ["1", "X", "2"],
    "2,5 Alt/Üst": ["Üst", "Alt"],
    "Karşılıklı Gol": ["Var", "Yok"],
}
def _conn():
    """Open a psycopg2 connection, trying up to three times.

    A 1.2 second pause follows every failed attempt. When all three attempts
    fail, the exception from the last attempt is re-raised.

    Returns:
        the connection object returned by ``psycopg2.connect``.
    """
    from data.db import get_clean_dsn
    import psycopg2

    failure = None
    remaining = 3
    while remaining:
        remaining -= 1
        try:
            connection = psycopg2.connect(get_clean_dsn())
        except Exception as exc:
            failure = exc
            time.sleep(1.2)
        else:
            return connection
    raise failure
def _f(x):
try: return float(x)
except (TypeError, ValueError): return None
def snapshot():
    """Append one compact odds record per qualifying match to the JSONL file at SNAP.

    Reads ``id``, ``mst_utc`` and ``odds`` from ``live_matches`` for rows with
    non-null odds whose ``mst_utc`` is later than two hours before now, keeps
    only the markets/selections listed in TRACK, and writes one JSON line
    ``{"ts", "match_id", "mst_utc", "odds"}`` per match that has at least one
    usable price. Prints a one-line summary with the number of records written.

    Raises:
        Whatever ``_conn`` or the query raises; DB errors are not swallowed here.
    """
    import math
    from contextlib import closing
    from psycopg2.extras import RealDictCursor

    now_ms = int(time.time() * 1000)
    # psycopg2's own `with connection:` only ends the transaction; closing()
    # makes sure the connection is released once the rows are fetched.
    with closing(_conn()) as conn:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute("""SELECT id, mst_utc, odds FROM live_matches
                           WHERE odds IS NOT NULL AND mst_utc > %s
                           ORDER BY mst_utc ASC""", (now_ms - 2*3600*1000,))
            rows = cur.fetchall()

    os.makedirs(os.path.dirname(SNAP), exist_ok=True)
    n = 0
    with open(SNAP, "a", encoding="utf-8") as f:
        for r in rows:
            odds = r["odds"]
            if isinstance(odds, str):
                # The column may arrive as JSON text; skip rows that do not parse.
                try:
                    odds = json.loads(odds)
                except ValueError:
                    continue
            if not isinstance(odds, dict):
                continue
            compact = {}
            for cat, sels in TRACK.items():
                cm = odds.get(cat)
                if not isinstance(cm, dict):
                    continue
                vals = {}
                for s in sels:
                    price = _f(cm.get(s))
                    # Drop missing/zero prices, and non-finite ones which would
                    # otherwise be written as NaN/Infinity (not valid JSON).
                    if price and math.isfinite(price):
                        vals[s] = price
                if vals:
                    compact[cat] = vals
            if not compact:
                continue
            f.write(json.dumps({"ts": now_ms, "match_id": r["id"],
                                "mst_utc": r["mst_utc"], "odds": compact},
                               ensure_ascii=False) + "\n")
            n += 1
    print(f"[snapshot] {datetime.datetime.now():%Y-%m-%d %H:%M} captured {n} upcoming matches -> {SNAP}")
def _names(ids):
try:
from psycopg2.extras import RealDictCursor
ids = [str(i) for i in ids]
if not ids: return {}
with _conn() as c:
with c.cursor(cursor_factory=RealDictCursor) as cur:
cur.execute("""SELECT m.id, ht.name h, at.name a
FROM matches m JOIN teams ht ON ht.id=m.home_team_id
JOIN teams at ON at.id=m.away_team_id WHERE m.id = ANY(%s)""", (ids,))
return {str(r["id"]): f"{r['h']} v {r['a']}" for r in cur.fetchall()}
except Exception:
return {}
def _ms_market(snap):
    """Return the 'Maç Sonucu' price dict of one ``(ts, mst, odds)`` snapshot, or ``{}``."""
    market = snap[2].get("Maç Sonucu")
    return market if isinstance(market, dict) else {}


def report(min_move):
    """Print the odds-movement watchlist built from the snapshot file.

    For every match with at least two snapshots, compares the first and the
    last 'Maç Sonucu' prices, picks the selection with the lowest (most
    negative) relative drift ``(last - open) / open`` and flags the match when
    the absolute drift is at least ``min_move``. Flagged matches are printed
    largest move first, together with the biggest single step between
    consecutive snapshots of that selection.

    Args:
        min_move: minimum absolute relative drift (fraction, e.g. 0.08) to flag.
    """
    if not os.path.exists(SNAP):
        print("No snapshots yet. Schedule '--snapshot' every ~15-20 min first.")
        return

    series = defaultdict(list)  # match_id -> [(ts, mst, odds_compact), ...]
    with open(SNAP, encoding="utf-8") as f:
        for line in f:
            try:
                d = json.loads(line)
            except ValueError:
                continue
            # Skip lines that are valid JSON but not a usable snapshot record,
            # instead of crashing on a missing key or an unexpected type.
            if not isinstance(d, dict):
                continue
            mid, ts, odds = d.get("match_id"), d.get("ts"), d.get("odds")
            if (not isinstance(mid, (str, int)) or not isinstance(ts, (int, float))
                    or not isinstance(odds, dict)):
                continue
            series[mid].append((ts, d.get("mst_utc"), odds))

    now_ms = int(time.time() * 1000)
    flagged = []
    for mid, snaps in series.items():
        if len(snaps) < 2:
            continue
        snaps.sort(key=lambda x: x[0])
        mst = snaps[-1][1]
        # focus on MS market
        op, la = _ms_market(snaps[0]), _ms_market(snaps[-1])
        best = None  # most-SHORTENED side = the steam (money/info) signal
        for sel in ("1", "X", "2"):
            o0, o1 = op.get(sel), la.get(sel)
            if o0 and o1 and o0 > 1.0 and o1 > 1.0:
                drift = (o1 - o0) / o0  # negative = shortened = steam
                if best is None or drift < best[4]:
                    best = (abs(drift), sel, o0, o1, drift)
        if best and abs(best[4]) >= min_move:
            # velocity: biggest single-step move on that selection
            sel = best[1]
            steps = [p for p in (_ms_market(s).get(sel) for s in snaps) if p]
            vmax = max((abs(cur - prev) / prev for prev, cur in zip(steps, steps[1:])),
                       default=0.0)
            flagged.append((best[0], mid, best[1], best[2], best[3], best[4], vmax,
                            len(snaps), mst))

    # Largest move first; ties are broken on the stringified id so ids of
    # mixed types can never be compared with each other.
    flagged.sort(key=lambda row: (row[0], str(row[1])), reverse=True)
    names = _names([row[1] for row in flagged[:30]])
    print("="*84)
    print("ODDS MOVEMENT WATCHLIST (MS market; drift = (last-open)/open; ↓ = shortened = steam)")
    print("="*84)
    if not flagged:
        print(f" No matches moved >= {min_move:.0%} yet. (Need more snapshots over time;")
        print(" monitor only sees movement once it has captured several snapshots.)")
        # still show coverage
        multi = sum(1 for s in series.values() if len(s) >= 2)
        print(f"\n coverage: {len(series)} matches tracked, {multi} with >=2 snapshots.")
        return

    print(f" {'match':<34}{'side':>5}{'open':>7}{'last':>7}{'drift':>8}{'maxStep':>8}{'snaps':>6}")
    print(" "+"-"*78)
    for ab, mid, sel, o0, o1, drift, vmax, ns, mst in flagged[:25]:
        # _names keys its result by str(id); look up and fall back with the
        # string form so integer ids resolve and can be truncated safely.
        key = str(mid)
        nm = (names.get(key) or key)[:32]
        arrow = "↓steam" if drift < 0 else "↑drift"
        ko = ""
        if mst:
            mins = (mst - now_ms)/60000
            ko = f" KO~{mins/60:.1f}h" if mins > 0 else " (started)"
        print(f" {nm:<34}{sel:>5}{o0:>7.2f}{o1:>7.2f}{100*drift:>+7.1f}%{100*vmax:>+7.1f}%{ns:>6} {arrow}{ko}")
    print(f"\n {len(flagged)} matches flagged (moved >= {min_move:.0%}).")
    print(" ↓steam on a side = market backing it hard (info/possible fix). As a closing")
    print(" bettor: treat heavy late steam AGAINST your pick as a reason to SKIP.")
def main():
    """Command-line entry point.

    ``--snapshot`` captures the current odds. The watchlist report is printed
    when ``--report`` is given, and also when ``--snapshot`` is absent (so
    running with no flags shows the report).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--snapshot", action="store_true")
    parser.add_argument("--report", action="store_true")
    parser.add_argument("--min-move", type=float, default=0.08,
                        help="flag drift >= this fraction (default 0.08)")
    opts = parser.parse_args()

    capture_requested = opts.snapshot
    if capture_requested:
        snapshot()

    # Reporting is the default action; it is skipped only for a pure
    # `--snapshot` run.
    if not capture_requested or opts.report:
        report(opts.min_move)


if __name__ == "__main__":
    main()