first (part 2: other directories)
Deploy Iddaai Backend / build-and-deploy (push) Failing after 18s

This commit is contained in:
2026-04-16 15:11:25 +03:00
parent 7814e0bc6b
commit 2f0b85a0c7
203 changed files with 59989 additions and 0 deletions
+29
View File
@@ -0,0 +1,29 @@
"""
AI Engine V9 Feature Modules
Includes V8 features + new V9 engines (Upset, Momentum, Poisson, Context, Referee, Squad)
"""
# V20 Features
from .h2h_engine import H2HFeatureEngine, get_h2h_engine
from .elo_system import ELORatingSystem, get_elo_system
from .value_calculator import ValueCalculator, get_value_calculator
from .team_stats_engine import get_team_stats_engine
from .upset_engine import UpsetEngine, get_upset_engine
from .momentum_engine import MomentumEngine, get_momentum_engine
from .poisson_engine import PoissonEngine, get_poisson_engine
from .referee_engine import RefereeEngine, get_referee_engine
from .squad_analysis_engine import SquadAnalysisEngine, get_squad_analysis_engine
__all__ = [
'H2HFeatureEngine', 'get_h2h_engine',
'ELORatingSystem', 'get_elo_system',
'ValueCalculator', 'get_value_calculator',
'get_team_stats_engine',
'UpsetEngine', 'get_upset_engine',
'MomentumEngine', 'get_momentum_engine',
'PoissonEngine', 'get_poisson_engine',
'RefereeEngine', 'get_referee_engine',
'SquadAnalysisEngine', 'get_squad_analysis_engine',
]
+655
View File
@@ -0,0 +1,655 @@
"""
ELO Rating System V2 - Venue-Adjusted & League-Weighted
V9 Model için geliştirilmiş ELO sistemi.
V1'den Farklar:
- Lig kalitesi faktörü (Premier League vs küçük lig)
- Form decay (son maçlar daha etkili)
- Venue-adjusted ELO (ev/deplasman ayrı)
- Win probability hesaplama
"""
import os
import json
from typing import Dict, Optional, Tuple
from dataclasses import dataclass, asdict, field
from datetime import datetime
try:
import psycopg2
except ImportError:
psycopg2 = None
# "models" folder located in the parent of the directory that contains this
# file; the ELO ratings JSON snapshot (elo_ratings_v2.json) is stored there.
MODELS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'models')
@dataclass
class TeamELO:
    """ELO profile of a single team: overall, venue-specific and form ratings."""

    team_id: str
    team_name: str = ""

    # Core ratings
    overall_elo: float = 1500.0
    home_elo: float = 1500.0
    away_elo: float = 1500.0

    # Form rating, driven by the most recent results
    form_elo: float = 1500.0

    # Bookkeeping counters
    matches_played: int = 0
    home_matches: int = 0
    away_matches: int = 0
    wins: int = 0
    draws: int = 0
    losses: int = 0
    last_updated: Optional[str] = None

    # Recent results as a W/D/L character sequence (at most 5 characters)
    recent_form: str = ""

    def win_rate(self) -> float:
        """Return wins divided by matches played, or 0.0 when nothing was played."""
        played = self.matches_played
        return self.wins / played if played else 0.0

    def to_features(self) -> Dict[str, float]:
        """Return the team's ratings and summary stats as a flat feature dict."""
        features = dict(
            elo_overall=self.overall_elo,
            elo_home=self.home_elo,
            elo_away=self.away_elo,
            elo_form=self.form_elo,
            elo_matches=self.matches_played,
            elo_win_rate=self.win_rate(),
        )
        return features
# League quality factors (1.0 = average).
# Keys are lowercase and are matched as substrings of the lowercased league
# name by ELORatingSystem.get_league_quality; "default" is the fallback value.
LEAGUE_QUALITY = {
    # Top 5 European leagues
    "premier league": 1.15,
    "premier lig": 1.15,
    "la liga": 1.12,
    "bundesliga": 1.10,
    "serie a": 1.08,
    "ligue 1": 1.05,
    # Strong leagues
    "eredivisie": 1.02,
    "primeira liga": 1.02,
    "süper lig": 1.00,
    # European cups
    "champions league": 1.20,
    "şampiyonlar ligi": 1.20,
    "europa league": 1.10,
    "avrupa ligi": 1.10,
    "conference league": 1.00,
    # Mid-tier leagues
    "championship": 0.95,
    "2. bundesliga": 0.92,
    "serie b": 0.90,
    "la liga 2": 0.90,
    # Minor leagues
    "default": 0.85,
}
class ELORatingSystem:
    """
    ELO Rating System V2 - Venue-Adjusted & League-Weighted

    What this version adds:
    - Separate home/away ELO tracking
    - League quality factor
    - Form ELO (weighted toward the most recent matches)
    - K-factor adjusted by goal difference
    """
    # ELO parameters
    K_FACTOR_BASE = 32  # Base K-factor
    K_FACTOR_NEW_TEAM = 48  # Higher K for new teams (fewer than 20 matches played)
    HOME_ADVANTAGE = 65  # Home advantage, expressed in ELO points
    INITIAL_ELO = 1500
    FORM_WEIGHT = 0.7  # Weight given to the latest match when blending the form ELO
def __init__(self) -> None:
    """Create an empty rating store and immediately load persisted ratings."""
    self.ratings: Dict[str, TeamELO] = {}
    self.league_cache: Dict[str, str] = {}  # team_id -> league_name
    self.conn = None  # database connection, opened lazily by get_conn()
    # Loading happens at construction time: DB first, JSON file as fallback.
    self._load_ratings()
def _connect_db(self):
    """Open a new psycopg2 connection and cache it on ``self.conn``.

    Returns the connection, or None when psycopg2 is not installed or the
    connection attempt fails (the failure is printed, not raised).
    """
    if psycopg2 is None:
        return None
    try:
        # Imported lazily so the module can be loaded without the data package.
        from data.db import get_clean_dsn
        self.conn = psycopg2.connect(get_clean_dsn())
    except Exception as e:
        print(f"[ELO] DB connection failed: {e}")
        return None
    return self.conn
def get_conn(self):
    """Return a usable database connection, reconnecting when necessary.

    Returns:
        The cached connection when it is still open, otherwise the result of
        a fresh connection attempt. None is returned when no connection can
        be established, so callers can rely on an ``is None`` check.
    """
    if self.conn is None or self.conn.closed:
        # Drop the stale handle first: if the reconnect fails we must not
        # hand a closed connection back to the caller.
        self.conn = None
        return self._connect_db()
    return self.conn
def _load_ratings(self):
"""Rating'leri yükle — önce DB, sonra JSON fallback"""
if self._load_ratings_from_db():
return
self._load_ratings_from_json()
def _load_ratings_from_db(self) -> bool:
"""team_elo_ratings tablosundan rating'leri yükle"""
conn = self.get_conn()
if conn is None:
return False
try:
cur = conn.cursor()
cur.execute("""
SELECT ter.team_id, t.name,
ter.overall_elo, ter.home_elo, ter.away_elo,
ter.form_elo, ter.matches_played, ter.recent_form
FROM team_elo_ratings ter
LEFT JOIN teams t ON ter.team_id = t.id
""")
rows = cur.fetchall()
cur.close()
if not rows:
return False
for row in rows:
tid, name, overall, home, away, form, played, recent = row
self.ratings[str(tid)] = TeamELO(
team_id=str(tid),
team_name=name or "",
overall_elo=float(overall),
home_elo=float(home),
away_elo=float(away),
form_elo=float(form),
matches_played=int(played),
recent_form=recent or [],
)
print(f"[OK] ELO V2 ratings DB'den yuklendi ({len(self.ratings)} takim)")
return True
except Exception as e:
print(f"[WARN] ELO DB yuklenemedi, JSON'a dusuyuyor: {e}")
return False
def _load_ratings_from_json(self):
    """Fallback loader: read ratings from ``elo_ratings_v2.json`` if it exists.

    Any error while reading or building the entries is printed, not raised.
    """
    ratings_path = os.path.join(MODELS_DIR, 'elo_ratings_v2.json')
    if not os.path.exists(ratings_path):
        return
    try:
        with open(ratings_path, 'r', encoding='utf-8') as f:
            stored = json.load(f)
        for team_id in stored:
            self.ratings[team_id] = TeamELO(**stored[team_id])
        print(f"[OK] ELO V2 ratings JSON'dan yuklendi ({len(self.ratings)} takim)")
    except Exception as e:
        print(f"[WARN] ELO V2 ratings yuklenemedi: {e}")
def save_ratings(self):
    """Write all ratings to ``elo_ratings_v2.json`` inside MODELS_DIR."""
    os.makedirs(MODELS_DIR, exist_ok=True)
    ratings_path = os.path.join(MODELS_DIR, 'elo_ratings_v2.json')
    payload = {}
    for team_id, elo in self.ratings.items():
        payload[team_id] = asdict(elo)
    with open(ratings_path, 'w', encoding='utf-8') as f:
        json.dump(payload, f, indent=2, ensure_ascii=False)
    print(f"💾 ELO V2 ratings kaydedildi ({len(self.ratings)} takım)")
def get_or_create_rating(self, team_id: str, team_name: str = "") -> TeamELO:
"""Takımın ELO'sunu getir veya oluştur"""
if team_id not in self.ratings:
self.ratings[team_id] = TeamELO(team_id=team_id, team_name=team_name)
return self.ratings[team_id]
def get_league_quality(self, league_name: str) -> float:
    """Return the quality factor for a league name.

    The lowercased name is matched against the LEAGUE_QUALITY keys as
    substrings. Keys are tried longest first so that a specific entry such
    as "2. bundesliga" or "la liga 2" is not shadowed by the shorter
    "bundesliga" / "la liga" key it contains.

    Args:
        league_name: League display name; may be empty or None.

    Returns:
        The matching factor, or LEAGUE_QUALITY["default"] when the name is
        empty or no key matches.
    """
    default_quality = LEAGUE_QUALITY["default"]
    if not league_name:
        return default_quality
    league_lower = league_name.lower()
    # sorted() is stable, so keys of equal length keep their table order.
    for key in sorted(LEAGUE_QUALITY, key=len, reverse=True):
        if key != "default" and key in league_lower:
            return LEAGUE_QUALITY[key]
    return default_quality
def expected_score(self, rating_a: float, rating_b: float) -> float:
    """Return A's expected score against B on the standard ELO logistic curve.

    The value lies between 0 and 1; 0.5 means the two ratings are equal.
    """
    scaled_gap = (rating_b - rating_a) / 400
    return 1 / (1 + 10 ** scaled_gap)
def get_k_factor(self, team_elo: TeamELO, goal_diff: int,
league_quality: float = 1.0) -> float:
"""
Dinamik K-faktörü hesapla.
- Yeni takımlar için yüksek (hızlı adaptasyon)
- Gol farkı yüksekse yüksek
- Kaliteli liglerde yüksek
"""
# Temel K
if team_elo.matches_played < 20:
k = self.K_FACTOR_NEW_TEAM
else:
k = self.K_FACTOR_BASE
# Gol farkı çarpanı
if goal_diff == 1:
goal_mult = 1.0
elif goal_diff == 2:
goal_mult = 1.25
elif goal_diff == 3:
goal_mult = 1.5
else:
goal_mult = 1.75 + (goal_diff - 3) * 0.1
# Lig kalitesi çarpanı
return k * goal_mult * league_quality
def update_after_match(
    self,
    home_id: str,
    away_id: str,
    home_goals: int,
    away_goals: int,
    home_name: str = "",
    away_name: str = "",
    league_name: str = ""
):
    """Apply one finished match to both teams' ratings and counters.

    Updates, in order: win/draw/loss tallies, overall ELO (home side gets
    HOME_ADVANTAGE added when computing the expectation), venue-specific
    ELO, form ELO, match counters, the 5-character recent-form string
    (newest result first) and the last-updated timestamp.
    """
    home = self.get_or_create_rating(home_id, home_name)
    away = self.get_or_create_rating(away_id, away_name)

    # Actual result from each side's point of view
    margin = home_goals - away_goals
    if margin > 0:
        home_score, away_score = 1.0, 0.0
        home_mark, away_mark = 'W', 'L'
        home.wins += 1
        away.losses += 1
    elif margin < 0:
        home_score, away_score = 0.0, 1.0
        home_mark, away_mark = 'L', 'W'
        home.losses += 1
        away.wins += 1
    else:
        home_score, away_score = 0.5, 0.5
        home_mark, away_mark = 'D', 'D'
        home.draws += 1
        away.draws += 1

    quality = self.get_league_quality(league_name)
    k_home = self.get_k_factor(home, abs(margin), quality)
    k_away = self.get_k_factor(away, abs(margin), quality)

    # Overall ELO
    exp_overall = self.expected_score(
        home.overall_elo + self.HOME_ADVANTAGE, away.overall_elo
    )
    home.overall_elo += k_home * (home_score - exp_overall)
    away.overall_elo += k_away * (away_score - (1 - exp_overall))

    # Venue-specific ELO (home rating vs. away rating, no extra bonus)
    exp_venue = self.expected_score(home.home_elo, away.away_elo)
    home.home_elo += k_home * (home_score - exp_venue)
    away.away_elo += k_away * (away_score - (1 - exp_venue))

    # Form ELO: blend the previous value with a target derived from this result
    def blend(current: float, score: float) -> float:
        return (
            current * (1 - self.FORM_WEIGHT) +
            (1500 + (score - 0.5) * 100) * self.FORM_WEIGHT
        )

    home.form_elo = blend(home.form_elo, home_score)
    away.form_elo = blend(away.form_elo, away_score)

    # Counters
    home.matches_played += 1
    away.matches_played += 1
    home.home_matches += 1
    away.away_matches += 1

    # Recent form keeps the newest result first, capped at 5 characters
    home.recent_form = (home_mark + home.recent_form)[:5]
    away.recent_form = (away_mark + away.recent_form)[:5]

    home.last_updated = datetime.now().isoformat()
    away.last_updated = datetime.now().isoformat()
def predict_match(self, home_id: str, away_id: str) -> Dict[str, float]:
    """Estimate home-win / draw / away-win probabilities for a fixture.

    The home win expectation is the mean of the overall-ELO view (with
    HOME_ADVANTAGE) and the venue-ELO view. The draw share shrinks as the
    overall rating gap grows and is clamped to [0.15, 0.35]; the rest is
    split between the two sides. All values are rounded to 3 decimals.
    """
    home = self.get_or_create_rating(home_id)
    away = self.get_or_create_rating(away_id)

    overall_view = self.expected_score(
        home.overall_elo + self.HOME_ADVANTAGE, away.overall_elo
    )
    venue_view = self.expected_score(home.home_elo, away.away_elo)
    home_prob = (overall_view + venue_view) / 2

    # Draw estimate: base rate scaled down by the rating gap
    rating_gap = abs(home.overall_elo - away.overall_elo)
    draw_base = 0.25
    draw_prob = max(0.15, min(draw_base * (1 - rating_gap / 800), 0.35))

    # Distribute what is left between the two sides
    decisive_share = 1 - draw_prob
    return {
        "home_win": round(home_prob * decisive_share, 3),
        "draw": round(draw_prob, 3),
        "away_win": round((1 - home_prob) * decisive_share, 3),
    }
def get_match_features(self, home_id: str, away_id: str) -> Dict[str, float]:
    """Return the ELO-derived feature dict for a fixture.

    Contains overall / venue / form ratings and their differences, the
    predicted outcome probabilities, match counts capped at 100, a 0-1
    score of the recent-form string, and both teams' win rates.
    """
    home = self.get_or_create_rating(home_id)
    away = self.get_or_create_rating(away_id)
    outcome = self.predict_match(home_id, away_id)

    result_points = {'W': 1, 'D': 0.5}

    def form_to_score(form: str) -> float:
        # Average points per result; an empty form is treated as neutral.
        if not form:
            return 0.5
        total = sum(result_points.get(ch, 0) for ch in form)
        return total / max(len(form), 1)

    features = {}
    # Overall ELO
    features['elo_home_overall'] = home.overall_elo
    features['elo_away_overall'] = away.overall_elo
    features['elo_diff_overall'] = home.overall_elo - away.overall_elo
    # Venue-specific ELO
    features['elo_home_venue'] = home.home_elo
    features['elo_away_venue'] = away.away_elo
    features['elo_diff_venue'] = home.home_elo - away.away_elo
    # Form ELO
    features['elo_home_form'] = home.form_elo
    features['elo_away_form'] = away.form_elo
    features['elo_diff_form'] = home.form_elo - away.form_elo
    # Win probabilities
    features['elo_prob_home'] = outcome['home_win']
    features['elo_prob_draw'] = outcome['draw']
    features['elo_prob_away'] = outcome['away_win']
    # Experience
    features['elo_home_matches'] = min(home.matches_played, 100)
    features['elo_away_matches'] = min(away.matches_played, 100)
    # Form score
    features['elo_home_form_score'] = form_to_score(home.recent_form)
    features['elo_away_form_score'] = form_to_score(away.recent_form)
    # Win rates
    features['elo_home_win_rate'] = home.win_rate()
    features['elo_away_win_rate'] = away.win_rate()
    return features
def save_ratings_to_db(self):
    """Upsert every rating into the ``team_elo_ratings`` table.

    Rows are written in batches of 500 inside a single transaction: the
    commit happens only after all batches succeed. On any error the
    transaction is rolled back and the exception is re-raised; the cursor
    is closed in every case. When no connection is available a message is
    printed and nothing is written.
    """
    conn = self.get_conn()
    if conn is None:
        print("❌ DB bağlantısı yok, DB'ye yazılamadı!")
        return

    batch_size = 500
    teams = list(self.ratings.values())
    written = 0
    # Loop-invariant statement text; "{}" receives the rendered value tuples.
    sql_template = """
INSERT INTO team_elo_ratings
(team_id, overall_elo, home_elo, away_elo, form_elo, matches_played, recent_form, updated_at)
VALUES {}
ON CONFLICT (team_id) DO UPDATE SET
overall_elo = EXCLUDED.overall_elo,
home_elo = EXCLUDED.home_elo,
away_elo = EXCLUDED.away_elo,
form_elo = EXCLUDED.form_elo,
matches_played = EXCLUDED.matches_played,
recent_form = EXCLUDED.recent_form,
updated_at = EXCLUDED.updated_at
"""
    cur = conn.cursor()
    try:
        for start in range(0, len(teams), batch_size):
            batch = teams[start:start + batch_size]
            # mogrify() escapes each value, so the joined text is safe to
            # splice into the statement.
            values = [
                cur.mogrify(
                    "(%s, %s, %s, %s, %s, %s, %s, NOW())",
                    (
                        elo.team_id,
                        round(elo.overall_elo, 2),
                        round(elo.home_elo, 2),
                        round(elo.away_elo, 2),
                        round(elo.form_elo, 2),
                        elo.matches_played,
                        elo.recent_form[:5],
                    ),
                ).decode('utf-8')
                for elo in batch
            ]
            cur.execute(sql_template.format(", ".join(values)))
            written += len(batch)
        conn.commit()
    except Exception:
        # Leave the connection usable for later calls instead of stuck in
        # an aborted transaction, then surface the original error.
        conn.rollback()
        raise
    finally:
        cur.close()
    print(f"💾 DB'ye {written} takım ELO yazıldı (team_elo_ratings)")
def _load_top_league_ids(self) -> set:
    """Read league IDs from the first ``top_leagues.json`` that exists.

    Looks two directories above this file first, then one directory above.
    Returns an empty set (after printing a warning) when neither exists.
    """
    here = os.path.dirname(__file__)
    candidates = (
        os.path.join(here, '..', '..', 'top_leagues.json'),
        os.path.join(here, '..', 'top_leagues.json'),
    )
    found = next((path for path in candidates if os.path.exists(path)), None)
    if found is None:
        print("⚠️ top_leagues.json bulunamadı — tüm maçlar yazılacak")
        return set()
    with open(found) as f:
        ids = set(json.load(f))
    print(f"📋 {len(ids)} top lig yüklendi ({os.path.basename(found)})")
    return ids
def calculate_all_from_history(self, sport: str = 'football', reset: bool = True):
    """Replay all finished matches of ``sport`` chronologically and rebuild ELO.

    For every match the ratings as they stood *before* kick-off are queued
    for the sport's ai_features table (only for leagues listed in
    top_leagues.json, or for all leagues when that list is empty); then the
    match result is applied to the ratings. At the end the ratings are
    saved to JSON and to the database and the top teams are printed.

    Args:
        sport: Value matched against ``matches.sport``.
        reset: When True (default) the in-memory ratings are cleared first,
            so the replay starts from the initial rating. Without this,
            ratings loaded at construction time would be replayed on top
            of themselves and the stored pre-match values would already
            contain information from later matches. Pass False to keep
            the previous accumulate-on-top behaviour.
    """
    print(f"\n🔄 {sport.upper()} için ELO V2 hesaplanıyor...")
    conn = self.get_conn()
    if conn is None:
        print("❌ DB bağlantısı yok!")
        return

    top_league_ids = self._load_top_league_ids()
    if reset:
        self.ratings.clear()

    BATCH_SIZE = 1000
    batch: list = []
    processed = 0
    written = 0

    cur = conn.cursor()
    try:
        # All finished matches in kick-off order (match id and league id included)
        cur.execute("""
SELECT m.id, m.home_team_id, m.away_team_id,
m.score_home, m.score_away, m.league_id,
t1.name as home_name, t2.name as away_name,
l.name as league_name
FROM matches m
LEFT JOIN teams t1 ON m.home_team_id = t1.id
LEFT JOIN teams t2 ON m.away_team_id = t2.id
LEFT JOIN leagues l ON m.league_id = l.id
WHERE m.sport = %s
AND m.score_home IS NOT NULL
AND m.score_away IS NOT NULL
ORDER BY m.mst_utc ASC
""", (sport,))
        matches = cur.fetchall()
        print(f"📊 {len(matches):,} maç işlenecek...")

        for match in matches:
            (match_id, home_id, away_id, score_h, score_a,
             league_id, home_name, away_name, league) = match
            if not (home_id and away_id):
                continue
            # Rating keys are strings everywhere else (DB/JSON loaders),
            # so normalise the raw DB ids before using them as keys.
            home_id = str(home_id)
            away_id = str(away_id)

            # Record pre-match ratings for top leagues only
            if not top_league_ids or league_id in top_league_ids:
                home_elo_obj = self.get_or_create_rating(home_id, home_name or "")
                away_elo_obj = self.get_or_create_rating(away_id, away_name or "")
                batch.append((
                    match_id,
                    home_elo_obj.overall_elo,
                    away_elo_obj.overall_elo,
                    home_elo_obj.home_elo,
                    away_elo_obj.away_elo,
                    home_elo_obj.form_elo,
                    away_elo_obj.form_elo,
                ))

            # Every match, top league or not, moves the ratings
            self.update_after_match(
                home_id, away_id, score_h, score_a,
                home_name or "", away_name or "", league or ""
            )
            processed += 1

            if len(batch) >= BATCH_SIZE:
                self._flush_elo_batch(cur, batch, sport)
                conn.commit()
                written += len(batch)
                batch.clear()
            if processed % 10000 == 0:
                print(f" İşlenen: {processed:,} / {len(matches):,}")

        # Write whatever is left in the last partial batch
        if batch:
            self._flush_elo_batch(cur, batch, sport)
            conn.commit()
            written += len(batch)
    finally:
        # Release the cursor even if a query or an update raises.
        cur.close()

    print(f"{processed:,} maç işlendi, {len(self.ratings)} takım")
    print(f"📝 {written:,} maç match_ai_features'a yazıldı")
    # Persist to JSON
    self.save_ratings()
    # Persist to the database
    self.save_ratings_to_db()
    # Show the strongest teams
    self._show_top_teams()
@staticmethod
def _flush_elo_batch(cur, batch: list, sport: str = 'football') -> None:
    """Batch-upsert pre-match ELO values into the sport's ai_features table.

    ``football`` goes to ``football_ai_features``; any other sport value
    goes to ``basketball_ai_features``. Each batch entry is a 7-tuple of
    (match_id, home_elo, away_elo, home_home_elo, away_away_elo,
    home_form_elo, away_form_elo).
    """
    from psycopg2.extras import execute_values

    if sport == 'football':
        table_name = 'football_ai_features'
    else:
        table_name = 'basketball_ai_features'

    sql = f"""
INSERT INTO {table_name}
(match_id, home_elo, away_elo,
home_home_elo, away_away_elo,
home_form_elo, away_form_elo,
calculator_ver, updated_at)
VALUES %s
ON CONFLICT (match_id) DO UPDATE SET
home_elo = EXCLUDED.home_elo,
away_elo = EXCLUDED.away_elo,
home_home_elo = EXCLUDED.home_home_elo,
away_away_elo = EXCLUDED.away_away_elo,
home_form_elo = EXCLUDED.home_form_elo,
away_form_elo = EXCLUDED.away_form_elo,
calculator_ver = EXCLUDED.calculator_ver,
updated_at = EXCLUDED.updated_at
"""
    stamp = datetime.now().isoformat()
    rows = []
    for mid, h_elo, a_elo, hh_elo, aa_elo, hf_elo, af_elo in batch:
        # Append the calculator tag and one shared timestamp to every row.
        rows.append((mid, h_elo, a_elo, hh_elo, aa_elo, hf_elo, af_elo,
                     'elo_v2_backfill', stamp))
    execute_values(cur, sql, rows, page_size=500)
def _show_top_teams(self, n: int = 20):
"""En güçlü takımları göster"""
sorted_teams = sorted(
self.ratings.items(),
key=lambda x: x[1].overall_elo,
reverse=True
)[:n]
print(f"\n🏆 Top {n} Takım (ELO V2):")
for i, (team_id, elo) in enumerate(sorted_teams, 1):
name = elo.team_name[:25] if elo.team_name else team_id[:25]
print(f" {i:2}. {name:25}{elo.overall_elo:.0f} (H:{elo.home_elo:.0f} A:{elo.away_elo:.0f})")
# Module-level singleton instance, created on first use
_system = None


def get_elo_system() -> ELORatingSystem:
    """Return the shared ELORatingSystem, constructing it on the first call."""
    global _system
    if _system is not None:
        return _system
    _system = ELORatingSystem()
    return _system
if __name__ == "__main__":
    import sys
    from pathlib import Path

    # Make the ai-engine root importable (needed for `from data.db import ...`)
    _AI_ENGINE_ROOT = Path(__file__).resolve().parent.parent
    _root_entry = str(_AI_ENGINE_ROOT)
    if _root_entry not in sys.path:
        sys.path.insert(0, _root_entry)

    system = get_elo_system()
    wants_calculate = len(sys.argv) > 1 and sys.argv[1] == 'calculate'
    if wants_calculate:
        system.calculate_all_from_history('football')
    else:
        # No command given: print usage plus a short summary of loaded ratings
        print("\n🧪 ELO V2 Test")
        print("Kullanım: python elo_system.py calculate")
        print(f"\n📊 Yüklü takım sayısı: {len(system.ratings)}")
        if system.ratings:
            system._show_top_teams(10)
+990
View File
@@ -0,0 +1,990 @@
"""
Feature Extractor - V2 Betting Engine
Pulls historical team stats, ELO, missing-player impact and live odds from
PostgreSQL and engineers a leakage-free feature vector for the ensemble model.
CRITICAL: Only pre-match data (matches before the target match) is used.
Post-match stats of the target match are NEVER included.
"""
from __future__ import annotations
import json
import logging
from dataclasses import dataclass, field
from typing import Any
import numpy as np
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
logger = logging.getLogger(__name__)
# Number of most recent finished matches averaged by _rolling_team_stats.
ROLLING_WINDOW: int = 5
# Maximum number of past head-to-head meetings read by _load_h2h_stats.
H2H_WINDOW: int = 10
# Upper cap, in days, applied to the rest-days value from _load_rest_days.
MAX_REST_DAYS: float = 14.0
@dataclass
class MatchFeatures:
    """Feature container for one match; every field has a neutral default."""

    match_id: str = ""
    home_team_id: str = ""
    away_team_id: str = ""

    # ELO and precomputed AI features
    home_elo: float = 1500.0
    away_elo: float = 1500.0
    elo_diff: float = 0.0
    missing_players_impact: float = 0.0
    home_form_score: float = 0.0
    away_form_score: float = 0.0
    h2h_home_win_rate: float = 0.5
    h2h_sample_size: int = 0
    home_rest_days: float = 7.0
    away_rest_days: float = 7.0
    rest_diff: float = 0.0
    home_lineup_availability: float = 1.0
    away_lineup_availability: float = 1.0

    # Home team rolling averages (last ROLLING_WINDOW matches)
    home_avg_possession: float = 50.0
    home_avg_shots_on_target: float = 4.0
    home_avg_total_shots: float = 10.0
    home_avg_goals_scored: float = 1.3
    home_avg_goals_conceded: float = 1.1

    # Away team rolling averages (last ROLLING_WINDOW matches)
    away_avg_possession: float = 50.0
    away_avg_shots_on_target: float = 4.0
    away_avg_total_shots: float = 10.0
    away_avg_goals_scored: float = 1.3
    away_avg_goals_conceded: float = 1.1

    # Bookmaker-implied probabilities
    implied_prob_home: float = 0.33
    implied_prob_draw: float = 0.33
    implied_prob_away: float = 0.33
    implied_prob_over25: float = 0.50
    implied_prob_under25: float = 0.50
    implied_prob_btts_yes: float = 0.50
    implied_prob_btts_no: float = 0.50

    # Raw decimal odds (used downstream for Edge/Kelly calculations)
    odds_home: float = 2.50
    odds_draw: float = 3.20
    odds_away: float = 2.80
    odds_over25: float = 1.90
    odds_under25: float = 1.90
    odds_btts_yes: float = 1.85
    odds_btts_no: float = 1.95

    # Data quality summary
    data_quality_score: float = 0.5
    data_quality_flags: list[str] = field(default_factory=list)

    # Descriptive metadata and context profiles
    match_name: str = ""
    home_team_name: str = ""
    away_team_name: str = ""
    league_id: str = ""
    league_name: str = ""
    referee_name: str = ""
    match_date_ms: int = 0
    league_avg_goals: float = 2.6
    referee_avg_goals: float = 2.6
    referee_home_bias: float = 0.0
    home_squad_strength: float = 0.5
    away_squad_strength: float = 0.5
    home_key_players: float = 0.0
    away_key_players: float = 0.0

    def to_model_array(self) -> np.ndarray:
        """Return the 24 model inputs as a float64 array, ordered as feature_names()."""
        values = [getattr(self, name) for name in self.feature_names()]
        return np.array(values, dtype=np.float64)

    @staticmethod
    def feature_names() -> list[str]:
        """Return the 24 attribute names that make up the model input, in order."""
        ratings = ["home_elo", "away_elo", "elo_diff", "missing_players_impact"]
        home_rolling = [
            "home_avg_possession",
            "home_avg_shots_on_target",
            "home_avg_total_shots",
            "home_avg_goals_scored",
            "home_avg_goals_conceded",
        ]
        away_rolling = [
            "away_avg_possession",
            "away_avg_shots_on_target",
            "away_avg_total_shots",
            "away_avg_goals_scored",
            "away_avg_goals_conceded",
        ]
        implied = [
            "implied_prob_home",
            "implied_prob_draw",
            "implied_prob_away",
            "implied_prob_over25",
            "implied_prob_under25",
            "implied_prob_btts_yes",
            "implied_prob_btts_no",
        ]
        match_odds = ["odds_home", "odds_draw", "odds_away"]
        return ratings + home_rolling + away_rolling + implied + match_odds
async def extract_features(session: AsyncSession, match_id: str) -> MatchFeatures | None:
    """Build the full MatchFeatures record for one match.

    Each data source is loaded in turn; when a source yields nothing the
    dataclass default is kept and a ``missing_*`` flag is recorded. The
    flags are finally converted into ``data_quality_score``.

    Args:
        session: Async SQLAlchemy session used for every query.
        match_id: Identifier of the target match.

    Returns:
        None when the match header cannot be found. A MatchFeatures with
        ``data_quality_score`` 0.1 and only header fields set when either
        team id is missing. Otherwise the fully populated MatchFeatures.
    """
    feats = MatchFeatures(match_id=match_id)
    flags: list[str] = []

    # --- Match header (teams, kick-off time, league, referee) ---
    match_row = await _load_match_header(session, match_id)
    if match_row is None:
        logger.warning("Match %s not found in live_matches or matches.", match_id)
        return None
    feats.home_team_id = match_row["home_team_id"] or ""
    feats.away_team_id = match_row["away_team_id"] or ""
    feats.match_name = match_row.get("match_name", "") or ""
    feats.match_date_ms = int(match_row.get("mst_utc", 0) or 0)
    feats.home_team_name = match_row.get("home_name", "") or ""
    feats.away_team_name = match_row.get("away_name", "") or ""
    feats.league_id = match_row.get("league_id", "") or ""
    feats.league_name = match_row.get("league_name", "") or ""
    feats.referee_name = match_row.get("referee_name", "") or ""
    # Without both team ids none of the per-team queries can run: bail out early.
    if not feats.home_team_id or not feats.away_team_id:
        logger.warning("Match %s missing team IDs.", match_id)
        flags.append("missing_team_ids")
        feats.data_quality_flags = flags
        feats.data_quality_score = 0.1
        return feats

    # --- Precomputed AI features (ELO, form, h2h) ---
    ai_row = await _load_ai_features(session, match_id)
    if ai_row:
        # NULL or zero values fall back to the neutral defaults via `or`.
        feats.home_elo = float(ai_row["home_elo"] or 1500.0)
        feats.away_elo = float(ai_row["away_elo"] or 1500.0)
        feats.missing_players_impact = float(ai_row["missing_players_impact"] or 0.0)
        feats.home_form_score = float(ai_row["home_form_score"] or 0.0)
        feats.away_form_score = float(ai_row["away_form_score"] or 0.0)
        if ai_row.get("h2h_home_win_rate") is not None:
            feats.h2h_home_win_rate = float(ai_row["h2h_home_win_rate"])
        feats.h2h_sample_size = int(ai_row.get("h2h_total") or 0)
    else:
        flags.append("missing_ai_features")
    feats.elo_diff = feats.home_elo - feats.away_elo

    # --- Rolling team statistics from matches before this kick-off ---
    home_rolling = await _rolling_team_stats(
        session, feats.home_team_id, feats.match_date_ms,
    )
    away_rolling = await _rolling_team_stats(
        session, feats.away_team_id, feats.match_date_ms,
    )
    if home_rolling is not None:
        feats.home_avg_possession = home_rolling["avg_possession"]
        feats.home_avg_shots_on_target = home_rolling["avg_shots_on_target"]
        feats.home_avg_total_shots = home_rolling["avg_total_shots"]
        feats.home_avg_goals_scored = home_rolling["avg_goals_scored"]
        feats.home_avg_goals_conceded = home_rolling["avg_goals_conceded"]
    else:
        flags.append("missing_home_stats")
    if away_rolling is not None:
        feats.away_avg_possession = away_rolling["avg_possession"]
        feats.away_avg_shots_on_target = away_rolling["avg_shots_on_target"]
        feats.away_avg_total_shots = away_rolling["avg_total_shots"]
        feats.away_avg_goals_scored = away_rolling["avg_goals_scored"]
        feats.away_avg_goals_conceded = away_rolling["avg_goals_conceded"]
    else:
        flags.append("missing_away_stats")

    # A form score of (almost) exactly zero is treated as "not provided" and
    # replaced by the rolling goal difference.
    if abs(feats.home_form_score) < 1e-6:
        feats.home_form_score = round(
            feats.home_avg_goals_scored - feats.home_avg_goals_conceded,
            3,
        )
    if abs(feats.away_form_score) < 1e-6:
        feats.away_form_score = round(
            feats.away_avg_goals_scored - feats.away_avg_goals_conceded,
            3,
        )

    # --- Rest days since each team's previous match ---
    home_rest_days = await _load_rest_days(
        session, feats.home_team_id, feats.match_date_ms,
    )
    away_rest_days = await _load_rest_days(
        session, feats.away_team_id, feats.match_date_ms,
    )
    if home_rest_days is not None:
        feats.home_rest_days = home_rest_days
    else:
        flags.append("missing_home_rest")
    if away_rest_days is not None:
        feats.away_rest_days = away_rest_days
    else:
        flags.append("missing_away_rest")
    feats.rest_diff = round(feats.home_rest_days - feats.away_rest_days, 3)

    # --- Head-to-head: only queried when the AI row supplied no sample ---
    if feats.h2h_sample_size == 0:
        h2h = await _load_h2h_stats(
            session,
            feats.home_team_id,
            feats.away_team_id,
            feats.match_date_ms,
        )
        if h2h is not None:
            feats.h2h_home_win_rate = h2h["home_win_rate"]
            feats.h2h_sample_size = h2h["sample_size"]
        else:
            flags.append("missing_h2h")

    # --- League and referee profiles ---
    league_profile = await _load_league_profile(
        session,
        feats.league_id,
        feats.match_date_ms,
    )
    if league_profile is not None:
        feats.league_avg_goals = league_profile["avg_goals"]
    else:
        flags.append("missing_league_profile")
    referee_profile = await _load_referee_profile(
        session,
        feats.referee_name,
        feats.match_date_ms,
    )
    if referee_profile is not None:
        feats.referee_avg_goals = referee_profile["avg_goals"]
        feats.referee_home_bias = referee_profile["home_bias"]
    else:
        flags.append("missing_referee_profile")

    # --- Squad profiles ---
    home_squad = await _load_team_squad_profile(
        session,
        feats.home_team_id,
        feats.match_date_ms,
    )
    away_squad = await _load_team_squad_profile(
        session,
        feats.away_team_id,
        feats.match_date_ms,
    )
    if home_squad is not None:
        feats.home_squad_strength = home_squad["squad_strength"]
        feats.home_key_players = home_squad["key_players"]
    else:
        flags.append("missing_home_squad_profile")
    if away_squad is not None:
        feats.away_squad_strength = away_squad["squad_strength"]
        feats.away_key_players = away_squad["key_players"]
    else:
        flags.append("missing_away_squad_profile")

    # --- Lineup context taken from the match header row ---
    lineup_info = _extract_lineup_context(match_row)
    feats.home_lineup_availability = lineup_info["home_availability"]
    feats.away_lineup_availability = lineup_info["away_availability"]
    if lineup_info["has_real_lineup_data"]:
        # Keep the larger of the stored impact and the mean unavailability
        # of the two lineups.
        feats.missing_players_impact = max(
            feats.missing_players_impact,
            round(
                (
                    (1.0 - feats.home_lineup_availability)
                    + (1.0 - feats.away_lineup_availability)
                ) / 2.0,
                4,
            ),
        )
    else:
        flags.append("missing_lineup_context")

    # --- Bookmaker odds (written directly onto feats by the helper) ---
    odds_ok = await _extract_odds(session, match_id, feats)
    if not odds_ok:
        flags.append("missing_odds")

    # --- Data quality: start at 1.0 and subtract a penalty per flag ---
    quality = 1.0
    penalty_map = {
        "missing_team_ids": 0.5,
        "missing_ai_features": 0.05,
        "missing_home_stats": 0.15,
        "missing_away_stats": 0.15,
        "missing_home_rest": 0.05,
        "missing_away_rest": 0.05,
        "missing_h2h": 0.05,
        "missing_league_profile": 0.04,
        "missing_referee_profile": 0.04,
        "missing_home_squad_profile": 0.06,
        "missing_away_squad_profile": 0.06,
        "missing_lineup_context": 0.05,
        "missing_odds": 0.2,
    }
    for flag in flags:
        # Flags without an entry in the map cost 0.05.
        quality -= penalty_map.get(flag, 0.05)
    feats.data_quality_score = max(0.0, round(quality, 2))
    feats.data_quality_flags = flags
    return feats
async def _load_match_header(
    session: AsyncSession, match_id: str,
) -> dict[str, Any] | None:
    """Return the header row for a match, or None when it is in neither table.

    ``live_matches`` is queried first and ``matches`` second. The second
    query returns NULL for lineups/sidelined and takes the referee name
    from match_officials (role_id = 1).
    """
    live_sql = """
SELECT
m.id,
m.home_team_id,
m.away_team_id,
m.match_name,
m.mst_utc,
m.sport,
m.league_id,
m.referee_name,
m.lineups,
m.sidelined,
ht.name AS home_name,
at.name AS away_name,
l.name AS league_name
FROM live_matches m
LEFT JOIN teams ht ON ht.id = m.home_team_id
LEFT JOIN teams at ON at.id = m.away_team_id
LEFT JOIN leagues l ON l.id = m.league_id
WHERE m.id = :match_id
LIMIT 1
"""
    archive_sql = """
SELECT
m.id,
m.home_team_id,
m.away_team_id,
m.match_name,
m.mst_utc,
m.sport,
m.league_id,
ref.name AS referee_name,
NULL AS lineups,
NULL AS sidelined,
ht.name AS home_name,
at.name AS away_name,
l.name AS league_name
FROM matches m
LEFT JOIN teams ht ON ht.id = m.home_team_id
LEFT JOIN teams at ON at.id = m.away_team_id
LEFT JOIN leagues l ON l.id = m.league_id
LEFT JOIN match_officials ref ON ref.match_id = m.id AND ref.role_id = 1
WHERE m.id = :match_id
LIMIT 1
"""
    # First source that yields a row wins.
    for sql in (live_sql, archive_sql):
        outcome = await session.execute(text(sql), {"match_id": match_id})
        hit = outcome.mappings().first()
        if hit:
            return dict(hit)
    return None
async def _load_ai_features(
    session: AsyncSession, match_id: str,
) -> dict[str, Any] | None:
    """Return the stored ``football_ai_features`` row for a match, or None."""
    statement = text("""
SELECT
home_elo,
away_elo,
missing_players_impact,
home_form_score,
away_form_score,
h2h_home_win_rate,
h2h_total
FROM football_ai_features
WHERE match_id = :match_id
LIMIT 1
""")
    outcome = await session.execute(statement, {"match_id": match_id})
    record = outcome.mappings().first()
    if not record:
        return None
    return dict(record)
async def _rolling_team_stats(
    session: AsyncSession,
    team_id: str,
    before_mst_utc: int,
) -> dict[str, float] | None:
    """Average a team's stats over its last ROLLING_WINDOW finished matches.

    Only football matches that started before ``before_mst_utc``, have both
    scores set and have a ``football_team_stats`` row for the team are
    considered. Returns None when no such match exists; otherwise a dict of
    five averages rounded to 2 decimals.
    """
    statement = text("""
WITH recent AS (
SELECT
m.id AS match_id,
m.home_team_id,
m.away_team_id,
m.score_home,
m.score_away,
ts.possession_percentage,
ts.shots_on_target,
ts.total_shots
FROM matches m
JOIN football_team_stats ts ON ts.match_id = m.id AND ts.team_id = :team_id
WHERE (m.home_team_id = :team_id OR m.away_team_id = :team_id)
AND m.mst_utc < :before_ts
AND m.sport = 'football'
AND m.score_home IS NOT NULL
AND m.score_away IS NOT NULL
ORDER BY m.mst_utc DESC
LIMIT :window
)
SELECT
COALESCE(AVG(possession_percentage), 50.0) AS avg_possession,
COALESCE(AVG(shots_on_target), 4.0) AS avg_shots_on_target,
COALESCE(AVG(total_shots), 10.0) AS avg_total_shots,
COALESCE(AVG(
CASE
WHEN home_team_id = :team_id THEN score_home
ELSE score_away
END
), 1.3) AS avg_goals_scored,
COALESCE(AVG(
CASE
WHEN home_team_id = :team_id THEN score_away
ELSE score_home
END
), 1.1) AS avg_goals_conceded,
COUNT(*) AS match_count
FROM recent
""")
    params = {
        "team_id": team_id,
        "before_ts": before_mst_utc,
        "window": ROLLING_WINDOW,
    }
    outcome = await session.execute(statement, params)
    summary = outcome.mappings().first()
    if summary is None:
        return None
    if int(summary["match_count"]) == 0:
        return None
    wanted = (
        "avg_possession",
        "avg_shots_on_target",
        "avg_total_shots",
        "avg_goals_scored",
        "avg_goals_conceded",
    )
    return {key: round(float(summary[key]), 2) for key in wanted}
async def _load_rest_days(
    session: AsyncSession,
    team_id: str,
    before_mst_utc: int,
) -> float | None:
    """Return days between the team's previous football match and ``before_mst_utc``.

    Timestamps are treated as milliseconds. The result is floored at 0,
    capped at MAX_REST_DAYS and rounded to 3 decimals. None is returned
    when the team has no earlier match.
    """
    statement = text("""
SELECT m.mst_utc
FROM matches m
WHERE (m.home_team_id = :team_id OR m.away_team_id = :team_id)
AND m.mst_utc < :before_ts
AND m.sport = 'football'
ORDER BY m.mst_utc DESC
LIMIT 1
""")
    params = {"team_id": team_id, "before_ts": before_mst_utc}
    outcome = await session.execute(statement, params)
    previous_kickoff = outcome.scalar_one_or_none()
    if previous_kickoff is None:
        return None
    elapsed_days = (float(before_mst_utc) - float(previous_kickoff)) / 86400000.0
    rest_days = max(0.0, elapsed_days)
    capped = min(rest_days, MAX_REST_DAYS)
    return round(capped, 3)
async def _load_h2h_stats(
    session: AsyncSession,
    home_team_id: str,
    away_team_id: str,
    before_mst_utc: int,
) -> dict[str, float | int] | None:
    """Summarise recent head-to-head results from today's home team's view.

    Considers up to ``H2H_WINDOW`` scored football meetings between the two
    teams (either venue) that started before ``before_mst_utc``.

    Returns:
        ``{"home_win_rate": float, "sample_size": int}`` where a draw counts
        as half a win, or ``None`` when there is no usable meeting.
    """
    meetings_sql = text("""
        SELECT
        m.home_team_id,
        m.away_team_id,
        m.score_home,
        m.score_away
        FROM matches m
        WHERE m.sport = 'football'
        AND m.mst_utc < :before_ts
        AND m.score_home IS NOT NULL
        AND m.score_away IS NOT NULL
        AND (
        (m.home_team_id = :home_team_id AND m.away_team_id = :away_team_id)
        OR
        (m.home_team_id = :away_team_id AND m.away_team_id = :home_team_id)
        )
        ORDER BY m.mst_utc DESC
        LIMIT :window
    """)
    bind_params = {
        "home_team_id": home_team_id,
        "away_team_id": away_team_id,
        "before_ts": before_mst_utc,
        "window": H2H_WINDOW,
    }
    meetings = (await session.execute(meetings_sql, bind_params)).mappings().all()
    if not meetings:
        return None

    wins = 0.0
    draws = 0.0
    counted = 0
    for meeting in meetings:
        goals_home, goals_away = meeting["score_home"], meeting["score_away"]
        if goals_home is None or goals_away is None:
            continue
        counted += 1
        # Re-orient the score so "ours" always belongs to today's home team,
        # regardless of which side hosted the historical meeting.
        if meeting["home_team_id"] == home_team_id:
            ours, theirs = float(goals_home), float(goals_away)
        else:
            ours, theirs = float(goals_away), float(goals_home)
        if ours > theirs:
            wins += 1.0
        elif ours == theirs:
            draws += 1.0

    if counted == 0:
        return None

    # A draw contributes half a win instead of being discarded.
    return {
        "home_win_rate": round((wins + draws * 0.5) / counted, 4),
        "sample_size": counted,
    }
async def _load_league_profile(
    session: AsyncSession,
    league_id: str,
    before_mst_utc: int,
) -> dict[str, float] | None:
    """Return the average total goals of the league's latest finished matches.

    Uses at most the 100 most recent ``FT`` football matches of the league
    that started before ``before_mst_utc``.

    Returns:
        ``{"avg_goals": float}`` rounded to 3 decimals, or ``None`` when the
        league id is empty or no qualifying match exists.
    """
    if not league_id:
        return None

    league_sql = text("""
        SELECT
        COALESCE(AVG(m.score_home + m.score_away), 2.6) AS avg_goals,
        COUNT(*) AS match_count
        FROM (
        SELECT score_home, score_away
        FROM matches
        WHERE league_id = :league_id
        AND sport = 'football'
        AND status = 'FT'
        AND score_home IS NOT NULL
        AND score_away IS NOT NULL
        AND mst_utc < :before_ts
        ORDER BY mst_utc DESC
        LIMIT 100
        ) m
    """)
    outcome = await session.execute(
        league_sql,
        {"league_id": league_id, "before_ts": before_mst_utc},
    )
    summary = outcome.mappings().first()
    has_matches = summary is not None and int(summary["match_count"] or 0) != 0
    if not has_matches:
        return None
    return {"avg_goals": round(float(summary["avg_goals"]), 3)}
async def _load_referee_profile(
    session: AsyncSession,
    referee_name: str,
    before_mst_utc: int,
) -> dict[str, float] | None:
    """Profile a referee from the last 30 finished matches they officiated.

    Officials are matched by name with ``role_id = 1``
    (NOTE(review): presumably the main-referee role - confirm against the
    ``match_officials`` schema).

    Returns:
        ``{"home_bias": ..., "avg_goals": ...}`` where ``home_bias`` is the
        referee's home-win share minus 0.46, or ``None`` when the name is
        empty or the referee has no qualifying match.
    """
    if not referee_name:
        return None

    referee_sql = text("""
        SELECT
        COALESCE(AVG(CASE WHEN score_home > score_away THEN 1.0 ELSE 0.0 END), 0.46) - 0.46 AS home_bias,
        COALESCE(AVG(score_home + score_away), 2.6) AS avg_goals,
        COUNT(*) AS match_count
        FROM (
        SELECT m.score_home, m.score_away
        FROM match_officials mo
        JOIN matches m ON m.id = mo.match_id
        WHERE mo.name = :referee_name
        AND mo.role_id = 1
        AND m.sport = 'football'
        AND m.status = 'FT'
        AND m.score_home IS NOT NULL
        AND m.score_away IS NOT NULL
        AND m.mst_utc < :before_ts
        ORDER BY m.mst_utc DESC
        LIMIT 30
        ) ref_matches
    """)
    outcome = await session.execute(
        referee_sql,
        {"referee_name": referee_name, "before_ts": before_mst_utc},
    )
    summary = outcome.mappings().first()
    if summary is None:
        return None
    if int(summary["match_count"] or 0) == 0:
        return None

    home_bias = round(float(summary["home_bias"]), 4)
    avg_goals = round(float(summary["avg_goals"]), 3)
    return {"home_bias": home_bias, "avg_goals": avg_goals}
async def _load_team_squad_profile(
    session: AsyncSession,
    team_id: str,
    before_mst_utc: int,
) -> dict[str, float] | None:
    """Estimate squad strength from the team's last 8 finished matches.

    Every player who took part gets a score:
    ``starts * 1.5 + substitute appearances * 0.5 + goals * 2.5 + assists * 1.5``
    (goal events whose subtype matches '%penaltı kaçırma%', Turkish for
    "missed penalty", are not counted as goals).  The 11 highest scores are
    averaged.

    Returns:
        ``{"squad_strength": avg_top_score / 10 clamped to [0, 1],
        "key_players": number of top-11 players scoring >= 6.0}`` or ``None``
        when the team id is empty or the team has no qualifying match.
    """
    if not team_id:
        return None

    squad_sql = text("""
        WITH recent_matches AS (
        SELECT m.id, m.mst_utc
        FROM matches m
        WHERE (m.home_team_id = :team_id OR m.away_team_id = :team_id)
        AND m.sport = 'football'
        AND m.status = 'FT'
        AND m.mst_utc < :before_ts
        ORDER BY m.mst_utc DESC
        LIMIT 8
        ),
        player_base AS (
        SELECT
        mpp.player_id,
        COUNT(*)::float AS appearances,
        COUNT(*) FILTER (WHERE mpp.is_starting = true)::float AS starts
        FROM match_player_participation mpp
        JOIN recent_matches rm ON rm.id = mpp.match_id
        WHERE mpp.team_id = :team_id
        GROUP BY mpp.player_id
        ),
        player_goals AS (
        SELECT
        mpe.player_id,
        COUNT(*) FILTER (
        WHERE mpe.event_type = 'goal'
        AND COALESCE(mpe.event_subtype, '') NOT ILIKE '%penaltı kaçırma%'
        )::float AS goals,
        0.0::float AS assists
        FROM match_player_events mpe
        JOIN recent_matches rm ON rm.id = mpe.match_id
        WHERE mpe.team_id = :team_id
        GROUP BY mpe.player_id
        UNION ALL
        SELECT
        mpe.assist_player_id AS player_id,
        0.0::float AS goals,
        COUNT(*) FILTER (
        WHERE mpe.event_type = 'goal'
        AND mpe.assist_player_id IS NOT NULL
        )::float AS assists
        FROM match_player_events mpe
        JOIN recent_matches rm ON rm.id = mpe.match_id
        WHERE mpe.team_id = :team_id
        AND mpe.assist_player_id IS NOT NULL
        GROUP BY mpe.assist_player_id
        ),
        player_events AS (
        SELECT
        player_id,
        SUM(goals) AS goals,
        SUM(assists) AS assists
        FROM player_goals
        GROUP BY player_id
        ),
        player_scores AS (
        SELECT
        pb.player_id,
        (pb.starts * 1.5)
        + ((pb.appearances - pb.starts) * 0.5)
        + (COALESCE(pe.goals, 0.0) * 2.5)
        + (COALESCE(pe.assists, 0.0) * 1.5) AS score
        FROM player_base pb
        LEFT JOIN player_events pe ON pe.player_id = pb.player_id
        )
        SELECT
        COALESCE(AVG(top_players.score), 0.0) AS avg_top_score,
        COALESCE(COUNT(*) FILTER (WHERE top_players.score >= 6.0), 0) AS key_players,
        COALESCE((SELECT COUNT(*) FROM recent_matches), 0) AS match_count
        FROM (
        SELECT score
        FROM player_scores
        ORDER BY score DESC
        LIMIT 11
        ) top_players
    """)
    bind_params = {"team_id": team_id, "before_ts": before_mst_utc}
    summary = (await session.execute(squad_sql, bind_params)).mappings().first()
    if summary is None or int(summary["match_count"] or 0) == 0:
        return None

    avg_top_score = float(summary["avg_top_score"] or 0.0)
    # Scale the average score to a 0..1 strength indicator.
    strength = min(max(avg_top_score / 10.0, 0.0), 1.0)
    return {
        "squad_strength": round(strength, 4),
        "key_players": float(summary["key_players"] or 0),
    }
def _safe_json(value: Any) -> dict[str, Any] | None:
if value is None:
return None
if isinstance(value, dict):
return value
if isinstance(value, str):
try:
parsed = json.loads(value)
except (TypeError, json.JSONDecodeError):
return None
return parsed if isinstance(parsed, dict) else None
return None
def _safe_list(value: Any) -> list[Any]:
if isinstance(value, list):
return value
return []
def _extract_lineup_context(match_row: dict[str, Any]) -> dict[str, float | bool]:
    """Derive squad-availability signals from a match row's lineup payloads.

    Reads the ``lineups`` and ``sidelined`` columns (dicts or JSON strings).
    Malformed sections - a ``null`` team entry, a non-list player list, a
    non-numeric ``totalSidelined`` - are treated as "no data" instead of
    raising.

    Returns:
        ``home_availability`` / ``away_availability`` as computed by
        ``_compute_availability`` and ``has_real_lineup_data``, which is
        ``True`` when any starting-XI or sidelined count is positive.
    """

    def _section(container: dict[str, Any] | None, key: str) -> dict[str, Any]:
        # A missing container, missing key or non-dict value all mean "empty".
        section = container.get(key) if container else None
        return section if isinstance(section, dict) else {}

    def _count(value: Any) -> int:
        # Tolerate None, numeric strings and floats; junk counts as zero.
        try:
            return int(float(value or 0))
        except (TypeError, ValueError, OverflowError):
            return 0

    def _sidelined_total(team: dict[str, Any]) -> int:
        # Trust whichever is larger: the declared total or the listed players.
        return max(_count(team.get("totalSidelined")), len(_safe_list(team.get("players"))))

    lineups = _safe_json(match_row.get("lineups"))
    sidelined = _safe_json(match_row.get("sidelined"))

    home_xi_count = len(_safe_list(_section(lineups, "home").get("xi")))
    away_xi_count = len(_safe_list(_section(lineups, "away").get("xi")))
    home_sidelined_count = _sidelined_total(_section(sidelined, "homeTeam"))
    away_sidelined_count = _sidelined_total(_section(sidelined, "awayTeam"))

    has_real_lineup_data = any(
        count > 0
        for count in (
            home_xi_count,
            away_xi_count,
            home_sidelined_count,
            away_sidelined_count,
        )
    )
    return {
        "home_availability": _compute_availability(home_xi_count, home_sidelined_count),
        "away_availability": _compute_availability(away_xi_count, away_sidelined_count),
        "has_real_lineup_data": has_real_lineup_data,
    }
def _compute_availability(xi_count: int, sidelined_count: int) -> float:
xi_ratio = min(max(xi_count / 11.0, 0.0), 1.0) if xi_count > 0 else 1.0
sidelined_penalty = min(max(sidelined_count / 11.0, 0.0), 1.0) * 0.35
return round(min(max(xi_ratio - sidelined_penalty, 0.0), 1.0), 4)
def _safe_odd(val: Any) -> float:
"""Parse an odds value that might be str, float, int, or None."""
if val is None:
return 0.0
try:
parsed = float(val)
return parsed if parsed > 1.0 else 0.0
except (ValueError, TypeError):
return 0.0
def _implied_prob(decimal_odd: float) -> float:
"""Convert decimal odds to implied probability, clamped [0, 1]."""
if decimal_odd <= 1.0:
return 0.0
return min(1.0, 1.0 / decimal_odd)
async def _extract_odds(
    session: AsyncSession,
    match_id: str,
    feats: MatchFeatures,
) -> bool:
    """Fill ``feats`` with odds and implied probabilities for a match.

    The live-match JSON blob is tried first; the relational odds tables are
    consulted only when the blob is absent or yields nothing.  When odds were
    found, every ``implied_prob_*`` field is recomputed from its ``odds_*``
    counterpart (rounded to 4 decimals).

    Returns:
        ``True`` when either source reported odds, ``False`` otherwise.
    """
    live_blob = await _load_live_odds_json(session, match_id)
    found = _parse_odds_json(live_blob, feats) if live_blob else False
    if not found:
        found = await _load_relational_odds(session, match_id, feats)
    if not found:
        return found

    markets = ("home", "draw", "away", "over25", "under25", "btts_yes", "btts_no")
    for market in markets:
        decimal_odd = getattr(feats, f"odds_{market}")
        setattr(feats, f"implied_prob_{market}", round(_implied_prob(decimal_odd), 4))
    return found
async def _load_live_odds_json(
    session: AsyncSession, match_id: str,
) -> dict[str, Any] | list[Any] | None:
    """Load the raw odds payload stored on the live-match row.

    The ``odds`` column may come back already decoded (dict / list) or as a
    JSON string.  The return annotation includes ``list`` because a top-level
    JSON array is returned as-is and is accepted by ``_parse_odds_json``.

    Returns:
        The decoded dict or list, or ``None`` when the match has no odds, the
        JSON is malformed, or the payload is of any other type.
    """
    query = text("SELECT odds FROM live_matches WHERE id = :mid AND odds IS NOT NULL")
    result = await session.execute(query, {"mid": match_id})
    payload = result.scalar_one_or_none()
    if payload is None:
        return None
    if isinstance(payload, str):
        try:
            payload = json.loads(payload)
        except (json.JSONDecodeError, TypeError):
            return None
    return payload if isinstance(payload, (dict, list)) else None
def _parse_odds_json(odds_blob: dict[str, Any] | list[Any], feats: MatchFeatures) -> bool:
    """Parse the Mackolik-style odds JSON structure into ``feats``.

    Accepts either a list of category dicts or a dict holding them under
    ``"categories"`` / ``"odds"`` (as a list or a dict of categories).  Only
    the match-result, over/under 2.5 and both-teams-to-score markets are read;
    an ``odds_*`` field is overwritten only by a valid (> 1.0) price.

    Returns:
        ``True`` when at least one valid odd was stored.  A recognised
        category without any usable price does not count as "found", so the
        caller can still fall back to another odds source.
    """
    # (accepted lower-cased category names, ((feats attribute, selection aliases), ...))
    markets = (
        (
            ("mac sonucu", "match result", "1x2", "maç sonucu"),
            (("odds_home", ("1",)), ("odds_draw", ("x",)), ("odds_away", ("2",))),
        ),
        (
            ("2,5 alt/ust", "over/under 2.5", "2.5 alt/ust", "2,5 alt/üst", "2.5 alt/üst"),
            (("odds_over25", ("ust", "over", "üst")), ("odds_under25", ("alt", "under"))),
        ),
        (
            ("karsilikli gol", "both teams to score", "btts", "karşılıklı gol"),
            (("odds_btts_yes", ("var", "yes")), ("odds_btts_no", ("yok", "no"))),
        ),
    )

    raw_categories: Any = odds_blob
    if isinstance(odds_blob, dict):
        raw_categories = odds_blob.get("categories", odds_blob.get("odds", []))
    if isinstance(raw_categories, dict):
        raw_categories = list(raw_categories.values())
    if not isinstance(raw_categories, list):
        raw_categories = []
    categories = [item for item in raw_categories if isinstance(item, dict)]

    found_any = False
    for cat in categories:
        # str() guards against a non-string category label in the payload.
        cat_name = str(cat.get("name") or cat.get("cn") or "").strip().lower()
        fields = next((spec for names, spec in markets if cat_name in names), None)
        if fields is None:
            continue
        sels = _selections_to_map(cat.get("selections") or cat.get("s") or [])
        for attribute, aliases in fields:
            # Take the first alias that carries a valid price.
            for alias in aliases:
                price = _safe_odd(sels.get(alias))
                if price > 0.0:
                    setattr(feats, attribute, price)
                    found_any = True
                    break
    return found_any
def _selections_to_map(selections: list[Any] | dict[str, Any]) -> dict[str, Any]:
"""Normalize varied selection structures into {name_lower: odd_value}."""
result: dict[str, Any] = {}
if isinstance(selections, dict):
for key, value in selections.items():
result[str(key).strip().lower()] = value
elif isinstance(selections, list):
for sel in selections:
if isinstance(sel, dict):
name = (sel.get("name") or sel.get("n") or "").strip().lower()
value = sel.get("odd_value") or sel.get("ov") or sel.get("v")
if name:
result[name] = value
return result
async def _load_relational_odds(
    session: AsyncSession, match_id: str, feats: MatchFeatures,
) -> bool:
    """Fallback: load odds from odd_categories + odd_selections.

    Only the 'Maç Sonucu' (match result), '2,5 Alt/Üst' (over/under 2.5) and
    'Karşılıklı Gol' (both teams to score) categories are read.  Prices that
    are missing or not greater than 1.0 are ignored.

    Returns:
        ``True`` when at least one valid odd was written to ``feats``;
        ``False`` when the match has no rows or none of them was usable.
    """
    # Lower-cased selection aliases -> MatchFeatures attribute, per category.
    fields_by_category = {
        "Maç Sonucu": {"1": "odds_home", "x": "odds_draw", "2": "odds_away"},
        "2,5 Alt/Üst": {
            "üst": "odds_over25",
            "ust": "odds_over25",
            "over": "odds_over25",
            "alt": "odds_under25",
            "under": "odds_under25",
        },
        "Karşılıklı Gol": {
            "var": "odds_btts_yes",
            "yes": "odds_btts_yes",
            "yok": "odds_btts_no",
            "no": "odds_btts_no",
        },
    }

    query = text("""
        SELECT oc.name AS cat_name, os.name AS sel_name, os.odd_value
        FROM odd_categories oc
        JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
        WHERE oc.match_id = :match_id
        AND oc.name IN ('Maç Sonucu', '2,5 Alt/Üst', 'Karşılıklı Gol')
    """)
    result = await session.execute(query, {"match_id": match_id})

    stored_any = False
    for row in result.mappings().all():
        value = _safe_odd(row["odd_value"])
        if value <= 1.0:
            continue
        cat = (row["cat_name"] or "").strip()
        sel = (row["sel_name"] or "").strip().lower()
        attribute = fields_by_category.get(cat, {}).get(sel)
        if attribute is None:
            continue
        setattr(feats, attribute, value)
        stored_any = True
    return stored_any
+256
View File
@@ -0,0 +1,256 @@
"""
Feature Adapter for XGBoost Inference
=====================================
Bridges the gap between V20 Engine outputs (CalculationContext) and XGBoost Models.
Constructs the feature vector used in training, with columns in the exact order of ``FEATURES``.
"""
from __future__ import annotations
import os
from typing import Any
import psycopg2
from psycopg2.extensions import connection as PgConnection
import pandas as pd
import numpy as np
from data.db import get_clean_dsn
# Feature definitions (must match train_xgboost_markets.py).
# The order of this list is the column order of the DataFrame handed to the
# XGBoost models, so entries must not be reordered independently of training.
# NOTE(review): the original note here said "68 features", but the list below
# contains 66 names - confirm the expected count against the trained models.
FEATURES = [
    # ELO
    "home_overall_elo", "away_overall_elo", "elo_diff",
    "home_home_elo", "away_away_elo", "form_elo_diff",
    # Form
    "home_goals_avg", "home_conceded_avg",
    "away_goals_avg", "away_conceded_avg",
    "home_clean_sheet_rate", "away_clean_sheet_rate",
    "home_scoring_rate", "away_scoring_rate",
    "home_winning_streak", "away_winning_streak",
    # H2H
    "h2h_home_win_rate", "h2h_draw_rate",
    "h2h_avg_goals", "h2h_btts_rate", "h2h_over25_rate",
    # Stats
    "home_avg_possession", "away_avg_possession",
    "home_avg_shots_on_target", "away_avg_shots_on_target",
    "home_shot_conversion", "away_shot_conversion",
    # Odds (implicit market wisdom)
    "odds_ms_h", "odds_ms_d", "odds_ms_a",
    "implied_home", "implied_draw", "implied_away",
    "odds_ht_ms_h", "odds_ht_ms_d", "odds_ht_ms_a",
    "odds_ou05_o", "odds_ou05_u",
    "odds_ou15_o", "odds_ou15_u",
    "odds_ou25_o", "odds_ou25_u",
    "odds_ou35_o", "odds_ou35_u",
    "odds_ht_ou05_o", "odds_ht_ou05_u",
    "odds_ht_ou15_o", "odds_ht_ou15_u",
    "odds_btts_y", "odds_btts_n",
    # League/Context
    "league_avg_goals", "league_zero_goal_rate",
    "home_xga", "away_xga",
    # Upset features
    "upset_atmosphere", "upset_motivation", "upset_fatigue", "upset_potential",
    # Referee features
    "referee_home_bias", "referee_avg_goals", "referee_cards_total",
    "referee_avg_yellow", "referee_experience",
    # Momentum features
    "home_momentum_score", "away_momentum_score", "momentum_diff",
]
class FeatureAdapter:
    """Adapter to convert V20 context into XGBoost-compatible features.

    Reads ``ctx.team_pred.raw_features`` and ``ctx.odds_data``, adds cached
    per-league goal statistics (optional, database-backed) and produces a
    one-row DataFrame whose columns follow ``FEATURES``.
    """

    # Used when there is no league id, no DB connection, or no league data.
    _DEFAULT_LEAGUE_STATS = {"avg_goals": 2.7, "zero_rate": 0.07}

    # Features copied from ``raw_features`` -> default used when the value is
    # missing or falsy.
    # NOTE(review): ``value or default`` also replaces a genuine 0 / 0.0
    # (e.g. a 0.0 clean-sheet rate becomes 0.2).  Behaviour is kept as-is
    # because it may mirror the training pipeline - confirm before changing.
    _RAW_DEFAULTS = {
        # ELO
        "home_overall_elo": 1500, "away_overall_elo": 1500, "elo_diff": 0,
        "home_home_elo": 1500, "away_away_elo": 1500, "form_elo_diff": 0,
        # Form
        "home_goals_avg": 1.3, "home_conceded_avg": 1.2,
        "away_goals_avg": 1.2, "away_conceded_avg": 1.4,
        "home_clean_sheet_rate": 0.2, "away_clean_sheet_rate": 0.2,
        "home_scoring_rate": 0.8, "away_scoring_rate": 0.8,
        "home_winning_streak": 0, "away_winning_streak": 0,
        # H2H
        "h2h_home_win_rate": 0.33, "h2h_draw_rate": 0.33,
        "h2h_avg_goals": 2.5, "h2h_btts_rate": 0.5, "h2h_over25_rate": 0.5,
        # Stats
        "home_avg_possession": 0.5, "away_avg_possession": 0.5,
        "home_avg_shots_on_target": 4.0, "away_avg_shots_on_target": 3.5,
        "home_shot_conversion": 0.1, "away_shot_conversion": 0.1,
        # Defence context
        "home_xga": 1.2, "away_xga": 1.4,
        # Upset
        "upset_atmosphere": 0.0, "upset_motivation": 0.0,
        "upset_fatigue": 0.0, "upset_potential": 0.0,
        # Referee
        "referee_home_bias": 0.0, "referee_avg_goals": 2.5,
        "referee_cards_total": 4.0, "referee_avg_yellow": 3.0,
        "referee_experience": 0,
        # Momentum
        "home_momentum_score": 0.0, "away_momentum_score": 0.0,
        "momentum_diff": 0.0,
    }

    # Keys of ``ctx.odds_data`` copied to the feature ``"odds_" + key``
    # (missing or falsy values become 0.0).
    _ODDS_KEYS = (
        "ht_ms_h", "ht_ms_d", "ht_ms_a",
        "ou05_o", "ou05_u", "ou15_o", "ou15_u",
        "ou25_o", "ou25_u", "ou35_o", "ou35_u",
        "ht_ou05_o", "ht_ou05_u", "ht_ou15_o", "ht_ou15_u",
        "btts_y", "btts_n",
    )

    def __init__(self) -> None:
        self.conn: PgConnection | None = None
        self._connect_db()
        self.league_stats_cache: dict[str, dict[str, float]] = {}

    def _connect_db(self) -> None:
        """Open the optional DB connection; failures only print a warning."""
        try:
            # FeatureAdapter uses DB only for optional league stats enrichment.
            # Keep startup non-blocking when DB/tunnel is unavailable.
            if not os.getenv("DATABASE_URL", "").strip():
                return
            self.conn = psycopg2.connect(get_clean_dsn())
        except Exception as e:
            print(f"⚠️ FeatureAdapter DB connection failed: {e}")

    def get_features(self, ctx: Any) -> pd.DataFrame:
        """
        Construct feature vector from CalculationContext.

        Returns a DataFrame with 1 row and the columns of ``FEATURES`` (in
        that order).
        """
        raw = ctx.team_pred.raw_features
        odds = ctx.odds_data or {}
        # NOTE(review): ctx.upset_features / momentum_features /
        # referee_features used to be read here but were never used; those
        # columns come from raw_features only.  Confirm whether the engine
        # outputs were meant to override them.

        # 1. Match-result odds and their margin-free implied probabilities.
        ms_h = float(odds.get("ms_h") or 0)
        ms_d = float(odds.get("ms_d") or 0)
        ms_a = float(odds.get("ms_a") or 0)
        implied_home, implied_draw, implied_away = 0.33, 0.33, 0.33
        if ms_h > 0 and ms_d > 0 and ms_a > 0:
            raw_sum = 1 / ms_h + 1 / ms_d + 1 / ms_a
            implied_home = (1 / ms_h) / raw_sum
            implied_draw = (1 / ms_d) / raw_sum
            implied_away = (1 / ms_a) / raw_sum

        # 2. League features (defaults when the context carries no league id).
        league_stats = self._get_league_stats(getattr(ctx, "league_id", None))

        # 3. Assemble the row; explicit float casts avoid XGBoost 'object' dtypes.
        row = {
            name: float(raw.get(name) or default)
            for name, default in self._RAW_DEFAULTS.items()
        }
        row.update(("odds_" + key, float(odds.get(key) or 0.0)) for key in self._ODDS_KEYS)
        row.update({
            "odds_ms_h": ms_h,
            "odds_ms_d": ms_d,
            "odds_ms_a": ms_a,
            "implied_home": implied_home,
            "implied_draw": implied_draw,
            "implied_away": implied_away,
            "league_avg_goals": float(league_stats.get("avg_goals") or 2.7),
            "league_zero_goal_rate": float(league_stats.get("zero_rate") or 0.07),
        })

        # ``columns=FEATURES`` fixes the column order expected by the models.
        return pd.DataFrame([row], columns=FEATURES)

    def _end_transaction(self) -> None:
        """Roll back the implicit psycopg2 transaction on the shared connection.

        psycopg2 opens a transaction on the first statement; after a failed
        statement every later query on the connection fails until a rollback.
        """
        try:
            self.conn.rollback()
        except Exception as e:
            print(f"⚠️ FeatureAdapter rollback failed: {e}")

    def _get_league_stats(self, league_id: str | None) -> dict[str, float]:
        """Get cached league stats or default.

        Returns ``{"avg_goals": ..., "zero_rate": ...}`` for finished matches
        of the league in the last year; falls back to the defaults when the
        league id is empty, no connection exists, the query fails or the
        league has no data.  Only real query results are cached.
        """
        if not league_id:
            return dict(self._DEFAULT_LEAGUE_STATS)

        cached = self.league_stats_cache.get(league_id)
        if cached is not None:
            return cached

        if self.conn:
            res = None
            try:
                with self.conn.cursor() as cur:
                    # mst_utc is handled as epoch *milliseconds* elsewhere in
                    # this codebase, while EXTRACT(EPOCH ...) yields seconds,
                    # hence the "* 1000".
                    cur.execute("""
                        SELECT AVG(score_home + score_away),
                        AVG(CASE WHEN score_home=0 AND score_away=0 THEN 1.0 ELSE 0.0 END)
                        FROM matches
                        WHERE league_id = %s AND status = 'FT'
                        AND mst_utc > EXTRACT(EPOCH FROM NOW() - INTERVAL '1 year') * 1000
                    """, (league_id,))
                    res = cur.fetchone()
            except Exception as e:
                # Best-effort enrichment: report and fall back to defaults.
                print(f"⚠️ FeatureAdapter league stats query failed: {e}")
            finally:
                self._end_transaction()

            if res and res[0]:
                stats = {
                    "avg_goals": float(res[0]),
                    "zero_rate": float(res[1] if res[1] is not None else 0.07),
                }
                self.league_stats_cache[league_id] = stats
                return stats

        # Default fallback
        return dict(self._DEFAULT_LEAGUE_STATS)
# Process-wide adapter instance, created lazily on first request.
_adapter: FeatureAdapter | None = None


def get_feature_adapter() -> FeatureAdapter:
    """Return the shared ``FeatureAdapter``, constructing it on first use."""
    global _adapter
    if _adapter is not None:
        return _adapter
    _adapter = FeatureAdapter()
    return _adapter
+316
View File
@@ -0,0 +1,316 @@
"""
Head-to-Head (H2H) Feature Engine
Takımların birbirine karşı geçmiş performansını analiz eder.
"""
import os
import psycopg2
from typing import Dict, Optional, Tuple
from dataclasses import dataclass
from functools import lru_cache
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from data.db import get_clean_dsn
@dataclass
class H2HProfile:
    """Aggregated head-to-head record, seen from the requested home team.

    All rate properties fall back to a neutral prior when no meetings exist.
    """
    total_matches: int
    home_wins: int
    draws: int
    away_wins: int
    home_goals_total: int
    away_goals_total: int
    btts_count: int  # Both teams to score
    over25_count: int

    def _per_match(self, amount, fallback):
        # Shared "amount per meeting, or prior when there are none" rule.
        if self.total_matches > 0:
            return amount / self.total_matches
        return fallback

    @property
    def home_win_rate(self) -> float:
        return self._per_match(self.home_wins, 0.33)

    @property
    def draw_rate(self) -> float:
        return self._per_match(self.draws, 0.33)

    @property
    def away_win_rate(self) -> float:
        return self._per_match(self.away_wins, 0.33)

    @property
    def avg_total_goals(self) -> float:
        return self._per_match(self.home_goals_total + self.away_goals_total, 2.5)

    @property
    def btts_rate(self) -> float:
        return self._per_match(self.btts_count, 0.5)

    @property
    def over25_rate(self) -> float:
        return self._per_match(self.over25_count, 0.5)

    @property
    def home_dominance(self) -> float:
        """Home side's dominance score (between -1 and 1)."""
        if self.total_matches == 0:
            return 0
        return (self.home_wins - self.away_wins) / self.total_matches

    def to_features(self) -> Dict[str, float]:
        """Return the profile as a flat feature dictionary."""
        features = {'h2h_total_matches': self.total_matches}
        features['h2h_home_win_rate'] = self.home_win_rate
        features['h2h_draw_rate'] = self.draw_rate
        features['h2h_away_win_rate'] = self.away_win_rate
        features['h2h_avg_goals'] = self.avg_total_goals
        features['h2h_btts_rate'] = self.btts_rate
        features['h2h_over25_rate'] = self.over25_rate
        features['h2h_home_dominance'] = self.home_dominance
        return features
class H2HFeatureEngine:
    """
    Head-to-Head Feature Engine

    Analyses past meetings between two teams, always from the perspective of
    the team passed as ``home_team_id`` (results of meetings hosted by the
    other side are flipped).
    """

    # Meetings in both directions (A hosting B and B hosting A) with a
    # complete final score.
    _MEETINGS_SQL = """
        SELECT
        home_team_id, away_team_id,
        score_home, score_away
        FROM matches
        WHERE (
        (home_team_id = %s AND away_team_id = %s)
        OR
        (home_team_id = %s AND away_team_id = %s)
        )
        AND score_home IS NOT NULL
        AND score_away IS NOT NULL
    """

    def __init__(self):
        self.conn = None
        # Keyed by (home_team_id, away_team_id, limit); only lookups without a
        # ``before_date`` are cached.
        self._cache: Dict[Tuple[str, str, int], H2HProfile] = {}

    def get_conn(self):
        """Return the shared connection, (re)connecting when missing or closed."""
        if self.conn is None or self.conn.closed:
            self.conn = psycopg2.connect(get_clean_dsn())
        return self.conn

    def _fetch_oriented_scores(self, home_team_id: str, away_team_id: str,
                               before_date: Optional[int], limit: int):
        """Return ``[(home_goals, away_goals), ...]`` newest first.

        Scores are oriented so the first element always belongs to
        ``home_team_id``, whichever side hosted the meeting.
        """
        query = self._MEETINGS_SQL
        params = [home_team_id, away_team_id, away_team_id, home_team_id]
        if before_date:
            query += " AND mst_utc < %s"
            params.append(before_date)
        query += " ORDER BY mst_utc DESC LIMIT %s"
        params.append(limit)

        conn = self.get_conn()
        try:
            # The context manager closes the cursor even when execute() fails.
            with conn.cursor() as cur:
                cur.execute(query, params)
                rows = cur.fetchall()
        except psycopg2.Error:
            # Do not leave the shared connection in an aborted transaction.
            conn.rollback()
            raise

        return [
            (score_h, score_a) if m_home_id == home_team_id else (score_a, score_h)
            for m_home_id, _m_away_id, score_h, score_a in rows
        ]

    @staticmethod
    def _build_profile(scores) -> H2HProfile:
        """Aggregate oriented ``(home_goals, away_goals)`` pairs into a profile."""
        return H2HProfile(
            total_matches=len(scores),
            home_wins=sum(1 for h, a in scores if h > a),
            draws=sum(1 for h, a in scores if h == a),
            away_wins=sum(1 for h, a in scores if h < a),
            home_goals_total=sum(h for h, _ in scores),
            away_goals_total=sum(a for _, a in scores),
            btts_count=sum(1 for h, a in scores if h > 0 and a > 0),
            over25_count=sum(1 for h, a in scores if h + a > 2.5),
        )

    def get_h2h_profile(self, home_team_id: str, away_team_id: str,
                        before_date: Optional[int] = None,
                        limit: int = 20) -> H2HProfile:
        """
        Analyse the past meetings between two teams.

        Args:
            home_team_id: Home team ID
            away_team_id: Away team ID
            before_date: Only meetings before this time (mst_utc, milliseconds)
            limit: How many meetings to look back

        Returns:
            H2HProfile: Head-to-head analysis result (all-zero when the teams
            have never met).
        """
        # Only "full history" lookups are cached; the limit is part of the key
        # so profiles built from different windows never replace each other.
        use_cache = not before_date
        cache_key = (home_team_id, away_team_id, limit)
        if use_cache and cache_key in self._cache:
            return self._cache[cache_key]

        scores = self._fetch_oriented_scores(home_team_id, away_team_id, before_date, limit)
        profile = self._build_profile(scores)

        # Empty results are not cached, so newly stored meetings are picked up.
        if use_cache and scores:
            self._cache[cache_key] = profile
        return profile

    def get_features(self, home_team_id: str, away_team_id: str,
                     before_date: Optional[int] = None) -> Dict[str, float]:
        """Return the head-to-head profile as a feature dictionary."""
        profile = self.get_h2h_profile(home_team_id, away_team_id, before_date)
        return profile.to_features()

    def get_momentum(self, home_team_id: str, away_team_id: str,
                     before_date: Optional[int] = None) -> Dict[str, float]:
        """
        Momentum / trend analysis over the last 5 meetings.

        Returns the recent home dominance plus the length and type of the
        current streak (consecutive identical results, newest first).
        """
        scores = self._fetch_oriented_scores(home_team_id, away_team_id, before_date, limit=5)
        profile = self._build_profile(scores)

        streak = 0
        streak_type = None  # 'home', 'away', 'draw'
        for h_score, a_score in scores:
            result = 'home' if h_score > a_score else ('away' if h_score < a_score else 'draw')
            if streak_type is None:
                streak_type = result
                streak = 1
            elif result == streak_type:
                streak += 1
            else:
                break

        return {
            'h2h_recent_home_dominance': profile.home_dominance,
            'h2h_streak_length': streak,
            'h2h_streak_home': 1 if streak_type == 'home' else 0,
            'h2h_streak_away': 1 if streak_type == 'away' else 0,
            'h2h_streak_draw': 1 if streak_type == 'draw' else 0,
        }
# Process-wide engine instance, created lazily on first request.
_engine = None


def get_h2h_engine() -> H2HFeatureEngine:
    """Return the shared ``H2HFeatureEngine``, constructing it on first use."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = H2HFeatureEngine()
    return _engine
if __name__ == "__main__":
    # Manual smoke test: profile the pairing of one arbitrary scored match
    # taken from the database (team IDs are looked up rather than hard-coded).
    engine = get_h2h_engine()
    cur = engine.get_conn().cursor()
    cur.execute("""
        SELECT home_team_id, away_team_id, match_name
        FROM matches
        WHERE score_home IS NOT NULL
        LIMIT 1
    """)
    result = cur.fetchone()

    if result:
        home_id, away_id, name = result
        print("\n".join([
            f"\n🧪 Test: {name}",
            f" Home ID: {home_id}",
            f" Away ID: {away_id}",
        ]))

        profile = engine.get_h2h_profile(home_id, away_id)
        print("\n".join([
            "\n📊 H2H Profil:",
            f" Toplam Maç: {profile.total_matches}",
            f" Ev Sahibi Kazanma: {profile.home_win_rate:.1%}",
            f" Beraberlik: {profile.draw_rate:.1%}",
            f" Deplasman Kazanma: {profile.away_win_rate:.1%}",
            f" Ortalama Gol: {profile.avg_total_goals:.2f}",
            f" BTTS Oranı: {profile.btts_rate:.1%}",
            f" Üst 2.5 Oranı: {profile.over25_rate:.1%}",
            f" Ev Dominance: {profile.home_dominance:+.2f}",
        ]))

        features = engine.get_features(home_id, away_id)
        print(f"\n🔧 Features: {features}")
+343
View File
@@ -0,0 +1,343 @@
"""
HT/FT Tendency Feature Engine
================================
Produces team-level HT/FT tendency features for match prediction.
Computes ~15 features per match based on historical data:
- 1st half scoring/conceding rates
- Comeback rates
- Half-specific goal distribution
- League-level HT/FT profiles
All features are computed from the `matches` table using only data
BEFORE the match date (no future leakage).
"""
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from typing import Dict, Optional, Tuple
from dataclasses import dataclass, field
from data.db import get_clean_dsn
import psycopg2
@dataclass
class TeamHtftProfile:
    """Half-time / full-time tendencies of one team over a set of matches.

    Every rate falls back to a neutral prior when its denominator is zero.
    """
    matches: int = 0
    ht_scored: int = 0  # Matches where team scored in 1st half
    ht_conceded: int = 0  # Matches where team conceded in 1st half
    ht_leading: int = 0  # Matches where team led at HT
    ht_trailing: int = 0  # Matches where team trailed at HT
    comeback_wins: int = 0  # Trailing at HT -> Won
    goals_1h: int = 0
    goals_2h: int = 0
    conceded_1h: int = 0
    conceded_2h: int = 0

    @property
    def ht_scoring_rate(self):
        if self.matches > 0:
            return self.ht_scored / self.matches
        return 0.5

    @property
    def ht_concede_rate(self):
        if self.matches > 0:
            return self.ht_conceded / self.matches
        return 0.5

    @property
    def ht_win_rate(self):
        if self.matches > 0:
            return self.ht_leading / self.matches
        return 0.33

    @property
    def comeback_rate(self):
        if self.ht_trailing > 0:
            return self.comeback_wins / self.ht_trailing
        return 0.0

    @property
    def first_half_goal_pct(self):
        goals_scored = self.goals_1h + self.goals_2h
        if goals_scored > 0:
            return self.goals_1h / goals_scored
        return 0.5

    @property
    def second_half_surge(self):
        """2nd-half goals divided by 1st-half goals; above 1 means the team
        scores more after the break (1.0 when it has no 1st-half goals)."""
        if self.goals_1h > 0:
            return self.goals_2h / self.goals_1h
        return 1.0
@dataclass
class LeagueHtftProfile:
    """Half-time / full-time statistics aggregated over a league.

    Every derived value falls back to a fixed prior when there is no data.
    """
    matches: int = 0
    ht_goals_total: int = 0
    ft_goals_total: int = 0
    reversals: int = 0
    htft_counts: Dict[str, int] = field(default_factory=dict)

    @property
    def avg_ht_goals(self):
        if self.matches > 0:
            return self.ht_goals_total / self.matches
        return 1.0

    @property
    def avg_2h_goals(self):
        # Full-time average minus the first-half average.
        if self.matches > 0:
            full_time_avg = self.ft_goals_total / self.matches
        else:
            full_time_avg = 2.5
        return full_time_avg - self.avg_ht_goals

    @property
    def reversal_rate(self):
        if self.matches > 0:
            return self.reversals / self.matches
        return 0.05

    @property
    def first_half_pct(self):
        if self.ft_goals_total > 0:
            return self.ht_goals_total / self.ft_goals_total
        return 0.44
class HtftTendencyEngine:
    """
    Computes HT/FT tendency features for a given match.

    Uses historical data from the `matches` table, filtering by date to
    avoid future leakage.

    Features are based on team-level and league-level tendencies, which
    are DIFFERENT from the existing model features (ELO, form, H2H score).
    """

    # Shared WHERE fragment: finished football matches with a recorded HT score.
    _FINISHED_WITH_HT = (
        "sport = 'football' "
        "AND status = 'FT' "
        "AND ht_score_home IS NOT NULL "
        "AND ht_score_away IS NOT NULL"
    )

    def __init__(self):
        self.conn = None
        # Keyed by (team_id, is_home, before_date, limit).
        self._team_cache: Dict[Tuple[str, bool, Optional[int], int], "TeamHtftProfile"] = {}
        # Keyed by (league_id, before_date, limit).
        self._league_cache: Dict[Tuple[str, Optional[int], int], "LeagueHtftProfile"] = {}

    def get_conn(self):
        """Return the open connection, (re)connecting when it is missing or closed."""
        if self.conn is None or self.conn.closed:
            dsn = get_clean_dsn()
            self.conn = psycopg2.connect(dsn)
        return self.conn

    def _fetch_rows(self, query: str, params: list) -> list:
        """Run a read query and return all rows.

        The cursor is always closed. On failure the transaction is rolled
        back before the exception is re-raised, so the shared connection is
        not left in an aborted state for the next query.
        """
        conn = self.get_conn()
        try:
            with conn.cursor() as cur:
                cur.execute(query, params)
                return cur.fetchall()
        except Exception:
            if not conn.closed:
                conn.rollback()
            raise

    def _get_team_htft_profile(
        self,
        team_id: str,
        is_home: bool,
        before_date: Optional[int] = None,
        limit: int = 30,
    ) -> "TeamHtftProfile":
        """
        Compute HT/FT profile for a team from their recent matches.

        Args:
            team_id: Team ID
            is_home: True = only home matches, False = only away matches
            before_date: Only use matches before this timestamp (ms UTC)
            limit: Number of recent matches to consider

        Returns:
            The (cached) TeamHtftProfile for this exact argument combination.
        """
        # `limit` is part of the key: a profile built from 30 matches must not
        # be served to a caller asking for a different window.
        cache_key = (team_id, is_home, before_date, limit)
        if cache_key in self._team_cache:
            return self._team_cache[cache_key]

        # Columns are selected as (mine, opponent) so the loop below is
        # independent of which side the team played on.
        if is_home:
            columns = "ht_score_home, ht_score_away, score_home, score_away"
            team_column = "home_team_id"
        else:
            columns = "ht_score_away, ht_score_home, score_away, score_home"
            team_column = "away_team_id"

        query = (
            f"SELECT {columns} FROM matches "
            f"WHERE {team_column} = %s AND {self._FINISHED_WITH_HT}"
        )
        params = [team_id]
        if before_date:
            query += " AND mst_utc < %s"
            params.append(before_date)
        query += " ORDER BY mst_utc DESC LIMIT %s"
        params.append(limit)

        rows = self._fetch_rows(query, params)

        profile = TeamHtftProfile()
        profile.matches = len(rows)
        for ht_mine, ht_opp, ft_mine, ft_opp in rows:
            # 1st half scoring
            if ht_mine > 0:
                profile.ht_scored += 1
            if ht_opp > 0:
                profile.ht_conceded += 1

            # HT situation
            if ht_mine > ht_opp:
                profile.ht_leading += 1
            elif ht_mine < ht_opp:
                profile.ht_trailing += 1
                # Comeback: trailed at HT but won at FT
                if ft_mine > ft_opp:
                    profile.comeback_wins += 1

            # Goal distribution
            profile.goals_1h += ht_mine
            profile.goals_2h += (ft_mine - ht_mine)
            profile.conceded_1h += ht_opp
            profile.conceded_2h += (ft_opp - ht_opp)

        self._team_cache[cache_key] = profile
        return profile

    def _get_league_htft_profile(
        self,
        league_id: str,
        before_date: Optional[int] = None,
        limit: int = 500,
    ) -> "LeagueHtftProfile":
        """
        Compute HT/FT profile for a league.

        Args:
            league_id: League ID
            before_date: Only use matches before this timestamp (ms UTC)
            limit: Number of most recent matches to aggregate (default 500)
        """
        cache_key = (league_id, before_date, limit)
        if cache_key in self._league_cache:
            return self._league_cache[cache_key]

        query = (
            "SELECT ht_score_home, ht_score_away, score_home, score_away "
            f"FROM matches WHERE league_id = %s AND {self._FINISHED_WITH_HT}"
        )
        params = [league_id]
        if before_date:
            query += " AND mst_utc < %s"
            params.append(before_date)
        query += " ORDER BY mst_utc DESC LIMIT %s"
        params.append(limit)

        rows = self._fetch_rows(query, params)

        profile = LeagueHtftProfile()
        profile.matches = len(rows)
        for hth, hta, sh, sa in rows:
            profile.ht_goals_total += hth + hta
            profile.ft_goals_total += sh + sa

            # Classify HT/FT as "1" (home), "2" (away) or "X" (draw)
            ht = "1" if hth > hta else ("2" if hth < hta else "X")
            ft = "1" if sh > sa else ("2" if sh < sa else "X")
            htft = f"{ht}/{ft}"
            profile.htft_counts[htft] = profile.htft_counts.get(htft, 0) + 1

            # A reversal is a lead at HT that became a defeat at FT.
            if htft in ("1/2", "2/1"):
                profile.reversals += 1

        self._league_cache[cache_key] = profile
        return profile

    def get_features(
        self,
        home_team_id: str,
        away_team_id: str,
        league_id: Optional[str] = None,
        before_date: Optional[int] = None,
    ) -> Dict[str, float]:
        """
        Get HT/FT tendency features for a match.

        Returns a dict with 17 features: 6 per team, 3 league-level and
        2 sample-size indicators.
        """
        # Team profiles (home side for home team, away side for away team)
        home_prof = self._get_team_htft_profile(home_team_id, is_home=True, before_date=before_date)
        away_prof = self._get_team_htft_profile(away_team_id, is_home=False, before_date=before_date)

        # League profile; an empty profile supplies the default rates
        if league_id:
            league_prof = self._get_league_htft_profile(league_id, before_date=before_date)
        else:
            league_prof = LeagueHtftProfile()

        features = {
            # Home team HT/FT tendencies
            "htft_home_ht_scoring_rate": home_prof.ht_scoring_rate,
            "htft_home_ht_concede_rate": home_prof.ht_concede_rate,
            "htft_home_ht_win_rate": home_prof.ht_win_rate,
            "htft_home_comeback_rate": home_prof.comeback_rate,
            "htft_home_first_half_goal_pct": home_prof.first_half_goal_pct,
            "htft_home_second_half_surge": min(home_prof.second_half_surge, 3.0),
            # Away team HT/FT tendencies
            "htft_away_ht_scoring_rate": away_prof.ht_scoring_rate,
            "htft_away_ht_concede_rate": away_prof.ht_concede_rate,
            "htft_away_ht_win_rate": away_prof.ht_win_rate,
            "htft_away_comeback_rate": away_prof.comeback_rate,
            "htft_away_first_half_goal_pct": away_prof.first_half_goal_pct,
            "htft_away_second_half_surge": min(away_prof.second_half_surge, 3.0),
            # League-level
            "htft_league_avg_ht_goals": league_prof.avg_ht_goals,
            "htft_league_reversal_rate": league_prof.reversal_rate,
            "htft_league_first_half_pct": league_prof.first_half_pct,
            # Data quality (how many matches we have for these features)
            "htft_home_sample_size": min(home_prof.matches / 30.0, 1.0),
            "htft_away_sample_size": min(away_prof.matches / 30.0, 1.0),
        }
        return features

    def clear_cache(self):
        """Clear internal caches (useful between batches)."""
        self._team_cache.clear()
        self._league_cache.clear()
# Module-level singleton, created lazily on first access
_engine = None


def get_htft_tendency_engine() -> HtftTendencyEngine:
    """Return the shared HtftTendencyEngine, creating it on the first call."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = HtftTendencyEngine()
    return _engine
# ── Test ─────────────────────────────────────────────────────────────────────
# Manual smoke test: prints the HT/FT features for the 3 latest finished matches.
if __name__ == "__main__":
    engine = get_htft_tendency_engine()

    cur = engine.get_conn().cursor()
    cur.execute("""
        SELECT home_team_id, away_team_id, league_id, mst_utc, match_name
        FROM matches
        WHERE sport = 'football' AND status = 'FT'
        AND home_team_id IS NOT NULL AND away_team_id IS NOT NULL
        ORDER BY mst_utc DESC LIMIT 3
    """)
    matches = cur.fetchall()
    cur.close()

    for row in matches:
        hid, aid, lid, mst, name = row
        print(f"\n🏟️ {name}")
        features = engine.get_features(hid, aid, lid, mst)
        for k, v in sorted(features.items()):
            print(f" {k}: {v:.4f}")
+434
View File
@@ -0,0 +1,434 @@
"""
Momentum Engine - Son Maç Trendleri
V9 Model için takımların anlık form trendini analiz eder.
Faktörler:
1. Gol atma trendi (artan/azalan/stabil)
2. Yenilmezlik/yenilgi serisi
3. Son maç psikolojisi (büyük galibiyet/mağlubiyet etkisi)
4. Ev/Deplasman momentum farkı
"""
import os
import sys
from typing import Dict, List, Tuple, Optional
from dataclasses import dataclass, field
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
try:
import psycopg2
from psycopg2.extras import RealDictCursor
except ImportError:
psycopg2 = None
@dataclass
class MomentumData:
    """Momentum metrics for a single team."""

    # Scoring trend: -1 (falling) to +1 (rising)
    goals_trend: float = 0.0
    # Conceding trend: -1 (falling) to +1 (rising); negative is good
    conceded_trend: float = 0.0
    # Current run of matches without a defeat
    unbeaten_streak: int = 0
    # Current run of consecutive defeats
    losing_streak: int = 0
    # Current run of consecutive wins
    winning_streak: int = 0
    # Psychological effect of the most recent match (-1 to +1)
    last_match_impact: float = 0.0
    # Combined momentum (-1 to +1)
    momentum_score: float = 0.0
    # One of "improving", "declining", "stable"
    form_direction: str = "stable"
    # (xG_For - Real_Goals) in last matches (>0 means underperforming)
    xg_underperformance: float = 0.0
    # (Real_Conceded - xG_Against) in last matches
    xg_conceded_diff: float = 0.0
class MomentumEngine:
    """
    Analyses the trend in a team's most recent matches.

    Covers rising/falling form, current streaks and the psychological
    effect of the last result, and combines them into a momentum score.
    """

    def __init__(self):
        self.conn = None
        self._connect_db()

    def _connect_db(self):
        """Connect to the database; ``self.conn`` stays/becomes None on failure."""
        if psycopg2 is None:
            return
        try:
            from data.db import get_clean_dsn
            self.conn = psycopg2.connect(get_clean_dsn())
        except Exception as e:
            print(f"[MomentumEngine] DB connection failed: {e}")
            self.conn = None

    def _get_conn(self):
        """Return the connection, reconnecting first if it is missing or closed."""
        if self.conn is None or self.conn.closed:
            self._connect_db()
        return self.conn

    @staticmethod
    def _rollback_quietly(conn) -> None:
        """Best-effort rollback so a failed query does not poison the connection."""
        try:
            conn.rollback()
        except Exception as rollback_error:
            print(f"[MomentumEngine] Rollback failed: {rollback_error}")

    @staticmethod
    def _goals_for_against(match: Dict, team_id: str) -> Tuple[int, int]:
        """Return (goals_for, goals_against) from ``team_id``'s point of view."""
        if match['home_team_id'] == team_id:
            return match['score_home'], match['score_away']
        return match['score_away'], match['score_home']

    def get_recent_matches(
        self,
        team_id: str,
        before_date_ms: int,
        limit: int = 5,
        home_only: bool = False,
        away_only: bool = False
    ) -> List[Dict]:
        """
        Fetch the team's most recent matches, newest first.

        Args:
            team_id: Team ID
            before_date_ms: Only matches with ``mst_utc`` below this value
            limit: Maximum number of matches
            home_only: Restrict to home matches (wins over ``away_only``)
            away_only: Restrict to away matches

        Returns:
            List of matches with scores and home/away info; an empty list
            when there is no connection or the query fails.
        """
        conn = self._get_conn()
        if conn is None:
            return []

        # Both scores must be present: every consumer compares them numerically.
        conditions = [
            "mst_utc < %s",
            "score_home IS NOT NULL",
            "score_away IS NOT NULL",
        ]
        params = [before_date_ms]

        if home_only:
            conditions.append("home_team_id = %s")
            params.append(team_id)
        elif away_only:
            conditions.append("away_team_id = %s")
            params.append(team_id)
        else:
            conditions.append("(home_team_id = %s OR away_team_id = %s)")
            params.extend([team_id, team_id])

        query = f"""
            SELECT
                id, home_team_id, away_team_id,
                score_home, score_away, mst_utc
            FROM matches
            WHERE {' AND '.join(conditions)}
            ORDER BY mst_utc DESC
            LIMIT %s
        """
        params.append(limit)

        try:
            # The context manager closes the cursor even when execute() raises.
            with conn.cursor(cursor_factory=RealDictCursor) as cursor:
                cursor.execute(query, params)
                return cursor.fetchall()
        except Exception as e:
            print(f"[MomentumEngine] Query error: {e}")
            # Without a rollback every later query on this connection would fail.
            self._rollback_quietly(conn)
            return []

    def calculate_goals_trend(self, matches: List[Dict], team_id: str) -> Tuple[float, float]:
        """
        Compute the scoring and conceding trends.

        Compares the average of the 3 most recent matches with the average
        of the older ones (``matches`` is expected newest first).

        Returns:
            (goals_trend, conceded_trend), each clamped to -1 .. +1;
            (0.0, 0.0) when fewer than 3 matches are given.
        """
        if len(matches) < 3:
            return 0.0, 0.0

        pairs = [self._goals_for_against(match, team_id) for match in matches]
        goals = [scored for scored, _ in pairs]
        conceded = [against for _, against in pairs]

        def recent_vs_older(values):
            recent = sum(values[:3]) / 3
            older_values = values[3:]
            # With no older matches the trend is flat by definition.
            older = sum(older_values) / len(older_values) if older_values else recent
            return min(max((recent - older) / 2, -1), 1)

        return recent_vs_older(goals), recent_vs_older(conceded)

    def calculate_streaks(self, matches: List[Dict], team_id: str) -> Tuple[int, int, int]:
        """
        Compute the current winning, unbeaten and losing streaks.

        Streaks are counted from the most recent match backwards
        (``matches`` is expected newest first) and stop at the first result
        that breaks them, so at most one of unbeaten/losing is non-zero.

        Returns:
            (winning_streak, unbeaten_streak, losing_streak)
        """
        winning = 0
        unbeaten = 0
        losing = 0

        for match in matches:
            goals_for, goals_against = self._goals_for_against(match, team_id)

            if goals_for < goals_against:
                if unbeaten:
                    break  # a defeat ends the current unbeaten run
                losing += 1
            else:
                if losing:
                    break  # a win or draw ends the current losing run
                # The winning run only grows while every match so far was a win.
                if goals_for > goals_against and winning == unbeaten:
                    winning += 1
                unbeaten += 1

        return winning, unbeaten, losing

    def calculate_last_match_impact(self, matches: List[Dict], team_id: str) -> float:
        """
        Compute the psychological effect of the most recent match.

        Big win = +1, big defeat = -1, scaled by goal difference.

        Returns:
            impact score: -1 to +1 (0.0 when ``matches`` is empty)
        """
        if not matches:
            return 0.0

        goals_for, goals_against = self._goals_for_against(matches[0], team_id)
        goal_diff = goals_for - goals_against

        if goal_diff >= 4:
            return 1.0  # very big win
        if goal_diff >= 2:
            return 0.6
        if goal_diff == 1:
            return 0.3
        if goal_diff == 0:
            return 0.0
        if goal_diff == -1:
            return -0.3
        if goal_diff >= -3:
            return -0.6
        return -1.0  # very big defeat

    def calculate_xg_underperformance(self, matches: List[Dict], team_id: str) -> Tuple[float, float]:
        """
        Calculate if a team chronically underperforms its xG (Expected Goals).

        NOTE(review): the xG values here are synthetic, derived from the
        actual score as ``max(0.5, goals * 1.5 - 0.5)``, so the result is a
        function of the scores alone rather than of real chance quality.

        Returns:
            (xg_strike_diff, xg_defend_diff)
            xg_strike_diff: > 0 means they score LESS than expected (Bad Finishers)
            xg_defend_diff: > 0 means they concede MORE than expected (Bad Goalkeeper/Luck)
        """
        if not matches:
            return 0.0, 0.0

        real_scored = 0
        xg_created = 0.0
        real_conceded = 0
        xg_conceded = 0.0

        for match in matches:
            scored, against = self._goals_for_against(match, team_id)
            real_scored += scored
            real_conceded += against
            # Synthetic xG (mock based on score, no stats table available)
            xg_created += max(0.5, scored * 1.5 - 0.5)
            xg_conceded += max(0.5, against * 1.5 - 0.5)

        # Per-match differences
        match_count = len(matches)
        xg_strike_diff = (xg_created - real_scored) / match_count
        xg_defend_diff = (real_conceded - xg_conceded) / match_count
        return xg_strike_diff, xg_defend_diff

    def calculate_momentum(
        self,
        team_id: str,
        before_date_ms: int,
        match_limit: int = 5
    ) -> "MomentumData":
        """
        Run the full momentum analysis for a team.

        Returns:
            MomentumData with all metrics (defaults when no matches are found)
        """
        data = MomentumData()

        matches = self.get_recent_matches(team_id, before_date_ms, match_limit)
        if not matches:
            return data

        # 1. Goal trends
        data.goals_trend, data.conceded_trend = self.calculate_goals_trend(matches, team_id)

        # 2. Streaks
        data.winning_streak, data.unbeaten_streak, data.losing_streak = \
            self.calculate_streaks(matches, team_id)

        # 3. Impact of the last match
        data.last_match_impact = self.calculate_last_match_impact(matches, team_id)

        # 4. Form direction
        if data.goals_trend > 0.3 and data.conceded_trend < 0:
            data.form_direction = "improving"
        elif data.goals_trend < -0.3 or data.conceded_trend > 0.3:
            data.form_direction = "declining"
        else:
            data.form_direction = "stable"

        # 5. xG underperformance
        data.xg_underperformance, data.xg_conceded_diff = \
            self.calculate_xg_underperformance(matches, team_id)

        # 6. Combined momentum score
        momentum = 0.0

        # Scoring trend plus (inverted) conceding trend
        momentum += data.goals_trend * 0.25
        momentum += (-data.conceded_trend) * 0.20

        # Streak bonuses
        if data.winning_streak >= 3:
            momentum += 0.25
        elif data.winning_streak >= 2:
            momentum += 0.15
        elif data.unbeaten_streak >= 5:
            momentum += 0.15

        if data.losing_streak >= 3:
            momentum -= 0.30
        elif data.losing_streak >= 2:
            momentum -= 0.15

        # Last match effect
        momentum += data.last_match_impact * 0.20

        # Penalty: scoring more than 0.5 goals per match below xG
        if data.xg_underperformance > 0.5:
            momentum -= min(0.3, data.xg_underperformance * 0.2)

        # Penalty: conceding more than 0.5 goals per match above xG
        if data.xg_conceded_diff > 0.5:
            momentum -= min(0.3, data.xg_conceded_diff * 0.2)

        data.momentum_score = min(max(momentum, -1), 1)
        return data

    def get_features(
        self,
        home_team_id: str,
        away_team_id: str,
        match_date_ms: int
    ) -> Dict[str, float]:
        """
        Return the feature dict for the model.

        Contains 10 features per side plus 3 home-minus-away differences.
        """
        home_momentum = self.calculate_momentum(home_team_id, match_date_ms)
        away_momentum = self.calculate_momentum(away_team_id, match_date_ms)

        # Form direction encoding
        direction_map = {"improving": 1, "stable": 0, "declining": -1}

        return {
            # Home team momentum
            "home_momentum_score": home_momentum.momentum_score,
            "home_goals_trend": home_momentum.goals_trend,
            "home_conceded_trend": home_momentum.conceded_trend,
            "home_winning_streak": min(home_momentum.winning_streak, 5),
            "home_unbeaten_streak": min(home_momentum.unbeaten_streak, 10),
            "home_losing_streak": min(home_momentum.losing_streak, 5),
            "home_last_impact": home_momentum.last_match_impact,
            "home_form_direction": direction_map.get(home_momentum.form_direction, 0),
            "home_xg_underperf": home_momentum.xg_underperformance,
            "home_xg_conceded_diff": home_momentum.xg_conceded_diff,
            # Away team momentum
            "away_momentum_score": away_momentum.momentum_score,
            "away_goals_trend": away_momentum.goals_trend,
            "away_conceded_trend": away_momentum.conceded_trend,
            "away_winning_streak": min(away_momentum.winning_streak, 5),
            "away_unbeaten_streak": min(away_momentum.unbeaten_streak, 10),
            "away_losing_streak": min(away_momentum.losing_streak, 5),
            "away_last_impact": away_momentum.last_match_impact,
            "away_form_direction": direction_map.get(away_momentum.form_direction, 0),
            "away_xg_underperf": away_momentum.xg_underperformance,
            "away_xg_conceded_diff": away_momentum.xg_conceded_diff,
            # Differences
            "momentum_diff": home_momentum.momentum_score - away_momentum.momentum_score,
            "trend_diff": (home_momentum.goals_trend - home_momentum.conceded_trend) -
                          (away_momentum.goals_trend - away_momentum.conceded_trend),
            "xg_underperf_diff": home_momentum.xg_underperformance - away_momentum.xg_underperformance,
        }
# Lazily created singleton instance
_engine_instance = None


def get_momentum_engine() -> MomentumEngine:
    """Return the shared MomentumEngine, creating it on the first call."""
    global _engine_instance
    if _engine_instance is not None:
        return _engine_instance
    _engine_instance = MomentumEngine()
    return _engine_instance
# Test
if __name__ == "__main__":
    engine = get_momentum_engine()

    # Banner
    for banner_line in ("=" * 60, "MOMENTUM ENGINE TEST", "=" * 60):
        print(banner_line)

    # Sample values (no DB needed)
    sample_values = {
        "goals_trend": 0.5,
        "conceded_trend": -0.3,
        "winning_streak": 3,
        "unbeaten_streak": 5,
        "losing_streak": 0,
        "last_match_impact": 0.6,
        "form_direction": "improving",
    }
    data = MomentumData(**sample_values)

    print(f"Goals Trend: {data.goals_trend}")
    print(f"Conceded Trend: {data.conceded_trend}")
    print(f"Winning Streak: {data.winning_streak}")
    print(f"Unbeaten Streak: {data.unbeaten_streak}")
    print(f"Form Direction: {data.form_direction}")
    print(f"Last Match Impact: {data.last_match_impact}")
+371
View File
@@ -0,0 +1,371 @@
"""
Poisson Engine - Matematiksel Gol Modeli
V9 Model için Poisson dağılımı ile gol olasılıkları hesaplar.
Özellikler:
1. Exact score olasılıkları (0-0, 1-0, 1-1, 2-1, vb.)
2. Over/Under olasılıkları (matematiksel)
3. BTTS (Karşılıklı Gol) olasılıkları
4. Expected Goals (xG) tahmini
"""
import math
from typing import Dict, Tuple, Optional
from dataclasses import dataclass, field
def poisson_prob(lam: float, k: int) -> float:
    """
    Poisson probability mass function.

    P(X = k) = (λ^k * e^(-λ)) / k!

    A non-positive rate puts all probability on k == 0.
    """
    if lam <= 0:
        if k == 0:
            return 1.0
        return 0.0

    numerator = math.pow(lam, k) * math.exp(-lam)
    return numerator / math.factorial(k)
@dataclass
class PoissonPrediction:
    """Results of a Poisson prediction."""

    # Expected goals: home, away and combined
    home_xg: float = 0.0
    away_xg: float = 0.0
    total_xg: float = 0.0

    # Match result probabilities
    home_win_prob: float = 0.0
    draw_prob: float = 0.0
    away_win_prob: float = 0.0

    # Over/Under probabilities
    over_15_prob: float = 0.0
    over_25_prob: float = 0.0
    over_35_prob: float = 0.0
    under_15_prob: float = 0.0
    under_25_prob: float = 0.0
    under_35_prob: float = 0.0

    # Both teams to score
    btts_yes_prob: float = 0.0
    btts_no_prob: float = 0.0

    # Most likely scorelines
    most_likely_scores: list = field(default_factory=list)
class PoissonEngine:
    """
    Computes goal probabilities with the Poisson distribution.

    A purely statistical approach, independent of machine learning.
    """

    # League-level average goals (fallback values)
    DEFAULT_HOME_XG = 1.45
    DEFAULT_AWAY_XG = 1.15
    DEFAULT_LEAGUE_AVG = 2.60

    def __init__(self):
        self.max_goals = 7  # Highest per-team goal count in the score grid

    def calculate_xg(
        self,
        home_goals_avg: float,
        home_conceded_avg: float,
        away_goals_avg: float,
        away_conceded_avg: float,
        league_home_avg: float = None,
        league_away_avg: float = None,
        league_total_avg: float = None
    ) -> Tuple[float, float]:
        """
        Compute expected goals (xG) for both sides.

        xG = attack strength * opponent defence weakness * league average.

        Args:
            home_goals_avg: Home team's average goals scored
            home_conceded_avg: Home team's average goals conceded
            away_goals_avg: Away team's average goals scored
            away_conceded_avg: Away team's average goals conceded
            league_home_avg: League average goals by home teams
                (defaults to DEFAULT_HOME_XG)
            league_away_avg: League average goals by away teams
                (defaults to DEFAULT_AWAY_XG)
            league_total_avg: Accepted for backward compatibility; unused

        Returns:
            (home_xg, away_xg), clamped to [0.3, 4.0] and [0.2, 3.5].
        """
        if league_home_avg is None:
            league_home_avg = self.DEFAULT_HOME_XG
        if league_away_avg is None:
            league_away_avg = self.DEFAULT_AWAY_XG

        # Home attack = home goals / league home goals
        home_attack = home_goals_avg / league_home_avg if league_home_avg > 0 else 1.0
        # Away defence weakness = away conceded / league average conceded by
        # away teams. Away teams concede what home teams score, so the
        # baseline is the league HOME scoring average.
        away_defense = away_conceded_avg / league_home_avg if league_home_avg > 0 else 1.0
        # Away attack = away goals / league away goals
        away_attack = away_goals_avg / league_away_avg if league_away_avg > 0 else 1.0
        # Home defence weakness: home teams concede what away teams score,
        # so the baseline is the league AWAY scoring average.
        home_defense = home_conceded_avg / league_away_avg if league_away_avg > 0 else 1.0

        # Expected Goals
        home_xg = home_attack * away_defense * league_home_avg
        away_xg = away_attack * home_defense * league_away_avg

        # Clamp extreme values
        home_xg = max(0.3, min(home_xg, 4.0))
        away_xg = max(0.2, min(away_xg, 3.5))
        return home_xg, away_xg

    def calculate_score_matrix(
        self,
        home_xg: float,
        away_xg: float
    ) -> Dict[Tuple[int, int], float]:
        """
        Compute the probability of every score from 0-0 up to max_goals each.

        Returns:
            Dict[(home_goals, away_goals)] = probability
        """
        goal_range = range(self.max_goals + 1)
        # Each marginal is evaluated once per goal count instead of per cell.
        home_probs = [poisson_prob(home_xg, goals) for goals in goal_range]
        away_probs = [poisson_prob(away_xg, goals) for goals in goal_range]
        return {
            (home_goals, away_goals): home_probs[home_goals] * away_probs[away_goals]
            for home_goals in goal_range
            for away_goals in goal_range
        }

    def calculate_match_odds(
        self,
        home_xg: float,
        away_xg: float
    ) -> Tuple[float, float, float]:
        """
        Compute the 1X2 probabilities.

        Returns:
            (home_win, draw, away_win) probabilities, normalised to sum to 1
        """
        matrix = self.calculate_score_matrix(home_xg, away_xg)

        home_win = 0.0
        draw = 0.0
        away_win = 0.0
        for (h, a), prob in matrix.items():
            if h > a:
                home_win += prob
            elif h == a:
                draw += prob
            else:
                away_win += prob

        # Normalise: the truncated grid sums to slightly less than 1
        total = home_win + draw + away_win
        if total > 0:
            home_win /= total
            draw /= total
            away_win /= total
        return home_win, draw, away_win

    def calculate_over_under(
        self,
        home_xg: float,
        away_xg: float
    ) -> Dict[str, float]:
        """
        Compute the over/under 1.5, 2.5 and 3.5 probabilities.

        The "under" side is summed from the grid and "over" is its
        complement. Every score with at most 3 total goals lies inside the
        grid (for max_goals >= 3), so the mass cut off beyond max_goals is
        counted as "over", where it belongs.
        """
        matrix = self.calculate_score_matrix(home_xg, away_xg)

        under_15 = 0.0
        under_25 = 0.0
        under_35 = 0.0
        for (h, a), prob in matrix.items():
            total = h + a
            if total < 1.5:
                under_15 += prob
            if total < 2.5:
                under_25 += prob
            if total < 3.5:
                under_35 += prob

        return {
            "over_15": 1 - under_15,
            "over_25": 1 - under_25,
            "over_35": 1 - under_35,
            "under_15": under_15,
            "under_25": under_25,
            "under_35": under_35,
        }

    def calculate_btts(
        self,
        home_xg: float,
        away_xg: float
    ) -> Tuple[float, float]:
        """
        Both Teams To Score probability.

        Returns:
            (btts_yes, btts_no)
        """
        # P(side scores at least 1) = 1 - P(side scores 0)
        home_scores = 1 - poisson_prob(home_xg, 0)
        away_scores = 1 - poisson_prob(away_xg, 0)

        # The two goal counts are modelled as independent
        btts_yes = home_scores * away_scores
        btts_no = 1 - btts_yes
        return btts_yes, btts_no

    def get_most_likely_scores(
        self,
        home_xg: float,
        away_xg: float,
        top_n: int = 5
    ) -> list:
        """
        Return the ``top_n`` most probable scorelines.

        Each entry is ``{"score": "H-A", "probability": <percent, 1 decimal>}``.
        """
        matrix = self.calculate_score_matrix(home_xg, away_xg)

        # Sort by probability, highest first
        sorted_scores = sorted(matrix.items(), key=lambda x: x[1], reverse=True)
        return [
            {"score": f"{h}-{a}", "probability": round(prob * 100, 1)}
            for (h, a), prob in sorted_scores[:top_n]
        ]

    def predict(
        self,
        home_goals_avg: float,
        home_conceded_avg: float,
        away_goals_avg: float,
        away_conceded_avg: float,
        league_home_avg: float = None,
        league_away_avg: float = None,
        league_total_avg: float = None
    ) -> "PoissonPrediction":
        """
        Full Poisson prediction.

        Returns a PoissonPrediction with xG rounded to 2 decimals and
        probabilities rounded to 3 decimals.
        """
        prediction = PoissonPrediction()

        # 1. xG
        home_xg, away_xg = self.calculate_xg(
            home_goals_avg, home_conceded_avg,
            away_goals_avg, away_conceded_avg,
            league_home_avg, league_away_avg, league_total_avg
        )
        prediction.home_xg = round(home_xg, 2)
        prediction.away_xg = round(away_xg, 2)
        prediction.total_xg = round(home_xg + away_xg, 2)

        # 2. Match result
        hw, d, aw = self.calculate_match_odds(home_xg, away_xg)
        prediction.home_win_prob = round(hw, 3)
        prediction.draw_prob = round(d, 3)
        prediction.away_win_prob = round(aw, 3)

        # 3. Over/Under
        ou = self.calculate_over_under(home_xg, away_xg)
        prediction.over_15_prob = round(ou["over_15"], 3)
        prediction.over_25_prob = round(ou["over_25"], 3)
        prediction.over_35_prob = round(ou["over_35"], 3)
        prediction.under_15_prob = round(ou["under_15"], 3)
        prediction.under_25_prob = round(ou["under_25"], 3)
        prediction.under_35_prob = round(ou["under_35"], 3)

        # 4. BTTS
        btts_yes, btts_no = self.calculate_btts(home_xg, away_xg)
        prediction.btts_yes_prob = round(btts_yes, 3)
        prediction.btts_no_prob = round(btts_no, 3)

        # 5. Most likely scores
        prediction.most_likely_scores = self.get_most_likely_scores(home_xg, away_xg)
        return prediction

    def get_features(
        self,
        home_goals_avg: float,
        home_conceded_avg: float,
        away_goals_avg: float,
        away_conceded_avg: float,
        league_home_avg: float = None,
        league_away_avg: float = None,
        league_total_avg: float = None
    ) -> Dict[str, float]:
        """
        Feature dict for the model (10 ``poisson_*`` values).
        """
        pred = self.predict(
            home_goals_avg, home_conceded_avg,
            away_goals_avg, away_conceded_avg,
            league_home_avg, league_away_avg, league_total_avg
        )
        return {
            "poisson_home_xg": pred.home_xg,
            "poisson_away_xg": pred.away_xg,
            "poisson_total_xg": pred.total_xg,
            "poisson_home_win": pred.home_win_prob,
            "poisson_draw": pred.draw_prob,
            "poisson_away_win": pred.away_win_prob,
            "poisson_over_15": pred.over_15_prob,
            "poisson_over_25": pred.over_25_prob,
            "poisson_over_35": pred.over_35_prob,
            "poisson_btts_yes": pred.btts_yes_prob,
        }
# Lazily created singleton
_engine_instance = None


def get_poisson_engine() -> PoissonEngine:
    """Return the shared PoissonEngine, creating it on the first call."""
    global _engine_instance
    if _engine_instance is not None:
        return _engine_instance
    _engine_instance = PoissonEngine()
    return _engine_instance
# Test
if __name__ == "__main__":
    engine = get_poisson_engine()

    # Example: strong home side vs weak away side
    for banner_line in (
        "=" * 60,
        "POISSON ENGINE TEST",
        "Galatasaray (ev) vs Antalyaspor (deplasman)",
        "=" * 60,
    ):
        print(banner_line)

    example_input = {
        "home_goals_avg": 2.1,      # home side: goals scored at home
        "home_conceded_avg": 0.8,   # home side: goals conceded at home
        "away_goals_avg": 0.9,      # away side: goals scored away
        "away_conceded_avg": 1.8,   # away side: goals conceded away
        "league_home_avg": 1.5,
        "league_away_avg": 1.1,
    }
    pred = engine.predict(**example_input)

    print(f"\n📊 Expected Goals:")
    print(f" Ev Sahibi xG: {pred.home_xg}")
    print(f" Deplasman xG: {pred.away_xg}")
    print(f" Toplam xG: {pred.total_xg}")

    print(f"\n🎯 Maç Sonucu:")
    print(f" 1 (Ev): {pred.home_win_prob*100:.1f}%")
    print(f" X (Beraberlik): {pred.draw_prob*100:.1f}%")
    print(f" 2 (Deplasman): {pred.away_win_prob*100:.1f}%")

    print(f"\n⚽ Alt/Üst:")
    print(f" 2.5 Üst: {pred.over_25_prob*100:.1f}%")
    print(f" 2.5 Alt: {pred.under_25_prob*100:.1f}%")

    print(f"\n🤝 Karşılıklı Gol:")
    print(f" KG Var: {pred.btts_yes_prob*100:.1f}%")
    print(f" KG Yok: {pred.btts_no_prob*100:.1f}%")

    print(f"\n📈 En Olası Skorlar:")
    for score_data in pred.most_likely_scores:
        print(f" {score_data['score']}: {score_data['probability']}%")
+368
View File
@@ -0,0 +1,368 @@
"""
Referee Engine - V9 Feature
Hakem profilleri ve maç etki analizi.
Analiz Edilen Metrikler:
- Ortalama kart sayısı (sarı/kırmızı)
- Penaltı verme eğilimi
- Ev sahibi lehine karar oranı
- Maç başına toplam gol ortalaması
"""
import os
from typing import Dict, Optional, List
from dataclasses import dataclass, field
from datetime import datetime
try:
import psycopg2
from psycopg2.extras import RealDictCursor
except ImportError:
psycopg2 = None
@dataclass
class RefereeProfile:
    """Referee profile."""

    referee_name: str
    matches_count: int = 0

    # Card statistics (per match)
    avg_yellow_cards: float = 0.0
    avg_red_cards: float = 0.0
    total_cards_per_match: float = 0.0

    # Penalty statistics: share of matches with a penalty
    penalty_rate: float = 0.0

    # Home tendency; home_bias runs from -1 (away bias) to +1 (home bias)
    home_win_rate: float = 0.0
    home_bias: float = 0.0

    # Goal statistics
    avg_goals_per_match: float = 0.0
    over_25_rate: float = 0.0
@dataclass
class RefereeFeatures:
    """Referee features for the model."""

    referee_name: str = ""
    referee_matches: int = 0
    referee_avg_yellow: float = 0.0
    referee_avg_red: float = 0.0
    referee_cards_total: float = 0.0
    referee_penalty_rate: float = 0.0
    referee_home_bias: float = 0.0
    referee_avg_goals: float = 0.0
    referee_over25_rate: float = 0.0
    referee_experience: float = 0.0  # 0-1 normalized

    def to_dict(self) -> Dict[str, float]:
        """Return the numeric features as a dict; ``referee_name`` is left out."""
        passthrough_fields = (
            'referee_avg_yellow',
            'referee_avg_red',
            'referee_cards_total',
            'referee_penalty_rate',
            'referee_home_bias',
            'referee_avg_goals',
            'referee_over25_rate',
            'referee_experience',
        )
        # The match count is the only value converted to float.
        result = {'referee_matches': float(self.referee_matches)}
        for field_name in passthrough_fields:
            result[field_name] = getattr(self, field_name)
        return result
class RefereeEngine:
"""
Hakem analiz motoru.
Hakemlerin geçmiş maçlarını analiz ederek:
- Kart eğilimlerini
- Ev sahibi bias'ını
- Gol ortalamasını
hesaplar.
"""
# Ana hakem rolü ID'si (genellikle 1 veya "Hakem")
MAIN_REFEREE_ROLE_ID = 1
def __init__(self):
    """Start with no connection and an empty profile cache."""
    self._cache_loaded = False
    # Profiles computed so far, filled by calculate_referee_profile
    self._referee_cache: Dict[str, RefereeProfile] = {}
    self.conn = None
def _connect_db(self):
    """Open a new database connection and store it on ``self.conn``.

    Returns:
        The new connection, or None when psycopg2 is unavailable or the
        connection attempt fails. On failure ``self.conn`` is reset to
        None so that a stale, closed connection is never handed back to
        callers that only check ``conn is None``.
    """
    if psycopg2 is None:
        return None
    try:
        from data.db import get_clean_dsn
        self.conn = psycopg2.connect(get_clean_dsn())
    except Exception as e:
        print(f"[RefereeEngine] DB connection failed: {e}")
        self.conn = None
    return self.conn
def get_conn(self):
    """Return ``self.conn``, attempting a (re)connect first if it is None or closed."""
    needs_connect = self.conn is None or self.conn.closed
    if needs_connect:
        self._connect_db()
    return self.conn
def _get_main_referee_role_id(self) -> int:
    """Look up the ID of the main referee role.

    Searches ``official_roles`` for a name containing "hakem" that is not
    an assistant ("yardımcı") or fourth ("dördüncü") official.

    Returns:
        The role ID found, or ``MAIN_REFEREE_ROLE_ID`` when there is no
        connection, no matching row, or the lookup fails.
    """
    conn = self.get_conn()
    if conn is None:
        return self.MAIN_REFEREE_ROLE_ID

    try:
        with conn.cursor() as cur:
            cur.execute("""
                SELECT id FROM official_roles
                WHERE LOWER(name) LIKE '%%hakem%%'
                AND LOWER(name) NOT LIKE '%%yardımcı%%'
                AND LOWER(name) NOT LIKE '%%dördüncü%%'
                LIMIT 1
            """)
            result = cur.fetchone()
        if result:
            return result[0]
    except Exception as e:
        print(f"[RefereeEngine] Error resolving main referee role: {e}")
        # A failed statement aborts the transaction; without a rollback the
        # caller's next query on this connection would fail too, which
        # defeats the fallback below.
        try:
            conn.rollback()
        except Exception as rollback_error:
            print(f"[RefereeEngine] Rollback failed: {rollback_error}")

    return self.MAIN_REFEREE_ROLE_ID
def get_referee_for_match(self, match_id: str) -> Optional[str]:
    """Find the main referee of a match.

    Args:
        match_id: Match ID

    Returns:
        The referee's name from ``match_officials``, or None when there is
        no connection, no main referee is recorded, or the query fails.
    """
    conn = self.get_conn()
    if conn is None:
        return None

    try:
        main_role_id = self._get_main_referee_role_id()
        with conn.cursor() as cur:
            cur.execute("""
                SELECT name FROM match_officials
                WHERE match_id = %s AND role_id = %s
                LIMIT 1
            """, (match_id, main_role_id))
            result = cur.fetchone()
        return result[0] if result else None
    except Exception as e:
        print(f"[RefereeEngine] Error getting referee: {e}")
        # Clear the aborted transaction so later queries can still run.
        try:
            conn.rollback()
        except Exception as rollback_error:
            print(f"[RefereeEngine] Rollback failed: {rollback_error}")
        return None
def calculate_referee_profile(self, referee_name: str, league_id: str = None) -> "RefereeProfile":
    """Build a referee's profile from the matches they officiated.

    Uses up to the 100 most recent scored matches where the referee held
    the main referee role. If ``league_id`` is given, only matches from
    that league are used.

    NOTE(review): the match query has no date filter, so the profile can
    include the match being predicted and later ones.

    Args:
        referee_name: Referee name as stored in ``match_officials.name``
        league_id: Optional league ID to scope the profile

    Returns:
        The RefereeProfile. It is a default (all-zero) profile when there
        is no connection or the computation fails; those two cases are not
        cached, every other result is.
    """
    # Composite cache key: the same name may have different profiles per league
    cache_key = (referee_name, league_id)
    if cache_key in self._referee_cache:
        return self._referee_cache[cache_key]

    profile = RefereeProfile(referee_name=referee_name)

    conn = self.get_conn()
    if conn is None:
        return profile

    try:
        main_role_id = self._get_main_referee_role_id()

        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            # Matches officiated by this referee (one league only if requested)
            league_filter = "AND m.league_id = %s" if league_id else ""
            params = [referee_name, main_role_id]
            if league_id:
                params.append(league_id)
            cur.execute(f"""
                SELECT m.id, m.score_home, m.score_away, m.home_team_id, m.away_team_id
                FROM matches m
                JOIN match_officials mo ON m.id = mo.match_id
                WHERE mo.name = %s
                AND mo.role_id = %s
                {league_filter}
                AND m.score_home IS NOT NULL
                AND m.score_away IS NOT NULL
                ORDER BY m.mst_utc DESC
                LIMIT 100
            """, params)
            matches = cur.fetchall()

            profile.matches_count = len(matches)
            if profile.matches_count == 0:
                # Remember the empty result so unknown referees are not
                # looked up again on every call.
                self._referee_cache[cache_key] = profile
                return profile

            match_ids = [m['id'] for m in matches]

            # Card statistics
            cur.execute("""
                SELECT
                COUNT(*) FILTER (WHERE event_subtype ILIKE '%%yellow%%') as yellow_count,
                COUNT(*) FILTER (WHERE event_subtype ILIKE '%%red%%' OR event_subtype ILIKE '%%second%%') as red_count
                FROM match_player_events
                WHERE match_id = ANY(%s) AND event_type = 'card'
            """, (match_ids,))
            card_stats = cur.fetchone()
            if card_stats:
                profile.avg_yellow_cards = (card_stats['yellow_count'] or 0) / profile.matches_count
                profile.avg_red_cards = (card_stats['red_count'] or 0) / profile.matches_count
                profile.total_cards_per_match = profile.avg_yellow_cards + profile.avg_red_cards

            # Penalty statistics: share of matches with at least one penalty goal
            cur.execute("""
                SELECT COUNT(DISTINCT match_id) as penalty_matches
                FROM match_player_events
                WHERE match_id = ANY(%s)
                AND event_type = 'goal'
                AND event_subtype ILIKE '%%penaltı%%'
            """, (match_ids,))
            penalty_stats = cur.fetchone()
            if penalty_stats:
                profile.penalty_rate = (penalty_stats['penalty_matches'] or 0) / profile.matches_count

        # Home tendency and goal averages
        home_wins = 0
        total_goals = 0
        over_25_count = 0
        for m in matches:
            goals = (m['score_home'] or 0) + (m['score_away'] or 0)
            total_goals += goals
            if goals > 2.5:
                over_25_count += 1
            if m['score_home'] > m['score_away']:
                home_wins += 1

        profile.avg_goals_per_match = total_goals / profile.matches_count
        profile.over_25_rate = over_25_count / profile.matches_count
        profile.home_win_rate = home_wins / profile.matches_count

        # Home bias: -1 (favours away) to +1 (favours home), measured
        # against an assumed baseline home-win rate of 46%.
        expected_home_rate = 0.46
        profile.home_bias = (profile.home_win_rate - expected_home_rate) * 2
        profile.home_bias = max(-1, min(1, profile.home_bias))

        self._referee_cache[cache_key] = profile
        return profile
    except Exception as e:
        print(f"[RefereeEngine] Error calculating profile: {e}")
        # Clear the aborted transaction so later queries can still run.
        try:
            conn.rollback()
        except Exception as rollback_error:
            print(f"[RefereeEngine] Rollback failed: {rollback_error}")
        return profile
def get_features(self, match_id: str, league_id: str = None) -> Dict[str, float]:
    """
    Build the referee feature dict for a match.

    Args:
        match_id: ID of the match whose referee is looked up.
        league_id: Optional league ID, forwarded to the profile calculation
            to scope it (avoids referee-name collisions).

    Returns:
        The referee features as a dict; default feature values when no
        referee is found for the match.
    """
    feats = RefereeFeatures()

    name = self.get_referee_for_match(match_id)
    if name is None:
        # No referee known for this match: hand back the defaults.
        return feats.to_dict()

    feats.referee_name = name

    # Profile is scoped by league_id when one is given.
    prof = self.calculate_referee_profile(name, league_id=league_id)

    # (feature attribute, profile attribute) pairs that are copied verbatim.
    for target, source in (
        ("referee_matches", "matches_count"),
        ("referee_avg_yellow", "avg_yellow_cards"),
        ("referee_avg_red", "avg_red_cards"),
        ("referee_cards_total", "total_cards_per_match"),
        ("referee_penalty_rate", "penalty_rate"),
        ("referee_home_bias", "home_bias"),
        ("referee_avg_goals", "avg_goals_per_match"),
        ("referee_over25_rate", "over_25_rate"),
    ):
        setattr(feats, target, getattr(prof, source))

    # Experience: 50+ matches = 1.0, 0 matches = 0.0
    feats.referee_experience = min(prof.matches_count / 50, 1.0)
    return feats.to_dict()
def get_features_by_name(self, referee_name: str, league_id: str = None) -> Dict[str, float]:
    """
    Build the referee feature dict directly from a referee name.

    Args:
        referee_name: Name of the referee; a falsy value yields defaults.
        league_id: Optional league ID, forwarded to the profile calculation
            to scope it (avoids referee-name collisions).

    Returns:
        The referee features as a dict.
    """
    result = RefereeFeatures()
    if referee_name:
        stats = self.calculate_referee_profile(referee_name, league_id=league_id)
        played = stats.matches_count

        result.referee_name = referee_name
        result.referee_matches = played
        result.referee_avg_yellow = stats.avg_yellow_cards
        result.referee_avg_red = stats.avg_red_cards
        result.referee_cards_total = stats.total_cards_per_match
        result.referee_penalty_rate = stats.penalty_rate
        result.referee_home_bias = stats.home_bias
        result.referee_avg_goals = stats.avg_goals_per_match
        result.referee_over25_rate = stats.over_25_rate
        # Experience saturates at 50 matches.
        result.referee_experience = min(played / 50, 1.0)
    return result.to_dict()
# Module-wide engine instance, created on first request.
_engine: Optional[RefereeEngine] = None


def get_referee_engine() -> RefereeEngine:
    """Return the shared RefereeEngine, creating it on the first call."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = RefereeEngine()
    return _engine
if __name__ == "__main__":
    # Manual smoke test: print the features computed for one referee name.
    test_referee = "Cüneyt Çakır"
    engine = get_referee_engine()

    print("\n🧪 Referee Engine Test")
    print("=" * 50)

    features = engine.get_features_by_name(test_referee)

    print(f"\n📊 Hakem: {test_referee}")
    for key in features:
        value = features[key]
        print(f" {key}: {value:.3f}")
+408
View File
@@ -0,0 +1,408 @@
"""
Sidelined Analyzer — Injury & Suspension Impact Calculator
==========================================================
Parses sidelined JSON from live_matches and calculates
position-weighted missing player impact using ACTUAL player
statistics from the database (goals, assists, starting frequency).
Senior ML Engineer Principle: No magic numbers — all weights from config.
Data Quality: Cross-reference sidelined IDs with DB for real impact.
"""
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Any, Tuple
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
try:
import psycopg2
from psycopg2.extras import RealDictCursor
except ImportError:
psycopg2 = None
from config.config_loader import get_config
@dataclass
class PlayerImpactDetail:
    """Impact detail for a single sidelined player."""
    player_id: str
    player_name: str
    position: str  # Short position code taken from the payload's "positionShort"
    impact_score: float  # Per-player impact, before team-level normalization
    db_goals: int = 0
    db_assists: int = 0
    db_starts: int = 0
    db_rating: float = 0.0  # Calculated from DB stats (payload average when the DB has none)
    is_key_player: bool = False
    adaptation_applied: bool = False  # True when the adaptation discount was applied
@dataclass
class SidelinedImpact:
    """Impact analysis of sidelined players for one team."""
    total_sidelined: int = 0
    impact_score: float = 0.0  # 0.0 - 1.0 (normalized, capped at the configured max impact)
    key_position_missing: bool = False  # GK or 2+ same position missing
    key_players_missing: int = 0  # How many key players are missing
    position_breakdown: Dict[str, int] = field(default_factory=dict)  # position code -> count
    player_details: List[PlayerImpactDetail] = field(default_factory=list)
    details: List[str] = field(default_factory=list)  # Human-readable line per player
class SidelinedAnalyzer:
    """
    Analyzes sidelined player data with DB-backed statistics.

    Impact formula per player:
        player_impact = position_weight × db_rating_factor × adaptation_factor × type_factor

    Where:
        - position_weight: from config (GK most critical)
        - db_rating_factor: calculated from actual goals + assists + starts;
          the mackolik average is used only when the DB has no data for the player
        - adaptation_factor: 1.0 if recent injury, discounted if team adapted (many matches missed)
        - type_factor: 1.0 for type "injury", 0.8 for any other type

    DB Query: Cross-references sidelined player IDs with match_player_events
    to get real goals/assists from finished matches.
    """

    def __init__(self):
        """Load config values and attempt the DB connection right away."""
        self.config = get_config()
        self.conn = None
        self._load_config()
        self._connect_db()
def _load_config(self):
"""Load all config values once at init."""
cfg = self.config
self.position_weights = cfg.get("sidelined.position_weights", {
"K": 0.35, "D": 0.20, "O": 0.25, "F": 0.30
})
self.max_rating = cfg.get("sidelined.max_rating", 10)
self.adaptation_threshold = cfg.get("sidelined.adaptation_threshold", 10)
self.adaptation_discount = cfg.get("sidelined.adaptation_discount", 0.5)
self.goalkeeper_penalty = cfg.get("sidelined.goalkeeper_penalty", 0.15)
self.confidence_boost = cfg.get("sidelined.confidence_boost", 10)
self.max_impact = cfg.get("sidelined.max_impact", 0.85)
self.key_player_threshold = cfg.get("sidelined.key_player_threshold", 3)
self.recent_matches_lookback = cfg.get("sidelined.recent_matches_lookback", 15)
@staticmethod
def _safe_int(value: Any, default: int = 0) -> int:
try:
if value is None or value == "":
return default
return int(float(value))
except (TypeError, ValueError):
return default
@staticmethod
def _safe_float(value: Any, default: float = 0.0) -> float:
try:
if value is None or value == "":
return default
return float(value)
except (TypeError, ValueError):
return default
def _connect_db(self):
    """
    Open the DB connection and store it on ``self.conn``.

    Does nothing when psycopg2 is unavailable; on any connection error the
    failure is printed and ``self.conn`` is reset to None.
    """
    if psycopg2 is None:
        return

    try:
        from data.db import get_clean_dsn
        dsn = get_clean_dsn()
        self.conn = psycopg2.connect(dsn)
    except Exception as e:
        print(f"[SidelinedAnalyzer] DB connection failed: {e}")
        self.conn = None
def _get_conn(self):
"""Get or reconnect DB connection."""
if self.conn is None or self.conn.closed:
self._connect_db()
return self.conn
def _fetch_player_stats(self, player_ids: List[str]) -> Dict[str, Dict]:
"""
Fetch real player statistics from DB for given player IDs.
Returns dict keyed by player_id with:
goals: int, assists: int, starts: int, matches: int
"""
conn = self._get_conn()
if not conn or not player_ids:
return {}
stats = {}
try:
cur = conn.cursor(cursor_factory=RealDictCursor)
# 1. Goals from match_player_events + Assists via assist_player_id
cur.execute("""
SELECT
sub.player_id,
SUM(sub.goals) AS goals,
SUM(sub.assists) AS assists
FROM (
-- Goals: player scored
SELECT mpe.player_id,
COUNT(*) AS goals,
0 AS assists
FROM match_player_events mpe
JOIN matches m ON mpe.match_id = m.id
WHERE mpe.player_id = ANY(%s)
AND mpe.event_type = 'goal'
AND m.status = 'FT'
GROUP BY mpe.player_id
UNION ALL
-- Assists: player assisted
SELECT mpe.assist_player_id AS player_id,
0 AS goals,
COUNT(*) AS assists
FROM match_player_events mpe
JOIN matches m ON mpe.match_id = m.id
WHERE mpe.assist_player_id = ANY(%s)
AND mpe.event_type = 'goal'
AND m.status = 'FT'
GROUP BY mpe.assist_player_id
) sub
GROUP BY sub.player_id
""", (player_ids, player_ids))
for row in cur.fetchall():
pid = row["player_id"]
stats[pid] = {
"goals": row["goals"] or 0,
"assists": row["assists"] or 0,
"starts": 0,
"matches": 0
}
# 2. Starting frequency from match_player_participation
cur.execute("""
SELECT
mpp.player_id,
COUNT(*) AS total_matches,
COUNT(*) FILTER (WHERE mpp.is_starting = true) AS starts
FROM match_player_participation mpp
JOIN matches m ON mpp.match_id = m.id
WHERE mpp.player_id = ANY(%s)
AND m.status = 'FT'
GROUP BY mpp.player_id
""", (player_ids,))
for row in cur.fetchall():
pid = row["player_id"]
if pid not in stats:
stats[pid] = {"goals": 0, "assists": 0, "starts": 0, "matches": 0}
stats[pid]["starts"] = row["starts"] or 0
stats[pid]["matches"] = row["total_matches"] or 0
cur.close()
except Exception as e:
print(f"[SidelinedAnalyzer] DB query error: {e}")
try:
conn.rollback()
except Exception:
pass
return stats
def _calculate_db_rating(self, db_stats: Dict, position: str) -> float:
"""
Calculate player rating from DB statistics.
Rating is 0.0 - 1.0, where 1.0 = absolute key player.
Factors:
- Goals (weighted by position: Forwards value more, Defenders less)
- Assists
- Starting frequency (regulars > squad players)
"""
def _to_float(value: Any, default: float = 0.0) -> float:
try:
return float(value)
except (TypeError, ValueError):
return default
goals = _to_float(db_stats.get("goals", 0))
assists = _to_float(db_stats.get("assists", 0))
starts = _to_float(db_stats.get("starts", 0))
matches = _to_float(db_stats.get("matches", 0))
# Goal contribution weight by position
# Forwards: goals matter most
# Midfielders: balanced
# Defenders: starts matter more than goals
# Goalkeeper: starts are everything
goal_weight = {"F": 0.5, "O": 0.35, "D": 0.15, "K": 0.05}.get(position, 0.25)
assist_weight = {"F": 0.2, "O": 0.3, "D": 0.15, "K": 0.0}.get(position, 0.15)
start_weight = {"F": 0.3, "O": 0.35, "D": 0.7, "K": 0.95}.get(position, 0.5)
# Normalize each component to 0-1
# Goals: 5+ goals in recent matches = max
goal_factor = min(goals / 5.0, 1.0) if goals > 0 else 0.0
# Assists: 4+ assists = max
assist_factor = min(assists / 4.0, 1.0) if assists > 0 else 0.0
# Starts: 80%+ start rate = max regular
start_rate = starts / max(matches, 1)
start_factor = min(start_rate / 0.8, 1.0)
rating = (goal_factor * goal_weight +
assist_factor * assist_weight +
start_factor * start_weight)
return round(min(rating, 1.0), 4)
def analyze(self, team_data: Optional[Dict[str, Any]]) -> SidelinedImpact:
    """
    Analyze sidelined data for a single team using DB-backed stats.

    Per player: impact = position weight × rating × adaptation factor ×
    type factor. The rating comes from DB stats when available, otherwise
    from the payload's "average" normalized by the configured max rating.
    The team total gets a goalkeeper penalty when a "K" player is missing,
    is divided by 1.5 and capped at the configured max impact.

    Args:
        team_data: dict with 'players' list and 'totalSidelined' count.
            Entries of 'players' that are not dicts are ignored.

    Returns:
        SidelinedImpact with calculated impact score and breakdown.
    """
    if not team_data or not isinstance(team_data, dict):
        return SidelinedImpact()

    raw_players = team_data.get("players", [])
    if not raw_players:
        return SidelinedImpact(
            total_sidelined=team_data.get("totalSidelined", 0)
        )

    # Drop malformed entries up front so the ID collection below and the
    # main loop work on the same, dict-only list.
    if isinstance(raw_players, (list, tuple)):
        players = [p for p in raw_players if isinstance(p, dict)]
    else:
        players = []

    # Collect player IDs for batch DB query
    player_ids = [p["playerId"] for p in players if p.get("playerId")]

    # Batch fetch DB stats (single round of queries, not N+1)
    db_stats = self._fetch_player_stats(player_ids) if player_ids else {}

    total_impact = 0.0
    position_counts: Dict[str, int] = {}
    player_details: List[PlayerImpactDetail] = []
    details: List[str] = []
    has_gk_missing = False
    key_players_count = 0

    for player in players:
        pos = player.get("positionShort", "O")
        name = player.get("playerName", "Unknown")
        pid = player.get("playerId", "")
        matches_missed = self._safe_int(player.get("matchesMissed", 0), 0)
        player_type = player.get("type", "other")
        mackolik_avg = self._safe_float(player.get("average", 0), 0.0)

        position_counts[pos] = position_counts.get(pos, 0) + 1
        if pos == "K":
            has_gk_missing = True

        # === Rating: DB first, mackolik fallback ===
        p_db_stats = db_stats.get(pid, {})
        if p_db_stats:
            # Use real DB stats
            db_rating = self._calculate_db_rating(p_db_stats, pos)
        else:
            # Fallback to mackolik average (normalized)
            db_rating = min(mackolik_avg / self.max_rating, 1.0) if self.max_rating > 0 else 0.3
            db_rating = max(db_rating, 0.15)  # Minimum floor

        # DB counters may arrive as non-int numerics; keep the detail
        # record's int fields actual ints.
        goals = self._safe_int(p_db_stats.get("goals", 0), 0)
        assists = self._safe_int(p_db_stats.get("assists", 0), 0)
        starts = self._safe_int(p_db_stats.get("starts", 0), 0)

        # Key player check
        is_key = db_rating >= 0.5 or goals >= self.key_player_threshold
        if is_key:
            key_players_count += 1

        # === Impact Calculation ===
        pos_weight = self.position_weights.get(pos, 0.20)

        # Rating factor: higher rated = bigger loss
        rating_factor = max(db_rating, 0.15)  # Even unknown players have minimum impact

        # Adaptation: team has coped if player missed many matches
        adapted = matches_missed >= self.adaptation_threshold
        adapt_factor = self.adaptation_discount if adapted else 1.0

        # Type factor
        type_factor = 1.0 if player_type == "injury" else 0.8

        player_impact = pos_weight * rating_factor * adapt_factor * type_factor
        total_impact += player_impact

        detail = PlayerImpactDetail(
            player_id=pid,
            player_name=name,
            position=pos,
            impact_score=round(player_impact, 4),
            db_goals=goals,
            db_assists=assists,
            db_starts=starts,
            db_rating=db_rating,
            is_key_player=is_key,
            adaptation_applied=adapted
        )
        player_details.append(detail)

        db_info = f"G:{detail.db_goals} A:{detail.db_assists} S:{detail.db_starts}" if p_db_stats else "no DB data"
        details.append(
            f"{name} ({pos}, db_rating:{db_rating:.2f}, {db_info}) → impact:{player_impact:.3f}"
            + (" ⭐ KEY" if is_key else "")
            + (f" [adapted, {matches_missed} missed]" if adapted else "")
        )

    # GK penalty bonus
    if has_gk_missing:
        total_impact += self.goalkeeper_penalty

    key_position_missing = has_gk_missing or any(v >= 2 for v in position_counts.values())

    # Normalize to 0-1 range
    normalization_cap = 1.5
    normalized_impact = min(total_impact / normalization_cap, self.max_impact)

    return SidelinedImpact(
        total_sidelined=len(players),
        impact_score=round(normalized_impact, 4),
        key_position_missing=key_position_missing,
        key_players_missing=key_players_count,
        position_breakdown=position_counts,
        player_details=player_details,
        details=details
    )
def analyze_match(self, sidelined_json: Optional[Dict[str, Any]]) -> Tuple[SidelinedImpact, SidelinedImpact]:
    """
    Run the sidelined analysis for both sides of a match.

    Args:
        sidelined_json: dict holding the per-team payloads under
            "homeTeam" and "awayTeam".

    Returns:
        (home_impact, away_impact); two empty impacts when the input is
        missing, empty, or not a dict.
    """
    if sidelined_json and isinstance(sidelined_json, dict):
        home = self.analyze(sidelined_json.get("homeTeam"))
        away = self.analyze(sidelined_json.get("awayTeam"))
        return home, away
    return SidelinedImpact(), SidelinedImpact()
# Module-wide analyzer instance, created on first request.
_analyzer: Optional[SidelinedAnalyzer] = None


def get_sidelined_analyzer() -> SidelinedAnalyzer:
    """Return the shared SidelinedAnalyzer, creating it on the first call."""
    global _analyzer
    if _analyzer is not None:
        return _analyzer
    _analyzer = SidelinedAnalyzer()
    return _analyzer
+357
View File
@@ -0,0 +1,357 @@
"""
Smart Bet Recommender
=====================
Skor tahminine göre akıllı bahis önerileri yapan sistem.
Örnek: Beşiktaş-Galatasaray için model 3-1 tahmin ediyor
→ DÜŞÜK RİSK: 1.5 Üst (yüksek ihtimal tutar)
→ ORTA RİSK: MS 1 + 2.5 Üst (orta ihtimal)
→ YÜKSEK RİSK: 3.5 Üst veya skor 3-1 (düşük ihtimal, yüksek kazanç)
Ayrıca kombinasyonlar:
- MS 1 + 1.5 Üst
- MS 1 + KG Var
- Her iki takım skor > 0.5 (her takım en az 1 gol atar)
"""
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple
from enum import Enum
class RiskLevel(Enum):
    """Risk tier attached to a bet recommendation."""
    LOW = "LOW"  # High probability, low odds (safe)
    MEDIUM = "MEDIUM"  # Medium probability, medium odds
    HIGH = "HIGH"  # Low probability, high payout
    EXTREME = "EXTREME"  # Very low probability, very high payout
@dataclass
class BetRecommendation:
    """A single bet recommendation."""
    market: str  # Market name (e.g. "MS 1", "2.5 Üst")
    pick: str  # Selection (e.g. "1", "OVER", "YES")
    odds: float  # Odds
    probability: float  # Model probability (0-1)
    confidence: float  # Confidence level (0-100)
    risk_level: RiskLevel

    def to_dict(self) -> dict:
        """Serialize to a plain dict; probability is reported in percent."""
        payload = {"market": self.market, "pick": self.pick, "odds": self.odds}
        payload["probability"] = round(self.probability * 100, 1)
        payload["confidence"] = round(self.confidence, 1)
        payload["risk_level"] = self.risk_level.value
        return payload
@dataclass
class MatchPredictionSet:
    """Complete prediction set for one match."""
    match_name: str
    predicted_score: Tuple[int, int]  # (home, away)
    home_win_prob: float
    draw_prob: float
    away_win_prob: float
    over_15_prob: float
    over_25_prob: float
    over_35_prob: float
    btts_yes_prob: float
    # Recommendations, grouped by risk tier
    low_risk_bets: List[BetRecommendation]
    medium_risk_bets: List[BetRecommendation]
    high_risk_bets: List[BetRecommendation]
    extreme_risk_bets: List[BetRecommendation]

    def to_dict(self) -> dict:
        """Serialize to a plain dict; probabilities are reported in percent."""
        def _pct(prob: float) -> float:
            return round(prob * 100, 1)

        def _dump(bets: List[BetRecommendation]) -> list:
            return [bet.to_dict() for bet in bets]

        home_goals = self.predicted_score[0]
        away_goals = self.predicted_score[1]
        percentages = {
            "home_win": _pct(self.home_win_prob),
            "draw": _pct(self.draw_prob),
            "away_win": _pct(self.away_win_prob),
            "over_15": _pct(self.over_15_prob),
            "over_25": _pct(self.over_25_prob),
            "over_35": _pct(self.over_35_prob),
            "btts": _pct(self.btts_yes_prob),
        }
        return {
            "match_name": self.match_name,
            "predicted_score": f"{home_goals}-{away_goals}",
            "probs": percentages,
            "low_risk": _dump(self.low_risk_bets),
            "medium_risk": _dump(self.medium_risk_bets),
            "high_risk": _dump(self.high_risk_bets),
            "extreme_risk": _dump(self.extreme_risk_bets),
        }
class SmartBetRecommender:
    """
    Smart bet recommendation system.

    Recommends bets at different risk levels based on the score prediction.

    Logic:
    1. LOW RISK: high-probability (>70%), low-odds bets
       - Over 1.5
       - Double Chance
       - Favorite team scores
    2. MEDIUM RISK: medium-probability (50-70%), medium-odds bets
       - Match result: favorite
       - Over 2.5
       - Both teams to score
    3. HIGH RISK: low-probability (30-50%), high-odds bets
       - Over 3.5
       - Score prediction
       - Handicap
    4. EXTREME RISK: very low probability (<30%), very high odds
       - Exact score
       - Long combinations
    """
    # Probability thresholds
    PROB_LOW_RISK = 0.70  # > 70% probability
    PROB_MEDIUM_RISK = 0.50  # 50-70% probability
    PROB_HIGH_RISK = 0.30  # 30-50% probability
    # < 30% = EXTREME

    def __init__(self):
        pass
def _determine_risk(self, probability: float) -> RiskLevel:
    """Map a probability to its risk tier using the class thresholds."""
    # Checked from the highest floor down; the first floor reached wins.
    tiers = (
        (self.PROB_LOW_RISK, RiskLevel.LOW),
        (self.PROB_MEDIUM_RISK, RiskLevel.MEDIUM),
        (self.PROB_HIGH_RISK, RiskLevel.HIGH),
    )
    for floor, level in tiers:
        if probability >= floor:
            return level
    return RiskLevel.EXTREME
def _get_favorite(self, home_prob: float, draw_prob: float, away_prob: float) -> Tuple[str, float]:
"""Favori sonucu ve olasılığını döndür"""
if home_prob >= draw_prob and home_prob >= away_prob:
return "1", home_prob
elif away_prob >= home_prob and away_prob >= draw_prob:
return "2", away_prob
else:
return "X", draw_prob
def _calculate_expected_goals(self, predicted_score: Tuple[int, int]) -> float:
"""Tahmin edilen skora göre beklenen gol sayısı"""
return predicted_score[0] + predicted_score[1]
def recommend(
    self,
    match_name: str,
    predicted_score: Tuple[int, int],
    probs: Dict[str, float],
    odds: Dict[str, float]
) -> MatchPredictionSet:
    """
    Build every bet recommendation for a match.

    Args:
        match_name: Match name
        predicted_score: (home_goals, away_goals)
        probs: {"home_win": 0.55, "draw": 0.25, "away_win": 0.20,
                "over_15": 0.85, "over_25": 0.65, "over_35": 0.35,
                "btts_yes": 0.55}
        odds: {"1": 1.80, "X": 3.50, "2": 4.20,
               "ou15_o": 1.25, "ou15_u": 3.80,
               "ou25_o": 1.90, "ou25_u": 1.85,
               "ou35_o": 3.20, "ou35_u": 1.30,
               "btts_y": 1.75, "btts_n": 2.00}
            Missing keys in either dict fall back to built-in defaults.

    Returns:
        MatchPredictionSet with all recommendations
    """
    def _bet(market: str, pick: str, price: float, probability: float,
             level: RiskLevel) -> BetRecommendation:
        # Confidence is always the probability expressed in percent.
        return BetRecommendation(
            market=market,
            pick=pick,
            odds=price,
            probability=probability,
            confidence=probability * 100,
            risk_level=level
        )

    low_floor = self.PROB_LOW_RISK
    mid_floor = self.PROB_MEDIUM_RISK
    high_floor = self.PROB_HIGH_RISK

    p_home = probs.get("home_win", 0.33)
    p_draw = probs.get("draw", 0.33)
    p_away = probs.get("away_win", 0.33)
    p_over15 = probs.get("over_15", 0.70)
    p_over25 = probs.get("over_25", 0.50)
    p_over35 = probs.get("over_35", 0.30)
    p_btts = probs.get("btts_yes", 0.50)

    # Expected goals, then the favorite outcome
    expected_goals = self._calculate_expected_goals(predicted_score)
    favorite, p_fav = self._get_favorite(p_home, p_draw, p_away)

    low_risk: List[BetRecommendation] = []
    medium_risk: List[BetRecommendation] = []
    high_risk: List[BetRecommendation] = []
    extreme_risk: List[BetRecommendation] = []

    # ========== LOW RISK ==========
    # Over 1.5 (the safest)
    if p_over15 >= low_floor:
        low_risk.append(_bet(
            "1.5 Üst/Alt", "OVER", odds.get("ou15_o", 1.25),
            p_over15, RiskLevel.LOW
        ))

    # Double Chance on the stronger side (none when both sides are level)
    if p_home > p_away:
        dc_prob = p_home + p_draw
        if dc_prob >= low_floor:
            low_risk.append(_bet(
                "Double Chance", "1X", odds.get("dc_1x", 1.30),
                dc_prob, RiskLevel.LOW
            ))
    elif p_away > p_home:
        dc_prob = p_away + p_draw
        if dc_prob >= low_floor:
            low_risk.append(_bet(
                "Double Chance", "X2", odds.get("dc_x2", 1.30),
                dc_prob, RiskLevel.LOW
            ))

    # ========== MEDIUM RISK ==========
    # Match result: favorite
    if mid_floor <= p_fav < low_floor:
        medium_risk.append(_bet(
            "Maç Sonucu", favorite, odds.get(favorite, 2.00),
            p_fav, RiskLevel.MEDIUM
        ))

    # Over 2.5
    if mid_floor <= p_over25 < low_floor:
        medium_risk.append(_bet(
            "2.5 Üst/Alt", "OVER", odds.get("ou25_o", 1.90),
            p_over25, RiskLevel.MEDIUM
        ))

    # Both teams to score
    if mid_floor <= p_btts < low_floor:
        medium_risk.append(_bet(
            "Karşılıklı Gol", "YES", odds.get("btts_y", 1.75),
            p_btts, RiskLevel.MEDIUM
        ))

    # Match result + Over 2.5 combination
    if p_fav >= 0.45 and p_over25 >= 0.50:
        combo_prob = p_fav * p_over25  # Plain product of the two probabilities
        combo_odds = odds.get(favorite, 2.00) * odds.get("ou25_o", 1.90)
        if combo_prob >= 0.30:  # At least 30% probability
            medium_risk.append(_bet(
                f"MS {favorite} + 2.5 Üst", f"{favorite} & OVER", combo_odds,
                combo_prob, RiskLevel.MEDIUM
            ))

    # ========== HIGH RISK ==========
    # Over 3.5
    if high_floor <= p_over35 < mid_floor:
        high_risk.append(_bet(
            "3.5 Üst/Alt", "OVER", odds.get("ou35_o", 3.20),
            p_over35, RiskLevel.HIGH
        ))

    # Exact score (only for high-scoring predictions)
    if expected_goals >= 3.5:
        score_str = f"{predicted_score[0]}-{predicted_score[1]}"
        # Fixed score-probability estimate (simple model)
        score_prob = 0.15 if expected_goals <= 4 else 0.10
        # 8.0 is an estimated price, not taken from `odds`
        high_risk.append(_bet(
            "Tam Skor", score_str, 8.0,
            score_prob, RiskLevel.HIGH
        ))

    # Match result + Over 3.5
    if p_fav >= 0.40 and p_over35 >= 0.30:
        combo_prob = p_fav * p_over35
        combo_odds = odds.get(favorite, 2.00) * odds.get("ou35_o", 3.20)
        high_risk.append(_bet(
            f"MS {favorite} + 3.5 Üst", f"{favorite} & OVER", combo_odds,
            combo_prob, RiskLevel.HIGH
        ))

    # ========== EXTREME RISK ==========
    # Long combinations
    if p_fav >= 0.50 and p_btts >= 0.50 and p_over25 >= 0.60:
        combo_prob = p_fav * p_btts * p_over25
        combo_odds = odds.get(favorite, 2.00) * odds.get("btts_y", 1.75) * odds.get("ou25_o", 1.90)
        if combo_prob >= 0.15:  # At least 15% probability
            extreme_risk.append(_bet(
                f"MS {favorite} + KG Var + 2.5 Üst", f"{favorite} & BTTS & OVER", combo_odds,
                combo_prob, RiskLevel.EXTREME
            ))

    return MatchPredictionSet(
        match_name=match_name,
        predicted_score=predicted_score,
        home_win_prob=p_home,
        draw_prob=p_draw,
        away_win_prob=p_away,
        over_15_prob=p_over15,
        over_25_prob=p_over25,
        over_35_prob=p_over35,
        btts_yes_prob=p_btts,
        low_risk_bets=low_risk,
        medium_risk_bets=medium_risk,
        high_risk_bets=high_risk,
        extreme_risk_bets=extreme_risk
    )
# Module-wide recommender instance, created on first request.
_recommender = None


def get_smart_bet_recommender() -> SmartBetRecommender:
    """Return the shared SmartBetRecommender, creating it on the first call."""
    global _recommender
    if _recommender is not None:
        return _recommender
    _recommender = SmartBetRecommender()
    return _recommender
+582
View File
@@ -0,0 +1,582 @@
"""
Squad Analysis Engine - V9 Feature
Kadro ve oyuncu bazlı analiz.
Analiz Edilen Metrikler:
- İlk 11 kalitesi (golcü formu, key player)
- Yedek gücü
- Eksik oyuncu etkisi
- Pozisyon bazlı güç
- Takım içi golcü dağılımı
"""
import os
from typing import Dict, Optional, List, Tuple
from dataclasses import dataclass, field
from datetime import datetime
from collections import defaultdict
try:
import psycopg2
from psycopg2.extras import RealDictCursor
except ImportError:
psycopg2 = None
@dataclass
class PlayerForm:
    """Form information for a single player."""
    player_id: str
    player_name: str
    goals_last_5: int = 0
    assists_last_5: int = 0
    minutes_last_5: int = 0
    cards_last_5: int = 0
    is_key_player: bool = False  # Goal scorer or frequently playing
@dataclass
class SquadAnalysis:
    """Squad analysis for one team."""
    team_id: str
    team_name: str = ""
    # Starting-11 information
    starting_count: int = 0
    sub_count: int = 0
    total_squad: int = 0
    # Position distribution
    goalkeeper_count: int = 0
    defender_count: int = 0
    midfielder_count: int = 0
    forward_count: int = 0
    # Form metrics
    total_goals_last_5: int = 0  # Goals scored by the squad's players in their last 5 matches
    total_assists_last_5: int = 0
    key_players_count: int = 0  # Number of goal scorers
    key_player_missing: int = 0  # Missing goal scorers
    # Quality metrics
    avg_minutes_per_player: float = 0.0  # Average playing time
    squad_experience: float = 0.0  # 0-1, experience playing with this team
    rotation_rate: float = 0.0  # Squad rotation rate
@dataclass
class SquadFeatures:
    """Squad features fed to the model."""
    # Home team features
    home_starting_11: int = 11
    home_sub_count: int = 7
    home_total_squad: int = 18
    home_goalkeepers: int = 1
    home_defenders: int = 4
    home_midfielders: int = 4
    home_forwards: int = 2
    home_goals_last_5: int = 0
    home_assists_last_5: int = 0
    home_key_players: int = 0
    home_squad_experience: float = 0.5
    # Away team features
    away_starting_11: int = 11
    away_sub_count: int = 7
    away_total_squad: int = 18
    away_goalkeepers: int = 1
    away_defenders: int = 4
    away_midfielders: int = 4
    away_forwards: int = 2
    away_goals_last_5: int = 0
    away_assists_last_5: int = 0
    away_key_players: int = 0
    away_squad_experience: float = 0.5
    # Comparison features
    squad_strength_diff: float = 0.0  # + = home stronger
    goals_form_diff: float = 0.0
    key_players_diff: int = 0

    def to_dict(self) -> Dict[str, float]:
        """
        Flatten the features into a dict, in field-declaration order.

        Integer-typed fields are converted with ``float``; the float-typed
        fields are passed through unchanged.
        """
        # Per-side suffixes of the integer-typed fields, in declaration order.
        counted = (
            'starting_11', 'sub_count', 'total_squad',
            'goalkeepers', 'defenders', 'midfielders', 'forwards',
            'goals_last_5', 'assists_last_5', 'key_players',
        )
        out: Dict[str, float] = {}
        for side in ('home', 'away'):
            for suffix in counted:
                key = f'{side}_{suffix}'
                out[key] = float(getattr(self, key))
            experience_key = f'{side}_squad_experience'
            out[experience_key] = getattr(self, experience_key)

        # Home-vs-away comparison values
        out['squad_strength_diff'] = self.squad_strength_diff
        out['goals_form_diff'] = self.goals_form_diff
        out['key_players_diff'] = float(self.key_players_diff)
        return out
class SquadAnalysisEngine:
    """
    Squad and player analysis engine.

    For a match such as Beşiktaş-Galatasaray it computes:
    - Goals/assists of the starting-11 players in their last 5 matches
    - Key player detection (players who score a lot)
    - Position distribution (4-3-3, 4-4-2 etc.)
    - Bench quality
    """
    # Position mapping: lower-case alias -> normalized position code.
    # Aliases are matched as substrings, in this order.
    POSITION_MAP = {
        'goalkeeper': 'GK',
        'gk': 'GK',
        'kaleci': 'GK',
        'defender': 'DEF',
        'def': 'DEF',
        'defans': 'DEF',
        'savunma': 'DEF',
        'midfielder': 'MID',
        'mid': 'MID',
        'orta saha': 'MID',
        'forward': 'FWD',
        'fwd': 'FWD',
        'forvet': 'FWD',
        'striker': 'FWD',
    }

    def __init__(self):
        # Connection is opened on first use (see get_conn).
        self.conn = None
        # player_id -> PlayerForm, filled by get_player_form
        self._player_form_cache: Dict[str, PlayerForm] = {}
def _connect_db(self):
    """
    Open the DB connection, store it on ``self.conn`` and return it.

    Returns None when psycopg2 is unavailable or the connection fails
    (the failure is printed).
    """
    if psycopg2 is None:
        return None

    try:
        from data.db import get_clean_dsn
        connection = psycopg2.connect(get_clean_dsn())
    except Exception as e:
        print(f"[SquadEngine] DB connection failed: {e}")
        return None
    else:
        self.conn = connection
        return self.conn
def get_conn(self):
if self.conn is None or self.conn.closed:
self._connect_db()
return self.conn
def _normalize_position(self, position: Optional[str]) -> str:
"""Pozisyonu normalize et"""
if not position:
return 'UNK'
pos_lower = position.lower().strip()
for key, val in self.POSITION_MAP.items():
if key in pos_lower:
return val
return 'UNK'
def get_player_form(self, player_id: str, before_date_ms: int = None) -> PlayerForm:
    """
    Compute a player's recent form (goals, assists, cards) from the DB.

    The "last 5" window is the five participation rows of the player with
    the highest match_id. Successful results are cached per player_id.

    Args:
        player_id: ID of the player.
        before_date_ms: Accepted but not applied to any query, and not part
            of the cache key.
            NOTE(review): presumably meant as a cut-off so form is computed
            only from earlier matches — confirm and implement.

    Returns:
        The PlayerForm. When the DB is unavailable the defaults are
        returned; when a query fails the partially filled form is returned
        without being cached.
    """
    if player_id in self._player_form_cache:
        return self._player_form_cache[player_id]

    form = PlayerForm(player_id=player_id, player_name="")
    conn = self.get_conn()
    if conn is None:
        return form

    try:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            # Player name
            cur.execute("SELECT name FROM players WHERE id = %s", (player_id,))
            player_row = cur.fetchone()
            if player_row:
                form.player_name = player_row['name']

            # Goals in the last 5 matches (missed penalties excluded)
            cur.execute("""
                SELECT
                    COUNT(*) FILTER (WHERE event_type = 'goal' AND event_subtype NOT ILIKE '%%penaltı kaçırma%%') as goals
                FROM match_player_events
                WHERE player_id = %s
                  AND match_id IN (
                      SELECT match_id FROM match_player_participation
                      WHERE player_id = %s
                      ORDER BY match_id DESC LIMIT 5
                  )
            """, (player_id, player_id))
            stats = cur.fetchone()
            if stats:
                form.goals_last_5 = stats['goals'] or 0

            # Assists in the last 5 matches (events naming the player as assist_player_id)
            cur.execute("""
                SELECT COUNT(*) as assists
                FROM match_player_events
                WHERE assist_player_id = %s
                  AND match_id IN (
                      SELECT match_id FROM match_player_participation
                      WHERE player_id = %s
                      ORDER BY match_id DESC LIMIT 5
                  )
            """, (player_id, player_id))
            assist_row = cur.fetchone()
            if assist_row:
                form.assists_last_5 = assist_row['assists'] or 0

            # Cards in the last 5 matches
            cur.execute("""
                SELECT COUNT(*) as cards
                FROM match_player_events
                WHERE player_id = %s AND event_type = 'card'
                  AND match_id IN (
                      SELECT match_id FROM match_player_participation
                      WHERE player_id = %s
                      ORDER BY match_id DESC LIMIT 5
                  )
            """, (player_id, player_id))
            card_row = cur.fetchone()
            if card_row:
                form.cards_last_5 = card_row['cards'] or 0

            # Key player: 3+ goals over ALL recorded events (this query has
            # no match window), missed penalties excluded.
            cur.execute("""
                SELECT COUNT(*) as total_goals
                FROM match_player_events
                WHERE player_id = %s
                  AND event_type = 'goal'
                  AND event_subtype NOT ILIKE '%%penaltı kaçırma%%'
            """, (player_id,))
            total_row = cur.fetchone()
            total_goals = (total_row['total_goals'] or 0) if total_row else 0
            form.is_key_player = total_goals >= 3

        self._player_form_cache[player_id] = form
        return form
    except Exception as e:
        import traceback
        traceback.print_exc()
        print(f"[SquadEngine] Error getting player form: {e}")
        # A failed statement aborts the open transaction; roll back so later
        # queries on this shared connection do not fail as well.
        try:
            conn.rollback()
        except Exception:
            pass
        return form
def analyze_squad(self, match_id: str, team_id: str) -> SquadAnalysis:
    """
    Analyze a team's squad for one match from the stored participation rows.

    Counts starters/substitutes and positions, sums the starters' recent
    goals/assists (via get_player_form), counts key players among the
    starters, and derives squad experience from the average number of
    starts per player for this team (50+ = 1.0).

    Args:
        match_id: ID of the match.
        team_id: ID of the team.

    Returns:
        The SquadAnalysis. When the DB is unavailable the defaults are
        returned; when a query fails, whatever was filled in so far.
    """
    analysis = SquadAnalysis(team_id=team_id)
    conn = self.get_conn()
    if conn is None:
        return analysis

    try:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            # Team name
            cur.execute("SELECT name FROM teams WHERE id = %s", (team_id,))
            team_row = cur.fetchone()
            if team_row:
                analysis.team_name = team_row['name']

            # Match squad
            cur.execute("""
                SELECT player_id, position, is_starting
                FROM match_player_participation
                WHERE match_id = %s AND team_id = %s
            """, (match_id, team_id))
            players = cur.fetchall()

            for p in players:
                if p['is_starting']:
                    analysis.starting_count += 1
                else:
                    analysis.sub_count += 1

                pos = self._normalize_position(p['position'])
                if pos == 'GK':
                    analysis.goalkeeper_count += 1
                elif pos == 'DEF':
                    analysis.defender_count += 1
                elif pos == 'MID':
                    analysis.midfielder_count += 1
                elif pos == 'FWD':
                    analysis.forward_count += 1

                # Only starters contribute to the form totals
                if p['is_starting']:
                    form = self.get_player_form(p['player_id'])
                    analysis.total_goals_last_5 += form.goals_last_5
                    analysis.total_assists_last_5 += form.assists_last_5
                    if form.is_key_player:
                        analysis.key_players_count += 1

            analysis.total_squad = analysis.starting_count + analysis.sub_count

            # Team experience (how many matches players have started for this team)
            if analysis.starting_count > 0:
                cur.execute("""
                    SELECT AVG(match_count) as avg_exp
                    FROM (
                        SELECT player_id, COUNT(*) as match_count
                        FROM match_player_participation
                        WHERE team_id = %s AND is_starting = true
                        GROUP BY player_id
                    ) sub
                """, (team_id,))
                exp_row = cur.fetchone()
                if exp_row and exp_row['avg_exp']:
                    # AVG() arrives as Decimal; convert so the field stays a
                    # plain float. Normalize: 50+ matches = 1.0
                    analysis.squad_experience = min(float(exp_row['avg_exp']) / 50, 1.0)

        return analysis
    except Exception as e:
        print(f"[SquadEngine] Error analyzing squad: {e}")
        # A failed statement aborts the open transaction; roll back so later
        # queries on this shared connection do not fail as well.
        try:
            conn.rollback()
        except Exception:
            pass
        return analysis
def analyze_squad_from_list(self, player_ids: List[str], team_id: str) -> SquadAnalysis:
    """Analyse a squad from an in-memory list of player IDs.

    Used for live matches whose squad is not stored in the database yet.
    Every ID in ``player_ids`` is treated as a starting player (callers pass
    an explicit starting eleven); substitutes are unknown here.

    Args:
        player_ids: IDs of the starting players.
        team_id: Team the players belong to.

    Returns:
        A SquadAnalysis with form totals and, where the player has history
        with this team, position counts and an averaged experience value.
    """
    analysis = SquadAnalysis(team_id=team_id)
    if not player_ids:
        return analysis

    analysis.starting_count = len(player_ids)
    analysis.total_squad = len(player_ids)  # substitutes are not known here

    conn = self.get_conn()
    if conn is None:
        return analysis

    position_counters = {
        'GK': 'goalkeeper_count',
        'DEF': 'defender_count',
        'MID': 'midfielder_count',
        'FWD': 'forward_count',
    }

    # Accumulated locally so that a failure half-way through the loop can
    # never leave an un-normalised partial sum in squad_experience.
    experience_sum = 0.0
    try:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            for pid in player_ids:
                form = self.get_player_form(pid)
                analysis.total_goals_last_5 += form.goals_last_5
                analysis.total_assists_last_5 += form.assists_last_5
                if form.is_key_player:
                    analysis.key_players_count += 1

                # Most frequent position of this player for this team, plus
                # the number of appearances in that position
                cur.execute("""
                    SELECT position, COUNT(*) as match_count
                    FROM match_player_participation
                    WHERE player_id = %s AND team_id = %s
                    GROUP BY position
                    ORDER BY match_count DESC LIMIT 1
                """, (pid, team_id))
                row = cur.fetchone()
                if not row:
                    continue

                pos = self._normalize_position(row.get('position', 'UNK'))
                counter = position_counters.get(pos)
                if counter is not None:
                    setattr(analysis, counter, getattr(analysis, counter) + 1)

                # Experience contribution: 50+ appearances = 1.0
                experience_sum += min(row['match_count'] / 50.0, 1.0)
    except Exception as e:
        print(f"[SquadEngine] Live analyze error: {e}")
        # Leave the shared connection usable after a failed statement
        try:
            conn.rollback()
        except Exception:
            pass  # best effort: the connection may already be closed

    # Average over all listed players (players without history count as 0)
    analysis.squad_experience = experience_sum / analysis.starting_count
    return analysis
def get_features(
    self,
    match_id: str,
    home_team_id: str,
    away_team_id: str
) -> Dict[str, float]:
    """
    Build the squad features for one match.

    Args:
        match_id: Match ID
        home_team_id: Home team ID
        away_team_id: Away team ID

    Returns:
        Squad features as a dict (``SquadFeatures.to_dict()``)
    """
    features = SquadFeatures()

    # (feature suffix, SquadAnalysis attribute) - copied once per side
    copied_fields = (
        ('starting_11', 'starting_count'),
        ('sub_count', 'sub_count'),
        ('total_squad', 'total_squad'),
        ('goalkeepers', 'goalkeeper_count'),
        ('defenders', 'defender_count'),
        ('midfielders', 'midfielder_count'),
        ('forwards', 'forward_count'),
        ('goals_last_5', 'total_goals_last_5'),
        ('assists_last_5', 'total_assists_last_5'),
        ('key_players', 'key_players_count'),
        ('squad_experience', 'squad_experience'),
    )

    squads = {}
    # Home side is analysed first, then the away side
    for side, side_team_id in (('home', home_team_id), ('away', away_team_id)):
        squad = self.analyze_squad(match_id, side_team_id)
        for suffix, source_attr in copied_fields:
            setattr(features, f"{side}_{suffix}", getattr(squad, source_attr))
        squads[side] = squad

    def strength(squad):
        # Weighted blend of recent output, key players and experience
        return (
            squad.total_goals_last_5 * 2 +
            squad.total_assists_last_5 +
            squad.key_players_count * 3 +
            squad.squad_experience * 10
        )

    home, away = squads['home'], squads['away']

    # Head-to-head comparison features
    features.squad_strength_diff = strength(home) - strength(away)
    features.goals_form_diff = home.total_goals_last_5 - away.total_goals_last_5
    features.key_players_diff = home.key_players_count - away.key_players_count
    return features.to_dict()
def get_features_without_match(
    self,
    home_team_id: str,
    away_team_id: str
) -> Dict[str, float]:
    """
    Build team-level squad features when no match ID is available.

    For each team the squad of its most recent match (by ``mst_utc``) is
    used as the reference. The same per-side fields as ``get_features`` are
    filled, including the position counts, and ``squad_strength_diff`` is
    computed when a reference squad exists for both teams, so both entry
    points produce comparable feature vectors.

    Returns:
        Squad features as a dict; default features when no connection is
        available, and whatever was collected so far if a query fails.
    """
    features = SquadFeatures()
    conn = self.get_conn()
    if conn is None:
        return features.to_dict()

    # (feature suffix, SquadAnalysis attribute) - mirrors get_features
    copied_fields = (
        ('starting_11', 'starting_count'),
        ('sub_count', 'sub_count'),
        ('total_squad', 'total_squad'),
        ('goalkeepers', 'goalkeeper_count'),
        ('defenders', 'defender_count'),
        ('midfielders', 'midfielder_count'),
        ('forwards', 'forward_count'),
        ('goals_last_5', 'total_goals_last_5'),
        ('assists_last_5', 'total_assists_last_5'),
        ('key_players', 'key_players_count'),
        ('squad_experience', 'squad_experience'),
    )

    def strength(squad):
        # Same weighting as get_features
        return (
            squad.total_goals_last_5 * 2 +
            squad.total_assists_last_5 +
            squad.key_players_count * 3 +
            squad.squad_experience * 10
        )

    try:
        squads = {}
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            for team_id, prefix in [(home_team_id, 'home'), (away_team_id, 'away')]:
                # Most recent match that has squad rows for this team
                cur.execute("""
                    SELECT mpp.match_id
                    FROM match_player_participation mpp
                    JOIN matches m ON mpp.match_id = m.id
                    WHERE mpp.team_id = %s
                    ORDER BY m.mst_utc DESC
                    LIMIT 1
                """, (team_id,))
                row = cur.fetchone()
                if not row:
                    continue

                analysis = self.analyze_squad(row['match_id'], team_id)
                squads[prefix] = analysis
                for suffix, source_attr in copied_fields:
                    setattr(features, f"{prefix}_{suffix}", getattr(analysis, source_attr))

        # Comparison features
        features.goals_form_diff = features.home_goals_last_5 - features.away_goals_last_5
        features.key_players_diff = features.home_key_players - features.away_key_players
        if 'home' in squads and 'away' in squads:
            features.squad_strength_diff = strength(squads['home']) - strength(squads['away'])
        return features.to_dict()
    except Exception as e:
        print(f"[SquadEngine] Error: {e}")
        # Leave the shared connection usable after a failed statement
        try:
            conn.rollback()
        except Exception:
            pass  # best effort: the connection may already be closed
        return features.to_dict()
# Lazily created, module-wide engine instance
_engine: Optional[SquadAnalysisEngine] = None


def get_squad_analysis_engine() -> SquadAnalysisEngine:
    """Return the shared SquadAnalysisEngine, creating it on first use."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = SquadAnalysisEngine()
    return _engine
if __name__ == "__main__":
    # Manual smoke test: prints the features for two placeholder team IDs
    squad_engine = get_squad_analysis_engine()
    print("\n🧪 Squad Analysis Engine Test")
    print("=" * 50)

    # Placeholder IDs standing in for Galatasaray / Fenerbahce
    sample_features = squad_engine.get_features_without_match(
        home_team_id="test_gs",
        away_team_id="test_fb",
    )

    print("\n📊 Features:")
    for key in sample_features:
        value = sample_features[key]
        print(f" {key}: {value:.2f}")
+194
View File
@@ -0,0 +1,194 @@
"""
Team Stats Engine
Takımların oyun tarzı istatistiklerini analiz eder.
football_team_stats tablosundaki kayıtlardan possession, şut, korner verilerini kullanır.
"""
import os
import sys
import psycopg2
from typing import Dict
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from data.db import get_clean_dsn
class TeamStatsEngine:
    """
    Feature engine for team playing-style statistics.

    Features derived from a team's recent rows in ``football_team_stats``:
    - Average possession
    - Average shots on target / total shots
    - Average corners and fouls
    - Shot-to-goal conversion rate (an xG-like proxy)
    - Shot accuracy and an attacking-intensity blend
    """

    def __init__(self):
        # Connection is opened lazily by get_conn()
        self.conn = None

    def get_conn(self):
        """Return the cached connection, (re)connecting if missing or closed."""
        if self.conn is None or self.conn.closed:
            self.conn = psycopg2.connect(get_clean_dsn())
        return self.conn

    def get_features(self, team_id: str, before_date: int,
                     limit: int = 10, max_days: int = 180) -> Dict[str, float]:
        """
        Compute the playing-style features of a team.

        Args:
            team_id: Team ID (IDs shorter than 5 characters are rejected)
            before_date: Only matches before this moment are used (ms timestamp)
            limit: Maximum number of matches to analyse
            max_days: Maximum look-back window in days

        Returns:
            Dict of team-stats features. Default features are returned for an
            invalid ID, when no matches are found, or when the query fails.
        """
        if not team_id or len(team_id) < 5:
            return self._default_features()

        conn = None
        try:
            conn = self.get_conn()
            min_date = before_date - (max_days * 24 * 60 * 60 * 1000)

            # Stats of this team's last N matches inside the window
            cur = conn.cursor()
            try:
                cur.execute("""
                    SELECT
                        mts.possession_percentage,
                        mts.shots_on_target,
                        mts.shots_off_target,
                        mts.total_shots,
                        mts.corners,
                        mts.fouls,
                        m.score_home,
                        m.score_away,
                        m.home_team_id
                    FROM football_team_stats mts
                    JOIN matches m ON mts.match_id = m.id
                    WHERE mts.team_id = %s
                      AND m.mst_utc < %s
                      AND m.mst_utc > %s
                      AND m.score_home IS NOT NULL
                      AND m.sport = 'football'
                    ORDER BY m.mst_utc DESC
                    LIMIT %s
                """, (team_id, before_date, min_date, limit))
                stats = cur.fetchall()
            finally:
                cur.close()

            if not stats:
                return self._default_features()

            total_matches = len(stats)
            possession_sum = 0.0
            valid_possession_count = 0
            shots_on_target_sum = 0.0
            shots_total_sum = 0.0
            corners_sum = 0.0
            fouls_sum = 0.0
            goals_scored = 0.0

            for poss, sot, _soff, total_shots, corners, fouls, sh, sa, home_id in stats:
                # Possession is averaged only over matches that report it
                if poss and poss > 0:
                    possession_sum += float(poss)
                    valid_possession_count += 1
                # float() keeps Decimal/NUMERIC columns out of the features;
                # missing values count as zero, as before
                shots_on_target_sum += float(sot or 0)
                shots_total_sum += float(total_shots or 0)
                corners_sum += float(corners or 0)
                fouls_sum += float(fouls or 0)

                # Goals of this team; the query only guarantees score_home is
                # not NULL, so a missing score counts as 0 instead of raising
                own_goals = sh if home_id == team_id else sa
                goals_scored += float(own_goals or 0)

            avg_possession = (
                possession_sum / valid_possession_count
                if valid_possession_count > 0 else 50.0
            )
            avg_shots_on_target = shots_on_target_sum / total_matches
            avg_shots_total = shots_total_sum / total_matches
            avg_corners = corners_sum / total_matches
            avg_fouls = fouls_sum / total_matches

            # Shot conversion rate (xG-like proxy)
            shot_conversion = goals_scored / shots_total_sum if shots_total_sum > 0 else 0.1
            # Share of shots that were on target
            shot_accuracy = shots_on_target_sum / shots_total_sum if shots_total_sum > 0 else 0.35

            return {
                'avg_possession': avg_possession / 100,  # Normalize to 0-1
                'avg_shots_on_target': avg_shots_on_target,
                'avg_shots_total': avg_shots_total,
                'avg_corners': avg_corners,
                'avg_fouls': avg_fouls,
                'shot_conversion_rate': shot_conversion,
                'shot_accuracy': shot_accuracy,
                'attacking_intensity': (avg_shots_total + avg_corners) / 2
            }
        except Exception as e:
            print(f"[TeamStatsEngine] Error: {e}")
            # A failed statement leaves the transaction aborted; without a
            # rollback every later call on the cached connection would fail.
            if conn is not None:
                try:
                    conn.rollback()
                except Exception:
                    pass  # best effort: the connection may already be closed
            return self._default_features()

    def _default_features(self) -> Dict[str, float]:
        """Neutral fallback features used when no statistics are available."""
        return {
            'avg_possession': 0.50,
            'avg_shots_on_target': 3.5,
            'avg_shots_total': 11.0,
            'avg_corners': 4.5,
            'avg_fouls': 12.0,
            'shot_conversion_rate': 0.10,
            'shot_accuracy': 0.35,
            'attacking_intensity': 7.5
        }
# Lazily created, module-wide engine instance
_engine = None


def get_team_stats_engine() -> TeamStatsEngine:
    """Return the shared TeamStatsEngine, creating it on first use."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = TeamStatsEngine()
    return _engine
if __name__ == "__main__":
    # Manual smoke test: picks one team that has stats and prints its features
    engine = get_team_stats_engine()
    print("\n🧪 Team Stats Engine Test")
    print("=" * 50)

    # Fetch a sample team ID. The lookup uses football_team_stats, the same
    # table get_features() reads, so the picked team is guaranteed to have
    # rows the engine can actually see.
    conn = engine.get_conn()
    cur = conn.cursor()
    try:
        cur.execute("""
            SELECT DISTINCT mts.team_id, t.name
            FROM football_team_stats mts
            JOIN teams t ON mts.team_id = t.id
            LIMIT 1
        """)
        result = cur.fetchone()
    finally:
        cur.close()

    if result:
        team_id, team_name = result
        print(f"Test Takımı: {team_name}")
        import time
        features = engine.get_features(team_id, int(time.time() * 1000))
        print(f"\n📊 Feature'lar:")
        for k, v in features.items():
            print(f" {k}: {v:.3f}")
+419
View File
@@ -0,0 +1,419 @@
"""
Upset Engine - Dev Avcısı Tespit Sistemi
V9 Model için Galatasaray-Liverpool tarzı sürpriz maçları tespit eder.
Faktörler:
1. Atmosfer (Avrupa gecesi, taraftar baskısı)
2. Motivasyon asimetrisi (küme düşme vs şampiyon)
3. Yorgunluk (maç yoğunluğu, seyahat)
4. Tarihsel upset pattern
"""
import os
import sys
from typing import Dict, Any, Optional, Tuple
from dataclasses import dataclass, field
# Add parent directory to path for imports
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
try:
import psycopg2
from psycopg2.extras import RealDictCursor
except ImportError:
psycopg2 = None
@dataclass
class UpsetFactors:
    """Factors that influence upset potential; every score is on a 0-1 scale."""

    # Atmosphere effect
    atmosphere_score: float = 0.0
    # Motivation asymmetry
    motivation_score: float = 0.0
    # Fatigue difference
    fatigue_score: float = 0.0
    # Historical upset rate
    historical_upset_rate: float = 0.0
    # Combined upset potential
    total_upset_potential: float = 0.0
    # Human-readable explanations collected from the individual factors
    reasoning: list = field(default_factory=list)
class UpsetEngine:
"""
Favori takımın kaybedeceği maçları tespit eder.
Galatasaray-Liverpool tarzı sürprizleri yakalar.
"""
# Yüksek atmosferli stadyumlar (manuel tanımlı + hesaplanabilir)
HIGH_ATMOSPHERE_TEAMS = {
# Türkiye
"galatasaray", "fenerbahce", "besiktas", "trabzonspor",
# İngiltere
"liverpool", "newcastle", "leeds",
# Almanya
"dortmund", "union berlin",
# Yunanistan
"olympiacos", "panathinaikos", "aek athens",
# Arjantin
"boca juniors", "river plate",
# Diğer
"celtic", "rangers", "red star belgrade"
}
# Avrupa kupaları (yüksek motivasyon)
EUROPEAN_COMPETITIONS = {
"şampiyonlar ligi", "champions league", "uefa champions league",
"avrupa ligi", "europa league", "uefa europa league",
"konferans ligi", "conference league", "uefa conference league"
}
def __init__(self):
self.conn = None
self._connect_db()
def _connect_db(self):
"""Veritabanına bağlan"""
if psycopg2 is None:
return
try:
from data.db import get_clean_dsn
self.conn = psycopg2.connect(get_clean_dsn())
except Exception as e:
print(f"[UpsetEngine] DB connection failed: {e}")
self.conn = None
def _get_conn(self):
"""Bağlantıyı kontrol et ve döndür"""
if self.conn is None or self.conn.closed:
self._connect_db()
return self.conn
def calculate_atmosphere_score(
self,
home_team_name: str,
league_name: str,
is_cup_match: bool = False
) -> Tuple[float, list]:
"""
Atmosfer skorunu hesapla.
Yüksek atmosferli stadyumlar upset potansiyelini artırır.
"""
score = 0.0
reasons = []
# Yüksek atmosferli takım mı?
home_lower = home_team_name.lower()
for team in self.HIGH_ATMOSPHERE_TEAMS:
if team in home_lower:
score += 0.25
reasons.append(f"🔥 {home_team_name} yüksek atmosferli stadyum")
break
# Avrupa kupası mı?
league_lower = league_name.lower()
for comp in self.EUROPEAN_COMPETITIONS:
if comp in league_lower:
score += 0.20
reasons.append("🌟 Avrupa gecesi - ekstra motivasyon")
break
# Kupa maçı mı? (tek maç eliminasyon)
if is_cup_match:
score += 0.10
reasons.append("🏆 Kupa maçı - her şey olabilir")
return min(score, 1.0), reasons
def calculate_motivation_score(
self,
home_position: int,
away_position: int,
home_points_to_safety: Optional[int] = None,
away_already_champion: bool = False,
total_teams: int = 20
) -> Tuple[float, list]:
"""
Motivasyon asimetrisini hesapla.
Alt sıradaki takımın üst sıradakine karşı ekstra motivasyonu.
"""
score = 0.0
reasons = []
# Pozisyon farkı
position_diff = 0
if away_position is not None and home_position is not None:
position_diff = away_position - home_position # Negatif = deplasman daha iyi sırada
# Küme düşme hattı vs üst sıra (en güçlü upset faktörü)
relegation_zone = total_teams - 3 # Son 3 takım
if home_position is not None and away_position is not None:
if home_position >= relegation_zone and away_position <= 3:
score += 0.30
reasons.append("⚔️ Hayatta kalma savaşı vs şampiyonluk adayı")
elif home_position >= relegation_zone:
score += 0.15
reasons.append("🔥 Ev sahibi küme düşme hattında - ekstra motivasyon")
elif home_position is not None and home_position >= relegation_zone:
score += 0.15
reasons.append("🔥 Ev sahibi küme düşme hattında - ekstra motivasyon")
# Deplasman takımı zaten şampiyon mu?
if away_already_champion:
score += 0.20
reasons.append("😴 Deplasman takımı zaten şampiyon - motivasyon düşük")
# Büyük pozisyon farkı (underdog evinde)
if position_diff < -10:
score += 0.15
reasons.append(f"📊 {abs(position_diff)} sıra fark - büyük maç heyecanı")
elif position_diff < -5:
score += 0.08
return min(score, 1.0), reasons
def calculate_fatigue_score(
self,
home_matches_last_14d: int = 0,
away_matches_last_14d: int = 0,
home_days_rest: int = 7,
away_days_rest: int = 7,
away_travel_km: float = 0
) -> Tuple[float, list]:
"""
Yorgunluk farkını hesapla.
Yorgun deplasman takımı = yüksek upset potansiyeli.
"""
score = 0.0
reasons = []
# Maç yoğunluğu farkı
match_diff = away_matches_last_14d - home_matches_last_14d
if match_diff >= 3:
score += 0.20
reasons.append(f"🏃 Deplasman {match_diff} maç daha fazla oynamış")
elif match_diff >= 2:
score += 0.10
# Dinlenme süresi farkı
rest_diff = home_days_rest - away_days_rest
if rest_diff >= 4:
score += 0.15
reasons.append(f"💤 Ev sahibi {rest_diff} gün daha fazla dinlenmiş")
elif rest_diff >= 2:
score += 0.08
# Uzun deplasman
if away_travel_km > 3000:
score += 0.15
reasons.append(f"✈️ Uzun deplasman ({int(away_travel_km)} km)")
elif away_travel_km > 1500:
score += 0.08
return min(score, 1.0), reasons
def get_historical_upset_rate(
self,
home_team_id: str,
before_date_ms: int,
lookback_matches: int = 20
) -> Tuple[float, list]:
"""
Ev sahibi takımın tarihsel upset oranını hesapla.
Üst sıradaki takımlara karşı galibiyetler.
"""
reasons = []
conn = self._get_conn()
if conn is None:
return 0.0, reasons
try:
cursor = conn.cursor(cursor_factory=RealDictCursor)
# Ev sahibi olarak oynadığı ve sıralamada geride olduğu maçlar
query = """
WITH home_matches AS (
SELECT
m.id,
m.score_home,
m.score_away,
m.home_team_id,
m.away_team_id
FROM matches m
WHERE m.home_team_id = %s
AND m.mst_utc < %s
AND m.score_home IS NOT NULL
AND m.score_away IS NOT NULL
ORDER BY m.mst_utc DESC
LIMIT %s
)
SELECT
COUNT(*) as total,
SUM(CASE WHEN score_home > score_away THEN 1 ELSE 0 END) as wins
FROM home_matches
"""
cursor.execute(query, (home_team_id, before_date_ms, lookback_matches))
result = cursor.fetchone()
if result and result['total'] > 0:
win_rate = result['wins'] / result['total']
# Ev sahibi kazanma oranı yüksekse, upset potansiyeli de yüksek
if win_rate > 0.5:
rate = min((win_rate - 0.4) * 0.5, 0.3)
reasons.append(f"📈 Güçlü ev sahibi performansı (%{int(win_rate*100)} kazanma)")
return rate, reasons
return 0.0, reasons
except Exception as e:
print(f"[UpsetEngine] Historical query error: {e}")
return 0.0, reasons
def calculate_upset_potential(
self,
home_team_name: str,
home_team_id: str,
away_team_name: str,
league_name: str,
home_position: int,
away_position: int,
match_date_ms: int,
is_cup_match: bool = False,
home_matches_last_14d: int = 2,
away_matches_last_14d: int = 2,
home_days_rest: int = 7,
away_days_rest: int = 7,
away_travel_km: float = 0,
total_teams: int = 20
) -> UpsetFactors:
"""
Tüm faktörleri birleştirerek upset potansiyelini hesapla.
Returns:
UpsetFactors: Tüm faktörler ve toplam skor
"""
factors = UpsetFactors()
all_reasons = []
# 1. Atmosfer
atm_score, atm_reasons = self.calculate_atmosphere_score(
home_team_name, league_name, is_cup_match
)
factors.atmosphere_score = atm_score
all_reasons.extend(atm_reasons)
# 2. Motivasyon
mot_score, mot_reasons = self.calculate_motivation_score(
home_position, away_position,
total_teams=total_teams
)
factors.motivation_score = mot_score
all_reasons.extend(mot_reasons)
# 3. Yorgunluk
fat_score, fat_reasons = self.calculate_fatigue_score(
home_matches_last_14d, away_matches_last_14d,
home_days_rest, away_days_rest,
away_travel_km
)
factors.fatigue_score = fat_score
all_reasons.extend(fat_reasons)
# 4. Tarihsel (sadece DB varsa)
hist_score, hist_reasons = self.get_historical_upset_rate(
home_team_id, match_date_ms
)
factors.historical_upset_rate = hist_score
all_reasons.extend(hist_reasons)
# Toplam skor (weighted average)
factors.total_upset_potential = min(
factors.atmosphere_score * 0.25 +
factors.motivation_score * 0.35 +
factors.fatigue_score * 0.25 +
factors.historical_upset_rate * 0.15,
1.0
)
factors.reasoning = all_reasons
return factors
def get_features(
self,
home_team_name: str,
home_team_id: str,
away_team_name: str,
league_name: str,
home_position: int,
away_position: int,
match_date_ms: int,
**kwargs
) -> Dict[str, float]:
"""
Model için feature dict döndür.
Training ve inference'da kullanılır.
"""
factors = self.calculate_upset_potential(
home_team_name=home_team_name,
home_team_id=home_team_id,
away_team_name=away_team_name,
league_name=league_name,
home_position=home_position,
away_position=away_position,
match_date_ms=match_date_ms,
**kwargs
)
return {
"upset_atmosphere": factors.atmosphere_score,
"upset_motivation": factors.motivation_score,
"upset_fatigue": factors.fatigue_score,
"upset_historical": factors.historical_upset_rate,
"upset_potential": factors.total_upset_potential,
}
# Lazily created, module-wide engine instance
_engine_instance = None


def get_upset_engine() -> UpsetEngine:
    """Return the shared UpsetEngine, creating it on first use."""
    global _engine_instance
    if _engine_instance is not None:
        return _engine_instance
    _engine_instance = UpsetEngine()
    return _engine_instance
# Manual smoke test
if __name__ == "__main__":
    engine = get_upset_engine()

    # Galatasaray vs Liverpool example scenario
    scenario = dict(
        home_team_name="Galatasaray",
        home_team_id="test-gs-id",
        away_team_name="Liverpool",
        league_name="UEFA Champions League",
        home_position=12,
        away_position=1,
        match_date_ms=1700000000000,
        is_cup_match=False,
        away_matches_last_14d=5,
        home_matches_last_14d=2,
        away_days_rest=3,
        home_days_rest=7,
        away_travel_km=2800,
        total_teams=20,
    )
    factors = engine.calculate_upset_potential(**scenario)

    separator = "=" * 60
    print(separator)
    print("GALATASARAY vs LIVERPOOL - UPSET ANALİZİ")
    print(separator)

    # One line per factor score
    factor_lines = (
        ("🏟️ Atmosfer Skoru", factors.atmosphere_score),
        ("💪 Motivasyon Skoru", factors.motivation_score),
        ("😓 Yorgunluk Skoru", factors.fatigue_score),
        ("📊 Tarihsel Skor", factors.historical_upset_rate),
    )
    for label, value in factor_lines:
        print(f"{label}: {value:.2f}")

    print(f"\n🎯 TOPLAM UPSET POTANSİYELİ: {factors.total_upset_potential:.2f}")
    print("\n📝 Sebepler:")
    for reason in factors.reasoning:
        print(f" {reason}")
+511
View File
@@ -0,0 +1,511 @@
"""
Upset Engine v2 - GLM-5 Tespitleri ile Geliştirilmiş Sürpriz Tespiti
====================================================================
Yeni Eklenen Faktörler (GLM-5 Analizinden):
1. MARGIN_ANALIZI - Bookmaker margin > %18 = sürpriz riski
2. FAVORI_ORAN_TUZAGI - 1.40-1.60 arası en yüksek sürpriz oranı
3. HAKEM_SURPRIZ_ORANI - Hakemin geçmiş maçlarında ev kayıp oranı
4. FORM_FARKI_TUZAGI - Form farkı > 40 = "çok iyi görünen" favori tuzak
Orijinal Faktörler:
- Atmosfer (Avrupa gecesi, taraftar baskısı)
- Motivasyon asimetrisi (küme düşme vs şampiyon)
- Yorgunluk (maç yoğunluğu, seyahat)
- Tarihsel upset pattern
"""
import os
import sys
from typing import Dict, Any, Optional, Tuple, List
from dataclasses import dataclass, field
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
try:
import psycopg2
from psycopg2.extras import RealDictCursor
except ImportError:
psycopg2 = None
@dataclass
class UpsetFactorsV2:
    """Factors that influence upset potential - v2."""

    # --- Original factors ---
    atmosphere_score: float = 0.0
    motivation_score: float = 0.0
    fatigue_score: float = 0.0
    historical_upset_rate: float = 0.0

    # --- New factors (GLM-5) ---
    # Bookmaker margin analysis
    margin_score: float = 0.0
    # Favourite-odds trap
    favorite_odds_trap: float = 0.0
    # Referee upset rate
    referee_upset_score: float = 0.0
    # Form-gap trap
    form_trap_score: float = 0.0

    # --- Totals ---
    total_upset_potential: float = 0.0
    reasoning: List[str] = field(default_factory=list)

    # Upset score (0-100) and its level: LOW, MEDIUM, HIGH, EXTREME
    upset_score: int = 0
    upset_level: str = "LOW"
class UpsetEngineV2:
"""
Favori takımın kaybedeceği maçları tespit eder.
v2: GLM-5 analizlerinden elde edilen yeni faktörler eklendi.
"""
# Yüksek atmosferli stadyumlar
HIGH_ATMOSPHERE_TEAMS = {
"galatasaray", "fenerbahce", "besiktas", "trabzonspor",
"liverpool", "newcastle", "leeds",
"dortmund", "union berlin",
"olympiacos", "panathinaikos", "aek athens",
"boca juniors", "river plate",
"celtic", "rangers", "red star belgrade"
}
EUROPEAN_COMPETITIONS = {
"şampiyonlar ligi", "champions league", "uefa champions league",
"avrupa ligi", "europa league", "uefa europa league",
"konferans ligi", "conference league", "uefa conference league"
}
# YENİ: Sürpriz oranları (veritabanı analizinden)
# Favori oran aralığına göre sürpriz oranları
FAVORITE_ODDS_UPSET_RATES = {
(1.10, 1.20): 0.111, # %11.1 sürpriz
(1.20, 1.30): 0.150, # %15.0 sürpriz
(1.30, 1.40): 0.235, # %23.5 sürpriz
(1.40, 1.50): 0.333, # %33.3 sürpriz ← DİKKAT!
(1.50, 1.60): 0.350, # %35.0 sürpriz ← EN YÜKSEK!
}
def __init__(self):
    """Create the engine and immediately try to open the DB connection."""
    # Stays None when psycopg2 is missing or the connection attempt fails
    self.conn = None
    self._connect_db()
def _connect_db(self):
    """Open the database connection; on failure log it and leave ``conn`` as None."""
    if psycopg2 is not None:
        try:
            # Imported lazily so the module still loads without the data package
            from data.db import get_clean_dsn
            dsn = get_clean_dsn()
            self.conn = psycopg2.connect(dsn)
        except Exception as e:
            print(f"[UpsetEngineV2] DB connection failed: {e}")
            self.conn = None
def _get_conn(self):
if self.conn is None or self.conn.closed:
self._connect_db()
return self.conn
# ═════════════════════════════════════════════════════════════════
# YENİ FAKTÖRLER (GLM-5 Analizinden)
# ═════════════════════════════════════════════════════════════════
def calculate_margin_score(
    self,
    odds_data: Dict[str, float]
) -> Tuple[float, List[str]]:
    """
    GLM-5 finding: bookmaker margin analysis.

    The margin is the overround of the 1X2 market
    (``1/ms_h + 1/ms_d + 1/ms_a - 1``).
    Margin > 18% -> score 0.15; margin > 20% -> score 0.25.

    Missing, None or non-positive odds yield ``(0.0, [])`` instead of raising.

    Returns:
        (score, list of reasons)
    """
    score = 0.0
    reasons = []
    if not odds_data:
        return score, reasons

    # "or 0" also covers keys that are present but hold None (NULL odds)
    ms_h = odds_data.get("ms_h") or 0
    ms_d = odds_data.get("ms_d") or 0
    ms_a = odds_data.get("ms_a") or 0

    if ms_h > 0 and ms_d > 0 and ms_a > 0:
        margin = (1/ms_h + 1/ms_d + 1/ms_a) - 1
        if margin > 0.20:
            score = 0.25
            reasons.append(f"⚠️ Margin çok yüksek (%{margin*100:.1f}) - Bookmaker risk görüyor!")
        elif margin > 0.18:
            score = 0.15
            reasons.append(f"⚠️ Margin yüksek (%{margin*100:.1f}) - Dikkat!")
    return score, reasons
def calculate_favorite_odds_trap(
self,
favorite_odds: float,
favorite_side: str # 'home' or 'away'
) -> Tuple[float, List[str]]:
"""
GLM-5 Tespiti: Favori oran tuzağı
Veritabanı analizine göre:
- 1.40-1.50 arası: %33.3 sürpriz
- 1.50-1.60 arası: %35.0 sürpriz (EN YÜKSEK!)
- < 1.20: Tuzak oranı şüphesi
"""
score = 0.0
reasons = []
if favorite_odds <= 0:
return score, reasons
for (low, high), upset_rate in self.FAVORITE_ODDS_UPSET_RATES.items():
if low <= favorite_odds < high:
score = upset_rate # Doğrudan sürpriz olasılığı
if upset_rate >= 0.30:
reasons.append(f"🔴 Favori oran {favorite_odds:.2f} - %{upset_rate*100:.0f} sürpriz oranı!")
elif upset_rate >= 0.20:
reasons.append(f"⚠️ Favori oran {favorite_odds:.2f} - %{upset_rate*100:.0f} sürpriz riski")
break
# Çok düşük oran tuzağı
if favorite_odds < 1.20:
score = max(score, 0.20)
reasons.append(f"⚠️ Favori oran çok düşük ({favorite_odds:.2f}) - Tuzak oranı şüphesi")
return score, reasons
def calculate_referee_upset_score(
self,
referee_name: str
) -> Tuple[float, List[str]]:
"""
GLM-5 Tespiti: Hakem sürpriz oranı
Hakemin yönettiği maçlarda ev sahibi kayıp oranı
> %25 → Yüksek sürpriz riski
"""
score = 0.0
reasons = []
if not referee_name or not self._get_conn():
return score, reasons
try:
cur = self._get_conn().cursor()
# Hakemin yönettiği maçlarda sonuçlar
cur.execute("""
SELECT
COUNT(*) as total,
SUM(CASE WHEN m.score_home < m.score_away THEN 1 ELSE 0 END) as away_wins,
SUM(CASE WHEN m.score_home = m.score_away THEN 1 ELSE 0 END) as draws
FROM match_officials mo
JOIN matches m ON m.id = mo.match_id
WHERE mo.name = %s AND mo.role_id = 1
AND m.score_home IS NOT NULL
""", (referee_name,))
row = cur.fetchone()
cur.close()
if row and row[0] and row[0] >= 3:
total = row[0]
away_wins = row[1] or 0
draws = row[2] or 0
upset_rate = (away_wins + draws * 0.5) / total
if upset_rate > 0.40:
score = 0.25
reasons.append(f"👨‍⚖️ {referee_name}: %{upset_rate*100:.0f} sürpriz oranı (YÜKSEK!)")
elif upset_rate > 0.30:
score = 0.15
reasons.append(f"👨‍⚖️ {referee_name}: %{upset_rate*100:.0f} sürpriz oranı")
except Exception as e:
pass
return score, reasons
def calculate_form_trap_score(
    self,
    home_form_score: float,
    away_form_score: float,
    favorite_side: str
) -> Tuple[float, List[str]]:
    """
    GLM-5 finding: form-gap trap.

    A form gap above 40 scores 0.20; a reason is added only when the side in
    form is NOT the favourite. Independently, a favourite whose own form is
    below 50 scores at least 0.15.

    Returns:
        (score, list of reasons)
    """
    score = 0.0
    reasons = []
    gap = home_form_score - away_form_score
    favourite_is_home = favorite_side == 'home'
    favourite_is_away = favorite_side == 'away'

    # Very large form gap
    if abs(gap) > 40:
        score = 0.20
        home_in_form = gap > 0
        if home_in_form and favourite_is_away:
            reasons.append(f"🔴 Form tuzağı! Ev sahibi formda ({home_form_score:.0f}) ama deplasman favori")
        elif not home_in_form and favourite_is_home:
            reasons.append(f"🔴 Form tuzağı! Deplasman formda ({away_form_score:.0f}) ama ev sahibi favori")

    # Favourite in poor form
    if favourite_is_home:
        if home_form_score < 50:
            score = max(score, 0.15)
            reasons.append(f"⚠️ Favori ev sahibi formu düşük ({home_form_score:.0f})")
    elif favourite_is_away:
        if away_form_score < 50:
            score = max(score, 0.15)
            reasons.append(f"⚠️ Favori deplasman formu düşük ({away_form_score:.0f})")
    return score, reasons
# ═════════════════════════════════════════════════════════════════
# ORİJİNAL FAKTÖRLER
# ═════════════════════════════════════════════════════════════════
def calculate_atmosphere_score(
self,
home_team_name: str,
league_name: str,
is_cup_match: bool = False
) -> Tuple[float, List[str]]:
"""Orijinal: Atmosfer skoru"""
score = 0.0
reasons = []
home_lower = home_team_name.lower()
for team in self.HIGH_ATMOSPHERE_TEAMS:
if team in home_lower:
score += 0.25
reasons.append(f"🔥 {home_team_name} yüksek atmosferli stadyum")
break
league_lower = league_name.lower()
for comp in self.EUROPEAN_COMPETITIONS:
if comp in league_lower:
score += 0.20
reasons.append("🌟 Avrupa gecesi - ekstra motivasyon")
break
if is_cup_match:
score += 0.10
reasons.append("🏆 Kupa maçı - her şey olabilir")
return min(score, 1.0), reasons
def calculate_motivation_score(
    self,
    home_position: int,
    away_position: int,
    total_teams: int = 20
) -> Tuple[float, List[str]]:
    """
    Original factor: motivation asymmetry.

    Scores a home side at or below position ``total_teams - 3`` (more so
    against a top-3 visitor) and a table gap of more than 10 places in the
    visitor's favour. Nothing is scored when either position is None.

    Returns:
        (score capped at 1.0, list of reasons)
    """
    score = 0.0
    reasons = []

    # Both table positions are required for every rule below
    if home_position is None or away_position is None:
        return min(score, 1.0), reasons

    home_in_zone = home_position >= total_teams - 3
    if home_in_zone:
        if away_position <= 3:
            score += 0.30
            reasons.append("⚔️ Hayatta kalma savaşı vs şampiyonluk adayı")
        else:
            score += 0.15
            reasons.append("🔥 Ev sahibi küme düşme hattında")

    # Negative gap = the away side is placed higher in the table
    table_gap = away_position - home_position
    if table_gap < -10:
        score += 0.15
        reasons.append(f"📊 {abs(table_gap)} sıra fark")

    return min(score, 1.0), reasons
# ═════════════════════════════════════════════════════════════════
# ANA FONKSİYON
# ═════════════════════════════════════════════════════════════════
def calculate_upset_potential(
    self,
    home_team_name: str,
    home_team_id: str,
    away_team_name: str,
    league_name: str,
    home_position: int = None,
    away_position: int = None,
    match_date_ms: int = None,
    odds_data: Dict[str, float] = None,
    referee_name: str = None,
    home_form_score: float = 50.0,
    away_form_score: float = 50.0,
    favorite_side: str = None,  # 'home', 'away', or 'draw'
    favorite_odds: float = None
) -> UpsetFactorsV2:
    """Full upset analysis - v2 (with GLM-5 improvements).

    Runs each available factor calculator, stores the per-factor scores
    on a fresh ``UpsetFactorsV2``, converts them into a 0-100 point
    score with a textual level, and records every reason collected on
    the way.

    Args:
        home_team_name: Home team name (used for the atmosphere factor).
        home_team_id: Not used by this method.
        away_team_name: Not used by this method.
        league_name: Competition name (used for the atmosphere factor).
        home_position: Home league position; motivation is only scored
            when both positions are given.
        away_position: Away league position.
        match_date_ms: Not used by this method.
        odds_data: Odds mapping passed to ``calculate_margin_score``;
            skipped when empty/None.
        referee_name: Passed to ``calculate_referee_upset_score``;
            skipped when empty/None.
        home_form_score: Home form score.
        away_form_score: Away form score.
        favorite_side: 'home', 'away' or 'draw'; the form-trap factor
            falls back to 'home' when this is not given.
        favorite_odds: Odds of the favorite; the odds-trap factor needs
            both this and ``favorite_side``.

    Returns:
        The populated ``UpsetFactorsV2`` instance.
    """
    factors = UpsetFactorsV2()
    notes = []

    def tier_points(value, tiers):
        """Return the points of the first (threshold, points) tier reached."""
        for threshold, points in tiers:
            if value >= threshold:
                return points
        return 0

    # 1. Margin analysis (new)
    if odds_data:
        margin_value, found = self.calculate_margin_score(odds_data)
        factors.margin_score = margin_value
        notes.extend(found)

    # 2. Favorite odds trap (new)
    if favorite_odds and favorite_side:
        trap_value, found = self.calculate_favorite_odds_trap(
            favorite_odds, favorite_side
        )
        factors.favorite_odds_trap = trap_value
        notes.extend(found)

    # 3. Referee upset rate (new)
    if referee_name:
        referee_value, found = self.calculate_referee_upset_score(referee_name)
        factors.referee_upset_score = referee_value
        notes.extend(found)

    # 4. Form trap (new) - always evaluated
    form_value, found = self.calculate_form_trap_score(
        home_form_score, away_form_score, favorite_side or 'home'
    )
    factors.form_trap_score = form_value
    notes.extend(found)

    # 5. Atmosphere (original) - always evaluated
    atmosphere_value, found = self.calculate_atmosphere_score(
        home_team_name, league_name
    )
    factors.atmosphere_score = atmosphere_value
    notes.extend(found)

    # 6. Motivation (original)
    if home_position is not None and away_position is not None:
        motivation_value, found = self.calculate_motivation_score(
            home_position, away_position
        )
        factors.motivation_score = motivation_value
        notes.extend(found)

    # ── Upset score (0-100), strengthened v2.1 ──────────────────────
    points = 0

    # Margin is the only tiered factor that also emits its own reason.
    if factors.margin_score >= 0.25:
        points += 30
        notes.append("🔴 Margin > %20: Bookmaker büyük risk görüyor!")
    elif factors.margin_score >= 0.15:
        points += 20
        notes.append("⚠️ Margin > %18: Dikkatli ol!")

    points += tier_points(factors.favorite_odds_trap, ((0.30, 30), (0.20, 25), (0.15, 20)))
    points += tier_points(factors.referee_upset_score, ((0.25, 20), (0.15, 10)))
    points += tier_points(factors.form_trap_score, ((0.20, 20), (0.15, 15)))
    points += tier_points(factors.atmosphere_score, ((0.40, 20), (0.25, 15)))
    points += tier_points(factors.motivation_score, ((0.30, 15), (0.15, 10)))

    # ── Extra risk factors ──────────────────────────────────────────
    # Away favorite: flat +10
    if favorite_side == 'away':
        points += 10
        notes.append("📍 Deplasman favorisi - ekstra risk!")

    # Favorite's form very low (< 40): +15
    if favorite_side == 'home' and home_form_score < 40:
        points += 15
        notes.append(f"🔴 Favori ev sahibi formu ÇOK DÜŞÜK ({home_form_score:.0f})")
    elif favorite_side == 'away' and away_form_score < 40:
        points += 15
        notes.append(f"🔴 Favori deplasman formu ÇOK DÜŞÜK ({away_form_score:.0f})")

    # Very short favorite odds (< 1.30) combined with a high margin score
    if favorite_odds and favorite_odds < 1.30 and factors.margin_score >= 0.15:
        points += 10
        notes.append(f"⚠️ Düşük oran ({favorite_odds:.2f}) + yüksek margin = TUZAK ŞÜPHESİ!")

    factors.upset_score = min(points, 100)

    # Level: first floor reached wins, otherwise LOW.
    for floor, label in ((60, "EXTREME"), (45, "HIGH"), (30, "MEDIUM")):
        if factors.upset_score >= floor:
            factors.upset_level = label
            break
    else:
        factors.upset_level = "LOW"

    # Aggregate potential: atmosphere and motivation count at half weight.
    factors.total_upset_potential = min(
        (factors.margin_score + factors.favorite_odds_trap +
         factors.referee_upset_score + factors.form_trap_score +
         factors.atmosphere_score * 0.5 + factors.motivation_score * 0.5) / 1.5,
        1.0
    )
    factors.reasoning = notes
    return factors
# Lazily created shared engine; see get_upset_engine_v2().
_upset_engine_v2_instance = None


def get_upset_engine_v2():
    """Return the shared UpsetEngineV2 instance (singleton).

    The engine is built on the first call and the same object is
    returned afterwards. Previously every call constructed a new
    engine even though the function was documented as a singleton.

    Returns:
        The process-wide ``UpsetEngineV2`` instance.
    """
    global _upset_engine_v2_instance
    if _upset_engine_v2_instance is None:
        _upset_engine_v2_instance = UpsetEngineV2()
    return _upset_engine_v2_instance
if __name__ == "__main__":
    # Manual smoke test: Real Madrid vs Getafe
    demo_engine = get_upset_engine_v2()
    demo_match = dict(
        home_team_name="Real Madrid",
        home_team_id="test",
        away_team_name="Getafe",
        league_name="LaLiga",
        odds_data={"ms_h": 1.25, "ms_d": 3.92, "ms_a": 6.86},
        referee_name="A. Muniz Ruiz",
        home_form_score=80.0,
        away_form_score=56.7,
        favorite_side="home",
        favorite_odds=1.25,
    )
    demo_result = demo_engine.calculate_upset_potential(**demo_match)

    banner = '=' * 60
    print(f"\n{banner}")
    print("Real Madrid vs Getafe - Sürpriz Analizi")
    print(banner)
    print(f"Sürpriz Skoru: {demo_result.upset_score}/100")
    print(f"Seviye: {demo_result.upset_level}")
    print("\nNedenler:")
    for line in demo_result.reasoning:
        print(f" {line}")
+249
View File
@@ -0,0 +1,249 @@
"""
Value Betting Calculator
Expected Value (EV) ve stake önerileri hesaplar.
"""
from typing import Dict, Optional
from dataclasses import dataclass
@dataclass
class ValueBet:
    """Result of a value analysis for a single bet."""
    bet_type: str               # market label, e.g. MS_1, AU25_Üst, KG_Var
    my_probability: float       # our own estimate
    market_odds: float          # bookmaker odds
    implied_probability: float  # probability implied by the odds
    edge: float                 # difference (our estimate - implied)
    expected_value: float       # EV = (prob × odds) - 1
    is_value: bool              # whether the bet qualifies as value
    kelly_fraction: float       # Kelly stake fraction
    confidence_tier: str        # "banker", "strong", "value", "skip"

    def to_dict(self) -> Dict:
        """Return a plain dict; float metrics are rounded to 4 decimals."""
        def four_dp(number):
            return round(number, 4)

        return dict(
            bet_type=self.bet_type,
            my_probability=four_dp(self.my_probability),
            market_odds=self.market_odds,
            implied_probability=four_dp(self.implied_probability),
            edge=four_dp(self.edge),
            expected_value=four_dp(self.expected_value),
            is_value=self.is_value,
            kelly_fraction=four_dp(self.kelly_fraction),
            confidence_tier=self.confidence_tier,
        )
class ValueCalculator:
    """Value betting calculator.

    Compares model probabilities with bookmaker odds to compute edge,
    expected value (EV), a confidence tier and a suggested stake.
    """

    # Thresholds
    MIN_EDGE_FOR_VALUE = 0.05   # minimum 5% edge
    MIN_EDGE_FOR_STRONG = 0.10  # 10%+ edge = strong value
    MIN_EDGE_FOR_BANKER = 0.15  # 15%+ edge = banker
    KELLY_FRACTION = 0.25       # quarter Kelly (conservative)
    MAX_STAKE_PERCENT = 0.10    # at most 10% of the bankroll

    def __init__(self):
        """The calculator keeps no per-instance state."""
        pass

    def calculate_implied_probability(self, odds: float) -> float:
        """Return the probability implied by decimal ``odds``.

        Odds of 1 or less are treated as a certain outcome (1.0).
        """
        if odds <= 1:
            return 1.0
        return 1 / odds

    def calculate_ev(self, probability: float, odds: float) -> float:
        """Return the expected value per unit staked.

        EV = (probability × odds) - 1. Positive EV means long-run
        profit, negative EV means long-run loss.
        """
        return (probability * odds) - 1

    def calculate_kelly_stake(self, probability: float, odds: float) -> float:
        """Return the fractional-Kelly stake as a share of the bankroll.

        Full Kelly = (p × b - q) / b, where p is the win probability,
        q = 1 - p and b = odds - 1 (net profit per unit).

        The full Kelly value is scaled by ``KELLY_FRACTION`` first and
        the *resulting stake* is then capped at ``MAX_STAKE_PERCENT``.
        Capping before scaling (the previous order) made every stake
        saturate at ``MAX_STAKE_PERCENT * KELLY_FRACTION`` and the
        documented maximum unreachable.

        Returns:
            0.0 for odds <= 1 or a non-positive Kelly value, otherwise
            a fraction in (0, MAX_STAKE_PERCENT].
        """
        if odds <= 1:
            return 0.0
        net_odds = odds - 1
        full_kelly = (probability * net_odds - (1 - probability)) / net_odds
        if full_kelly <= 0:
            # No edge: never stake.
            return 0.0
        return min(full_kelly * self.KELLY_FRACTION, self.MAX_STAKE_PERCENT)

    def analyze_bet(self, bet_type: str, my_probability: float,
                    market_odds: float) -> ValueBet:
        """Run the value analysis for a single bet.

        Args:
            bet_type: Market label (MS_1, AU25_Üst, KG_Var, ...).
            my_probability: Our estimate, expected in the 0-1 range.
            market_odds: Bookmaker decimal odds.

        Returns:
            ValueBet: The analysis result. Odds of 1 or less yield a
            "skip" result with EV -1 and no stake.
        """
        if market_odds <= 1:
            return ValueBet(
                bet_type=bet_type,
                my_probability=my_probability,
                market_odds=market_odds,
                implied_probability=1.0,
                edge=0,
                expected_value=-1,
                is_value=False,
                kelly_fraction=0,
                confidence_tier="skip"
            )

        implied = self.calculate_implied_probability(market_odds)
        edge = my_probability - implied
        ev = self.calculate_ev(my_probability, market_odds)
        kelly = self.calculate_kelly_stake(my_probability, market_odds)

        # Tier selection: "banker" additionally requires a high win probability.
        if edge >= self.MIN_EDGE_FOR_BANKER and my_probability >= 0.70:
            tier = "banker"
        elif edge >= self.MIN_EDGE_FOR_STRONG:
            tier = "strong"
        elif edge >= self.MIN_EDGE_FOR_VALUE:
            tier = "value"
        else:
            tier = "skip"

        return ValueBet(
            bet_type=bet_type,
            my_probability=my_probability,
            market_odds=market_odds,
            implied_probability=implied,
            edge=edge,
            expected_value=ev,
            is_value=edge >= self.MIN_EDGE_FOR_VALUE,
            kelly_fraction=kelly,
            confidence_tier=tier
        )

    def analyze_match_predictions(self, predictions: Dict[str, float],
                                  odds: Dict[str, float]) -> Dict[str, ValueBet]:
        """Analyse every prediction of a match that has usable odds.

        Args:
            predictions: e.g. {'MS_1': 0.55, 'MS_X': 0.25, ...}
            odds: e.g. {'MS_1': 1.80, 'MS_X': 3.50, ...}

        Returns:
            Dict[str, ValueBet]: One analysis per bet type that appears
            in ``odds`` with odds greater than 1; others are omitted.
        """
        return {
            bet_type: self.analyze_bet(
                bet_type=bet_type,
                my_probability=probability,
                market_odds=odds[bet_type]
            )
            for bet_type, probability in predictions.items()
            if bet_type in odds and odds[bet_type] > 1
        }

    def get_best_value_bets(self, value_bets: Dict[str, ValueBet],
                            top_n: int = 3) -> list:
        """Return up to ``top_n`` value bets, highest expected value first."""
        candidates = [vb for vb in value_bets.values() if vb.is_value]
        candidates.sort(key=lambda vb: vb.expected_value, reverse=True)
        return candidates[:top_n]

    def calculate_stake(self, value_bet: ValueBet, bankroll: float,
                        use_kelly: bool = True) -> float:
        """Return the suggested stake amount.

        Args:
            value_bet: Result of a value analysis.
            bankroll: Total budget.
            use_kelly: Use the stored Kelly fraction when true,
                otherwise a fixed per-tier share of the bankroll.

        Returns:
            float: Suggested stake; 0 when the bet is not a value bet.
        """
        if not value_bet.is_value:
            return 0

        if use_kelly:
            return bankroll * value_bet.kelly_fraction

        # Fixed stake share per tier
        tier_stakes = {
            "banker": 0.05,
            "strong": 0.03,
            "value": 0.02,
            "skip": 0
        }
        return bankroll * tier_stakes.get(value_bet.confidence_tier, 0)
# Module-level cache holding the shared calculator
_calculator = None


def get_value_calculator() -> ValueCalculator:
    """Return the shared ValueCalculator, creating it on the first call."""
    global _calculator
    if _calculator is not None:
        return _calculator
    _calculator = ValueCalculator()
    return _calculator
if __name__ == "__main__":
    # Manual demo: prints the value analysis for a few sample bets.
    calc = get_value_calculator()
    print("\n🧪 Value Calculator Test")
    print("=" * 50)

    # Test scenarios
    test_cases = [
        {"bet": "MS_1", "prob": 0.70, "odds": 1.60},      # High prob, low odds
        {"bet": "MS_1", "prob": 0.55, "odds": 1.90},      # Medium prob, good odds
        {"bet": "MS_1", "prob": 0.60, "odds": 2.10},      # VALUE!
        {"bet": "AU25_Üst", "prob": 0.65, "odds": 1.85},  # VALUE!
        {"bet": "KG_Var", "prob": 0.50, "odds": 1.70},    # No value
    ]

    # Loop-invariant lookup, built once. The "value" marker was an empty
    # string before, so that tier printed without any symbol.
    tier_emoji = {"banker": "🎯", "strong": "💪", "value": "✨", "skip": "⏭️"}

    for tc in test_cases:
        result = calc.analyze_bet(tc["bet"], tc["prob"], tc["odds"])
        # Both branches used to be "", which made the status invisible.
        status_emoji = "✅" if result.is_value else "❌"
        print(f"\n{status_emoji} {tc['bet']}")
        print(f" Tahmin: {tc['prob']:.0%} | Oran: {tc['odds']:.2f} | Implied: {result.implied_probability:.0%}")
        print(f" Edge: {result.edge:+.1%} | EV: {result.expected_value:+.1%}")
        print(f" Tier: {tier_emoji.get(result.confidence_tier, '')} {result.confidence_tier.upper()}")
        print(f" Kelly Stake: {result.kelly_fraction:.2%} of bankroll")
        if result.is_value:
            stake = calc.calculate_stake(result, 1000)
            print(f" 💰 Önerilen Stake (1000 TL bank): {stake:.2f} TL")
@@ -0,0 +1,415 @@
"""
Value Detection Engine
======================
The Smart Way to Beat the Bookmakers
This engine doesn't just predict winners - it finds VALUE.
The key insight: We don't need to predict the winner, we need to find
where the bookmaker made a mistake in their odds.
Core Philosophy:
- High Margin = High Uncertainty = Potential Value
- Model Probability > Implied Probability = Value Bet
- The goal is NOT to predict correctly, but to find +EV bets
Author: AI Engine V21
"""
import math
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple
from collections import defaultdict
@dataclass
class ValueBet:
    """A single value-bet opportunity."""
    outcome: str                # "1", "X", "2"
    model_probability: float    # model probability (0-1)
    implied_probability: float  # bookmaker implied probability (0-1)
    odds: float                 # bookmaker odds
    edge: float                 # model_prob - implied_prob, stored as a fraction
    expected_value: float       # EV = (prob * odds) - 1
    kelly_fraction: float       # suggested bet size as a bankroll fraction
    confidence: str             # "HIGH", "MEDIUM", "LOW"
    reasons: List[str]          # why this counts as value

    def to_dict(self) -> dict:
        """Return a dict with fractional fields shown as percentages (1 decimal).

        ``reasons`` is passed through as the same list object, not a copy.
        """
        def as_percent(fraction):
            return round(fraction * 100, 1)

        payload = {"outcome": self.outcome}
        payload["model_prob"] = as_percent(self.model_probability)
        payload["implied_prob"] = as_percent(self.implied_probability)
        payload["odds"] = self.odds
        payload["edge"] = as_percent(self.edge)
        payload["ev"] = as_percent(self.expected_value)
        payload["kelly"] = as_percent(self.kelly_fraction)
        payload["confidence"] = self.confidence
        payload["reasons"] = self.reasons
        return payload
@dataclass
class MarginAnalysis:
    """Summary of the bookmaker margin for one match."""
    raw_margin: float       # sum of raw implied probabilities - 1
    true_margin: float      # adjusted for favorite-longshot bias
    favorite_outcome: str
    favorite_odds: float
    uncertainty_level: str  # "LOW", "MEDIUM", "HIGH", "EXTREME"

    def to_dict(self) -> dict:
        """Return a dict view; both margins are percentages with 1 decimal."""
        raw_pct = round(self.raw_margin * 100, 1)
        true_pct = round(self.true_margin * 100, 1)
        return dict(
            raw_margin=raw_pct,
            true_margin=true_pct,
            favorite=self.favorite_outcome,
            favorite_odds=self.favorite_odds,
            uncertainty=self.uncertainty_level,
        )
class ValueDetectionEngine:
    """Find value bets by comparing model and bookmaker probabilities.

    Heuristics applied by this class:
    1. A raw margin above ``MARGIN_HIGH`` adds an edge bonus to
       non-favorite outcomes.
    2. Favorite odds in the 1.40-1.60 range add a further bonus to
       non-favorite outcomes.
    3. An away favorite adds a bonus to the home outcome.
    """

    # Upset rate per favorite-odds range, keyed by [low, high)
    UPSET_RATES = {
        (1.00, 1.25): 0.08,  # 8% upset rate
        (1.25, 1.40): 0.18,  # 18% upset rate
        (1.40, 1.60): 0.33,  # 33% upset rate - DANGER ZONE
        (1.60, 1.80): 0.28,  # 28% upset rate
        (1.80, 2.00): 0.35,  # 35% upset rate
        (2.00, 2.50): 0.42,  # 42% upset rate
        (2.50, 3.00): 0.45,  # 45% upset rate
        (3.00, 5.00): 0.55,  # 55% upset rate
    }

    # Margin thresholds
    MARGIN_LOW = 0.06      # 6% - bookmaker very confident
    MARGIN_MEDIUM = 0.12   # 12% - normal margin
    MARGIN_HIGH = 0.18     # 18% - bookmaker unsure
    # NOTE(review): MARGIN_EXTREME is not referenced anywhere in this
    # class; calculate_margin labels everything >= MARGIN_HIGH "EXTREME".
    MARGIN_EXTREME = 0.22  # 22% - bookmaker very unsure

    def __init__(self):
        self.historical_data = []    # For learning
        self.value_threshold = 0.03  # Minimum 3% edge to consider value

    def calculate_margin(self, odds_1: float, odds_x: float, odds_2: float) -> MarginAnalysis:
        """Compute the bookmaker margin and classify the uncertainty.

        Args:
            odds_1: Home-win odds.
            odds_x: Draw odds.
            odds_2: Away-win odds.

        Returns:
            A ``MarginAnalysis``. When any odds value is not greater
            than 1 a placeholder with level "UNKNOWN" is returned.
        """
        if not all(price > 1 for price in (odds_1, odds_x, odds_2)):
            return MarginAnalysis(0, 0, "X", 0, "UNKNOWN")

        # Raw implied probabilities
        imp_1 = 1 / odds_1
        imp_x = 1 / odds_x
        imp_2 = 1 / odds_2
        raw_margin = imp_1 + imp_x + imp_2 - 1

        # Favorite = lowest odds; ties resolve home, then away, then draw.
        if odds_1 <= odds_x and odds_1 <= odds_2:
            favorite_outcome = "1"
            favorite_odds = odds_1
        elif odds_2 <= odds_1 and odds_2 <= odds_x:
            favorite_outcome = "2"
            favorite_odds = odds_2
        else:
            favorite_outcome = "X"
            favorite_odds = odds_x

        # Simplified favorite-longshot adjustment: flat 15% reduction.
        true_margin = raw_margin * 0.85

        # Uncertainty level
        if raw_margin < self.MARGIN_LOW:
            uncertainty = "LOW"
        elif raw_margin < self.MARGIN_MEDIUM:
            uncertainty = "MEDIUM"
        elif raw_margin < self.MARGIN_HIGH:
            uncertainty = "HIGH"
        else:
            uncertainty = "EXTREME"

        return MarginAnalysis(
            raw_margin=raw_margin,
            true_margin=true_margin,
            favorite_outcome=favorite_outcome,
            favorite_odds=favorite_odds,
            uncertainty_level=uncertainty
        )

    def get_historical_upset_rate(self, favorite_odds: float) -> float:
        """Return the ``UPSET_RATES`` entry whose range contains the odds.

        Odds outside every range (below 1.00 or 5.00 and above) fall
        back to 0.40.
        """
        for (low, high), rate in self.UPSET_RATES.items():
            if low <= favorite_odds < high:
                return rate
        return 0.40

    def calculate_edge(
        self,
        model_prob: float,
        odds: float,
        margin: float
    ) -> Tuple[float, float]:
        """Return ``(edge, expected_value)`` for one outcome.

        Edge = model probability - implied probability (1 / odds).
        EV = (probability * odds) - 1.

        ``margin`` is accepted for interface compatibility but is not
        used: the implied probability is not margin-adjusted here.
        Odds of 1 or less return ``(0, -1)``.
        """
        if odds <= 1:
            return 0, -1

        implied = 1 / odds
        edge = model_prob - implied
        ev = (model_prob * odds) - 1
        return edge, ev

    def calculate_kelly_fraction(
        self,
        probability: float,
        odds: float,
        half_kelly: bool = True
    ) -> float:
        """Return the Kelly bet size as a fraction of the bankroll.

        Kelly = (p * odds - 1) / (odds - 1), which is the usual
        (p * b - q) / b with b = odds - 1 and q = 1 - p. The previous
        implementation computed (p * b - 1) / b, which is negative for
        almost every realistic input and therefore returned 0.

        Args:
            probability: Win probability (0-1).
            odds: Decimal odds.
            half_kelly: Halve the Kelly value for safety (default).

        Returns:
            0 for odds <= 1 or a negative Kelly value, otherwise the
            (half) Kelly fraction capped at 0.10.
        """
        if odds <= 1:
            return 0

        net_odds = odds - 1
        kelly = (probability * odds - 1) / net_odds

        # Don't bet without an edge
        if kelly < 0:
            return 0

        if half_kelly:
            kelly = kelly / 2

        # Cap at 10% of bankroll
        return min(kelly, 0.10)

    def find_value_bets(
        self,
        model_probs: Dict[str, float],
        odds: Dict[str, float],
        match_context: Optional[Dict] = None
    ) -> List[ValueBet]:
        """Find all value bets in a match.

        Args:
            model_probs: e.g. {"1": 0.55, "X": 0.25, "2": 0.20}
            odds: e.g. {"1": 1.25, "X": 4.50, "2": 8.00}
            match_context: Additional context; currently not used.

        Returns:
            ``ValueBet`` objects sorted by edge, highest first. The
            stored edge includes the heuristic bonuses added below.
        """
        value_bets = []

        margin_analysis = self.calculate_margin(
            odds.get("1", 0),
            odds.get("X", 0),
            odds.get("2", 0)
        )

        for outcome in ["1", "X", "2"]:
            prob = model_probs.get(outcome, 0)
            odd = odds.get(outcome, 0)
            if prob <= 0 or odd <= 1:
                continue

            edge, ev = self.calculate_edge(prob, odd, margin_analysis.raw_margin)
            kelly = self.calculate_kelly_fraction(prob, odd)

            reasons = []

            # 1. Basic edge
            if edge > self.value_threshold:
                reasons.append(f"Edge: +{round(edge*100, 1)}% over bookmaker")

            # 2. High margin bonus
            if margin_analysis.raw_margin > self.MARGIN_HIGH:
                reasons.append(f"High margin ({round(margin_analysis.raw_margin*100, 1)}%) = uncertainty")
                # Boost edge for underdogs in high margin matches
                if outcome != margin_analysis.favorite_outcome:
                    edge += 0.02  # 2% bonus
                    reasons.append("Underdog in high-margin match = bonus value")

            # 3. Favorite odds trap
            fav_odds = margin_analysis.favorite_odds
            if margin_analysis.favorite_outcome != outcome:
                upset_rate = self.get_historical_upset_rate(fav_odds)
                if upset_rate > 0.25:
                    reasons.append(f"Favorite odds {fav_odds} has {round(upset_rate*100)}% upset rate")
                # Extra bonus for 1.40-1.60 range
                if 1.40 <= fav_odds <= 1.60:
                    edge += 0.03
                    reasons.append("DANGER ZONE: 1.40-1.60 odds = highest upset risk")

            # 4. Away favorite risk
            if margin_analysis.favorite_outcome == "2" and outcome == "1":
                edge += 0.015
                reasons.append("Away favorite = extra home value")

            # 5. EV positive
            if ev > 0:
                reasons.append(f"Positive EV: +{round(ev*100, 1)}%")

            # Only add if we have reasons (value detected)
            if reasons and edge > 0:
                if edge > 0.08 or (edge > 0.05 and kelly > 0.03):
                    confidence = "HIGH"
                elif edge > 0.05:
                    confidence = "MEDIUM"
                else:
                    confidence = "LOW"

                value_bets.append(ValueBet(
                    outcome=outcome,
                    model_probability=prob,
                    implied_probability=1/odd,
                    odds=odd,
                    edge=edge,
                    expected_value=ev,
                    kelly_fraction=kelly,
                    confidence=confidence,
                    reasons=reasons
                ))

        # Sort by edge (highest first)
        value_bets.sort(key=lambda vb: vb.edge, reverse=True)
        return value_bets

    def predict_with_value(
        self,
        model_probs: Dict[str, float],
        odds: Dict[str, float],
        match_context: Optional[Dict] = None
    ) -> Dict:
        """Make a recommendation based on value rather than probability.

        Returns:
            A dict with the keys ``margin_analysis`` (dict),
            ``value_bets`` (list of dicts), ``best_value`` and
            ``alternative_value`` (dict or None), ``recommendation``
            ("BET_<outcome>", "CONSIDER_<outcome>" or "NO_BET"),
            ``confidence`` and ``reasoning`` (list of str).
        """
        margin_analysis = self.calculate_margin(
            odds.get("1", 0),
            odds.get("X", 0),
            odds.get("2", 0)
        )
        value_bets = self.find_value_bets(model_probs, odds, match_context)

        result = {
            "margin_analysis": margin_analysis.to_dict(),
            "value_bets": [vb.to_dict() for vb in value_bets],
            "best_value": None,
            "alternative_value": None,
            "recommendation": "NO_BET",
            "confidence": "LOW",
            "reasoning": []
        }

        if not value_bets:
            result["reasoning"].append("No value detected in any outcome")
            result["reasoning"].append("Bookmaker odds are efficient for this match")
            return result

        best = value_bets[0]
        result["best_value"] = best.to_dict()
        if len(value_bets) > 1:
            result["alternative_value"] = value_bets[1].to_dict()

        # ``reasoning`` must be a copy: ``best.reasons`` is the very list
        # already exposed through ``best_value`` and ``value_bets``, so
        # appending to it in place would alter those entries as well.
        if best.confidence == "HIGH" and best.edge > 0.05:
            result["recommendation"] = f"BET_{best.outcome}"
            result["confidence"] = "HIGH"
            result["reasoning"] = list(best.reasons)
            result["reasoning"].append(f"Strong value on {best.outcome} with {round(best.edge*100, 1)}% edge")
        elif best.confidence == "MEDIUM" or best.edge > 0.03:
            result["recommendation"] = f"CONSIDER_{best.outcome}"
            result["confidence"] = "MEDIUM"
            result["reasoning"] = list(best.reasons)
            result["reasoning"].append(f"Moderate value on {best.outcome}")
        else:
            result["recommendation"] = "NO_BET"
            result["confidence"] = "LOW"
            result["reasoning"].append("Edge too small to justify bet")
            result["reasoning"].append(f"Best edge: {round(best.edge*100, 1)}% (need >3%)")

        # Add margin context
        if margin_analysis.uncertainty_level == "EXTREME":
            result["reasoning"].append("⚠️ EXTREME margin - high volatility match")
        elif margin_analysis.uncertainty_level == "HIGH":
            result["reasoning"].append("⚠️ High margin - bookmaker sees risk")

        return result
# Shared engine, created lazily by get_value_detection_engine()
_engine_instance = None


def get_value_detection_engine() -> ValueDetectionEngine:
    """Return the shared ValueDetectionEngine, building it on first use."""
    global _engine_instance
    if _engine_instance is not None:
        return _engine_instance
    _engine_instance = ValueDetectionEngine()
    return _engine_instance
+167
View File
@@ -0,0 +1,167 @@
"""
Shared VQWEN feature contract
=============================
One place defines how VQWEN features are produced.
Both training and runtime inference must use this module so the model sees
the same feature semantics in historical data and live analysis.
"""
from __future__ import annotations
from dataclasses import dataclass
import numpy as np
# Feature names in the exact order used by row_to_array() when it turns
# a feature row into a model input vector. Do not reorder.
FEATURE_COLUMNS = [
    # rating and expected goals
    "elo_diff", "h_xg", "a_xg", "total_xg",
    # power, rest and fatigue
    "pow_diff", "rest_diff", "h_fat", "a_fat",
    # implied probabilities (home / draw / away)
    "imp_h", "imp_d", "imp_a",
    # lineup availability (home / away)
    "h_xi", "a_xi",
    # head-to-head and form
    "h2h_h_wr", "form_diff",
]
@dataclass(slots=True)
class VqwenFeatureInput:
home_elo: float
away_elo: float
home_avg_goals_scored: float
away_avg_goals_scored: float
home_avg_goals_conceded: float
away_avg_goals_conceded: float
home_avg_shots_on_target: float
away_avg_shots_on_target: float
home_avg_possession: float
away_avg_possession: float
home_rest_days: float
away_rest_days: float
implied_prob_home: float
implied_prob_draw: float
implied_prob_away: float
home_lineup_availability: float = 1.0
away_lineup_availability: float = 1.0
h2h_home_win_rate: float = 0.5
home_form_score: float = 0.0
away_form_score: float = 0.0
league_avg_goals: float = 2.6
referee_avg_goals: float = 2.6
referee_home_bias: float = 0.0
home_squad_strength: float = 0.5
away_squad_strength: float = 0.5
home_key_players: float = 0.0
away_key_players: float = 0.0
missing_players_impact: float = 0.0
def fatigue_multiplier(rest_days: float) -> float:
    """Map rest days to a multiplier: <3 -> 0.85, <5 -> 0.95, else 1.0."""
    for limit, factor in ((3.0, 0.85), (5.0, 0.95)):
        if rest_days < limit:
            return factor
    return 1.0
def clamp(value: float, lower: float, upper: float) -> float:
    """Convert ``value`` to float and limit it to ``[lower, upper]``."""
    floored = max(float(value), lower)
    return min(floored, upper)
def build_vqwen_feature_row(values: VqwenFeatureInput) -> dict[str, float]:
    """Build the VQWEN feature mapping for one match.

    Args:
        values: Raw match inputs.

    Returns:
        A dict with one entry per VQWEN feature: Elo difference,
        home/away/total expected goals, power difference, rest
        difference, fatigue multipliers, clamped implied probabilities,
        clamped lineup availability, clamped head-to-head home win rate
        and an adjusted form difference.
    """
    home_fatigue = fatigue_multiplier(values.home_rest_days)
    away_fatigue = fatigue_multiplier(values.away_rest_days)

    # Goal environment: mean of league and referee goal averages,
    # expressed relative to 2.6 and limited to [0.85, 1.2].
    goal_environment = (
        float(values.league_avg_goals) + float(values.referee_avg_goals)
    ) / 2.0
    env_multiplier = clamp(goal_environment / 2.6, 0.85, 1.2)

    home_squad = float(values.home_squad_strength)
    away_squad = float(values.away_squad_strength)
    home_keys = float(values.home_key_players)
    away_keys = float(values.away_key_players)

    squad_diff = home_squad - away_squad
    key_player_diff = home_keys - away_keys
    missing_penalty = clamp(float(values.missing_players_impact), 0.0, 1.0)
    referee_bias = clamp(float(values.referee_home_bias), -0.25, 0.25)

    # Squad multipliers mirror each other except for the missing-player
    # penalty, which lowers both sides.
    home_squad_multiplier = clamp(
        1.0 + squad_diff * 0.08 + key_player_diff * 0.025 - missing_penalty * 0.08 + referee_bias * 0.03,
        0.82,
        1.18,
    )
    away_squad_multiplier = clamp(
        1.0 - squad_diff * 0.08 - key_player_diff * 0.025 - missing_penalty * 0.08 - referee_bias * 0.03,
        0.82,
        1.18,
    )

    home_scored = float(values.home_avg_goals_scored)
    home_conceded = float(values.home_avg_goals_conceded)
    away_scored = float(values.away_avg_goals_scored)
    away_conceded = float(values.away_avg_goals_conceded)

    def expected_goals(attack, defence, fatigue, squad_multiplier):
        """Attack/defence average (floored at 0.05) scaled by the multipliers."""
        return max(0.05, (attack + defence) / 2.0) * fatigue * env_multiplier * squad_multiplier

    home_xg = expected_goals(home_scored, away_conceded, home_fatigue, home_squad_multiplier)
    away_xg = expected_goals(away_scored, home_conceded, away_fatigue, away_squad_multiplier)

    def base_power(scored, conceded, shots_on_target, possession, squad, key_players):
        """Weighted strength sum shared by both sides (referee term excluded)."""
        return (
            scored * 5.0
            - conceded * 5.0
            + shots_on_target * 2.0
            + possession * 0.1
            + squad * 3.0
            + key_players * 0.8
        )

    # The referee bias term is added for the home side, subtracted for away.
    home_power = base_power(
        home_scored,
        home_conceded,
        float(values.home_avg_shots_on_target),
        float(values.home_avg_possession),
        home_squad,
        home_keys,
    ) + referee_bias * 6.0
    away_power = base_power(
        away_scored,
        away_conceded,
        float(values.away_avg_shots_on_target),
        float(values.away_avg_possession),
        away_squad,
        away_keys,
    ) - referee_bias * 6.0

    form_diff = (
        float(values.home_form_score)
        - float(values.away_form_score)
        + squad_diff * 1.5
        + key_player_diff * 0.35
        + referee_bias * 2.0
        - missing_penalty * 1.75
    )

    row = {
        "elo_diff": float(values.home_elo) - float(values.away_elo),
        "h_xg": home_xg,
        "a_xg": away_xg,
        "total_xg": home_xg + away_xg,
        "pow_diff": home_power - away_power,
        "rest_diff": float(values.home_rest_days) - float(values.away_rest_days),
        "h_fat": home_fatigue,
        "a_fat": away_fatigue,
        "imp_h": clamp(values.implied_prob_home, 0.01, 0.98),
        "imp_d": clamp(values.implied_prob_draw, 0.01, 0.98),
        "imp_a": clamp(values.implied_prob_away, 0.01, 0.98),
    }
    # Column names are preserved for artifact compatibility. Semantics
    # are now "pre-match lineup availability" instead of leaked
    # post-match starting-XI counts.
    row["h_xi"] = clamp(values.home_lineup_availability, 0.0, 1.0)
    row["a_xi"] = clamp(values.away_lineup_availability, 0.0, 1.0)
    row["h2h_h_wr"] = clamp(values.h2h_home_win_rate, 0.0, 1.0)
    row["form_diff"] = form_diff
    return row
def row_to_array(row: dict[str, float]) -> np.ndarray:
    """Return ``row`` as a single-row float64 array ordered by FEATURE_COLUMNS."""
    ordered_values = [float(row[name]) for name in FEATURE_COLUMNS]
    return np.array([ordered_values], dtype=np.float64)