This commit is contained in:
Executable
+29
@@ -0,0 +1,29 @@
|
||||
"""
|
||||
AI Engine V9 Feature Modules
|
||||
Includes V8 features + new V9 engines (Upset, Momentum, Poisson, Context, Referee, Squad)
|
||||
"""
|
||||
|
||||
# V20 Features
|
||||
from .h2h_engine import H2HFeatureEngine, get_h2h_engine
|
||||
from .elo_system import ELORatingSystem, get_elo_system
|
||||
from .value_calculator import ValueCalculator, get_value_calculator
|
||||
from .team_stats_engine import get_team_stats_engine
|
||||
from .upset_engine import UpsetEngine, get_upset_engine
|
||||
from .momentum_engine import MomentumEngine, get_momentum_engine
|
||||
from .poisson_engine import PoissonEngine, get_poisson_engine
|
||||
from .referee_engine import RefereeEngine, get_referee_engine
|
||||
from .squad_analysis_engine import SquadAnalysisEngine, get_squad_analysis_engine
|
||||
|
||||
# Public API of the features package: every engine class together with its
# accessor function, in the same order as the imports above.
__all__ = [
    'H2HFeatureEngine',
    'get_h2h_engine',
    'ELORatingSystem',
    'get_elo_system',
    'ValueCalculator',
    'get_value_calculator',
    'get_team_stats_engine',
    'UpsetEngine',
    'get_upset_engine',
    'MomentumEngine',
    'get_momentum_engine',
    'PoissonEngine',
    'get_poisson_engine',
    'RefereeEngine',
    'get_referee_engine',
    'SquadAnalysisEngine',
    'get_squad_analysis_engine',
]
|
||||
|
||||
|
||||
Executable
+655
@@ -0,0 +1,655 @@
|
||||
"""
|
||||
ELO Rating System V2 - Venue-Adjusted & League-Weighted
|
||||
V9 Model için geliştirilmiş ELO sistemi.
|
||||
|
||||
V1'den Farklar:
|
||||
- Lig kalitesi faktörü (Premier League vs küçük lig)
|
||||
- Form decay (son maçlar daha etkili)
|
||||
- Venue-adjusted ELO (ev/deplasman ayrı)
|
||||
- Win probability hesaplama
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
from typing import Dict, Optional, Tuple
|
||||
from dataclasses import dataclass, asdict, field
|
||||
from datetime import datetime
|
||||
|
||||
try:
|
||||
import psycopg2
|
||||
except ImportError:
|
||||
psycopg2 = None
|
||||
|
||||
# Persisted rating snapshots live in the "models" directory that sits next to
# this module's parent directory (i.e. <dir of this file>/../models).
_MODULE_DIR = os.path.dirname(__file__)
MODELS_DIR = os.path.join(os.path.dirname(_MODULE_DIR), 'models')
|
||||
|
||||
|
||||
@dataclass
class TeamELO:
    """Per-team ELO profile: overall, venue-specific and form ratings plus counters."""

    team_id: str
    team_name: str = ""

    # Core ratings
    overall_elo: float = 1500.0
    home_elo: float = 1500.0
    away_elo: float = 1500.0

    # Form rating (driven by the most recent results)
    form_elo: float = 1500.0

    # Bookkeeping
    matches_played: int = 0
    home_matches: int = 0
    away_matches: int = 0
    wins: int = 0
    draws: int = 0
    losses: int = 0
    last_updated: Optional[str] = None

    # Most recent results as a W/D/L sequence
    recent_form: str = ""

    def win_rate(self) -> float:
        """Return wins / matches_played, or 0.0 when no match has been played."""
        played = self.matches_played
        return self.wins / played if played != 0 else 0.0

    def to_features(self) -> Dict[str, float]:
        """Return the rating fields as a flat ``elo_*`` feature mapping."""
        features: Dict[str, float] = {}
        features['elo_overall'] = self.overall_elo
        features['elo_home'] = self.home_elo
        features['elo_away'] = self.away_elo
        features['elo_form'] = self.form_elo
        features['elo_matches'] = self.matches_played
        features['elo_win_rate'] = self.win_rate()
        return features
|
||||
|
||||
|
||||
# League quality multipliers (1.0 = average league).
#
# Keys are lowercase fragments that are matched as substrings of the league
# name, and a first-match lookup walks this dict in insertion order. A more
# specific key must therefore appear BEFORE any shorter key it contains,
# otherwise "2. Bundesliga" would be rated as "bundesliga" and "La Liga 2"
# as "la liga".
LEAGUE_QUALITY = {
    # Second tiers whose names contain a top-flight key - keep these first
    "2. bundesliga": 0.92,
    "la liga 2": 0.90,

    # Top 5 European leagues
    "premier league": 1.15,
    "premier lig": 1.15,
    "la liga": 1.12,
    "bundesliga": 1.10,
    "serie a": 1.08,
    "ligue 1": 1.05,

    # Strong leagues
    "eredivisie": 1.02,
    "primeira liga": 1.02,
    "süper lig": 1.00,

    # European cups
    "champions league": 1.20,
    "şampiyonlar ligi": 1.20,
    "europa league": 1.10,
    "avrupa ligi": 1.10,
    "conference league": 1.00,

    # Remaining mid-tier leagues
    "championship": 0.95,
    "serie b": 0.90,

    # Fallback for every league not listed above
    "default": 0.85,
}
|
||||
|
||||
|
||||
class ELORatingSystem:
    """
    ELO Rating System V2 - venue-adjusted and league-weighted.

    Features:
    - separate home / away rating per team,
    - league quality multiplier on the K-factor,
    - "form ELO" dominated by the most recent result,
    - K-factor scaled by goal difference.

    Ratings are kept in ``self.ratings`` keyed by team id (as a string) and
    are loaded on construction from the ``team_elo_ratings`` table, falling
    back to a JSON snapshot under ``MODELS_DIR``.
    """

    # ELO parameters
    K_FACTOR_BASE = 32        # base K-factor
    K_FACTOR_NEW_TEAM = 48    # higher K while a team has fewer than 20 matches
    HOME_ADVANTAGE = 65       # home advantage, in ELO points
    INITIAL_ELO = 1500
    FORM_WEIGHT = 0.7         # weight of the latest match in the form ELO

    def __init__(self):
        """Create the system and eagerly load persisted ratings."""
        self.ratings: Dict[str, TeamELO] = {}
        self.league_cache: Dict[str, str] = {}  # team_id -> league_name
        self.conn = None
        self._load_ratings()

    def _connect_db(self):
        """Open a PostgreSQL connection and cache it on ``self.conn``.

        Returns the connection, or None when psycopg2 is not installed or the
        connection attempt fails.
        """
        if psycopg2 is None:
            return None
        try:
            from data.db import get_clean_dsn
            self.conn = psycopg2.connect(get_clean_dsn())
            return self.conn
        except Exception as e:
            print(f"[ELO] DB connection failed: {e}")
            # Forget any stale (closed) handle so get_conn() returns None
            # instead of handing callers a dead connection.
            self.conn = None
            return None

    def get_conn(self):
        """Return a live connection, reconnecting if needed; None when unavailable."""
        if self.conn is None or self.conn.closed:
            self._connect_db()
        return self.conn

    def _load_ratings(self):
        """Load ratings: database first, JSON file as fallback."""
        if self._load_ratings_from_db():
            return
        self._load_ratings_from_json()

    def _load_ratings_from_db(self) -> bool:
        """Load ratings from the ``team_elo_ratings`` table.

        Returns True when at least one row was loaded, False when the DB is
        unavailable, the table is empty, or the query fails.

        NOTE: only the columns selected below are restored; win/draw/loss and
        per-venue match counters keep their dataclass defaults.
        """
        conn = self.get_conn()
        if conn is None:
            return False

        initial = float(self.INITIAL_ELO)

        def _num(value):
            # A NULL rating falls back to the initial ELO instead of aborting the load
            return initial if value is None else float(value)

        try:
            cur = conn.cursor()
            try:
                cur.execute("""
                    SELECT ter.team_id, t.name,
                           ter.overall_elo, ter.home_elo, ter.away_elo,
                           ter.form_elo, ter.matches_played, ter.recent_form
                    FROM team_elo_ratings ter
                    LEFT JOIN teams t ON ter.team_id = t.id
                """)
                rows = cur.fetchall()
            finally:
                cur.close()

            if not rows:
                return False

            for row in rows:
                tid, name, overall, home, away, form, played, recent = row
                # recent_form is a str on TeamELO; update_after_match() prepends
                # a character to it, so never store a list here.
                if isinstance(recent, (list, tuple)):
                    recent = "".join(str(item) for item in recent)
                self.ratings[str(tid)] = TeamELO(
                    team_id=str(tid),
                    team_name=name or "",
                    overall_elo=_num(overall),
                    home_elo=_num(home),
                    away_elo=_num(away),
                    form_elo=_num(form),
                    matches_played=int(played or 0),
                    recent_form=recent or "",
                )
            print(f"[OK] ELO V2 ratings DB'den yuklendi ({len(self.ratings)} takim)")
            return True
        except Exception as e:
            print(f"[WARN] ELO DB yuklenemedi, JSON'a dusuyuyor: {e}")
            # A failed statement leaves the psycopg2 transaction aborted; roll
            # back so the cached connection stays usable for later calls.
            try:
                conn.rollback()
            except Exception as rollback_error:
                print(f"[ELO] DB rollback failed: {rollback_error}")
            return False

    def _load_ratings_from_json(self):
        """Load ratings from ``elo_ratings_v2.json`` under MODELS_DIR (fallback)."""
        ratings_path = os.path.join(MODELS_DIR, 'elo_ratings_v2.json')
        if not os.path.exists(ratings_path):
            return
        try:
            with open(ratings_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            for team_id, rating_data in data.items():
                self.ratings[team_id] = TeamELO(**rating_data)
            print(f"[OK] ELO V2 ratings JSON'dan yuklendi ({len(self.ratings)} takim)")
        except Exception as e:
            print(f"[WARN] ELO V2 ratings yuklenemedi: {e}")

    def save_ratings(self):
        """Write all ratings to ``elo_ratings_v2.json`` under MODELS_DIR."""
        ratings_path = os.path.join(MODELS_DIR, 'elo_ratings_v2.json')
        os.makedirs(MODELS_DIR, exist_ok=True)

        data = {team_id: asdict(elo) for team_id, elo in self.ratings.items()}
        with open(ratings_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        print(f"💾 ELO V2 ratings kaydedildi ({len(self.ratings)} takım)")

    def get_or_create_rating(self, team_id: str, team_name: str = "") -> "TeamELO":
        """Return the team's rating, creating a default entry if it is unknown."""
        if team_id not in self.ratings:
            self.ratings[team_id] = TeamELO(team_id=team_id, team_name=team_name)
        return self.ratings[team_id]

    def get_league_quality(self, league_name: str) -> float:
        """Return the quality multiplier for a league name.

        Matching is a case-insensitive substring test against the
        LEAGUE_QUALITY keys. When several keys match, the longest one wins so
        that e.g. "2. Bundesliga" is not rated as "Bundesliga". The "default"
        entry is a fallback only and never matched as a substring.
        """
        if not league_name:
            return LEAGUE_QUALITY["default"]

        league_lower = league_name.lower()
        candidates = [
            key for key in LEAGUE_QUALITY
            if key != "default" and key in league_lower
        ]
        if not candidates:
            return LEAGUE_QUALITY["default"]
        return LEAGUE_QUALITY[max(candidates, key=len)]

    def expected_score(self, rating_a: float, rating_b: float) -> float:
        """
        Expected score of A against B, in the range 0..1.

        1 = certain win, 0.5 = evenly matched, 0 = certain loss.
        """
        return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))

    def get_k_factor(self, team_elo: "TeamELO", goal_diff: int,
                     league_quality: float = 1.0) -> float:
        """
        Compute the dynamic K-factor.

        - higher for new teams (fewer than 20 matches) so they adapt quickly,
        - higher for larger goal differences,
        - scaled by the league quality multiplier.

        ``goal_diff`` is the absolute goal difference; draws (0) and one-goal
        results use the neutral multiplier 1.0.
        """
        # Base K
        if team_elo.matches_played < 20:
            k = self.K_FACTOR_NEW_TEAM
        else:
            k = self.K_FACTOR_BASE

        # Goal-difference multiplier. A draw must not be amplified, so
        # everything up to a one-goal margin stays at 1.0.
        if goal_diff <= 1:
            goal_mult = 1.0
        elif goal_diff == 2:
            goal_mult = 1.25
        elif goal_diff == 3:
            goal_mult = 1.5
        else:
            goal_mult = 1.75 + (goal_diff - 3) * 0.1

        # League quality multiplier
        return k * goal_mult * league_quality

    def update_after_match(
        self,
        home_id: str,
        away_id: str,
        home_goals: int,
        away_goals: int,
        home_name: str = "",
        away_name: str = "",
        league_name: str = ""
    ):
        """Update both teams' ratings, counters and recent form after a match."""
        home_elo = self.get_or_create_rating(home_id, home_name)
        away_elo = self.get_or_create_rating(away_id, away_name)

        # Actual result
        if home_goals > away_goals:
            actual_home, actual_away = 1.0, 0.0
            home_elo.wins += 1
            away_elo.losses += 1
            result_home, result_away = 'W', 'L'
        elif home_goals < away_goals:
            actual_home, actual_away = 0.0, 1.0
            home_elo.losses += 1
            away_elo.wins += 1
            result_home, result_away = 'L', 'W'
        else:
            actual_home, actual_away = 0.5, 0.5
            home_elo.draws += 1
            away_elo.draws += 1
            result_home, result_away = 'D', 'D'

        goal_diff = abs(home_goals - away_goals)
        league_quality = self.get_league_quality(league_name)

        # K-factors (computed before matches_played is incremented)
        k_home = self.get_k_factor(home_elo, goal_diff, league_quality)
        k_away = self.get_k_factor(away_elo, goal_diff, league_quality)

        # -- Overall ELO (home side gets the fixed home advantage) --
        expected_home = self.expected_score(
            home_elo.overall_elo + self.HOME_ADVANTAGE,
            away_elo.overall_elo
        )
        home_elo.overall_elo += k_home * (actual_home - expected_home)
        away_elo.overall_elo += k_away * (actual_away - (1 - expected_home))

        # -- Venue-specific ELO (no extra home bonus; the ratings carry it) --
        expected_home_venue = self.expected_score(home_elo.home_elo, away_elo.away_elo)
        home_elo.home_elo += k_home * (actual_home - expected_home_venue)
        away_elo.away_elo += k_away * (actual_away - (1 - expected_home_venue))

        # -- Form ELO: blend of the previous value and this result --
        home_elo.form_elo = (
            home_elo.form_elo * (1 - self.FORM_WEIGHT) +
            (1500 + (actual_home - 0.5) * 100) * self.FORM_WEIGHT
        )
        away_elo.form_elo = (
            away_elo.form_elo * (1 - self.FORM_WEIGHT) +
            (1500 + (actual_away - 0.5) * 100) * self.FORM_WEIGHT
        )

        # Counters
        home_elo.matches_played += 1
        away_elo.matches_played += 1
        home_elo.home_matches += 1
        away_elo.away_matches += 1

        # Recent form: newest result first, capped at 5 characters
        home_elo.recent_form = (result_home + home_elo.recent_form)[:5]
        away_elo.recent_form = (result_away + away_elo.recent_form)[:5]

        stamp = datetime.now().isoformat()
        home_elo.last_updated = stamp
        away_elo.last_updated = stamp

    def predict_match(self, home_id: str, away_id: str) -> Dict[str, float]:
        """
        Estimate home-win / draw / away-win probabilities for a fixture.

        Unknown teams are created with default ratings as a side effect.
        """
        home_elo = self.get_or_create_rating(home_id)
        away_elo = self.get_or_create_rating(away_id)

        # Based on overall rating
        exp_home_overall = self.expected_score(
            home_elo.overall_elo + self.HOME_ADVANTAGE,
            away_elo.overall_elo
        )

        # Based on venue rating
        exp_home_venue = self.expected_score(
            home_elo.home_elo,
            away_elo.away_elo
        )

        # Combined (mean of the two)
        home_prob = (exp_home_overall + exp_home_venue) / 2

        # Draw estimate: shrinks as the rating gap grows, clamped to [0.15, 0.35]
        elo_diff = abs(home_elo.overall_elo - away_elo.overall_elo)
        draw_base = 0.25  # baseline draw rate
        draw_prob = draw_base * (1 - elo_diff / 800)
        draw_prob = max(0.15, min(draw_prob, 0.35))

        # Distribute the remaining mass between home and away
        remaining = 1 - draw_prob
        home_win = home_prob * remaining
        away_win = (1 - home_prob) * remaining

        return {
            "home_win": round(home_win, 3),
            "draw": round(draw_prob, 3),
            "away_win": round(away_win, 3),
        }

    def get_match_features(self, home_id: str, away_id: str) -> Dict[str, float]:
        """Return the ELO-derived feature mapping for a fixture."""
        home_elo = self.get_or_create_rating(home_id)
        away_elo = self.get_or_create_rating(away_id)

        probs = self.predict_match(home_id, away_id)

        def form_to_score(form: str) -> float:
            # Encode a W/D/L string as mean points (W=1, D=0.5, L=0);
            # an empty form is treated as neutral.
            if not form:
                return 0.5
            score = 0
            for char in form:
                if char == 'W':
                    score += 1
                elif char == 'D':
                    score += 0.5
            return score / max(len(form), 1)

        return {
            # Overall ELO
            'elo_home_overall': home_elo.overall_elo,
            'elo_away_overall': away_elo.overall_elo,
            'elo_diff_overall': home_elo.overall_elo - away_elo.overall_elo,

            # Venue-specific ELO
            'elo_home_venue': home_elo.home_elo,
            'elo_away_venue': away_elo.away_elo,
            'elo_diff_venue': home_elo.home_elo - away_elo.away_elo,

            # Form ELO
            'elo_home_form': home_elo.form_elo,
            'elo_away_form': away_elo.form_elo,
            'elo_diff_form': home_elo.form_elo - away_elo.form_elo,

            # Win probabilities
            'elo_prob_home': probs['home_win'],
            'elo_prob_draw': probs['draw'],
            'elo_prob_away': probs['away_win'],

            # Experience (capped at 100 matches)
            'elo_home_matches': min(home_elo.matches_played, 100),
            'elo_away_matches': min(away_elo.matches_played, 100),

            # Form score
            'elo_home_form_score': form_to_score(home_elo.recent_form),
            'elo_away_form_score': form_to_score(away_elo.recent_form),

            # Win rates
            'elo_home_win_rate': home_elo.win_rate(),
            'elo_away_win_rate': away_elo.win_rate(),
        }

    def save_ratings_to_db(self):
        """Upsert every rating into the ``team_elo_ratings`` table.

        Rows are sent in pages of 500 through ``execute_values`` and committed
        once at the end. On failure the transaction is rolled back and the
        exception is re-raised.
        """
        conn = self.get_conn()
        if conn is None:
            print("❌ DB bağlantısı yok, DB'ye yazılamadı!")
            return

        # Local import, same as _flush_elo_batch: psycopg2 is optional at module level
        from psycopg2.extras import execute_values

        sql = """
            INSERT INTO team_elo_ratings
                (team_id, overall_elo, home_elo, away_elo, form_elo, matches_played, recent_form, updated_at)
            VALUES %s
            ON CONFLICT (team_id) DO UPDATE SET
                overall_elo = EXCLUDED.overall_elo,
                home_elo = EXCLUDED.home_elo,
                away_elo = EXCLUDED.away_elo,
                form_elo = EXCLUDED.form_elo,
                matches_played = EXCLUDED.matches_played,
                recent_form = EXCLUDED.recent_form,
                updated_at = EXCLUDED.updated_at
        """
        rows = [
            (
                elo.team_id,
                round(elo.overall_elo, 2),
                round(elo.home_elo, 2),
                round(elo.away_elo, 2),
                round(elo.form_elo, 2),
                elo.matches_played,
                elo.recent_form[:5],
            )
            for elo in self.ratings.values()
        ]

        cur = conn.cursor()
        try:
            # updated_at is filled server-side through the NOW() in the row template
            execute_values(
                cur, sql, rows,
                template="(%s, %s, %s, %s, %s, %s, %s, NOW())",
                page_size=500,
            )
            conn.commit()
        except Exception:
            conn.rollback()
            raise
        finally:
            cur.close()
        print(f"💾 DB'ye {len(rows)} takım ELO yazıldı (team_elo_ratings)")

    def _load_top_league_ids(self) -> set:
        """Read the top-league id list from the first ``top_leagues.json`` found.

        Looks two directories and then one directory above this module.
        Returns an empty set when no file exists.
        """
        here = os.path.dirname(__file__)
        paths = [
            os.path.join(here, '..', '..', 'top_leagues.json'),
            os.path.join(here, '..', 'top_leagues.json'),
        ]
        for p in paths:
            if os.path.exists(p):
                with open(p, encoding='utf-8') as f:
                    ids = set(json.load(f))
                print(f"📋 {len(ids)} top lig yüklendi ({os.path.basename(p)})")
                return ids
        print("⚠️ top_leagues.json bulunamadı — tüm maçlar yazılacak")
        return set()

    def calculate_all_from_history(self, sport: str = 'football'):
        """Rebuild every rating by replaying all finished matches in date order.

        For matches in a top league (or for all matches when no top-league
        list is available) the PRE-match ratings are upserted through
        ``_flush_elo_batch``. Afterwards the final ratings are saved to JSON
        and to ``team_elo_ratings``, and the strongest teams are printed.
        """
        print(f"\n🔄 {sport.upper()} için ELO V2 hesaplanıyor...")

        conn = self.get_conn()
        if conn is None:
            print("❌ DB bağlantısı yok!")
            return

        top_league_ids = self._load_top_league_ids()

        # This is a full replay of history, so start from a clean slate.
        # Replaying on top of ratings loaded in __init__ would compound them
        # on every run and leak end-of-history ratings into "pre-match" rows.
        self.ratings = {}

        BATCH_SIZE = 1000
        batch: list = []
        processed = 0
        written = 0

        cur = conn.cursor()
        try:
            # All finished matches in chronological order
            cur.execute("""
                SELECT m.id, m.home_team_id, m.away_team_id,
                       m.score_home, m.score_away, m.league_id,
                       t1.name as home_name, t2.name as away_name,
                       l.name as league_name
                FROM matches m
                LEFT JOIN teams t1 ON m.home_team_id = t1.id
                LEFT JOIN teams t2 ON m.away_team_id = t2.id
                LEFT JOIN leagues l ON m.league_id = l.id
                WHERE m.sport = %s
                  AND m.score_home IS NOT NULL
                  AND m.score_away IS NOT NULL
                ORDER BY m.mst_utc ASC
            """, (sport,))

            matches = cur.fetchall()
            print(f"📊 {len(matches):,} maç işlenecek...")

            for match in matches:
                (match_id, home_id, away_id, score_h, score_a,
                 league_id, home_name, away_name, league) = match

                if not (home_id and away_id):
                    continue

                # Rating keys are strings everywhere else (see _load_ratings_from_db)
                home_id, away_id = str(home_id), str(away_id)

                # Snapshot pre-match ratings, top leagues only
                if not top_league_ids or league_id in top_league_ids:
                    home_elo_obj = self.get_or_create_rating(home_id, home_name or "")
                    away_elo_obj = self.get_or_create_rating(away_id, away_name or "")
                    batch.append((
                        match_id,
                        home_elo_obj.overall_elo,
                        away_elo_obj.overall_elo,
                        home_elo_obj.home_elo,
                        away_elo_obj.away_elo,
                        home_elo_obj.form_elo,
                        away_elo_obj.form_elo,
                    ))

                # Every match updates the ratings, regardless of league
                self.update_after_match(
                    home_id, away_id, score_h, score_a,
                    home_name or "", away_name or "", league or ""
                )
                processed += 1

                if len(batch) >= BATCH_SIZE:
                    self._flush_elo_batch(cur, batch, sport)
                    conn.commit()
                    written += len(batch)
                    batch.clear()

                if processed % 10000 == 0:
                    print(f"  İşlenen: {processed:,} / {len(matches):,}")

            # Flush the remainder
            if batch:
                self._flush_elo_batch(cur, batch, sport)
                conn.commit()
                written += len(batch)
        except Exception:
            # Leave the cached connection usable; already-committed batches stay
            conn.rollback()
            raise
        finally:
            cur.close()

        print(f"✅ {processed:,} maç işlendi, {len(self.ratings)} takım")
        print(f"📝 {written:,} maç match_ai_features'a yazıldı")

        # Persist to JSON
        self.save_ratings()

        # Persist to DB
        self.save_ratings_to_db()

        # Show the strongest teams
        self._show_top_teams()

    @staticmethod
    def _flush_elo_batch(cur, batch: list, sport: str = 'football') -> None:
        """Batch upsert pre-match ELO values into the sport-specific ai_features table.

        ``football`` writes to ``football_ai_features``; any other sport value
        writes to ``basketball_ai_features``. The caller commits.
        """
        from psycopg2.extras import execute_values

        # Table name comes from this fixed two-way choice, never from raw input
        table_name = 'football_ai_features' if sport == 'football' else 'basketball_ai_features'
        sql = f"""
            INSERT INTO {table_name}
                (match_id, home_elo, away_elo,
                 home_home_elo, away_away_elo,
                 home_form_elo, away_form_elo,
                 calculator_ver, updated_at)
            VALUES %s
            ON CONFLICT (match_id) DO UPDATE SET
                home_elo = EXCLUDED.home_elo,
                away_elo = EXCLUDED.away_elo,
                home_home_elo = EXCLUDED.home_home_elo,
                away_away_elo = EXCLUDED.away_away_elo,
                home_form_elo = EXCLUDED.home_form_elo,
                away_form_elo = EXCLUDED.away_form_elo,
                calculator_ver = EXCLUDED.calculator_ver,
                updated_at = EXCLUDED.updated_at
        """
        now = datetime.now().isoformat()
        values = [
            (mid, h_elo, a_elo, hh_elo, aa_elo, hf_elo, af_elo,
             'elo_v2_backfill', now)
            for mid, h_elo, a_elo, hh_elo, aa_elo, hf_elo, af_elo in batch
        ]
        execute_values(cur, sql, values, page_size=500)

    def _show_top_teams(self, n: int = 20):
        """Print the ``n`` teams with the highest overall ELO."""
        sorted_teams = sorted(
            self.ratings.items(),
            key=lambda x: x[1].overall_elo,
            reverse=True
        )[:n]

        print(f"\n🏆 Top {n} Takım (ELO V2):")
        for i, (team_id, elo) in enumerate(sorted_teams, 1):
            # str() keeps the fallback label working for non-string ids
            name = elo.team_name[:25] if elo.team_name else str(team_id)[:25]
            print(f"  {i:2}. {name:25} → {elo.overall_elo:.0f} (H:{elo.home_elo:.0f} A:{elo.away_elo:.0f})")
|
||||
|
||||
|
||||
# Module-wide singleton instance, created lazily on first access
_system = None


def get_elo_system() -> ELORatingSystem:
    """Return the shared ELORatingSystem, constructing it on the first call."""
    global _system
    if _system is not None:
        return _system
    _system = ELORatingSystem()
    return _system
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import sys
    from pathlib import Path

    # Put the ai-engine root on sys.path so `from data.db import ...` resolves
    # when this file is executed directly as a script.
    _AI_ENGINE_ROOT = Path(__file__).resolve().parent.parent
    _root_entry = str(_AI_ENGINE_ROOT)
    if _root_entry not in sys.path:
        sys.path.insert(0, _root_entry)

    system = get_elo_system()

    # `calculate` as the first CLI argument triggers a full football replay;
    # anything else prints usage plus a summary of the loaded ratings.
    wants_calculate = sys.argv[1:2] == ['calculate']
    if wants_calculate:
        system.calculate_all_from_history('football')
    else:
        print("\n🧪 ELO V2 Test")
        print("Kullanım: python elo_system.py calculate")
        print(f"\n📊 Yüklü takım sayısı: {len(system.ratings)}")

        if system.ratings:
            system._show_top_teams(10)
|
||||
@@ -0,0 +1,990 @@
|
||||
"""
|
||||
Feature Extractor - V2 Betting Engine
|
||||
Pulls historical team stats, ELO, missing-player impact and live odds from
|
||||
PostgreSQL and engineers a leakage-free feature vector for the ensemble model.
|
||||
|
||||
CRITICAL: Only pre-match data (matches before the target match) is used.
|
||||
Post-match stats of the target match are NEVER included.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Window size for rolling team statistics.
# NOTE(review): presumably the "last 5 matches" window mentioned on the
# MatchFeatures rolling-average fields - confirm at the use site.
ROLLING_WINDOW: int = 5
# NOTE(review): looks like the cap on head-to-head meetings considered;
# the code that reads it is outside this view.
H2H_WINDOW: int = 10
# NOTE(review): presumably the upper bound, in days, applied to rest-day
# features - confirm at the use site.
MAX_REST_DAYS: float = 14.0
|
||||
|
||||
|
||||
@dataclass
class MatchFeatures:
    """Container for one match's engineered features.

    Every field has a neutral default. ``to_model_array`` exports the subset
    listed by ``feature_names`` as the model input; the remaining fields are
    carried alongside as context and metadata.
    """

    match_id: str = ""
    home_team_id: str = ""
    away_team_id: str = ""

    # --- ELO and AI-derived features ---
    home_elo: float = 1500.0
    away_elo: float = 1500.0
    elo_diff: float = 0.0
    missing_players_impact: float = 0.0
    home_form_score: float = 0.0
    away_form_score: float = 0.0
    h2h_home_win_rate: float = 0.5
    h2h_sample_size: int = 0
    home_rest_days: float = 7.0
    away_rest_days: float = 7.0
    rest_diff: float = 0.0
    home_lineup_availability: float = 1.0
    away_lineup_availability: float = 1.0

    # --- Home rolling averages (last 5 matches) ---
    home_avg_possession: float = 50.0
    home_avg_shots_on_target: float = 4.0
    home_avg_total_shots: float = 10.0
    home_avg_goals_scored: float = 1.3
    home_avg_goals_conceded: float = 1.1

    # --- Away rolling averages (last 5 matches) ---
    away_avg_possession: float = 50.0
    away_avg_shots_on_target: float = 4.0
    away_avg_total_shots: float = 10.0
    away_avg_goals_scored: float = 1.3
    away_avg_goals_conceded: float = 1.1

    # --- Bookmaker implied probabilities ---
    implied_prob_home: float = 0.33
    implied_prob_draw: float = 0.33
    implied_prob_away: float = 0.33
    implied_prob_over25: float = 0.50
    implied_prob_under25: float = 0.50
    implied_prob_btts_yes: float = 0.50
    implied_prob_btts_no: float = 0.50

    # --- Raw decimal odds (used downstream for Edge/Kelly calculations) ---
    odds_home: float = 2.50
    odds_draw: float = 3.20
    odds_away: float = 2.80
    odds_over25: float = 1.90
    odds_under25: float = 1.90
    odds_btts_yes: float = 1.85
    odds_btts_no: float = 1.95

    # --- Data quality ---
    data_quality_score: float = 0.5
    data_quality_flags: list[str] = field(default_factory=list)

    # --- Metadata ---
    match_name: str = ""
    home_team_name: str = ""
    away_team_name: str = ""
    league_id: str = ""
    league_name: str = ""
    referee_name: str = ""
    match_date_ms: int = 0
    league_avg_goals: float = 2.6
    referee_avg_goals: float = 2.6
    referee_home_bias: float = 0.0
    home_squad_strength: float = 0.5
    away_squad_strength: float = 0.5
    home_key_players: float = 0.0
    away_key_players: float = 0.0

    def to_model_array(self) -> np.ndarray:
        """Return the 24 model inputs as a float64 vector, ordered as in ``feature_names``."""
        ordered_values = [getattr(self, column) for column in self.feature_names()]
        return np.array(ordered_values, dtype=np.float64)

    @staticmethod
    def feature_names() -> list[str]:
        """Return the 24 model input names in vector order."""
        rating_block = ["home_elo", "away_elo", "elo_diff", "missing_players_impact"]
        home_block = [
            "home_avg_possession",
            "home_avg_shots_on_target",
            "home_avg_total_shots",
            "home_avg_goals_scored",
            "home_avg_goals_conceded",
        ]
        away_block = [
            "away_avg_possession",
            "away_avg_shots_on_target",
            "away_avg_total_shots",
            "away_avg_goals_scored",
            "away_avg_goals_conceded",
        ]
        implied_block = [
            "implied_prob_home",
            "implied_prob_draw",
            "implied_prob_away",
            "implied_prob_over25",
            "implied_prob_under25",
            "implied_prob_btts_yes",
            "implied_prob_btts_no",
        ]
        odds_block = ["odds_home", "odds_draw", "odds_away"]
        return rating_block + home_block + away_block + implied_block + odds_block
|
||||
|
||||
|
||||
async def extract_features(session: AsyncSession, match_id: str) -> MatchFeatures | None:
    """Run the full feature-extraction pipeline for a single match.

    Loads the match header, then enriches a ``MatchFeatures`` instance step
    by step (stored AI features, rolling team stats, rest days, head-to-head,
    league and referee profiles, squad profiles, lineup context, odds).
    Every source that yields nothing appends a flag, and each flag lowers
    the final ``data_quality_score``.

    Args:
        session: Async database session used for every query.
        match_id: Identifier of the match to extract features for.

    Returns:
        ``None`` when the match is in neither table; otherwise the populated
        features. When a team id is missing the function returns early with
        a quality score of 0.1 and only the header fields filled in.
    """
    feats = MatchFeatures(match_id=match_id)
    flags: list[str] = []

    header = await _load_match_header(session, match_id)
    if header is None:
        logger.warning("Match %s not found in live_matches or matches.", match_id)
        return None

    # --- Header fields (NULL columns collapse to "" / 0) -------------------
    feats.home_team_id = header["home_team_id"] or ""
    feats.away_team_id = header["away_team_id"] or ""
    feats.match_date_ms = int(header.get("mst_utc", 0) or 0)
    for attr, column in (
        ("match_name", "match_name"),
        ("home_team_name", "home_name"),
        ("away_team_name", "away_name"),
        ("league_id", "league_id"),
        ("league_name", "league_name"),
        ("referee_name", "referee_name"),
    ):
        setattr(feats, attr, header.get(column, "") or "")

    if not (feats.home_team_id and feats.away_team_id):
        # Nothing team-specific can be computed without both ids.
        logger.warning("Match %s missing team IDs.", match_id)
        flags.append("missing_team_ids")
        feats.data_quality_flags = flags
        feats.data_quality_score = 0.1
        return feats

    sides = (("home", feats.home_team_id), ("away", feats.away_team_id))

    # --- Stored AI features -------------------------------------------------
    ai_row = await _load_ai_features(session, match_id)
    if not ai_row:
        flags.append("missing_ai_features")
    else:
        feats.home_elo = float(ai_row["home_elo"] or 1500.0)
        feats.away_elo = float(ai_row["away_elo"] or 1500.0)
        feats.missing_players_impact = float(ai_row["missing_players_impact"] or 0.0)
        feats.home_form_score = float(ai_row["home_form_score"] or 0.0)
        feats.away_form_score = float(ai_row["away_form_score"] or 0.0)
        stored_h2h_rate = ai_row.get("h2h_home_win_rate")
        if stored_h2h_rate is not None:
            feats.h2h_home_win_rate = float(stored_h2h_rate)
            feats.h2h_sample_size = int(ai_row.get("h2h_total") or 0)

    feats.elo_diff = feats.home_elo - feats.away_elo

    # --- Rolling team statistics (home first, then away) --------------------
    stat_keys = (
        "avg_possession",
        "avg_shots_on_target",
        "avg_total_shots",
        "avg_goals_scored",
        "avg_goals_conceded",
    )
    for side, team_id in sides:
        rolling = await _rolling_team_stats(session, team_id, feats.match_date_ms)
        if rolling is None:
            flags.append(f"missing_{side}_stats")
            continue
        for key in stat_keys:
            setattr(feats, f"{side}_{key}", rolling[key])

    # A (near-)zero form score is treated as "not provided": fall back to the
    # average goal balance.
    for side, _team_id in sides:
        form_attr = f"{side}_form_score"
        if abs(getattr(feats, form_attr)) < 1e-6:
            goal_balance = (
                getattr(feats, f"{side}_avg_goals_scored")
                - getattr(feats, f"{side}_avg_goals_conceded")
            )
            setattr(feats, form_attr, round(goal_balance, 3))

    # --- Rest days ----------------------------------------------------------
    for side, team_id in sides:
        rest = await _load_rest_days(session, team_id, feats.match_date_ms)
        if rest is None:
            flags.append(f"missing_{side}_rest")
        else:
            setattr(feats, f"{side}_rest_days", rest)
    feats.rest_diff = round(feats.home_rest_days - feats.away_rest_days, 3)

    # --- Head-to-head (only when the AI row supplied no sample) -------------
    if feats.h2h_sample_size == 0:
        h2h = await _load_h2h_stats(
            session, feats.home_team_id, feats.away_team_id, feats.match_date_ms,
        )
        if h2h is None:
            flags.append("missing_h2h")
        else:
            feats.h2h_home_win_rate = h2h["home_win_rate"]
            feats.h2h_sample_size = h2h["sample_size"]

    # --- League profile -----------------------------------------------------
    league_profile = await _load_league_profile(
        session, feats.league_id, feats.match_date_ms,
    )
    if league_profile is None:
        flags.append("missing_league_profile")
    else:
        feats.league_avg_goals = league_profile["avg_goals"]

    # --- Referee profile ----------------------------------------------------
    referee_profile = await _load_referee_profile(
        session, feats.referee_name, feats.match_date_ms,
    )
    if referee_profile is None:
        flags.append("missing_referee_profile")
    else:
        feats.referee_avg_goals = referee_profile["avg_goals"]
        feats.referee_home_bias = referee_profile["home_bias"]

    # --- Squad profiles -----------------------------------------------------
    squad_profiles = []
    for _side, team_id in sides:
        squad_profiles.append(
            await _load_team_squad_profile(session, team_id, feats.match_date_ms)
        )
    for (side, _team_id), squad in zip(sides, squad_profiles):
        if squad is None:
            flags.append(f"missing_{side}_squad_profile")
        else:
            setattr(feats, f"{side}_squad_strength", squad["squad_strength"])
            setattr(feats, f"{side}_key_players", squad["key_players"])

    # --- Lineup / sidelined context -----------------------------------------
    lineup_info = _extract_lineup_context(header)
    feats.home_lineup_availability = lineup_info["home_availability"]
    feats.away_lineup_availability = lineup_info["away_availability"]
    if not lineup_info["has_real_lineup_data"]:
        flags.append("missing_lineup_context")
    else:
        # Mean unavailability of both sides; never lowers a stored impact.
        unavailable = (
            (1.0 - feats.home_lineup_availability)
            + (1.0 - feats.away_lineup_availability)
        )
        feats.missing_players_impact = max(
            feats.missing_players_impact,
            round(unavailable / 2.0, 4),
        )

    # --- Odds ---------------------------------------------------------------
    if not await _extract_odds(session, match_id, feats):
        flags.append("missing_odds")

    # --- Data-quality score: start at 1.0, subtract a penalty per flag ------
    penalty_by_flag = {
        "missing_team_ids": 0.5,
        "missing_ai_features": 0.05,
        "missing_home_stats": 0.15,
        "missing_away_stats": 0.15,
        "missing_home_rest": 0.05,
        "missing_away_rest": 0.05,
        "missing_h2h": 0.05,
        "missing_league_profile": 0.04,
        "missing_referee_profile": 0.04,
        "missing_home_squad_profile": 0.06,
        "missing_away_squad_profile": 0.06,
        "missing_lineup_context": 0.05,
        "missing_odds": 0.2,
    }
    quality = 1.0
    for flag in flags:
        quality -= penalty_by_flag.get(flag, 0.05)  # unknown flags cost 0.05
    feats.data_quality_score = max(0.0, round(quality, 2))
    feats.data_quality_flags = flags

    return feats
|
||||
|
||||
|
||||
async def _load_match_header(
    session: AsyncSession, match_id: str,
) -> dict[str, Any] | None:
    """Fetch the header row for a match, preferring ``live_matches``.

    Both queries return the same column set. The ``matches`` variant takes
    the referee name from ``match_officials`` (``role_id = 1``) and yields
    NULL for ``lineups`` and ``sidelined``.

    Returns:
        The first matching row as a plain dict, or ``None`` when the id is
        present in neither table.
    """
    live_sql = """
        SELECT
            m.id,
            m.home_team_id,
            m.away_team_id,
            m.match_name,
            m.mst_utc,
            m.sport,
            m.league_id,
            m.referee_name,
            m.lineups,
            m.sidelined,
            ht.name AS home_name,
            at.name AS away_name,
            l.name AS league_name
        FROM live_matches m
        LEFT JOIN teams ht ON ht.id = m.home_team_id
        LEFT JOIN teams at ON at.id = m.away_team_id
        LEFT JOIN leagues l ON l.id = m.league_id
        WHERE m.id = :match_id
        LIMIT 1
    """
    archive_sql = """
        SELECT
            m.id,
            m.home_team_id,
            m.away_team_id,
            m.match_name,
            m.mst_utc,
            m.sport,
            m.league_id,
            ref.name AS referee_name,
            NULL AS lineups,
            NULL AS sidelined,
            ht.name AS home_name,
            at.name AS away_name,
            l.name AS league_name
        FROM matches m
        LEFT JOIN teams ht ON ht.id = m.home_team_id
        LEFT JOIN teams at ON at.id = m.away_team_id
        LEFT JOIN leagues l ON l.id = m.league_id
        LEFT JOIN match_officials ref ON ref.match_id = m.id AND ref.role_id = 1
        WHERE m.id = :match_id
        LIMIT 1
    """
    params = {"match_id": match_id}
    # Order matters: the live table wins when the match exists in both.
    for sql in (live_sql, archive_sql):
        outcome = await session.execute(text(sql), params)
        header = outcome.mappings().first()
        if header:
            return dict(header)
    return None
|
||||
|
||||
|
||||
async def _load_ai_features(
    session: AsyncSession, match_id: str,
) -> dict[str, Any] | None:
    """Load the stored row from ``football_ai_features`` for a match.

    Returns:
        A dict with the ELO, missing-player, form and head-to-head columns,
        or ``None`` when no row exists for ``match_id``.
    """
    sql = text("""
        SELECT
            home_elo,
            away_elo,
            missing_players_impact,
            home_form_score,
            away_form_score,
            h2h_home_win_rate,
            h2h_total
        FROM football_ai_features
        WHERE match_id = :match_id
        LIMIT 1
    """)
    outcome = await session.execute(sql, {"match_id": match_id})
    stored = outcome.mappings().first()
    if not stored:
        return None
    return dict(stored)
|
||||
|
||||
|
||||
async def _rolling_team_stats(
    session: AsyncSession,
    team_id: str,
    before_mst_utc: int,
) -> dict[str, float] | None:
    """Average a team's stats over its most recent scored football matches.

    Only matches earlier than ``before_mst_utc`` that have both scores and a
    ``football_team_stats`` row for this team are considered; at most
    ``ROLLING_WINDOW`` of them, newest first. Goals scored/conceded are
    resolved from the team's side (home or away) in each match.

    Returns:
        A dict of five averages rounded to two decimals, or ``None`` when no
        qualifying match exists.
    """
    sql = text("""
        WITH recent AS (
            SELECT
                m.id AS match_id,
                m.home_team_id,
                m.away_team_id,
                m.score_home,
                m.score_away,
                ts.possession_percentage,
                ts.shots_on_target,
                ts.total_shots
            FROM matches m
            JOIN football_team_stats ts ON ts.match_id = m.id AND ts.team_id = :team_id
            WHERE (m.home_team_id = :team_id OR m.away_team_id = :team_id)
              AND m.mst_utc < :before_ts
              AND m.sport = 'football'
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
            ORDER BY m.mst_utc DESC
            LIMIT :window
        )
        SELECT
            COALESCE(AVG(possession_percentage), 50.0) AS avg_possession,
            COALESCE(AVG(shots_on_target), 4.0) AS avg_shots_on_target,
            COALESCE(AVG(total_shots), 10.0) AS avg_total_shots,
            COALESCE(AVG(
                CASE
                    WHEN home_team_id = :team_id THEN score_home
                    ELSE score_away
                END
            ), 1.3) AS avg_goals_scored,
            COALESCE(AVG(
                CASE
                    WHEN home_team_id = :team_id THEN score_away
                    ELSE score_home
                END
            ), 1.1) AS avg_goals_conceded,
            COUNT(*) AS match_count
        FROM recent
    """)
    params = {"team_id": team_id, "before_ts": before_mst_utc, "window": ROLLING_WINDOW}
    outcome = await session.execute(sql, params)
    aggregates = outcome.mappings().first()

    # The aggregate query always yields a row; an empty window shows up as
    # match_count == 0 and must not be mistaken for real (COALESCEd) stats.
    if aggregates is None or int(aggregates["match_count"]) == 0:
        return None

    stat_keys = (
        "avg_possession",
        "avg_shots_on_target",
        "avg_total_shots",
        "avg_goals_scored",
        "avg_goals_conceded",
    )
    return {key: round(float(aggregates[key]), 2) for key in stat_keys}
|
||||
|
||||
|
||||
async def _load_rest_days(
    session: AsyncSession,
    team_id: str,
    before_mst_utc: int,
) -> float | None:
    """Return the days elapsed since the team's previous football match.

    The gap is ``before_mst_utc`` minus the latest earlier ``mst_utc``,
    divided by 86,400,000 (so both are presumably epoch milliseconds),
    floored at 0 and capped at ``MAX_REST_DAYS``.

    Returns:
        The rest period rounded to three decimals, or ``None`` when the team
        has no earlier match.
    """
    sql = text("""
        SELECT m.mst_utc
        FROM matches m
        WHERE (m.home_team_id = :team_id OR m.away_team_id = :team_id)
          AND m.mst_utc < :before_ts
          AND m.sport = 'football'
        ORDER BY m.mst_utc DESC
        LIMIT 1
    """)
    outcome = await session.execute(
        sql, {"team_id": team_id, "before_ts": before_mst_utc},
    )
    previous_kickoff = outcome.scalar_one_or_none()
    if previous_kickoff is None:
        return None

    elapsed_days = (float(before_mst_utc) - float(previous_kickoff)) / 86400000.0
    elapsed_days = max(0.0, elapsed_days)
    return round(min(elapsed_days, MAX_REST_DAYS), 3)
|
||||
|
||||
|
||||
async def _load_h2h_stats(
    session: AsyncSession,
    home_team_id: str,
    away_team_id: str,
    before_mst_utc: int,
) -> dict[str, float | int] | None:
    """Summarise recent head-to-head meetings from the current home side's view.

    Looks at up to ``H2H_WINDOW`` scored football matches between the two
    teams (either venue) played before ``before_mst_utc``. Scores are
    re-oriented so "home" always means ``home_team_id``, regardless of who
    hosted the historical match.

    Returns:
        ``{"home_win_rate": float, "sample_size": int}`` where a draw counts
        as half a win and the rate is rounded to four decimals, or ``None``
        when no usable meeting exists.
    """
    query = text("""
        SELECT
            m.home_team_id,
            m.away_team_id,
            m.score_home,
            m.score_away
        FROM matches m
        WHERE m.sport = 'football'
          AND m.mst_utc < :before_ts
          AND m.score_home IS NOT NULL
          AND m.score_away IS NOT NULL
          AND (
              (m.home_team_id = :home_team_id AND m.away_team_id = :away_team_id)
              OR
              (m.home_team_id = :away_team_id AND m.away_team_id = :home_team_id)
          )
        ORDER BY m.mst_utc DESC
        LIMIT :window
    """)
    result = await session.execute(
        query,
        {
            "home_team_id": home_team_id,
            "away_team_id": away_team_id,
            "before_ts": before_mst_utc,
            "window": H2H_WINDOW,
        },
    )

    home_wins = 0.0
    draws = 0.0
    sample_size = 0
    for row in result.mappings().all():
        score_home = row["score_home"]
        score_away = row["score_away"]
        # The SQL already filters NULL scores; kept as a cheap safety net.
        if score_home is None or score_away is None:
            continue
        sample_size += 1

        # Swap the scores when the current home side was the visitor.
        if row["home_team_id"] == home_team_id:
            ours, theirs = float(score_home), float(score_away)
        else:
            ours, theirs = float(score_away), float(score_home)

        if ours > theirs:
            home_wins += 1.0
        elif ours == theirs:
            draws += 1.0

    if sample_size == 0:
        return None

    # Count draws as a half-win signal instead of throwing them away.
    home_win_rate = round((home_wins + draws * 0.5) / sample_size, 4)
    return {
        "home_win_rate": home_win_rate,
        "sample_size": sample_size,
    }
|
||||
|
||||
|
||||
async def _load_league_profile(
    session: AsyncSession,
    league_id: str,
    before_mst_utc: int,
) -> dict[str, float] | None:
    """Compute the league's average total goals per match.

    Uses the 100 most recent football matches of the league with status
    ``'FT'`` and both scores present, played before ``before_mst_utc``.

    Returns:
        ``{"avg_goals": float}`` rounded to three decimals, or ``None`` when
        ``league_id`` is empty or no qualifying match exists.
    """
    if not league_id:
        return None

    sql = text("""
        SELECT
            COALESCE(AVG(m.score_home + m.score_away), 2.6) AS avg_goals,
            COUNT(*) AS match_count
        FROM (
            SELECT score_home, score_away
            FROM matches
            WHERE league_id = :league_id
              AND sport = 'football'
              AND status = 'FT'
              AND score_home IS NOT NULL
              AND score_away IS NOT NULL
              AND mst_utc < :before_ts
            ORDER BY mst_utc DESC
            LIMIT 100
        ) m
    """)
    outcome = await session.execute(
        sql, {"league_id": league_id, "before_ts": before_mst_utc},
    )
    summary = outcome.mappings().first()
    has_matches = summary is not None and int(summary["match_count"] or 0) != 0
    if not has_matches:
        return None
    return {"avg_goals": round(float(summary["avg_goals"]), 3)}
|
||||
|
||||
|
||||
async def _load_referee_profile(
    session: AsyncSession,
    referee_name: str,
    before_mst_utc: int,
) -> dict[str, float] | None:
    """Profile an official from their 30 most recent finished matches.

    Matches are found through ``match_officials`` by exact name with
    ``role_id = 1`` (presumably the main referee; confirm against the
    schema). ``home_bias`` is the share of home wins minus a 0.46 baseline.

    Returns:
        ``{"home_bias": float, "avg_goals": float}`` (rounded to 4 and 3
        decimals), or ``None`` when the name is empty or no match is found.
    """
    if not referee_name:
        return None

    sql = text("""
        SELECT
            COALESCE(AVG(CASE WHEN score_home > score_away THEN 1.0 ELSE 0.0 END), 0.46) - 0.46 AS home_bias,
            COALESCE(AVG(score_home + score_away), 2.6) AS avg_goals,
            COUNT(*) AS match_count
        FROM (
            SELECT m.score_home, m.score_away
            FROM match_officials mo
            JOIN matches m ON m.id = mo.match_id
            WHERE mo.name = :referee_name
              AND mo.role_id = 1
              AND m.sport = 'football'
              AND m.status = 'FT'
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
              AND m.mst_utc < :before_ts
            ORDER BY m.mst_utc DESC
            LIMIT 30
        ) ref_matches
    """)
    outcome = await session.execute(
        sql, {"referee_name": referee_name, "before_ts": before_mst_utc},
    )
    summary = outcome.mappings().first()
    has_matches = summary is not None and int(summary["match_count"] or 0) != 0
    if not has_matches:
        return None

    profile = {
        "home_bias": round(float(summary["home_bias"]), 4),
        "avg_goals": round(float(summary["avg_goals"]), 3),
    }
    return profile
|
||||
|
||||
|
||||
async def _load_team_squad_profile(
    session: AsyncSession,
    team_id: str,
    before_mst_utc: int,
) -> dict[str, float] | None:
    """Score the team's squad from its last eight finished football matches.

    Per player the SQL computes
    ``starts * 1.5 + bench appearances * 0.5 + goals * 2.5 + assists * 1.5``,
    where goal events whose subtype matches ``'%penaltı kaçırma%'`` are not
    counted as goals. The eleven highest scores are then averaged.

    Returns:
        ``squad_strength`` (average top score / 10, clamped to [0, 1] and
        rounded to four decimals) and ``key_players`` (how many of the top
        eleven score at least 6.0), or ``None`` when ``team_id`` is empty or
        the team has no qualifying match.
    """
    if not team_id:
        return None

    sql = text("""
        WITH recent_matches AS (
            SELECT m.id, m.mst_utc
            FROM matches m
            WHERE (m.home_team_id = :team_id OR m.away_team_id = :team_id)
              AND m.sport = 'football'
              AND m.status = 'FT'
              AND m.mst_utc < :before_ts
            ORDER BY m.mst_utc DESC
            LIMIT 8
        ),
        player_base AS (
            SELECT
                mpp.player_id,
                COUNT(*)::float AS appearances,
                COUNT(*) FILTER (WHERE mpp.is_starting = true)::float AS starts
            FROM match_player_participation mpp
            JOIN recent_matches rm ON rm.id = mpp.match_id
            WHERE mpp.team_id = :team_id
            GROUP BY mpp.player_id
        ),
        player_goals AS (
            SELECT
                mpe.player_id,
                COUNT(*) FILTER (
                    WHERE mpe.event_type = 'goal'
                      AND COALESCE(mpe.event_subtype, '') NOT ILIKE '%penaltı kaçırma%'
                )::float AS goals,
                0.0::float AS assists
            FROM match_player_events mpe
            JOIN recent_matches rm ON rm.id = mpe.match_id
            WHERE mpe.team_id = :team_id
            GROUP BY mpe.player_id
            UNION ALL
            SELECT
                mpe.assist_player_id AS player_id,
                0.0::float AS goals,
                COUNT(*) FILTER (
                    WHERE mpe.event_type = 'goal'
                      AND mpe.assist_player_id IS NOT NULL
                )::float AS assists
            FROM match_player_events mpe
            JOIN recent_matches rm ON rm.id = mpe.match_id
            WHERE mpe.team_id = :team_id
              AND mpe.assist_player_id IS NOT NULL
            GROUP BY mpe.assist_player_id
        ),
        player_events AS (
            SELECT
                player_id,
                SUM(goals) AS goals,
                SUM(assists) AS assists
            FROM player_goals
            GROUP BY player_id
        ),
        player_scores AS (
            SELECT
                pb.player_id,
                (pb.starts * 1.5)
                + ((pb.appearances - pb.starts) * 0.5)
                + (COALESCE(pe.goals, 0.0) * 2.5)
                + (COALESCE(pe.assists, 0.0) * 1.5) AS score
            FROM player_base pb
            LEFT JOIN player_events pe ON pe.player_id = pb.player_id
        )
        SELECT
            COALESCE(AVG(top_players.score), 0.0) AS avg_top_score,
            COALESCE(COUNT(*) FILTER (WHERE top_players.score >= 6.0), 0) AS key_players,
            COALESCE((SELECT COUNT(*) FROM recent_matches), 0) AS match_count
        FROM (
            SELECT score
            FROM player_scores
            ORDER BY score DESC
            LIMIT 11
        ) top_players
    """)
    outcome = await session.execute(
        sql, {"team_id": team_id, "before_ts": before_mst_utc},
    )
    summary = outcome.mappings().first()
    has_matches = summary is not None and int(summary["match_count"] or 0) != 0
    if not has_matches:
        return None

    avg_top_score = float(summary["avg_top_score"] or 0.0)
    strength = min(max(avg_top_score / 10.0, 0.0), 1.0)
    return {
        "squad_strength": round(strength, 4),
        "key_players": float(summary["key_players"] or 0),
    }
|
||||
|
||||
|
||||
def _safe_json(value: Any) -> dict[str, Any] | None:
|
||||
if value is None:
|
||||
return None
|
||||
if isinstance(value, dict):
|
||||
return value
|
||||
if isinstance(value, str):
|
||||
try:
|
||||
parsed = json.loads(value)
|
||||
except (TypeError, json.JSONDecodeError):
|
||||
return None
|
||||
return parsed if isinstance(parsed, dict) else None
|
||||
return None
|
||||
|
||||
|
||||
def _safe_list(value: Any) -> list[Any]:
|
||||
if isinstance(value, list):
|
||||
return value
|
||||
return []
|
||||
|
||||
|
||||
def _extract_lineup_context(match_row: dict[str, Any]) -> dict[str, float | bool]:
    """Derive per-side availability from the lineup and sidelined JSON blobs.

    Reads ``match_row["lineups"]`` (``home``/``away`` -> ``xi``) and
    ``match_row["sidelined"]`` (``homeTeam``/``awayTeam`` ->
    ``totalSidelined`` / ``players``). The blobs come from an external feed,
    so any section that is missing, ``null`` or not an object is treated as
    empty, and a non-numeric ``totalSidelined`` counts as 0 instead of
    raising.

    Returns:
        A dict with ``home_availability`` and ``away_availability`` (floats
        from ``_compute_availability``) and ``has_real_lineup_data``, which
        is True when at least one of the four counts is positive.
    """
    lineups = _safe_json(match_row.get("lineups")) or {}
    sidelined = _safe_json(match_row.get("sidelined")) or {}

    def _section(container: dict[str, Any], key: str) -> dict[str, Any]:
        # `.get(key, {})` is not enough: an explicit JSON null would come
        # back as None and break the chained lookup.
        section = container.get(key)
        return section if isinstance(section, dict) else {}

    def _sidelined_count(team: dict[str, Any]) -> int:
        try:
            declared = int(team.get("totalSidelined") or 0)
        except (TypeError, ValueError):
            declared = 0
        # Trust whichever is larger: the declared total or the listed players.
        return max(declared, len(_safe_list(team.get("players"))))

    home_xi_count = len(_safe_list(_section(lineups, "home").get("xi")))
    away_xi_count = len(_safe_list(_section(lineups, "away").get("xi")))
    home_sidelined_count = _sidelined_count(_section(sidelined, "homeTeam"))
    away_sidelined_count = _sidelined_count(_section(sidelined, "awayTeam"))

    has_real_lineup_data = any(
        count > 0
        for count in (
            home_xi_count,
            away_xi_count,
            home_sidelined_count,
            away_sidelined_count,
        )
    )

    return {
        "home_availability": _compute_availability(home_xi_count, home_sidelined_count),
        "away_availability": _compute_availability(away_xi_count, away_sidelined_count),
        "has_real_lineup_data": has_real_lineup_data,
    }
|
||||
|
||||
|
||||
def _compute_availability(xi_count: int, sidelined_count: int) -> float:
|
||||
xi_ratio = min(max(xi_count / 11.0, 0.0), 1.0) if xi_count > 0 else 1.0
|
||||
sidelined_penalty = min(max(sidelined_count / 11.0, 0.0), 1.0) * 0.35
|
||||
return round(min(max(xi_ratio - sidelined_penalty, 0.0), 1.0), 4)
|
||||
|
||||
|
||||
def _safe_odd(val: Any) -> float:
|
||||
"""Parse an odds value that might be str, float, int, or None."""
|
||||
if val is None:
|
||||
return 0.0
|
||||
try:
|
||||
parsed = float(val)
|
||||
return parsed if parsed > 1.0 else 0.0
|
||||
except (ValueError, TypeError):
|
||||
return 0.0
|
||||
|
||||
|
||||
def _implied_prob(decimal_odd: float) -> float:
|
||||
"""Convert decimal odds to implied probability, clamped [0, 1]."""
|
||||
if decimal_odd <= 1.0:
|
||||
return 0.0
|
||||
return min(1.0, 1.0 / decimal_odd)
|
||||
|
||||
|
||||
async def _extract_odds(
    session: AsyncSession,
    match_id: str,
    feats: MatchFeatures,
) -> bool:
    """Fill the odds and implied-probability fields on ``feats``.

    The live odds JSON is tried first; the relational odds tables are used
    only when the JSON is absent or the parser reports nothing. On success
    the seven ``implied_prob_*`` fields are derived from the matching
    ``odds_*`` fields and rounded to four decimals.

    Returns:
        True when either source reported odds, False otherwise.
    """
    live_blob = await _load_live_odds_json(session, match_id)
    found = _parse_odds_json(live_blob, feats) if live_blob else False

    if not found:
        found = await _load_relational_odds(session, match_id, feats)

    if found:
        markets = (
            "home", "draw", "away",
            "over25", "under25",
            "btts_yes", "btts_no",
        )
        for market in markets:
            decimal_odd = getattr(feats, f"odds_{market}")
            setattr(feats, f"implied_prob_{market}", round(_implied_prob(decimal_odd), 4))

    return found
|
||||
|
||||
|
||||
async def _load_live_odds_json(
    session: AsyncSession, match_id: str,
) -> dict[str, Any] | list[Any] | None:
    """Load and decode the ``odds`` column of ``live_matches`` for a match.

    The column may arrive already decoded (dict or list) or as JSON text,
    depending on the driver and column type.

    Returns:
        The decoded dict or list, or ``None`` when there is no row, the
        column is NULL, the text is not valid JSON, or the decoded value is
        neither a dict nor a list.
    """
    query = text("SELECT odds FROM live_matches WHERE id = :mid AND odds IS NOT NULL")
    result = await session.execute(query, {"mid": match_id})
    payload = result.scalar_one_or_none()

    if isinstance(payload, str):
        try:
            payload = json.loads(payload)
        except (json.JSONDecodeError, TypeError):
            return None

    # A single check covers NULL/no row, scalars and unexpected types.
    if isinstance(payload, (dict, list)):
        return payload
    return None
|
||||
|
||||
|
||||
def _parse_odds_json(odds_blob: dict[str, Any] | list[Any], feats: MatchFeatures) -> bool:
    """Parse the Mackolik-style odds JSON structure into ``feats``.

    Accepts either a list of category objects or a dict holding them under
    ``"categories"`` (or ``"odds"``), itself a list or a dict of objects.
    Three markets are recognised by their lower-cased category name: match
    result (1/X/2), over/under 2.5 and both-teams-to-score. An existing
    ``odds_*`` value is only overwritten by a valid parsed odd.

    Returns:
        True only when at least one valid odd was stored. A matching
        category with no usable selections does not count, so the caller
        can still fall back to another odds source.
    """
    if isinstance(odds_blob, dict):
        raw_categories = odds_blob.get("categories", odds_blob.get("odds", []))
        if isinstance(raw_categories, dict):
            raw_categories = list(raw_categories.values())
    else:
        raw_categories = odds_blob
    if not isinstance(raw_categories, list):
        raw_categories = []
    categories = [item for item in raw_categories if isinstance(item, dict)]

    # (accepted category names, ((target field, selection keys in priority order), ...))
    markets = (
        (
            ("mac sonucu", "match result", "1x2", "maç sonucu"),
            (("odds_home", ("1",)), ("odds_draw", ("x",)), ("odds_away", ("2",))),
        ),
        (
            ("2,5 alt/ust", "over/under 2.5", "2.5 alt/ust", "2,5 alt/üst", "2.5 alt/üst"),
            (("odds_over25", ("ust", "over", "üst")), ("odds_under25", ("alt", "under"))),
        ),
        (
            ("karsilikli gol", "both teams to score", "btts", "karşılıklı gol"),
            (("odds_btts_yes", ("var", "yes")), ("odds_btts_no", ("yok", "no"))),
        ),
    )

    found_any = False
    for cat in categories:
        # str() guards against feeds that send a non-string category name.
        cat_name = str(cat.get("name") or cat.get("cn") or "").strip().lower()
        targets = next((fields for names, fields in markets if cat_name in names), None)
        if targets is None:
            continue

        sels = _selections_to_map(cat.get("selections") or cat.get("s") or [])
        for attr, keys in targets:
            # First selection key that carries a truthy raw value wins.
            raw_value = next((sels[key] for key in keys if sels.get(key)), None)
            odd = _safe_odd(raw_value)
            if odd:
                setattr(feats, attr, odd)
                found_any = True

    return found_any
|
||||
|
||||
|
||||
def _selections_to_map(selections: list[Any] | dict[str, Any]) -> dict[str, Any]:
|
||||
"""Normalize varied selection structures into {name_lower: odd_value}."""
|
||||
result: dict[str, Any] = {}
|
||||
if isinstance(selections, dict):
|
||||
for key, value in selections.items():
|
||||
result[str(key).strip().lower()] = value
|
||||
elif isinstance(selections, list):
|
||||
for sel in selections:
|
||||
if isinstance(sel, dict):
|
||||
name = (sel.get("name") or sel.get("n") or "").strip().lower()
|
||||
value = sel.get("odd_value") or sel.get("ov") or sel.get("v")
|
||||
if name:
|
||||
result[name] = value
|
||||
return result
|
||||
|
||||
|
||||
async def _load_relational_odds(
    session: AsyncSession, match_id: str, feats: MatchFeatures,
) -> bool:
    """Fallback: load odds from ``odd_categories`` + ``odd_selections``.

    Only the three categories 'Maç Sonucu', '2,5 Alt/Üst' and
    'Karşılıklı Gol' are queried. Each recognised selection with a valid
    odd is written to the matching ``odds_*`` field of ``feats``.

    Returns:
        True only when at least one valid odd was stored. Rows that carry
        no usable value or an unknown selection name do not count as found
        odds.
    """
    query = text("""
        SELECT oc.name AS cat_name, os.name AS sel_name, os.odd_value
        FROM odd_categories oc
        JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
        WHERE oc.match_id = :match_id
          AND oc.name IN ('Maç Sonucu', '2,5 Alt/Üst', 'Karşılıklı Gol')
    """)
    result = await session.execute(query, {"match_id": match_id})
    rows = result.mappings().all()
    if not rows:
        return False

    # (category name, lower-cased selection name) -> target field on feats.
    targets = {
        ("Maç Sonucu", "1"): "odds_home",
        ("Maç Sonucu", "x"): "odds_draw",
        ("Maç Sonucu", "2"): "odds_away",
        ("2,5 Alt/Üst", "üst"): "odds_over25",
        ("2,5 Alt/Üst", "ust"): "odds_over25",
        ("2,5 Alt/Üst", "over"): "odds_over25",
        ("2,5 Alt/Üst", "alt"): "odds_under25",
        ("2,5 Alt/Üst", "under"): "odds_under25",
        ("Karşılıklı Gol", "var"): "odds_btts_yes",
        ("Karşılıklı Gol", "yes"): "odds_btts_yes",
        ("Karşılıklı Gol", "yok"): "odds_btts_no",
        ("Karşılıklı Gol", "no"): "odds_btts_no",
    }

    stored_any = False
    for row in rows:
        cat = (row["cat_name"] or "").strip()
        sel = (row["sel_name"] or "").strip().lower()
        value = _safe_odd(row["odd_value"])
        if value <= 1.0:
            # _safe_odd signals "invalid" with 0.0.
            continue
        attr = targets.get((cat, sel))
        if attr is not None:
            setattr(feats, attr, value)
            stored_any = True

    return stored_any
|
||||
Executable
+256
@@ -0,0 +1,256 @@
|
||||
"""
|
||||
Feature Adapter for XGBoost Inference
|
||||
=====================================
|
||||
Bridges the gap between V20 Engine outputs (CalculationContext) and XGBoost Models.
|
||||
Constructs the feature vector used in training; its columns and their order are defined by FEATURES (66 names as listed in this module).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
import psycopg2
|
||||
from psycopg2.extensions import connection as PgConnection
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
from data.db import get_clean_dsn
|
||||
|
||||
# Feature definitions (must match train_xgboost_markets.py).
# NOTE(review): the previous header said "68 features", but this list holds
# 66 names. Confirm the expected count against the trained XGBoost models.
FEATURES = [
    # ELO
    "home_overall_elo",
    "away_overall_elo",
    "elo_diff",
    "home_home_elo",
    "away_away_elo",
    "form_elo_diff",

    # Form
    "home_goals_avg",
    "home_conceded_avg",
    "away_goals_avg",
    "away_conceded_avg",
    "home_clean_sheet_rate",
    "away_clean_sheet_rate",
    "home_scoring_rate",
    "away_scoring_rate",
    "home_winning_streak",
    "away_winning_streak",

    # H2H
    "h2h_home_win_rate",
    "h2h_draw_rate",
    "h2h_avg_goals",
    "h2h_btts_rate",
    "h2h_over25_rate",

    # Stats
    "home_avg_possession",
    "away_avg_possession",
    "home_avg_shots_on_target",
    "away_avg_shots_on_target",
    "home_shot_conversion",
    "away_shot_conversion",

    # Odds (implicit market wisdom)
    "odds_ms_h",
    "odds_ms_d",
    "odds_ms_a",
    "implied_home",
    "implied_draw",
    "implied_away",

    "odds_ht_ms_h",
    "odds_ht_ms_d",
    "odds_ht_ms_a",

    "odds_ou05_o",
    "odds_ou05_u",
    "odds_ou15_o",
    "odds_ou15_u",
    "odds_ou25_o",
    "odds_ou25_u",
    "odds_ou35_o",
    "odds_ou35_u",

    "odds_ht_ou05_o",
    "odds_ht_ou05_u",
    "odds_ht_ou15_o",
    "odds_ht_ou15_u",

    "odds_btts_y",
    "odds_btts_n",

    # League / context
    "league_avg_goals",
    "league_zero_goal_rate",
    "home_xga",
    "away_xga",

    # Upset features
    "upset_atmosphere",
    "upset_motivation",
    "upset_fatigue",
    "upset_potential",

    # Referee features
    "referee_home_bias",
    "referee_avg_goals",
    "referee_cards_total",
    "referee_avg_yellow",
    "referee_experience",

    # Momentum features
    "home_momentum_score",
    "away_momentum_score",
    "momentum_diff",
]
|
||||
|
||||
class FeatureAdapter:
|
||||
"""
|
||||
Adapter to convert V20 context into XGBoost-compatible features.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.conn: PgConnection | None = None
|
||||
self._connect_db()
|
||||
self.league_stats_cache: dict[str, dict[str, float]] = {}
|
||||
|
||||
def _connect_db(self) -> None:
|
||||
try:
|
||||
# FeatureAdapter uses DB only for optional league stats enrichment.
|
||||
# Keep startup non-blocking when DB/tunnel is unavailable.
|
||||
if not os.getenv("DATABASE_URL", "").strip():
|
||||
return
|
||||
self.conn = psycopg2.connect(get_clean_dsn())
|
||||
except Exception as e:
|
||||
print(f"⚠️ FeatureAdapter DB connection failed: {e}")
|
||||
|
||||
def get_features(self, ctx: Any) -> pd.DataFrame:
|
||||
"""
|
||||
Construct feature vector from CalculationContext.
|
||||
Returns a DataFrame with 1 row and correct columns.
|
||||
"""
|
||||
raw = ctx.team_pred.raw_features
|
||||
odds = ctx.odds_data or {}
|
||||
upset_features = getattr(ctx, "upset_features", {}) or {}
|
||||
momentum_features = getattr(ctx, "momentum_features", {}) or {}
|
||||
referee_features = getattr(ctx, "referee_features", {}) or {}
|
||||
|
||||
# 1. Odds Features
|
||||
ms_h = float(odds.get("ms_h") or 0)
|
||||
ms_d = float(odds.get("ms_d") or 0)
|
||||
ms_a = float(odds.get("ms_a") or 0)
|
||||
|
||||
implied_home, implied_draw, implied_away = 0.33, 0.33, 0.33
|
||||
if ms_h > 0 and ms_d > 0 and ms_a > 0:
|
||||
raw_sum = 1/ms_h + 1/ms_d + 1/ms_a
|
||||
implied_home = (1/ms_h) / raw_sum
|
||||
implied_draw = (1/ms_d) / raw_sum
|
||||
implied_away = (1/ms_a) / raw_sum
|
||||
|
||||
# 2. League Features
|
||||
# Using ctx.league_id if available, or just defaults
|
||||
league_stats = self._get_league_stats(ctx.league_id)
|
||||
|
||||
# 3. Assemble Dictionary
|
||||
row = {
|
||||
# ELO (Explicit float casting)
|
||||
"home_overall_elo": float(raw.get("home_overall_elo") or 1500),
|
||||
"away_overall_elo": float(raw.get("away_overall_elo") or 1500),
|
||||
"elo_diff": float(raw.get("elo_diff") or 0),
|
||||
"home_home_elo": float(raw.get("home_home_elo") or 1500),
|
||||
"away_away_elo": float(raw.get("away_away_elo") or 1500),
|
||||
"form_elo_diff": float(raw.get("form_elo_diff") or 0),
|
||||
|
||||
# Form (Explicit float casting)
|
||||
"home_goals_avg": float(raw.get("home_goals_avg") or 1.3),
|
||||
"home_conceded_avg": float(raw.get("home_conceded_avg") or 1.2),
|
||||
"away_goals_avg": float(raw.get("away_goals_avg") or 1.2),
|
||||
"away_conceded_avg": float(raw.get("away_conceded_avg") or 1.4),
|
||||
"home_clean_sheet_rate": float(raw.get("home_clean_sheet_rate") or 0.2),
|
||||
"away_clean_sheet_rate": float(raw.get("away_clean_sheet_rate") or 0.2),
|
||||
"home_scoring_rate": float(raw.get("home_scoring_rate") or 0.8),
|
||||
"away_scoring_rate": float(raw.get("away_scoring_rate") or 0.8),
|
||||
"home_winning_streak": float(raw.get("home_winning_streak") or 0),
|
||||
"away_winning_streak": float(raw.get("away_winning_streak") or 0),
|
||||
|
||||
# H2H (Explicit float casting)
|
||||
"h2h_home_win_rate": float(raw.get("h2h_home_win_rate") or 0.33),
|
||||
"h2h_draw_rate": float(raw.get("h2h_draw_rate") or 0.33),
|
||||
"h2h_avg_goals": float(raw.get("h2h_avg_goals") or 2.5),
|
||||
"h2h_btts_rate": float(raw.get("h2h_btts_rate") or 0.5),
|
||||
"h2h_over25_rate": float(raw.get("h2h_over25_rate") or 0.5),
|
||||
|
||||
# Stats (Explicit float casting to avoid XGBoost 'object' error)
|
||||
"home_avg_possession": float(raw.get("home_avg_possession") or 0.5),
|
||||
"away_avg_possession": float(raw.get("away_avg_possession") or 0.5),
|
||||
"home_avg_shots_on_target": float(raw.get("home_avg_shots_on_target") or 4.0),
|
||||
"away_avg_shots_on_target": float(raw.get("away_avg_shots_on_target") or 3.5),
|
||||
"home_shot_conversion": float(raw.get("home_shot_conversion") or 0.1),
|
||||
"away_shot_conversion": float(raw.get("away_shot_conversion") or 0.1),
|
||||
|
||||
# Odds
|
||||
"odds_ms_h": ms_h,
|
||||
"odds_ms_d": ms_d,
|
||||
"odds_ms_a": ms_a,
|
||||
"implied_home": implied_home,
|
||||
"implied_draw": implied_draw,
|
||||
"implied_away": implied_away,
|
||||
|
||||
"odds_ht_ms_h": float(odds.get("ht_ms_h") or 0.0),
|
||||
"odds_ht_ms_d": float(odds.get("ht_ms_d") or 0.0),
|
||||
"odds_ht_ms_a": float(odds.get("ht_ms_a") or 0.0),
|
||||
|
||||
"odds_ou05_o": float(odds.get("ou05_o") or 0.0),
|
||||
"odds_ou05_u": float(odds.get("ou05_u") or 0.0),
|
||||
"odds_ou15_o": float(odds.get("ou15_o") or 0.0),
|
||||
"odds_ou15_u": float(odds.get("ou15_u") or 0.0),
|
||||
"odds_ou25_o": float(odds.get("ou25_o") or 0.0),
|
||||
"odds_ou25_u": float(odds.get("ou25_u") or 0.0),
|
||||
"odds_ou35_o": float(odds.get("ou35_o") or 0.0),
|
||||
"odds_ou35_u": float(odds.get("ou35_u") or 0.0),
|
||||
|
||||
"odds_ht_ou05_o": float(odds.get("ht_ou05_o") or 0.0),
|
||||
"odds_ht_ou05_u": float(odds.get("ht_ou05_u") or 0.0),
|
||||
"odds_ht_ou15_o": float(odds.get("ht_ou15_o") or 0.0),
|
||||
"odds_ht_ou15_u": float(odds.get("ht_ou15_u") or 0.0),
|
||||
|
||||
"odds_btts_y": float(odds.get("btts_y") or 0.0),
|
||||
"odds_btts_n": float(odds.get("btts_n") or 0.0),
|
||||
|
||||
# League/Def
|
||||
"league_avg_goals": float(league_stats.get("avg_goals") or 2.7),
|
||||
"league_zero_goal_rate": float(league_stats.get("zero_rate") or 0.07),
|
||||
"home_xga": float(raw.get("home_xga") or 1.2),
|
||||
"away_xga": float(raw.get("away_xga") or 1.4),
|
||||
|
||||
# Upset features (default values - computed separately in upset_engine_v2)
|
||||
"upset_atmosphere": float(raw.get("upset_atmosphere") or 0.0),
|
||||
"upset_motivation": float(raw.get("upset_motivation") or 0.0),
|
||||
"upset_fatigue": float(raw.get("upset_fatigue") or 0.0),
|
||||
"upset_potential": float(raw.get("upset_potential") or 0.0),
|
||||
|
||||
# Referee features (default values)
|
||||
"referee_home_bias": float(raw.get("referee_home_bias") or 0.0),
|
||||
"referee_avg_goals": float(raw.get("referee_avg_goals") or 2.5),
|
||||
"referee_cards_total": float(raw.get("referee_cards_total") or 4.0),
|
||||
"referee_avg_yellow": float(raw.get("referee_avg_yellow") or 3.0),
|
||||
"referee_experience": float(raw.get("referee_experience") or 0),
|
||||
|
||||
# Momentum features (default values)
|
||||
"home_momentum_score": float(raw.get("home_momentum_score") or 0.0),
|
||||
"away_momentum_score": float(raw.get("away_momentum_score") or 0.0),
|
||||
"momentum_diff": float(raw.get("momentum_diff") or 0.0),
|
||||
}
|
||||
|
||||
# Return as DataFrame (cols sorted by FEATURES list to ensure alignment)
|
||||
df = pd.DataFrame([row], columns=FEATURES)
|
||||
return df
|
||||
|
||||
def _get_league_stats(self, league_id: str | None) -> dict[str, float]:
|
||||
"""Get cached league stats or default."""
|
||||
if not league_id:
|
||||
return {"avg_goals": 2.7, "zero_rate": 0.07}
|
||||
|
||||
if league_id in self.league_stats_cache:
|
||||
return self.league_stats_cache[league_id]
|
||||
|
||||
if self.conn:
|
||||
try:
|
||||
with self.conn.cursor() as cur:
|
||||
cur.execute("""
|
||||
SELECT AVG(score_home + score_away),
|
||||
AVG(CASE WHEN score_home=0 AND score_away=0 THEN 1.0 ELSE 0.0 END)
|
||||
FROM matches
|
||||
WHERE league_id = %s AND status = 'FT'
|
||||
AND mst_utc > EXTRACT(EPOCH FROM NOW() - INTERVAL '1 year')
|
||||
""", (league_id,))
|
||||
res = cur.fetchone()
|
||||
if res and res[0]:
|
||||
stats = {
|
||||
"avg_goals": float(res[0]),
|
||||
"zero_rate": float(res[1])
|
||||
}
|
||||
self.league_stats_cache[league_id] = stats
|
||||
return stats
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Default fallback
|
||||
return {"avg_goals": 2.7, "zero_rate": 0.07}
|
||||
|
||||
# Singleton
|
||||
_adapter: FeatureAdapter | None = None


def get_feature_adapter() -> FeatureAdapter:
    """Return the process-wide FeatureAdapter, creating it on the first call."""
    global _adapter
    if _adapter is not None:
        return _adapter
    _adapter = FeatureAdapter()
    return _adapter
|
||||
Executable
+316
@@ -0,0 +1,316 @@
|
||||
"""
|
||||
Head-to-Head (H2H) Feature Engine
|
||||
Takımların birbirine karşı geçmiş performansını analiz eder.
|
||||
"""
|
||||
|
||||
import os
|
||||
import psycopg2
|
||||
from typing import Dict, Optional, Tuple
|
||||
from dataclasses import dataclass
|
||||
from functools import lru_cache
|
||||
|
||||
import sys
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from data.db import get_clean_dsn
|
||||
|
||||
|
||||
@dataclass
class H2HProfile:
    """Aggregated head-to-head record between two teams.

    All counters are expressed from the perspective of the team that was
    requested as the home side. Every rate falls back to a neutral default
    when no matches are recorded.
    """
    total_matches: int
    home_wins: int
    draws: int
    away_wins: int
    home_goals_total: int
    away_goals_total: int
    btts_count: int  # Both teams to score
    over25_count: int

    def _per_match(self, count: float, default: float) -> float:
        """Return ``count / total_matches``, or ``default`` when there are no matches."""
        if self.total_matches > 0:
            return count / self.total_matches
        return default

    @property
    def home_win_rate(self) -> float:
        return self._per_match(self.home_wins, 0.33)

    @property
    def draw_rate(self) -> float:
        return self._per_match(self.draws, 0.33)

    @property
    def away_win_rate(self) -> float:
        return self._per_match(self.away_wins, 0.33)

    @property
    def avg_total_goals(self) -> float:
        return self._per_match(self.home_goals_total + self.away_goals_total, 2.5)

    @property
    def btts_rate(self) -> float:
        return self._per_match(self.btts_count, 0.5)

    @property
    def over25_rate(self) -> float:
        return self._per_match(self.over25_count, 0.5)

    @property
    def home_dominance(self) -> float:
        """Dominance score of the home side, between -1 and 1."""
        # 0.0 (not int 0) so the property honours its float annotation.
        return self._per_match(self.home_wins - self.away_wins, 0.0)

    def to_features(self) -> Dict[str, float]:
        """Return the profile as a flat feature dictionary."""
        return {
            'h2h_total_matches': self.total_matches,
            'h2h_home_win_rate': self.home_win_rate,
            'h2h_draw_rate': self.draw_rate,
            'h2h_away_win_rate': self.away_win_rate,
            'h2h_avg_goals': self.avg_total_goals,
            'h2h_btts_rate': self.btts_rate,
            'h2h_over25_rate': self.over25_rate,
            'h2h_home_dominance': self.home_dominance,
        }
|
||||
|
||||
|
||||
class H2HFeatureEngine:
    """
    Head-to-Head Feature Engine.

    Analyses past meetings between two teams, in both home/away
    orientations, and exposes them as model features.
    """

    def __init__(self):
        self.conn = None
        # Keyed by (home_team_id, away_team_id, limit). The limit is part of
        # the key because profiles built from different window sizes are not
        # interchangeable (get_momentum uses 5, get_features uses 20).
        self._cache: Dict[Tuple[str, str, int], H2HProfile] = {}

    def get_conn(self):
        """Return the live connection, (re)connecting when missing or closed."""
        if self.conn is None or self.conn.closed:
            self.conn = psycopg2.connect(get_clean_dsn())
        return self.conn

    def _fetch_meetings(self, home_team_id: str, away_team_id: str,
                        before_date: Optional[int], limit: int):
        """Fetch the latest finished meetings as (home_team_id, score_home, score_away) rows."""
        # Both orientations are included: A at home vs B, and B at home vs A.
        query = """
            SELECT home_team_id, score_home, score_away
            FROM matches
            WHERE (
                (home_team_id = %s AND away_team_id = %s)
                OR
                (home_team_id = %s AND away_team_id = %s)
            )
            AND score_home IS NOT NULL
            AND score_away IS NOT NULL
        """
        params = [home_team_id, away_team_id, away_team_id, home_team_id]

        if before_date:
            query += " AND mst_utc < %s"
            params.append(before_date)

        query += " ORDER BY mst_utc DESC LIMIT %s"
        params.append(limit)

        conn = self.get_conn()
        try:
            with conn.cursor() as cur:
                cur.execute(query, params)
                return cur.fetchall()
        except Exception:
            # A failed statement leaves the transaction aborted; reset it so
            # the shared connection stays usable, then let the caller see
            # the original error.
            if not conn.closed:
                conn.rollback()
            raise

    def get_h2h_profile(self, home_team_id: str, away_team_id: str,
                        before_date: Optional[int] = None,
                        limit: int = 20) -> H2HProfile:
        """
        Analyse the past meetings between two teams.

        Args:
            home_team_id: ID of the team treated as the home side.
            away_team_id: ID of the team treated as the away side.
            before_date: Only consider matches before this ``mst_utc`` value.
            limit: How many of the most recent meetings to consider.

        Returns:
            H2HProfile with counters normalised to the requested home side.
            Results are cached only when ``before_date`` is None.
        """
        cache_key = (home_team_id, away_team_id, limit)
        use_cache = before_date is None
        if use_cache and cache_key in self._cache:
            return self._cache[cache_key]

        rows = self._fetch_meetings(home_team_id, away_team_id, before_date, limit)

        home_wins = draws = away_wins = 0
        home_goals = away_goals = 0
        btts = over25 = 0

        for m_home_id, score_h, score_a in rows:
            # Normalise to the requested perspective: swap the scores when the
            # requested home team actually played this meeting away.
            if m_home_id == home_team_id:
                h_score, a_score = score_h, score_a
            else:
                h_score, a_score = score_a, score_h

            if h_score > a_score:
                home_wins += 1
            elif h_score < a_score:
                away_wins += 1
            else:
                draws += 1

            home_goals += h_score
            away_goals += a_score

            if h_score > 0 and a_score > 0:
                btts += 1
            if h_score + a_score > 2.5:
                over25 += 1

        profile = H2HProfile(
            total_matches=len(rows),
            home_wins=home_wins,
            draws=draws,
            away_wins=away_wins,
            home_goals_total=home_goals,
            away_goals_total=away_goals,
            btts_count=btts,
            over25_count=over25,
        )

        # Empty profiles are cached too, so pairs without history do not hit
        # the database again on every call.
        if use_cache:
            self._cache[cache_key] = profile

        return profile

    def get_features(self, home_team_id: str, away_team_id: str,
                     before_date: Optional[int] = None) -> Dict[str, float]:
        """Return the head-to-head feature dictionary for the pairing."""
        profile = self.get_h2h_profile(home_team_id, away_team_id, before_date)
        return profile.to_features()

    def get_momentum(self, home_team_id: str, away_team_id: str,
                     before_date: Optional[int] = None) -> Dict[str, float]:
        """
        Momentum/trend analysis over the last 5 meetings.

        Returns the recent dominance score plus the length and type of the
        current streak of identical results, counted from the newest meeting.
        """
        profile = self.get_h2h_profile(home_team_id, away_team_id, before_date, limit=5)
        recent = self._fetch_meetings(home_team_id, away_team_id, before_date, 5)

        streak = 0
        streak_type = None  # 'home', 'away', 'draw'

        for m_home_id, score_h, score_a in recent:
            # Normalise the result to the requested home side.
            if m_home_id == home_team_id:
                result = 'home' if score_h > score_a else ('away' if score_h < score_a else 'draw')
            else:
                result = 'away' if score_h > score_a else ('home' if score_h < score_a else 'draw')

            if streak_type is None:
                streak_type = result
                streak = 1
            elif result == streak_type:
                streak += 1
            else:
                break

        return {
            'h2h_recent_home_dominance': profile.home_dominance,
            'h2h_streak_length': streak,
            'h2h_streak_home': 1 if streak_type == 'home' else 0,
            'h2h_streak_away': 1 if streak_type == 'away' else 0,
            'h2h_streak_draw': 1 if streak_type == 'draw' else 0,
        }
|
||||
|
||||
|
||||
# Singleton
|
||||
_engine = None


def get_h2h_engine() -> H2HFeatureEngine:
    """Return the shared H2HFeatureEngine, building it lazily on first use."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = H2HFeatureEngine()
    return _engine
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: pick any finished match from the database and print
    # the head-to-head profile and features for that pairing.
    engine = get_h2h_engine()

    cur = engine.get_conn().cursor()
    cur.execute("""
        SELECT home_team_id, away_team_id, match_name
        FROM matches
        WHERE score_home IS NOT NULL
        LIMIT 1
    """)
    result = cur.fetchone()

    if result:
        home_id, away_id, name = result
        for header_line in (
            f"\n🧪 Test: {name}",
            f" Home ID: {home_id}",
            f" Away ID: {away_id}",
        ):
            print(header_line)

        profile = engine.get_h2h_profile(home_id, away_id)
        for profile_line in (
            "\n📊 H2H Profil:",
            f" Toplam Maç: {profile.total_matches}",
            f" Ev Sahibi Kazanma: {profile.home_win_rate:.1%}",
            f" Beraberlik: {profile.draw_rate:.1%}",
            f" Deplasman Kazanma: {profile.away_win_rate:.1%}",
            f" Ortalama Gol: {profile.avg_total_goals:.2f}",
            f" BTTS Oranı: {profile.btts_rate:.1%}",
            f" Üst 2.5 Oranı: {profile.over25_rate:.1%}",
            f" Ev Dominance: {profile.home_dominance:+.2f}",
        ):
            print(profile_line)

        features = engine.get_features(home_id, away_id)
        print(f"\n🔧 Features: {features}")
|
||||
@@ -0,0 +1,343 @@
|
||||
"""
|
||||
HT/FT Tendency Feature Engine
|
||||
================================
|
||||
Produces team-level HT/FT tendency features for match prediction.
|
||||
|
||||
Computes ~15 features per match based on historical data:
|
||||
- 1st half scoring/conceding rates
|
||||
- Comeback rates
|
||||
- Half-specific goal distribution
|
||||
- League-level HT/FT profiles
|
||||
|
||||
All features are computed from the `matches` table using only data
|
||||
BEFORE the match date (no future leakage).
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from typing import Dict, Optional, Tuple
|
||||
from dataclasses import dataclass, field
|
||||
from data.db import get_clean_dsn
|
||||
import psycopg2
|
||||
|
||||
|
||||
@dataclass
class TeamHtftProfile:
    """Half-time/full-time tendency counters for one team, with derived rates.

    Every derived rate returns a fixed neutral default when its denominator
    is zero.
    """
    matches: int = 0
    ht_scored: int = 0      # Matches where team scored in 1st half
    ht_conceded: int = 0    # Matches where team conceded in 1st half
    ht_leading: int = 0     # Matches where team led at HT
    ht_trailing: int = 0    # Matches where team trailed at HT
    comeback_wins: int = 0  # Trailing at HT -> Won
    goals_1h: int = 0
    goals_2h: int = 0
    conceded_1h: int = 0
    conceded_2h: int = 0

    @property
    def ht_scoring_rate(self):
        """Share of matches with a first-half goal scored (0.5 without data)."""
        if self.matches > 0:
            return self.ht_scored / self.matches
        return 0.5

    @property
    def ht_concede_rate(self):
        """Share of matches with a first-half goal conceded (0.5 without data)."""
        if self.matches > 0:
            return self.ht_conceded / self.matches
        return 0.5

    @property
    def ht_win_rate(self):
        """Share of matches led at half time (0.33 without data)."""
        if self.matches > 0:
            return self.ht_leading / self.matches
        return 0.33

    @property
    def comeback_rate(self):
        """Wins per match trailed at half time (0.0 when never trailing)."""
        if self.ht_trailing > 0:
            return self.comeback_wins / self.ht_trailing
        return 0.0

    @property
    def first_half_goal_pct(self):
        """Fraction of the team's goals scored before the break (0.5 without goals)."""
        scored = self.goals_1h + self.goals_2h
        if scored > 0:
            return self.goals_1h / scored
        return 0.5

    @property
    def second_half_surge(self):
        """Ratio of 2H goals to 1H goals; >1 means more dangerous after the break."""
        if self.goals_1h > 0:
            return self.goals_2h / self.goals_1h
        return 1.0
|
||||
|
||||
|
||||
@dataclass
class LeagueHtftProfile:
    """League-wide half-time/full-time totals, with derived averages.

    Derived values return fixed defaults when no matches (or no goals)
    have been recorded.
    """
    matches: int = 0
    ht_goals_total: int = 0
    ft_goals_total: int = 0
    reversals: int = 0
    htft_counts: Dict[str, int] = field(default_factory=dict)

    @property
    def avg_ht_goals(self):
        """Average first-half goals per match (1.0 without data)."""
        if self.matches > 0:
            return self.ht_goals_total / self.matches
        return 1.0

    @property
    def avg_2h_goals(self):
        """Average second-half goals per match: full-time average minus HT average."""
        if self.matches > 0:
            ft_average = self.ft_goals_total / self.matches
        else:
            ft_average = 2.5
        return ft_average - self.avg_ht_goals

    @property
    def reversal_rate(self):
        """Share of matches counted as reversals (0.05 without data)."""
        if self.matches > 0:
            return self.reversals / self.matches
        return 0.05

    @property
    def first_half_pct(self):
        """Fraction of all goals scored in the first half (0.44 without goals)."""
        if self.ft_goals_total > 0:
            return self.ht_goals_total / self.ft_goals_total
        return 0.44
|
||||
|
||||
|
||||
class HtftTendencyEngine:
    """
    Computes HT/FT tendency features for a given match.

    Uses historical data from the `matches` table, filtering by date to
    avoid future leakage.

    Features are based on team-level and league-level tendencies, which
    are DIFFERENT from the existing model features (ELO, form, H2H score).
    """

    # Number of matches regarded as a "full" sample for the data-quality features.
    _FULL_SAMPLE = 30.0

    # Filters shared by every query: finished football matches with HT and FT scores.
    # The FT score filter guards the arithmetic below against NULL values.
    _BASE_FILTER = """
            AND sport = 'football'
            AND status = 'FT'
            AND ht_score_home IS NOT NULL
            AND ht_score_away IS NOT NULL
            AND score_home IS NOT NULL
            AND score_away IS NOT NULL
    """

    def __init__(self):
        self.conn = None
        # Keyed by (team_id, is_home, before_date, limit).
        self._team_cache: Dict[Tuple[str, bool, Optional[int], int], TeamHtftProfile] = {}
        # Keyed by (league_id, before_date).
        self._league_cache: Dict[Tuple[str, Optional[int]], LeagueHtftProfile] = {}

    def get_conn(self):
        """Return the live connection, (re)connecting when missing or closed."""
        if self.conn is None or self.conn.closed:
            dsn = get_clean_dsn()
            self.conn = psycopg2.connect(dsn)
        return self.conn

    def _fetch_rows(self, query: str, params: list):
        """Run a read query and return all rows, always releasing the cursor."""
        conn = self.get_conn()
        try:
            with conn.cursor() as cur:
                cur.execute(query, params)
                return cur.fetchall()
        except Exception:
            # A failed statement leaves the transaction aborted; reset it so
            # the shared connection remains usable, then propagate the error.
            if not conn.closed:
                conn.rollback()
            raise

    def _get_team_htft_profile(
        self,
        team_id: str,
        is_home: bool,
        before_date: Optional[int] = None,
        limit: int = 30,
    ) -> TeamHtftProfile:
        """
        Compute HT/FT profile for a team from their recent matches.

        Args:
            team_id: Team ID
            is_home: True = only home matches, False = only away matches
            before_date: Only use matches before this ``mst_utc`` value
            limit: Number of recent matches to consider
        """
        # The limit is part of the key: profiles built from different window
        # sizes must not be served for one another.
        cache_key = (team_id, is_home, before_date, limit)
        if cache_key in self._team_cache:
            return self._team_cache[cache_key]

        # Select the columns as (mine, opponent) so the loop below is
        # independent of the side the team played on. Only these two fixed
        # internal fragments are interpolated; all values are bound parameters.
        if is_home:
            columns = "ht_score_home, ht_score_away, score_home, score_away"
            team_column = "home_team_id"
        else:
            columns = "ht_score_away, ht_score_home, score_away, score_home"
            team_column = "away_team_id"

        query = f"""
            SELECT {columns}
            FROM matches
            WHERE {team_column} = %s
        """ + self._BASE_FILTER
        params = [team_id]

        if before_date:
            query += " AND mst_utc < %s"
            params.append(before_date)

        query += " ORDER BY mst_utc DESC LIMIT %s"
        params.append(limit)

        rows = self._fetch_rows(query, params)

        profile = TeamHtftProfile(matches=len(rows))

        for ht_mine, ht_opp, ft_mine, ft_opp in rows:
            # 1st half scoring
            if ht_mine > 0:
                profile.ht_scored += 1
            if ht_opp > 0:
                profile.ht_conceded += 1

            # HT situation
            if ht_mine > ht_opp:
                profile.ht_leading += 1
            elif ht_mine < ht_opp:
                profile.ht_trailing += 1
                # Comeback: trailed at the break but won the match
                if ft_mine > ft_opp:
                    profile.comeback_wins += 1

            # Goal distribution (2nd-half goals = full-time minus half-time)
            profile.goals_1h += ht_mine
            profile.goals_2h += (ft_mine - ht_mine)
            profile.conceded_1h += ht_opp
            profile.conceded_2h += (ft_opp - ht_opp)

        self._team_cache[cache_key] = profile
        return profile

    def _get_league_htft_profile(
        self,
        league_id: str,
        before_date: Optional[int] = None,
    ) -> LeagueHtftProfile:
        """Compute the HT/FT profile of a league from its latest 500 matches."""
        cache_key = (league_id, before_date)
        if cache_key in self._league_cache:
            return self._league_cache[cache_key]

        query = """
            SELECT ht_score_home, ht_score_away, score_home, score_away
            FROM matches
            WHERE league_id = %s
        """ + self._BASE_FILTER
        params = [league_id]

        if before_date:
            query += " AND mst_utc < %s"
            params.append(before_date)

        query += " ORDER BY mst_utc DESC LIMIT 500"

        rows = self._fetch_rows(query, params)

        profile = LeagueHtftProfile(matches=len(rows))

        for hth, hta, sh, sa in rows:
            profile.ht_goals_total += hth + hta
            profile.ft_goals_total += sh + sa

            # Classify HT/FT as "<half-time result>/<full-time result>"
            ht = "1" if hth > hta else ("2" if hth < hta else "X")
            ft = "1" if sh > sa else ("2" if sh < sa else "X")
            htft = f"{ht}/{ft}"

            profile.htft_counts[htft] = profile.htft_counts.get(htft, 0) + 1
            # A reversal is a lead at the break turned into a defeat.
            if htft in ("1/2", "2/1"):
                profile.reversals += 1

        self._league_cache[cache_key] = profile
        return profile

    def get_features(
        self,
        home_team_id: str,
        away_team_id: str,
        league_id: Optional[str] = None,
        before_date: Optional[int] = None,
    ) -> Dict[str, float]:
        """
        Get HT/FT tendency features for a match.

        Returns a dict of team-level, league-level and sample-size features.
        Without ``league_id`` the league features use the profile defaults.
        """
        # Team profiles (home side for home team, away side for away team)
        home_prof = self._get_team_htft_profile(home_team_id, is_home=True, before_date=before_date)
        away_prof = self._get_team_htft_profile(away_team_id, is_home=False, before_date=before_date)

        # League profile
        league_prof = LeagueHtftProfile()
        if league_id:
            league_prof = self._get_league_htft_profile(league_id, before_date=before_date)

        features = {
            # Home team HT/FT tendencies
            "htft_home_ht_scoring_rate": home_prof.ht_scoring_rate,
            "htft_home_ht_concede_rate": home_prof.ht_concede_rate,
            "htft_home_ht_win_rate": home_prof.ht_win_rate,
            "htft_home_comeback_rate": home_prof.comeback_rate,
            "htft_home_first_half_goal_pct": home_prof.first_half_goal_pct,
            # The surge ratio is capped at 3.0 to bound outliers.
            "htft_home_second_half_surge": min(home_prof.second_half_surge, 3.0),

            # Away team HT/FT tendencies
            "htft_away_ht_scoring_rate": away_prof.ht_scoring_rate,
            "htft_away_ht_concede_rate": away_prof.ht_concede_rate,
            "htft_away_ht_win_rate": away_prof.ht_win_rate,
            "htft_away_comeback_rate": away_prof.comeback_rate,
            "htft_away_first_half_goal_pct": away_prof.first_half_goal_pct,
            "htft_away_second_half_surge": min(away_prof.second_half_surge, 3.0),

            # League-level
            "htft_league_avg_ht_goals": league_prof.avg_ht_goals,
            "htft_league_reversal_rate": league_prof.reversal_rate,
            "htft_league_first_half_pct": league_prof.first_half_pct,

            # Data quality (how many matches we have for these features)
            "htft_home_sample_size": min(home_prof.matches / self._FULL_SAMPLE, 1.0),
            "htft_away_sample_size": min(away_prof.matches / self._FULL_SAMPLE, 1.0),
        }

        return features

    def clear_cache(self):
        """Clear internal caches (useful between batches)."""
        self._team_cache.clear()
        self._league_cache.clear()
|
||||
|
||||
|
||||
# Singleton
|
||||
_engine = None


def get_htft_tendency_engine() -> HtftTendencyEngine:
    """Return the shared HtftTendencyEngine, building it lazily on first use."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = HtftTendencyEngine()
    return _engine
|
||||
|
||||
|
||||
# ── Test ─────────────────────────────────────────────────────────────────────
|
||||
if __name__ == "__main__":
    # Manual smoke test: print the HT/FT features of the three most recent
    # finished football matches.
    engine = get_htft_tendency_engine()

    cur = engine.get_conn().cursor()
    cur.execute("""
        SELECT home_team_id, away_team_id, league_id, mst_utc, match_name
        FROM matches
        WHERE sport = 'football' AND status = 'FT'
        AND home_team_id IS NOT NULL AND away_team_id IS NOT NULL
        ORDER BY mst_utc DESC LIMIT 3
    """)
    matches = cur.fetchall()
    cur.close()

    for row in matches:
        hid, aid, lid, mst, name = row
        print(f"\n🏟️  {name}")
        features = engine.get_features(hid, aid, lid, mst)
        for key in sorted(features):
            print(f" {key}: {features[key]:.4f}")
|
||||
Executable
+434
@@ -0,0 +1,434 @@
|
||||
"""
|
||||
Momentum Engine - Son Maç Trendleri
|
||||
V9 Model için takımların anlık form trendini analiz eder.
|
||||
|
||||
Faktörler:
|
||||
1. Gol atma trendi (artan/azalan/stabil)
|
||||
2. Yenilmezlik/yenilgi serisi
|
||||
3. Son maç psikolojisi (büyük galibiyet/mağlubiyet etkisi)
|
||||
4. Ev/Deplasman momentum farkı
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from typing import Dict, List, Tuple, Optional
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
try:
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
except ImportError:
|
||||
psycopg2 = None
|
||||
|
||||
|
||||
@dataclass
class MomentumData:
    """Momentum snapshot of a single team.

    Attributes:
        goals_trend: Scoring trend, from -1 (decreasing) to +1 (increasing).
        conceded_trend: Conceding trend, from -1 (decreasing) to +1
            (increasing); negative values are good for the team.
        unbeaten_streak: Current run of matches without a defeat.
        losing_streak: Current run of consecutive defeats.
        winning_streak: Current run of consecutive wins.
        last_match_impact: Psychological effect of the last match (-1 to +1).
        momentum_score: Overall momentum (-1 to +1).
        form_direction: One of "improving", "declining", "stable".
        xg_underperformance: (xG_For - Real_Goals) in last matches;
            >0 means underperforming.
        xg_conceded_diff: (Real_Conceded - xG_Against) in last matches.
    """
    goals_trend: float = 0.0
    conceded_trend: float = 0.0
    unbeaten_streak: int = 0
    losing_streak: int = 0
    winning_streak: int = 0
    last_match_impact: float = 0.0
    momentum_score: float = 0.0
    form_direction: str = "stable"
    xg_underperformance: float = 0.0
    xg_conceded_diff: float = 0.0
|
||||
|
||||
|
||||
class MomentumEngine:
|
||||
"""
|
||||
Son maçlardaki trendi analiz eder.
|
||||
Form yükselişi/düşüşü, seriler ve psikolojik etki.
|
||||
"""
|
||||
|
||||
def __init__(self):
    """Create the engine and try to open its database connection."""
    # Starts as None; _connect_db replaces it only when connecting succeeds.
    self.conn = None
    self._connect_db()
|
||||
|
||||
def _connect_db(self):
    """Connect to the database; leave ``self.conn`` as None when that is not possible."""
    # psycopg2 is optional: without the driver there is nothing to connect with.
    if psycopg2 is None:
        return

    try:
        from data.db import get_clean_dsn
        dsn = get_clean_dsn()
        self.conn = psycopg2.connect(dsn)
    except Exception as exc:
        print(f"[MomentumEngine] DB connection failed: {exc}")
        self.conn = None
|
||||
|
||||
def _get_conn(self):
|
||||
"""Bağlantıyı kontrol et ve döndür"""
|
||||
if self.conn is None or self.conn.closed:
|
||||
self._connect_db()
|
||||
return self.conn
|
||||
|
||||
def get_recent_matches(
    self,
    team_id: str,
    before_date_ms: int,
    limit: int = 5,
    home_only: bool = False,
    away_only: bool = False
) -> List[Dict]:
    """
    Fetch the team's most recent matches that have a recorded score.

    Args:
        team_id: Team whose matches are requested.
        before_date_ms: Only matches with ``mst_utc`` below this value.
        limit: Maximum number of matches to return.
        home_only: Restrict to matches the team played at home.
        away_only: Restrict to matches the team played away
            (ignored when ``home_only`` is also set).

    Returns:
        Dict-like rows, newest first, holding the match id, both team IDs,
        both scores and ``mst_utc``. An empty list when no connection is
        available or the query fails.
    """
    conn = self._get_conn()
    if conn is None:
        return []

    conditions = ["mst_utc < %s", "score_home IS NOT NULL"]
    params = [before_date_ms]

    if home_only:
        conditions.append("home_team_id = %s")
        params.append(team_id)
    elif away_only:
        conditions.append("away_team_id = %s")
        params.append(team_id)
    else:
        conditions.append("(home_team_id = %s OR away_team_id = %s)")
        params.extend([team_id, team_id])

    # Only the fixed condition fragments above are interpolated; every
    # value is passed as a bound parameter.
    query = f"""
        SELECT
            id, home_team_id, away_team_id,
            score_home, score_away, mst_utc
        FROM matches
        WHERE {' AND '.join(conditions)}
        ORDER BY mst_utc DESC
        LIMIT %s
    """
    params.append(limit)

    try:
        # The context manager closes the cursor even when the query fails.
        with conn.cursor(cursor_factory=RealDictCursor) as cursor:
            cursor.execute(query, params)
            return cursor.fetchall()
    except Exception as e:
        print(f"[MomentumEngine] Query error: {e}")
        # A failed statement leaves the transaction aborted, which would make
        # every later query on this connection fail as well. Reset it; when
        # even that fails, drop the connection so _get_conn reconnects.
        try:
            conn.rollback()
        except Exception as rollback_error:
            print(f"[MomentumEngine] Rollback failed: {rollback_error}")
            self.conn = None
        return []
|
||||
|
||||
def calculate_goals_trend(self, matches: List[Dict], team_id: str) -> Tuple[float, float]:
    """
    Compare scoring and conceding in the first three matches with the rest.

    The first three entries of ``matches`` are averaged and compared with the
    average of the remaining entries. With exactly three matches there is
    nothing to compare against, so both trends are 0.

    Returns:
        (goals_trend, conceded_trend), each half the difference of the two
        averages, clamped to the range -1 to +1. (0.0, 0.0) when fewer than
        three matches are given.
    """
    if len(matches) < 3:
        return 0.0, 0.0

    # Per-match goals scored and conceded from the team's point of view.
    scored = []
    allowed = []
    for row in matches:
        team_is_home = row['home_team_id'] == team_id
        scored.append(row['score_home'] if team_is_home else row['score_away'])
        allowed.append(row['score_away'] if team_is_home else row['score_home'])

    def window_delta(series):
        # Average of the first three entries versus the average of the rest.
        recent_avg = sum(series[:3]) / 3
        earlier = series[3:]
        earlier_avg = sum(earlier) / len(earlier) if earlier else recent_avg
        return min(max((recent_avg - earlier_avg) / 2, -1), 1)

    return window_delta(scored), window_delta(allowed)
|
||||
|
||||
def calculate_streaks(self, matches: List[Dict], team_id: str) -> Tuple[int, int, int]:
    """
    Compute the team's current winning, unbeaten and losing streaks.

    Each streak is the number of consecutive matches, counted from the first
    entry of ``matches``, that satisfy the streak's condition. The caller is
    expected to pass matches ordered newest first so that the streaks are the
    current ones.

    Args:
        matches: Rows with ``home_team_id``, ``score_home`` and ``score_away``.
        team_id: Team whose perspective is used to decide win/draw/loss.

    Returns:
        (winning_streak, unbeaten_streak, losing_streak)
    """
    # Encode every match as +1 (win), 0 (draw) or -1 (loss) for the team.
    outcomes = []
    for match in matches:
        if match['home_team_id'] == team_id:
            goals_for, goals_against = match['score_home'], match['score_away']
        else:
            goals_for, goals_against = match['score_away'], match['score_home']
        outcomes.append((goals_for > goals_against) - (goals_for < goals_against))

    def leading_run(qualifies):
        # Length of the unbroken run at the start of the sequence.
        run = 0
        for outcome in outcomes:
            if not qualifies(outcome):
                break
            run += 1
        return run

    # The three streaks are independent: a draw ends the winning streak but
    # keeps the unbeaten one alive, and an older loss never erases a newer run.
    winning = leading_run(lambda outcome: outcome > 0)
    unbeaten = leading_run(lambda outcome: outcome >= 0)
    losing = leading_run(lambda outcome: outcome < 0)

    return winning, unbeaten, losing
|
||||
|
||||
def calculate_last_match_impact(self, matches: List[Dict], team_id: str) -> float:
    """
    Score the psychological effect of the first match in ``matches``.

    The goal difference from the team's point of view is mapped to a fixed
    scale: a win by four or more is +1.0, a loss by four or more is -1.0.

    Returns:
        Impact between -1 and +1; 0.0 when ``matches`` is empty.
    """
    if not matches:
        return 0.0

    latest = matches[0]
    if latest['home_team_id'] == team_id:
        team_goals, opponent_goals = latest['score_home'], latest['score_away']
    else:
        team_goals, opponent_goals = latest['score_away'], latest['score_home']
    margin = team_goals - opponent_goals

    # Comfortable and emphatic wins.
    if margin >= 4:
        return 1.0
    if margin >= 2:
        return 0.6

    # One-goal results and draws map to exact values.
    narrow_results = {1: 0.3, 0: 0.0, -1: -0.3}
    if margin in narrow_results:
        return narrow_results[margin]

    # Anything left is a defeat by two or more goals.
    return -0.6 if margin >= -3 else -1.0
|
||||
|
||||
def calculate_xg_underperformance(self, matches: List[Dict], team_id: str) -> Tuple[float, float]:
    """
    Estimate how far the team's results fall short of its expected goals.

    No real xG data is read here: a synthetic xG value is derived from each
    score as ``max(0.5, goals * 1.5 - 0.5)``.

    Returns:
        (xg_strike_diff, xg_defend_diff), both per-match averages.
        xg_strike_diff > 0 means the team scored less than the synthetic xG.
        xg_defend_diff > 0 means it conceded more than the synthetic xG.
        (0.0, 0.0) when ``matches`` is empty.
    """
    if not matches:
        return 0.0, 0.0

    def synthetic_xg(goal_count):
        # Placeholder xG derived from the actual score.
        return max(0.5, goal_count * 1.5 - 0.5)

    goals_for_total = 0
    goals_against_total = 0
    xg_for_total = 0.0
    xg_against_total = 0.0

    for fixture in matches:
        if fixture['home_team_id'] == team_id:
            team_goals, opponent_goals = fixture['score_home'], fixture['score_away']
        else:
            team_goals, opponent_goals = fixture['score_away'], fixture['score_home']

        goals_for_total += team_goals
        goals_against_total += opponent_goals
        xg_for_total += synthetic_xg(team_goals)
        xg_against_total += synthetic_xg(opponent_goals)

    played = len(matches)
    strike_diff = (xg_for_total - goals_for_total) / played
    defend_diff = (goals_against_total - xg_against_total) / played
    return strike_diff, defend_diff
|
||||
|
||||
def calculate_momentum(
    self,
    team_id: str,
    before_date_ms: int,
    match_limit: int = 5
) -> MomentumData:
    """
    Run the full momentum analysis for one team.

    Loads the team's recent matches and fills a ``MomentumData`` with the
    goal trends, streaks, last-match impact, form direction, xG
    underperformance figures and the combined momentum score.

    Returns:
        A populated ``MomentumData``; a default-constructed one when no
        matches are found.
    """
    data = MomentumData()

    matches = self.get_recent_matches(team_id, before_date_ms, match_limit)
    if not matches:
        return data

    # 1. Scoring / conceding trends
    data.goals_trend, data.conceded_trend = self.calculate_goals_trend(matches, team_id)

    # 2. Streaks
    streaks = self.calculate_streaks(matches, team_id)
    data.winning_streak, data.unbeaten_streak, data.losing_streak = streaks

    # 3. Impact of the latest match
    data.last_match_impact = self.calculate_last_match_impact(matches, team_id)

    # 4. Form direction
    if data.goals_trend > 0.3 and data.conceded_trend < 0:
        data.form_direction = "improving"
    elif data.goals_trend < -0.3 or data.conceded_trend > 0.3:
        data.form_direction = "declining"
    else:
        data.form_direction = "stable"

    # 5. xG underperformance (chronic wastefulness)
    data.xg_underperformance, data.xg_conceded_diff = \
        self.calculate_xg_underperformance(matches, team_id)

    # 6. Combined momentum score
    score = 0.0

    # Attacking trend counts positively, conceding trend negatively.
    score += data.goals_trend * 0.25
    score -= data.conceded_trend * 0.20

    # Streak bonus: the unbeaten run only counts when there is no winning run of two or more.
    if data.winning_streak >= 3:
        score += 0.25
    elif data.winning_streak >= 2 or data.unbeaten_streak >= 5:
        score += 0.15

    # Losing-streak penalty
    if data.losing_streak >= 3:
        score -= 0.30
    elif data.losing_streak >= 2:
        score -= 0.15

    score += data.last_match_impact * 0.20

    # Penalties for scoring less, or conceding more, than the xG figures by
    # over 0.5 goals per match; each penalty is capped at 0.3.
    for shortfall in (data.xg_underperformance, data.xg_conceded_diff):
        if shortfall > 0.5:
            score -= min(0.3, shortfall * 0.2)

    data.momentum_score = min(max(score, -1), 1)

    return data
|
||||
|
||||
def get_features(
    self,
    home_team_id: str,
    away_team_id: str,
    match_date_ms: int
) -> Dict[str, float]:
    """
    Build the momentum feature dict for a fixture.

    Momentum is calculated for the home team and then the away team, both as
    of ``match_date_ms``. The result holds ten ``home_*`` features, ten
    ``away_*`` features and three home-minus-away differences. Streaks are
    capped (winning/losing at 5, unbeaten at 10) and the form direction is
    encoded as 1 / 0 / -1, with unknown labels mapped to 0.
    """
    home = self.calculate_momentum(home_team_id, match_date_ms)
    away = self.calculate_momentum(away_team_id, match_date_ms)

    direction_codes = {"improving": 1, "stable": 0, "declining": -1}

    def side_block(side, snapshot):
        # Per-team features, prefixed with "home" or "away".
        return {
            f"{side}_momentum_score": snapshot.momentum_score,
            f"{side}_goals_trend": snapshot.goals_trend,
            f"{side}_conceded_trend": snapshot.conceded_trend,
            f"{side}_winning_streak": min(snapshot.winning_streak, 5),
            f"{side}_unbeaten_streak": min(snapshot.unbeaten_streak, 10),
            f"{side}_losing_streak": min(snapshot.losing_streak, 5),
            f"{side}_last_impact": snapshot.last_match_impact,
            f"{side}_form_direction": direction_codes.get(snapshot.form_direction, 0),
            f"{side}_xg_underperf": snapshot.xg_underperformance,
            f"{side}_xg_conceded_diff": snapshot.xg_conceded_diff,
        }

    features = side_block("home", home)
    features.update(side_block("away", away))

    # Differences (home minus away)
    home_net_trend = home.goals_trend - home.conceded_trend
    away_net_trend = away.goals_trend - away.conceded_trend
    features["momentum_diff"] = home.momentum_score - away.momentum_score
    features["trend_diff"] = home_net_trend - away_net_trend
    features["xg_underperf_diff"] = home.xg_underperformance - away.xg_underperformance

    return features
|
||||
|
||||
|
||||
# Singleton instance
|
||||
_engine_instance = None


def get_momentum_engine() -> MomentumEngine:
    """Return the shared MomentumEngine, creating it on the first call."""
    global _engine_instance
    if _engine_instance is not None:
        return _engine_instance
    _engine_instance = MomentumEngine()
    return _engine_instance
|
||||
|
||||
|
||||
# Test
|
||||
if __name__ == "__main__":
    engine = get_momentum_engine()

    # Banner
    banner = "=" * 60
    print(banner)
    print("MOMENTUM ENGINE TEST")
    print(banner)

    # Sample values built by hand (no database needed)
    data = MomentumData(
        goals_trend=0.5,
        conceded_trend=-0.3,
        winning_streak=3,
        unbeaten_streak=5,
        losing_streak=0,
        last_match_impact=0.6,
        form_direction="improving"
    )

    report = (
        ("Goals Trend", data.goals_trend),
        ("Conceded Trend", data.conceded_trend),
        ("Winning Streak", data.winning_streak),
        ("Unbeaten Streak", data.unbeaten_streak),
        ("Form Direction", data.form_direction),
        ("Last Match Impact", data.last_match_impact),
    )
    for label, value in report:
        print(f"{label}: {value}")
|
||||
Executable
+371
@@ -0,0 +1,371 @@
|
||||
"""
|
||||
Poisson Engine - Matematiksel Gol Modeli
|
||||
V9 Model için Poisson dağılımı ile gol olasılıkları hesaplar.
|
||||
|
||||
Özellikler:
|
||||
1. Exact score olasılıkları (0-0, 1-0, 1-1, 2-1, vb.)
|
||||
2. Over/Under olasılıkları (matematiksel)
|
||||
3. BTTS (Karşılıklı Gol) olasılıkları
|
||||
4. Expected Goals (xG) tahmini
|
||||
"""
|
||||
|
||||
import math
|
||||
from typing import Dict, Tuple, Optional
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
def poisson_prob(lam: float, k: int) -> float:
    """
    Poisson probability mass function.

    P(X = k) = (lam^k * e^(-lam)) / k!

    The value is evaluated in log space so that large ``lam`` / ``k`` cannot
    overflow the intermediate ``lam^k`` or ``k!`` terms.

    Args:
        lam: Expected number of events. Values <= 0 are treated as a
            distribution with all its mass at zero.
        k: Number of events.

    Returns:
        The probability of exactly ``k`` events; 0.0 for negative ``k``.
    """
    # A count can never be negative, whatever the rate.
    if k < 0:
        return 0.0
    if lam <= 0:
        return 1.0 if k == 0 else 0.0
    # lgamma(k + 1) == log(k!)
    return math.exp(k * math.log(lam) - lam - math.lgamma(k + 1))
|
||||
|
||||
|
||||
@dataclass
class PoissonPrediction:
    """Result container for a Poisson match prediction."""
    home_xg: float = 0.0  # Expected goals for the home team
    away_xg: float = 0.0  # Expected goals for the away team
    total_xg: float = 0.0  # Total expected goals

    # Match result (1X2) probabilities
    home_win_prob: float = 0.0
    draw_prob: float = 0.0
    away_win_prob: float = 0.0

    # Over/under probabilities for the 1.5, 2.5 and 3.5 goal lines
    over_15_prob: float = 0.0
    over_25_prob: float = 0.0
    over_35_prob: float = 0.0
    under_15_prob: float = 0.0
    under_25_prob: float = 0.0
    under_35_prob: float = 0.0

    # Both teams to score
    btts_yes_prob: float = 0.0
    btts_no_prob: float = 0.0

    # Most likely exact scores
    most_likely_scores: list = field(default_factory=list)
|
||||
|
||||
|
||||
class PoissonEngine:
    """
    Goal-probability calculator based on independent Poisson distributions.

    A purely statistical approach that does not depend on any machine-learning
    model. Exact-score probabilities are evaluated on a grid of
    0..max_goals goals per team.
    """

    # Default league scoring averages, used when the caller supplies none.
    DEFAULT_HOME_XG = 1.45
    DEFAULT_AWAY_XG = 1.15
    DEFAULT_LEAGUE_AVG = 2.60

    def __init__(self):
        self.max_goals = 7  # Highest per-team goal count included in the score grid

    def calculate_xg(
        self,
        home_goals_avg: float,
        home_conceded_avg: float,
        away_goals_avg: float,
        away_conceded_avg: float,
        league_home_avg: float = None,
        league_away_avg: float = None,
        league_total_avg: float = None
    ) -> Tuple[float, float]:
        """
        Estimate expected goals for both teams.

        xG = attack strength * opponent defence weakness * league average.

        Args:
            home_goals_avg: Goals scored per match by the home team.
            home_conceded_avg: Goals conceded per match by the home team.
            away_goals_avg: Goals scored per match by the away team.
            away_conceded_avg: Goals conceded per match by the away team.
            league_home_avg: League average of home goals (default DEFAULT_HOME_XG).
            league_away_avg: League average of away goals (default DEFAULT_AWAY_XG).
            league_total_avg: Accepted for interface compatibility; not used
                in the calculation.

        Returns:
            (home_xg, away_xg), clamped to [0.3, 4.0] and [0.2, 3.5].
        """
        if league_home_avg is None:
            league_home_avg = self.DEFAULT_HOME_XG
        if league_away_avg is None:
            league_away_avg = self.DEFAULT_AWAY_XG

        # A strength of 1.0 (league average) is assumed when the league
        # average is not positive.
        # NOTE(review): the away defence is divided by league_away_avg and the
        # home defence by league_home_avg. Goals conceded by away sides are
        # goals scored by home sides, so the opposite averages look like the
        # intended denominators. Left unchanged because downstream models may
        # be fitted to the current values; confirm before changing.
        home_attack = home_goals_avg / league_home_avg if league_home_avg > 0 else 1.0
        away_defense = away_conceded_avg / league_away_avg if league_away_avg > 0 else 1.0
        away_attack = away_goals_avg / league_away_avg if league_away_avg > 0 else 1.0
        home_defense = home_conceded_avg / league_home_avg if league_home_avg > 0 else 1.0

        home_xg = home_attack * away_defense * league_home_avg
        away_xg = away_attack * home_defense * league_away_avg

        # Clamp extreme values
        home_xg = max(0.3, min(home_xg, 4.0))
        away_xg = max(0.2, min(away_xg, 3.5))

        return home_xg, away_xg

    def calculate_score_matrix(
        self,
        home_xg: float,
        away_xg: float
    ) -> Dict[Tuple[int, int], float]:
        """
        Compute the probability of every exact score up to max_goals per team.

        Returns:
            Dict[(home_goals, away_goals)] = probability
        """
        goal_range = range(self.max_goals + 1)
        # Each side's distribution is computed once instead of once per cell.
        home_probs = [poisson_prob(home_xg, goals) for goals in goal_range]
        away_probs = [poisson_prob(away_xg, goals) for goals in goal_range]

        return {
            (home_goals, away_goals): home_probs[home_goals] * away_probs[away_goals]
            for home_goals in goal_range
            for away_goals in goal_range
        }

    def calculate_match_odds(
        self,
        home_xg: float,
        away_xg: float
    ) -> Tuple[float, float, float]:
        """
        Compute the 1X2 probabilities.

        The three sums are renormalised because the score grid is truncated
        at max_goals and therefore does not sum to exactly 1.

        Returns:
            (home_win, draw, away_win) probabilities
        """
        matrix = self.calculate_score_matrix(home_xg, away_xg)

        home_win = 0.0
        draw = 0.0
        away_win = 0.0

        for (h, a), prob in matrix.items():
            if h > a:
                home_win += prob
            elif h == a:
                draw += prob
            else:
                away_win += prob

        total = home_win + draw + away_win
        if total > 0:
            home_win /= total
            draw /= total
            away_win /= total

        return home_win, draw, away_win

    def calculate_over_under(
        self,
        home_xg: float,
        away_xg: float
    ) -> Dict[str, float]:
        """
        Compute over/under probabilities for the 1.5, 2.5 and 3.5 goal lines.

        The "under" side is summed directly and "over" is its complement.
        Every score with at most three total goals lies inside the grid when
        max_goals >= 3, so the mass cut off by the grid truncation (all of it
        high-scoring) ends up on the "over" side where it belongs.

        Returns:
            Dict with keys over_15, over_25, over_35, under_15, under_25, under_35.
        """
        matrix = self.calculate_score_matrix(home_xg, away_xg)

        under_15 = 0.0
        under_25 = 0.0
        under_35 = 0.0

        for (h, a), prob in matrix.items():
            total = h + a
            if total < 1.5:
                under_15 += prob
            if total < 2.5:
                under_25 += prob
            if total < 3.5:
                under_35 += prob

        return {
            "over_15": 1 - under_15,
            "over_25": 1 - under_25,
            "over_35": 1 - under_35,
            "under_15": under_15,
            "under_25": under_25,
            "under_35": under_35,
        }

    def calculate_btts(
        self,
        home_xg: float,
        away_xg: float
    ) -> Tuple[float, float]:
        """
        Compute the both-teams-to-score probability.

        Returns:
            (btts_yes, btts_no)
        """
        # P(team scores at least once) = 1 - P(team scores 0)
        home_scores = 1 - poisson_prob(home_xg, 0)
        away_scores = 1 - poisson_prob(away_xg, 0)

        # The two teams' goal counts are modelled as independent.
        btts_yes = home_scores * away_scores
        btts_no = 1 - btts_yes

        return btts_yes, btts_no

    def get_most_likely_scores(
        self,
        home_xg: float,
        away_xg: float,
        top_n: int = 5
    ) -> list:
        """
        Return the ``top_n`` most probable exact scores.

        Returns:
            List of {"score": "H-A", "probability": percent rounded to 0.1},
            most probable first.
        """
        matrix = self.calculate_score_matrix(home_xg, away_xg)

        ranked = sorted(matrix.items(), key=lambda item: item[1], reverse=True)

        return [
            {"score": f"{h}-{a}", "probability": round(prob * 100, 1)}
            for (h, a), prob in ranked[:top_n]
        ]

    def predict(
        self,
        home_goals_avg: float,
        home_conceded_avg: float,
        away_goals_avg: float,
        away_conceded_avg: float,
        league_home_avg: float = None,
        league_away_avg: float = None,
        league_total_avg: float = None
    ) -> "PoissonPrediction":
        """
        Run the full Poisson prediction.

        Arguments are forwarded to calculate_xg(). xG values are rounded to
        two decimals and probabilities to three.
        """
        prediction = PoissonPrediction()

        # 1. Expected goals
        home_xg, away_xg = self.calculate_xg(
            home_goals_avg, home_conceded_avg,
            away_goals_avg, away_conceded_avg,
            league_home_avg, league_away_avg, league_total_avg
        )

        prediction.home_xg = round(home_xg, 2)
        prediction.away_xg = round(away_xg, 2)
        prediction.total_xg = round(home_xg + away_xg, 2)

        # 2. Match result
        hw, d, aw = self.calculate_match_odds(home_xg, away_xg)
        prediction.home_win_prob = round(hw, 3)
        prediction.draw_prob = round(d, 3)
        prediction.away_win_prob = round(aw, 3)

        # 3. Over/under
        ou = self.calculate_over_under(home_xg, away_xg)
        prediction.over_15_prob = round(ou["over_15"], 3)
        prediction.over_25_prob = round(ou["over_25"], 3)
        prediction.over_35_prob = round(ou["over_35"], 3)
        prediction.under_15_prob = round(ou["under_15"], 3)
        prediction.under_25_prob = round(ou["under_25"], 3)
        prediction.under_35_prob = round(ou["under_35"], 3)

        # 4. Both teams to score
        btts_yes, btts_no = self.calculate_btts(home_xg, away_xg)
        prediction.btts_yes_prob = round(btts_yes, 3)
        prediction.btts_no_prob = round(btts_no, 3)

        # 5. Most likely exact scores
        prediction.most_likely_scores = self.get_most_likely_scores(home_xg, away_xg)

        return prediction

    def get_features(
        self,
        home_goals_avg: float,
        home_conceded_avg: float,
        away_goals_avg: float,
        away_conceded_avg: float,
        league_home_avg: float = None,
        league_away_avg: float = None,
        league_total_avg: float = None
    ) -> Dict[str, float]:
        """
        Return the prediction as a flat feature dict for the model.

        Arguments are forwarded to predict().
        """
        pred = self.predict(
            home_goals_avg, home_conceded_avg,
            away_goals_avg, away_conceded_avg,
            league_home_avg, league_away_avg, league_total_avg
        )

        return {
            "poisson_home_xg": pred.home_xg,
            "poisson_away_xg": pred.away_xg,
            "poisson_total_xg": pred.total_xg,
            "poisson_home_win": pred.home_win_prob,
            "poisson_draw": pred.draw_prob,
            "poisson_away_win": pred.away_win_prob,
            "poisson_over_15": pred.over_15_prob,
            "poisson_over_25": pred.over_25_prob,
            "poisson_over_35": pred.over_35_prob,
            "poisson_btts_yes": pred.btts_yes_prob,
        }
|
||||
|
||||
|
||||
# Singleton
|
||||
_engine_instance = None


def get_poisson_engine() -> PoissonEngine:
    """Return the shared PoissonEngine, creating it on the first call."""
    global _engine_instance
    if _engine_instance is not None:
        return _engine_instance
    _engine_instance = PoissonEngine()
    return _engine_instance
|
||||
|
||||
|
||||
# Test
|
||||
if __name__ == "__main__":
    engine = get_poisson_engine()

    # Example: strong home side against a weak away side
    banner = "=" * 60
    print(banner)
    print("POISSON ENGINE TEST")
    print("Galatasaray (ev) vs Antalyaspor (deplasman)")
    print(banner)

    pred = engine.predict(
        home_goals_avg=2.1,     # home team's scoring average at home
        home_conceded_avg=0.8,  # home team's conceding average at home
        away_goals_avg=0.9,     # away team's scoring average away
        away_conceded_avg=1.8,  # away team's conceding average away
        league_home_avg=1.5,
        league_away_avg=1.1
    )

    print(f"\n📊 Expected Goals:")
    for label, value in (
        ("Ev Sahibi xG", pred.home_xg),
        ("Deplasman xG", pred.away_xg),
        ("Toplam xG", pred.total_xg),
    ):
        print(f" {label}: {value}")

    # Remaining sections all print probabilities as percentages.
    percent_sections = (
        ("\n🎯 Maç Sonucu:", (
            ("1 (Ev)", pred.home_win_prob),
            ("X (Beraberlik)", pred.draw_prob),
            ("2 (Deplasman)", pred.away_win_prob),
        )),
        ("\n⚽ Alt/Üst:", (
            ("2.5 Üst", pred.over_25_prob),
            ("2.5 Alt", pred.under_25_prob),
        )),
        ("\n🤝 Karşılıklı Gol:", (
            ("KG Var", pred.btts_yes_prob),
            ("KG Yok", pred.btts_no_prob),
        )),
    )
    for heading, rows in percent_sections:
        print(heading)
        for label, probability in rows:
            print(f" {label}: {probability*100:.1f}%")

    print(f"\n📈 En Olası Skorlar:")
    for score_data in pred.most_likely_scores:
        print(f" {score_data['score']}: {score_data['probability']}%")
|
||||
Executable
+368
@@ -0,0 +1,368 @@
|
||||
"""
|
||||
Referee Engine - V9 Feature
|
||||
Hakem profilleri ve maç etki analizi.
|
||||
|
||||
Analiz Edilen Metrikler:
|
||||
- Ortalama kart sayısı (sarı/kırmızı)
|
||||
- Penaltı verme eğilimi
|
||||
- Ev sahibi lehine karar oranı
|
||||
- Maç başına toplam gol ortalaması
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import Dict, Optional, List
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
|
||||
try:
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
except ImportError:
|
||||
psycopg2 = None
|
||||
|
||||
|
||||
@dataclass
class RefereeProfile:
    """Aggregated statistics for one referee."""
    referee_name: str
    matches_count: int = 0

    # Card statistics (per match)
    avg_yellow_cards: float = 0.0
    avg_red_cards: float = 0.0
    total_cards_per_match: float = 0.0

    # Penalty statistics
    penalty_rate: float = 0.0  # Share of matches in which a penalty was recorded

    # Home-side tendency
    home_win_rate: float = 0.0
    home_bias: float = 0.0  # -1 (away bias) to +1 (home bias)

    # Goal statistics
    avg_goals_per_match: float = 0.0
    over_25_rate: float = 0.0
|
||||
|
||||
|
||||
@dataclass
class RefereeFeatures:
    """Referee features in the shape consumed by the model."""
    referee_name: str = ""
    referee_matches: int = 0
    referee_avg_yellow: float = 0.0
    referee_avg_red: float = 0.0
    referee_cards_total: float = 0.0
    referee_penalty_rate: float = 0.0
    referee_home_bias: float = 0.0
    referee_avg_goals: float = 0.0
    referee_over25_rate: float = 0.0
    referee_experience: float = 0.0  # 0-1 normalized

    def to_dict(self) -> Dict[str, float]:
        """Return the numeric features as a dict; the referee name is left out."""
        # The match count is the only integer field, so it is converted explicitly.
        numeric = {'referee_matches': float(self.referee_matches)}
        for attr in (
            'referee_avg_yellow',
            'referee_avg_red',
            'referee_cards_total',
            'referee_penalty_rate',
            'referee_home_bias',
            'referee_avg_goals',
            'referee_over25_rate',
            'referee_experience',
        ):
            numeric[attr] = getattr(self, attr)
        return numeric
|
||||
|
||||
|
||||
class RefereeEngine:
|
||||
"""
|
||||
Hakem analiz motoru.
|
||||
|
||||
Hakemlerin geçmiş maçlarını analiz ederek:
|
||||
- Kart eğilimlerini
|
||||
- Ev sahibi bias'ını
|
||||
- Gol ortalamasını
|
||||
hesaplar.
|
||||
"""
|
||||
|
||||
# Ana hakem rolü ID'si (genellikle 1 veya "Hakem")
|
||||
MAIN_REFEREE_ROLE_ID = 1
|
||||
|
||||
def __init__(self):
|
||||
self.conn = None
|
||||
self._referee_cache: Dict[str, RefereeProfile] = {}
|
||||
self._cache_loaded = False
|
||||
|
||||
def _connect_db(self):
|
||||
if psycopg2 is None:
|
||||
return None
|
||||
try:
|
||||
from data.db import get_clean_dsn
|
||||
self.conn = psycopg2.connect(get_clean_dsn())
|
||||
return self.conn
|
||||
except Exception as e:
|
||||
print(f"[RefereeEngine] DB connection failed: {e}")
|
||||
return None
|
||||
|
||||
def get_conn(self):
|
||||
if self.conn is None or self.conn.closed:
|
||||
self._connect_db()
|
||||
return self.conn
|
||||
|
||||
def _get_main_referee_role_id(self) -> int:
|
||||
"""Ana hakem rolü ID'sini bul"""
|
||||
conn = self.get_conn()
|
||||
if conn is None:
|
||||
return self.MAIN_REFEREE_ROLE_ID
|
||||
|
||||
try:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("""
|
||||
SELECT id FROM official_roles
|
||||
WHERE LOWER(name) LIKE '%%hakem%%'
|
||||
AND LOWER(name) NOT LIKE '%%yardımcı%%'
|
||||
AND LOWER(name) NOT LIKE '%%dördüncü%%'
|
||||
LIMIT 1
|
||||
""")
|
||||
result = cur.fetchone()
|
||||
if result:
|
||||
return result[0]
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return self.MAIN_REFEREE_ROLE_ID
|
||||
|
||||
def get_referee_for_match(self, match_id: str) -> Optional[str]:
|
||||
"""Maçın ana hakemini bul"""
|
||||
conn = self.get_conn()
|
||||
if conn is None:
|
||||
return None
|
||||
|
||||
try:
|
||||
main_role_id = self._get_main_referee_role_id()
|
||||
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("""
|
||||
SELECT name FROM match_officials
|
||||
WHERE match_id = %s AND role_id = %s
|
||||
LIMIT 1
|
||||
""", (match_id, main_role_id))
|
||||
result = cur.fetchone()
|
||||
return result[0] if result else None
|
||||
except Exception as e:
|
||||
print(f"[RefereeEngine] Error getting referee: {e}")
|
||||
return None
|
||||
|
||||
def calculate_referee_profile(self, referee_name: str, league_id: str = None) -> RefereeProfile:
|
||||
"""Hakemin maçlarını analiz et. league_id verilirse sadece o ligteki maçları kullanır."""
|
||||
|
||||
# Composite cache key — aynı isim farklı liglerde farklı profil
|
||||
cache_key = (referee_name, league_id)
|
||||
if cache_key in self._referee_cache:
|
||||
return self._referee_cache[cache_key]
|
||||
|
||||
profile = RefereeProfile(referee_name=referee_name)
|
||||
|
||||
conn = self.get_conn()
|
||||
if conn is None:
|
||||
return profile
|
||||
|
||||
try:
|
||||
main_role_id = self._get_main_referee_role_id()
|
||||
|
||||
with conn.cursor(cursor_factory=RealDictCursor) as cur:
|
||||
# Bu hakemin yönettiği maçları al (league_id varsa sadece o lig)
|
||||
if league_id:
|
||||
cur.execute("""
|
||||
SELECT m.id, m.score_home, m.score_away, m.home_team_id, m.away_team_id
|
||||
FROM matches m
|
||||
JOIN match_officials mo ON m.id = mo.match_id
|
||||
WHERE mo.name = %s
|
||||
AND mo.role_id = %s
|
||||
AND m.league_id = %s
|
||||
AND m.score_home IS NOT NULL
|
||||
AND m.score_away IS NOT NULL
|
||||
ORDER BY m.mst_utc DESC
|
||||
LIMIT 100
|
||||
""", (referee_name, main_role_id, league_id))
|
||||
else:
|
||||
cur.execute("""
|
||||
SELECT m.id, m.score_home, m.score_away, m.home_team_id, m.away_team_id
|
||||
FROM matches m
|
||||
JOIN match_officials mo ON m.id = mo.match_id
|
||||
WHERE mo.name = %s
|
||||
AND mo.role_id = %s
|
||||
AND m.score_home IS NOT NULL
|
||||
AND m.score_away IS NOT NULL
|
||||
ORDER BY m.mst_utc DESC
|
||||
LIMIT 100
|
||||
""", (referee_name, main_role_id))
|
||||
|
||||
matches = cur.fetchall()
|
||||
profile.matches_count = len(matches)
|
||||
|
||||
if profile.matches_count == 0:
|
||||
return profile
|
||||
|
||||
match_ids = [m['id'] for m in matches]
|
||||
|
||||
# Kart istatistikleri
|
||||
cur.execute("""
|
||||
SELECT
|
||||
COUNT(*) FILTER (WHERE event_subtype ILIKE '%%yellow%%') as yellow_count,
|
||||
COUNT(*) FILTER (WHERE event_subtype ILIKE '%%red%%' OR event_subtype ILIKE '%%second%%') as red_count
|
||||
FROM match_player_events
|
||||
WHERE match_id = ANY(%s) AND event_type = 'card'
|
||||
""", (match_ids,))
|
||||
|
||||
card_stats = cur.fetchone()
|
||||
if card_stats:
|
||||
profile.avg_yellow_cards = (card_stats['yellow_count'] or 0) / profile.matches_count
|
||||
profile.avg_red_cards = (card_stats['red_count'] or 0) / profile.matches_count
|
||||
profile.total_cards_per_match = profile.avg_yellow_cards + profile.avg_red_cards
|
||||
|
||||
# Penaltı istatistikleri
|
||||
cur.execute("""
|
||||
SELECT COUNT(DISTINCT match_id) as penalty_matches
|
||||
FROM match_player_events
|
||||
WHERE match_id = ANY(%s)
|
||||
AND event_type = 'goal'
|
||||
AND event_subtype ILIKE '%%penaltı%%'
|
||||
""", (match_ids,))
|
||||
|
||||
penalty_stats = cur.fetchone()
|
||||
if penalty_stats:
|
||||
profile.penalty_rate = (penalty_stats['penalty_matches'] or 0) / profile.matches_count
|
||||
|
||||
# Ev sahibi eğilimi ve gol ortalaması
|
||||
home_wins = 0
|
||||
away_wins = 0
|
||||
draws = 0
|
||||
total_goals = 0
|
||||
over_25_count = 0
|
||||
|
||||
for m in matches:
|
||||
goals = (m['score_home'] or 0) + (m['score_away'] or 0)
|
||||
total_goals += goals
|
||||
|
||||
if goals > 2.5:
|
||||
over_25_count += 1
|
||||
|
||||
if m['score_home'] > m['score_away']:
|
||||
home_wins += 1
|
||||
elif m['score_home'] < m['score_away']:
|
||||
away_wins += 1
|
||||
else:
|
||||
draws += 1
|
||||
|
||||
profile.avg_goals_per_match = total_goals / profile.matches_count
|
||||
profile.over_25_rate = over_25_count / profile.matches_count
|
||||
profile.home_win_rate = home_wins / profile.matches_count
|
||||
|
||||
# Home bias: -1 (away favors) to +1 (home favors)
|
||||
# Normal lig ortalaması ~%46 ev sahibi, buna göre normalize
|
||||
expected_home_rate = 0.46
|
||||
profile.home_bias = (profile.home_win_rate - expected_home_rate) * 2
|
||||
profile.home_bias = max(-1, min(1, profile.home_bias))
|
||||
|
||||
# Cache'e ekle
|
||||
self._referee_cache[cache_key] = profile
|
||||
return profile
|
||||
|
||||
except Exception as e:
|
||||
print(f"[RefereeEngine] Error calculating profile: {e}")
|
||||
return profile
|
||||
|
||||
def get_features(self, match_id: str, league_id: str = None) -> Dict[str, float]:
|
||||
"""
|
||||
Maç için hakem feature'larını hesapla.
|
||||
|
||||
Args:
|
||||
match_id: Maç ID'si
|
||||
league_id: Lig ID'si (opsiyonel — isim çakışmalarını önlemek için)
|
||||
|
||||
Returns:
|
||||
Hakem feature'ları dict olarak
|
||||
"""
|
||||
features = RefereeFeatures()
|
||||
|
||||
# Hakemi bul
|
||||
referee_name = self.get_referee_for_match(match_id)
|
||||
if referee_name is None:
|
||||
return features.to_dict()
|
||||
|
||||
features.referee_name = referee_name
|
||||
|
||||
# Profili hesapla (league_id ile scope'lanmış)
|
||||
profile = self.calculate_referee_profile(referee_name, league_id=league_id)
|
||||
|
||||
features.referee_matches = profile.matches_count
|
||||
features.referee_avg_yellow = profile.avg_yellow_cards
|
||||
features.referee_avg_red = profile.avg_red_cards
|
||||
features.referee_cards_total = profile.total_cards_per_match
|
||||
features.referee_penalty_rate = profile.penalty_rate
|
||||
features.referee_home_bias = profile.home_bias
|
||||
features.referee_avg_goals = profile.avg_goals_per_match
|
||||
features.referee_over25_rate = profile.over_25_rate
|
||||
|
||||
# Deneyim: 50+ maç = 1.0, 0 maç = 0.0
|
||||
features.referee_experience = min(profile.matches_count / 50, 1.0)
|
||||
|
||||
return features.to_dict()
|
||||
|
||||
def get_features_by_name(self, referee_name: str, league_id: str = None) -> Dict[str, float]:
    """
    Compute the referee features from a referee name.

    Args:
        referee_name: Name of the referee. A falsy value (``None`` or an
            empty string) yields the default feature values.
        league_id: Optional league ID, forwarded to
            ``calculate_referee_profile`` to avoid referee-name collisions.

    Returns:
        The referee features as a dict.
    """
    result = RefereeFeatures()

    if referee_name:
        result.referee_name = referee_name
        stats = self.calculate_referee_profile(referee_name, league_id=league_id)

        # (feature attribute, profile attribute) pairs copied one-to-one.
        copied = (
            ("referee_matches", "matches_count"),
            ("referee_avg_yellow", "avg_yellow_cards"),
            ("referee_avg_red", "avg_red_cards"),
            ("referee_cards_total", "total_cards_per_match"),
            ("referee_penalty_rate", "penalty_rate"),
            ("referee_home_bias", "home_bias"),
            ("referee_avg_goals", "avg_goals_per_match"),
            ("referee_over25_rate", "over_25_rate"),
        )
        for feature_attr, profile_attr in copied:
            setattr(result, feature_attr, getattr(stats, profile_attr))

        # Experience scales linearly with match count: 0 matches -> 0.0,
        # 50 or more matches -> 1.0.
        result.referee_experience = min(stats.matches_count / 50, 1.0)

    return result.to_dict()
|
||||
|
||||
|
||||
# Singleton instance
|
||||
_engine: Optional[RefereeEngine] = None


def get_referee_engine() -> RefereeEngine:
    """Return the module-wide RefereeEngine, creating it on the first call."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = RefereeEngine()
    return _engine
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: print the features computed for one referee name.
    referee_engine = get_referee_engine()

    print("\n🧪 Referee Engine Test")
    print("=" * 50)

    # Look the referee up by name rather than through a match ID.
    test_referee = "Cüneyt Çakır"
    referee_features = referee_engine.get_features_by_name(test_referee)

    print(f"\n📊 Hakem: {test_referee}")
    for key, value in referee_features.items():
        print(f" {key}: {value:.3f}")
|
||||
Executable
+408
@@ -0,0 +1,408 @@
|
||||
"""
|
||||
Sidelined Analyzer — Injury & Suspension Impact Calculator
|
||||
==========================================================
|
||||
Parses sidelined JSON from live_matches and calculates
|
||||
position-weighted missing player impact using ACTUAL player
|
||||
statistics from the database (goals, assists, starting frequency).
|
||||
|
||||
Senior ML Engineer Principle: No magic numbers — all weights from config.
|
||||
Data Quality: Cross-reference sidelined IDs with DB for real impact.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, List, Optional, Any, Tuple
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
try:
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
except ImportError:
|
||||
psycopg2 = None
|
||||
|
||||
from config.config_loader import get_config
|
||||
|
||||
|
||||
@dataclass
class PlayerImpactDetail:
    """Impact record for one sidelined player, as produced by SidelinedAnalyzer.analyze."""

    # --- identity (taken from the sidelined feed entry) ---
    player_id: str
    player_name: str
    position: str  # short position code, e.g. "K", "D", "O" or "F"

    # --- result ---
    impact_score: float  # this player's contribution to the team impact, before normalisation

    # --- statistics looked up in the database (0 when the player has no rows) ---
    db_goals: int = 0
    db_assists: int = 0
    db_starts: int = 0
    db_rating: float = 0.0  # rating derived from the DB stats (or the fallback rating)

    # --- flags ---
    is_key_player: bool = False
    adaptation_applied: bool = False  # True when the long-absence discount was applied
|
||||
|
||||
|
||||
@dataclass
class SidelinedImpact:
    """Aggregated sidelined-player impact for a single team."""

    # Number of sidelined players reported for the team.
    total_sidelined: int = 0
    # Team-level impact after normalisation (0.0 - 1.0).
    impact_score: float = 0.0
    # True when a goalkeeper is missing or 2+ players share a missing position.
    key_position_missing: bool = False
    # How many of the missing players were classified as key players.
    key_players_missing: int = 0
    # Count of missing players per position code.
    position_breakdown: Dict[str, int] = field(default_factory=dict)
    # One structured record per analysed player.
    player_details: List[PlayerImpactDetail] = field(default_factory=list)
    # One human-readable summary line per analysed player.
    details: List[str] = field(default_factory=list)
|
||||
|
||||
|
||||
class SidelinedAnalyzer:
    """
    Score the impact of a team's sidelined players using DB-backed statistics.

    Per-player impact, as computed in ``analyze``:
        player_impact = position_weight * rating_factor * adaptation_factor * type_factor

    Where:
        - position_weight: from config (``sidelined.position_weights``).
        - rating_factor: rating derived from the player's goals, assists and
          start rate in the database; when the player has no DB rows, the
          feed's ``average`` divided by ``max_rating`` is used instead.
          Floored at 0.15 in both cases.
        - adaptation_factor: ``adaptation_discount`` once ``matchesMissed``
          reaches ``adaptation_threshold`` (the team has coped), else 1.0.
        - type_factor: 1.0 when ``type == "injury"``, 0.8 for anything else.

    Statistics are read from ``match_player_events`` and
    ``match_player_participation``, restricted to matches with
    ``status = 'FT'``.
    """

    def __init__(self):
        self.config = get_config()
        self.conn = None
        self._load_config()
        self._connect_db()

    def _load_config(self):
        """Read every ``sidelined.*`` setting once, passing the shown fallback to ``cfg.get``."""
        cfg = self.config
        # Weight per position code; "K" is treated as goalkeeper by analyze().
        self.position_weights = cfg.get("sidelined.position_weights", {
            "K": 0.35, "D": 0.20, "O": 0.25, "F": 0.30
        })
        # Scale of the feed's "average" rating used by the fallback path.
        self.max_rating = cfg.get("sidelined.max_rating", 10)
        # matchesMissed at or above this value triggers the adaptation discount.
        self.adaptation_threshold = cfg.get("sidelined.adaptation_threshold", 10)
        self.adaptation_discount = cfg.get("sidelined.adaptation_discount", 0.5)
        # Flat amount added to the team total when any goalkeeper is missing.
        self.goalkeeper_penalty = cfg.get("sidelined.goalkeeper_penalty", 0.15)
        # NOTE(review): confidence_boost is loaded but not read in this class.
        self.confidence_boost = cfg.get("sidelined.confidence_boost", 10)
        # Upper bound of the normalised team impact.
        self.max_impact = cfg.get("sidelined.max_impact", 0.85)
        # Goal count from which a player counts as a key player.
        self.key_player_threshold = cfg.get("sidelined.key_player_threshold", 3)
        # NOTE(review): recent_matches_lookback is loaded but the queries in
        # _fetch_player_stats do not apply it; they aggregate over every
        # finished match of the player.
        self.recent_matches_lookback = cfg.get("sidelined.recent_matches_lookback", 15)

    @staticmethod
    def _safe_int(value: Any, default: int = 0) -> int:
        """Convert ``value`` to int (via float); return ``default`` for None, "" or unparsable input."""
        try:
            if value is None or value == "":
                return default
            return int(float(value))
        except (TypeError, ValueError):
            return default

    @staticmethod
    def _safe_float(value: Any, default: float = 0.0) -> float:
        """Convert ``value`` to float; return ``default`` for None, "" or unparsable input."""
        try:
            if value is None or value == "":
                return default
            return float(value)
        except (TypeError, ValueError):
            return default

    def _connect_db(self):
        """
        Open the DB connection; on any failure leave ``self.conn`` as None.

        Called from ``__init__`` and again from ``_get_conn`` when the
        connection is missing or closed. Does nothing when psycopg2 is not
        installed.
        """
        if psycopg2 is None:
            return
        try:
            from data.db import get_clean_dsn
            self.conn = psycopg2.connect(get_clean_dsn())
        except Exception as e:
            # Best effort: the analyzer keeps working without DB statistics.
            print(f"[SidelinedAnalyzer] DB connection failed: {e}")
            self.conn = None

    def _get_conn(self):
        """Return the current connection, attempting a reconnect if it is missing or closed."""
        if self.conn is None or self.conn.closed:
            self._connect_db()
        return self.conn

    def _fetch_player_stats(self, player_ids: List[str]) -> Dict[str, Dict]:
        """
        Fetch player statistics from the DB for the given player IDs.

        Returns:
            Dict keyed by player_id with plain-int values:
                goals, assists, starts, matches
            Players without any matching row are absent from the result.
            Returns ``{}`` when there is no connection or no IDs. On a query
            error the error is printed, the transaction is rolled back and
            whatever was collected so far is returned.
        """
        conn = self._get_conn()
        if not conn or not player_ids:
            return {}

        stats: Dict[str, Dict] = {}
        try:
            # The context manager closes the cursor on the error path too.
            with conn.cursor(cursor_factory=RealDictCursor) as cur:
                # 1. Goals from match_player_events + assists via assist_player_id
                cur.execute("""
                    SELECT
                        sub.player_id,
                        SUM(sub.goals) AS goals,
                        SUM(sub.assists) AS assists
                    FROM (
                        -- Goals: player scored
                        SELECT mpe.player_id,
                               COUNT(*) AS goals,
                               0 AS assists
                        FROM match_player_events mpe
                        JOIN matches m ON mpe.match_id = m.id
                        WHERE mpe.player_id = ANY(%s)
                          AND mpe.event_type = 'goal'
                          AND m.status = 'FT'
                        GROUP BY mpe.player_id

                        UNION ALL

                        -- Assists: player assisted
                        SELECT mpe.assist_player_id AS player_id,
                               0 AS goals,
                               COUNT(*) AS assists
                        FROM match_player_events mpe
                        JOIN matches m ON mpe.match_id = m.id
                        WHERE mpe.assist_player_id = ANY(%s)
                          AND mpe.event_type = 'goal'
                          AND m.status = 'FT'
                        GROUP BY mpe.assist_player_id
                    ) sub
                    GROUP BY sub.player_id
                """, (player_ids, player_ids))

                for row in cur.fetchall():
                    # SUM() arrives as Decimal with psycopg2's default numeric
                    # adaptation; store plain ints so the values fit the
                    # int-typed fields downstream.
                    stats[row["player_id"]] = {
                        "goals": self._safe_int(row["goals"]),
                        "assists": self._safe_int(row["assists"]),
                        "starts": 0,
                        "matches": 0
                    }

                # 2. Starting frequency from match_player_participation
                cur.execute("""
                    SELECT
                        mpp.player_id,
                        COUNT(*) AS total_matches,
                        COUNT(*) FILTER (WHERE mpp.is_starting = true) AS starts
                    FROM match_player_participation mpp
                    JOIN matches m ON mpp.match_id = m.id
                    WHERE mpp.player_id = ANY(%s)
                      AND m.status = 'FT'
                    GROUP BY mpp.player_id
                """, (player_ids,))

                for row in cur.fetchall():
                    entry = stats.setdefault(
                        row["player_id"],
                        {"goals": 0, "assists": 0, "starts": 0, "matches": 0},
                    )
                    entry["starts"] = self._safe_int(row["starts"])
                    entry["matches"] = self._safe_int(row["total_matches"])
        except Exception as e:
            print(f"[SidelinedAnalyzer] DB query error: {e}")
            try:
                # Leave the shared connection usable for the next call.
                conn.rollback()
            except Exception:
                pass

        return stats

    def _calculate_db_rating(self, db_stats: Dict, position: str) -> float:
        """
        Calculate a player rating from DB statistics.

        The rating is in 0.0 - 1.0, where 1.0 = absolute key player.

        Factors:
            - Goals (weighted by position: forwards most, goalkeepers least)
            - Assists
            - Start rate (starts / matches; regulars > squad players)

        Args:
            db_stats: Dict with optional keys goals, assists, starts, matches.
                Missing or non-numeric values count as 0.
            position: Position code ("F", "O", "D", "K"); any other value
                uses the generic weights 0.25 / 0.15 / 0.5.

        Returns:
            The rating rounded to 4 decimals and capped at 1.0.
        """
        goals = self._safe_float(db_stats.get("goals", 0))
        assists = self._safe_float(db_stats.get("assists", 0))
        starts = self._safe_float(db_stats.get("starts", 0))
        matches = self._safe_float(db_stats.get("matches", 0))

        # Component weights by position:
        #   F: goals matter most      O: balanced
        #   D: starts outweigh goals  K: starts are nearly everything
        goal_weight = {"F": 0.5, "O": 0.35, "D": 0.15, "K": 0.05}.get(position, 0.25)
        assist_weight = {"F": 0.2, "O": 0.3, "D": 0.15, "K": 0.0}.get(position, 0.15)
        start_weight = {"F": 0.3, "O": 0.35, "D": 0.7, "K": 0.95}.get(position, 0.5)

        # Normalize each component to 0-1.
        # Goals: 5+ goals = max
        goal_factor = min(goals / 5.0, 1.0) if goals > 0 else 0.0
        # Assists: 4+ assists = max
        assist_factor = min(assists / 4.0, 1.0) if assists > 0 else 0.0
        # Starts: an 80%+ start rate = max; max(matches, 1) avoids dividing by zero
        start_rate = starts / max(matches, 1)
        start_factor = min(start_rate / 0.8, 1.0)

        rating = (goal_factor * goal_weight +
                  assist_factor * assist_weight +
                  start_factor * start_weight)

        return round(min(rating, 1.0), 4)

    def analyze(self, team_data: Optional[Dict[str, Any]]) -> SidelinedImpact:
        """
        Analyze sidelined data for a single team using DB-backed stats.

        Args:
            team_data: dict with a 'players' list and a 'totalSidelined'
                count. Entries of 'players' that are not dicts are ignored.

        Returns:
            SidelinedImpact with the calculated impact score and breakdown.
            An empty SidelinedImpact is returned for missing or non-dict
            input; when 'players' is empty only ``total_sidelined`` is set
            (from 'totalSidelined').
        """
        if not team_data or not isinstance(team_data, dict):
            return SidelinedImpact()

        players = team_data.get("players", [])
        if not players:
            return SidelinedImpact(
                total_sidelined=team_data.get("totalSidelined", 0)
            )

        # Collect player IDs for one batched DB lookup (not N+1). Non-dict
        # entries are filtered here as well, matching the guard in the loop
        # below; otherwise a single malformed entry would raise.
        player_ids = [
            p["playerId"] for p in players
            if isinstance(p, dict) and p.get("playerId")
        ]
        db_stats = self._fetch_player_stats(player_ids) if player_ids else {}

        total_impact = 0.0
        position_counts: Dict[str, int] = {}
        player_details: List[PlayerImpactDetail] = []
        details: List[str] = []
        has_gk_missing = False
        key_players_count = 0

        for player in players:
            if not isinstance(player, dict):
                continue

            pos = player.get("positionShort", "O")
            name = player.get("playerName", "Unknown")
            pid = player.get("playerId", "")
            matches_missed = self._safe_int(player.get("matchesMissed", 0), 0)
            player_type = player.get("type", "other")
            mackolik_avg = self._safe_float(player.get("average", 0), 0.0)

            position_counts[pos] = position_counts.get(pos, 0) + 1

            if pos == "K":
                has_gk_missing = True

            # === Rating: DB first, feed average as fallback ===
            p_db_stats = db_stats.get(pid, {})

            if p_db_stats:
                db_rating = self._calculate_db_rating(p_db_stats, pos)
            else:
                # Fallback to the feed's average, normalized by max_rating.
                db_rating = min(mackolik_avg / self.max_rating, 1.0) if self.max_rating > 0 else 0.3
                db_rating = max(db_rating, 0.15)  # Minimum floor

            # Key player: high rating, or enough goals in the DB.
            is_key = db_rating >= 0.5 or (
                self._safe_int(p_db_stats.get("goals", 0), 0) >= self.key_player_threshold
            )
            if is_key:
                key_players_count += 1

            # === Impact calculation ===
            pos_weight = self.position_weights.get(pos, 0.20)

            # Higher rated = bigger loss; even unknown players keep a minimum impact.
            rating_factor = max(db_rating, 0.15)

            # Adaptation: the team has coped if the player missed many matches.
            adapted = matches_missed >= self.adaptation_threshold
            adapt_factor = self.adaptation_discount if adapted else 1.0

            # Anything other than an injury is weighted slightly lower.
            type_factor = 1.0 if player_type == "injury" else 0.8

            player_impact = pos_weight * rating_factor * adapt_factor * type_factor
            total_impact += player_impact

            detail = PlayerImpactDetail(
                player_id=pid,
                player_name=name,
                position=pos,
                impact_score=round(player_impact, 4),
                db_goals=p_db_stats.get("goals", 0),
                db_assists=p_db_stats.get("assists", 0),
                db_starts=p_db_stats.get("starts", 0),
                db_rating=db_rating,
                is_key_player=is_key,
                adaptation_applied=adapted
            )
            player_details.append(detail)

            db_info = f"G:{detail.db_goals} A:{detail.db_assists} S:{detail.db_starts}" if p_db_stats else "no DB data"
            details.append(
                f"{name} ({pos}, db_rating:{db_rating:.2f}, {db_info}) → impact:{player_impact:.3f}"
                + (" ⭐ KEY" if is_key else "")
                + (f" [adapted, {matches_missed} missed]" if adapted else "")
            )

        # A missing goalkeeper adds a flat penalty on top of the per-player sum.
        if has_gk_missing:
            total_impact += self.goalkeeper_penalty

        key_position_missing = has_gk_missing or any(v >= 2 for v in position_counts.values())

        # Scale the raw sum down and cap it at max_impact.
        normalization_cap = 1.5
        normalized_impact = min(total_impact / normalization_cap, self.max_impact)

        return SidelinedImpact(
            total_sidelined=len(players),
            impact_score=round(normalized_impact, 4),
            key_position_missing=key_position_missing,
            key_players_missing=key_players_count,
            position_breakdown=position_counts,
            player_details=player_details,
            details=details
        )

    def analyze_match(self, sidelined_json: Optional[Dict[str, Any]]) -> Tuple[SidelinedImpact, SidelinedImpact]:
        """
        Analyze sidelined data for both teams.

        Args:
            sidelined_json: dict with 'homeTeam' and 'awayTeam' entries, each
                in the shape accepted by ``analyze``.

        Returns:
            (home_impact, away_impact); two empty impacts for missing or
            non-dict input.
        """
        if not sidelined_json or not isinstance(sidelined_json, dict):
            return SidelinedImpact(), SidelinedImpact()

        home_impact = self.analyze(sidelined_json.get("homeTeam"))
        away_impact = self.analyze(sidelined_json.get("awayTeam"))
        return home_impact, away_impact
|
||||
|
||||
|
||||
# Singleton
|
||||
_analyzer: Optional[SidelinedAnalyzer] = None


def get_sidelined_analyzer() -> SidelinedAnalyzer:
    """Return the module-wide SidelinedAnalyzer, creating it on the first call."""
    global _analyzer
    if _analyzer is not None:
        return _analyzer
    _analyzer = SidelinedAnalyzer()
    return _analyzer
|
||||
@@ -0,0 +1,357 @@
|
||||
"""
|
||||
Smart Bet Recommender
|
||||
=====================
|
||||
|
||||
Skor tahminine göre akıllı bahis önerileri yapan sistem.
|
||||
|
||||
Örnek: Beşiktaş-Galatasaray için model 3-1 tahmin ediyor
|
||||
→ DÜŞÜK RİSK: 1.5 Üst (yüksek ihtimal tutar)
|
||||
→ ORTA RİSK: MS 1 + 2.5 Üst (orta ihtimal)
|
||||
→ YÜKSEK RİSK: 3.5 Üst veya skor 3-1 (düşük ihtimal, yüksek kazanç)
|
||||
|
||||
Ayrıca kombinasyonlar:
|
||||
- MS 1 + 1.5 Üst
|
||||
- MS 1 + KG Var
|
||||
- Her iki takım skor > 0.5 (her takım en az 1 gol atar)
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class RiskLevel(Enum):
    """Risk tier attached to a bet recommendation."""

    # High probability, low odds (safe).
    LOW = "LOW"
    # Medium probability, medium odds.
    MEDIUM = "MEDIUM"
    # Low probability, high payout.
    HIGH = "HIGH"
    # Very low probability, very high payout.
    EXTREME = "EXTREME"
|
||||
|
||||
|
||||
@dataclass
class BetRecommendation:
    """A single bet recommendation."""

    market: str          # market name (e.g. "MS 1", "2.5 Üst")
    pick: str            # selection (e.g. "1", "OVER", "YES")
    odds: float          # price of the selection
    probability: float   # model probability (0-1)
    confidence: float    # confidence level (0-100)
    risk_level: RiskLevel

    def to_dict(self) -> dict:
        """Return a plain dict: probability as a percentage, both numbers rounded to one decimal."""
        payload = {"market": self.market, "pick": self.pick, "odds": self.odds}
        payload["probability"] = round(self.probability * 100, 1)
        payload["confidence"] = round(self.confidence, 1)
        # The enum is flattened to its string value.
        payload["risk_level"] = self.risk_level.value
        return payload
|
||||
|
||||
|
||||
@dataclass
class MatchPredictionSet:
    """The complete prediction set for one match: probabilities plus tiered recommendations."""

    match_name: str
    predicted_score: Tuple[int, int]  # (home, away)

    # Outcome and goal-market probabilities (0-1).
    home_win_prob: float
    draw_prob: float
    away_win_prob: float
    over_15_prob: float
    over_25_prob: float
    over_35_prob: float
    btts_yes_prob: float

    # Recommendations, one list per risk tier.
    low_risk_bets: List[BetRecommendation]
    medium_risk_bets: List[BetRecommendation]
    high_risk_bets: List[BetRecommendation]
    extreme_risk_bets: List[BetRecommendation]

    def to_dict(self) -> dict:
        """Return a plain dict: score as "H-A", probabilities as percentages, bets via their to_dict()."""

        def as_percent(prob):
            return round(prob * 100, 1)

        def dump(bets):
            return [bet.to_dict() for bet in bets]

        percentages = {
            "home_win": as_percent(self.home_win_prob),
            "draw": as_percent(self.draw_prob),
            "away_win": as_percent(self.away_win_prob),
            "over_15": as_percent(self.over_15_prob),
            "over_25": as_percent(self.over_25_prob),
            "over_35": as_percent(self.over_35_prob),
            "btts": as_percent(self.btts_yes_prob)
        }
        return {
            "match_name": self.match_name,
            "predicted_score": f"{self.predicted_score[0]}-{self.predicted_score[1]}",
            "probs": percentages,
            "low_risk": dump(self.low_risk_bets),
            "medium_risk": dump(self.medium_risk_bets),
            "high_risk": dump(self.high_risk_bets),
            "extreme_risk": dump(self.extreme_risk_bets)
        }
|
||||
|
||||
|
||||
class SmartBetRecommender:
    """
    Smart bet recommendation system.

    Turns a score prediction and market probabilities into bet suggestions
    grouped by risk tier. As implemented in ``recommend``:

    1. LOW RISK (probability >= 70%)
        - Over 1.5 goals
        - Double Chance on the stronger side (1X or X2)

    2. MEDIUM RISK (probability 50-70%)
        - Match result on the favourite
        - Over 2.5 goals
        - Both teams to score
        - Favourite + Over 2.5 combination (combined probability >= 30%)

    3. HIGH RISK
        - Over 3.5 goals (probability 30-50%)
        - Exact score, when the predicted score totals 4+ goals
        - Favourite + Over 3.5 combination

    4. EXTREME RISK
        - Favourite + BTTS + Over 2.5 combination (combined probability >= 15%)
    """

    # Probability thresholds
    PROB_LOW_RISK = 0.70     # >= 70% probability
    PROB_MEDIUM_RISK = 0.50  # 50-70% probability
    PROB_HIGH_RISK = 0.30    # 30-50% probability
    # below 30% = EXTREME

    def __init__(self):
        """The recommender keeps no per-instance state."""

    def _determine_risk(self, probability: float) -> RiskLevel:
        """Map a probability to its risk tier using the class thresholds."""
        ladder = (
            (self.PROB_LOW_RISK, RiskLevel.LOW),
            (self.PROB_MEDIUM_RISK, RiskLevel.MEDIUM),
            (self.PROB_HIGH_RISK, RiskLevel.HIGH),
        )
        for floor, level in ladder:
            if probability >= floor:
                return level
        return RiskLevel.EXTREME

    def _get_favorite(self, home_prob: float, draw_prob: float, away_prob: float) -> Tuple[str, float]:
        """Return the most likely result ("1", "2" or "X") with its probability; ties favour "1", then "2"."""
        if home_prob >= draw_prob and home_prob >= away_prob:
            return "1", home_prob
        if away_prob >= home_prob and away_prob >= draw_prob:
            return "2", away_prob
        return "X", draw_prob

    def _calculate_expected_goals(self, predicted_score: Tuple[int, int]) -> float:
        """Return the total number of goals in the predicted score."""
        home_goals = predicted_score[0]
        away_goals = predicted_score[1]
        return home_goals + away_goals

    def recommend(
        self,
        match_name: str,
        predicted_score: Tuple[int, int],
        probs: Dict[str, float],
        odds: Dict[str, float]
    ) -> MatchPredictionSet:
        """
        Build every bet recommendation for a match.

        Args:
            match_name: Name of the match.
            predicted_score: (home_goals, away_goals)
            probs: {"home_win": 0.55, "draw": 0.25, "away_win": 0.20,
                    "over_15": 0.85, "over_25": 0.65, "over_35": 0.35,
                    "btts_yes": 0.55}
            odds: {"1": 1.80, "X": 3.50, "2": 4.20,
                   "ou15_o": 1.25, "ou15_u": 3.80,
                   "ou25_o": 1.90, "ou25_u": 1.85,
                   "ou35_o": 3.20, "ou35_u": 1.30,
                   "btts_y": 1.75, "btts_n": 2.00}
            Missing keys in either dict fall back to built-in defaults.

        Returns:
            MatchPredictionSet with all recommendations.
        """
        p_home = probs.get("home_win", 0.33)
        p_draw = probs.get("draw", 0.33)
        p_away = probs.get("away_win", 0.33)
        p_over15 = probs.get("over_15", 0.70)
        p_over25 = probs.get("over_25", 0.50)
        p_over35 = probs.get("over_35", 0.30)
        p_btts = probs.get("btts_yes", 0.50)

        # Total goals implied by the predicted score.
        total_goals = self._calculate_expected_goals(predicted_score)

        # Most likely match result.
        favorite, p_fav = self._get_favorite(p_home, p_draw, p_away)

        # One list of recommendations per risk tier.
        buckets = {level: [] for level in RiskLevel}

        def add(level, market, pick, price, probability):
            # Confidence is always the probability expressed as a percentage.
            buckets[level].append(BetRecommendation(
                market=market,
                pick=pick,
                odds=price,
                probability=probability,
                confidence=probability * 100,
                risk_level=level
            ))

        # ========== LOW RISK ==========
        # Over 1.5 (the safest pick)
        if p_over15 >= self.PROB_LOW_RISK:
            add(RiskLevel.LOW, "1.5 Üst/Alt", "OVER", odds.get("ou15_o", 1.25), p_over15)

        # Double Chance on whichever side is stronger (none when level)
        if p_home > p_away:
            p_double = p_home + p_draw
            if p_double >= self.PROB_LOW_RISK:
                add(RiskLevel.LOW, "Double Chance", "1X", odds.get("dc_1x", 1.30), p_double)
        elif p_away > p_home:
            p_double = p_away + p_draw
            if p_double >= self.PROB_LOW_RISK:
                add(RiskLevel.LOW, "Double Chance", "X2", odds.get("dc_x2", 1.30), p_double)

        # ========== MEDIUM RISK ==========
        # Match result on the favourite
        if self.PROB_MEDIUM_RISK <= p_fav < self.PROB_LOW_RISK:
            add(RiskLevel.MEDIUM, "Maç Sonucu", favorite, odds.get(favorite, 2.00), p_fav)

        # Over 2.5
        if self.PROB_MEDIUM_RISK <= p_over25 < self.PROB_LOW_RISK:
            add(RiskLevel.MEDIUM, "2.5 Üst/Alt", "OVER", odds.get("ou25_o", 1.90), p_over25)

        # Both teams to score
        if self.PROB_MEDIUM_RISK <= p_btts < self.PROB_LOW_RISK:
            add(RiskLevel.MEDIUM, "Karşılıklı Gol", "YES", odds.get("btts_y", 1.75), p_btts)

        # Match result + Over 2.5 combination
        if p_fav >= 0.45 and p_over25 >= 0.50:
            combo_prob = p_fav * p_over25  # simple product
            combo_odds = odds.get(favorite, 2.00) * odds.get("ou25_o", 1.90)
            if combo_prob >= 0.30:  # at least 30% probability
                add(RiskLevel.MEDIUM, f"MS {favorite} + 2.5 Üst", f"{favorite} & OVER", combo_odds, combo_prob)

        # ========== HIGH RISK ==========
        # Over 3.5
        if self.PROB_HIGH_RISK <= p_over35 < self.PROB_MEDIUM_RISK:
            add(RiskLevel.HIGH, "3.5 Üst/Alt", "OVER", odds.get("ou35_o", 3.20), p_over35)

        # Exact score (only for high-scoring predictions)
        if total_goals >= 3.5:
            score_str = f"{predicted_score[0]}-{predicted_score[1]}"
            # Rough fixed estimate of the exact-score probability.
            if total_goals <= 4:
                score_prob = 0.15
            else:
                score_prob = 0.10
            # 8.0 is an estimated price; no exact-score odds are read from `odds`.
            add(RiskLevel.HIGH, "Tam Skor", score_str, 8.0, score_prob)

        # Match result + Over 3.5
        if p_fav >= 0.40 and p_over35 >= 0.30:
            combo_prob = p_fav * p_over35
            combo_odds = odds.get(favorite, 2.00) * odds.get("ou35_o", 3.20)
            add(RiskLevel.HIGH, f"MS {favorite} + 3.5 Üst", f"{favorite} & OVER", combo_odds, combo_prob)

        # ========== EXTREME RISK ==========
        # Three-leg combination
        if p_fav >= 0.50 and p_btts >= 0.50 and p_over25 >= 0.60:
            combo_prob = p_fav * p_btts * p_over25
            combo_odds = odds.get(favorite, 2.00) * odds.get("btts_y", 1.75) * odds.get("ou25_o", 1.90)
            if combo_prob >= 0.15:  # at least 15% probability
                add(
                    RiskLevel.EXTREME,
                    f"MS {favorite} + KG Var + 2.5 Üst",
                    f"{favorite} & BTTS & OVER",
                    combo_odds,
                    combo_prob,
                )

        return MatchPredictionSet(
            match_name=match_name,
            predicted_score=predicted_score,
            home_win_prob=p_home,
            draw_prob=p_draw,
            away_win_prob=p_away,
            over_15_prob=p_over15,
            over_25_prob=p_over25,
            over_35_prob=p_over35,
            btts_yes_prob=p_btts,
            low_risk_bets=buckets[RiskLevel.LOW],
            medium_risk_bets=buckets[RiskLevel.MEDIUM],
            high_risk_bets=buckets[RiskLevel.HIGH],
            extreme_risk_bets=buckets[RiskLevel.EXTREME]
        )
|
||||
|
||||
|
||||
# Singleton
|
||||
_recommender = None


def get_smart_bet_recommender() -> SmartBetRecommender:
    """Return the module-wide SmartBetRecommender, creating it on the first call."""
    global _recommender
    if _recommender is not None:
        return _recommender
    _recommender = SmartBetRecommender()
    return _recommender
|
||||
Executable
+582
@@ -0,0 +1,582 @@
|
||||
"""
|
||||
Squad Analysis Engine - V9 Feature
|
||||
Kadro ve oyuncu bazlı analiz.
|
||||
|
||||
Analiz Edilen Metrikler:
|
||||
- İlk 11 kalitesi (golcü formu, key player)
|
||||
- Yedek gücü
|
||||
- Eksik oyuncu etkisi
|
||||
- Pozisyon bazlı güç
|
||||
- Takım içi golcü dağılımı
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import Dict, Optional, List, Tuple
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from collections import defaultdict
|
||||
|
||||
try:
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
except ImportError:
|
||||
psycopg2 = None
|
||||
|
||||
|
||||
@dataclass
class PlayerForm:
    """Form information for a single player."""

    # Identity
    player_id: str
    player_name: str

    # Recent-form counters (the "_last_5" window is presumably the last five
    # matches — confirm against the code that fills these in).
    goals_last_5: int = 0
    assists_last_5: int = 0
    minutes_last_5: int = 0
    cards_last_5: int = 0

    # Goal scorer or frequently used player.
    is_key_player: bool = False
|
||||
|
||||
|
||||
@dataclass
class SquadAnalysis:
    """Squad analysis for one team."""

    team_id: str
    team_name: str = ""

    # --- Line-up sizes ---
    starting_count: int = 0
    sub_count: int = 0
    total_squad: int = 0

    # --- Position distribution ---
    goalkeeper_count: int = 0
    defender_count: int = 0
    midfielder_count: int = 0
    forward_count: int = 0

    # --- Form metrics ---
    total_goals_last_5: int = 0    # goals by the squad's players in the last 5 matches
    total_assists_last_5: int = 0
    key_players_count: int = 0     # number of goal scorers
    key_player_missing: int = 0    # missing goal scorers

    # --- Quality metrics ---
    avg_minutes_per_player: float = 0.0  # average playing time
    squad_experience: float = 0.0        # 0-1, experience playing with the team
    rotation_rate: float = 0.0           # squad rotation rate
|
||||
|
||||
|
||||
@dataclass
class SquadFeatures:
    """Squad features fed to the model, for both teams plus their differences."""

    # --- Home team ---
    home_starting_11: int = 11
    home_sub_count: int = 7
    home_total_squad: int = 18
    home_goalkeepers: int = 1
    home_defenders: int = 4
    home_midfielders: int = 4
    home_forwards: int = 2
    home_goals_last_5: int = 0
    home_assists_last_5: int = 0
    home_key_players: int = 0
    home_squad_experience: float = 0.5

    # --- Away team ---
    away_starting_11: int = 11
    away_sub_count: int = 7
    away_total_squad: int = 18
    away_goalkeepers: int = 1
    away_defenders: int = 4
    away_midfielders: int = 4
    away_forwards: int = 2
    away_goals_last_5: int = 0
    away_assists_last_5: int = 0
    away_key_players: int = 0
    away_squad_experience: float = 0.5

    # --- Home vs. away comparison ---
    squad_strength_diff: float = 0.0  # positive = home stronger
    goals_form_diff: float = 0.0
    key_players_diff: int = 0

    def to_dict(self) -> Dict[str, float]:
        """
        Return all features as a flat dict keyed by field name.

        Integer-typed fields are converted with ``float()``; the float-typed
        fields (squad experience and the two float diffs) are passed through
        unchanged. Keys appear in field-declaration order.
        """
        home_part = {
            'home_starting_11': float(self.home_starting_11),
            'home_sub_count': float(self.home_sub_count),
            'home_total_squad': float(self.home_total_squad),
            'home_goalkeepers': float(self.home_goalkeepers),
            'home_defenders': float(self.home_defenders),
            'home_midfielders': float(self.home_midfielders),
            'home_forwards': float(self.home_forwards),
            'home_goals_last_5': float(self.home_goals_last_5),
            'home_assists_last_5': float(self.home_assists_last_5),
            'home_key_players': float(self.home_key_players),
            'home_squad_experience': self.home_squad_experience,
        }
        away_part = {
            'away_starting_11': float(self.away_starting_11),
            'away_sub_count': float(self.away_sub_count),
            'away_total_squad': float(self.away_total_squad),
            'away_goalkeepers': float(self.away_goalkeepers),
            'away_defenders': float(self.away_defenders),
            'away_midfielders': float(self.away_midfielders),
            'away_forwards': float(self.away_forwards),
            'away_goals_last_5': float(self.away_goals_last_5),
            'away_assists_last_5': float(self.away_assists_last_5),
            'away_key_players': float(self.away_key_players),
            'away_squad_experience': self.away_squad_experience,
        }
        diff_part = {
            'squad_strength_diff': self.squad_strength_diff,
            'goals_form_diff': self.goals_form_diff,
            'key_players_diff': float(self.key_players_diff),
        }
        # Merging in this order keeps home keys first, then away, then diffs.
        return {**home_part, **away_part, **diff_part}
|
||||
|
||||
|
||||
class SquadAnalysisEngine:
    """
    Squad and player analysis engine.

    For a fixture such as Beşiktaş-Galatasaray it derives from the database:
    - goals/assists of the starting XI over each player's last 5 matches
    - key players (players with 3+ recorded goals)
    - positional distribution (GK/DEF/MID/FWD counts)
    - starter and substitute counts
    """

    # Substring -> canonical position code. Entries are tried in insertion
    # order and the first substring found in the raw position wins.
    POSITION_MAP = {
        'goalkeeper': 'GK',
        'gk': 'GK',
        'kaleci': 'GK',
        'defender': 'DEF',
        'def': 'DEF',
        'defans': 'DEF',
        'savunma': 'DEF',
        'midfielder': 'MID',
        'mid': 'MID',
        'orta saha': 'MID',
        'forward': 'FWD',
        'fwd': 'FWD',
        'forvet': 'FWD',
        'striker': 'FWD',
    }

    # Canonical position code -> SquadAnalysis counter attribute.
    _POSITION_COUNTERS = {
        'GK': 'goalkeeper_count',
        'DEF': 'defender_count',
        'MID': 'midfielder_count',
        'FWD': 'forward_count',
    }

    # (SquadFeatures suffix, SquadAnalysis attribute) pairs copied per team.
    _FEATURE_FIELDS = (
        ('starting_11', 'starting_count'),
        ('sub_count', 'sub_count'),
        ('total_squad', 'total_squad'),
        ('goalkeepers', 'goalkeeper_count'),
        ('defenders', 'defender_count'),
        ('midfielders', 'midfielder_count'),
        ('forwards', 'forward_count'),
        ('goals_last_5', 'total_goals_last_5'),
        ('assists_last_5', 'total_assists_last_5'),
        ('key_players', 'key_players_count'),
        ('squad_experience', 'squad_experience'),
    )

    # Sub-select for "the player's last 5 matches".
    # NOTE(review): recency is by match_id order, which presumably tracks
    # kickoff time - confirm against the schema.
    _RECENT_MATCHES_SQL = """
        SELECT match_id FROM match_player_participation
        WHERE player_id = %s
        ORDER BY match_id DESC LIMIT 5
    """

    # Goals scored by a player, excluding missed penalties. COALESCE keeps
    # goals whose subtype is NULL (NULL NOT ILIKE ... would drop them).
    _GOAL_FILTER_SQL = (
        "player_id = %s AND event_type = 'goal' "
        "AND COALESCE(event_subtype, '') NOT ILIKE '%%penaltı kaçırma%%'"
    )

    def __init__(self):
        self.conn = None
        self._player_form_cache: Dict[str, PlayerForm] = {}

    def _connect_db(self):
        """Open a new connection; return it, or None when unavailable."""
        if psycopg2 is None:
            return None
        try:
            from data.db import get_clean_dsn
            self.conn = psycopg2.connect(get_clean_dsn())
            return self.conn
        except Exception as e:
            print(f"[SquadEngine] DB connection failed: {e}")
            # Drop any stale (closed) handle so callers see None.
            self.conn = None
            return None

    def get_conn(self):
        """Return a live connection, reconnecting if needed (None on failure)."""
        if self.conn is None or self.conn.closed:
            self._connect_db()
        return self.conn

    @staticmethod
    def _rollback(conn):
        """Best-effort rollback so a failed statement does not leave the
        shared connection stuck in an aborted transaction."""
        try:
            conn.rollback()
        except Exception:
            # The connection may already be closed; nothing more to clean up.
            pass

    def _normalize_position(self, position: Optional[str]) -> str:
        """Map a raw position label to 'GK'/'DEF'/'MID'/'FWD', else 'UNK'."""
        if not position:
            return 'UNK'

        pos_lower = position.lower().strip()
        for key, val in self.POSITION_MAP.items():
            if key in pos_lower:
                return val
        return 'UNK'

    def _tally_position(self, analysis, raw_position):
        """Increment the position counter on *analysis* for one player."""
        counter = self._POSITION_COUNTERS.get(self._normalize_position(raw_position))
        if counter is not None:
            setattr(analysis, counter, getattr(analysis, counter) + 1)

    def _add_player_form(self, analysis, player_id):
        """Add one player's goals/assists/key-player flag to *analysis*."""
        form = self.get_player_form(player_id)
        analysis.total_goals_last_5 += form.goals_last_5
        analysis.total_assists_last_5 += form.assists_last_5
        if form.is_key_player:
            analysis.key_players_count += 1

    def _count_events(self, cur, where_sql, params, recent_for=None):
        """Count match_player_events rows matching *where_sql*.

        When *recent_for* is a player id the count is restricted to that
        player's last 5 matches.
        """
        sql = "SELECT COUNT(*) AS n FROM match_player_events WHERE " + where_sql
        if recent_for is not None:
            sql += " AND match_id IN (" + self._RECENT_MATCHES_SQL + ")"
            params = tuple(params) + (recent_for,)
        cur.execute(sql, params)
        row = cur.fetchone()
        return (row['n'] or 0) if row else 0

    @staticmethod
    def _apply_analysis(features, prefix, analysis):
        """Copy *analysis* into the ``<prefix>_*`` fields of *features*."""
        for suffix, attr in SquadAnalysisEngine._FEATURE_FIELDS:
            setattr(features, f"{prefix}_{suffix}", getattr(analysis, attr))

    @staticmethod
    def _fill_comparisons(features):
        """Derive the home-minus-away comparison fields on *features*."""
        def strength(side):
            return (
                getattr(features, f"{side}_goals_last_5") * 2 +
                getattr(features, f"{side}_assists_last_5") +
                getattr(features, f"{side}_key_players") * 3 +
                float(getattr(features, f"{side}_squad_experience")) * 10
            )

        features.squad_strength_diff = strength('home') - strength('away')
        features.goals_form_diff = features.home_goals_last_5 - features.away_goals_last_5
        features.key_players_diff = features.home_key_players - features.away_key_players

    def get_player_form(self, player_id: str, before_date_ms: int = None) -> PlayerForm:
        """Compute a player's form over their last 5 matches.

        Results are cached per player id for the lifetime of the engine.
        ``before_date_ms`` is accepted for interface compatibility but is not
        used by the queries. On a database error the partially filled form is
        returned and not cached.
        """
        cached = self._player_form_cache.get(player_id)
        if cached is not None:
            return cached

        form = PlayerForm(player_id=player_id, player_name="")

        conn = self.get_conn()
        if conn is None:
            return form

        try:
            with conn.cursor(cursor_factory=RealDictCursor) as cur:
                cur.execute("SELECT name FROM players WHERE id = %s", (player_id,))
                player_row = cur.fetchone()
                if player_row:
                    form.player_name = player_row['name']

                form.goals_last_5 = self._count_events(
                    cur, self._GOAL_FILTER_SQL, (player_id,), recent_for=player_id)
                # Assists: events where this player is recorded as the assister.
                form.assists_last_5 = self._count_events(
                    cur, "assist_player_id = %s", (player_id,), recent_for=player_id)
                form.cards_last_5 = self._count_events(
                    cur, "player_id = %s AND event_type = 'card'", (player_id,),
                    recent_for=player_id)

                # Key player: 3+ goals over every recorded match (the query
                # has no recency window).
                form.is_key_player = self._count_events(
                    cur, self._GOAL_FILTER_SQL, (player_id,)) >= 3

            self._player_form_cache[player_id] = form
            return form

        except Exception as e:
            import traceback
            traceback.print_exc()
            print(f"[SquadEngine] Error getting player form: {e}")
            self._rollback(conn)
            return form

    def analyze_squad(self, match_id: str, team_id: str) -> SquadAnalysis:
        """Analyse the squad a team fielded in a given match.

        Position counts cover every listed player; goals, assists and key
        players are summed over the starters only. Returns whatever was
        gathered so far if the database is unavailable or a query fails.
        """
        analysis = SquadAnalysis(team_id=team_id)

        conn = self.get_conn()
        if conn is None:
            return analysis

        try:
            with conn.cursor(cursor_factory=RealDictCursor) as cur:
                cur.execute("SELECT name FROM teams WHERE id = %s", (team_id,))
                team_row = cur.fetchone()
                if team_row:
                    analysis.team_name = team_row['name']

                cur.execute("""
                    SELECT player_id, position, is_starting
                    FROM match_player_participation
                    WHERE match_id = %s AND team_id = %s
                """, (match_id, team_id))

                for player in cur.fetchall():
                    self._tally_position(analysis, player['position'])
                    if player['is_starting']:
                        analysis.starting_count += 1
                        self._add_player_form(analysis, player['player_id'])
                    else:
                        analysis.sub_count += 1

                analysis.total_squad = analysis.starting_count + analysis.sub_count

                # Team experience: average number of starts per player.
                if analysis.starting_count > 0:
                    cur.execute("""
                        SELECT AVG(match_count) as avg_exp
                        FROM (
                            SELECT player_id, COUNT(*) as match_count
                            FROM match_player_participation
                            WHERE team_id = %s AND is_starting = true
                            GROUP BY player_id
                        ) sub
                    """, (team_id,))

                    exp_row = cur.fetchone()
                    if exp_row and exp_row['avg_exp']:
                        # Normalise so 50+ matches = 1.0; float() because the
                        # driver may hand AVG() back as a Decimal.
                        analysis.squad_experience = min(float(exp_row['avg_exp']) / 50, 1.0)

            return analysis

        except Exception as e:
            print(f"[SquadEngine] Error analyzing squad: {e}")
            self._rollback(conn)
            return analysis

    def analyze_squad_from_list(self, player_ids: List[str], team_id: str) -> SquadAnalysis:
        """
        Analyse a squad from an in-memory list of player ids.

        Used for live matches that are not in the database yet. Every id is
        treated as a starter; substitutes are unknown. Position and
        experience come from each player's most frequent recorded position
        with this team.
        """
        analysis = SquadAnalysis(team_id=team_id)
        if not player_ids:
            return analysis

        analysis.starting_count = len(player_ids)
        analysis.total_squad = len(player_ids)  # subs unknown unless passed separately

        conn = self.get_conn()
        if conn is None:
            return analysis

        try:
            # Accumulate locally so a mid-loop failure cannot leave an
            # un-averaged sum in analysis.squad_experience.
            experience_sum = 0.0
            with conn.cursor(cursor_factory=RealDictCursor) as cur:
                for pid in player_ids:
                    self._add_player_form(analysis, pid)

                    cur.execute("""
                        SELECT position, COUNT(*) as match_count
                        FROM match_player_participation
                        WHERE player_id = %s AND team_id = %s
                        GROUP BY position
                        ORDER BY match_count DESC LIMIT 1
                    """, (pid, team_id))
                    row = cur.fetchone()

                    if row:
                        self._tally_position(analysis, row.get('position'))
                        experience_sum += min(row['match_count'] / 50.0, 1.0)

            analysis.squad_experience = experience_sum / analysis.starting_count

        except Exception as e:
            print(f"[SquadEngine] Live analyze error: {e}")
            self._rollback(conn)

        return analysis

    def get_features(
        self,
        match_id: str,
        home_team_id: str,
        away_team_id: str
    ) -> Dict[str, float]:
        """
        Compute the squad features for a match.

        Args:
            match_id: match id
            home_team_id: home team id
            away_team_id: away team id

        Returns:
            The squad features as a dict.
        """
        features = SquadFeatures()

        self._apply_analysis(features, 'home', self.analyze_squad(match_id, home_team_id))
        self._apply_analysis(features, 'away', self.analyze_squad(match_id, away_team_id))
        self._fill_comparisons(features)

        return features.to_dict()

    def get_features_without_match(
        self,
        home_team_id: str,
        away_team_id: str
    ) -> Dict[str, float]:
        """
        Compute team-level features without a match id.

        Uses the squad from each team's most recent match as the reference.
        A team with no recorded match keeps the SquadFeatures defaults.
        """
        features = SquadFeatures()

        conn = self.get_conn()
        if conn is None:
            return features.to_dict()

        try:
            with conn.cursor(cursor_factory=RealDictCursor) as cur:
                for team_id, prefix in ((home_team_id, 'home'), (away_team_id, 'away')):
                    cur.execute("""
                        SELECT mpp.match_id
                        FROM match_player_participation mpp
                        JOIN matches m ON mpp.match_id = m.id
                        WHERE mpp.team_id = %s
                        ORDER BY m.mst_utc DESC
                        LIMIT 1
                    """, (team_id,))

                    row = cur.fetchone()
                    if row:
                        analysis = self.analyze_squad(row['match_id'], team_id)
                        self._apply_analysis(features, prefix, analysis)

            # Same comparison set as get_features, including the strength diff.
            self._fill_comparisons(features)
            return features.to_dict()

        except Exception as e:
            print(f"[SquadEngine] Error: {e}")
            self._rollback(conn)
            return features.to_dict()
|
||||
|
||||
|
||||
# Module-wide engine instance, created lazily on first request
_engine: Optional[SquadAnalysisEngine] = None


def get_squad_analysis_engine() -> SquadAnalysisEngine:
    """Return the shared SquadAnalysisEngine, creating it on first use."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = SquadAnalysisEngine()
    return _engine
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: prints the features for two placeholder team ids.
    squad_engine = get_squad_analysis_engine()

    print("\n🧪 Squad Analysis Engine Test")
    print("=" * 50)

    # Placeholder ids standing in for Galatasaray and Fenerbahce
    demo_features = squad_engine.get_features_without_match(
        home_team_id="test_gs",
        away_team_id="test_fb",
    )

    print("\n📊 Features:")
    for name, number in demo_features.items():
        print(f" {name}: {number:.2f}")
|
||||
Executable
+194
@@ -0,0 +1,194 @@
|
||||
"""
|
||||
Team Stats Engine
|
||||
Takımların oyun tarzı istatistiklerini analiz eder.
|
||||
football_team_stats tablosundaki kayıtlardan possession, şut, korner verilerini kullanır.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import psycopg2
|
||||
from typing import Dict
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from data.db import get_clean_dsn
|
||||
|
||||
|
||||
class TeamStatsEngine:
    """
    Feature engine for team playing-style statistics.

    Metrics produced:
    - average possession
    - average shots on target and total shots
    - average corners and fouls
    - shot-to-goal conversion rate (an xG-like proxy)
    - shot accuracy and attacking intensity
    """

    def __init__(self):
        self.conn = None

    def get_conn(self):
        """Return the cached connection, (re)connecting when missing or closed."""
        if self.conn is None or self.conn.closed:
            self.conn = psycopg2.connect(get_clean_dsn())
        return self.conn

    def get_features(self, team_id: str, before_date: int,
                     limit: int = 10, max_days: int = 180) -> Dict[str, float]:
        """
        Compute the playing-style features of a team.

        Args:
            team_id: team id; empty ids or ids shorter than 5 characters
                yield the defaults
            before_date: only matches before this time are used (ms timestamp)
            limit: maximum number of matches to analyse
            max_days: how many days to look back from ``before_date``

        Returns:
            Dict of team-stats features. The defaults are returned when no
            match is found or any error occurs.
        """
        if not team_id or len(team_id) < 5:
            return self._default_features()

        conn = None
        try:
            conn = self.get_conn()
            min_date = before_date - (max_days * 24 * 60 * 60 * 1000)

            # The context manager closes the cursor even if the query fails.
            with conn.cursor() as cur:
                cur.execute("""
                    SELECT
                        mts.possession_percentage,
                        mts.shots_on_target,
                        mts.shots_off_target,
                        mts.total_shots,
                        mts.corners,
                        mts.fouls,
                        m.score_home,
                        m.score_away,
                        m.home_team_id
                    FROM football_team_stats mts
                    JOIN matches m ON mts.match_id = m.id
                    WHERE mts.team_id = %s
                    AND m.mst_utc < %s
                    AND m.mst_utc > %s
                    AND m.score_home IS NOT NULL
                    AND m.sport = 'football'
                    ORDER BY m.mst_utc DESC
                    LIMIT %s
                """, (team_id, before_date, min_date, limit))
                stats = cur.fetchall()

            if not stats:
                return self._default_features()

            total_matches = len(stats)

            possession_sum = 0
            shots_on_target_sum = 0
            shots_total_sum = 0
            corners_sum = 0
            fouls_sum = 0
            goals_scored = 0
            valid_possession_count = 0

            for poss, sot, _soff, total_shots, corners, fouls, sh, sa, home_id in stats:
                # Possession is averaged only over matches that recorded it.
                if poss and poss > 0:
                    possession_sum += poss
                    valid_possession_count += 1

                # Missing counters contribute zero but the match still counts.
                shots_on_target_sum += sot or 0
                shots_total_sum += total_shots or 0
                corners_sum += corners or 0
                fouls_sum += fouls or 0

                # Goals for this team; a NULL score counts as zero instead
                # of aborting the whole calculation.
                goals_scored += (sh if home_id == team_id else sa) or 0

            avg_possession = possession_sum / valid_possession_count if valid_possession_count > 0 else 50.0
            avg_shots_total = shots_total_sum / total_matches
            avg_corners = corners_sum / total_matches

            # Shot conversion rate (xG-like) and shot accuracy
            shot_conversion = goals_scored / shots_total_sum if shots_total_sum > 0 else 0.1
            shot_accuracy = shots_on_target_sum / shots_total_sum if shots_total_sum > 0 else 0.35

            # float() keeps the output numeric even if the driver returns
            # Decimal columns.
            return {
                'avg_possession': float(avg_possession) / 100,  # normalise to 0-1
                'avg_shots_on_target': float(shots_on_target_sum / total_matches),
                'avg_shots_total': float(avg_shots_total),
                'avg_corners': float(avg_corners),
                'avg_fouls': float(fouls_sum / total_matches),
                'shot_conversion_rate': float(shot_conversion),
                'shot_accuracy': float(shot_accuracy),
                'attacking_intensity': float((avg_shots_total + avg_corners) / 2)
            }

        except Exception as e:
            print(f"[TeamStatsEngine] Error: {e}")
            if conn is not None:
                try:
                    # Clear the aborted transaction so later calls can query.
                    conn.rollback()
                except Exception:
                    pass
            return self._default_features()

    def _default_features(self) -> Dict[str, float]:
        """Fallback feature values used when no statistics are available."""
        return {
            'avg_possession': 0.50,
            'avg_shots_on_target': 3.5,
            'avg_shots_total': 11.0,
            'avg_corners': 4.5,
            'avg_fouls': 12.0,
            'shot_conversion_rate': 0.10,
            'shot_accuracy': 0.35,
            'attacking_intensity': 7.5
        }
|
||||
|
||||
|
||||
# Module-wide engine instance, created lazily on first request
_engine = None


def get_team_stats_engine() -> TeamStatsEngine:
    """Return the shared TeamStatsEngine, creating it on first use."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = TeamStatsEngine()
    return _engine
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: pick any team that has stats and print its features.
    engine = get_team_stats_engine()

    print("\n🧪 Team Stats Engine Test")
    print("=" * 50)

    # Sample a team from football_team_stats, the table get_features reads,
    # so the chosen team is guaranteed to have rows the engine can use.
    conn = engine.get_conn()
    with conn.cursor() as cur:
        cur.execute("""
            SELECT DISTINCT mts.team_id, t.name
            FROM football_team_stats mts
            JOIN teams t ON mts.team_id = t.id
            LIMIT 1
        """)
        result = cur.fetchone()

    if result:
        team_id, team_name = result
        print(f"Test Takımı: {team_name}")

        import time
        features = engine.get_features(team_id, int(time.time() * 1000))

        print(f"\n📊 Feature'lar:")
        for k, v in features.items():
            print(f" {k}: {v:.3f}")
|
||||
Executable
+419
@@ -0,0 +1,419 @@
|
||||
"""
|
||||
Upset Engine - Dev Avcısı Tespit Sistemi
|
||||
V9 Model için Galatasaray-Liverpool tarzı sürpriz maçları tespit eder.
|
||||
|
||||
Faktörler:
|
||||
1. Atmosfer (Avrupa gecesi, taraftar baskısı)
|
||||
2. Motivasyon asimetrisi (küme düşme vs şampiyon)
|
||||
3. Yorgunluk (maç yoğunluğu, seyahat)
|
||||
4. Tarihsel upset pattern
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from typing import Dict, Any, Optional, Tuple
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
# Add parent directory to path for imports
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
try:
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
except ImportError:
|
||||
psycopg2 = None
|
||||
|
||||
|
||||
@dataclass
class UpsetFactors:
    """Factors that feed into the upset potential of a match."""

    atmosphere_score: float = 0.0  # atmosphere effect (0-1)
    motivation_score: float = 0.0  # motivation asymmetry (0-1)
    fatigue_score: float = 0.0  # fatigue difference (0-1)
    historical_upset_rate: float = 0.0  # historical upset rate (0-1)
    total_upset_potential: float = 0.0  # combined upset potential (0-1)
    reasoning: list = field(default_factory=list)
|
||||
|
||||
|
||||
class UpsetEngine:
    """
    Detects matches in which the favourite is likely to lose.

    Aimed at Galatasaray-Liverpool style surprises.
    """

    # Home teams whose stadiums count as high-atmosphere (hand-maintained)
    HIGH_ATMOSPHERE_TEAMS = {
        # Turkey
        "galatasaray",
        "fenerbahce",
        "besiktas",
        "trabzonspor",
        # England
        "liverpool",
        "newcastle",
        "leeds",
        # Germany
        "dortmund",
        "union berlin",
        # Greece
        "olympiacos",
        "panathinaikos",
        "aek athens",
        # Argentina
        "boca juniors",
        "river plate",
        # Others
        "celtic",
        "rangers",
        "red star belgrade",
    }

    # European cup competitions (treated as high-motivation fixtures)
    EUROPEAN_COMPETITIONS = {
        "şampiyonlar ligi",
        "champions league",
        "uefa champions league",
        "avrupa ligi",
        "europa league",
        "uefa europa league",
        "konferans ligi",
        "conference league",
        "uefa conference league",
    }
|
||||
|
||||
def __init__(self):
|
||||
self.conn = None
|
||||
self._connect_db()
|
||||
|
||||
def _connect_db(self):
    """Connect to the database; leaves ``self.conn`` as None on failure."""
    if psycopg2 is None:
        return

    try:
        from data.db import get_clean_dsn
        dsn = get_clean_dsn()
        self.conn = psycopg2.connect(dsn)
    except Exception as exc:
        print(f"[UpsetEngine] DB connection failed: {exc}")
        self.conn = None
|
||||
|
||||
def _get_conn(self):
|
||||
"""Bağlantıyı kontrol et ve döndür"""
|
||||
if self.conn is None or self.conn.closed:
|
||||
self._connect_db()
|
||||
return self.conn
|
||||
|
||||
def calculate_atmosphere_score(
|
||||
self,
|
||||
home_team_name: str,
|
||||
league_name: str,
|
||||
is_cup_match: bool = False
|
||||
) -> Tuple[float, list]:
|
||||
"""
|
||||
Atmosfer skorunu hesapla.
|
||||
Yüksek atmosferli stadyumlar upset potansiyelini artırır.
|
||||
"""
|
||||
score = 0.0
|
||||
reasons = []
|
||||
|
||||
# Yüksek atmosferli takım mı?
|
||||
home_lower = home_team_name.lower()
|
||||
for team in self.HIGH_ATMOSPHERE_TEAMS:
|
||||
if team in home_lower:
|
||||
score += 0.25
|
||||
reasons.append(f"🔥 {home_team_name} yüksek atmosferli stadyum")
|
||||
break
|
||||
|
||||
# Avrupa kupası mı?
|
||||
league_lower = league_name.lower()
|
||||
for comp in self.EUROPEAN_COMPETITIONS:
|
||||
if comp in league_lower:
|
||||
score += 0.20
|
||||
reasons.append("🌟 Avrupa gecesi - ekstra motivasyon")
|
||||
break
|
||||
|
||||
# Kupa maçı mı? (tek maç eliminasyon)
|
||||
if is_cup_match:
|
||||
score += 0.10
|
||||
reasons.append("🏆 Kupa maçı - her şey olabilir")
|
||||
|
||||
return min(score, 1.0), reasons
|
||||
|
||||
def calculate_motivation_score(
|
||||
self,
|
||||
home_position: int,
|
||||
away_position: int,
|
||||
home_points_to_safety: Optional[int] = None,
|
||||
away_already_champion: bool = False,
|
||||
total_teams: int = 20
|
||||
) -> Tuple[float, list]:
|
||||
"""
|
||||
Motivasyon asimetrisini hesapla.
|
||||
Alt sıradaki takımın üst sıradakine karşı ekstra motivasyonu.
|
||||
"""
|
||||
score = 0.0
|
||||
reasons = []
|
||||
|
||||
# Pozisyon farkı
|
||||
position_diff = 0
|
||||
if away_position is not None and home_position is not None:
|
||||
position_diff = away_position - home_position # Negatif = deplasman daha iyi sırada
|
||||
|
||||
# Küme düşme hattı vs üst sıra (en güçlü upset faktörü)
|
||||
relegation_zone = total_teams - 3 # Son 3 takım
|
||||
if home_position is not None and away_position is not None:
|
||||
if home_position >= relegation_zone and away_position <= 3:
|
||||
score += 0.30
|
||||
reasons.append("⚔️ Hayatta kalma savaşı vs şampiyonluk adayı")
|
||||
elif home_position >= relegation_zone:
|
||||
score += 0.15
|
||||
reasons.append("🔥 Ev sahibi küme düşme hattında - ekstra motivasyon")
|
||||
elif home_position is not None and home_position >= relegation_zone:
|
||||
score += 0.15
|
||||
reasons.append("🔥 Ev sahibi küme düşme hattında - ekstra motivasyon")
|
||||
|
||||
# Deplasman takımı zaten şampiyon mu?
|
||||
if away_already_champion:
|
||||
score += 0.20
|
||||
reasons.append("😴 Deplasman takımı zaten şampiyon - motivasyon düşük")
|
||||
|
||||
# Büyük pozisyon farkı (underdog evinde)
|
||||
if position_diff < -10:
|
||||
score += 0.15
|
||||
reasons.append(f"📊 {abs(position_diff)} sıra fark - büyük maç heyecanı")
|
||||
elif position_diff < -5:
|
||||
score += 0.08
|
||||
|
||||
return min(score, 1.0), reasons
|
||||
|
||||
def calculate_fatigue_score(
|
||||
self,
|
||||
home_matches_last_14d: int = 0,
|
||||
away_matches_last_14d: int = 0,
|
||||
home_days_rest: int = 7,
|
||||
away_days_rest: int = 7,
|
||||
away_travel_km: float = 0
|
||||
) -> Tuple[float, list]:
|
||||
"""
|
||||
Yorgunluk farkını hesapla.
|
||||
Yorgun deplasman takımı = yüksek upset potansiyeli.
|
||||
"""
|
||||
score = 0.0
|
||||
reasons = []
|
||||
|
||||
# Maç yoğunluğu farkı
|
||||
match_diff = away_matches_last_14d - home_matches_last_14d
|
||||
if match_diff >= 3:
|
||||
score += 0.20
|
||||
reasons.append(f"🏃 Deplasman {match_diff} maç daha fazla oynamış")
|
||||
elif match_diff >= 2:
|
||||
score += 0.10
|
||||
|
||||
# Dinlenme süresi farkı
|
||||
rest_diff = home_days_rest - away_days_rest
|
||||
if rest_diff >= 4:
|
||||
score += 0.15
|
||||
reasons.append(f"💤 Ev sahibi {rest_diff} gün daha fazla dinlenmiş")
|
||||
elif rest_diff >= 2:
|
||||
score += 0.08
|
||||
|
||||
# Uzun deplasman
|
||||
if away_travel_km > 3000:
|
||||
score += 0.15
|
||||
reasons.append(f"✈️ Uzun deplasman ({int(away_travel_km)} km)")
|
||||
elif away_travel_km > 1500:
|
||||
score += 0.08
|
||||
|
||||
return min(score, 1.0), reasons
|
||||
|
||||
def get_historical_upset_rate(
|
||||
self,
|
||||
home_team_id: str,
|
||||
before_date_ms: int,
|
||||
lookback_matches: int = 20
|
||||
) -> Tuple[float, list]:
|
||||
"""
|
||||
Ev sahibi takımın tarihsel upset oranını hesapla.
|
||||
Üst sıradaki takımlara karşı galibiyetler.
|
||||
"""
|
||||
reasons = []
|
||||
|
||||
conn = self._get_conn()
|
||||
if conn is None:
|
||||
return 0.0, reasons
|
||||
|
||||
try:
|
||||
cursor = conn.cursor(cursor_factory=RealDictCursor)
|
||||
|
||||
# Ev sahibi olarak oynadığı ve sıralamada geride olduğu maçlar
|
||||
query = """
|
||||
WITH home_matches AS (
|
||||
SELECT
|
||||
m.id,
|
||||
m.score_home,
|
||||
m.score_away,
|
||||
m.home_team_id,
|
||||
m.away_team_id
|
||||
FROM matches m
|
||||
WHERE m.home_team_id = %s
|
||||
AND m.mst_utc < %s
|
||||
AND m.score_home IS NOT NULL
|
||||
AND m.score_away IS NOT NULL
|
||||
ORDER BY m.mst_utc DESC
|
||||
LIMIT %s
|
||||
)
|
||||
SELECT
|
||||
COUNT(*) as total,
|
||||
SUM(CASE WHEN score_home > score_away THEN 1 ELSE 0 END) as wins
|
||||
FROM home_matches
|
||||
"""
|
||||
|
||||
cursor.execute(query, (home_team_id, before_date_ms, lookback_matches))
|
||||
result = cursor.fetchone()
|
||||
|
||||
if result and result['total'] > 0:
|
||||
win_rate = result['wins'] / result['total']
|
||||
# Ev sahibi kazanma oranı yüksekse, upset potansiyeli de yüksek
|
||||
if win_rate > 0.5:
|
||||
rate = min((win_rate - 0.4) * 0.5, 0.3)
|
||||
reasons.append(f"📈 Güçlü ev sahibi performansı (%{int(win_rate*100)} kazanma)")
|
||||
return rate, reasons
|
||||
|
||||
return 0.0, reasons
|
||||
|
||||
except Exception as e:
|
||||
print(f"[UpsetEngine] Historical query error: {e}")
|
||||
return 0.0, reasons
|
||||
|
||||
def calculate_upset_potential(
    self,
    home_team_name: str,
    home_team_id: str,
    away_team_name: str,
    league_name: str,
    home_position: int,
    away_position: int,
    match_date_ms: int,
    is_cup_match: bool = False,
    home_matches_last_14d: int = 2,
    away_matches_last_14d: int = 2,
    home_days_rest: int = 7,
    away_days_rest: int = 7,
    away_travel_km: float = 0,
    total_teams: int = 20
) -> UpsetFactors:
    """
    Combine the four individual signals into one upset estimate.

    The atmosphere, motivation, fatigue and historical scores are computed
    in that order, stored on a fresh ``UpsetFactors`` and blended with the
    weights 0.25 / 0.35 / 0.25 / 0.15. The blended value is capped at 1.0.

    Returns:
        UpsetFactors: the individual scores, the weighted total and the
        concatenated list of reasons (same order as the signals).
    """
    result = UpsetFactors()

    # 1. Atmosphere
    atmosphere, atmosphere_why = self.calculate_atmosphere_score(
        home_team_name, league_name, is_cup_match
    )
    result.atmosphere_score = atmosphere

    # 2. Motivation
    motivation, motivation_why = self.calculate_motivation_score(
        home_position, away_position,
        total_teams=total_teams
    )
    result.motivation_score = motivation

    # 3. Fatigue
    fatigue, fatigue_why = self.calculate_fatigue_score(
        home_matches_last_14d, away_matches_last_14d,
        home_days_rest, away_days_rest,
        away_travel_km
    )
    result.fatigue_score = fatigue

    # 4. Historical home record (0.0 when no database is reachable)
    historical, historical_why = self.get_historical_upset_rate(
        home_team_id, match_date_ms
    )
    result.historical_upset_rate = historical

    # Weighted blend of the four scores, never above 1.0
    weighted_total = (
        result.atmosphere_score * 0.25 +
        result.motivation_score * 0.35 +
        result.fatigue_score * 0.25 +
        result.historical_upset_rate * 0.15
    )
    result.total_upset_potential = min(weighted_total, 1.0)

    result.reasoning = [
        *atmosphere_why,
        *motivation_why,
        *fatigue_why,
        *historical_why,
    ]
    return result
|
||||
|
||||
def get_features(
    self,
    home_team_name: str,
    home_team_id: str,
    away_team_name: str,
    league_name: str,
    home_position: int,
    away_position: int,
    match_date_ms: int,
    **kwargs
) -> Dict[str, float]:
    """
    Return the upset signals as a flat feature dict for the model.

    All arguments, including any extra keyword arguments, are forwarded
    unchanged to ``calculate_upset_potential``; the resulting scores are
    exposed under the ``upset_*`` feature names.
    """
    upset = self.calculate_upset_potential(
        home_team_name=home_team_name,
        home_team_id=home_team_id,
        away_team_name=away_team_name,
        league_name=league_name,
        home_position=home_position,
        away_position=away_position,
        match_date_ms=match_date_ms,
        **kwargs
    )

    # feature name -> attribute of the factors object it is read from
    feature_sources = (
        ("upset_atmosphere", "atmosphere_score"),
        ("upset_motivation", "motivation_score"),
        ("upset_fatigue", "fatigue_score"),
        ("upset_historical", "historical_upset_rate"),
        ("upset_potential", "total_upset_potential"),
    )
    return {feature: getattr(upset, attr) for feature, attr in feature_sources}
|
||||
|
||||
|
||||
# Shared UpsetEngine; stays None until get_upset_engine() is first called
_engine_instance = None


def get_upset_engine() -> UpsetEngine:
    """Return the shared UpsetEngine, building it on the first call."""
    global _engine_instance
    if _engine_instance is not None:
        return _engine_instance
    _engine_instance = UpsetEngine()
    return _engine_instance
|
||||
|
||||
|
||||
# Test
|
||||
if __name__ == "__main__":
    # Manual smoke test: Galatasaray hosting Liverpool in the Champions League
    demo_match = dict(
        home_team_name="Galatasaray",
        home_team_id="test-gs-id",
        away_team_name="Liverpool",
        league_name="UEFA Champions League",
        home_position=12,
        away_position=1,
        match_date_ms=1700000000000,
        is_cup_match=False,
        away_matches_last_14d=5,
        home_matches_last_14d=2,
        away_days_rest=3,
        home_days_rest=7,
        away_travel_km=2800,
        total_teams=20,
    )
    factors = get_upset_engine().calculate_upset_potential(**demo_match)

    banner = "=" * 60
    print(banner)
    print("GALATASARAY vs LIVERPOOL - UPSET ANALİZİ")
    print(banner)

    # One line per individual score, then the blended total
    score_lines = (
        ("🏟️ Atmosfer Skoru:", factors.atmosphere_score),
        ("💪 Motivasyon Skoru:", factors.motivation_score),
        ("😓 Yorgunluk Skoru:", factors.fatigue_score),
        ("📊 Tarihsel Skor:", factors.historical_upset_rate),
    )
    for label, value in score_lines:
        print(f"{label} {value:.2f}")
    print(f"\n🎯 TOPLAM UPSET POTANSİYELİ: {factors.total_upset_potential:.2f}")

    print("\n📝 Sebepler:")
    for line in factors.reasoning:
        print(f" {line}")
|
||||
@@ -0,0 +1,511 @@
|
||||
"""
|
||||
Upset Engine v2 - GLM-5 Tespitleri ile Geliştirilmiş Sürpriz Tespiti
|
||||
====================================================================
|
||||
|
||||
Yeni Eklenen Faktörler (GLM-5 Analizinden):
|
||||
1. MARGIN_ANALIZI - Bookmaker margin > %18 = sürpriz riski
|
||||
2. FAVORI_ORAN_TUZAGI - 1.40-1.60 arası en yüksek sürpriz oranı
|
||||
3. HAKEM_SURPRIZ_ORANI - Hakemin geçmiş maçlarında ev kayıp oranı
|
||||
4. FORM_FARKI_TUZAGI - Form farkı > 40 = "çok iyi görünen" favori tuzak
|
||||
|
||||
Orijinal Faktörler:
|
||||
- Atmosfer (Avrupa gecesi, taraftar baskısı)
|
||||
- Motivasyon asimetrisi (küme düşme vs şampiyon)
|
||||
- Yorgunluk (maç yoğunluğu, seyahat)
|
||||
- Tarihsel upset pattern
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from typing import Dict, Any, Optional, Tuple, List
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
try:
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
except ImportError:
|
||||
psycopg2 = None
|
||||
|
||||
|
||||
@dataclass
class UpsetFactorsV2:
    """Every signal that feeds the v2 upset estimate, plus the final verdict."""

    # --- signals carried over from the original engine ---
    atmosphere_score: float = 0.0
    motivation_score: float = 0.0
    fatigue_score: float = 0.0
    historical_upset_rate: float = 0.0

    # --- signals added in v2 (GLM-5 findings) ---
    margin_score: float = 0.0           # bookmaker margin analysis
    favorite_odds_trap: float = 0.0     # favourite priced in a trap odds range
    referee_upset_score: float = 0.0    # referee's historical upset rate
    form_trap_score: float = 0.0        # form-gap trap

    # --- aggregate ---
    total_upset_potential: float = 0.0
    reasoning: List[str] = field(default_factory=list)

    # --- surprise verdict ---
    upset_score: int = 0                # 0-100
    upset_level: str = "LOW"            # LOW, MEDIUM, HIGH, EXTREME
|
||||
|
||||
|
||||
class UpsetEngineV2:
    """
    Detect matches in which the favourite is at risk of not winning.

    v2 adds four signals taken from the GLM-5 analysis (bookmaker margin,
    favourite-odds trap, referee upset rate, form trap) to the original
    atmosphere and motivation signals. Only the referee signal needs the
    database; every other signal is computed from the arguments alone.
    """

    # Lower-cased name fragments of home teams treated as high-atmosphere venues
    HIGH_ATMOSPHERE_TEAMS = {
        "galatasaray", "fenerbahce", "besiktas", "trabzonspor",
        "liverpool", "newcastle", "leeds",
        "dortmund", "union berlin",
        "olympiacos", "panathinaikos", "aek athens",
        "boca juniors", "river plate",
        "celtic", "rangers", "red star belgrade"
    }

    # Lower-cased fragments identifying European club competitions
    EUROPEAN_COMPETITIONS = {
        "şampiyonlar ligi", "champions league", "uefa champions league",
        "avrupa ligi", "europa league", "uefa europa league",
        "konferans ligi", "conference league", "uefa conference league"
    }

    # Upset rate per favourite-odds range [low, high), from the database analysis
    FAVORITE_ODDS_UPSET_RATES = {
        (1.10, 1.20): 0.111,  # 11.1% upsets
        (1.20, 1.30): 0.150,  # 15.0% upsets
        (1.30, 1.40): 0.235,  # 23.5% upsets
        (1.40, 1.50): 0.333,  # 33.3% upsets - caution
        (1.50, 1.60): 0.350,  # 35.0% upsets - highest
    }

    # Folds lower-cased Turkish letters to ASCII so names such as "Beşiktaş"
    # can match the ASCII fragments stored in HIGH_ATMOSPHERE_TEAMS.
    _TR_ASCII_FOLD = str.maketrans("çğıöşü", "cgiosu")

    def __init__(self):
        self.conn = None
        self._connect_db()

    def _connect_db(self):
        """Open the database connection; leave ``self.conn`` as None on failure."""
        if psycopg2 is None:
            return
        try:
            from data.db import get_clean_dsn
            self.conn = psycopg2.connect(get_clean_dsn())
        except Exception as e:
            print(f"[UpsetEngineV2] DB connection failed: {e}")
            self.conn = None

    def _get_conn(self):
        """Return the connection, reconnecting first if it is missing or closed."""
        if self.conn is None or self.conn.closed:
            self._connect_db()
        return self.conn

    @staticmethod
    def _rollback_quietly(conn):
        """Roll back ``conn`` so it is usable again; report but never raise."""
        try:
            conn.rollback()
        except Exception as e:
            print(f"[UpsetEngineV2] Rollback failed: {e}")

    @staticmethod
    def _tier_points(value, tiers):
        """Return the points of the first ``(threshold, points)`` tier that ``value`` reaches."""
        for threshold, points in tiers:
            if value >= threshold:
                return points
        return 0

    # ═════════════════════════════════════════════════════════════════
    # NEW SIGNALS (from the GLM-5 analysis)
    # ═════════════════════════════════════════════════════════════════

    def calculate_margin_score(
        self,
        odds_data: Dict[str, float]
    ) -> Tuple[float, List[str]]:
        """
        Score the bookmaker margin of the 1X2 market.

        The margin is ``1/ms_h + 1/ms_d + 1/ms_a - 1``. A margin above 20%
        scores 0.25, above 18% scores 0.15, otherwise 0.0. Missing, ``None``
        or non-positive odds yield ``(0.0, [])``.
        """
        score = 0.0
        reasons = []

        # `or 0` also covers keys that are present but hold None
        ms_h = odds_data.get("ms_h") or 0
        ms_d = odds_data.get("ms_d") or 0
        ms_a = odds_data.get("ms_a") or 0

        if ms_h > 0 and ms_d > 0 and ms_a > 0:
            margin = (1/ms_h + 1/ms_d + 1/ms_a) - 1

            if margin > 0.20:
                score = 0.25
                reasons.append(f"⚠️ Margin çok yüksek (%{margin*100:.1f}) - Bookmaker risk görüyor!")
            elif margin > 0.18:
                score = 0.15
                reasons.append(f"⚠️ Margin yüksek (%{margin*100:.1f}) - Dikkat!")

        return score, reasons

    def calculate_favorite_odds_trap(
        self,
        favorite_odds: float,
        favorite_side: str  # 'home' or 'away'
    ) -> Tuple[float, List[str]]:
        """
        Score how upset-prone the favourite's odds range is.

        The score is the upset rate of the matching range in
        ``FAVORITE_ODDS_UPSET_RATES`` (0.0 when no range matches). Odds below
        1.20 are additionally treated as a suspected trap price and score at
        least 0.20. ``favorite_side`` is accepted but not used here.
        """
        score = 0.0
        reasons = []

        if favorite_odds <= 0:
            return score, reasons

        for (low, high), upset_rate in self.FAVORITE_ODDS_UPSET_RATES.items():
            if low <= favorite_odds < high:
                score = upset_rate  # the upset rate is used directly as the score
                if upset_rate >= 0.30:
                    reasons.append(f"🔴 Favori oran {favorite_odds:.2f} - %{upset_rate*100:.0f} sürpriz oranı!")
                elif upset_rate >= 0.20:
                    reasons.append(f"⚠️ Favori oran {favorite_odds:.2f} - %{upset_rate*100:.0f} sürpriz riski")
                break

        # Very short price: suspected trap odds
        if favorite_odds < 1.20:
            score = max(score, 0.20)
            reasons.append(f"⚠️ Favori oran çok düşük ({favorite_odds:.2f}) - Tuzak oranı şüphesi")

        return score, reasons

    def calculate_referee_upset_score(
        self,
        referee_name: str
    ) -> Tuple[float, List[str]]:
        """
        Score how often the home side fails to win under this referee.

        The rate is ``(away_wins + 0.5 * draws) / total`` over the referee's
        finished matches (at least 3 required). Above 40% scores 0.25, above
        30% scores 0.15. Returns ``(0.0, [])`` when the name is empty, no
        connection is available or the query fails; failures are reported
        and rolled back instead of being silently ignored.
        """
        score = 0.0
        reasons = []

        if not referee_name:
            return score, reasons

        conn = self._get_conn()
        if conn is None:
            return score, reasons

        cur = None
        try:
            cur = conn.cursor()

            # Results of the finished matches officiated by this referee
            cur.execute("""
                SELECT
                    COUNT(*) as total,
                    SUM(CASE WHEN m.score_home < m.score_away THEN 1 ELSE 0 END) as away_wins,
                    SUM(CASE WHEN m.score_home = m.score_away THEN 1 ELSE 0 END) as draws
                FROM match_officials mo
                JOIN matches m ON m.id = mo.match_id
                WHERE mo.name = %s AND mo.role_id = 1
                AND m.score_home IS NOT NULL
            """, (referee_name,))

            row = cur.fetchone()

            if row and row[0] and row[0] >= 3:
                total = row[0]
                away_wins = row[1] or 0
                draws = row[2] or 0

                upset_rate = (away_wins + draws * 0.5) / total

                if upset_rate > 0.40:
                    score = 0.25
                    reasons.append(f"👨⚖️ {referee_name}: %{upset_rate*100:.0f} sürpriz oranı (YÜKSEK!)")
                elif upset_rate > 0.30:
                    score = 0.15
                    reasons.append(f"👨⚖️ {referee_name}: %{upset_rate*100:.0f} sürpriz oranı")

        except Exception as e:
            # Still best effort, but make the failure visible and leave the
            # connection usable (a failed statement aborts the transaction).
            print(f"[UpsetEngineV2] Referee query error: {e}")
            self._rollback_quietly(conn)
            score = 0.0
            reasons = []

        finally:
            if cur is not None:
                try:
                    cur.close()
                except Exception:
                    pass  # closing is best effort; the result is already decided

        return score, reasons

    def calculate_form_trap_score(
        self,
        home_form_score: float,
        away_form_score: float,
        favorite_side: str
    ) -> Tuple[float, List[str]]:
        """
        Score form-based traps.

        A form gap above 40 points scores 0.20 regardless of direction (a
        favourite that "looks too good" is treated as a trap); a reason is
        only added when the in-form side is not the favourite. A favourite
        whose own form is below 50 scores at least 0.15.
        """
        score = 0.0
        reasons = []

        form_diff = home_form_score - away_form_score

        # Very large form gap
        if abs(form_diff) > 40:
            score = 0.20
            if form_diff > 0 and favorite_side == 'away':
                reasons.append(f"🔴 Form tuzağı! Ev sahibi formda ({home_form_score:.0f}) ama deplasman favori")
            elif form_diff < 0 and favorite_side == 'home':
                reasons.append(f"🔴 Form tuzağı! Deplasman formda ({away_form_score:.0f}) ama ev sahibi favori")

        # Favourite in poor form
        if favorite_side == 'home' and home_form_score < 50:
            score = max(score, 0.15)
            reasons.append(f"⚠️ Favori ev sahibi formu düşük ({home_form_score:.0f})")
        elif favorite_side == 'away' and away_form_score < 50:
            score = max(score, 0.15)
            reasons.append(f"⚠️ Favori deplasman formu düşük ({away_form_score:.0f})")

        return score, reasons

    # ═════════════════════════════════════════════════════════════════
    # ORIGINAL SIGNALS
    # ═════════════════════════════════════════════════════════════════

    def calculate_atmosphere_score(
        self,
        home_team_name: str,
        league_name: str,
        is_cup_match: bool = False
    ) -> Tuple[float, List[str]]:
        """
        Score the home atmosphere.

        +0.25 for a high-atmosphere home team, +0.20 for a European
        competition, +0.10 for a cup match; capped at 1.0. Team names are
        matched both as written and with Turkish letters folded to ASCII,
        so "Beşiktaş" matches the stored fragment "besiktas".
        """
        score = 0.0
        reasons = []

        # "İ".lower() yields "i" + U+0307 (combining dot); drop that mark
        home_lower = home_team_name.lower().replace("\u0307", "")
        home_folded = home_lower.translate(self._TR_ASCII_FOLD)
        if any(team in home_lower or team in home_folded
               for team in self.HIGH_ATMOSPHERE_TEAMS):
            score += 0.25
            reasons.append(f"🔥 {home_team_name} yüksek atmosferli stadyum")

        league_lower = league_name.lower().replace("\u0307", "")
        if any(comp in league_lower for comp in self.EUROPEAN_COMPETITIONS):
            score += 0.20
            reasons.append("🌟 Avrupa gecesi - ekstra motivasyon")

        if is_cup_match:
            score += 0.10
            reasons.append("🏆 Kupa maçı - her şey olabilir")

        return min(score, 1.0), reasons

    def calculate_motivation_score(
        self,
        home_position: int,
        away_position: int,
        total_teams: int = 20
    ) -> Tuple[float, List[str]]:
        """
        Score the motivation asymmetry from the league table.

        A home side at position ``total_teams - 3`` or lower scores 0.30
        against a top-3 visitor and 0.15 otherwise; a home side more than
        10 places below the visitor adds 0.15. Capped at 1.0. Returns
        ``(0.0, [])`` when either position is ``None``.
        """
        score = 0.0
        reasons = []

        if home_position is not None and away_position is not None:
            position_diff = away_position - home_position
            relegation_zone = total_teams - 3

            if home_position >= relegation_zone and away_position <= 3:
                score += 0.30
                reasons.append("⚔️ Hayatta kalma savaşı vs şampiyonluk adayı")
            elif home_position >= relegation_zone:
                score += 0.15
                reasons.append("🔥 Ev sahibi küme düşme hattında")

            if position_diff < -10:
                score += 0.15
                reasons.append(f"📊 {abs(position_diff)} sıra fark")

        return min(score, 1.0), reasons

    # ═════════════════════════════════════════════════════════════════
    # MAIN ENTRY POINT
    # ═════════════════════════════════════════════════════════════════

    def calculate_upset_potential(
        self,
        home_team_name: str,
        home_team_id: str,
        away_team_name: str,
        league_name: str,
        home_position: Optional[int] = None,
        away_position: Optional[int] = None,
        match_date_ms: Optional[int] = None,
        odds_data: Optional[Dict[str, float]] = None,
        referee_name: Optional[str] = None,
        home_form_score: float = 50.0,
        away_form_score: float = 50.0,
        favorite_side: Optional[str] = None,  # 'home', 'away', or 'draw'
        favorite_odds: Optional[float] = None
    ) -> "UpsetFactorsV2":
        """
        Run the full v2 upset analysis for one match.

        Each signal is only evaluated when its inputs are supplied (the form
        trap assumes a home favourite when ``favorite_side`` is missing).
        The signals are then converted into a 0-100 ``upset_score``, an
        ``upset_level`` (LOW / MEDIUM / HIGH / EXTREME at 30 / 45 / 60) and
        a ``total_upset_potential`` capped at 1.0. ``home_team_id``,
        ``away_team_name`` and ``match_date_ms`` are accepted but not used.

        Returns:
            UpsetFactorsV2 with every signal, the verdict and the reasons.
        """
        factors = UpsetFactorsV2()
        all_reasons = []

        # 1. Bookmaker margin (new)
        if odds_data:
            factors.margin_score, reasons = self.calculate_margin_score(odds_data)
            all_reasons.extend(reasons)

        # 2. Favourite-odds trap (new)
        if favorite_odds and favorite_side:
            factors.favorite_odds_trap, reasons = self.calculate_favorite_odds_trap(
                favorite_odds, favorite_side
            )
            all_reasons.extend(reasons)

        # 3. Referee upset rate (new)
        if referee_name:
            factors.referee_upset_score, reasons = self.calculate_referee_upset_score(
                referee_name
            )
            all_reasons.extend(reasons)

        # 4. Form trap (new)
        factors.form_trap_score, reasons = self.calculate_form_trap_score(
            home_form_score, away_form_score, favorite_side or 'home'
        )
        all_reasons.extend(reasons)

        # 5. Atmosphere (original)
        factors.atmosphere_score, reasons = self.calculate_atmosphere_score(
            home_team_name, league_name
        )
        all_reasons.extend(reasons)

        # 6. Motivation (original)
        if home_position is not None and away_position is not None:
            factors.motivation_score, reasons = self.calculate_motivation_score(
                home_position, away_position
            )
            all_reasons.extend(reasons)

        # ═══════════════════════════════════════════════════════════
        # SURPRISE SCORE (0-100) - strengthened v2.1 weights
        # ═══════════════════════════════════════════════════════════

        upset_score = 0

        # Margin: > 18% = +20, > 20% = +30
        if factors.margin_score >= 0.25:
            upset_score += 30
            all_reasons.append("🔴 Margin > %20: Bookmaker büyük risk görüyor!")
        elif factors.margin_score >= 0.15:
            upset_score += 20
            all_reasons.append("⚠️ Margin > %18: Dikkatli ol!")

        # Remaining signals: first tier reached decides the points
        upset_score += self._tier_points(
            factors.favorite_odds_trap, ((0.30, 30), (0.20, 25), (0.15, 20))
        )
        upset_score += self._tier_points(
            factors.referee_upset_score, ((0.25, 20), (0.15, 10))
        )
        upset_score += self._tier_points(
            factors.form_trap_score, ((0.20, 20), (0.15, 15))
        )
        upset_score += self._tier_points(
            factors.atmosphere_score, ((0.40, 20), (0.25, 15))
        )
        upset_score += self._tier_points(
            factors.motivation_score, ((0.30, 15), (0.15, 10))
        )

        # ═══════════════════════════════════════════════════════════
        # EXTRA RISK FACTORS
        # ═══════════════════════════════════════════════════════════

        # Away favourite: +10
        if favorite_side == 'away':
            upset_score += 10
            all_reasons.append("📍 Deplasman favorisi - ekstra risk!")

        # Favourite in very poor form (< 40): +15
        if favorite_side == 'home' and home_form_score < 40:
            upset_score += 15
            all_reasons.append(f"🔴 Favori ev sahibi formu ÇOK DÜŞÜK ({home_form_score:.0f})")
        elif favorite_side == 'away' and away_form_score < 40:
            upset_score += 15
            all_reasons.append(f"🔴 Favori deplasman formu ÇOK DÜŞÜK ({away_form_score:.0f})")

        # Very short favourite price (< 1.30) with a high margin: suspected trap
        if favorite_odds and favorite_odds < 1.30 and factors.margin_score >= 0.15:
            upset_score += 10
            all_reasons.append(f"⚠️ Düşük oran ({favorite_odds:.2f}) + yüksek margin = TUZAK ŞÜPHESİ!")

        factors.upset_score = min(upset_score, 100)

        # Map the score to a level
        if factors.upset_score >= 60:
            factors.upset_level = "EXTREME"
        elif factors.upset_score >= 45:
            factors.upset_level = "HIGH"
        elif factors.upset_score >= 30:
            factors.upset_level = "MEDIUM"
        else:
            factors.upset_level = "LOW"

        # Overall potential: atmosphere and motivation count half
        factors.total_upset_potential = min(
            (factors.margin_score + factors.favorite_odds_trap +
             factors.referee_upset_score + factors.form_trap_score +
             factors.atmosphere_score * 0.5 + factors.motivation_score * 0.5) / 1.5,
            1.0
        )

        factors.reasoning = all_reasons

        return factors
|
||||
|
||||
|
||||
# Shared UpsetEngineV2; stays None until get_upset_engine_v2() is first called
_engine_v2_instance = None


def get_upset_engine_v2():
    """
    Return the shared UpsetEngineV2, creating it on the first call.

    Building an engine opens a database connection, so the instance is
    cached at module level and reused instead of being rebuilt (and a new
    connection opened) on every call.
    """
    global _engine_v2_instance
    if _engine_v2_instance is None:
        _engine_v2_instance = UpsetEngineV2()
    return _engine_v2_instance
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: Real Madrid (home favourite) against Getafe
    sample_match = {
        "home_team_name": "Real Madrid",
        "home_team_id": "test",
        "away_team_name": "Getafe",
        "league_name": "LaLiga",
        "odds_data": {"ms_h": 1.25, "ms_d": 3.92, "ms_a": 6.86},
        "referee_name": "A. Muniz Ruiz",
        "home_form_score": 80.0,
        "away_form_score": 56.7,
        "favorite_side": "home",
        "favorite_odds": 1.25,
    }
    result = get_upset_engine_v2().calculate_upset_potential(**sample_match)

    rule = "=" * 60
    print(f"\n{rule}")
    print("Real Madrid vs Getafe - Sürpriz Analizi")
    print(rule)
    print(f"Sürpriz Skoru: {result.upset_score}/100")
    print(f"Seviye: {result.upset_level}")

    print("\nNedenler:")
    for line in result.reasoning:
        print(f" {line}")
|
||||
Executable
+249
@@ -0,0 +1,249 @@
|
||||
"""
|
||||
Value Betting Calculator
|
||||
Expected Value (EV) ve stake önerileri hesaplar.
|
||||
"""
|
||||
|
||||
from typing import Dict, Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class ValueBet:
    """Result of comparing one model probability with the market price."""

    bet_type: str               # market key, e.g. MS_1, AU25_Üst, KG_Var
    my_probability: float       # the model's own probability
    market_odds: float          # bookmaker odds
    implied_probability: float  # probability implied by the odds
    edge: float                 # my_probability - implied_probability
    expected_value: float       # EV = (prob × odds) - 1
    is_value: bool              # True when the bet qualifies as value
    kelly_fraction: float       # Kelly stake as a fraction of the bankroll
    confidence_tier: str        # "banker", "strong", "value", "skip"

    def to_dict(self) -> Dict:
        """Return the fields as a plain dict, floats rounded to 4 decimals."""
        def r4(number):
            return round(number, 4)

        return dict(
            bet_type=self.bet_type,
            my_probability=r4(self.my_probability),
            market_odds=self.market_odds,
            implied_probability=r4(self.implied_probability),
            edge=r4(self.edge),
            expected_value=r4(self.expected_value),
            is_value=self.is_value,
            kelly_fraction=r4(self.kelly_fraction),
            confidence_tier=self.confidence_tier,
        )
|
||||
|
||||
|
||||
class ValueCalculator:
    """
    Value betting calculator.

    Compares model probabilities with bookmaker odds and derives the
    expected value, a confidence tier and a stake suggestion.
    """

    # Edge thresholds (model probability minus implied probability)
    MIN_EDGE_FOR_VALUE = 0.05   # at least 5% edge
    MIN_EDGE_FOR_STRONG = 0.10  # 10%+ edge = strong value
    MIN_EDGE_FOR_BANKER = 0.15  # 15%+ edge = banker

    KELLY_FRACTION = 0.25       # quarter Kelly, applied after the cap below
    MAX_STAKE_PERCENT = 0.10    # cap applied to the raw Kelly value

    def __init__(self):
        pass

    def calculate_implied_probability(self, odds: float) -> float:
        """Return the probability implied by ``odds`` (1.0 for odds <= 1)."""
        return 1.0 if odds <= 1 else 1 / odds

    def calculate_ev(self, probability: float, odds: float) -> float:
        """
        Return the expected value of a unit stake.

        EV = (probability × odds) - 1; positive means long-run profit,
        negative means long-run loss.
        """
        gross_return = probability * odds
        return gross_return - 1

    def calculate_kelly_stake(self, probability: float, odds: float) -> float:
        """
        Return the fractional Kelly stake as a share of the bankroll.

        Kelly = (p × b - q) / b with p the win probability, q = 1 - p and
        b = odds - 1. The raw value is limited to the range
        [0, MAX_STAKE_PERCENT] and then multiplied by KELLY_FRACTION.
        Odds of 1 or less return 0.
        """
        if odds <= 1:
            return 0

        net_odds = odds - 1
        lose_probability = 1 - probability
        raw_kelly = (probability * net_odds - lose_probability) / net_odds

        # Cap from above first, then floor at zero
        capped = min(raw_kelly, self.MAX_STAKE_PERCENT)
        bounded = capped if capped > 0 else 0

        return bounded * self.KELLY_FRACTION

    def analyze_bet(self, bet_type: str, my_probability: float,
                    market_odds: float) -> "ValueBet":
        """
        Run the value analysis for a single bet.

        Args:
            bet_type: Market key (MS_1, AU25_Üst, KG_Var, ...).
            my_probability: Model probability, between 0 and 1.
            market_odds: Bookmaker odds.

        Returns:
            ValueBet: the analysis; odds of 1 or less always yield a
            "skip" result with EV -1.
        """
        if market_odds <= 1:
            return ValueBet(
                bet_type=bet_type,
                my_probability=my_probability,
                market_odds=market_odds,
                implied_probability=1.0,
                edge=0,
                expected_value=-1,
                is_value=False,
                kelly_fraction=0,
                confidence_tier="skip"
            )

        implied_probability = self.calculate_implied_probability(market_odds)
        edge = my_probability - implied_probability
        expected_value = self.calculate_ev(my_probability, market_odds)
        kelly_fraction = self.calculate_kelly_stake(my_probability, market_odds)

        # Tiers from strongest to weakest; the first one that qualifies wins
        ladder = (
            ("banker", edge >= self.MIN_EDGE_FOR_BANKER and my_probability >= 0.70),
            ("strong", edge >= self.MIN_EDGE_FOR_STRONG),
            ("value", edge >= self.MIN_EDGE_FOR_VALUE),
        )
        tier = next((name for name, qualifies in ladder if qualifies), "skip")

        return ValueBet(
            bet_type=bet_type,
            my_probability=my_probability,
            market_odds=market_odds,
            implied_probability=implied_probability,
            edge=edge,
            expected_value=expected_value,
            is_value=edge >= self.MIN_EDGE_FOR_VALUE,
            kelly_fraction=kelly_fraction,
            confidence_tier=tier
        )

    def analyze_match_predictions(self, predictions: Dict[str, float],
                                  odds: Dict[str, float]) -> Dict[str, "ValueBet"]:
        """
        Analyse every prediction of a match that has usable odds.

        Args:
            predictions: Model probabilities, e.g. {'MS_1': 0.55, 'MS_X': 0.25}.
            odds: Bookmaker odds, e.g. {'MS_1': 1.80, 'MS_X': 3.50}.

        Returns:
            Dict[str, ValueBet]: one analysis per bet type whose odds are
            present and greater than 1.
        """
        return {
            bet_type: self.analyze_bet(
                bet_type=bet_type,
                my_probability=probability,
                market_odds=odds[bet_type]
            )
            for bet_type, probability in predictions.items()
            if bet_type in odds and odds[bet_type] > 1
        }

    def get_best_value_bets(self, value_bets: Dict[str, "ValueBet"],
                            top_n: int = 3) -> list:
        """Return up to ``top_n`` value bets, highest expected value first."""
        ranked = sorted(
            (bet for bet in value_bets.values() if bet.is_value),
            key=lambda bet: bet.expected_value,
            reverse=True,
        )
        return ranked[:top_n]

    def calculate_stake(self, value_bet: "ValueBet", bankroll: float,
                        use_kelly: bool = True) -> float:
        """
        Return the suggested stake amount.

        Args:
            value_bet: Analysis of the bet.
            bankroll: Total budget.
            use_kelly: Stake by the Kelly fraction when True, otherwise by
                a fixed share per confidence tier.

        Returns:
            float: the stake; 0 when the bet is not a value bet.
        """
        if not value_bet.is_value:
            return 0

        if use_kelly:
            return bankroll * value_bet.kelly_fraction

        # Fixed bankroll share per tier
        share_by_tier = {
            "banker": 0.05,
            "strong": 0.03,
            "value": 0.02,
            "skip": 0
        }
        return bankroll * share_by_tier.get(value_bet.confidence_tier, 0)
|
||||
|
||||
|
||||
# Singleton
|
||||
_calculator = None
|
||||
|
||||
def get_value_calculator() -> ValueCalculator:
|
||||
global _calculator
|
||||
if _calculator is None:
|
||||
_calculator = ValueCalculator()
|
||||
return _calculator
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
calc = get_value_calculator()
|
||||
|
||||
print("\n🧪 Value Calculator Test")
|
||||
print("=" * 50)
|
||||
|
||||
# Test senaryoları
|
||||
test_cases = [
|
||||
{"bet": "MS_1", "prob": 0.70, "odds": 1.60}, # High prob, low odds
|
||||
{"bet": "MS_1", "prob": 0.55, "odds": 1.90}, # Medium prob, good odds
|
||||
{"bet": "MS_1", "prob": 0.60, "odds": 2.10}, # VALUE!
|
||||
{"bet": "AU25_Üst", "prob": 0.65, "odds": 1.85}, # VALUE!
|
||||
{"bet": "KG_Var", "prob": 0.50, "odds": 1.70}, # No value
|
||||
]
|
||||
|
||||
for tc in test_cases:
|
||||
result = calc.analyze_bet(tc["bet"], tc["prob"], tc["odds"])
|
||||
|
||||
status_emoji = "✅" if result.is_value else "❌"
|
||||
tier_emoji = {"banker": "🎯", "strong": "💪", "value": "✓", "skip": "⏭️"}
|
||||
|
||||
print(f"\n{status_emoji} {tc['bet']}")
|
||||
print(f" Tahmin: {tc['prob']:.0%} | Oran: {tc['odds']:.2f} | Implied: {result.implied_probability:.0%}")
|
||||
print(f" Edge: {result.edge:+.1%} | EV: {result.expected_value:+.1%}")
|
||||
print(f" Tier: {tier_emoji.get(result.confidence_tier, '')} {result.confidence_tier.upper()}")
|
||||
print(f" Kelly Stake: {result.kelly_fraction:.2%} of bankroll")
|
||||
|
||||
if result.is_value:
|
||||
stake = calc.calculate_stake(result, 1000)
|
||||
print(f" 💰 Önerilen Stake (1000 TL bank): {stake:.2f} TL")
|
||||
@@ -0,0 +1,415 @@
|
||||
"""
|
||||
Value Detection Engine
|
||||
======================
|
||||
The Smart Way to Beat the Bookmakers
|
||||
|
||||
This engine doesn't just predict winners - it finds VALUE.
|
||||
The key insight: We don't need to predict the winner, we need to find
|
||||
where the bookmaker made a mistake in their odds.
|
||||
|
||||
Core Philosophy:
|
||||
- High Margin = High Uncertainty = Potential Value
|
||||
- Model Probability > Implied Probability = Value Bet
|
||||
- The goal is NOT to predict correctly, but to find +EV bets
|
||||
|
||||
Author: AI Engine V21
|
||||
"""
|
||||
|
||||
import math
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
from collections import defaultdict
|
||||
|
||||
|
||||
@dataclass
class ValueBet:
    """A single value-bet opportunity."""

    outcome: str                # "1", "X", "2"
    model_probability: float    # model probability, 0-1
    implied_probability: float  # bookmaker implied probability, 0-1
    odds: float                 # bookmaker odds
    edge: float                 # model_probability - implied_probability
    expected_value: float       # EV = (prob * odds) - 1
    kelly_fraction: float       # suggested bet size
    confidence: str             # "HIGH", "MEDIUM", "LOW"
    reasons: List[str]          # why this bet is considered value

    def to_dict(self) -> dict:
        """Return a dict view with fractional fields shown as percentages (1 decimal)."""
        def as_percent(fraction):
            return round(fraction * 100, 1)

        return dict(
            outcome=self.outcome,
            model_prob=as_percent(self.model_probability),
            implied_prob=as_percent(self.implied_probability),
            odds=self.odds,
            edge=as_percent(self.edge),
            ev=as_percent(self.expected_value),
            kelly=as_percent(self.kelly_fraction),
            confidence=self.confidence,
            reasons=self.reasons,
        )
|
||||
|
||||
|
||||
@dataclass
class MarginAnalysis:
    """Summary of the bookmaker margin for one market."""

    raw_margin: float       # sum of raw implied probabilities minus 1
    true_margin: float      # adjusted for favourite-longshot bias
    favorite_outcome: str
    favorite_odds: float
    uncertainty_level: str  # "LOW", "MEDIUM", "HIGH", "EXTREME"

    def to_dict(self) -> dict:
        """Return a dict view with both margins shown as percentages (1 decimal)."""
        raw_pct = round(self.raw_margin * 100, 1)
        true_pct = round(self.true_margin * 100, 1)
        return dict(
            raw_margin=raw_pct,
            true_margin=true_pct,
            favorite=self.favorite_outcome,
            favorite_odds=self.favorite_odds,
            uncertainty=self.uncertainty_level,
        )
|
||||
|
||||
|
||||
class ValueDetectionEngine:
    """
    The Smart Betting Engine

    This engine finds value bets by comparing model probabilities
    with bookmaker implied probabilities.

    Key Insights:
    1. Margin > 18% → Bookmaker is unsure, potential value on underdog
    2. Margin > 20% → Bookmaker sees high risk, BIG potential value
    3. Favorite odds 1.40-1.60 → Highest upset rate historically
    4. Away favorites have higher upset rate than home favorites
    """

    # Historical upset rates keyed by half-open favorite-odds range [low, high)
    UPSET_RATES = {
        (1.00, 1.25): 0.08,  # 8% upset rate
        (1.25, 1.40): 0.18,  # 18% upset rate
        (1.40, 1.60): 0.33,  # 33% upset rate - DANGER ZONE
        (1.60, 1.80): 0.28,  # 28% upset rate
        (1.80, 2.00): 0.35,  # 35% upset rate
        (2.00, 2.50): 0.42,  # 42% upset rate
        (2.50, 3.00): 0.45,  # 45% upset rate
        (3.00, 5.00): 0.55,  # 55% upset rate
    }

    # Margin thresholds
    MARGIN_LOW = 0.06      # 6% - bookmaker very confident
    MARGIN_MEDIUM = 0.12   # 12% - normal margin
    MARGIN_HIGH = 0.18     # 18% - bookmaker unsure
    # NOTE(review): MARGIN_EXTREME is not referenced by any method of this
    # class; calculate_margin labels everything >= MARGIN_HIGH as "EXTREME".
    MARGIN_EXTREME = 0.22  # 22% - bookmaker very unsure

    def __init__(self):
        self.historical_data = []      # For learning
        self.value_threshold = 0.03    # Minimum 3% edge to consider value

    def calculate_margin(self, odds_1: float, odds_x: float, odds_2: float) -> "MarginAnalysis":
        """
        Calculate bookmaker margin and analyze uncertainty.

        Higher margin = More uncertainty = More potential value

        Args:
            odds_1: Decimal odds for the home win.
            odds_x: Decimal odds for the draw.
            odds_2: Decimal odds for the away win.

        Returns:
            A MarginAnalysis. If any of the three odds is not greater than 1
            a placeholder is returned (zero margins, favorite "X" with odds
            0, uncertainty level "UNKNOWN").
        """
        if not all([odds_1 > 1, odds_x > 1, odds_2 > 1]):
            return MarginAnalysis(0, 0, "X", 0, "UNKNOWN")

        # Raw implied probabilities
        imp_1 = 1 / odds_1
        imp_x = 1 / odds_x
        imp_2 = 1 / odds_2

        raw_margin = imp_1 + imp_x + imp_2 - 1

        # Determine favorite (ties resolve to "1", then "2", then "X")
        if odds_1 <= odds_x and odds_1 <= odds_2:
            favorite_outcome = "1"
            favorite_odds = odds_1
        elif odds_2 <= odds_1 and odds_2 <= odds_x:
            favorite_outcome = "2"
            favorite_odds = odds_2
        else:
            favorite_outcome = "X"
            favorite_odds = odds_x

        # Adjust for favorite-longshot bias
        # Bookmakers typically overprice longshots
        true_margin = raw_margin * 0.85  # Simplified adjustment

        # Determine uncertainty level
        if raw_margin < self.MARGIN_LOW:
            uncertainty = "LOW"
        elif raw_margin < self.MARGIN_MEDIUM:
            uncertainty = "MEDIUM"
        elif raw_margin < self.MARGIN_HIGH:
            uncertainty = "HIGH"
        else:
            uncertainty = "EXTREME"

        return MarginAnalysis(
            raw_margin=raw_margin,
            true_margin=true_margin,
            favorite_outcome=favorite_outcome,
            favorite_odds=favorite_odds,
            uncertainty_level=uncertainty,
        )

    def get_historical_upset_rate(self, favorite_odds: float) -> float:
        """Return the UPSET_RATES entry whose [low, high) range holds the odds.

        Odds outside every listed range fall back to 0.40.
        """
        for (low, high), rate in self.UPSET_RATES.items():
            if low <= favorite_odds < high:
                return rate
        return 0.40  # Default for odds outside the table

    def calculate_edge(
        self,
        model_prob: float,
        odds: float,
        margin: float
    ) -> Tuple[float, float]:
        """
        Calculate the edge (advantage) we have over the bookmaker.

        Returns: (edge, expected_value)

        Edge = Model Probability - Implied Probability
        EV = (Probability * Odds) - 1

        ``margin`` is accepted for interface compatibility but is not used:
        the raw implied probability (1 / odds) is taken as-is. Odds that are
        not greater than 1 yield ``(0, -1)``.
        """
        if odds <= 1:
            return 0, -1

        # Raw implied probability; no margin removal is applied here.
        implied = 1 / odds
        true_implied = implied  # Simplified - could be more sophisticated

        edge = model_prob - true_implied
        ev = (model_prob * odds) - 1

        return edge, ev

    def calculate_kelly_fraction(
        self,
        probability: float,
        odds: float,
        half_kelly: bool = True
    ) -> float:
        """
        Calculate optimal bet size using Kelly Criterion.

        Kelly = (p * b - q) / b = (p * odds - 1) / (odds - 1)
        where b = odds - 1 (net odds) and q = 1 - p

        Args:
            probability: Estimated win probability (0-1).
            odds: Decimal odds offered.
            half_kelly: Halve the Kelly stake for safety (default).

        Returns:
            Bankroll fraction to stake: 0 when odds <= 1 or the bet has no
            positive expectation, otherwise capped at 0.10.
        """
        if odds <= 1:
            return 0

        b = odds - 1
        # The numerator is the expected profit per unit staked. The previous
        # form (p * b - 1) dropped the returned stake and was therefore
        # negative for virtually every realistic bet.
        kelly = (probability * odds - 1) / b

        # Don't bet if negative
        if kelly < 0:
            return 0

        # Use half Kelly for safety
        if half_kelly:
            kelly = kelly / 2

        # Cap at 10% of bankroll
        return min(kelly, 0.10)

    def find_value_bets(
        self,
        model_probs: Dict[str, float],
        odds: Dict[str, float],
        match_context: Optional[Dict] = None
    ) -> List["ValueBet"]:
        """
        Find all value bets in a match.

        This is the MAIN method - it finds where we have an edge.

        Args:
            model_probs: {"1": 0.55, "X": 0.25, "2": 0.20}
            odds: {"1": 1.25, "X": 4.50, "2": 8.00}
            match_context: Additional context (form, h2h, etc.); currently
                not read by this method.

        Returns:
            List of ValueBet objects, sorted by edge (highest first). The
            stored edge includes the heuristic bonuses applied below.
        """
        value_bets = []

        # Calculate margin
        margin_analysis = self.calculate_margin(
            odds.get("1", 0),
            odds.get("X", 0),
            odds.get("2", 0)
        )
        # With incomplete odds calculate_margin returns a placeholder
        # favorite ("X" at odds 0); favorite-based heuristics would then
        # report nonsense such as "Favorite odds 0 has 40% upset rate".
        market_known = margin_analysis.uncertainty_level != "UNKNOWN"

        # Analyze each outcome
        for outcome in ["1", "X", "2"]:
            prob = model_probs.get(outcome, 0)
            odd = odds.get(outcome, 0)

            if prob <= 0 or odd <= 1:
                continue

            edge, ev = self.calculate_edge(prob, odd, margin_analysis.raw_margin)
            kelly = self.calculate_kelly_fraction(prob, odd)

            # Determine if this is a value bet
            reasons = []

            # 1. Basic edge
            if edge > self.value_threshold:
                reasons.append(f"Edge: +{round(edge*100, 1)}% over bookmaker")

            # 2. High margin bonus
            if margin_analysis.raw_margin > self.MARGIN_HIGH:
                reasons.append(f"High margin ({round(margin_analysis.raw_margin*100, 1)}%) = uncertainty")

                # Boost edge for underdogs in high margin matches
                if outcome != margin_analysis.favorite_outcome:
                    edge += 0.02  # 2% bonus
                    reasons.append("Underdog in high-margin match = bonus value")

            # 3. Favorite odds trap
            fav_odds = margin_analysis.favorite_odds
            if market_known and margin_analysis.favorite_outcome != outcome:
                upset_rate = self.get_historical_upset_rate(fav_odds)
                if upset_rate > 0.25:
                    reasons.append(f"Favorite odds {fav_odds} has {round(upset_rate*100)}% upset rate")

                    # Extra bonus for 1.40-1.60 range
                    if 1.40 <= fav_odds <= 1.60:
                        edge += 0.03
                        reasons.append("DANGER ZONE: 1.40-1.60 odds = highest upset risk")

            # 4. Away favorite risk
            if margin_analysis.favorite_outcome == "2" and outcome == "1":
                edge += 0.015
                reasons.append("Away favorite = extra home value")

            # 5. EV positive
            if ev > 0:
                reasons.append(f"Positive EV: +{round(ev*100, 1)}%")

            # Only add if we have reasons (value detected)
            if reasons and edge > 0:
                # Determine confidence
                if edge > 0.08 or (edge > 0.05 and kelly > 0.03):
                    confidence = "HIGH"
                elif edge > 0.05:
                    confidence = "MEDIUM"
                else:
                    confidence = "LOW"

                value_bets.append(ValueBet(
                    outcome=outcome,
                    model_probability=prob,
                    implied_probability=1/odd,
                    odds=odd,
                    edge=edge,
                    expected_value=ev,
                    kelly_fraction=kelly,
                    confidence=confidence,
                    reasons=reasons
                ))

        # Sort by edge (highest first)
        value_bets.sort(key=lambda x: x.edge, reverse=True)

        return value_bets

    def predict_with_value(
        self,
        model_probs: Dict[str, float],
        odds: Dict[str, float],
        match_context: Optional[Dict] = None
    ) -> Dict:
        """
        Make a prediction based on VALUE, not just probability.

        This is the smart way to bet:
        - If there's clear value on one outcome → Bet it
        - If there's no value → NO BET (don't force it)
        - If margin is extreme → Look for underdog value

        Returns:
            {
                "margin_analysis": dict (MarginAnalysis.to_dict()),
                "value_bets": list of dict (ValueBet.to_dict()),
                "best_value": dict or None,
                "alternative_value": dict or None,
                "recommendation": "BET_<outcome>", "CONSIDER_<outcome>" or "NO_BET",
                "confidence": str,
                "reasoning": list of str
            }
        """
        margin_analysis = self.calculate_margin(
            odds.get("1", 0),
            odds.get("X", 0),
            odds.get("2", 0)
        )

        value_bets = self.find_value_bets(model_probs, odds, match_context)

        result = {
            "margin_analysis": margin_analysis.to_dict(),
            "value_bets": [vb.to_dict() for vb in value_bets],
            "best_value": None,
            "alternative_value": None,
            "recommendation": "NO_BET",
            "confidence": "LOW",
            "reasoning": []
        }

        if not value_bets:
            result["reasoning"].append("No value detected in any outcome")
            result["reasoning"].append("Bookmaker odds are efficient for this match")
            return result

        # Get best value bet
        best = value_bets[0]
        result["best_value"] = best.to_dict()

        if len(value_bets) > 1:
            result["alternative_value"] = value_bets[1].to_dict()

        # Determine recommendation.
        # "reasoning" starts from a COPY of the bet's reasons: appending to
        # the shared list would also rewrite the reasons already exposed via
        # "best_value" and "value_bets".
        if best.confidence == "HIGH" and best.edge > 0.05:
            result["recommendation"] = f"BET_{best.outcome}"
            result["confidence"] = "HIGH"
            result["reasoning"] = list(best.reasons)
            result["reasoning"].append(f"Strong value on {best.outcome} with {round(best.edge*100, 1)}% edge")

        elif best.confidence == "MEDIUM" or best.edge > 0.03:
            result["recommendation"] = f"CONSIDER_{best.outcome}"
            result["confidence"] = "MEDIUM"
            result["reasoning"] = list(best.reasons)
            result["reasoning"].append(f"Moderate value on {best.outcome}")

        else:
            result["recommendation"] = "NO_BET"
            result["confidence"] = "LOW"
            result["reasoning"].append("Edge too small to justify bet")
            result["reasoning"].append(f"Best edge: {round(best.edge*100, 1)}% (need >3%)")

        # Add margin context
        if margin_analysis.uncertainty_level == "EXTREME":
            result["reasoning"].append("⚠️ EXTREME margin - high volatility match")
        elif margin_analysis.uncertainty_level == "HIGH":
            result["reasoning"].append("⚠️ High margin - bookmaker sees risk")

        return result
|
||||
|
||||
|
||||
# Module-level cache for the shared engine; filled on first request.
_engine_instance = None


def get_value_detection_engine() -> ValueDetectionEngine:
    """Return the shared ValueDetectionEngine, creating it on first use."""
    global _engine_instance
    if _engine_instance is not None:
        return _engine_instance
    _engine_instance = ValueDetectionEngine()
    return _engine_instance
|
||||
@@ -0,0 +1,167 @@
|
||||
"""
|
||||
Shared VQWEN feature contract
|
||||
=============================
|
||||
|
||||
One place defines how VQWEN features are produced.
|
||||
Both training and runtime inference must use this module so the model sees
|
||||
the same feature semantics in historical data and live analysis.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
import numpy as np
|
||||
|
||||
# Ordered model input columns. row_to_array emits values in exactly this
# order, so the sequence must not be rearranged.
FEATURE_COLUMNS = [
    # Rating and expected-goal features
    "elo_diff", "h_xg", "a_xg", "total_xg",
    # Power, rest and fatigue features
    "pow_diff", "rest_diff", "h_fat", "a_fat",
    # Bookmaker implied probabilities (home / draw / away)
    "imp_h", "imp_d", "imp_a",
    # Lineup availability (home / away)
    "h_xi", "a_xi",
    # Head-to-head and form
    "h2h_h_wr", "form_diff",
]
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class VqwenFeatureInput:
|
||||
home_elo: float
|
||||
away_elo: float
|
||||
home_avg_goals_scored: float
|
||||
away_avg_goals_scored: float
|
||||
home_avg_goals_conceded: float
|
||||
away_avg_goals_conceded: float
|
||||
home_avg_shots_on_target: float
|
||||
away_avg_shots_on_target: float
|
||||
home_avg_possession: float
|
||||
away_avg_possession: float
|
||||
home_rest_days: float
|
||||
away_rest_days: float
|
||||
implied_prob_home: float
|
||||
implied_prob_draw: float
|
||||
implied_prob_away: float
|
||||
home_lineup_availability: float = 1.0
|
||||
away_lineup_availability: float = 1.0
|
||||
h2h_home_win_rate: float = 0.5
|
||||
home_form_score: float = 0.0
|
||||
away_form_score: float = 0.0
|
||||
league_avg_goals: float = 2.6
|
||||
referee_avg_goals: float = 2.6
|
||||
referee_home_bias: float = 0.0
|
||||
home_squad_strength: float = 0.5
|
||||
away_squad_strength: float = 0.5
|
||||
home_key_players: float = 0.0
|
||||
away_key_players: float = 0.0
|
||||
missing_players_impact: float = 0.0
|
||||
|
||||
|
||||
def fatigue_multiplier(rest_days: float) -> float:
    """Map days of rest to a multiplier: 0.85 below 3, 0.95 below 5, else 1.0."""
    # Thresholds are checked in ascending order; the first match wins.
    for day_limit, factor in ((3.0, 0.85), (5.0, 0.95)):
        if rest_days < day_limit:
            return factor
    return 1.0
|
||||
|
||||
|
||||
def clamp(value: float, lower: float, upper: float) -> float:
    """Convert *value* to float and restrict it to the range [lower, upper]."""
    number = float(value)
    # Raise to the lower bound first, then cap at the upper bound.
    floored = lower if lower > number else number
    return upper if upper < floored else floored
|
||||
|
||||
|
||||
def build_vqwen_feature_row(values: VqwenFeatureInput) -> dict[str, float]:
    """Turn raw match inputs into the feature mapping keyed by column name.

    Expected goals are the average of one side's scoring and the other
    side's conceding (floored at 0.05), scaled by fatigue, goal environment
    and a squad multiplier. Probabilities and rates are clamped before being
    emitted.
    """
    fat_h = fatigue_multiplier(values.home_rest_days)
    fat_a = fatigue_multiplier(values.away_rest_days)

    # Goal environment: mean of league and referee averages relative to 2.6.
    env_goals = (
        float(values.league_avg_goals) + float(values.referee_avg_goals)
    ) / 2.0
    env_scale = clamp(env_goals / 2.6, 0.85, 1.2)

    squad_gap = float(values.home_squad_strength) - float(values.away_squad_strength)
    star_gap = float(values.home_key_players) - float(values.away_key_players)
    absences = clamp(float(values.missing_players_impact), 0.0, 1.0)
    ref_tilt = clamp(float(values.referee_home_bias), -0.25, 0.25)

    # Squad multipliers mirror each other, except that the absence penalty
    # is subtracted on both sides.
    squad_scale_h = clamp(
        1.0 + squad_gap * 0.08 + star_gap * 0.025 - absences * 0.08 + ref_tilt * 0.03,
        0.82,
        1.18,
    )
    squad_scale_a = clamp(
        1.0 - squad_gap * 0.08 - star_gap * 0.025 - absences * 0.08 - ref_tilt * 0.03,
        0.82,
        1.18,
    )

    base_h = (
        float(values.home_avg_goals_scored) + float(values.away_avg_goals_conceded)
    ) / 2.0
    xg_h = max(0.05, base_h) * fat_h * env_scale * squad_scale_h

    base_a = (
        float(values.away_avg_goals_scored) + float(values.home_avg_goals_conceded)
    ) / 2.0
    xg_a = max(0.05, base_a) * fat_a * env_scale * squad_scale_a

    # Composite power score per side; referee bias is added for home and
    # subtracted for away.
    power_h = (
        float(values.home_avg_goals_scored) * 5.0
        - float(values.home_avg_goals_conceded) * 5.0
        + float(values.home_avg_shots_on_target) * 2.0
        + float(values.home_avg_possession) * 0.1
        + float(values.home_squad_strength) * 3.0
        + float(values.home_key_players) * 0.8
        + ref_tilt * 6.0
    )
    power_a = (
        float(values.away_avg_goals_scored) * 5.0
        - float(values.away_avg_goals_conceded) * 5.0
        + float(values.away_avg_shots_on_target) * 2.0
        + float(values.away_avg_possession) * 0.1
        + float(values.away_squad_strength) * 3.0
        + float(values.away_key_players) * 0.8
        - ref_tilt * 6.0
    )

    features = {}
    features["elo_diff"] = float(values.home_elo) - float(values.away_elo)
    features["h_xg"] = xg_h
    features["a_xg"] = xg_a
    features["total_xg"] = xg_h + xg_a
    features["pow_diff"] = power_h - power_a
    features["rest_diff"] = float(values.home_rest_days) - float(values.away_rest_days)
    features["h_fat"] = fat_h
    features["a_fat"] = fat_a
    features["imp_h"] = clamp(values.implied_prob_home, 0.01, 0.98)
    features["imp_d"] = clamp(values.implied_prob_draw, 0.01, 0.98)
    features["imp_a"] = clamp(values.implied_prob_away, 0.01, 0.98)
    # Column names are preserved for artifact compatibility.
    # Semantics are now "pre-match lineup availability" instead of leaked
    # post-match starting-XI counts.
    features["h_xi"] = clamp(values.home_lineup_availability, 0.0, 1.0)
    features["a_xi"] = clamp(values.away_lineup_availability, 0.0, 1.0)
    features["h2h_h_wr"] = clamp(values.h2h_home_win_rate, 0.0, 1.0)
    features["form_diff"] = (
        float(values.home_form_score)
        - float(values.away_form_score)
        + squad_gap * 1.5
        + star_gap * 0.35
        + ref_tilt * 2.0
        - absences * 1.75
    )
    return features
|
||||
|
||||
|
||||
def row_to_array(row: dict[str, float]) -> np.ndarray:
    """Return *row* as a single-row float64 matrix ordered by FEATURE_COLUMNS."""
    ordered_values = []
    for column_name in FEATURE_COLUMNS:
        ordered_values.append(float(row[column_name]))
    # Wrap in an outer list so the result is 2-D (one row).
    return np.array([ordered_values], dtype=np.float64)
|
||||
Reference in New Issue
Block a user