feat(ai-engine): value sniper thresholds and logic relaxed

This commit is contained in:
2026-05-06 17:44:45 +03:00
parent 5b5f83c8cf
commit 4f7090e2d9
13 changed files with 2040 additions and 382 deletions
+151 -24
View File
@@ -14,11 +14,40 @@ is missing or queries fail.
from __future__ import annotations
import unicodedata
from typing import Any, Dict, Optional, Tuple
from psycopg2.extras import RealDictCursor
# ─── Turkish Name Normalization ──────────────────────────────────
_TR_CHAR_MAP = str.maketrans(
'çÇğĞıİöÖşŞüÜâÂîÎûÛ',
'cCgGiIoOsSuUaAiIuU',
)
def _normalize_name(name: str) -> str:
"""
Normalize a Turkish referee name for fuzzy matching.
Strips accents, lowercases, removes extra whitespace, and maps
Turkish-specific characters to their ASCII equivalents.
"""
if not name:
return ''
# 1. Turkish-specific character mapping
normalized = name.translate(_TR_CHAR_MAP)
# 2. Unicode NFKD decomposition → strip combining marks
normalized = unicodedata.normalize('NFKD', normalized)
normalized = ''.join(
c for c in normalized if not unicodedata.combining(c)
)
# 3. Lowercase + collapse whitespace
return ' '.join(normalized.lower().split())
class FeatureEnrichmentService:
"""Stateless service — all state comes from DB via cursor."""
@@ -380,34 +409,20 @@ class FeatureEnrichmentService:
"""
Referee tendencies: home win bias, avg goals, card rates.
Matches referee by name in match_officials (role_id=1 = Orta Hakem).
Uses Turkish-aware fuzzy matching as a fallback when exact name
lookup returns zero results.
"""
if not referee_name:
return dict(self._DEFAULT_REFEREE)
try:
# Get match IDs officiated by this referee
cur.execute(
"""
SELECT
m.home_team_id,
m.score_home,
m.score_away,
m.id AS match_id
FROM match_officials mo
JOIN matches m ON m.id = mo.match_id
WHERE mo.name = %s
AND mo.role_id = 1
AND m.status = 'FT'
AND m.score_home IS NOT NULL
AND m.score_away IS NOT NULL
AND m.mst_utc < %s
ORDER BY m.mst_utc DESC
LIMIT %s
""",
(referee_name, before_date_ms, limit),
rows = self._query_referee_matches(cur, referee_name, before_date_ms, limit)
# Fuzzy fallback: if exact match fails, try normalized name search
if not rows:
rows = self._fuzzy_referee_lookup(
cur, referee_name, before_date_ms, limit,
)
rows = cur.fetchall()
except Exception:
return dict(self._DEFAULT_REFEREE)
if not rows:
return dict(self._DEFAULT_REFEREE)
@@ -459,6 +474,118 @@ class FeatureEnrichmentService:
'experience': total,
}
def _query_referee_matches(
self,
cur: RealDictCursor,
referee_name: str,
before_date_ms: int,
limit: int,
) -> list:
"""Exact-match referee lookup in match_officials."""
try:
cur.execute(
"""
SELECT
m.home_team_id,
m.score_home,
m.score_away,
m.id AS match_id
FROM match_officials mo
JOIN matches m ON m.id = mo.match_id
WHERE mo.name = %s
AND mo.role_id = 1
AND m.status = 'FT'
AND m.score_home IS NOT NULL
AND m.score_away IS NOT NULL
AND m.mst_utc < %s
ORDER BY m.mst_utc DESC
LIMIT %s
""",
(referee_name, before_date_ms, limit),
)
return cur.fetchall()
except Exception:
return []
def _fuzzy_referee_lookup(
self,
cur: RealDictCursor,
referee_name: str,
before_date_ms: int,
limit: int,
) -> list:
"""
Fuzzy referee lookup using Turkish name normalization.
Strategy: fetch recent distinct referee names from match_officials,
normalize both the query name and each candidate, and pick the
best match. This handles common mismatches like:
- 'Hüseyin Göçek' vs 'Huseyin Gocek'
- 'Ali Palabıyık' vs 'Ali Palabiyik'
- Extra/missing middle initials
"""
normalized_query = _normalize_name(referee_name)
if not normalized_query:
return []
try:
# Fetch candidate referee names (distinct, recent, role=1)
cur.execute(
"""
SELECT DISTINCT mo.name
FROM match_officials mo
JOIN matches m ON m.id = mo.match_id
WHERE mo.role_id = 1
AND m.status = 'FT'
AND m.mst_utc < %s
ORDER BY mo.name
LIMIT 2000
""",
(before_date_ms,),
)
candidates = cur.fetchall()
except Exception:
return []
if not candidates:
return []
# Find best match by normalized name comparison
best_match: Optional[str] = None
best_score = 0.0
for cand_row in candidates:
cand_name = cand_row.get('name', '')
if not cand_name:
continue
normalized_cand = _normalize_name(cand_name)
# Exact normalized match
if normalized_cand == normalized_query:
best_match = cand_name
best_score = 1.0
break
# Substring containment (handles "First Last" vs "First M. Last")
if (
normalized_query in normalized_cand
or normalized_cand in normalized_query
):
containment_score = min(
len(normalized_query), len(normalized_cand)
) / max(len(normalized_query), len(normalized_cand))
if containment_score > best_score and containment_score > 0.6:
best_match = cand_name
best_score = containment_score
if not best_match:
return []
# Re-query with the resolved name
return self._query_referee_matches(
cur, best_match, before_date_ms, limit,
)
# ─── 5. League Averages ─────────────────────────────────────────
def compute_league_averages(