feat(ai-engine): value sniper thresholds and logic relaxed
This commit is contained in:
@@ -14,11 +14,40 @@ is missing or queries fail.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import unicodedata
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
|
||||
# ─── Turkish Name Normalization ──────────────────────────────────
|
||||
|
||||
_TR_CHAR_MAP = str.maketrans(
|
||||
'çÇğĞıİöÖşŞüÜâÂîÎûÛ',
|
||||
'cCgGiIoOsSuUaAiIuU',
|
||||
)
|
||||
|
||||
|
||||
def _normalize_name(name: str) -> str:
|
||||
"""
|
||||
Normalize a Turkish referee name for fuzzy matching.
|
||||
|
||||
Strips accents, lowercases, removes extra whitespace, and maps
|
||||
Turkish-specific characters to their ASCII equivalents.
|
||||
"""
|
||||
if not name:
|
||||
return ''
|
||||
# 1. Turkish-specific character mapping
|
||||
normalized = name.translate(_TR_CHAR_MAP)
|
||||
# 2. Unicode NFKD decomposition → strip combining marks
|
||||
normalized = unicodedata.normalize('NFKD', normalized)
|
||||
normalized = ''.join(
|
||||
c for c in normalized if not unicodedata.combining(c)
|
||||
)
|
||||
# 3. Lowercase + collapse whitespace
|
||||
return ' '.join(normalized.lower().split())
|
||||
|
||||
|
||||
class FeatureEnrichmentService:
|
||||
"""Stateless service — all state comes from DB via cursor."""
|
||||
|
||||
@@ -380,34 +409,20 @@ class FeatureEnrichmentService:
|
||||
"""
|
||||
Referee tendencies: home win bias, avg goals, card rates.
|
||||
Matches referee by name in match_officials (role_id=1 = Orta Hakem).
|
||||
|
||||
Uses Turkish-aware fuzzy matching as a fallback when exact name
|
||||
lookup returns zero results.
|
||||
"""
|
||||
if not referee_name:
|
||||
return dict(self._DEFAULT_REFEREE)
|
||||
try:
|
||||
# Get match IDs officiated by this referee
|
||||
cur.execute(
|
||||
"""
|
||||
SELECT
|
||||
m.home_team_id,
|
||||
m.score_home,
|
||||
m.score_away,
|
||||
m.id AS match_id
|
||||
FROM match_officials mo
|
||||
JOIN matches m ON m.id = mo.match_id
|
||||
WHERE mo.name = %s
|
||||
AND mo.role_id = 1
|
||||
AND m.status = 'FT'
|
||||
AND m.score_home IS NOT NULL
|
||||
AND m.score_away IS NOT NULL
|
||||
AND m.mst_utc < %s
|
||||
ORDER BY m.mst_utc DESC
|
||||
LIMIT %s
|
||||
""",
|
||||
(referee_name, before_date_ms, limit),
|
||||
|
||||
rows = self._query_referee_matches(cur, referee_name, before_date_ms, limit)
|
||||
|
||||
# Fuzzy fallback: if exact match fails, try normalized name search
|
||||
if not rows:
|
||||
rows = self._fuzzy_referee_lookup(
|
||||
cur, referee_name, before_date_ms, limit,
|
||||
)
|
||||
rows = cur.fetchall()
|
||||
except Exception:
|
||||
return dict(self._DEFAULT_REFEREE)
|
||||
|
||||
if not rows:
|
||||
return dict(self._DEFAULT_REFEREE)
|
||||
@@ -459,6 +474,118 @@ class FeatureEnrichmentService:
|
||||
'experience': total,
|
||||
}
|
||||
|
||||
def _query_referee_matches(
|
||||
self,
|
||||
cur: RealDictCursor,
|
||||
referee_name: str,
|
||||
before_date_ms: int,
|
||||
limit: int,
|
||||
) -> list:
|
||||
"""Exact-match referee lookup in match_officials."""
|
||||
try:
|
||||
cur.execute(
|
||||
"""
|
||||
SELECT
|
||||
m.home_team_id,
|
||||
m.score_home,
|
||||
m.score_away,
|
||||
m.id AS match_id
|
||||
FROM match_officials mo
|
||||
JOIN matches m ON m.id = mo.match_id
|
||||
WHERE mo.name = %s
|
||||
AND mo.role_id = 1
|
||||
AND m.status = 'FT'
|
||||
AND m.score_home IS NOT NULL
|
||||
AND m.score_away IS NOT NULL
|
||||
AND m.mst_utc < %s
|
||||
ORDER BY m.mst_utc DESC
|
||||
LIMIT %s
|
||||
""",
|
||||
(referee_name, before_date_ms, limit),
|
||||
)
|
||||
return cur.fetchall()
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
def _fuzzy_referee_lookup(
    self,
    cur: RealDictCursor,
    referee_name: str,
    before_date_ms: int,
    limit: int,
) -> list:
    """
    Resolve a referee by normalized name and return their matches.

    Candidate names (role_id = 1, matches with status 'FT' played before
    ``before_date_ms``) are fetched most-recently-active first, normalized
    with ``_normalize_name`` and compared to the normalized query:

    1. A candidate whose normalized name equals the normalized query wins
       immediately ('Hüseyin Göçek' vs 'Huseyin Gocek',
       'Ali Palabıyık' vs 'Ali Palabiyik').
    2. Otherwise a candidate qualifies when one normalized name contains
       the other as a substring, or when the name tokens of one appear,
       in order, among the tokens of the other ('Ali Palabiyik' vs
       'Ali M. Palabiyik'). Qualifying candidates are scored by the ratio
       of the shorter to the longer normalized length; the best score
       strictly above 0.6 wins, earlier candidates winning ties.

    Args:
        cur: Cursor whose rows support ``.get('name')``.
        referee_name: Name as supplied by the caller.
        before_date_ms: Upper bound (exclusive) applied to ``m.mst_utc``.
        limit: Row cap forwarded to ``_query_referee_matches``.

    Returns:
        The rows from ``_query_referee_matches`` for the resolved stored
        name, or an empty list when the query name normalizes to nothing,
        the candidate query raises, or no candidate qualifies.
    """
    normalized_query = _normalize_name(referee_name)
    if not normalized_query:
        return []

    try:
        # Rank names by their latest match so the 2000-row cap drops the
        # least recently active referees instead of cutting the list off
        # alphabetically. mo.name is a deterministic tie-breaker.
        cur.execute(
            """
            SELECT mo.name
            FROM match_officials mo
            JOIN matches m ON m.id = mo.match_id
            WHERE mo.role_id = 1
              AND m.status = 'FT'
              AND m.mst_utc < %s
            GROUP BY mo.name
            ORDER BY MAX(m.mst_utc) DESC, mo.name
            LIMIT 2000
            """,
            (before_date_ms,),
        )
        candidates = cur.fetchall()
    except Exception:
        # Best-effort lookup: a failed candidate query means "no match".
        return []

    if not candidates:
        return []

    def _name_tokens(normalized: str) -> list:
        # Treat '.' as a separator so 'm.' and 'm' compare equal.
        return normalized.replace('.', ' ').split()

    def _is_token_subsequence(short: list, long: list) -> bool:
        # True when every token of `short` occurs in `long` in the same
        # order; `tok in remaining` consumes the iterator up to the hit.
        if not short:
            return False
        remaining = iter(long)
        return all(tok in remaining for tok in short)

    query_tokens = _name_tokens(normalized_query)
    best_match: Optional[str] = None
    best_score = 0.0

    for cand_row in candidates:
        cand_name = cand_row.get('name', '')
        if not cand_name:
            continue
        normalized_cand = _normalize_name(cand_name)
        if not normalized_cand:
            continue

        # Exact normalized match: nothing can beat it, stop searching.
        if normalized_cand == normalized_query:
            best_match = cand_name
            break

        cand_tokens = _name_tokens(normalized_cand)
        related = (
            normalized_query in normalized_cand
            or normalized_cand in normalized_query
            # Plain substring containment cannot bridge an inserted middle
            # name or initial, so also accept in-order token containment.
            or _is_token_subsequence(query_tokens, cand_tokens)
            or _is_token_subsequence(cand_tokens, query_tokens)
        )
        if not related:
            continue

        shorter, longer = sorted((len(normalized_query), len(normalized_cand)))
        score = shorter / longer
        if score > best_score and score > 0.6:
            best_match = cand_name
            best_score = score

    if not best_match:
        return []

    # Re-query with the name exactly as stored in match_officials.
    return self._query_referee_matches(
        cur, best_match, before_date_ms, limit,
    )
|
||||
|
||||
# ─── 5. League Averages ─────────────────────────────────────────
|
||||
|
||||
def compute_league_averages(
|
||||
|
||||
Reference in New Issue
Block a user