feat(ai-engine): value sniper thresholds and logic relaxed

2026-05-06 17:44:45 +03:00
parent 5b5f83c8cf
commit 4f7090e2d9
13 changed files with 2040 additions and 382 deletions
@@ -14,11 +14,40 @@ is missing or queries fail.

 from __future__ import annotations

+import unicodedata
 from typing import Any, Dict, Optional, Tuple

 from psycopg2.extras import RealDictCursor


+# ─── Turkish Name Normalization ──────────────────────────────────
+
+# Translation table: Turkish (and circumflexed) letters -> plain ASCII letters.
+_TR_CHAR_MAP = str.maketrans(
+    'çÇğĞıİöÖşŞüÜâÂîÎûÛ',
+    'cCgGiIoOsSuUaAiIuU',
+)
+
+
+def _normalize_name(name: str) -> str:
+    """
+    Return a canonical, ASCII-folded form of a referee name.
+
+    The result is lowercase, has Turkish letters replaced via
+    ``_TR_CHAR_MAP``, has combining marks removed, and uses single
+    spaces between words with no leading/trailing whitespace.  A falsy
+    ``name`` yields ``''``.
+    """
+    if not name:
+        return ''
+    # Explicit Turkish mapping first, then NFKD so that any remaining
+    # accented letters split into base letter + combining mark.
+    decomposed = unicodedata.normalize('NFKD', name.translate(_TR_CHAR_MAP))
+    base_chars = [ch for ch in decomposed if not unicodedata.combining(ch)]
+    # str.split() with no argument also drops leading/trailing whitespace.
+    words = ''.join(base_chars).lower().split()
+    return ' '.join(words)
+
+
 class FeatureEnrichmentService:
    """Stateless service — all state comes from DB via cursor."""

@@ -380,34 +409,20 @@ class FeatureEnrichmentService:
        """
        Referee tendencies: home win bias, avg goals, card rates.
        Matches referee by name in match_officials (role_id=1 = Orta Hakem).
+
+        Uses Turkish-aware fuzzy matching as a fallback when exact name
+        lookup returns zero results.
        """
        if not referee_name:
            return dict(self._DEFAULT_REFEREE)
-        try:
-            # Get match IDs officiated by this referee
-            cur.execute(
-                """
-                SELECT
-                    m.home_team_id,
-                    m.score_home,
-                    m.score_away,
-                    m.id AS match_id
-                FROM match_officials mo
-                JOIN matches m ON m.id = mo.match_id
-                WHERE mo.name = %s
-                  AND mo.role_id = 1
-                  AND m.status = 'FT'
-                  AND m.score_home IS NOT NULL
-                  AND m.score_away IS NOT NULL
-                  AND m.mst_utc < %s
-                ORDER BY m.mst_utc DESC
-                LIMIT %s
-                """,
-                (referee_name, before_date_ms, limit),
+
+        rows = self._query_referee_matches(cur, referee_name, before_date_ms, limit)
+
+        # Fuzzy fallback: if exact match fails, try normalized name search
+        if not rows:
+            rows = self._fuzzy_referee_lookup(
+                cur, referee_name, before_date_ms, limit,
            )
-            rows = cur.fetchall()
-        except Exception:
-            return dict(self._DEFAULT_REFEREE)

        if not rows:
            return dict(self._DEFAULT_REFEREE)
@@ -459,6 +474,118 @@ class FeatureEnrichmentService:
            'experience': total,
        }

+    def _query_referee_matches(
+        self,
+        cur: RealDictCursor,
+        referee_name: str,
+        before_date_ms: int,
+        limit: int,
+    ) -> list:
+        """
+        Exact-match referee lookup in match_officials.
+
+        Selects at most ``limit`` matches officiated by ``referee_name``
+        (role_id = 1) that have status 'FT', both scores present and
+        ``mst_utc`` earlier than ``before_date_ms``, ordered newest
+        first.  Each row exposes ``home_team_id``, ``score_home``,
+        ``score_away`` and ``match_id``.
+
+        Best-effort: if the query raises, the failure is logged and an
+        empty list is returned so callers can fall back to defaults.
+        """
+        try:
+            cur.execute(
+                """
+                SELECT
+                    m.home_team_id,
+                    m.score_home,
+                    m.score_away,
+                    m.id AS match_id
+                FROM match_officials mo
+                JOIN matches m ON m.id = mo.match_id
+                WHERE mo.name = %s
+                  AND mo.role_id = 1
+                  AND m.status = 'FT'
+                  AND m.score_home IS NOT NULL
+                  AND m.score_away IS NOT NULL
+                  AND m.mst_utc < %s
+                ORDER BY m.mst_utc DESC
+                LIMIT %s
+                """,
+                (referee_name, before_date_ms, limit),
+            )
+            return cur.fetchall()
+        except Exception:
+            # Still swallow the error (enrichment must not break the
+            # caller), but leave a trace: otherwise a broken query is
+            # indistinguishable from "referee has no matches".
+            # NOTE(review): on PostgreSQL a failed statement usually
+            # leaves the transaction aborted until rollback, so a
+            # follow-up query on this cursor may fail too -- confirm how
+            # the caller manages the connection.
+            import logging
+
+            logging.getLogger(__name__).warning(
+                "Referee match lookup failed for %r",
+                referee_name,
+                exc_info=True,
+            )
+            return []
+
+    def _fuzzy_referee_lookup(
+        self,
+        cur: RealDictCursor,
+        referee_name: str,
+        before_date_ms: int,
+        limit: int,
+    ) -> list:
+        """
+        Fuzzy referee lookup using Turkish name normalization.
+
+        Strategy: fetch distinct role_id = 1 referee names from 'FT'
+        matches with ``mst_utc`` before ``before_date_ms`` (the first
+        2000 in name order), normalize both the query name and each
+        candidate, and pick the best match.  This handles common
+        mismatches like:
+          - 'Hüseyin Göçek' vs 'Huseyin Gocek'
+          - 'Ali Palabıyık' vs 'Ali Palabiyik'
+          - Extra/missing middle initials ('Ali Koc' vs 'Ali M. Koc')
+
+        A candidate whose normalized form equals the query wins
+        immediately.  Otherwise a candidate is considered when one name
+        contains the other as a substring or when one name's words are
+        a subset of the other's; it is scored by the ratio of the
+        shorter to the longer normalized length and must exceed 0.6.
+
+        Returns the rows of ``_query_referee_matches`` for the resolved
+        name, or an empty list when no candidate qualifies or the
+        candidate query fails.
+        """
+        normalized_query = _normalize_name(referee_name)
+        if not normalized_query:
+            return []
+        query_tokens = set(normalized_query.split())
+
+        try:
+            # Fetch candidate referee names (distinct, role=1)
+            cur.execute(
+                """
+                SELECT DISTINCT mo.name
+                FROM match_officials mo
+                JOIN matches m ON m.id = mo.match_id
+                WHERE mo.role_id = 1
+                  AND m.status = 'FT'
+                  AND m.mst_utc < %s
+                ORDER BY mo.name
+                LIMIT 2000
+                """,
+                (before_date_ms,),
+            )
+            candidates = cur.fetchall()
+        except Exception:
+            # Best-effort: a failed candidate query means "no match".
+            return []
+
+        if not candidates:
+            return []
+
+        # Find best match by normalized name comparison
+        best_match: Optional[str] = None
+        best_score = 0.0
+
+        for cand_row in candidates:
+            cand_name = cand_row.get('name', '')
+            if not cand_name:
+                continue
+            normalized_cand = _normalize_name(cand_name)
+
+            # Exact normalized match: nothing can beat it, stop scanning.
+            if normalized_cand == normalized_query:
+                best_match = cand_name
+                best_score = 1.0
+                break
+
+            # Partial match.  Substring containment covers a missing
+            # leading/trailing part; the word-subset test covers an
+            # extra word in the middle ("first last" vs "first m. last"),
+            # which a substring test alone cannot see.
+            cand_tokens = set(normalized_cand.split())
+            is_partial = (
+                normalized_query in normalized_cand
+                or normalized_cand in normalized_query
+                or query_tokens <= cand_tokens
+                or cand_tokens <= query_tokens
+            )
+            if not is_partial:
+                continue
+
+            # normalized_query is non-empty here, so the longer length
+            # is always > 0 and the division is safe.
+            shorter, longer = sorted(
+                (len(normalized_query), len(normalized_cand))
+            )
+            containment_score = shorter / longer
+            if containment_score > best_score and containment_score > 0.6:
+                best_match = cand_name
+                best_score = containment_score
+
+        if not best_match:
+            return []
+
+        # Re-query with the resolved name
+        return self._query_referee_matches(
+            cur, best_match, before_date_ms, limit,
+        )
+
    # ─── 5. League Averages ─────────────────────────────────────────

    def compute_league_averages(