fix(ai-engine): remove target leakage from training data extraction
Deploy Iddaai Backend / build-and-deploy (push) Successful in 6s
Deploy Iddaai Backend / build-and-deploy (push) Successful in 6s
- goals_form now uses the average of the last 5 historical matches instead of the current match's goals
- squad_quality no longer includes the current match's goals/assists; it uses only data known before the match
- adds temporal filtering via a match_id -> mst_utc mapping
This commit is contained in:
@@ -424,12 +424,18 @@ class BatchDataLoader:
|
||||
for mid, tid, pid in self.cur.fetchall():
|
||||
starting_players[(mid, tid)].append(pid)
|
||||
|
||||
# 5) Build combined cache
|
||||
# 5) Build match_id → mst_utc mapping for temporal filtering
|
||||
match_mst = {}
|
||||
for m in self.matches:
|
||||
match_mst[m[0]] = m[7] # m[0]=id, m[7]=mst_utc
|
||||
|
||||
# 6) Build combined cache — NO DATA LEAKAGE
|
||||
# goals_form: avg goals from last 5 matches BEFORE this match (not this match!)
|
||||
# squad_quality: only uses pre-match info (lineup, key players) — no current-match goals/assists
|
||||
all_keys = set(participation.keys()) | set(events.keys())
|
||||
for key in all_keys:
|
||||
mid, tid = key
|
||||
part = participation.get(key, {'starting_count': 0, 'total_squad': 0, 'fwd_count': 0})
|
||||
evt = events.get(key, {'goals': 0, 'assists': 0, 'unique_scorers': 0})
|
||||
|
||||
# Count key players in starting XI
|
||||
starters = starting_players.get(key, [])
|
||||
@@ -437,22 +443,30 @@ class BatchDataLoader:
|
||||
kp_total = len(key_players_by_team.get(tid, set()))
|
||||
kp_missing = max(0, kp_total - kp_in_starting)
|
||||
|
||||
# Squad quality: composite score
|
||||
# Squad quality: composite score — ONLY pre-match info (no current-match goals/assists!)
|
||||
squad_quality = (
|
||||
part['starting_count'] * 0.3 +
|
||||
evt['goals'] * 2.0 +
|
||||
evt['assists'] * 1.0 +
|
||||
kp_in_starting * 3.0 +
|
||||
part['fwd_count'] * 1.5
|
||||
)
|
||||
# Missing impact: how many key players are missing
|
||||
missing_impact = min(kp_missing / max(kp_total, 1), 1.0)
|
||||
|
||||
# goals_form: avg goals from last 5 matches BEFORE this match
|
||||
current_mst = match_mst.get(mid, 0)
|
||||
team_history = self.team_matches.get(tid, [])
|
||||
recent_goals = [
|
||||
tm[2] # team_score
|
||||
for tm in team_history
|
||||
if tm[0] < current_mst # only matches BEFORE this one
|
||||
][-5:] # last 5
|
||||
goals_form = sum(recent_goals) / len(recent_goals) if recent_goals else 1.3
|
||||
|
||||
self.squad_cache[key] = {
|
||||
'squad_quality': squad_quality,
|
||||
'key_players': kp_in_starting,
|
||||
'missing_impact': missing_impact,
|
||||
'goals_form': evt['goals'],
|
||||
'goals_form': round(goals_form, 2),
|
||||
}
|
||||
|
||||
def _load_cards_data(self):
|
||||
|
||||
Reference in New Issue
Block a user