"""Fine-tune the saved hybrid model on the most recent three days of samples.

Run as a script. The checkpoint at ``MODEL_PATH`` is loaded, trained for
``EPOCHS`` epochs on the samples whose ``meta['date']`` falls inside the last
three days of the dataset, and then written back to ``MODEL_PATH``.
"""
import os
import sys
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import time

# Path alignment: the project packages are resolved relative to the current
# working directory, so these entries must be added before the imports below.
sys.path.append(os.getcwd())
sys.path.append(os.path.join(os.getcwd(), 'ai-engine'))

from models.hybrid_v11 import HybridDeepModel
from pipeline.sequence_builder import SequenceBuilder
from pipeline.tiered_loader import TieredDataLoader

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
MODEL_PATH = 'ai-engine/models/v11_hybrid_model.pth'
LEARNING_RATE = 1e-4  # Lower for fine-tuning
EPOCHS = 10  # More focus on small set

# Width of the "recent" window, in the same unit as meta['date'].
# NOTE(review): assumes meta['date'] holds epoch milliseconds -- confirm
# against SequenceBuilder.
_RECENT_WINDOW_MS = 3 * 24 * 60 * 60 * 1000

# Constant placeholder odds used for the first seven context features of every
# sample; the eighth feature comes from OddsHistoryEngine.
# NOTE(review): the meaning of each slot is not visible here -- presumably
# 1X2 odds followed by other market odds; verify against the model's context
# layout.
_BASELINE_CONTEXT = (2.0, 3.2, 2.5, 1.8, 1.8, 1.35, 1.35)

# Per-class loss weights. Result classes: 0=Home, 1=Draw, 2=Away; draws are
# weighted 2x. HT/FT has nine classes; classes 2 and 6 are weighted 10x
# (presumably the "turnaround" outcomes -- confirm the class encoding).
_RESULT_CLASS_WEIGHTS = (1.0, 2.0, 1.0)
_HTFT_CLASS_WEIGHTS = (1.0, 1.0, 10.0, 1.0, 1.0, 1.0, 10.0, 1.0, 1.0)


def _select_recent(X, y, meta):
    """Return ``(X, y, meta)`` restricted to the last three days of ``meta``.

    The window is measured back from the largest ``meta['date']`` value, not
    from the wall clock.
    """
    cutoff = meta['date'].max() - _RECENT_WINDOW_MS
    mask = meta['date'] >= cutoff
    # Index the arrays with a plain boolean ndarray rather than a Series.
    mask_values = mask.to_numpy()
    return X[mask_values], y[mask_values], meta[mask]


def _build_context(meta_recent):
    """Build one 8-value context row per recent sample.

    The first seven values are the constant baseline odds; the last is
    ``OddsHistoryEngine.get_feature(team_id, 2.0)``.
    """
    # Imported lazily, as in the original script, so importing this module
    # does not require the features package.
    from features.odds_history import OddsHistoryEngine

    engine = OddsHistoryEngine()
    # Iterate the column directly: iterrows() is slow and does not preserve
    # the column dtype across a row.
    return [
        [*_BASELINE_CONTEXT, engine.get_feature(team_id, 2.0)]
        for team_id in meta_recent['team_id']
    ]


def _load_model():
    """Load the checkpoint and return ``(model, vocab_size)``.

    ``vocab_size`` is the number of rows of the saved embedding table. Keys
    saved under the legacy ``team_embedding`` name are renamed to
    ``entity_emb`` before loading.

    Raises:
        KeyError: if the checkpoint contains neither embedding key.
    """
    state = torch.load(MODEL_PATH, map_location=DEVICE)

    emb_key = next(
        (k for k in ('entity_emb.weight', 'team_embedding.weight') if k in state),
        None,
    )
    if emb_key is None:
        raise KeyError(
            f"Checkpoint {MODEL_PATH} has neither 'entity_emb.weight' "
            "nor 'team_embedding.weight'"
        )
    vocab_size = state[emb_key].shape[0]

    model = HybridDeepModel(num_teams=vocab_size).to(DEVICE)
    renamed = {k.replace('team_embedding', 'entity_emb'): v for k, v in state.items()}

    # strict=False tolerates architecture drift, but a partial load should
    # never go unnoticed, so report whatever was skipped.
    load_result = model.load_state_dict(renamed, strict=False)
    if load_result.missing_keys:
        print(f"⚠️ Missing keys when loading checkpoint: {load_result.missing_keys}")
    if load_result.unexpected_keys:
        print(f"⚠️ Unexpected keys in checkpoint: {load_result.unexpected_keys}")

    return model, vocab_size


def _build_entities(meta, meta_recent, vocab_size):
    """Return ``[team_index, 0]`` pairs for every recent sample.

    Team indices are assigned by order of first appearance in the full
    ``meta``. Indices that do not fit the checkpoint's embedding table are
    sent to slot 0 instead of triggering an out-of-range lookup.

    NOTE(review): this mapping is rebuilt from the current dataset and is not
    guaranteed to match the mapping used when the checkpoint was trained.
    """
    team_map = {tid: idx for idx, tid in enumerate(meta['team_id'].unique())}

    entities = []
    out_of_range = 0
    for team_id in meta_recent['team_id']:
        idx = team_map.get(team_id, 0)
        if idx >= vocab_size:
            idx = 0
            out_of_range += 1
        entities.append([idx, 0])

    if out_of_range:
        print(
            f"⚠️ {out_of_range} samples reference teams outside the saved "
            f"vocabulary ({vocab_size}); mapped to index 0."
        )
    return entities


def _train(model, train_loader):
    """Run ``EPOCHS`` epochs of multi-task fine-tuning on ``train_loader``.

    Each batch is ``(entities, sequences, context, targets)``. Target column 0
    is the result class, columns 1 and 2 are summed into the goals target and
    column 3 is the HT/FT class. The model's third output is not trained here.
    """
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    class_weights = torch.tensor(_RESULT_CLASS_WEIGHTS, dtype=torch.float32, device=DEVICE)
    ht_weights = torch.tensor(_HTFT_CLASS_WEIGHTS, dtype=torch.float32, device=DEVICE)
    crit_res = nn.CrossEntropyLoss(weight=class_weights)
    crit_ht = nn.CrossEntropyLoss(weight=ht_weights)
    crit_goals = nn.MSELoss()

    print(f"🚀 Fine-tuning for {EPOCHS} epochs...")
    for epoch in range(EPOCHS):
        total_loss = 0
        for b_ent, b_seq, b_ctx, b_y in train_loader:
            optimizer.zero_grad()
            l_res, p_goals, _l_btts, l_ht = model(b_ent, b_seq, b_ctx)

            # 1X2 loss
            loss_res = crit_res(l_res, b_y[:, 0].long())

            # Total-goals loss
            target_goals = (b_y[:, 1] + b_y[:, 2]).unsqueeze(1)
            loss_goals = crit_goals(p_goals, target_goals)

            # HT/FT loss, down-weighted relative to the other two heads
            loss_ht = crit_ht(l_ht, b_y[:, 3].long())

            loss = loss_res + loss_goals + (0.5 * loss_ht)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        print(f" Epoch {epoch+1}/{EPOCHS} | Loss: {total_loss/len(train_loader):.4f}")


def _save_model(model):
    """Write the model weights to ``MODEL_PATH`` without risking a torn file.

    The checkpoint being overwritten is also the one that was loaded, so the
    weights go to a temporary file first and are then moved into place.
    """
    print(f"💾 Saving fine-tuned model to {MODEL_PATH}")
    tmp_path = MODEL_PATH + '.tmp'
    torch.save(model.state_dict(), tmp_path)
    os.replace(tmp_path, MODEL_PATH)


def fine_tune():
    """Fine-tune the saved model on the last three days of samples.

    Builds sequences, keeps only the recent ones, attaches context and entity
    indices, trains, and overwrites ``MODEL_PATH`` with the updated weights.
    Returns ``None``; returns early, without touching the checkpoint, when
    there are no recent samples.
    """
    print("🧠 Starting Error-Driven Fine-Tuning (Last 3 Days)...")

    # 1. Build sequences and keep the recent window.
    builder = SequenceBuilder()
    X, y, meta = builder.build_sequences()
    X_recent, y_recent, meta_recent = _select_recent(X, y, meta)

    if len(X_recent) == 0:
        print("❌ No recent matches found to fine-tune on!")
        return

    print(f"✅ Found {len(X_recent)} recent samples for fine-tuning.")

    # 2. Context features (baseline odds + odds-history feature).
    print("📊 Building Context for recent matches...")
    ctx_list = _build_context(meta_recent)

    # 3. Load the model first: its vocabulary size bounds the entity indices.
    model, vocab_size = _load_model()
    entities_list = _build_entities(meta, meta_recent, vocab_size)

    X_tensor = torch.as_tensor(X_recent, dtype=torch.float32).to(DEVICE)
    y_tensor = torch.as_tensor(y_recent, dtype=torch.float32).to(DEVICE)
    ctx_tensor = torch.tensor(ctx_list, dtype=torch.float32, device=DEVICE)
    entities_tensor = torch.tensor(entities_list, dtype=torch.long, device=DEVICE)

    dataset = TensorDataset(entities_tensor, X_tensor, ctx_tensor, y_tensor)
    train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

    # 4. Train.
    _train(model, train_loader)

    # 5. Save.
    _save_model(model)
    print("✅ Fine-tuning complete.")


if __name__ == "__main__":
    fine_tune()