133 lines
4.6 KiB
Python
Executable File
133 lines
4.6 KiB
Python
Executable File
|
|
import os
|
|
import sys
|
|
import torch
|
|
import torch.nn as nn
|
|
import torch.optim as optim
|
|
from torch.utils.data import DataLoader, TensorDataset
|
|
import numpy as np
|
|
import time
|
|
|
|
# Path alignment: the project packages imported below (models, pipeline) are
# resolved relative to the current working directory and its 'ai-engine'
# sub-directory, so both are appended to sys.path *before* those imports run.
# NOTE(review): this presumably expects the script to be launched from the
# repository root — confirm.
sys.path.append(os.getcwd())
sys.path.append(os.path.join(os.getcwd(), 'ai-engine'))

from models.hybrid_v11 import HybridDeepModel
from pipeline.sequence_builder import SequenceBuilder
from pipeline.tiered_loader import TieredDataLoader
|
|
|
|
# Train on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Checkpoint that fine_tune() loads and then writes back to; the path is
# relative to the current working directory.
MODEL_PATH = 'ai-engine/models/v11_hybrid_model.pth'
LEARNING_RATE = 1e-4  # Lower for fine-tuning
EPOCHS = 10  # More focus on small set
|
|
|
|
def fine_tune():
    """Fine-tune the saved hybrid model on the most recent three days of data.

    Steps, in order:
      1. Build all sequences with ``SequenceBuilder`` and keep the rows whose
         ``meta['date']`` lies within three days of the newest date in ``meta``.
      2. Attach an 8-value context vector to every kept row: seven fixed
         baseline values plus one ``OddsHistoryEngine.get_feature`` lookup.
      3. Load the checkpoint at ``MODEL_PATH``, size the model from the saved
         embedding table and train for ``EPOCHS`` epochs with Adam on a
         weighted sum of result, total-goals and HT/FT losses.
      4. Write the updated weights back to ``MODEL_PATH``.

    Returns:
        None. Returns early, leaving the checkpoint untouched, when no sample
        falls inside the three-day window.
    """
    print("🧠 Starting Error-Driven Fine-Tuning (Last 3 Days)...")

    # 1. Build Sequences
    builder = SequenceBuilder()
    X, y, meta = builder.build_sequences()

    # 2. Keep the last three days, measured back from the newest timestamp in
    #    `meta` (not from the wall clock), so the window follows the data.
    # NOTE(review): the subtraction assumes meta['date'] holds epoch
    # milliseconds — confirm against SequenceBuilder.
    max_ts = meta['date'].max()
    three_days_ms = 3 * 24 * 60 * 60 * 1000
    filter_ts = max_ts - three_days_ms

    mask = meta['date'] >= filter_ts
    X_recent = X[mask]
    y_recent = y[mask]
    meta_recent = meta[mask]

    if len(X_recent) == 0:
        print("❌ No recent matches found to fine-tune on!")
        return

    print(f"✅ Found {len(X_recent)} recent samples for fine-tuning.")

    # 3. Context features
    # Imported here rather than at module level; the module is only
    # importable once the sys.path additions at the top of the file have run.
    from features.odds_history import OddsHistoryEngine
    eng = OddsHistoryEngine()

    # Read the team ids once from the column instead of iterating rows twice;
    # the column read also keeps the ids in their own dtype.
    recent_team_ids = meta_recent['team_id'].tolist()

    print("📊 Building Context for recent matches...")
    # Real odds are not looked up here: the first seven slots are fixed
    # baseline values, only the last slot comes from the odds-history engine.
    ctx_list = [
        [2.0, 3.2, 2.5, 1.8, 1.8, 1.35, 1.35, eng.get_feature(team_id, 2.0)]
        for team_id in recent_team_ids
    ]

    X_tensor = torch.as_tensor(X_recent, dtype=torch.float32).to(DEVICE)
    y_tensor = torch.as_tensor(y_recent, dtype=torch.float32).to(DEVICE)
    ctx_tensor = torch.tensor(ctx_list, dtype=torch.float32).to(DEVICE)

    # 4. Load Model
    # Loaded before the entity tensor is built so the embedding size stored in
    # the checkpoint can be used to range-check the entity indices.
    state = torch.load(MODEL_PATH, map_location=DEVICE)
    # Older checkpoints name the embedding 'team_embedding'; newer ones use
    # 'entity_emb'. Both are accepted and normalised to 'entity_emb'.
    emb_key = 'entity_emb.weight' if 'entity_emb.weight' in state else 'team_embedding.weight'
    saved_vocab_size = state[emb_key].shape[0]

    model = HybridDeepModel(num_teams=saved_vocab_size).to(DEVICE)
    new_state = {k.replace('team_embedding', 'entity_emb'): v for k, v in state.items()}
    load_result = model.load_state_dict(new_state, strict=False)
    # strict=False tolerates mismatches silently, so report them: a missing
    # key means that layer starts from its fresh initialisation.
    if load_result.missing_keys or load_result.unexpected_keys:
        print(
            f"⚠️ Checkpoint/model key mismatch | missing: {list(load_result.missing_keys)} "
            f"| unexpected: {list(load_result.unexpected_keys)}"
        )
    model.train()

    # Entity Mapping
    # Indices follow the order of first appearance in the full `meta`.
    # NOTE(review): this only matches the checkpoint's embedding rows if the
    # original training run built its mapping the same way — confirm.
    unique_teams = meta['team_id'].unique()
    team_map = {tid: i for i, tid in enumerate(unique_teams)}
    entity_ids = [team_map.get(team_id, 0) for team_id in recent_team_ids]

    # An index at or beyond the saved embedding size would fail inside the
    # embedding lookup, so send it to index 0, the same fallback used above
    # for unknown ids.
    out_of_range = sum(1 for idx in entity_ids if idx >= saved_vocab_size)
    if out_of_range:
        print(
            f"⚠️ {out_of_range} samples reference entity ids outside the saved "
            f"embedding (size {saved_vocab_size}); mapping them to index 0."
        )
        entity_ids = [idx if idx < saved_vocab_size else 0 for idx in entity_ids]

    # The second entity slot is always 0 in this script.
    entities_list = [[idx, 0] for idx in entity_ids]
    entities_tensor = torch.tensor(entities_list, dtype=torch.long).to(DEVICE)

    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    # Weights
    # Result classes: 0=Home, 1=Draw, 2=Away. Draws are weighted 2x.
    class_weights = torch.tensor([1.0, 2.0, 1.0], dtype=torch.float32).to(DEVICE)
    # Nine HT/FT classes; indices 2 and 6 are weighted 10x.
    # NOTE(review): presumably those two are the half-time/full-time
    # turnaround outcomes — confirm against the label encoding.
    ht_weights = torch.tensor(
        [1.0, 1.0, 10.0, 1.0, 1.0, 1.0, 10.0, 1.0, 1.0], dtype=torch.float32
    ).to(DEVICE)

    crit_res = nn.CrossEntropyLoss(weight=class_weights)
    crit_ht = nn.CrossEntropyLoss(weight=ht_weights)
    crit_goals = nn.MSELoss()

    dataset = TensorDataset(entities_tensor, X_tensor, ctx_tensor, y_tensor)
    train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

    print(f"🚀 Fine-tuning for {EPOCHS} epochs...")
    for epoch in range(EPOCHS):
        total_loss = 0
        for b_ent, b_seq, b_ctx, b_y in train_loader:
            optimizer.zero_grad()

            # The third model output (l_btts) is not part of the loss here.
            l_res, p_goals, l_btts, l_ht = model(b_ent, b_seq, b_ctx)

            # 1X2 Loss: target column 0 holds the result class.
            target_res = b_y[:, 0].long()
            loss_res = crit_res(l_res, target_res)

            # Goals Loss: regress on the sum of target columns 1 and 2.
            target_goals = (b_y[:, 1] + b_y[:, 2]).unsqueeze(1)
            loss_goals = crit_goals(p_goals, target_goals)

            # HT/FT Loss: target column 3 holds the HT/FT class.
            target_ht = b_y[:, 3].long()
            loss_ht = crit_ht(l_ht, target_ht)

            # HT/FT contributes at half weight.
            loss = loss_res + loss_goals + (0.5 * loss_ht)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        print(f"   Epoch {epoch+1}/{EPOCHS} | Loss: {total_loss/len(train_loader):.4f}")

    # 5. Save
    print(f"💾 Saving fine-tuned model to {MODEL_PATH}")
    # Write to a sibling temp file and swap it in, so an interrupted save
    # cannot leave a truncated checkpoint at MODEL_PATH.
    tmp_path = MODEL_PATH + '.tmp'
    try:
        torch.save(model.state_dict(), tmp_path)
        os.replace(tmp_path, MODEL_PATH)
    finally:
        # Only still present if the save or the swap failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    print("✅ Fine-tuning complete.")
|
|
|
|
if __name__ == "__main__":
    # Run the fine-tuning pass only when executed as a script, not on import.
    fine_tune()
|