This commit is contained in:
Executable
+132
@@ -0,0 +1,132 @@
|
||||
|
||||
import os
|
||||
import sys
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.optim as optim
|
||||
from torch.utils.data import DataLoader, TensorDataset
|
||||
import numpy as np
|
||||
import time
|
||||
|
||||
# Path alignment: make the working directory and its 'ai-engine' subdirectory
# importable so the project imports that follow (models.*, pipeline.*) resolve.
# NOTE(review): this depends on the process working directory -- presumably the
# script is meant to be launched from the repository root; confirm.
for _extra_path in (os.getcwd(), os.path.join(os.getcwd(), 'ai-engine')):
    # Skip entries that are already present so repeated imports do not grow sys.path.
    if _extra_path not in sys.path:
        sys.path.append(_extra_path)
|
||||
|
||||
from models.hybrid_v11 import HybridDeepModel
|
||||
from pipeline.sequence_builder import SequenceBuilder
|
||||
from pipeline.tiered_loader import TieredDataLoader
|
||||
|
||||
# Device used for the model and every tensor: CUDA when available, else CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Checkpoint that fine_tune() loads and then writes back to (path is relative
# to the process working directory).
MODEL_PATH = 'ai-engine/models/v11_hybrid_model.pth'
LEARNING_RATE = 1e-4  # Lower for fine-tuning
EPOCHS = 10  # More focus on small set
|
||||
|
||||
def fine_tune(lookback_days: int = 3) -> None:
    """Fine-tune the saved hybrid model on the most recent samples.

    Steps:
      1. Build all sequences with ``SequenceBuilder``.
      2. Keep only samples whose ``meta['date']`` lies within ``lookback_days``
         of the newest timestamp found in ``meta``.
      3. Build one context vector per sample (fixed placeholder values plus
         one feature from ``OddsHistoryEngine``).
      4. Load the checkpoint at ``MODEL_PATH`` and train it for ``EPOCHS``
         epochs with a weighted multi-task loss (result, total goals, HT/FT).
      5. Write the updated weights back to ``MODEL_PATH``.

    Args:
        lookback_days: Size of the recency window in days. The default of 3
            reproduces the previously hard-coded window.

    Returns:
        None. Returns early, before the checkpoint is read or written, when
        no sample falls inside the window.

    Note:
        Assumes ``meta['date']`` holds epoch timestamps in milliseconds and
        that ``y`` has at least four columns used as: 0 = result class,
        1 and 2 = values summed into the goals target, 3 = HT/FT class
        -- TODO confirm against SequenceBuilder.
    """
    print(f"🧠 Starting Error-Driven Fine-Tuning (Last {lookback_days} Days)...")

    # 1. Build sequences
    builder = SequenceBuilder()
    X, y, meta = builder.build_sequences()

    # 2. Keep only the recency window, measured back from the newest
    #    timestamp in the data rather than from the wall clock.
    newest_ts = meta['date'].max()
    window_ms = lookback_days * 24 * 60 * 60 * 1000
    mask = meta['date'] >= newest_ts - window_ms

    X_recent = X[mask]
    y_recent = y[mask]
    meta_recent = meta[mask]

    if len(X_recent) == 0:
        print("❌ No recent matches found to fine-tune on!")
        return

    print(f"✅ Found {len(X_recent)} recent samples for fine-tuning.")

    # 3. Context features
    # Imported here, as in the original script, so the dependency is only
    # needed when fine-tuning actually runs.
    from features.odds_history import OddsHistoryEngine
    eng = OddsHistoryEngine()

    # Read the column directly instead of iterating rows: DataFrame.iterrows()
    # does not preserve dtypes across a row and can upcast integer ids.
    team_ids = meta_recent['team_id'].tolist()

    print("📊 Building Context for recent matches...")
    # Real odds are not looked up here: the first seven slots are fixed
    # placeholders (presumably baseline odds -- TODO confirm slot meanings
    # against the model's context input); only the last slot is per-team.
    baseline_ctx = [2.0, 3.2, 2.5, 1.8, 1.8, 1.35, 1.35]
    ctx_list = [baseline_ctx + [eng.get_feature(tid, 2.0)] for tid in team_ids]

    X_tensor = torch.as_tensor(X_recent, dtype=torch.float32, device=DEVICE)
    y_tensor = torch.as_tensor(y_recent, dtype=torch.float32, device=DEVICE)
    ctx_tensor = torch.tensor(ctx_list, dtype=torch.float32, device=DEVICE)

    # 4. Load model
    # The checkpoint is loaded before the entity mapping so the embedding
    # table size is known when team indices are validated below.
    state = torch.load(MODEL_PATH, map_location=DEVICE)
    # Older checkpoints name the embedding 'team_embedding'; newer ones 'entity_emb'.
    emb_key = 'entity_emb.weight' if 'entity_emb.weight' in state else 'team_embedding.weight'
    saved_vocab_size = state[emb_key].shape[0]

    model = HybridDeepModel(num_teams=saved_vocab_size).to(DEVICE)
    new_state = {k.replace('team_embedding', 'entity_emb'): v for k, v in state.items()}
    load_result = model.load_state_dict(new_state, strict=False)
    # strict=False tolerates key mismatches; report them instead of hiding them.
    if load_result.missing_keys or load_result.unexpected_keys:
        print(f"⚠️ Checkpoint key mismatch | missing: {list(load_result.missing_keys)} | "
              f"unexpected: {list(load_result.unexpected_keys)}")
    model.train()

    # Entity mapping
    # NOTE(review): the mapping is rebuilt from the current data, so an index
    # is not guaranteed to denote the same team as when the checkpoint was
    # trained -- confirm, or persist the mapping next to the checkpoint.
    unique_teams = meta['team_id'].unique()
    team_map = {tid: i for i, tid in enumerate(unique_teams)}
    team_indices = [team_map.get(tid, 0) for tid in team_ids]

    # An index at or beyond the saved embedding size would fail inside the
    # embedding lookup; fall back to index 0, the same default used for
    # unmapped teams.
    out_of_vocab = sum(1 for idx in team_indices if idx >= saved_vocab_size)
    if out_of_vocab:
        print(f"⚠️ {out_of_vocab} samples have team indices outside the saved "
              f"embedding table (size {saved_vocab_size}); using index 0 for them.")
    # The second entity slot is always 0 in this script.
    entities_list = [[idx if idx < saved_vocab_size else 0, 0] for idx in team_indices]
    entities_tensor = torch.tensor(entities_list, dtype=torch.long, device=DEVICE)

    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    # Loss weights
    # Result classes: 0=Home, 1=Draw, 2=Away; Draw is weighted 2.0.
    class_weights = torch.tensor([1.0, 2.0, 1.0], dtype=torch.float32, device=DEVICE)
    # Nine HT/FT classes; indices 2 and 6 are weighted x10 (described in the
    # original as "miracle turnarounds" -- presumably the reversed HT/FT
    # outcomes; TODO confirm the class encoding).
    ht_weights = torch.tensor(
        [1.0, 1.0, 10.0, 1.0, 1.0, 1.0, 10.0, 1.0, 1.0],
        dtype=torch.float32,
        device=DEVICE,
    )

    crit_res = nn.CrossEntropyLoss(weight=class_weights)
    crit_ht = nn.CrossEntropyLoss(weight=ht_weights)
    crit_goals = nn.MSELoss()

    dataset = TensorDataset(entities_tensor, X_tensor, ctx_tensor, y_tensor)
    train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

    print(f"🚀 Fine-tuning for {EPOCHS} epochs...")
    for epoch in range(EPOCHS):
        total_loss = 0.0
        for b_ent, b_seq, b_ctx, b_y in train_loader:
            optimizer.zero_grad()

            # The third model output is not part of the fine-tuning loss.
            l_res, p_goals, _l_btts, l_ht = model(b_ent, b_seq, b_ctx)

            # 1X2 loss
            target_res = b_y[:, 0].long()
            loss_res = crit_res(l_res, target_res)

            # Goals loss: target is the sum of label columns 1 and 2.
            target_goals = (b_y[:, 1] + b_y[:, 2]).unsqueeze(1)
            loss_goals = crit_goals(p_goals, target_goals)

            # HT/FT loss
            target_ht = b_y[:, 3].long()
            loss_ht = crit_ht(l_ht, target_ht)

            # The HT/FT term is down-weighted relative to the other two.
            loss = loss_res + loss_goals + (0.5 * loss_ht)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        print(f"   Epoch {epoch+1}/{EPOCHS} | Loss: {total_loss/len(train_loader):.4f}")

    # 5. Save
    print(f"💾 Saving fine-tuned model to {MODEL_PATH}")
    # Write to a sibling temp file and swap it in, so an interrupted save
    # cannot leave a truncated checkpoint at MODEL_PATH.
    tmp_path = f"{MODEL_PATH}.tmp"
    torch.save(model.state_dict(), tmp_path)
    os.replace(tmp_path, MODEL_PATH)
    print("✅ Fine-tuning complete.")
|
||||
|
||||
# Run the fine-tuning job only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    fine_tune()
|
||||
Reference in New Issue
Block a user