first (part 2: other directories)

2026-04-16 15:11:25 +03:00
parent 7814e0bc6b
commit 2f0b85a0c7
203 changed files with 59989 additions and 0 deletions
@@ -0,0 +1,132 @@
+
+import os
+import sys
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import DataLoader, TensorDataset
+import numpy as np
+import time
+
+# Path alignment: put the current working directory and its 'ai-engine'
+# sub-directory on the import path. Both entries are derived from
+# os.getcwd(), so they depend on where the script is launched from.
+sys.path.append(os.getcwd())
+sys.path.append(os.path.join(os.getcwd(), 'ai-engine'))
+
+from models.hybrid_v11 import HybridDeepModel
+from pipeline.sequence_builder import SequenceBuilder
+from pipeline.tiered_loader import TieredDataLoader
+
+# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
+DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+# Checkpoint that fine_tune() loads and later overwrites (relative path).
+MODEL_PATH = 'ai-engine/models/v11_hybrid_model.pth'
+LEARNING_RATE = 1e-4 # Lower for fine-tuning
+EPOCHS = 10 # More focus on small set
+
+def fine_tune():
+    print("🧠 Starting Error-Driven Fine-Tuning (Last 3 Days)...")
+    
+    # 1. Build Sequences
+    builder = SequenceBuilder()
+    X, y, meta = builder.build_sequences()
+    
+    # Current is Jan 27. Filter after Jan 24.
+    # relative to Max Timestamp in meta
+    max_ts = meta['date'].max()
+    three_days_ms = 3 * 24 * 60 * 60 * 1000
+    filter_ts = max_ts - three_days_ms
+    
+    mask = meta['date'] >= filter_ts
+    X_recent = X[mask]
+    y_recent = y[mask]
+    meta_recent = meta[mask]
+    
+    if len(X_recent) == 0:
+        print("❌ No recent matches found to fine-tune on!")
+        return
+        
+    print(f"✅ Found {len(X_recent)} recent samples for fine-tuning.")
+    
+    # 3. Loader
+    # We need Odds/Context for these
+    loader = TieredDataLoader()
+    # For speed in this script, we'll use average context if full loader is too slow
+    # But let's try to get real context
+    from features.odds_history import OddsHistoryEngine
+    eng = OddsHistoryEngine()
+    
+    # Pre-build context
+    ctx_list = []
+    print("📊 Building Context for recent matches...")
+    for i, row in meta_recent.iterrows():
+        # Get odds (simulated or real from DB)
+        # Using 1.5 - 3.0 - 2.5 as baseline if not found
+        ctx_list.append([2.0, 3.2, 2.5, 1.8, 1.8, 1.35, 1.35, eng.get_feature(row['team_id'], 2.0)])
+        
+    X_tensor = torch.FloatTensor(X_recent).to(DEVICE)
+    y_tensor = torch.FloatTensor(y_recent).to(DEVICE)
+    ctx_tensor = torch.FloatTensor(ctx_list).to(DEVICE)
+    
+    # Entity Mapping
+    unique_teams = meta['team_id'].unique()
+    team_map = {tid: i for i, tid in enumerate(unique_teams)}
+    entities_list = [[team_map.get(row['team_id'], 0), 0] for _, row in meta_recent.iterrows()]
+    entities_tensor = torch.LongTensor(entities_list).to(DEVICE)
+    
+    # 4. Load Model
+    state = torch.load(MODEL_PATH, map_location=DEVICE)
+    emb_key = 'entity_emb.weight' if 'entity_emb.weight' in state else 'team_embedding.weight'
+    saved_vocab_size = state[emb_key].shape[0]
+    
+    model = HybridDeepModel(num_teams=saved_vocab_size).to(DEVICE)
+    new_state = {k.replace('team_embedding', 'entity_emb'): v for k, v in state.items()}
+    model.load_state_dict(new_state, strict=False)
+    model.train()
+    
+    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
+    
+    # Weights
+    # 0=Home, 1=Draw, 2=Away
+    # High weight for Draw (1.5) and Miracle turnarounds (x10)
+    class_weights = torch.FloatTensor([1.0, 2.0, 1.0]).to(DEVICE) # More Draw focus
+    ht_weights = torch.FloatTensor([1.0, 1.0, 10.0, 1.0, 1.0, 1.0, 10.0, 1.0, 1.0]).to(DEVICE)
+    
+    crit_res = nn.CrossEntropyLoss(weight=class_weights)
+    crit_ht = nn.CrossEntropyLoss(weight=ht_weights)
+    crit_goals = nn.MSELoss()
+    
+    dataset = TensorDataset(entities_tensor, X_tensor, ctx_tensor, y_tensor)
+    train_loader = DataLoader(dataset, batch_size=32, shuffle=True)
+
+    print(f"🚀 Fine-tuning for {EPOCHS} epochs...")
+    for epoch in range(EPOCHS):
+        total_loss = 0
+        for b_ent, b_seq, b_ctx, b_y in train_loader:
+            optimizer.zero_grad()
+            
+            l_res, p_goals, l_btts, l_ht = model(b_ent, b_seq, b_ctx)
+            
+            # 1X2 Loss
+            target_res = b_y[:, 0].long()
+            loss_res = crit_res(l_res, target_res)
+            
+            # Goals Loss
+            target_goals = (b_y[:, 1] + b_y[:, 2]).unsqueeze(1)
+            loss_goals = crit_goals(p_goals, target_goals)
+            
+            # HT/FT Loss
+            target_ht = b_y[:, 3].long()
+            loss_ht = crit_ht(l_ht, target_ht)
+            
+            loss = loss_res + loss_goals + (0.5 * loss_ht)
+            loss.backward()
+            optimizer.step()
+            total_loss += loss.item()
+            
+        print(f"   Epoch {epoch+1}/{EPOCHS} | Loss: {total_loss/len(train_loader):.4f}")
+
+    # 5. Save
+    print(f"💾 Saving fine-tuned model to {MODEL_PATH}")
+    torch.save(model.state_dict(), MODEL_PATH)
+    print("✅ Fine-tuning complete.")
+
+# Run the fine-tuning pass only when this file is executed as a script.
+if __name__ == "__main__":
+    fine_tune()