This commit is contained in:
Executable
+183
@@ -0,0 +1,183 @@
|
||||
|
||||
import pandas as pd
|
||||
import xgboost as xgb
|
||||
import pickle
|
||||
import os
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import mean_absolute_error, r2_score
|
||||
|
||||
# Filesystem locations, resolved relative to the directory holding this script.
_SCRIPT_DIR = os.path.dirname(__file__)
DATA_PATH = os.path.join(_SCRIPT_DIR, "../data/training_data.csv")
MODEL_PATH = os.path.join(_SCRIPT_DIR, "../models/xgb_score.pkl")

# Feature column list shared with the markets trainer (described there as the
# unified 56-feature array), so both trainers use the same inputs.
from train_xgboost_markets import FEATURES

# Regression target columns: full-time and half-time goals for each side.
TARGETS = [
    "score_home",
    "score_away",
    "ht_score_home",
    "ht_score_away",
]
|
||||
|
||||
def _new_goal_regressor():
    """Return an untrained XGBRegressor with the settings shared by all four goal models."""
    return xgb.XGBRegressor(
        objective='reg:squarederror',
        n_estimators=1000,
        learning_rate=0.01,
        max_depth=5,
        subsample=0.7,
        colsample_bytree=0.7,
        n_jobs=-1,
        random_state=42,
    )


def _fit_goal_model(X_train, y_train, X_test, y_test):
    """Fit one goal regressor and return ``(fitted_model, test_set_predictions)``."""
    model = _new_goal_regressor()
    # No early stopping is configured, so eval_set does not influence the fit;
    # it is passed (as before) so the evaluation history is recorded on the model.
    model.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=False)
    return model, model.predict(X_test)


def train():
    """Train the four goal regressors (FT/HT x home/away) and pickle them.

    Reads ``DATA_PATH``, drops rows with a missing target or with
    ``odds_ms_h <= 1.0``, performs an 80/20 train/test split
    (``random_state=42``), fits one XGBoost regressor per column in
    ``TARGETS``, prints test-set metrics, and writes a dict with the models,
    the feature list and a ``meta`` dict of metrics to ``MODEL_PATH``.

    Returns:
        None. If the data file is missing, required columns are absent, or
        too few rows survive filtering, a message is printed and nothing is
        trained or saved.
    """
    print("🚀 Training Score Prediction Model (XGBoost) - Full Time & Half Time")
    print("=" * 60)

    if not os.path.exists(DATA_PATH):
        print(f"❌ Data file not found: {DATA_PATH}")
        return

    print(f"📦 Loading data from {DATA_PATH}...")
    df = pd.read_csv(DATA_PATH)

    # Fail with a readable message instead of a bare KeyError further down.
    needed = dict.fromkeys([*FEATURES, *TARGETS, "odds_ms_h"])
    absent = [col for col in needed if col not in df.columns]
    if absent:
        print(f"❌ Missing required columns in training data: {absent}")
        return

    # Rows without a target value cannot be used for supervised training.
    df = df.dropna(subset=TARGETS)
    # Feature NaNs are intentionally left in place: no imputation is done here
    # and XGBoost treats NaN as a missing value by default.
    print(f" Original rows: {len(df)}")

    # Filter valid odds (at least ms_h > 1.0)
    df = df[df["odds_ms_h"] > 1.0].copy()
    print(f" Rows with valid odds: {len(df)}")

    # train_test_split needs at least one row on each side of the split.
    if len(df) < 2:
        print("❌ Not enough rows left after filtering to train a model.")
        return

    X = df[FEATURES]
    (
        X_train, X_test,
        y_h_train, y_h_test,
        y_a_train, y_a_test,
        y_ht_h_train, y_ht_h_test,
        y_ht_a_train, y_ht_a_test,
    ) = train_test_split(
        X,
        df["score_home"],
        df["score_away"],
        df["ht_score_home"],
        df["ht_score_away"],
        test_size=0.2,
        random_state=42,
    )

    print(f" Training set: {len(X_train)} matches")
    print(f" Test set: {len(X_test)} matches")

    # --- HOME GOALS MODEL ---
    print("\n🏠 Training Home Goals Model...")
    xgb_home, home_preds = _fit_goal_model(X_train, y_h_train, X_test, y_h_test)
    mae_home = mean_absolute_error(y_h_test, home_preds)
    r2_home = r2_score(y_h_test, home_preds)
    print(f" ✅ FT Home MAE: {mae_home:.4f} goals")
    print(f" ✅ FT Home R2: {r2_home:.4f}")

    # --- AWAY GOALS MODEL ---
    print("\n✈️ Training FT Away Goals Model...")
    xgb_away, away_preds = _fit_goal_model(X_train, y_a_train, X_test, y_a_test)
    mae_away = mean_absolute_error(y_a_test, away_preds)
    r2_away = r2_score(y_a_test, away_preds)
    print(f" ✅ FT Away MAE: {mae_away:.4f} goals")
    print(f" ✅ FT Away R2: {r2_away:.4f}")

    # --- HT HOME GOALS MODEL ---
    print("\n🏠 Training HT Home Goals Model...")
    xgb_ht_home, ht_home_preds = _fit_goal_model(
        X_train, y_ht_h_train, X_test, y_ht_h_test
    )
    mae_ht_home = mean_absolute_error(y_ht_h_test, ht_home_preds)
    print(f" ✅ HT Home MAE: {mae_ht_home:.4f} goals")

    # --- HT AWAY GOALS MODEL ---
    print("\n✈️ Training HT Away Goals Model...")
    xgb_ht_away, ht_away_preds = _fit_goal_model(
        X_train, y_ht_a_train, X_test, y_ht_a_test
    )
    mae_ht_away = mean_absolute_error(y_ht_a_test, ht_away_preds)
    print(f" ✅ HT Away MAE: {mae_ht_away:.4f} goals")

    # --- EVALUATE EXACT SCORE ACCURACY (ROUNDED) ---
    print("\n🎯 Exact FT Score Accuracy (Test Set):")
    # Round each prediction to the nearest whole goal, then compare both sides
    # of the scoreline at once (vectorised form of the per-match comparison).
    home_err = abs(home_preds.round() - y_h_test.to_numpy())
    away_err = abs(away_preds.round() - y_a_test.to_numpy())
    exact_hits = (home_err == 0) & (away_err == 0)
    close_hits = (home_err <= 1) & (away_err <= 1)  # within 1 goal on both sides

    acc = float(exact_hits.sum() / len(X_test) * 100)
    close_acc = float(close_hits.sum() / len(X_test) * 100)
    print(f" Exact Match: {acc:.2f}%")
    print(f" Close Match (+/- 1 goal): {close_acc:.2f}%")

    # Save
    print(f"\n💾 Saving models to {MODEL_PATH}...")
    model_data = {
        "home_model": xgb_home,
        "away_model": xgb_away,
        "ht_home_model": xgb_ht_home,
        "ht_away_model": xgb_ht_away,
        "features": FEATURES,
        "meta": {
            "mae_home": mae_home,
            "mae_away": mae_away,
            "mae_ht_home": mae_ht_home,
            "mae_ht_away": mae_ht_away,
            "acc": acc,
        },
    }
    # Create the output directory up front so a missing folder does not throw
    # away a completed training run at the very last step.
    model_dir = os.path.dirname(MODEL_PATH)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)
    with open(MODEL_PATH, "wb") as f:
        pickle.dump(model_data, f)

    print("✅ Done.")
|
||||
|
||||
# Run the training pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    train()
|
||||
Reference in New Issue
Block a user