Compare commits
2 Commits
8ce8fa5b94
...
70fdc066c7
| Author | SHA1 | Date | |
|---|---|---|---|
| 70fdc066c7 | |||
| f3362f266c |
@@ -0,0 +1,692 @@
|
|||||||
|
{
|
||||||
|
"trained_at": "2026-05-10 19:48:06",
|
||||||
|
"trainer": "v25_pro",
|
||||||
|
"optuna_trials": 50,
|
||||||
|
"total_features": 114,
|
||||||
|
"markets": {
|
||||||
|
"MS": {
|
||||||
|
"market": "MS",
|
||||||
|
"samples": 106861,
|
||||||
|
"train": 64116,
|
||||||
|
"val": 16029,
|
||||||
|
"cal": 10686,
|
||||||
|
"test": 16030,
|
||||||
|
"features_used": 114,
|
||||||
|
"xgb_best_params": {
|
||||||
|
"max_depth": 4,
|
||||||
|
"eta": 0.022329400652878233,
|
||||||
|
"subsample": 0.6690795757813364,
|
||||||
|
"colsample_bytree": 0.5042256538541441,
|
||||||
|
"min_child_weight": 6,
|
||||||
|
"gamma": 9.960129417155444e-05,
|
||||||
|
"reg_lambda": 0.5132295377582388,
|
||||||
|
"reg_alpha": 6.804503659726287e-08
|
||||||
|
},
|
||||||
|
"lgb_best_params": {
|
||||||
|
"max_depth": 4,
|
||||||
|
"learning_rate": 0.023142410802706542,
|
||||||
|
"feature_fraction": 0.5728681432360808,
|
||||||
|
"bagging_fraction": 0.6781774410065095,
|
||||||
|
"bagging_freq": 2,
|
||||||
|
"min_child_samples": 26,
|
||||||
|
"lambda_l1": 3.25216937188593e-05,
|
||||||
|
"lambda_l2": 4.8081236902660474e-08
|
||||||
|
},
|
||||||
|
"xgb_best_iteration": 643,
|
||||||
|
"lgb_best_iteration": 441,
|
||||||
|
"xgb_optuna_best_logloss": 0.9155,
|
||||||
|
"lgb_optuna_best_logloss": 0.9146,
|
||||||
|
"test_xgb_raw": {
|
||||||
|
"accuracy": 0.5442,
|
||||||
|
"logloss": 0.943
|
||||||
|
},
|
||||||
|
"test_xgb_calibrated": {
|
||||||
|
"accuracy": 0.5404,
|
||||||
|
"logloss": 0.9438
|
||||||
|
},
|
||||||
|
"test_lgb_raw": {
|
||||||
|
"accuracy": 0.5427,
|
||||||
|
"logloss": 0.943
|
||||||
|
},
|
||||||
|
"test_lgb_calibrated": {
|
||||||
|
"accuracy": 0.5417,
|
||||||
|
"logloss": 0.9447
|
||||||
|
},
|
||||||
|
"test_ensemble_raw": {
|
||||||
|
"accuracy": 0.5437,
|
||||||
|
"logloss": 0.9426
|
||||||
|
},
|
||||||
|
"test_ensemble_calibrated": {
|
||||||
|
"accuracy": 0.5418,
|
||||||
|
"logloss": 0.9435
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"OU15": {
|
||||||
|
"market": "OU15",
|
||||||
|
"samples": 106861,
|
||||||
|
"train": 64116,
|
||||||
|
"val": 16029,
|
||||||
|
"cal": 10686,
|
||||||
|
"test": 16030,
|
||||||
|
"features_used": 114,
|
||||||
|
"xgb_best_params": {
|
||||||
|
"max_depth": 5,
|
||||||
|
"eta": 0.020779487257177966,
|
||||||
|
"subsample": 0.8109935286948485,
|
||||||
|
"colsample_bytree": 0.9525413847213635,
|
||||||
|
"min_child_weight": 6,
|
||||||
|
"gamma": 0.35330347775044696,
|
||||||
|
"reg_lambda": 5.373541021746059e-07,
|
||||||
|
"reg_alpha": 0.2959430087754284
|
||||||
|
},
|
||||||
|
"lgb_best_params": {
|
||||||
|
"max_depth": 6,
|
||||||
|
"learning_rate": 0.013402310027682367,
|
||||||
|
"feature_fraction": 0.7404728146233901,
|
||||||
|
"bagging_fraction": 0.9712026511549247,
|
||||||
|
"bagging_freq": 6,
|
||||||
|
"min_child_samples": 39,
|
||||||
|
"lambda_l1": 0.39893027986899576,
|
||||||
|
"lambda_l2": 0.0626443611997599
|
||||||
|
},
|
||||||
|
"xgb_best_iteration": 353,
|
||||||
|
"lgb_best_iteration": 370,
|
||||||
|
"xgb_optuna_best_logloss": 0.499,
|
||||||
|
"lgb_optuna_best_logloss": 0.4989,
|
||||||
|
"test_xgb_raw": {
|
||||||
|
"accuracy": 0.7521,
|
||||||
|
"logloss": 0.5267
|
||||||
|
},
|
||||||
|
"test_xgb_calibrated": {
|
||||||
|
"accuracy": 0.7521,
|
||||||
|
"logloss": 0.5344
|
||||||
|
},
|
||||||
|
"test_lgb_raw": {
|
||||||
|
"accuracy": 0.7528,
|
||||||
|
"logloss": 0.5261
|
||||||
|
},
|
||||||
|
"test_lgb_calibrated": {
|
||||||
|
"accuracy": 0.7505,
|
||||||
|
"logloss": 0.5362
|
||||||
|
},
|
||||||
|
"test_ensemble_raw": {
|
||||||
|
"accuracy": 0.7518,
|
||||||
|
"logloss": 0.5261
|
||||||
|
},
|
||||||
|
"test_ensemble_calibrated": {
|
||||||
|
"accuracy": 0.7522,
|
||||||
|
"logloss": 0.5364
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"OU25": {
|
||||||
|
"market": "OU25",
|
||||||
|
"samples": 106861,
|
||||||
|
"train": 64116,
|
||||||
|
"val": 16029,
|
||||||
|
"cal": 10686,
|
||||||
|
"test": 16030,
|
||||||
|
"features_used": 114,
|
||||||
|
"xgb_best_params": {
|
||||||
|
"max_depth": 5,
|
||||||
|
"eta": 0.01274409160014454,
|
||||||
|
"subsample": 0.8300258899365814,
|
||||||
|
"colsample_bytree": 0.7336425662264429,
|
||||||
|
"min_child_weight": 9,
|
||||||
|
"gamma": 2.5382243933649716e-06,
|
||||||
|
"reg_lambda": 5.096723080351853e-05,
|
||||||
|
"reg_alpha": 0.00040919711449493223
|
||||||
|
},
|
||||||
|
"lgb_best_params": {
|
||||||
|
"max_depth": 6,
|
||||||
|
"learning_rate": 0.02301514680733822,
|
||||||
|
"feature_fraction": 0.9569492061944688,
|
||||||
|
"bagging_fraction": 0.7249143523144639,
|
||||||
|
"bagging_freq": 1,
|
||||||
|
"min_child_samples": 40,
|
||||||
|
"lambda_l1": 9.954995248644963e-08,
|
||||||
|
"lambda_l2": 3.82413187126927e-06
|
||||||
|
},
|
||||||
|
"xgb_best_iteration": 475,
|
||||||
|
"lgb_best_iteration": 235,
|
||||||
|
"xgb_optuna_best_logloss": 0.6202,
|
||||||
|
"lgb_optuna_best_logloss": 0.62,
|
||||||
|
"test_xgb_raw": {
|
||||||
|
"accuracy": 0.6221,
|
||||||
|
"logloss": 0.6352
|
||||||
|
},
|
||||||
|
"test_xgb_calibrated": {
|
||||||
|
"accuracy": 0.6226,
|
||||||
|
"logloss": 0.6344
|
||||||
|
},
|
||||||
|
"test_lgb_raw": {
|
||||||
|
"accuracy": 0.6236,
|
||||||
|
"logloss": 0.6348
|
||||||
|
},
|
||||||
|
"test_lgb_calibrated": {
|
||||||
|
"accuracy": 0.6231,
|
||||||
|
"logloss": 0.6343
|
||||||
|
},
|
||||||
|
"test_ensemble_raw": {
|
||||||
|
"accuracy": 0.6239,
|
||||||
|
"logloss": 0.6349
|
||||||
|
},
|
||||||
|
"test_ensemble_calibrated": {
|
||||||
|
"accuracy": 0.6236,
|
||||||
|
"logloss": 0.6338
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"OU35": {
|
||||||
|
"market": "OU35",
|
||||||
|
"samples": 106861,
|
||||||
|
"train": 64116,
|
||||||
|
"val": 16029,
|
||||||
|
"cal": 10686,
|
||||||
|
"test": 16030,
|
||||||
|
"features_used": 114,
|
||||||
|
"xgb_best_params": {
|
||||||
|
"max_depth": 4,
|
||||||
|
"eta": 0.012538827444713596,
|
||||||
|
"subsample": 0.7947923612828379,
|
||||||
|
"colsample_bytree": 0.9717654601553765,
|
||||||
|
"min_child_weight": 6,
|
||||||
|
"gamma": 0.011265216242399128,
|
||||||
|
"reg_lambda": 0.12152579364613436,
|
||||||
|
"reg_alpha": 0.013995120492957489
|
||||||
|
},
|
||||||
|
"lgb_best_params": {
|
||||||
|
"max_depth": 6,
|
||||||
|
"learning_rate": 0.013456307557939324,
|
||||||
|
"feature_fraction": 0.8208768633332759,
|
||||||
|
"bagging_fraction": 0.929472334516626,
|
||||||
|
"bagging_freq": 6,
|
||||||
|
"min_child_samples": 35,
|
||||||
|
"lambda_l1": 0.05522724221034949,
|
||||||
|
"lambda_l2": 0.21689047644122147
|
||||||
|
},
|
||||||
|
"xgb_best_iteration": 696,
|
||||||
|
"lgb_best_iteration": 412,
|
||||||
|
"xgb_optuna_best_logloss": 0.552,
|
||||||
|
"lgb_optuna_best_logloss": 0.5515,
|
||||||
|
"test_xgb_raw": {
|
||||||
|
"accuracy": 0.7314,
|
||||||
|
"logloss": 0.5466
|
||||||
|
},
|
||||||
|
"test_xgb_calibrated": {
|
||||||
|
"accuracy": 0.7293,
|
||||||
|
"logloss": 0.5482
|
||||||
|
},
|
||||||
|
"test_lgb_raw": {
|
||||||
|
"accuracy": 0.73,
|
||||||
|
"logloss": 0.5462
|
||||||
|
},
|
||||||
|
"test_lgb_calibrated": {
|
||||||
|
"accuracy": 0.7298,
|
||||||
|
"logloss": 0.5485
|
||||||
|
},
|
||||||
|
"test_ensemble_raw": {
|
||||||
|
"accuracy": 0.7312,
|
||||||
|
"logloss": 0.5462
|
||||||
|
},
|
||||||
|
"test_ensemble_calibrated": {
|
||||||
|
"accuracy": 0.7301,
|
||||||
|
"logloss": 0.5478
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"BTTS": {
|
||||||
|
"market": "BTTS",
|
||||||
|
"samples": 106861,
|
||||||
|
"train": 64116,
|
||||||
|
"val": 16029,
|
||||||
|
"cal": 10686,
|
||||||
|
"test": 16030,
|
||||||
|
"features_used": 114,
|
||||||
|
"xgb_best_params": {
|
||||||
|
"max_depth": 4,
|
||||||
|
"eta": 0.023533647209064805,
|
||||||
|
"subsample": 0.7469060816054074,
|
||||||
|
"colsample_bytree": 0.8445418254808608,
|
||||||
|
"min_child_weight": 8,
|
||||||
|
"gamma": 1.0503733400514561e-08,
|
||||||
|
"reg_lambda": 2.0919595769527735e-06,
|
||||||
|
"reg_alpha": 0.027277017326535417
|
||||||
|
},
|
||||||
|
"lgb_best_params": {
|
||||||
|
"max_depth": 4,
|
||||||
|
"learning_rate": 0.03900730648793646,
|
||||||
|
"feature_fraction": 0.6968255358438369,
|
||||||
|
"bagging_fraction": 0.7078349435778689,
|
||||||
|
"bagging_freq": 1,
|
||||||
|
"min_child_samples": 46,
|
||||||
|
"lambda_l1": 1.1796591413903922e-05,
|
||||||
|
"lambda_l2": 1.574367227995052e-08
|
||||||
|
},
|
||||||
|
"xgb_best_iteration": 462,
|
||||||
|
"lgb_best_iteration": 339,
|
||||||
|
"xgb_optuna_best_logloss": 0.6557,
|
||||||
|
"lgb_optuna_best_logloss": 0.6554,
|
||||||
|
"test_xgb_raw": {
|
||||||
|
"accuracy": 0.5908,
|
||||||
|
"logloss": 0.6637
|
||||||
|
},
|
||||||
|
"test_xgb_calibrated": {
|
||||||
|
"accuracy": 0.5885,
|
||||||
|
"logloss": 0.6647
|
||||||
|
},
|
||||||
|
"test_lgb_raw": {
|
||||||
|
"accuracy": 0.5891,
|
||||||
|
"logloss": 0.6638
|
||||||
|
},
|
||||||
|
"test_lgb_calibrated": {
|
||||||
|
"accuracy": 0.5891,
|
||||||
|
"logloss": 0.6702
|
||||||
|
},
|
||||||
|
"test_ensemble_raw": {
|
||||||
|
"accuracy": 0.5892,
|
||||||
|
"logloss": 0.6635
|
||||||
|
},
|
||||||
|
"test_ensemble_calibrated": {
|
||||||
|
"accuracy": 0.5885,
|
||||||
|
"logloss": 0.6655
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"HT_RESULT": {
|
||||||
|
"market": "HT_RESULT",
|
||||||
|
"samples": 103641,
|
||||||
|
"train": 62184,
|
||||||
|
"val": 15546,
|
||||||
|
"cal": 10364,
|
||||||
|
"test": 15547,
|
||||||
|
"features_used": 114,
|
||||||
|
"xgb_best_params": {
|
||||||
|
"max_depth": 4,
|
||||||
|
"eta": 0.01736265891311687,
|
||||||
|
"subsample": 0.8370935625192159,
|
||||||
|
"colsample_bytree": 0.8091927356001175,
|
||||||
|
"min_child_weight": 9,
|
||||||
|
"gamma": 0.0006570311316367184,
|
||||||
|
"reg_lambda": 0.5206211670360164,
|
||||||
|
"reg_alpha": 0.0004530536252850605
|
||||||
|
},
|
||||||
|
"lgb_best_params": {
|
||||||
|
"max_depth": 4,
|
||||||
|
"learning_rate": 0.04842652289664568,
|
||||||
|
"feature_fraction": 0.6277272818879166,
|
||||||
|
"bagging_fraction": 0.9526964840164693,
|
||||||
|
"bagging_freq": 3,
|
||||||
|
"min_child_samples": 23,
|
||||||
|
"lambda_l1": 0.09429192580834124,
|
||||||
|
"lambda_l2": 5.5433175427148124e-08
|
||||||
|
},
|
||||||
|
"xgb_best_iteration": 516,
|
||||||
|
"lgb_best_iteration": 136,
|
||||||
|
"xgb_optuna_best_logloss": 1.0128,
|
||||||
|
"lgb_optuna_best_logloss": 1.0126,
|
||||||
|
"test_xgb_raw": {
|
||||||
|
"accuracy": 0.4689,
|
||||||
|
"logloss": 1.0174
|
||||||
|
},
|
||||||
|
"test_xgb_calibrated": {
|
||||||
|
"accuracy": 0.4685,
|
||||||
|
"logloss": 1.0193
|
||||||
|
},
|
||||||
|
"test_lgb_raw": {
|
||||||
|
"accuracy": 0.4696,
|
||||||
|
"logloss": 1.018
|
||||||
|
},
|
||||||
|
"test_lgb_calibrated": {
|
||||||
|
"accuracy": 0.4685,
|
||||||
|
"logloss": 1.0248
|
||||||
|
},
|
||||||
|
"test_ensemble_raw": {
|
||||||
|
"accuracy": 0.4699,
|
||||||
|
"logloss": 1.0172
|
||||||
|
},
|
||||||
|
"test_ensemble_calibrated": {
|
||||||
|
"accuracy": 0.4693,
|
||||||
|
"logloss": 1.0195
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"HT_OU05": {
|
||||||
|
"market": "HT_OU05",
|
||||||
|
"samples": 103641,
|
||||||
|
"train": 62184,
|
||||||
|
"val": 15546,
|
||||||
|
"cal": 10364,
|
||||||
|
"test": 15547,
|
||||||
|
"features_used": 114,
|
||||||
|
"xgb_best_params": {
|
||||||
|
"max_depth": 4,
|
||||||
|
"eta": 0.02440515089624656,
|
||||||
|
"subsample": 0.7173767988211683,
|
||||||
|
"colsample_bytree": 0.5705266148307722,
|
||||||
|
"min_child_weight": 10,
|
||||||
|
"gamma": 0.00010295747493868653,
|
||||||
|
"reg_lambda": 0.00048367003442154754,
|
||||||
|
"reg_alpha": 0.00018303274057896783
|
||||||
|
},
|
||||||
|
"lgb_best_params": {
|
||||||
|
"max_depth": 4,
|
||||||
|
"learning_rate": 0.043477055106943,
|
||||||
|
"feature_fraction": 0.5704621124873813,
|
||||||
|
"bagging_fraction": 0.9208787923016158,
|
||||||
|
"bagging_freq": 1,
|
||||||
|
"min_child_samples": 50,
|
||||||
|
"lambda_l1": 0.015064619068942013,
|
||||||
|
"lambda_l2": 6.143857495033091e-07
|
||||||
|
},
|
||||||
|
"xgb_best_iteration": 315,
|
||||||
|
"lgb_best_iteration": 133,
|
||||||
|
"xgb_optuna_best_logloss": 0.5756,
|
||||||
|
"lgb_optuna_best_logloss": 0.5757,
|
||||||
|
"test_xgb_raw": {
|
||||||
|
"accuracy": 0.7021,
|
||||||
|
"logloss": 0.5949
|
||||||
|
},
|
||||||
|
"test_xgb_calibrated": {
|
||||||
|
"accuracy": 0.7011,
|
||||||
|
"logloss": 0.5976
|
||||||
|
},
|
||||||
|
"test_lgb_raw": {
|
||||||
|
"accuracy": 0.7009,
|
||||||
|
"logloss": 0.5954
|
||||||
|
},
|
||||||
|
"test_lgb_calibrated": {
|
||||||
|
"accuracy": 0.7019,
|
||||||
|
"logloss": 0.6002
|
||||||
|
},
|
||||||
|
"test_ensemble_raw": {
|
||||||
|
"accuracy": 0.7012,
|
||||||
|
"logloss": 0.5947
|
||||||
|
},
|
||||||
|
"test_ensemble_calibrated": {
|
||||||
|
"accuracy": 0.7016,
|
||||||
|
"logloss": 0.5994
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"HT_OU15": {
|
||||||
|
"market": "HT_OU15",
|
||||||
|
"samples": 103641,
|
||||||
|
"train": 62184,
|
||||||
|
"val": 15546,
|
||||||
|
"cal": 10364,
|
||||||
|
"test": 15547,
|
||||||
|
"features_used": 114,
|
||||||
|
"xgb_best_params": {
|
||||||
|
"max_depth": 4,
|
||||||
|
"eta": 0.032235943414662994,
|
||||||
|
"subsample": 0.9298749893021518,
|
||||||
|
"colsample_bytree": 0.8077813949235508,
|
||||||
|
"min_child_weight": 8,
|
||||||
|
"gamma": 0.00020929324388600622,
|
||||||
|
"reg_lambda": 3.2154973975232725e-05,
|
||||||
|
"reg_alpha": 1.5945155621686738e-08
|
||||||
|
},
|
||||||
|
"lgb_best_params": {
|
||||||
|
"max_depth": 5,
|
||||||
|
"learning_rate": 0.013909897616748226,
|
||||||
|
"feature_fraction": 0.5585477334219859,
|
||||||
|
"bagging_fraction": 0.9398770580467641,
|
||||||
|
"bagging_freq": 2,
|
||||||
|
"min_child_samples": 22,
|
||||||
|
"lambda_l1": 0.001865897980802303,
|
||||||
|
"lambda_l2": 2.6934572591055333e-06
|
||||||
|
},
|
||||||
|
"xgb_best_iteration": 188,
|
||||||
|
"lgb_best_iteration": 387,
|
||||||
|
"xgb_optuna_best_logloss": 0.616,
|
||||||
|
"lgb_optuna_best_logloss": 0.6159,
|
||||||
|
"test_xgb_raw": {
|
||||||
|
"accuracy": 0.6749,
|
||||||
|
"logloss": 0.6109
|
||||||
|
},
|
||||||
|
"test_xgb_calibrated": {
|
||||||
|
"accuracy": 0.6747,
|
||||||
|
"logloss": 0.6137
|
||||||
|
},
|
||||||
|
"test_lgb_raw": {
|
||||||
|
"accuracy": 0.6745,
|
||||||
|
"logloss": 0.6112
|
||||||
|
},
|
||||||
|
"test_lgb_calibrated": {
|
||||||
|
"accuracy": 0.6745,
|
||||||
|
"logloss": 0.6201
|
||||||
|
},
|
||||||
|
"test_ensemble_raw": {
|
||||||
|
"accuracy": 0.674,
|
||||||
|
"logloss": 0.6109
|
||||||
|
},
|
||||||
|
"test_ensemble_calibrated": {
|
||||||
|
"accuracy": 0.6744,
|
||||||
|
"logloss": 0.6174
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"HTFT": {
|
||||||
|
"market": "HTFT",
|
||||||
|
"samples": 103641,
|
||||||
|
"train": 62184,
|
||||||
|
"val": 15546,
|
||||||
|
"cal": 10364,
|
||||||
|
"test": 15547,
|
||||||
|
"features_used": 114,
|
||||||
|
"xgb_best_params": {
|
||||||
|
"max_depth": 4,
|
||||||
|
"eta": 0.015239309183459821,
|
||||||
|
"subsample": 0.7923828997985648,
|
||||||
|
"colsample_bytree": 0.686316507387916,
|
||||||
|
"min_child_weight": 6,
|
||||||
|
"gamma": 0.005249577944740401,
|
||||||
|
"reg_lambda": 2.1813455810361064e-08,
|
||||||
|
"reg_alpha": 3.454483107951557e-06
|
||||||
|
},
|
||||||
|
"lgb_best_params": {
|
||||||
|
"max_depth": 4,
|
||||||
|
"learning_rate": 0.010347899501864056,
|
||||||
|
"feature_fraction": 0.9585697341293057,
|
||||||
|
"bagging_fraction": 0.9413628962257758,
|
||||||
|
"bagging_freq": 2,
|
||||||
|
"min_child_samples": 36,
|
||||||
|
"lambda_l1": 0.0015332771659626943,
|
||||||
|
"lambda_l2": 7.3640280079715765
|
||||||
|
},
|
||||||
|
"xgb_best_iteration": 714,
|
||||||
|
"lgb_best_iteration": 602,
|
||||||
|
"xgb_optuna_best_logloss": 1.7863,
|
||||||
|
"lgb_optuna_best_logloss": 1.7862,
|
||||||
|
"test_xgb_raw": {
|
||||||
|
"accuracy": 0.3349,
|
||||||
|
"logloss": 1.8179
|
||||||
|
},
|
||||||
|
"test_xgb_calibrated": {
|
||||||
|
"accuracy": 0.3332,
|
||||||
|
"logloss": 1.824
|
||||||
|
},
|
||||||
|
"test_lgb_raw": {
|
||||||
|
"accuracy": 0.3367,
|
||||||
|
"logloss": 1.8187
|
||||||
|
},
|
||||||
|
"test_lgb_calibrated": {
|
||||||
|
"accuracy": 0.335,
|
||||||
|
"logloss": 1.8338
|
||||||
|
},
|
||||||
|
"test_ensemble_raw": {
|
||||||
|
"accuracy": 0.3363,
|
||||||
|
"logloss": 1.8176
|
||||||
|
},
|
||||||
|
"test_ensemble_calibrated": {
|
||||||
|
"accuracy": 0.3338,
|
||||||
|
"logloss": 1.828
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"ODD_EVEN": {
|
||||||
|
"market": "ODD_EVEN",
|
||||||
|
"samples": 106861,
|
||||||
|
"train": 64116,
|
||||||
|
"val": 16029,
|
||||||
|
"cal": 10686,
|
||||||
|
"test": 16030,
|
||||||
|
"features_used": 114,
|
||||||
|
"xgb_best_params": {
|
||||||
|
"max_depth": 8,
|
||||||
|
"eta": 0.01010929937405026,
|
||||||
|
"subsample": 0.9492996501687384,
|
||||||
|
"colsample_bytree": 0.9061960005014683,
|
||||||
|
"min_child_weight": 7,
|
||||||
|
"gamma": 2.664416507237002e-08,
|
||||||
|
"reg_lambda": 0.0003748192960525308,
|
||||||
|
"reg_alpha": 0.005287068300306146
|
||||||
|
},
|
||||||
|
"lgb_best_params": {
|
||||||
|
"max_depth": 8,
|
||||||
|
"learning_rate": 0.0634879805509945,
|
||||||
|
"feature_fraction": 0.9993568368122896,
|
||||||
|
"bagging_fraction": 0.9246236397710591,
|
||||||
|
"bagging_freq": 3,
|
||||||
|
"min_child_samples": 16,
|
||||||
|
"lambda_l1": 0.0016414429853061781,
|
||||||
|
"lambda_l2": 6.112007631403553e-05
|
||||||
|
},
|
||||||
|
"xgb_best_iteration": 322,
|
||||||
|
"lgb_best_iteration": 55,
|
||||||
|
"xgb_optuna_best_logloss": 0.6777,
|
||||||
|
"lgb_optuna_best_logloss": 0.6762,
|
||||||
|
"test_xgb_raw": {
|
||||||
|
"accuracy": 0.5216,
|
||||||
|
"logloss": 0.684
|
||||||
|
},
|
||||||
|
"test_xgb_calibrated": {
|
||||||
|
"accuracy": 0.5236,
|
||||||
|
"logloss": 0.6834
|
||||||
|
},
|
||||||
|
"test_lgb_raw": {
|
||||||
|
"accuracy": 0.5279,
|
||||||
|
"logloss": 0.6826
|
||||||
|
},
|
||||||
|
"test_lgb_calibrated": {
|
||||||
|
"accuracy": 0.5274,
|
||||||
|
"logloss": 0.6861
|
||||||
|
},
|
||||||
|
"test_ensemble_raw": {
|
||||||
|
"accuracy": 0.5239,
|
||||||
|
"logloss": 0.6828
|
||||||
|
},
|
||||||
|
"test_ensemble_calibrated": {
|
||||||
|
"accuracy": 0.5236,
|
||||||
|
"logloss": 0.6861
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"CARDS_OU45": {
|
||||||
|
"market": "CARDS_OU45",
|
||||||
|
"samples": 106861,
|
||||||
|
"train": 64116,
|
||||||
|
"val": 16029,
|
||||||
|
"cal": 10686,
|
||||||
|
"test": 16030,
|
||||||
|
"features_used": 114,
|
||||||
|
"xgb_best_params": {
|
||||||
|
"max_depth": 8,
|
||||||
|
"eta": 0.010098671964329344,
|
||||||
|
"subsample": 0.9969616653360747,
|
||||||
|
"colsample_bytree": 0.5085930751344795,
|
||||||
|
"min_child_weight": 10,
|
||||||
|
"gamma": 0.8600893137103568,
|
||||||
|
"reg_lambda": 7.556243125116086,
|
||||||
|
"reg_alpha": 0.5596869360839299
|
||||||
|
},
|
||||||
|
"lgb_best_params": {
|
||||||
|
"max_depth": 8,
|
||||||
|
"learning_rate": 0.0183440412249233,
|
||||||
|
"feature_fraction": 0.5416111323291537,
|
||||||
|
"bagging_fraction": 0.9754210612419695,
|
||||||
|
"bagging_freq": 2,
|
||||||
|
"min_child_samples": 5,
|
||||||
|
"lambda_l1": 0.09157782079463243,
|
||||||
|
"lambda_l2": 2.559000594641019
|
||||||
|
},
|
||||||
|
"xgb_best_iteration": 973,
|
||||||
|
"lgb_best_iteration": 503,
|
||||||
|
"xgb_optuna_best_logloss": 0.6408,
|
||||||
|
"lgb_optuna_best_logloss": 0.6407,
|
||||||
|
"test_xgb_raw": {
|
||||||
|
"accuracy": 0.597,
|
||||||
|
"logloss": 0.6501
|
||||||
|
},
|
||||||
|
"test_xgb_calibrated": {
|
||||||
|
"accuracy": 0.6019,
|
||||||
|
"logloss": 0.6471
|
||||||
|
},
|
||||||
|
"test_lgb_raw": {
|
||||||
|
"accuracy": 0.5977,
|
||||||
|
"logloss": 0.6486
|
||||||
|
},
|
||||||
|
"test_lgb_calibrated": {
|
||||||
|
"accuracy": 0.6019,
|
||||||
|
"logloss": 0.6498
|
||||||
|
},
|
||||||
|
"test_ensemble_raw": {
|
||||||
|
"accuracy": 0.5964,
|
||||||
|
"logloss": 0.6487
|
||||||
|
},
|
||||||
|
"test_ensemble_calibrated": {
|
||||||
|
"accuracy": 0.6034,
|
||||||
|
"logloss": 0.6467
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"HANDICAP_MS": {
|
||||||
|
"market": "HANDICAP_MS",
|
||||||
|
"samples": 106861,
|
||||||
|
"train": 64116,
|
||||||
|
"val": 16029,
|
||||||
|
"cal": 10686,
|
||||||
|
"test": 16030,
|
||||||
|
"features_used": 114,
|
||||||
|
"xgb_best_params": {
|
||||||
|
"max_depth": 4,
|
||||||
|
"eta": 0.01475719431584365,
|
||||||
|
"subsample": 0.867899230696633,
|
||||||
|
"colsample_bytree": 0.6518567347674479,
|
||||||
|
"min_child_weight": 9,
|
||||||
|
"gamma": 0.34932767754310273,
|
||||||
|
"reg_lambda": 3.3257801082201637e-07,
|
||||||
|
"reg_alpha": 4.6977721450875555e-06
|
||||||
|
},
|
||||||
|
"lgb_best_params": {
|
||||||
|
"max_depth": 7,
|
||||||
|
"learning_rate": 0.019649745228555244,
|
||||||
|
"feature_fraction": 0.7903699430858344,
|
||||||
|
"bagging_fraction": 0.7932436899357213,
|
||||||
|
"bagging_freq": 3,
|
||||||
|
"min_child_samples": 30,
|
||||||
|
"lambda_l1": 9.496143774926949e-08,
|
||||||
|
"lambda_l2": 0.0049885051588706136
|
||||||
|
},
|
||||||
|
"xgb_best_iteration": 1016,
|
||||||
|
"lgb_best_iteration": 364,
|
||||||
|
"xgb_optuna_best_logloss": 0.8328,
|
||||||
|
"lgb_optuna_best_logloss": 0.8322,
|
||||||
|
"test_xgb_raw": {
|
||||||
|
"accuracy": 0.6062,
|
||||||
|
"logloss": 0.871
|
||||||
|
},
|
||||||
|
"test_xgb_calibrated": {
|
||||||
|
"accuracy": 0.6039,
|
||||||
|
"logloss": 0.8729
|
||||||
|
},
|
||||||
|
"test_lgb_raw": {
|
||||||
|
"accuracy": 0.6079,
|
||||||
|
"logloss": 0.8713
|
||||||
|
},
|
||||||
|
"test_lgb_calibrated": {
|
||||||
|
"accuracy": 0.6067,
|
||||||
|
"logloss": 0.8736
|
||||||
|
},
|
||||||
|
"test_ensemble_raw": {
|
||||||
|
"accuracy": 0.6072,
|
||||||
|
"logloss": 0.8707
|
||||||
|
},
|
||||||
|
"test_ensemble_calibrated": {
|
||||||
|
"accuracy": 0.6066,
|
||||||
|
"logloss": 0.8728
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -14,6 +14,7 @@ import json
|
|||||||
import csv
|
import csv
|
||||||
import math
|
import math
|
||||||
import time
|
import time
|
||||||
|
import bisect
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
@@ -119,6 +120,14 @@ FEATURE_COLS = [
|
|||||||
"home_key_players", "away_key_players",
|
"home_key_players", "away_key_players",
|
||||||
"home_missing_impact", "away_missing_impact",
|
"home_missing_impact", "away_missing_impact",
|
||||||
"home_goals_form", "away_goals_form",
|
"home_goals_form", "away_goals_form",
|
||||||
|
|
||||||
|
# Player-Level Features (12)
|
||||||
|
"home_lineup_goals_per90", "away_lineup_goals_per90",
|
||||||
|
"home_lineup_assists_per90", "away_lineup_assists_per90",
|
||||||
|
"home_squad_continuity", "away_squad_continuity",
|
||||||
|
"home_top_scorer_form", "away_top_scorer_form",
|
||||||
|
"home_avg_player_exp", "away_avg_player_exp",
|
||||||
|
"home_goals_diversity", "away_goals_diversity",
|
||||||
|
|
||||||
# Labels
|
# Labels
|
||||||
"score_home", "score_away", "total_goals",
|
"score_home", "score_away", "total_goals",
|
||||||
@@ -336,7 +345,7 @@ class BatchDataLoader:
|
|||||||
self.team_stats[tid].append((mst, poss, sot, tshots, corn, team_goals))
|
self.team_stats[tid].append((mst, poss, sot, tshots, corn, team_goals))
|
||||||
|
|
||||||
def _load_squad_data(self):
|
def _load_squad_data(self):
|
||||||
"""Bulk load squad participation + player events for squad features."""
|
"""Bulk load squad participation + player events + player career for squad features."""
|
||||||
ph = ",".join(["%s"] * len(self.top_league_ids))
|
ph = ",".join(["%s"] * len(self.top_league_ids))
|
||||||
|
|
||||||
# 1) Participation: starting XI count + position distribution per (match, team)
|
# 1) Participation: starting XI count + position distribution per (match, team)
|
||||||
@@ -429,9 +438,90 @@ class BatchDataLoader:
|
|||||||
for m in self.matches:
|
for m in self.matches:
|
||||||
match_mst[m[0]] = m[7] # m[0]=id, m[7]=mst_utc
|
match_mst[m[0]] = m[7] # m[0]=id, m[7]=mst_utc
|
||||||
|
|
||||||
# 6) Build combined cache — NO DATA LEAKAGE
|
# ─── NEW: Player Career Stats (prefix-sum for O(1) temporal lookup) ───
|
||||||
# goals_form: avg goals from last 5 matches BEFORE this match (not this match!)
|
# 6a) Goals per player per match date
|
||||||
# squad_quality: only uses pre-match info (lineup, key players) — no current-match goals/assists
|
self.cur.execute(f"""
|
||||||
|
SELECT mpe.player_id, m.mst_utc,
|
||||||
|
SUM(CASE WHEN mpe.event_type = 'goal'
|
||||||
|
AND COALESCE(mpe.event_subtype, '') NOT ILIKE '%%penaltı kaçırma%%'
|
||||||
|
THEN 1 ELSE 0 END) AS goals
|
||||||
|
FROM match_player_events mpe
|
||||||
|
JOIN matches m ON mpe.match_id = m.id
|
||||||
|
WHERE m.status = 'FT' AND m.sport = 'football' AND m.league_id IN ({ph})
|
||||||
|
GROUP BY mpe.player_id, m.mst_utc
|
||||||
|
""", self.top_league_ids)
|
||||||
|
|
||||||
|
player_goals_raw = defaultdict(dict)
|
||||||
|
for pid, mst, goals in self.cur.fetchall():
|
||||||
|
player_goals_raw[pid][mst] = (player_goals_raw[pid].get(mst, 0)) + (goals or 0)
|
||||||
|
|
||||||
|
# 6b) Assists per player per match date
|
||||||
|
self.cur.execute(f"""
|
||||||
|
SELECT mpe.assist_player_id, m.mst_utc, COUNT(*) AS assists
|
||||||
|
FROM match_player_events mpe
|
||||||
|
JOIN matches m ON mpe.match_id = m.id
|
||||||
|
WHERE m.status = 'FT' AND m.sport = 'football' AND m.league_id IN ({ph})
|
||||||
|
AND mpe.event_type = 'goal' AND mpe.assist_player_id IS NOT NULL
|
||||||
|
GROUP BY mpe.assist_player_id, m.mst_utc
|
||||||
|
""", self.top_league_ids)
|
||||||
|
|
||||||
|
player_assists_raw = defaultdict(dict)
|
||||||
|
for pid, mst, assists in self.cur.fetchall():
|
||||||
|
player_assists_raw[pid][mst] = (player_assists_raw[pid].get(mst, 0)) + (assists or 0)
|
||||||
|
|
||||||
|
# 6c) Player participation dates (starts only)
|
||||||
|
self.cur.execute(f"""
|
||||||
|
SELECT mpp.player_id, m.mst_utc
|
||||||
|
FROM match_player_participation mpp
|
||||||
|
JOIN matches m ON mpp.match_id = m.id
|
||||||
|
WHERE mpp.is_starting = true
|
||||||
|
AND m.status = 'FT' AND m.sport = 'football' AND m.league_id IN ({ph})
|
||||||
|
ORDER BY mpp.player_id, m.mst_utc
|
||||||
|
""", self.top_league_ids)
|
||||||
|
|
||||||
|
player_starts_raw = defaultdict(list)
|
||||||
|
for pid, mst in self.cur.fetchall():
|
||||||
|
player_starts_raw[pid].append(mst)
|
||||||
|
|
||||||
|
# 6d) Build prefix sums per player (goals_prefix[i] = total goals up to start i)
|
||||||
|
player_career = {}
|
||||||
|
all_pids = set(player_starts_raw.keys()) | set(player_goals_raw.keys()) | set(player_assists_raw.keys())
|
||||||
|
for pid in all_pids:
|
||||||
|
starts = sorted(set(player_starts_raw.get(pid, [])))
|
||||||
|
if not starts:
|
||||||
|
continue
|
||||||
|
g_map = player_goals_raw.get(pid, {})
|
||||||
|
a_map = player_assists_raw.get(pid, {})
|
||||||
|
cum_g, cum_a = 0, 0
|
||||||
|
goals_pf, assists_pf = [], []
|
||||||
|
for mst in starts:
|
||||||
|
cum_g += g_map.get(mst, 0)
|
||||||
|
cum_a += a_map.get(mst, 0)
|
||||||
|
goals_pf.append(cum_g)
|
||||||
|
assists_pf.append(cum_a)
|
||||||
|
player_career[pid] = {'msts': starts, 'gp': goals_pf, 'ap': assists_pf}
|
||||||
|
|
||||||
|
# Free raw dicts
|
||||||
|
del player_goals_raw, player_assists_raw, player_starts_raw
|
||||||
|
print(f" 📊 Player careers built: {len(player_career)} players", flush=True)
|
||||||
|
|
||||||
|
# ─── NEW: Team Lineup History (for squad continuity) ───
|
||||||
|
# 7) Per-team sorted lineups: [(mst, frozenset(player_ids))]
|
||||||
|
team_lineup_map = defaultdict(list)
|
||||||
|
for (mid, tid), pids in starting_players.items():
|
||||||
|
mst = match_mst.get(mid, 0)
|
||||||
|
if mst > 0 and pids:
|
||||||
|
team_lineup_map[tid].append((mst, frozenset(pids)))
|
||||||
|
|
||||||
|
team_lineup_history = {}
|
||||||
|
team_lineup_msts = {}
|
||||||
|
for tid, ll in team_lineup_map.items():
|
||||||
|
ll.sort(key=lambda x: x[0])
|
||||||
|
team_lineup_history[tid] = ll
|
||||||
|
team_lineup_msts[tid] = [x[0] for x in ll]
|
||||||
|
del team_lineup_map
|
||||||
|
|
||||||
|
# ─── 8) Build combined cache — NO DATA LEAKAGE ───
|
||||||
all_keys = set(participation.keys()) | set(events.keys())
|
all_keys = set(participation.keys()) | set(events.keys())
|
||||||
for key in all_keys:
|
for key in all_keys:
|
||||||
mid, tid = key
|
mid, tid = key
|
||||||
@@ -443,30 +533,78 @@ class BatchDataLoader:
|
|||||||
kp_total = len(key_players_by_team.get(tid, set()))
|
kp_total = len(key_players_by_team.get(tid, set()))
|
||||||
kp_missing = max(0, kp_total - kp_in_starting)
|
kp_missing = max(0, kp_total - kp_in_starting)
|
||||||
|
|
||||||
# Squad quality: composite score — ONLY pre-match info (no current-match goals/assists!)
|
# Squad quality: composite score — ONLY pre-match info
|
||||||
squad_quality = (
|
squad_quality = (
|
||||||
part['starting_count'] * 0.3 +
|
part['starting_count'] * 0.3 +
|
||||||
kp_in_starting * 3.0 +
|
kp_in_starting * 3.0 +
|
||||||
part['fwd_count'] * 1.5
|
part['fwd_count'] * 1.5
|
||||||
)
|
)
|
||||||
# Missing impact: how many key players are missing
|
|
||||||
missing_impact = min(kp_missing / max(kp_total, 1), 1.0)
|
missing_impact = min(kp_missing / max(kp_total, 1), 1.0)
|
||||||
|
|
||||||
# goals_form: avg goals from last 5 matches BEFORE this match
|
# goals_form: avg goals from last 5 matches BEFORE this match
|
||||||
current_mst = match_mst.get(mid, 0)
|
current_mst = match_mst.get(mid, 0)
|
||||||
team_history = self.team_matches.get(tid, [])
|
team_history = self.team_matches.get(tid, [])
|
||||||
recent_goals = [
|
recent_goals = [
|
||||||
tm[2] # team_score
|
tm[2] for tm in team_history if tm[0] < current_mst
|
||||||
for tm in team_history
|
][-5:]
|
||||||
if tm[0] < current_mst # only matches BEFORE this one
|
|
||||||
][-5:] # last 5
|
|
||||||
goals_form = sum(recent_goals) / len(recent_goals) if recent_goals else 1.3
|
goals_form = sum(recent_goals) / len(recent_goals) if recent_goals else 1.3
|
||||||
|
|
||||||
|
# ─── NEW: Player-level aggregation for starting XI ───
|
||||||
|
lineup_g90, lineup_a90, total_exp = 0.0, 0.0, 0
|
||||||
|
best_scorer_total, best_scorer_id = 0, None
|
||||||
|
scorers_in_lineup = 0
|
||||||
|
|
||||||
|
for pid in starters:
|
||||||
|
pc = player_career.get(pid)
|
||||||
|
if not pc:
|
||||||
|
continue
|
||||||
|
idx = bisect.bisect_left(pc['msts'], current_mst)
|
||||||
|
if idx == 0:
|
||||||
|
continue # no prior matches for this player
|
||||||
|
prior_starts = idx
|
||||||
|
prior_goals = pc['gp'][idx - 1]
|
||||||
|
prior_assists = pc['ap'][idx - 1]
|
||||||
|
lineup_g90 += prior_goals / prior_starts
|
||||||
|
lineup_a90 += prior_assists / prior_starts
|
||||||
|
total_exp += prior_starts
|
||||||
|
if prior_goals > 0:
|
||||||
|
scorers_in_lineup += 1
|
||||||
|
if prior_goals > best_scorer_total:
|
||||||
|
best_scorer_total = prior_goals
|
||||||
|
best_scorer_id = pid
|
||||||
|
|
||||||
|
n_st = len(starters) or 1
|
||||||
|
|
||||||
|
# Top scorer recent form (goals in last 5 starts)
|
||||||
|
top_scorer_form = 0
|
||||||
|
if best_scorer_id:
|
||||||
|
pc = player_career.get(best_scorer_id)
|
||||||
|
if pc:
|
||||||
|
idx = bisect.bisect_left(pc['msts'], current_mst)
|
||||||
|
if idx > 0:
|
||||||
|
s5 = max(0, idx - 5)
|
||||||
|
top_scorer_form = pc['gp'][idx - 1] - (pc['gp'][s5 - 1] if s5 > 0 else 0)
|
||||||
|
|
||||||
|
# Squad continuity (overlap with previous match lineup)
|
||||||
|
squad_continuity = 0.5
|
||||||
|
msts_list = team_lineup_msts.get(tid)
|
||||||
|
if msts_list:
|
||||||
|
li = bisect.bisect_left(msts_list, current_mst)
|
||||||
|
if li > 0:
|
||||||
|
prev_lineup = team_lineup_history[tid][li - 1][1]
|
||||||
|
squad_continuity = len(frozenset(starters) & prev_lineup) / n_st
|
||||||
|
|
||||||
self.squad_cache[key] = {
|
self.squad_cache[key] = {
|
||||||
'squad_quality': squad_quality,
|
'squad_quality': squad_quality,
|
||||||
'key_players': kp_in_starting,
|
'key_players': kp_in_starting,
|
||||||
'missing_impact': missing_impact,
|
'missing_impact': missing_impact,
|
||||||
'goals_form': round(goals_form, 2),
|
'goals_form': round(goals_form, 2),
|
||||||
|
'lineup_goals_per90': round(lineup_g90, 3),
|
||||||
|
'lineup_assists_per90': round(lineup_a90, 3),
|
||||||
|
'squad_continuity': round(squad_continuity, 3),
|
||||||
|
'top_scorer_form': top_scorer_form,
|
||||||
|
'avg_player_exp': round(total_exp / n_st, 1),
|
||||||
|
'goals_diversity': round(scorers_in_lineup / n_st, 3),
|
||||||
}
|
}
|
||||||
|
|
||||||
def _load_cards_data(self):
|
def _load_cards_data(self):
|
||||||
@@ -855,6 +993,20 @@ class FeatureExtractor:
|
|||||||
"away_missing_impact": away_missing_impact,
|
"away_missing_impact": away_missing_impact,
|
||||||
"home_goals_form": home_goals_form,
|
"home_goals_form": home_goals_form,
|
||||||
"away_goals_form": away_goals_form,
|
"away_goals_form": away_goals_form,
|
||||||
|
|
||||||
|
# Player-Level Features
|
||||||
|
"home_lineup_goals_per90": home_sq.get('lineup_goals_per90', 0.0),
|
||||||
|
"away_lineup_goals_per90": away_sq.get('lineup_goals_per90', 0.0),
|
||||||
|
"home_lineup_assists_per90": home_sq.get('lineup_assists_per90', 0.0),
|
||||||
|
"away_lineup_assists_per90": away_sq.get('lineup_assists_per90', 0.0),
|
||||||
|
"home_squad_continuity": home_sq.get('squad_continuity', 0.5),
|
||||||
|
"away_squad_continuity": away_sq.get('squad_continuity', 0.5),
|
||||||
|
"home_top_scorer_form": home_sq.get('top_scorer_form', 0),
|
||||||
|
"away_top_scorer_form": away_sq.get('top_scorer_form', 0),
|
||||||
|
"home_avg_player_exp": home_sq.get('avg_player_exp', 0.0),
|
||||||
|
"away_avg_player_exp": away_sq.get('avg_player_exp', 0.0),
|
||||||
|
"home_goals_diversity": home_sq.get('goals_diversity', 0.0),
|
||||||
|
"away_goals_diversity": away_sq.get('goals_diversity', 0.0),
|
||||||
|
|
||||||
# Labels
|
# Labels
|
||||||
"score_home": sh,
|
"score_home": sh,
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ import optuna
|
|||||||
from optuna.samplers import TPESampler
|
from optuna.samplers import TPESampler
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from sklearn.metrics import accuracy_score, log_loss, classification_report
|
from sklearn.metrics import accuracy_score, log_loss, classification_report
|
||||||
from sklearn.calibration import CalibratedClassifierCV
|
from sklearn.isotonic import IsotonicRegression
|
||||||
from sklearn.base import BaseEstimator, ClassifierMixin
|
from sklearn.base import BaseEstimator, ClassifierMixin
|
||||||
|
|
||||||
optuna.logging.set_verbosity(optuna.logging.WARNING)
|
optuna.logging.set_verbosity(optuna.logging.WARNING)
|
||||||
@@ -38,7 +38,7 @@ REPORTS_DIR = os.path.join(AI_ENGINE_DIR, "reports", "training_v25")
|
|||||||
os.makedirs(MODELS_DIR, exist_ok=True)
|
os.makedirs(MODELS_DIR, exist_ok=True)
|
||||||
os.makedirs(REPORTS_DIR, exist_ok=True)
|
os.makedirs(REPORTS_DIR, exist_ok=True)
|
||||||
|
|
||||||
# ─── Feature Columns (83 features, NO target leakage) ───────────────
|
# ─── Feature Columns (95 features, NO target leakage) ───────────────
|
||||||
FEATURES = [
|
FEATURES = [
|
||||||
# ELO (8)
|
# ELO (8)
|
||||||
"home_overall_elo", "away_overall_elo", "elo_diff",
|
"home_overall_elo", "away_overall_elo", "elo_diff",
|
||||||
@@ -94,6 +94,13 @@ FEATURES = [
|
|||||||
"home_key_players", "away_key_players",
|
"home_key_players", "away_key_players",
|
||||||
"home_missing_impact", "away_missing_impact",
|
"home_missing_impact", "away_missing_impact",
|
||||||
"home_goals_form", "away_goals_form",
|
"home_goals_form", "away_goals_form",
|
||||||
|
# Player-Level Features (12)
|
||||||
|
"home_lineup_goals_per90", "away_lineup_goals_per90",
|
||||||
|
"home_lineup_assists_per90", "away_lineup_assists_per90",
|
||||||
|
"home_squad_continuity", "away_squad_continuity",
|
||||||
|
"home_top_scorer_form", "away_top_scorer_form",
|
||||||
|
"home_avg_player_exp", "away_avg_player_exp",
|
||||||
|
"home_goals_diversity", "away_goals_diversity",
|
||||||
]
|
]
|
||||||
|
|
||||||
MARKET_CONFIGS = [
|
MARKET_CONFIGS = [
|
||||||
@@ -349,18 +356,34 @@ def train_market(df, target_col, market_name, num_class, n_trials):
|
|||||||
print(f"[OK] LGB final: iter={lgb_model.best_iteration}")
|
print(f"[OK] LGB final: iter={lgb_model.best_iteration}")
|
||||||
|
|
||||||
# ── Phase 4: Isotonic Calibration on cal set ─────────────────
|
# ── Phase 4: Isotonic Calibration on cal set ─────────────────
|
||||||
print("[CAL] Fitting Isotonic Regression...")
|
print("[CAL] Fitting Isotonic Regression (per-class)...")
|
||||||
|
|
||||||
# XGB calibration
|
# XGB calibration — manual IsotonicRegression per class
|
||||||
xgb_wrapper = XGBWrapper(xgb_params, num_boost_round=xgb_model.best_iteration)
|
dcal = xgb.DMatrix(X_cal)
|
||||||
xgb_calibrated = CalibratedClassifierCV(xgb_wrapper, method="isotonic", cv="prefit")
|
xgb_cal_raw = xgb_model.predict(dcal)
|
||||||
xgb_wrapper.fit(X_train, y_train)
|
if len(xgb_cal_raw.shape) == 1:
|
||||||
xgb_calibrated.fit(X_cal, y_cal)
|
xgb_cal_raw = np.column_stack([1 - xgb_cal_raw, xgb_cal_raw])
|
||||||
|
|
||||||
# LGB calibration — use raw predictions approach
|
xgb_iso_calibrators = []
|
||||||
lgb_cal_preds = lgb_model.predict(X_cal, num_iteration=lgb_model.best_iteration)
|
for cls_idx in range(num_class):
|
||||||
if len(lgb_cal_preds.shape) == 1:
|
ir = IsotonicRegression(out_of_bounds="clip")
|
||||||
lgb_cal_preds = np.column_stack([1 - lgb_cal_preds, lgb_cal_preds])
|
y_binary = (y_cal == cls_idx).astype(float)
|
||||||
|
ir.fit(xgb_cal_raw[:, cls_idx], y_binary)
|
||||||
|
xgb_iso_calibrators.append(ir)
|
||||||
|
print(f"[OK] XGB Isotonic calibrators fitted: {num_class} classes")
|
||||||
|
|
||||||
|
# LGB calibration — manual IsotonicRegression per class
|
||||||
|
lgb_cal_raw = lgb_model.predict(X_cal, num_iteration=lgb_model.best_iteration)
|
||||||
|
if len(lgb_cal_raw.shape) == 1:
|
||||||
|
lgb_cal_raw = np.column_stack([1 - lgb_cal_raw, lgb_cal_raw])
|
||||||
|
|
||||||
|
lgb_iso_calibrators = []
|
||||||
|
for cls_idx in range(num_class):
|
||||||
|
ir = IsotonicRegression(out_of_bounds="clip")
|
||||||
|
y_binary = (y_cal == cls_idx).astype(float)
|
||||||
|
ir.fit(lgb_cal_raw[:, cls_idx], y_binary)
|
||||||
|
lgb_iso_calibrators.append(ir)
|
||||||
|
print(f"[OK] LGB Isotonic calibrators fitted: {num_class} classes")
|
||||||
|
|
||||||
# ── Phase 5: Evaluate on test set ────────────────────────────
|
# ── Phase 5: Evaluate on test set ────────────────────────────
|
||||||
print("\n[EVAL] Test set evaluation...")
|
print("\n[EVAL] Test set evaluation...")
|
||||||
@@ -371,16 +394,26 @@ def train_market(df, target_col, market_name, num_class, n_trials):
|
|||||||
if len(xgb_raw_probs.shape) == 1:
|
if len(xgb_raw_probs.shape) == 1:
|
||||||
xgb_raw_probs = np.column_stack([1 - xgb_raw_probs, xgb_raw_probs])
|
xgb_raw_probs = np.column_stack([1 - xgb_raw_probs, xgb_raw_probs])
|
||||||
|
|
||||||
# Calibrated XGB
|
# Calibrated XGB — apply isotonic per class + renormalize
|
||||||
xgb_cal_probs = xgb_calibrated.predict_proba(X_test)
|
xgb_cal_probs = np.column_stack([
|
||||||
|
xgb_iso_calibrators[i].predict(xgb_raw_probs[:, i]) for i in range(num_class)
|
||||||
|
])
|
||||||
|
xgb_cal_probs = xgb_cal_probs / xgb_cal_probs.sum(axis=1, keepdims=True)
|
||||||
|
|
||||||
# Raw LGB
|
# Raw LGB
|
||||||
lgb_raw_probs = lgb_model.predict(X_test, num_iteration=lgb_model.best_iteration)
|
lgb_raw_probs = lgb_model.predict(X_test, num_iteration=lgb_model.best_iteration)
|
||||||
if len(lgb_raw_probs.shape) == 1:
|
if len(lgb_raw_probs.shape) == 1:
|
||||||
lgb_raw_probs = np.column_stack([1 - lgb_raw_probs, lgb_raw_probs])
|
lgb_raw_probs = np.column_stack([1 - lgb_raw_probs, lgb_raw_probs])
|
||||||
|
|
||||||
# Ensemble (raw)
|
# Calibrated LGB — apply isotonic per class + renormalize
|
||||||
|
lgb_cal_probs = np.column_stack([
|
||||||
|
lgb_iso_calibrators[i].predict(lgb_raw_probs[:, i]) for i in range(num_class)
|
||||||
|
])
|
||||||
|
lgb_cal_probs = lgb_cal_probs / lgb_cal_probs.sum(axis=1, keepdims=True)
|
||||||
|
|
||||||
|
# Ensembles
|
||||||
raw_ensemble = (xgb_raw_probs + lgb_raw_probs) / 2
|
raw_ensemble = (xgb_raw_probs + lgb_raw_probs) / 2
|
||||||
|
cal_ensemble = (xgb_cal_probs + lgb_cal_probs) / 2
|
||||||
|
|
||||||
def _eval(probs, label):
|
def _eval(probs, label):
|
||||||
preds = np.argmax(probs, axis=1)
|
preds = np.argmax(probs, axis=1)
|
||||||
@@ -392,7 +425,9 @@ def train_market(df, target_col, market_name, num_class, n_trials):
|
|||||||
m_xgb_raw = _eval(xgb_raw_probs, "XGB Raw")
|
m_xgb_raw = _eval(xgb_raw_probs, "XGB Raw")
|
||||||
m_xgb_cal = _eval(xgb_cal_probs, "XGB Calibrated")
|
m_xgb_cal = _eval(xgb_cal_probs, "XGB Calibrated")
|
||||||
m_lgb_raw = _eval(lgb_raw_probs, "LGB Raw")
|
m_lgb_raw = _eval(lgb_raw_probs, "LGB Raw")
|
||||||
|
m_lgb_cal = _eval(lgb_cal_probs, "LGB Calibrated")
|
||||||
m_ensemble = _eval(raw_ensemble, "Ensemble Raw")
|
m_ensemble = _eval(raw_ensemble, "Ensemble Raw")
|
||||||
|
m_cal_ensemble = _eval(cal_ensemble, "Ensemble Calibrated")
|
||||||
|
|
||||||
# Classification report for ensemble
|
# Classification report for ensemble
|
||||||
ens_preds = np.argmax(raw_ensemble, axis=1)
|
ens_preds = np.argmax(raw_ensemble, axis=1)
|
||||||
@@ -409,11 +444,16 @@ def train_market(df, target_col, market_name, num_class, n_trials):
|
|||||||
lgb_model.save_model(lgb_path)
|
lgb_model.save_model(lgb_path)
|
||||||
print(f"[SAVE] {lgb_path}")
|
print(f"[SAVE] {lgb_path}")
|
||||||
|
|
||||||
# Calibrated model
|
# Isotonic calibrators (XGB + LGB)
|
||||||
cal_path = os.path.join(MODELS_DIR, f"cal_xgb_v25_{market_name.lower()}.pkl")
|
xgb_cal_path = os.path.join(MODELS_DIR, f"iso_xgb_v25_{market_name.lower()}.pkl")
|
||||||
with open(cal_path, "wb") as f:
|
with open(xgb_cal_path, "wb") as f:
|
||||||
pickle.dump(xgb_calibrated, f)
|
pickle.dump(xgb_iso_calibrators, f)
|
||||||
print(f"[SAVE] {cal_path}")
|
print(f"[SAVE] {xgb_cal_path}")
|
||||||
|
|
||||||
|
lgb_cal_path = os.path.join(MODELS_DIR, f"iso_lgb_v25_{market_name.lower()}.pkl")
|
||||||
|
with open(lgb_cal_path, "wb") as f:
|
||||||
|
pickle.dump(lgb_iso_calibrators, f)
|
||||||
|
print(f"[SAVE] {lgb_cal_path}")
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"market": market_name,
|
"market": market_name,
|
||||||
@@ -432,7 +472,9 @@ def train_market(df, target_col, market_name, num_class, n_trials):
|
|||||||
"test_xgb_raw": m_xgb_raw,
|
"test_xgb_raw": m_xgb_raw,
|
||||||
"test_xgb_calibrated": m_xgb_cal,
|
"test_xgb_calibrated": m_xgb_cal,
|
||||||
"test_lgb_raw": m_lgb_raw,
|
"test_lgb_raw": m_lgb_raw,
|
||||||
|
"test_lgb_calibrated": m_lgb_cal,
|
||||||
"test_ensemble_raw": m_ensemble,
|
"test_ensemble_raw": m_ensemble,
|
||||||
|
"test_ensemble_calibrated": m_cal_ensemble,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -495,8 +537,12 @@ def main():
|
|||||||
print("[SUMMARY]")
|
print("[SUMMARY]")
|
||||||
print("=" * 60)
|
print("=" * 60)
|
||||||
for name, m in all_metrics["markets"].items():
|
for name, m in all_metrics["markets"].items():
|
||||||
ens = m.get("test_ensemble_raw", {})
|
ens = m.get("test_ensemble_calibrated", m.get("test_ensemble_raw", {}))
|
||||||
print(f" {name:12s} | Acc={ens.get('accuracy','?'):>6s} | LL={ens.get('logloss','?'):>6s} | "
|
acc = ens.get('accuracy', '?')
|
||||||
|
ll = ens.get('logloss', '?')
|
||||||
|
acc_s = f"{acc:.4f}" if isinstance(acc, float) else str(acc)
|
||||||
|
ll_s = f"{ll:.4f}" if isinstance(ll, float) else str(ll)
|
||||||
|
print(f" {name:12s} | Acc={acc_s:>6s} | LL={ll_s:>6s} | "
|
||||||
f"XGB_iter={m.get('xgb_best_iteration','?')} LGB_iter={m.get('lgb_best_iteration','?')}")
|
f"XGB_iter={m.get('xgb_best_iteration','?')} LGB_iter={m.get('lgb_best_iteration','?')}")
|
||||||
|
|
||||||
print(f"\n[INFO] Completed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
|
print(f"\n[INFO] Completed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
|
||||||
|
|||||||
Reference in New Issue
Block a user