diff --git a/ai-engine/models/calibration.py b/ai-engine/models/calibration.py
index 2ffba17..21c9c1b 100644
--- a/ai-engine/models/calibration.py
+++ b/ai-engine/models/calibration.py
@@ -149,10 +149,17 @@ class Calibrator:
             except Exception as e:
                 print(f"[Calibrator] Warning: Failed to load metrics for {market}: {e}")
 
-    # Below this sample count, blend isotonic with raw_prob to dampen overfit jumps.
-    # Above this count, trust isotonic fully.
-    TRUSTED_SAMPLE_FLOOR = 30
-    TRUSTED_SAMPLE_CEILING = 200
+    # Below HARD_MIN_SAMPLES the isotonic model is treated as untrained
+    # (raw_prob is returned). Between HARD_MIN_SAMPLES and FLOOR we ramp from
+    # 0 to FLOOR_TRUST. Between FLOOR and CEILING we ramp to full trust.
+    # Rationale: 12-sample calibrators are statistical noise; even 30%
+    # blending on them propagates the noise into the confidence value the
+    # betting_brain reads downstream.
+    HARD_MIN_SAMPLES = 50
+    TRUSTED_SAMPLE_FLOOR = 100
+    TRUSTED_SAMPLE_CEILING = 400
+    # Isotonic weight reached at TRUSTED_SAMPLE_FLOOR (start of the upper ramp).
+    FLOOR_TRUST = 0.25
     # Hard cap on how far calibration can move probability in either direction.
     MAX_DELTA = 0.20
 
@@ -198,15 +205,21 @@ class Calibrator:
             # Sparse models barely move probability; mature models dominate.
             metrics = self.metrics.get(market_key)
             n_samples = metrics.sample_count if metrics else 0
+            if n_samples < self.HARD_MIN_SAMPLES:
+                # Too few samples for a reliable isotonic fit: return the clipped
+                # raw_prob and skip the blend / delta cap below. (Heuristic shrinkage
+                # with its model-version multiplier is applied elsewhere.)
+                return float(np.clip(raw_prob, 0.01, 0.99))
             if n_samples >= self.TRUSTED_SAMPLE_CEILING:
                 iso_weight = 1.0
             elif n_samples <= self.TRUSTED_SAMPLE_FLOOR:
-                # Very sparse: at least 30% trust to surface the signal
-                iso_weight = max(0.30, n_samples / self.TRUSTED_SAMPLE_CEILING)
+                # Linear ramp from 0% at HARD_MIN_SAMPLES to FLOOR_TRUST at FLOOR
+                span = self.TRUSTED_SAMPLE_FLOOR - self.HARD_MIN_SAMPLES
+                iso_weight = self.FLOOR_TRUST * (n_samples - self.HARD_MIN_SAMPLES) / span
             else:
-                # Linearly ramp 30% → 100% between floor and ceiling
+                # Linearly ramp FLOOR_TRUST → 100% between floor and ceiling
                 span = self.TRUSTED_SAMPLE_CEILING - self.TRUSTED_SAMPLE_FLOOR
-                iso_weight = 0.30 + 0.70 * (n_samples - self.TRUSTED_SAMPLE_FLOOR) / span
+                iso_weight = self.FLOOR_TRUST + (1.0 - self.FLOOR_TRUST) * (n_samples - self.TRUSTED_SAMPLE_FLOOR) / span
 
             blended = iso_weight * iso_pred + (1.0 - iso_weight) * raw_prob
             # Cap delta to avoid huge swings on noisy calibrators
diff --git a/package.json b/package.json
index 53c4797..c6a3ded 100755
--- a/package.json
+++ b/package.json
@@ -32,6 +32,8 @@
     "postman:export": "ts-node -r tsconfig-paths/register src/scripts/export-postman-collection.ts",
     "predictions:backfill": "ts-node --transpile-only -r tsconfig-paths/register src/scripts/backfill-prediction-runs.ts",
     "predictions:report": "ts-node --transpile-only -r tsconfig-paths/register src/scripts/print-backtest-report.ts",
+    "features:enrich": "ts-node --transpile-only -r tsconfig-paths/register src/scripts/run-feature-enrichment.ts",
+    "features:enrich:heavy": "python3 ai-engine/scripts/enrich_ai_features.py",
     "ai:extract:v26": "python3 ai-engine/scripts/extract_training_data_v26.py",
     "ai:train:v26": "python3 ai-engine/scripts/train_v26_shadow.py",
     "ai:backtest:v26": "python3 ai-engine/scripts/backtest_v26_shadow.py",
diff --git a/prisma.config.ts b/prisma.config.ts
index 3368937..d89ada8 100644
--- a/prisma.config.ts
+++ b/prisma.config.ts
@@ -2,7 +2,9 @@ import path from "node:path";
 import { defineConfig, env } from "@prisma/config";
 import { config } from "dotenv";
 
-config({ path: ".env.local" });
+// Load shared defaults from .env first, then let .env.local override them.
+config({ path: ".env" });
+config({ path: ".env.local", override: true });
 
 export default defineConfig({
   schema: path.join("prisma", "schema.prisma"),
diff --git a/src/scripts/run-feature-enrichment.ts b/src/scripts/run-feature-enrichment.ts
new file mode 100644
index 0000000..49ba86d
--- /dev/null
+++ b/src/scripts/run-feature-enrichment.ts
@@ -0,0 +1,53 @@
+/**
+ * One-shot runner for FeatureEnrichmentTask.
+ *
+ * Usage:
+ *   npm run features:enrich
+ *   npx ts-node --transpile-only -r tsconfig-paths/register src/scripts/run-feature-enrichment.ts
+ *
+ * Backfills football_ai_features rows for FT football matches from the last
+ * 60 days that lack one. The insert is idempotent via ON CONFLICT DO NOTHING.
+ * NOTE(review): runEnrichment() is called directly (no task lease) and it also
+ * deletes rows tagged feature_enrichment_task_v1 before inserting — review that
+ * reset step before relying on re-runs being harmless.
+ */
+
+import { NestFactory } from "@nestjs/core";
+import { Logger, Module } from "@nestjs/common";
+import { DatabaseModule } from "../database/database.module";
+import { FeatureEnrichmentTask } from "../tasks/feature-enrichment.task";
+import { TaskLockService } from "../tasks/task-lock.service";
+
+@Module({
+  imports: [DatabaseModule],
+  providers: [FeatureEnrichmentTask, TaskLockService],
+})
+class FeatureEnrichmentRunnerModule {}
+
+async function main() {
+  const logger = new Logger("FeatureEnrichmentRunner");
+  const app = await NestFactory.createApplicationContext(
+    FeatureEnrichmentRunnerModule,
+    { logger: ["log", "warn", "error"] },
+  );
+
+  // Close the context even when the task throws, so shutdown hooks still run.
+  try {
+    const task = app.get(FeatureEnrichmentTask);
+    logger.log("Starting one-shot feature enrichment backfill...");
+    const started = Date.now();
+    const result = await task.runEnrichment();
+    const elapsed = Date.now() - started;
+    logger.log(
+      `Done in ${elapsed}ms. inserted=${result.inserted} repaired=${result.repaired} movement=${result.movementUpdated}`,
+    );
+  } finally {
+    await app.close();
+  }
+}
+
+main().catch((err) => {
+  // eslint-disable-next-line no-console
+  console.error(err);
+  process.exit(1);
+});
diff --git a/src/tasks/feature-enrichment.task.ts b/src/tasks/feature-enrichment.task.ts
new file mode 100644
index 0000000..bb5bbf1
--- /dev/null
+++ b/src/tasks/feature-enrichment.task.ts
@@ -0,0 +1,183 @@
+import { Injectable, Logger } from "@nestjs/common";
+import { Cron } from "@nestjs/schedule";
+import { Prisma } from "@prisma/client";
+import { PrismaService } from "../database/prisma.service";
+import { TaskLockService } from "./task-lock.service";
+
+/**
+ * Ensures FT football matches from the last 60 days (at most 5000 per run)
+ * have a football_ai_features row so the Python ai-engine does not fall back
+ * to live inference (which triggers the `ai_features_inferred_from_history`
+ * -18 penalty in betting_brain).
+ *
+ * Heavy enrichment (h2h, referee, possession averages, etc.) is handled by the
+ * separate `ai-engine/scripts/enrich_ai_features.py` script; this task only
+ * guarantees row existence with ELO + form populated from authoritative
+ * sources. Default values for the rest are taken from the schema.
+ *
+ * NOTE(review): form is derived from each team's latest FT results at run time,
+ * ranked separately for home and away appearances (up to 5 of each), not from
+ * the results preceding the match being backfilled — confirm this is intended.
+ */
+@Injectable()
+export class FeatureEnrichmentTask {
+  private readonly logger = new Logger(FeatureEnrichmentTask.name);
+
+  constructor(
+    private readonly prisma: PrismaService,
+    private readonly taskLock: TaskLockService,
+  ) {}
+
+  // Runs between historical-results-sync (08:00) and prediction-settlement (08:30)
+  @Cron("15 8 * * *", { timeZone: "Europe/Istanbul" })
+  async ensureFeatureRows() {
+    if (process.env.FEEDER_MODE === "historical") {
+      this.logger.debug("Skipping feature enrichment in historical feeder mode");
+      return;
+    }
+    await this.taskLock.runWithLease(
+      "ensureFeatureRows",
+      60 * 60 * 1000,
+      () => this.runEnrichment(),
+      this.logger,
+    );
+  }
+
+  async runEnrichment(): Promise<{ inserted: number; repaired: number; movementUpdated: number }> {
+    // One-time reset of legacy rows: earlier runs applied a non-idempotent 0-45→0-100
+    // multiplier, so rows tagged task_v1 may be over-amplified and are deleted here.
+    // Fresh rows are tagged task_v2 below; otherwise this DELETE would wipe and rebuild
+    // this task's own rows on every run. NOTE(review): confirm no reader filters on task_v1.
+    const resetRows = await this.prisma.$queryRaw<{ deleted: bigint }[]>(Prisma.sql`
+      WITH d AS (
+        DELETE FROM football_ai_features
+        WHERE calculator_ver = 'feature_enrichment_task_v1'
+        RETURNING match_id
+      )
+      SELECT COUNT(*)::bigint AS deleted FROM d
+    `);
+    const repaired = Number(resetRows[0]?.deleted ?? 0);
+
+    const rows = await this.prisma.$queryRaw<{ inserted: bigint }[]>(Prisma.sql`
+      WITH form_calc AS (
+        SELECT
+          team_id,
+          -- Normalize 0-45 raw score (30 win + 15 max goal-bonus per match) to 0-100
+          LEAST(100.0,
+            (AVG(CASE
+              WHEN score_for > score_against THEN 30
+              WHEN score_for = score_against THEN 10
+              ELSE 0
+            END) + LEAST(AVG(score_for) * 5, 15)) * (100.0 / 45.0)
+          ) AS form_score
+        FROM (
+          SELECT m.home_team_id AS team_id, m.score_home AS score_for,
+                 m.score_away AS score_against, true AS is_home,
+                 ROW_NUMBER() OVER (PARTITION BY m.home_team_id ORDER BY m.mst_utc DESC) AS rn
+          FROM matches m
+          WHERE m.status='FT' AND m.score_home IS NOT NULL AND m.sport='football'
+          UNION ALL
+          SELECT m.away_team_id, m.score_away, m.score_home, false,
+                 ROW_NUMBER() OVER (PARTITION BY m.away_team_id ORDER BY m.mst_utc DESC)
+          FROM matches m
+          WHERE m.status='FT' AND m.score_home IS NOT NULL AND m.sport='football'
+        ) recent
+        WHERE rn <= 5
+        GROUP BY team_id
+      ),
+      missing AS (
+        SELECT m.id, m.home_team_id, m.away_team_id
+        FROM matches m
+        LEFT JOIN football_ai_features f ON f.match_id = m.id
+        WHERE m.sport='football'
+          AND m.status='FT'
+          AND m.score_home IS NOT NULL
+          AND f.match_id IS NULL
+          AND m.mst_utc > (EXTRACT(EPOCH FROM (now() - interval '60 days')) * 1000)::bigint
+        ORDER BY m.mst_utc DESC LIMIT 5000
+      ),
+      inserted AS (
+        INSERT INTO football_ai_features (
+          match_id, home_elo, away_elo,
+          home_form_score, away_form_score,
+          calculator_ver, updated_at
+        )
+        SELECT
+          mi.id,
+          COALESCE(eh.overall_elo, 1500.0),
+          COALESCE(ea.overall_elo, 1500.0),
+          COALESCE(fh.form_score, 50.0),
+          COALESCE(fa.form_score, 50.0),
+          'feature_enrichment_task_v2',
+          NOW()
+        FROM missing mi
+        LEFT JOIN team_elo_ratings eh ON eh.team_id = mi.home_team_id
+        LEFT JOIN team_elo_ratings ea ON ea.team_id = mi.away_team_id
+        LEFT JOIN form_calc fh ON fh.team_id = mi.home_team_id
+        LEFT JOIN form_calc fa ON fa.team_id = mi.away_team_id
+        ON CONFLICT (match_id) DO NOTHING
+        RETURNING match_id
+      )
+      SELECT COUNT(*)::bigint AS inserted FROM inserted
+    `);
+
+    const inserted = Number(rows[0]?.inserted ?? 0);
+
+    // Step 3: backfill odds_movement_* columns from odd_selections.opening_value
+    // (settlement task's computeMovementForMatch relies on odds_history which is
+    // empty in production; bypass it by reading directly from odd_selections).
+    const movementRows = await this.prisma.$queryRaw<{ updated: bigint }[]>(Prisma.sql`
+      WITH movement AS (
+        SELECT
+          oc.match_id,
+          MAX(CASE WHEN LOWER(oc.name) IN ('maç sonucu','mac sonucu','ms') AND os.name='1'
+            THEN (os.odd_value::float - os.opening_value::float) / NULLIF(os.opening_value::float,0) * 100 END) AS mv_home,
+          MAX(CASE WHEN LOWER(oc.name) IN ('maç sonucu','mac sonucu','ms') AND os.name IN ('X','0')
+            THEN (os.odd_value::float - os.opening_value::float) / NULLIF(os.opening_value::float,0) * 100 END) AS mv_draw,
+          MAX(CASE WHEN LOWER(oc.name) IN ('maç sonucu','mac sonucu','ms') AND os.name='2'
+            THEN (os.odd_value::float - os.opening_value::float) / NULLIF(os.opening_value::float,0) * 100 END) AS mv_away,
+          MAX(CASE WHEN (LOWER(oc.name) LIKE '%2,5%' OR LOWER(oc.name) LIKE '%2.5%')
+            AND (LOWER(os.name) LIKE '%üst%' OR LOWER(os.name) LIKE '%ust%' OR LOWER(os.name) LIKE '%over%')
+            THEN (os.odd_value::float - os.opening_value::float) / NULLIF(os.opening_value::float,0) * 100 END) AS mv_o25,
+          MAX(CASE WHEN (LOWER(oc.name) LIKE '%karşılıklı%' OR LOWER(oc.name) LIKE '%karsilikli%' OR LOWER(oc.name)='kg')
+            AND (LOWER(os.name) IN ('var','yes'))
+            THEN (os.odd_value::float - os.opening_value::float) / NULLIF(os.opening_value::float,0) * 100 END) AS mv_btts
+        FROM odd_selections os
+        JOIN odd_categories oc ON oc.db_id = os.odd_category_db_id
+        WHERE os.opening_value IS NOT NULL
+          AND os.odd_value IS NOT NULL
+          AND os.opening_value::float > 0
+        GROUP BY oc.match_id
+      ),
+      upd AS (
+        UPDATE football_ai_features f
+        SET odds_movement_home = m.mv_home,
+            odds_movement_draw = m.mv_draw,
+            odds_movement_away = m.mv_away,
+            odds_movement_o25 = m.mv_o25,
+            odds_movement_btts = m.mv_btts,
+            odds_sharpness = (
+              COALESCE(ABS(m.mv_home),0) + COALESCE(ABS(m.mv_draw),0) +
+              COALESCE(ABS(m.mv_away),0) + COALESCE(ABS(m.mv_o25),0) +
+              COALESCE(ABS(m.mv_btts),0)
+            ) / NULLIF(
+              (CASE WHEN m.mv_home IS NOT NULL THEN 1 ELSE 0 END) +
+              (CASE WHEN m.mv_draw IS NOT NULL THEN 1 ELSE 0 END) +
+              (CASE WHEN m.mv_away IS NOT NULL THEN 1 ELSE 0 END) +
+              (CASE WHEN m.mv_o25 IS NOT NULL THEN 1 ELSE 0 END) +
+              (CASE WHEN m.mv_btts IS NOT NULL THEN 1 ELSE 0 END), 0)
+        FROM movement m
+        WHERE f.match_id = m.match_id
+          AND (f.odds_movement_home IS NULL OR f.odds_movement_home = 0)
+        RETURNING f.match_id
+      )
+      SELECT COUNT(*)::bigint AS updated FROM upd
+    `);
+    const movementUpdated = Number(movementRows[0]?.updated ?? 0);
+
+    this.logger.log(
+      `Feature enrichment finished: inserted=${inserted} repaired=${repaired} movement=${movementUpdated}`,
+    );
+    return { inserted, repaired, movementUpdated };
+  }
+}
diff --git a/src/tasks/prediction-settlement.market-resolver.ts b/src/tasks/prediction-settlement.market-resolver.ts
index e4736a8..7adb6a3 100644
--- a/src/tasks/prediction-settlement.market-resolver.ts
+++ b/src/tasks/prediction-settlement.market-resolver.ts
@@ -82,9 +82,20 @@ const htft: Resolver = (pick, r) => {
 
 const doubleChance: Resolver = (pick, r) => {
   const ft = r.scoreHome > r.scoreAway ? "1" : r.scoreHome < r.scoreAway ? "2" : "X";
-  const normalized = pick.replace(/\s/g, "").toUpperCase().split(/\/|-/);
-  if (normalized.length !== 2) return null;
-  return normalized.includes(ft);
+  const raw = pick.replace(/\s/g, "").toUpperCase();
+  // Accept "1/X", "1-X" (split form) AND separator-less "1X", "X2", "12" (the model emits the separator-less form).
+  let pair: string[] = raw.split(/\/|-/);
+  if (pair.length === 1) {
+    // Separator-less form: exactly two distinct outcome symbols, in either
+    // order ("1X"/"X1", "X2"/"2X", "12"/"21"). Anything else is unresolvable.
+    if (!/^[1X2]{2}$/.test(raw) || raw[0] === raw[1]) {
+      return null;
+    }
+    pair = raw.split("");
+  }
+  // A split pick must have exactly two parts (e.g. "1/X/2" is rejected).
+  if (pair.length !== 2) return null;
+  return pair.includes(ft);
 };
 
 const oddEven: Resolver = (pick, r) => {
@@ -111,6 +122,9 @@ const resolvers: Record = {
   OU05_HT: overUnderHt(0.5),
   OU15_HT: overUnderHt(1.5),
   OU25_HT: overUnderHt(2.5),
+  HT_OU05: overUnderHt(0.5),
+  HT_OU15: overUnderHt(1.5),
+  HT_OU25: overUnderHt(2.5),
   BTTS: btts,
   KG: btts,
   HTFT: htft,
diff --git a/src/tasks/python-enrichment.task.ts b/src/tasks/python-enrichment.task.ts
new file mode 100644
index 0000000..94ac215
--- /dev/null
+++ b/src/tasks/python-enrichment.task.ts
@@ -0,0 +1,125 @@
+import { Injectable, Logger } from "@nestjs/common";
+import { Cron } from "@nestjs/schedule";
+import { spawn } from "child_process";
+import * as path from "path";
+import * as fs from "fs";
+import { TaskLockService } from "./task-lock.service";
+
+/**
+ * Runs the Python `enrich_ai_features.py` script daily to populate the heavy
+ * fields (h2h, referee averages, possession, shot conversion, league averages,
+ * implied odds, etc.) in football_ai_features rows that were created by
+ * FeatureEnrichmentTask but still hold default/zero values.
+ *
+ * Scheduled at 08:25 Europe/Istanbul time — between FeatureEnrichmentTask
+ * (08:15, creates rows) and PredictionSettlementTask (08:30, reads features
+ * for movement calc).
+ */
+@Injectable()
+export class PythonEnrichmentTask {
+  private readonly logger = new Logger(PythonEnrichmentTask.name);
+
+  constructor(private readonly taskLock: TaskLockService) {}
+
+  @Cron("25 8 * * *", { timeZone: "Europe/Istanbul" })
+  async runHeavyEnrichment() {
+    if (process.env.FEEDER_MODE === "historical") {
+      this.logger.debug("Skipping python enrichment in historical feeder mode");
+      return;
+    }
+    await this.taskLock.runWithLease(
+      "runHeavyEnrichment",
+      4 * 60 * 60 * 1000,
+      () => this.invokeScript(),
+      this.logger,
+    );
+  }
+
+  /**
+   * Spawns the enrichment script and resolves when it exits. A missing script
+   * or a non-zero exit is logged and resolves; only a spawn error rejects.
+   */
+  private invokeScript(): Promise<void> {
+    return new Promise<void>((resolve, reject) => {
+      const aiEngineDir = path.resolve(__dirname, "../../ai-engine");
+      const scriptPath = path.join(aiEngineDir, "scripts", "enrich_ai_features.py");
+      if (!fs.existsSync(scriptPath)) {
+        this.logger.warn(`enrich_ai_features.py not found at ${scriptPath}`);
+        return resolve();
+      }
+
+      const python = this.resolvePythonExecutable(aiEngineDir);
+      this.logger.log(`Spawning: ${python} ${scriptPath} --batch-size 500`);
+
+      const child = spawn(python, [scriptPath, "--batch-size", "500"], {
+        cwd: aiEngineDir,
+        env: { ...process.env, PYTHONIOENCODING: "utf-8" },
+      });
+
+      // Keep only the most recent output lines for the completion log.
+      const tail: string[] = [];
+      const pushTail = (line: string) => {
+        tail.push(line);
+        if (tail.length > 40) tail.shift();
+      };
+
+      child.stdout.on("data", (buf) => {
+        const text = buf.toString("utf-8");
+        for (const line of text.split(/\r?\n/)) {
+          if (line.trim()) pushTail(line);
+        }
+      });
+      child.stderr.on("data", (buf) => {
+        const text = buf.toString("utf-8");
+        for (const line of text.split(/\r?\n/)) {
+          if (line.trim()) pushTail(`[stderr] ${line}`);
+        }
+      });
+
+      child.on("error", (err) => {
+        this.logger.error(`enrich_ai_features.py spawn error: ${err.message}`);
+        reject(err);
+      });
+
+      child.on("close", (code, signal) => {
+        const lastLines = tail.slice(-10).join("\n");
+        if (code === 0) {
+          this.logger.log(`enrich_ai_features.py finished cleanly\n${lastLines}`);
+          resolve();
+        } else {
+          // `code` is null when the child was terminated by a signal.
+          const outcome =
+            code === null ? `was terminated by signal ${signal}` : `exited with code ${code}`;
+          this.logger.error(`enrich_ai_features.py ${outcome}\n${lastLines}`);
+          // Non-fatal — don't crash the cron, just log.
+          resolve();
+        }
+      });
+    });
+  }
+
+  /**
+   * Picks the interpreter: a virtualenv inside ai-engine (`venv` or `.venv`)
+   * when one exists, otherwise the platform's default command on PATH.
+   */
+  private resolvePythonExecutable(aiEngineDir: string): string {
+    const isWindows = process.platform === "win32";
+    const venvCandidates = isWindows
+      ? [
+          path.join(aiEngineDir, "venv", "Scripts", "python.exe"),
+          path.join(aiEngineDir, ".venv", "Scripts", "python.exe"),
+        ]
+      : [
+          path.join(aiEngineDir, "venv", "bin", "python3"),
+          path.join(aiEngineDir, "venv", "bin", "python"),
+          path.join(aiEngineDir, ".venv", "bin", "python3"),
+          path.join(aiEngineDir, ".venv", "bin", "python"),
+        ];
+    for (const candidate of venvCandidates) {
+      if (fs.existsSync(candidate)) return candidate;
+    }
+    // No virtualenv found: use the bare command and let PATH resolve it.
+    // POSIX prefers `python3` because a plain `python` is often not installed.
+    return isWindows ? "python" : "python3";
+  }
+}
diff --git a/src/tasks/tasks.module.ts b/src/tasks/tasks.module.ts
index d522e16..95a08ee 100755
--- a/src/tasks/tasks.module.ts
+++ b/src/tasks/tasks.module.ts
@@ -1,9 +1,11 @@
 import { Module } from "@nestjs/common";
 import { HttpModule } from "@nestjs/axios";
 import { DataFetcherTask } from "./data-fetcher.task";
+import { FeatureEnrichmentTask } from "./feature-enrichment.task";
 import { HistoricalResultsSyncTask } from "./historical-results-sync.task";
 import { LimitResetterTask } from "./limit-resetter.task";
 import { PredictionSettlementTask } from "./prediction-settlement.task";
+import { PythonEnrichmentTask } from "./python-enrichment.task";
 import { TaskLockService } from "./task-lock.service";
 import { DatabaseModule } from "../database/database.module";
 import { FeederModule } from "../modules/feeder/feeder.module";
@@ -23,15 +25,19 @@ import { FeederModule } from "../modules/feeder/feeder.module";
   providers: [
     TaskLockService,
     DataFetcherTask,
+    FeatureEnrichmentTask,
     HistoricalResultsSyncTask,
     LimitResetterTask,
     PredictionSettlementTask,
+    PythonEnrichmentTask,
   ],
   exports: [
     DataFetcherTask,
+    FeatureEnrichmentTask,
     HistoricalResultsSyncTask,
     LimitResetterTask,
     PredictionSettlementTask,
+    PythonEnrichmentTask,
   ],
 })
 export class TasksModule {}