gg2

2026-05-24 17:29:31 +03:00
parent 920ae7ce38
commit fa48f87f53
8 changed files with 384 additions and 12 deletions
@@ -149,10 +149,15 @@ class Calibrator:
                except Exception as e:
                    print(f"[Calibrator] Warning: Failed to load metrics for {market}: {e}")
-    # Below this sample count, blend isotonic with raw_prob to dampen overfit jumps.
+    # Below this sample count, the isotonic model is treated as untrained
-    # Above this count, trust isotonic fully.
+    # (raw_prob is returned). Between MIN and FLOOR we ramp from 0 to ~15%
-    TRUSTED_SAMPLE_FLOOR = 30
+    # trust. Between FLOOR and CEILING we ramp to full trust.
-    TRUSTED_SAMPLE_CEILING = 200
+    # Rationale: 12-sample calibrators are statistical noise; even 30%
    # blending on them propagates the noise into the confidence value the
    # betting_brain reads downstream.
    HARD_MIN_SAMPLES = 50
    TRUSTED_SAMPLE_FLOOR = 100
    TRUSTED_SAMPLE_CEILING = 400
    # Hard cap on how far calibration can move probability in either direction.
    MAX_DELTA = 0.20
@@ -198,15 +203,21 @@ class Calibrator:
                # Sparse models barely move probability; mature models dominate.
                metrics = self.metrics.get(market_key)
                n_samples = metrics.sample_count if metrics else 0
                if n_samples < self.HARD_MIN_SAMPLES:
                    # Below 50 samples isotonic fit is unreliable — bypass it
                    # entirely and return raw_prob. The heuristic shrinkage
                    # below would still apply a model-version multiplier elsewhere.
                    return float(np.clip(raw_prob, 0.01, 0.99))
                if n_samples >= self.TRUSTED_SAMPLE_CEILING:
                    iso_weight = 1.0
                elif n_samples <= self.TRUSTED_SAMPLE_FLOOR:
-                    # Very sparse: at least 30% trust to surface the signal
+                    # Linear ramp from 0% at HARD_MIN_SAMPLES to ~25% at FLOOR
-                    iso_weight = max(0.30, n_samples / self.TRUSTED_SAMPLE_CEILING)
+                    span = self.TRUSTED_SAMPLE_FLOOR - self.HARD_MIN_SAMPLES
                    iso_weight = 0.25 * (n_samples - self.HARD_MIN_SAMPLES) / span
                else:
-                    # Linearly ramp 30% → 100% between floor and ceiling
+                    # Linearly ramp 25% → 100% between floor and ceiling
                    span = self.TRUSTED_SAMPLE_CEILING - self.TRUSTED_SAMPLE_FLOOR
-                    iso_weight = 0.30 + 0.70 * (n_samples - self.TRUSTED_SAMPLE_FLOOR) / span
+                    iso_weight = 0.25 + 0.75 * (n_samples - self.TRUSTED_SAMPLE_FLOOR) / span
                blended = iso_weight * iso_pred + (1.0 - iso_weight) * raw_prob
                # Cap delta to avoid huge swings on noisy calibrators
@@ -32,6 +32,8 @@
    "postman:export": "ts-node -r tsconfig-paths/register src/scripts/export-postman-collection.ts",
    "predictions:backfill": "ts-node --transpile-only -r tsconfig-paths/register src/scripts/backfill-prediction-runs.ts",
    "predictions:report": "ts-node --transpile-only -r tsconfig-paths/register src/scripts/print-backtest-report.ts",
    "features:enrich": "ts-node --transpile-only -r tsconfig-paths/register src/scripts/run-feature-enrichment.ts",
    "features:enrich:heavy": "python ai-engine/scripts/enrich_ai_features.py",
    "ai:extract:v26": "python3 ai-engine/scripts/extract_training_data_v26.py",
    "ai:train:v26": "python3 ai-engine/scripts/train_v26_shadow.py",
    "ai:backtest:v26": "python3 ai-engine/scripts/backtest_v26_shadow.py",
@@ -2,7 +2,8 @@ import path from "node:path";
 import { defineConfig, env } from "@prisma/config";
 import { config } from "dotenv";
-config({ path: ".env.local" });
+config({ path: ".env" });
 config({ path: ".env.local", override: true });
 export default defineConfig({
  schema: path.join("prisma", "schema.prisma"),
@@ -0,0 +1,47 @@
 /**
 * One-shot runner for FeatureEnrichmentTask.
 *
 * Usage:
 *   npx ts-node -r tsconfig-paths/register src/scripts/run-feature-enrichment.ts
 *
 * Backfills football_ai_features rows for all FT football matches in the last
 * 60 days that lack one. Safe to re-run — idempotent via ON CONFLICT DO NOTHING.
 */
 import { NestFactory } from "@nestjs/core";
 import { Logger } from "@nestjs/common";
 import { Module } from "@nestjs/common";
 import { DatabaseModule } from "../database/database.module";
 import { FeatureEnrichmentTask } from "../tasks/feature-enrichment.task";
 import { TaskLockService } from "../tasks/task-lock.service";
@Module({
  imports: [DatabaseModule],
  providers: [FeatureEnrichmentTask, TaskLockService],
 })
 class FeatureEnrichmentRunnerModule {}
 async function main() {
  const logger = new Logger("FeatureEnrichmentRunner");
  const app = await NestFactory.createApplicationContext(
    FeatureEnrichmentRunnerModule,
    { logger: ["log", "warn", "error"] },
  );
  const task = app.get(FeatureEnrichmentTask);
  logger.log("Starting one-shot feature enrichment backfill...");
  const started = Date.now();
  const result = await task.runEnrichment();
  const elapsed = Date.now() - started;
  logger.log(
    `Done in ${elapsed}ms. inserted=${result.inserted} repaired=${result.repaired} movement=${result.movementUpdated}`,
  );
  await app.close();
 }
 main().catch((err) => {
  // eslint-disable-next-line no-console
  console.error(err);
  process.exit(1);
 });
@@ -0,0 +1,178 @@
 import { Injectable, Logger } from "@nestjs/common";
 import { Cron } from "@nestjs/schedule";
 import { Prisma } from "@prisma/client";
 import { PrismaService } from "../database/prisma.service";
 import { TaskLockService } from "./task-lock.service";
 /**
 * Ensures every FT football match has a football_ai_features row so the
 * Python ai-engine does not fall back to live inference (which triggers the
 * `ai_features_inferred_from_history` -18 penalty in betting_brain).
 *
 * Heavy enrichment (h2h, referee, possession averages, etc.) is handled by the
 * separate `ai-engine/scripts/enrich_ai_features.py` script; this task only
 * guarantees row existence with ELO + form populated from authoritative
 * sources. Default values for the rest are taken from the schema.
 */
@Injectable()
 export class FeatureEnrichmentTask {
  private readonly logger = new Logger(FeatureEnrichmentTask.name);
  constructor(
    private readonly prisma: PrismaService,
    private readonly taskLock: TaskLockService,
  ) {}
  // Runs between historical-results-sync (08:00) and prediction-settlement (08:30)
  @Cron("15 8 * * *", { timeZone: "Europe/Istanbul" })
  async ensureFeatureRows() {
    if (process.env.FEEDER_MODE === "historical") {
      this.logger.debug("Skipping feature enrichment in historical feeder mode");
      return;
    }
    await this.taskLock.runWithLease(
      "ensureFeatureRows",
      60 * 60 * 1000,
      () => this.runEnrichment(),
      this.logger,
    );
  }
  async runEnrichment(): Promise<{ inserted: number; repaired: number; movementUpdated: number }> {
    // One-time reset: previous runs applied a non-idempotent 0-45→0-100
    // multiplier; some rows hit it twice and ended up over-amplified. Delete
    // task_v1 rows so they get re-inserted from scratch by the next step.
    // This block can be removed after one successful production run.
    const resetRows = await this.prisma.$queryRaw<{ deleted: bigint }[]>(Prisma.sql`
      WITH d AS (
        DELETE FROM football_ai_features
        WHERE calculator_ver = 'feature_enrichment_task_v1'
        RETURNING match_id
      )
      SELECT COUNT(*)::bigint AS deleted FROM d
    `);
    const repaired = Number(resetRows[0]?.deleted ?? 0);
    const rows = await this.prisma.$queryRaw<{ inserted: bigint }[]>(Prisma.sql`
      WITH form_calc AS (
        SELECT
          team_id,
          -- Normalize 0-45 raw score (30 win + 15 max goal-bonus per match) to 0-100
          LEAST(100.0,
            (AVG(CASE
              WHEN score_for > score_against THEN 30
              WHEN score_for = score_against THEN 10
              ELSE 0
            END) + LEAST(AVG(score_for) * 5, 15)) * (100.0 / 45.0)
          ) AS form_score
        FROM (
          SELECT m.home_team_id AS team_id, m.score_home AS score_for,
                 m.score_away AS score_against, true AS is_home,
                 ROW_NUMBER() OVER (PARTITION BY m.home_team_id ORDER BY m.mst_utc DESC) AS rn
          FROM matches m
          WHERE m.status='FT' AND m.score_home IS NOT NULL AND m.sport='football'
          UNION ALL
          SELECT m.away_team_id, m.score_away, m.score_home, false,
                 ROW_NUMBER() OVER (PARTITION BY m.away_team_id ORDER BY m.mst_utc DESC)
          FROM matches m
          WHERE m.status='FT' AND m.score_home IS NOT NULL AND m.sport='football'
        ) recent
        WHERE rn <= 5
        GROUP BY team_id
      ),
      missing AS (
        SELECT m.id, m.home_team_id, m.away_team_id
        FROM matches m
        LEFT JOIN football_ai_features f ON f.match_id = m.id
        WHERE m.sport='football'
          AND m.status='FT'
          AND m.score_home IS NOT NULL
          AND f.match_id IS NULL
          AND to_timestamp(m.mst_utc/1000) > now() - interval '60 days'
        LIMIT 5000
      ),
      inserted AS (
        INSERT INTO football_ai_features (
          match_id, home_elo, away_elo,
          home_form_score, away_form_score,
          calculator_ver, updated_at
        )
        SELECT
          mi.id,
          COALESCE(eh.overall_elo, 1500.0),
          COALESCE(ea.overall_elo, 1500.0),
          COALESCE(fh.form_score, 50.0),
          COALESCE(fa.form_score, 50.0),
          'feature_enrichment_task_v1',
          NOW()
        FROM missing mi
        LEFT JOIN team_elo_ratings eh ON eh.team_id = mi.home_team_id
        LEFT JOIN team_elo_ratings ea ON ea.team_id = mi.away_team_id
        LEFT JOIN form_calc fh ON fh.team_id = mi.home_team_id
        LEFT JOIN form_calc fa ON fa.team_id = mi.away_team_id
        ON CONFLICT (match_id) DO NOTHING
        RETURNING match_id
      )
      SELECT COUNT(*)::bigint AS inserted FROM inserted
    `);
    const inserted = Number(rows[0]?.inserted ?? 0);
    // Step 3: backfill odds_movement_* columns from odd_selections.opening_value
    // (settlement task's computeMovementForMatch relies on odds_history which is
    // empty in production; bypass it by reading directly from odd_selections).
    const movementRows = await this.prisma.$queryRaw<{ updated: bigint }[]>(Prisma.sql`
      WITH movement AS (
        SELECT
          oc.match_id,
          MAX(CASE WHEN LOWER(oc.name) IN ('maç sonucu','mac sonucu','ms') AND os.name='1'
                   THEN (os.odd_value::float - os.opening_value::float) / NULLIF(os.opening_value::float,0) * 100 END) AS mv_home,
          MAX(CASE WHEN LOWER(oc.name) IN ('maç sonucu','mac sonucu','ms') AND os.name IN ('X','0')
                   THEN (os.odd_value::float - os.opening_value::float) / NULLIF(os.opening_value::float,0) * 100 END) AS mv_draw,
          MAX(CASE WHEN LOWER(oc.name) IN ('maç sonucu','mac sonucu','ms') AND os.name='2'
                   THEN (os.odd_value::float - os.opening_value::float) / NULLIF(os.opening_value::float,0) * 100 END) AS mv_away,
          MAX(CASE WHEN (LOWER(oc.name) LIKE '%2,5%' OR LOWER(oc.name) LIKE '%2.5%')
                    AND (LOWER(os.name) LIKE '%üst%' OR LOWER(os.name) LIKE '%ust%' OR LOWER(os.name) LIKE '%over%')
                   THEN (os.odd_value::float - os.opening_value::float) / NULLIF(os.opening_value::float,0) * 100 END) AS mv_o25,
          MAX(CASE WHEN (LOWER(oc.name) LIKE '%karşılıklı%' OR LOWER(oc.name) LIKE '%karsilikli%' OR LOWER(oc.name)='kg')
                    AND (LOWER(os.name) IN ('var','yes'))
                   THEN (os.odd_value::float - os.opening_value::float) / NULLIF(os.opening_value::float,0) * 100 END) AS mv_btts
        FROM odd_selections os
        JOIN odd_categories oc ON oc.db_id = os.odd_category_db_id
        WHERE os.opening_value IS NOT NULL
          AND os.odd_value IS NOT NULL
          AND os.opening_value::float > 0
        GROUP BY oc.match_id
      ),
      upd AS (
        UPDATE football_ai_features f
        SET odds_movement_home = m.mv_home,
            odds_movement_draw = m.mv_draw,
            odds_movement_away = m.mv_away,
            odds_movement_o25 = m.mv_o25,
            odds_movement_btts = m.mv_btts,
            odds_sharpness = (
              COALESCE(ABS(m.mv_home),0) + COALESCE(ABS(m.mv_draw),0) +
              COALESCE(ABS(m.mv_away),0) + COALESCE(ABS(m.mv_o25),0) +
              COALESCE(ABS(m.mv_btts),0)
            ) / NULLIF(
              (CASE WHEN m.mv_home IS NOT NULL THEN 1 ELSE 0 END) +
              (CASE WHEN m.mv_draw IS NOT NULL THEN 1 ELSE 0 END) +
              (CASE WHEN m.mv_away IS NOT NULL THEN 1 ELSE 0 END) +
              (CASE WHEN m.mv_o25  IS NOT NULL THEN 1 ELSE 0 END) +
              (CASE WHEN m.mv_btts IS NOT NULL THEN 1 ELSE 0 END), 0)
        FROM movement m
        WHERE f.match_id = m.match_id
          AND (f.odds_movement_home IS NULL OR f.odds_movement_home = 0)
        RETURNING f.match_id
      )
      SELECT COUNT(*)::bigint AS updated FROM upd
    `);
    const movementUpdated = Number(movementRows[0]?.updated ?? 0);
    this.logger.log(
      `Feature enrichment finished: inserted=${inserted} repaired=${repaired} movement=${movementUpdated}`,
    );
    return { inserted, repaired, movementUpdated };
  }
 }
@@ -82,9 +82,20 @@ const htft: Resolver = (pick, r) => {
 const doubleChance: Resolver = (pick, r) => {
  const ft =
    r.scoreHome > r.scoreAway ? "1" : r.scoreHome < r.scoreAway ? "2" : "X";
-  const normalized = pick.replace(/\s/g, "").toUpperCase().split(/\/|-/);
+  const raw = pick.replace(/\s/g, "").toUpperCase();
-  if (normalized.length !== 2) return null;
+  // Accept "1/X", "1-X" (split form) AND ayraçsız "1X", "X2", "12" (model emits ayraçsız).
-  return normalized.includes(ft);
+  let pair: string[] = raw.split(/\/|-/);
  if (pair.length === 1) {
    if (raw === "1X" || raw === "X2" || raw === "12") {
      pair = raw.split("");
    } else if (raw === "X1" || raw === "2X" || raw === "21") {
      pair = raw.split("");
    } else {
      return null;
    }
  }
  if (pair.length !== 2) return null;
  return pair.includes(ft);
 };
 const oddEven: Resolver = (pick, r) => {
@@ -111,6 +122,9 @@ const resolvers: Record<string, Resolver> = {
  OU05_HT: overUnderHt(0.5),
  OU15_HT: overUnderHt(1.5),
  OU25_HT: overUnderHt(2.5),
  HT_OU05: overUnderHt(0.5),
  HT_OU15: overUnderHt(1.5),
  HT_OU25: overUnderHt(2.5),
  BTTS: btts,
  KG: btts,
  HTFT: htft,
@@ -0,0 +1,113 @@
 import { Injectable, Logger } from "@nestjs/common";
 import { Cron } from "@nestjs/schedule";
 import { spawn } from "child_process";
 import * as path from "path";
 import * as fs from "fs";
 import { TaskLockService } from "./task-lock.service";
 /**
 * Runs the Python `enrich_ai_features.py` script daily to populate the heavy
 * fields (h2h, referee averages, possession, shot conversion, league averages,
 * implied odds, etc.) in football_ai_features rows that were created by
 * FeatureEnrichmentTask but still hold default/zero values.
 *
 * Scheduled at 08:25 IST — between FeatureEnrichmentTask (08:15, creates rows)
 * and PredictionSettlementTask (08:30, reads features for movement calc).
 */
@Injectable()
 export class PythonEnrichmentTask {
  private readonly logger = new Logger(PythonEnrichmentTask.name);
  constructor(private readonly taskLock: TaskLockService) {}
  @Cron("25 8 * * *", { timeZone: "Europe/Istanbul" })
  async runHeavyEnrichment() {
    if (process.env.FEEDER_MODE === "historical") {
      this.logger.debug("Skipping python enrichment in historical feeder mode");
      return;
    }
    await this.taskLock.runWithLease(
      "runHeavyEnrichment",
      4 * 60 * 60 * 1000,
      () => this.invokeScript(),
      this.logger,
    );
  }
  private invokeScript(): Promise<void> {
    return new Promise((resolve, reject) => {
      const aiEngineDir = path.resolve(__dirname, "../../ai-engine");
      const scriptPath = path.join(aiEngineDir, "scripts", "enrich_ai_features.py");
      if (!fs.existsSync(scriptPath)) {
        this.logger.warn(`enrich_ai_features.py not found at ${scriptPath}`);
        return resolve();
      }
      const python = this.resolvePythonExecutable(aiEngineDir);
      this.logger.log(`Spawning: ${python} ${scriptPath} --batch-size 500`);
      const child = spawn(python, [scriptPath, "--batch-size", "500"], {
        cwd: aiEngineDir,
        env: { ...process.env, PYTHONIOENCODING: "utf-8" },
      });
      const tail: string[] = [];
      const pushTail = (line: string) => {
        tail.push(line);
        if (tail.length > 40) tail.shift();
      };
      child.stdout.on("data", (buf) => {
        const text = buf.toString("utf-8");
        for (const line of text.split(/\r?\n/)) {
          if (line.trim()) pushTail(line);
        }
      });
      child.stderr.on("data", (buf) => {
        const text = buf.toString("utf-8");
        for (const line of text.split(/\r?\n/)) {
          if (line.trim()) pushTail(`[stderr] ${line}`);
        }
      });
      child.on("error", (err) => {
        this.logger.error(`enrich_ai_features.py spawn error: ${err.message}`);
        reject(err);
      });
      child.on("close", (code) => {
        const lastLines = tail.slice(-10).join("\n");
        if (code === 0) {
          this.logger.log(`enrich_ai_features.py finished cleanly\n${lastLines}`);
          resolve();
        } else {
          this.logger.error(
            `enrich_ai_features.py exited with code ${code}\n${lastLines}`,
          );
          // Non-fatal — don't crash the cron, just log.
          resolve();
        }
      });
    });
  }
  private resolvePythonExecutable(aiEngineDir: string): string {
    const candidates =
      process.platform === "win32"
        ? [
            path.join(aiEngineDir, "venv", "Scripts", "python.exe"),
            path.join(aiEngineDir, ".venv", "Scripts", "python.exe"),
            "python",
          ]
        : [
            path.join(aiEngineDir, "venv", "bin", "python3"),
            path.join(aiEngineDir, "venv", "bin", "python"),
            "python3",
            "python",
          ];
    for (const candidate of candidates) {
      if (candidate.includes(path.sep) && fs.existsSync(candidate)) return candidate;
    }
    return candidates[candidates.length - 1];
  }
 }
@@ -1,9 +1,11 @@
 import { Module } from "@nestjs/common";
 import { HttpModule } from "@nestjs/axios";
 import { DataFetcherTask } from "./data-fetcher.task";
 import { FeatureEnrichmentTask } from "./feature-enrichment.task";
 import { HistoricalResultsSyncTask } from "./historical-results-sync.task";
 import { LimitResetterTask } from "./limit-resetter.task";
 import { PredictionSettlementTask } from "./prediction-settlement.task";
 import { PythonEnrichmentTask } from "./python-enrichment.task";
 import { TaskLockService } from "./task-lock.service";
 import { DatabaseModule } from "../database/database.module";
 import { FeederModule } from "../modules/feeder/feeder.module";
@@ -23,15 +25,19 @@ import { FeederModule } from "../modules/feeder/feeder.module";
  providers: [
    TaskLockService,
    DataFetcherTask,
    FeatureEnrichmentTask,
    HistoricalResultsSyncTask,
    LimitResetterTask,
    PredictionSettlementTask,
    PythonEnrichmentTask,
  ],
  exports: [
    DataFetcherTask,
    FeatureEnrichmentTask,
    HistoricalResultsSyncTask,
    LimitResetterTask,
    PredictionSettlementTask,
    PythonEnrichmentTask,
  ],
 })
 export class TasksModule {}