Compare commits
20 Commits
7c65ffdd58
...
v26-shadow
| Author | SHA1 | Date | |
|---|---|---|---|
| 9e2edd590c | |||
| b5c2edf346 | |||
| bf7473c1e7 | |||
| 1f26a5bf2f | |||
| fb53fdf1df | |||
| 634204acf0 | |||
| df428ed1e8 | |||
| 2ccd6831eb | |||
| 1346924387 | |||
| e4c74025e5 | |||
| c8e7e4e927 | |||
| c8fa4c442d | |||
| 0f917695dd | |||
| 249c57346e | |||
| 182f4aae16 | |||
| 2f0b85a0c7 | |||
| 7814e0bc6b | |||
| b4173c10bb | |||
| 117a3c1f96 | |||
| 6b194314c4 |
@@ -0,0 +1,27 @@
|
|||||||
|
node_modules
|
||||||
|
dist
|
||||||
|
.git
|
||||||
|
.env
|
||||||
|
.env.*
|
||||||
|
*.backup
|
||||||
|
*.dump
|
||||||
|
ai-engine/
|
||||||
|
venv/
|
||||||
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
|
|
||||||
|
# IDE files
|
||||||
|
.vscode/
|
||||||
|
.idea/
|
||||||
|
|
||||||
|
# Ignore test coverage and log files
|
||||||
|
coverage/
|
||||||
|
*.log
|
||||||
|
npm-debug.log*
|
||||||
|
yarn-debug.log*
|
||||||
|
yarn-error.log*
|
||||||
|
pnpm-debug.log*
|
||||||
|
|
||||||
|
# Uploads
|
||||||
|
uploads/
|
||||||
|
public/uploads/
|
||||||
+23
-27
@@ -2,38 +2,34 @@ name: Deploy Iddaai Backend
|
|||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches:
|
branches: [main]
|
||||||
- main
|
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
deploy:
|
build-and-deploy:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout Code
|
- name: Kodu Cek
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Install sshpass
|
- name: Docker Build
|
||||||
run: sudo apt-get update && sudo apt-get install -y sshpass
|
run: docker build -t iddaai-be:latest .
|
||||||
|
|
||||||
- name: Deploy to Raspberry Pi via rsync
|
- name: Eski Konteyneri Sil
|
||||||
env:
|
run: docker rm -f iddaai-be || true
|
||||||
SERVER_PASSWORD: ${{ secrets.SERVER_PASSWORD }}
|
|
||||||
SERVER_IP: "95.70.252.214"
|
- name: Yeni Versiyonu Baslat
|
||||||
SERVER_PORT: "2222"
|
|
||||||
SERVER_USER: "haruncan"
|
|
||||||
run: |
|
run: |
|
||||||
export SSHPASS=$SERVER_PASSWORD
|
docker run -d \
|
||||||
sshpass -e rsync -avz --exclude node_modules --exclude .git --exclude dist --exclude .next --exclude .DS_Store --exclude venv --exclude 'ai-engine/venv' --exclude __pycache__ -e "ssh -o StrictHostKeyChecking=no -p $SERVER_PORT" ./ $SERVER_USER@$SERVER_IP:~/apps/iddaai/be/
|
--name iddaai-be \
|
||||||
|
--restart unless-stopped \
|
||||||
- name: Restart Backend Docker Service
|
--network iddaai_iddaai-network \
|
||||||
uses: appleboy/ssh-action@v1.0.3
|
-p 127.0.0.1:1810:3005 \
|
||||||
with:
|
-e NODE_ENV=production \
|
||||||
host: "95.70.252.214"
|
-e DATABASE_URL='postgresql://iddaai_user:IddaA1_S4crET!@iddaai-postgres:5432/iddaai_db?schema=public' \
|
||||||
port: "2222"
|
-e REDIS_HOST='iddaai-redis' \
|
||||||
username: "haruncan"
|
-e REDIS_PORT='6379' \
|
||||||
password: ${{ secrets.SERVER_PASSWORD }}
|
-e REDIS_PASSWORD='IddaA1_Redis_Pass!' \
|
||||||
script: |
|
-e AI_ENGINE_URL='http://iddaai-ai-engine:8000' \
|
||||||
cd ~/apps/iddaai
|
-e JWT_SECRET='b7V8jM2wP1L5mQxs2RdfFkAsLpI2oG!w' \
|
||||||
docker compose build backend
|
-e JWT_ACCESS_EXPIRATION='1d' \
|
||||||
docker compose up -d backend
|
iddaai-be:latest /bin/sh -c "npx prisma migrate deploy && node dist/src/main.js"
|
||||||
|
|||||||
+48
@@ -0,0 +1,48 @@
|
|||||||
|
# Node
|
||||||
|
node_modules/
|
||||||
|
dist/
|
||||||
|
dist-*/
|
||||||
|
npm-debug.log*
|
||||||
|
yarn-debug.log*
|
||||||
|
yarn-error.log*
|
||||||
|
pnpm-debug.log*
|
||||||
|
|
||||||
|
# Environment
|
||||||
|
.env
|
||||||
|
.env.*
|
||||||
|
!.env.example
|
||||||
|
|
||||||
|
# Python
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
venv/
|
||||||
|
.venv/
|
||||||
|
env/
|
||||||
|
|
||||||
|
# Database / Docker Volumes
|
||||||
|
data/
|
||||||
|
postgres-data/
|
||||||
|
redis-data/
|
||||||
|
|
||||||
|
# OS / Editor
|
||||||
|
.DS_Store
|
||||||
|
.idea/
|
||||||
|
.vscode/
|
||||||
|
|
||||||
|
# Tests / Coverage
|
||||||
|
coverage/
|
||||||
|
|
||||||
|
# Logs
|
||||||
|
logs/
|
||||||
|
*.log
|
||||||
|
|
||||||
|
# Uploads
|
||||||
|
uploads/
|
||||||
|
public/uploads/
|
||||||
|
|
||||||
|
# Large Datasets and ML Models
|
||||||
|
ai-engine/models/
|
||||||
|
models/
|
||||||
|
colab_export/
|
||||||
|
|
||||||
@@ -0,0 +1,322 @@
|
|||||||
|
# AGENTS.md - Coding Agent Guidelines
|
||||||
|
|
||||||
|
Bu dosya, bu repoda çalışan AI kodlama ajanları için rehberdir.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. Build / Lint / Test Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Development
|
||||||
|
npm run start:dev # Dev server with watch mode
|
||||||
|
npm run build # Production build (nest build)
|
||||||
|
|
||||||
|
# Linting & Formatting
|
||||||
|
npm run lint # ESLint with Prettier
|
||||||
|
npm run format # Prettier write
|
||||||
|
|
||||||
|
# Testing
|
||||||
|
npm run test # Run all unit tests
|
||||||
|
npm run test:watch # Watch mode
|
||||||
|
npm run test:e2e # End-to-end tests
|
||||||
|
npx jest src/path/to/file.spec.ts # Run single test file
|
||||||
|
npx jest --testNamePattern="test name" # Run specific test
|
||||||
|
|
||||||
|
# Database
|
||||||
|
npx prisma generate # Generate Prisma client (required after install)
|
||||||
|
npx prisma migrate dev # Run migrations
|
||||||
|
npx prisma db seed # Seed database
|
||||||
|
|
||||||
|
# Feeder Scripts
|
||||||
|
npm run feeder:historical # Historical data fetch
|
||||||
|
npm run feeder:live # Live match data fetch
|
||||||
|
npm run feeder:basketball # Basketball data fetch
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. Code Style Guidelines
|
||||||
|
|
||||||
|
### Imports (Sıralama)
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// 1. NestJS/common imports
|
||||||
|
import { Controller, Get, Post, Body } from '@nestjs/common';
|
||||||
|
import { ApiTags, ApiOperation } from '@nestjs/swagger';
|
||||||
|
|
||||||
|
// 2. External packages
|
||||||
|
import { plainToInstance } from 'class-transformer';
|
||||||
|
import * as bcrypt from 'bcrypt';
|
||||||
|
|
||||||
|
// 3. Local imports (relative)
|
||||||
|
import { UsersService } from './users.service';
|
||||||
|
import { CreateUserDto } from './dto/user.dto';
|
||||||
|
import { ApiResponse, createSuccessResponse } from '../../common/types';
|
||||||
|
```
|
||||||
|
|
||||||
|
### Formatting
|
||||||
|
|
||||||
|
- **Single quotes** for strings
|
||||||
|
- **Trailing commas** always
|
||||||
|
- Prettier ile formatlama zorunlu
|
||||||
|
- Dosya sonu boş satır
|
||||||
|
|
||||||
|
### Types & Type Safety
|
||||||
|
|
||||||
|
- `strictNullChecks: true` - null/undefined kontrolü zorunlu
|
||||||
|
- `noImplicitAny: false` - any kullanımına izin var (Prisma dynamic access için)
|
||||||
|
- Fonksiyon return type belirt: `async findOne(id: string): Promise<User>`
|
||||||
|
- Interface > Type alias (objeler için)
|
||||||
|
|
||||||
|
### Naming Conventions
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Classes & Interfaces: PascalCase
|
||||||
|
class UsersService {}
|
||||||
|
interface ApiResponse<T> {}
|
||||||
|
|
||||||
|
// Variables & Functions: camelCase
|
||||||
|
const userService = new UsersService();
|
||||||
|
async function findUserById() {}
|
||||||
|
|
||||||
|
// Constants: UPPER_SNAKE_CASE
|
||||||
|
const JWT_SECRET = 'secret';
|
||||||
|
const IS_PUBLIC_KEY = 'isPublic';
|
||||||
|
|
||||||
|
// Files: kebab-case name + dot-separated type suffix (.dto, .service, .spec)
|
||||||
|
user.dto.ts;
|
||||||
|
users.service.ts;
|
||||||
|
predictions.processor.spec.ts;
|
||||||
|
|
||||||
|
// DTOs: Entity + Dto suffix
|
||||||
|
(CreateUserDto, UpdateUserDto, UserResponseDto);
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. DTO Pattern
|
||||||
|
|
||||||
|
### Request DTOs
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
export class CreateUserDto {
|
||||||
|
@ApiProperty({ example: 'user@example.com' })
|
||||||
|
@IsEmail()
|
||||||
|
email: string;
|
||||||
|
|
||||||
|
@IsString()
|
||||||
|
@MinLength(8)
|
||||||
|
password: string;
|
||||||
|
|
||||||
|
@IsOptional()
|
||||||
|
@IsString()
|
||||||
|
firstName?: string;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Response DTOs (Security Critical)
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
@Exclude()
|
||||||
|
export class UserResponseDto {
|
||||||
|
@Expose()
|
||||||
|
id: string;
|
||||||
|
|
||||||
|
@Expose()
|
||||||
|
email: string;
|
||||||
|
|
||||||
|
// passwordHash intentionally NOT exposed
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Controller Usage
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
@Get('me')
|
||||||
|
async getMe(@CurrentUser() user: User): Promise<ApiResponse<UserResponseDto>> {
|
||||||
|
const fullUser = await this.usersService.findOneWithDetails(user.id);
|
||||||
|
return createSuccessResponse(
|
||||||
|
plainToInstance(UserResponseDto, fullUser),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**KRİTİK:** Asla raw Prisma entity döndürme. Her zaman Response DTO kullan.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. Architecture Patterns
|
||||||
|
|
||||||
|
### Service Layer
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
@Injectable()
|
||||||
|
export class UsersService extends BaseService<
|
||||||
|
User,
|
||||||
|
CreateUserDto,
|
||||||
|
UpdateUserDto
|
||||||
|
> {
|
||||||
|
constructor(prisma: PrismaService) {
|
||||||
|
super(prisma, 'User');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Custom methods...
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Controller Layer
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
@ApiTags('Users')
|
||||||
|
@ApiBearerAuth()
|
||||||
|
@Controller('users')
|
||||||
|
export class UsersController extends BaseController<
|
||||||
|
User,
|
||||||
|
CreateUserDto,
|
||||||
|
UpdateUserDto
|
||||||
|
> {
|
||||||
|
constructor(private readonly usersService: UsersService) {
|
||||||
|
super(usersService, 'User');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### API Response Format
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// All responses use this structure
|
||||||
|
{
|
||||||
|
"success": true,
|
||||||
|
"status": 200,
|
||||||
|
"message": "Success",
|
||||||
|
"data": { ... },
|
||||||
|
"errors": []
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper functions
|
||||||
|
createSuccessResponse(data, 'Message')
|
||||||
|
createErrorResponse('Message', 400, ['error1'])
|
||||||
|
createPaginatedResponse(items, total, page, limit)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. Error Handling
|
||||||
|
|
||||||
|
### Throw NestJS HTTP Exceptions
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Correct
|
||||||
|
throw new NotFoundException('User not found');
|
||||||
|
throw new ConflictException('EMAIL_ALREADY_EXISTS');
|
||||||
|
throw new UnauthorizedException('INVALID_CREDENTIALS');
|
||||||
|
|
||||||
|
// Wrong
|
||||||
|
throw new Error('User not found'); // Don't use generic Error
|
||||||
|
```
|
||||||
|
|
||||||
|
### i18n Error Keys
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Use translatable keys (check src/i18n/{lang}/errors.json)
|
||||||
|
throw new ConflictException('EMAIL_ALREADY_EXISTS');
|
||||||
|
// Translates to: "Email already exists" (en) / "Email zaten kayıtlı" (tr)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Global Exception Filter
|
||||||
|
|
||||||
|
- Tüm hatalar HTTP 200 ile döner (status body içinde)
|
||||||
|
- `NODE_ENV=development` ise stack trace eklenir
|
||||||
|
- Validation hataları otomatik formatlanır
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. Testing
|
||||||
|
|
||||||
|
### Unit Test Structure
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
import { Test, TestingModule } from '@nestjs/testing';
|
||||||
|
|
||||||
|
describe('UsersService', () => {
|
||||||
|
let service: UsersService;
|
||||||
|
let prisma: PrismaService;
|
||||||
|
|
||||||
|
beforeEach(async () => {
|
||||||
|
const module: TestingModule = await Test.createTestingModule({
|
||||||
|
providers: [
|
||||||
|
UsersService,
|
||||||
|
{ provide: PrismaService, useValue: mockPrisma },
|
||||||
|
],
|
||||||
|
}).compile();
|
||||||
|
|
||||||
|
service = module.get<UsersService>(UsersService);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should find user by id', async () => {
|
||||||
|
// Arrange
|
||||||
|
mockPrisma.user.findUnique.mockResolvedValue(mockUser);
|
||||||
|
|
||||||
|
// Act
|
||||||
|
const result = await service.findOne('id');
|
||||||
|
|
||||||
|
// Assert
|
||||||
|
expect(result).toEqual(mockUser);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Mocking External Dependencies
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
jest.mock('axios');
|
||||||
|
const mockedAxios = axios as jest.Mocked<typeof axios>;
|
||||||
|
|
||||||
|
beforeEach(() => {
|
||||||
|
jest.clearAllMocks();
|
||||||
|
mockedAxios.post.mockResolvedValue({ data: { ok: true } });
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. Module Registration
|
||||||
|
|
||||||
|
Redis-enabled modüller için `app.module.ts`:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const redisEnabled = process.env.REDIS_ENABLED === 'true';
|
||||||
|
|
||||||
|
@Module({
|
||||||
|
imports: [
|
||||||
|
...(redisEnabled ? [QueueModule, PredictionsModule] : []),
|
||||||
|
// ...
|
||||||
|
],
|
||||||
|
})
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 8. Environment Variables
|
||||||
|
|
||||||
|
Zorunlu (`.env`):
|
||||||
|
|
||||||
|
```env
|
||||||
|
NODE_ENV=development
|
||||||
|
PORT=3005
|
||||||
|
DATABASE_URL=postgresql://postgres:password@localhost:15432/boilerplate_db
|
||||||
|
JWT_SECRET=your-secret-key
|
||||||
|
JWT_ACCESS_EXPIRATION=15m
|
||||||
|
REDIS_ENABLED=false
|
||||||
|
AI_ENGINE_URL=http://127.0.0.1:8000
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 9. Pre-commit Checklist
|
||||||
|
|
||||||
|
1. `npm run lint` - Lint errors fixed
|
||||||
|
2. `npm run build` - Build succeeds
|
||||||
|
3. `npm run test` - All tests pass
|
||||||
|
4. Response DTOs used for all API responses
|
||||||
|
5. No secrets/credentials in code
|
||||||
+273
@@ -0,0 +1,273 @@
|
|||||||
|
# 🚀 Suggest-Bet-BE — Deployment Guide
|
||||||
|
|
||||||
|
> **Tarih:** 2026-04-03
|
||||||
|
> **Versiyon:** Sport Partition Release (Futbol/Basketbol Ayrımı)
|
||||||
|
> **Amaç:** Masaüstü veya sunucuya kurulum adımları
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🔑 Şifreler ve Bağlantı Bilgileri
|
||||||
|
|
||||||
|
| Servis | Kullanıcı | Şifre | Host | Port |
|
||||||
|
|--------|-----------|-------|------|------|
|
||||||
|
| **PostgreSQL** | `suggestbet` | `SuGGesT2026SecuRe` | `localhost` | `15432` |
|
||||||
|
| **Redis** | — | `RedisSecure2026` | `localhost` | `6379` |
|
||||||
|
| **JWT Secret** | — | `9bfa42fbdc6031da6d7c0bd30e9f5b6378a071613d0c02acf95eb576249c3a25` | — | — |
|
||||||
|
|
||||||
|
**Database URL:**
|
||||||
|
```
|
||||||
|
postgresql://suggestbet:SuGGesT2026SecuRe@localhost:15432/boilerplate_db?schema=public
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 📋 Gereksinimler
|
||||||
|
|
||||||
|
- **Node.js:** v20.19+
|
||||||
|
- **Docker + Docker Compose:** PostgreSQL + Redis için
|
||||||
|
- **npm:** Paket yöneticisi
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🔧 Adım Adım Kurulum
|
||||||
|
|
||||||
|
### Adım 1: Kodu Çek
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ~/Documents/Suggest-Bet-BE
|
||||||
|
git pull origin main
|
||||||
|
```
|
||||||
|
|
||||||
|
### Adım 2: .env Dosyasını Oluştur
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# /Users/piton/Documents/Suggest-Bet-BE/.env
|
||||||
|
NODE_ENV=development
|
||||||
|
PORT=3005
|
||||||
|
DATABASE_URL="postgresql://suggestbet:SuGGesT2026SecuRe@localhost:15432/boilerplate_db?schema=public"
|
||||||
|
JWT_SECRET=9bfa42fbdc6031da6d7c0bd30e9f5b6378a071613d0c02acf95eb576249c3a25
|
||||||
|
JWT_ACCESS_EXPIRATION=7d
|
||||||
|
JWT_REFRESH_EXPIRATION=7d
|
||||||
|
REDIS_ENABLED=true
|
||||||
|
REDIS_HOST=localhost
|
||||||
|
REDIS_PORT=6379
|
||||||
|
REDIS_PASSWORD=RedisSecure2026
|
||||||
|
DEFAULT_LANGUAGE=en
|
||||||
|
FALLBACK_LANGUAGE=en
|
||||||
|
ENABLE_MAIL=false
|
||||||
|
ENABLE_S3=false
|
||||||
|
ENABLE_WEBSOCKET=false
|
||||||
|
ENABLE_MULTI_TENANCY=false
|
||||||
|
THROTTLE_TTL=60000
|
||||||
|
THROTTLE_LIMIT=100
|
||||||
|
ENABLE_GEMINI=true
|
||||||
|
GOOGLE_API_KEY=your-google-api-key
|
||||||
|
GEMINI_MODEL=gemini-2.5-flash
|
||||||
|
AI_ENGINE_URL=http://127.0.0.1:8000
|
||||||
|
```
|
||||||
|
|
||||||
|
### Adım 3: Docker Infrastructure Başlat
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ~/Documents/Suggest-Bet-BE
|
||||||
|
docker compose up -d postgres redis
|
||||||
|
```
|
||||||
|
|
||||||
|
PostgreSQL'in hazır olduğunu kontrol et:
|
||||||
|
```bash
|
||||||
|
docker exec -i suggestbet-postgres pg_isready -U suggestbet
|
||||||
|
# Çıktı: /var/run/postgresql:5432 - accepting connections
|
||||||
|
```
|
||||||
|
|
||||||
|
### Adım 4: Dump'u Restore Et
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Dump dosyasını container'a kopyala
|
||||||
|
docker cp /path/to/dump-boilerplate_db-202604020914-v5 suggestbet-postgres:/tmp/dump_file
|
||||||
|
|
||||||
|
# Restore et
|
||||||
|
export PGPASSWORD="SuGGesT2026SecuRe"
|
||||||
|
docker exec -e PGPASSWORD="$PGPASSWORD" suggestbet-postgres pg_restore \
|
||||||
|
-U suggestbet -d boilerplate_db --clean --if-exists /tmp/dump_file
|
||||||
|
```
|
||||||
|
|
||||||
|
### Adım 5: Sport Partition Migration'ını Çalıştır
|
||||||
|
|
||||||
|
**Sırayla çalıştır — her biri ayrı transaction:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export PGPASSWORD="SuGGesT2026SecuRe"
|
||||||
|
DB="suggestbet-postgres"
|
||||||
|
MIGRATION_DIR="prisma/migrations/20260403161000_sport_partition"
|
||||||
|
|
||||||
|
# 1. Yeni team stats tabloları oluştur
|
||||||
|
docker exec -e PGPASSWORD="$PGPASSWORD" -i $DB psql -U suggestbet -d boilerplate_db < $MIGRATION_DIR/01_create_team_stats.sql
|
||||||
|
|
||||||
|
# 2. Team stats verilerini kopyala
|
||||||
|
docker exec -e PGPASSWORD="$PGPASSWORD" -i $DB psql -U suggestbet -d boilerplate_db < $MIGRATION_DIR/02_copy_team_stats.sql
|
||||||
|
|
||||||
|
# 3. Yeni AI features tabloları oluştur
|
||||||
|
docker exec -e PGPASSWORD="$PGPASSWORD" -i $DB psql -U suggestbet -d boilerplate_db < $MIGRATION_DIR/03_create_ai_features.sql
|
||||||
|
|
||||||
|
# 4. AI features verilerini kopyala
|
||||||
|
docker exec -e PGPASSWORD="$PGPASSWORD" -i $DB psql -U suggestbet -d boilerplate_db < $MIGRATION_DIR/04_copy_ai_features.sql
|
||||||
|
|
||||||
|
# 5. match_player_stats → basketball_player_stats rename
|
||||||
|
docker exec -e PGPASSWORD="$PGPASSWORD" -i $DB psql -U suggestbet -d boilerplate_db < $MIGRATION_DIR/05_rename_player_stats.sql
|
||||||
|
|
||||||
|
# 6. odd_categories + odd_selections'e sport kolonu ekle
|
||||||
|
docker exec -e PGPASSWORD="$PGPASSWORD" -i $DB psql -U suggestbet -d boilerplate_db < $MIGRATION_DIR/06_add_sport_to_odds.sql
|
||||||
|
```
|
||||||
|
|
||||||
|
**odd_selections için batch update (14.8M satır — her çalıştırmada 1M satır günceller):**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Bunu "remaining = 0" olana kadar tekrar tekrar çalıştır
|
||||||
|
export PGPASSWORD="SuGGesT2026SecuRe"
|
||||||
|
docker exec -e PGPASSWORD="$PGPASSWORD" -i suggestbet-postgres psql -U suggestbet -d boilerplate_db -c "
|
||||||
|
WITH t AS (
|
||||||
|
SELECT os.db_id, oc.sport
|
||||||
|
FROM odd_selections os
|
||||||
|
JOIN odd_categories oc ON os.odd_category_db_id = oc.db_id
|
||||||
|
WHERE os.sport IS NULL
|
||||||
|
LIMIT 1000000
|
||||||
|
)
|
||||||
|
UPDATE odd_selections SET sport = t.sport FROM t WHERE odd_selections.db_id = t.db_id;
|
||||||
|
|
||||||
|
SELECT COUNT(*) as remaining FROM odd_selections WHERE sport IS NULL;
|
||||||
|
"
|
||||||
|
```
|
||||||
|
|
||||||
|
**Kalan satırlar bitince index oluştur:**
|
||||||
|
```bash
|
||||||
|
export PGPASSWORD="SuGGesT2026SecuRe"
|
||||||
|
docker exec -e PGPASSWORD="$PGPASSWORD" -i suggestbet-postgres psql -U suggestbet -d boilerplate_db -c "
|
||||||
|
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_odd_selections_sport ON odd_selections(sport) WHERE sport IS NOT NULL;
|
||||||
|
"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Adım 6: Bağımlılıkları Yükle + Prisma Generate
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ~/Documents/Suggest-Bet-BE
|
||||||
|
|
||||||
|
# Bağımlılıkları yükle
|
||||||
|
npm ci
|
||||||
|
|
||||||
|
# Prisma client oluştur
|
||||||
|
npx prisma generate
|
||||||
|
```
|
||||||
|
|
||||||
|
### Adım 7: Build + Başlat
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Build
|
||||||
|
npm run build
|
||||||
|
|
||||||
|
# Başlat
|
||||||
|
npm run start:prod
|
||||||
|
```
|
||||||
|
|
||||||
|
### Adım 8: Doğrulama
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Sağlık kontrolü
|
||||||
|
curl http://localhost:3005/api/health
|
||||||
|
|
||||||
|
# Swagger UI
|
||||||
|
open http://localhost:3005/api/docs
|
||||||
|
|
||||||
|
# Yeni tabloları kontrol et
|
||||||
|
export PGPASSWORD="SuGGesT2026SecuRe"
|
||||||
|
docker exec -e PGPASSWORD="$PGPASSWORD" -i suggestbet-postgres psql -U suggestbet -d boilerplate_db -c "
|
||||||
|
SELECT 'football_team_stats' as tbl, COUNT(*) FROM football_team_stats
|
||||||
|
UNION ALL SELECT 'basketball_team_stats', COUNT(*) FROM basketball_team_stats
|
||||||
|
UNION ALL SELECT 'basketball_player_stats', COUNT(*) FROM basketball_player_stats
|
||||||
|
UNION ALL SELECT 'odd_categories (sport)', COUNT(*) FROM odd_categories WHERE sport IS NOT NULL
|
||||||
|
UNION ALL SELECT 'odd_selections (sport)', COUNT(*) FROM odd_selections WHERE sport IS NOT NULL;
|
||||||
|
"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🤖 AI Engine (Opsiyonel)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ~/Documents/Suggest-Bet-BE/ai-engine
|
||||||
|
|
||||||
|
# Bağımlılıklar
|
||||||
|
pip install -r requirements.txt
|
||||||
|
|
||||||
|
# Başlat
|
||||||
|
uvicorn main:app --host 0.0.0.0 --port 8000
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## ✅ Tablo Durumu (Migration Sonrası)
|
||||||
|
|
||||||
|
| Tablo | Satır (~) | Durum |
|
||||||
|
|-------|-----------|-------|
|
||||||
|
| `football_team_stats` | 217,956 | ✅ Yeni |
|
||||||
|
| `basketball_team_stats` | 48,824 | ✅ Yeni |
|
||||||
|
| `basketball_player_stats` | 273,140 | ✅ Rename edildi |
|
||||||
|
| `football_ai_features` | 0 | ⚠️ Feeder dolduracak |
|
||||||
|
| `basketball_ai_features` | 0 | ⚠️ Feeder dolduracak |
|
||||||
|
| `odd_categories (sport)` | 2,695,511 | ✅ Güncellendi |
|
||||||
|
| `odd_selections (sport)` | 14,810,396 | ✅ Güncellendi |
|
||||||
|
| `match_team_stats` (ESKİ) | 266,780 | 🗑️ Silinebilir (yedek olarak kalsın) |
|
||||||
|
| `match_ai_features` (ESKİ) | 0 | 🗑️ Silinebilir |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🗑️ Eski Tabloları Silme (Opsiyonel)
|
||||||
|
|
||||||
|
**SADECE her şeyin çalıştığını doğruladıktan sonra:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export PGPASSWORD="SuGGesT2026SecuRe"
|
||||||
|
docker exec -e PGPASSWORD="$PGPASSWORD" -i suggestbet-postgres psql -U suggestbet -d boilerplate_db -c "
|
||||||
|
DROP TABLE IF EXISTS match_team_stats CASCADE;
|
||||||
|
DROP TABLE IF EXISTS match_ai_features CASCADE;
|
||||||
|
"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🔧 Sorun Giderme
|
||||||
|
|
||||||
|
### PostgreSQL başlamıyor (postmaster.pid hatası)
|
||||||
|
```bash
|
||||||
|
docker compose stop postgres
|
||||||
|
docker compose rm -f postgres
|
||||||
|
docker volume rm suggest-bet-be_pgml_data
|
||||||
|
docker compose up -d postgres
|
||||||
|
# Sonra dump + migration tekrar
|
||||||
|
```
|
||||||
|
|
||||||
|
### Docker Desktop başlamıyor (disk dolu)
|
||||||
|
```bash
|
||||||
|
# Büyük dosyaları temizle
|
||||||
|
rm -rf ~/Library/Caches/Homebrew/*
|
||||||
|
rm -rf ~/.npm/_cacache
|
||||||
|
docker system prune -af
|
||||||
|
df -h / # En az 3-4GB boş olmalı
|
||||||
|
```
|
||||||
|
|
||||||
|
### Feeder çalışmıyor
|
||||||
|
```bash
|
||||||
|
# Logları kontrol et
|
||||||
|
tail -f logs/app.log # veya docker logs suggestbet-app
|
||||||
|
|
||||||
|
# Manuel feeder çalıştır
|
||||||
|
npm run feeder:live
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 📝 Notlar
|
||||||
|
|
||||||
|
- **Veri kaybolmaz** — eski tablolar migration sonrası silinmez, yedek olarak kalır
|
||||||
|
- **Feeder** otomatik yeni tablolara yazar (`footballTeamStats`, `basketballTeamStats`, vb.)
|
||||||
|
- **Redis** opsiyonel — `REDIS_ENABLED=false` yapabilirsin (in-memory fallback)
|
||||||
|
- **Swagger** sadece development modunda aktif
|
||||||
Executable
+59
@@ -0,0 +1,59 @@
|
|||||||
|
# Build stage
|
||||||
|
FROM node:20-alpine AS builder
|
||||||
|
|
||||||
|
# Add build tools for native canvas compilation (fixes 16k page size issues on RPi5 ARM64)
|
||||||
|
RUN apk add --no-cache python3 make g++ cairo-dev pango-dev jpeg-dev giflib-dev librsvg-dev
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Copy package files
|
||||||
|
COPY package*.json ./
|
||||||
|
|
||||||
|
# Install dependencies
|
||||||
|
RUN npm ci
|
||||||
|
|
||||||
|
# Copy source code
|
||||||
|
COPY . .
|
||||||
|
|
||||||
|
# Generate Prisma client
|
||||||
|
RUN npx prisma generate
|
||||||
|
|
||||||
|
# Build the application
|
||||||
|
RUN npm run build
|
||||||
|
|
||||||
|
# Production stage
|
||||||
|
FROM node:20-alpine AS production
|
||||||
|
|
||||||
|
# Add runtime dependencies for canvas & prisma
|
||||||
|
RUN apk add --no-cache cairo pango jpeg giflib librsvg openssl
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Copy package files
|
||||||
|
COPY package*.json ./
|
||||||
|
|
||||||
|
# Install production dependencies only (with build tools for canvas)
|
||||||
|
RUN apk add --no-cache --virtual .build-deps python3 make g++ cairo-dev pango-dev jpeg-dev giflib-dev librsvg-dev \
|
||||||
|
&& npm ci --omit=dev --build-from-source=canvas \
|
||||||
|
&& apk del .build-deps
|
||||||
|
|
||||||
|
# Copy Prisma schema and generate client
|
||||||
|
COPY prisma ./prisma
|
||||||
|
RUN npx prisma generate
|
||||||
|
|
||||||
|
# Copy built application
|
||||||
|
COPY --from=builder /app/dist ./dist
|
||||||
|
|
||||||
|
# Copy i18n files
|
||||||
|
COPY --from=builder /app/src/i18n ./dist/i18n
|
||||||
|
|
||||||
|
# Copy league filter config files (critical: without these, feeder stores ALL matches)
|
||||||
|
COPY top_leagues.json basketball_top_leagues.json ./
|
||||||
|
|
||||||
|
# Set environment
|
||||||
|
ENV NODE_ENV=production
|
||||||
|
|
||||||
|
# Expose port
|
||||||
|
EXPOSE 3000
|
||||||
|
|
||||||
|
# Start the application
|
||||||
|
CMD ["node", "dist/main.js"]
|
||||||
@@ -0,0 +1,517 @@
|
|||||||
|
# Suggest-Bet-BE — AI Agent Context
|
||||||
|
|
||||||
|
> **Last Updated:** 2026-04-07
|
||||||
|
> **Purpose:** Comprehensive project reference for AI agents working on this codebase.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. Project Overview
|
||||||
|
|
||||||
|
**Suggest-Bet-BE** is an **AI-powered sports betting prediction platform** backend. It provides:
|
||||||
|
|
||||||
|
- AI-driven predictions for football & basketball matches
|
||||||
|
- Smart coupon generation (SAFE, BALANCED, AGGRESSIVE, VALUE, MIRACLE strategies)
|
||||||
|
- Live score tracking & odds monitoring
|
||||||
|
- Web scraping from Mackolik.com for historical & live match data
|
||||||
|
- Google Gemini AI for natural language match commentary
|
||||||
|
- User coupon tracking (ROI, Win Rate analytics)
|
||||||
|
|
||||||
|
### Technology Stack
|
||||||
|
|
||||||
|
| Layer | Technology |
|
||||||
|
| ----------- | -------------------------------------------- |
|
||||||
|
| Backend API | NestJS 11 (TypeScript) |
|
||||||
|
| AI Engine | Python FastAPI (v20+) |
|
||||||
|
| Database | PostgreSQL 16 + Prisma ORM |
|
||||||
|
| Queue | BullMQ + Redis (optional) |
|
||||||
|
| Cache | Redis or in-memory fallback |
|
||||||
|
| Auth | JWT + Passport (Access 15min + Refresh 7day) |
|
||||||
|
| Scraping | Axios + Cheerio (Mackolik HTML parsing) |
|
||||||
|
| Logging | Pino (structured logging) |
|
||||||
|
| i18n | nestjs-i18n (TR, EN) |
|
||||||
|
| API Docs | Swagger |
|
||||||
|
| Deploy | Docker Compose |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. Architecture
|
||||||
|
|
||||||
|
```
|
||||||
|
┌──────────────────────────────────────────────────────────────────┐
|
||||||
|
│ CLIENTS (Web/Mobile) │
|
||||||
|
└───────────────────────────────┬──────────────────────────────────┘
|
||||||
|
│ HTTP/REST
|
||||||
|
┌───────────────────────────────▼──────────────────────────────────┐
|
||||||
|
│ NestJS Backend (Port 3005) │
|
||||||
|
│ ┌─────────┬──────────┬──────────┬──────────┬─────────────────┐ │
|
||||||
|
│ │ Auth │ Admin │ Matches │ Leagues │ Predictions │ │
|
||||||
|
│ │ Module │ Module │ Module │ Module │ Module │ │
|
||||||
|
│ ├─────────┼──────────┼──────────┼──────────┼─────────────────┤ │
|
||||||
|
│ │ Coupons │ Analysis │ Gemini │ Social- │ Health │ │
|
||||||
|
│ │ Module │ Module │ Module │ Poster │ Module │ │
|
||||||
|
│ │SporToto │ Feeder │ Users │ │ │ │
|
||||||
|
│ └─────────┴──────────┴──────────┴──────────┴─────────────────┘ │
|
||||||
|
│ ┌──────────────────────────────────────────────────────────────┐ │
|
||||||
|
│ │ Services: AiService | MatchAnalysis | Scraper │ │
|
||||||
|
│ ├──────────────────────────────────────────────────────────────┤ │
|
||||||
|
│ │ Tasks: DataFetcher (Cron) | LiveUpdater | LimitResetter │ │
|
||||||
|
│ └──────────────────────────────────────────────────────────────┘ │
|
||||||
|
└────┬─────────────────┬────────────────────┬──────────────────────┘
|
||||||
|
│ │ │
|
||||||
|
▼ ▼ ▼
|
||||||
|
┌─────────┐ ┌──────────────┐ ┌──────────────────┐
|
||||||
|
│PostgreSQL│ │ Redis/BullMQ │ │ AI Engine (py) │
|
||||||
|
│ (3.6GB) │ │ (Optional) │ │ FastAPI:8000 │
|
||||||
|
└─────────┘ └──────────────┘ └──────────────────┘
|
||||||
|
│
|
||||||
|
┌───────▼───────┐
|
||||||
|
│ Mackolik API │
|
||||||
|
│ (Data Source) │
|
||||||
|
└───────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### Database Statistics (approximate)
|
||||||
|
|
||||||
|
- `matches`: 237K permanent match records
|
||||||
|
- `live_matches`: ~82 active/upcoming matches (daily cycle)
|
||||||
|
- `match_player_participation`: 3.3M
|
||||||
|
- `odd_selections`: 8.5M
|
||||||
|
- `teams`: 19,595 | `players`: 217K | `leagues`: 1,505
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. Directory Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
src/
|
||||||
|
├── app.module.ts # Root module (Redis, Config, i18n, guards)
|
||||||
|
├── main.ts # Entry point, Swagger, Helmet, ValidationPipe
|
||||||
|
├── common/ # Shared layer
|
||||||
|
│ ├── base/ # Generic BaseService<T> & BaseController<T>
|
||||||
|
│ ├── types/ # ApiResponse<T>, pagination DTOs
|
||||||
|
│ ├── filters/ # GlobalExceptionFilter (HTTP 200 wrapper)
|
||||||
|
│ ├── interceptors/ # ResponseInterceptor, SanitizeInterceptor
|
||||||
|
│ ├── decorators/ # @Public(), @Roles(), @CurrentUser()
|
||||||
|
│ └── queues/ # BullMQ queue module
|
||||||
|
├── config/ # Env validation (Zod), config factories
|
||||||
|
├── database/ # PrismaService
|
||||||
|
├── i18n/ # TR/EN translations (common, errors, validation, auth)
|
||||||
|
├── modules/ # 13 feature modules
|
||||||
|
│ ├── admin/ # Superadmin panel (user mgmt, settings, analytics)
|
||||||
|
│ ├── analysis/ # Multi-match analysis orchestration
|
||||||
|
│ ├── auth/ # JWT auth, refresh tokens, guards
|
||||||
|
│ ├── coupons/ # SmartCouponService (5 strategies), UserCouponService
|
||||||
|
│ ├── feeder/ # Historical data scraping (Mackolik)
|
||||||
|
│ ├── gemini/ # Google Gemini AI integration
|
||||||
|
│ ├── health/ # Liveness, readiness, AI Engine health
|
||||||
|
│ ├── leagues/ # Country/league/team discovery, H2H
|
||||||
|
│ ├── matches/ # Match listing, details, active leagues
|
||||||
|
│ ├── predictions/ # AI predictions with BullMQ queue & 6h cache
|
||||||
|
│ ├── social-poster/ # Twitter API v2, Canvas image generation
|
||||||
|
│ ├── spor-toto/ # Spor Toto integration
|
||||||
|
│ └── users/ # User CRUD (BaseController pattern)
|
||||||
|
├── scripts/ # Feeder runners, cleanup scripts
|
||||||
|
├── services/ # Shared services
|
||||||
|
│ ├── ai.service.ts # Python AI Engine bridge
|
||||||
|
│ ├── match-analysis.service.ts # 7-phase analysis orchestrator
|
||||||
|
│ └── scraper.service.ts # Mackolik HTML scraping
|
||||||
|
└── tasks/ # Cron jobs (15min, 30min, daily)
|
||||||
|
├── data-fetcher.task.ts # Live matches, odds fetching
|
||||||
|
├── live-updater.task.ts # Score updates, match finalization
|
||||||
|
└── limit-resetter.task.ts # Usage limits, subscription expiry
|
||||||
|
|
||||||
|
ai-engine/ # Python FastAPI ML engine
|
||||||
|
├── main.py # FastAPI app, routes
|
||||||
|
├── services/ # single_match_orchestrator.py
|
||||||
|
├── core/ # Core algorithms
|
||||||
|
├── features/ # Feature engineering
|
||||||
|
├── models/ # ML models
|
||||||
|
├── training/ # Model training scripts
|
||||||
|
├── config/ # Configuration
|
||||||
|
├── utils/ # Utility functions
|
||||||
|
└── tests/ # Test files
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. Key Modules
|
||||||
|
|
||||||
|
### Auth Module
|
||||||
|
|
||||||
|
- Register, Login, Refresh, Logout endpoints
|
||||||
|
- bcrypt (12 rounds), JWT Access (15min) + Refresh Token (7 days, DB-stored)
|
||||||
|
- Global guards: `JwtAuthGuard`, `RolesGuard`, `PermissionsGuard`
|
||||||
|
|
||||||
|
### Predictions Module
|
||||||
|
|
||||||
|
- Requires Redis (`REDIS_ENABLED=true`), conditionally loaded
|
||||||
|
- BullMQ queue with worker processor
|
||||||
|
- 6-hour TTL cache on prediction results
|
||||||
|
- AI Engine call: `POST /v20plus/analyze/{matchId}`
|
||||||
|
|
||||||
|
### Coupons Module
|
||||||
|
|
||||||
|
- `SmartCouponService`: 5 strategies (SAFE ≥78% confidence/2 matches, BALANCED, AGGRESSIVE, VALUE EV+, MIRACLE)
|
||||||
|
- `UserCouponService`: Coupon creation, bet settlement (MS 1/X/2, Alt/Üst, KG Var/Yok)
|
||||||
|
|
||||||
|
### Feeder Module
|
||||||
|
|
||||||
|
- Historical scraping from 2023-06-01 to present (reverse chronological)
|
||||||
|
- Concurrency=20, 300ms delay, max 50 retries, exponential backoff on HTTP 502
|
||||||
|
- Resume support with state management
|
||||||
|
|
||||||
|
### Analysis Module
|
||||||
|
|
||||||
|
- Usage limits: Free (10 analyses/3 coupons per day) vs Premium (50 analyses/10 coupons per day)
|
||||||
|
- 7-phase flow: URL Parse → Scrape → Python Engine → Strategy → Similar Matches → Final Prediction → DB Save
|
||||||
|
|
||||||
|
### Social Poster Module
|
||||||
|
|
||||||
|
- Twitter API v2 integration
|
||||||
|
- Canvas-based prediction card image generation
|
||||||
|
- Gemini-powered Turkish caption generation
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. Scheduled Tasks (Cron)
|
||||||
|
|
||||||
|
| Task | Schedule | Description |
|
||||||
|
| --------------------------- | -------------- | -------------------------------------------------------- |
|
||||||
|
| `fetchLiveMatches()` | `*/15 * * * *` | Fetch football matches from Mackolik API |
|
||||||
|
| `fetchOddsForPreMatches()` | `*/15 * * * *` | Fetch odds for upcoming matches (football + basketball) |
|
||||||
|
| `fetchBasketballMatches()` | Manual | Basketball data via `basketball_top_leagues.json` filter |
|
||||||
|
| `updateLiveScores()` | `*/15 * * * *` | Update live match scores |
|
||||||
|
| `finalizeFinishedMatches()` | `*/30 * * * *` | Migrate finished: live_matches → matches table |
|
||||||
|
| `resetUsageLimits()` | `0 3 * * *` | Reset daily usage limits (03:00 Istanbul time) |
|
||||||
|
| `cleanupOldData()` | `0 4 * * *` | Delete AI logs older than 30 days and finished live_matches older than 1 day |
|
||||||
|
| `checkSubscriptions()` | `0 0 * * *` | Mark expired subscriptions |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. AI Engine (Python FastAPI)
|
||||||
|
|
||||||
|
Independent microservice on port 8000.
|
||||||
|
|
||||||
|
### Endpoints
|
||||||
|
|
||||||
|
| Method | Path | Description |
|
||||||
|
| ------ | ---------------------------------- | ------------------------------- |
|
||||||
|
| POST | `/v20plus/analyze/{match_id}` | Single match analysis (main) |
|
||||||
|
| GET | `/v20plus/analyze-htms/{match_id}` | First half - Full time analysis |
|
||||||
|
| GET | `/v20plus/analyze-htft/{match_id}` | HT/FT probabilities |
|
||||||
|
| POST | `/v20plus/coupon` | Smart coupon generation |
|
||||||
|
| GET | `/v20plus/daily-banker` | Daily banker picks |
|
||||||
|
| GET | `/v20plus/reversal-watchlist` | Score reversal watchlist |
|
||||||
|
| GET | `/health` | Health check |
|
||||||
|
|
||||||
|
### Output Structure (`SingleMatchPredictionPackage`)
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
{
|
||||||
|
model_version: "v20plus.X",
|
||||||
|
match_info: { match_id, match_name, home_team, away_team, league, match_date_ms },
|
||||||
|
data_quality: { label: "HIGH"|"MEDIUM"|"LOW", score, flags, lineup_counts },
|
||||||
|
risk: { level: "LOW"|"MEDIUM"|"HIGH"|"EXTREME", score, is_surprise_risk, warnings },
|
||||||
|
main_pick: { market, pick, probability, confidence, odds, bet_grade, edge },
|
||||||
|
value_pick: { ... },
|
||||||
|
bet_advice: { playable, suggested_stake_units, reason },
|
||||||
|
bet_summary: [{ market, pick, raw_confidence, calibrated_confidence, bet_grade }],
|
||||||
|
supporting_picks: [...],
|
||||||
|
aggressive_pick: { market, pick, probability, confidence, odds },
|
||||||
|
scenario_top5: [{ score, prob }],
|
||||||
|
score_prediction: { ft, ht, xg_home, xg_away, xg_total },
|
||||||
|
market_board: { ... },
|
||||||
|
reasoning_factors: string[],
|
||||||
|
ai_commentary: string // Turkish commentary from Gemini
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. API Response Format
|
||||||
|
|
||||||
|
All responses follow this standard structure:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"success": true,
|
||||||
|
"status": 200,
|
||||||
|
"message": "İşlem başarıyla tamamlandı", // i18n translated
|
||||||
|
"data": { ... },
|
||||||
|
"errors": []
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Critical Rule:** Controllers must NEVER return raw Prisma entities. Always use Response DTOs with `@Exclude()` and `@Expose()` from `class-transformer`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 8. Configuration
|
||||||
|
|
||||||
|
### Environment Variables
|
||||||
|
|
||||||
|
```env
|
||||||
|
NODE_ENV=development
|
||||||
|
PORT=3005
|
||||||
|
DATABASE_URL=postgresql://user:password@localhost:15432/boilerplate_db
|
||||||
|
JWT_SECRET=your-secret-key
|
||||||
|
JWT_ACCESS_EXPIRATION=15m
|
||||||
|
JWT_REFRESH_EXPIRATION=7d
|
||||||
|
REDIS_ENABLED=false
|
||||||
|
REDIS_HOST=localhost
|
||||||
|
REDIS_PORT=6379
|
||||||
|
AI_ENGINE_URL=http://127.0.0.1:8000
|
||||||
|
ENABLE_GEMINI=false
|
||||||
|
GOOGLE_API_KEY=your-api-key
|
||||||
|
```
|
||||||
|
|
||||||
|
### Config Files
|
||||||
|
|
||||||
|
- `top_leagues.json` — Football top league IDs (live match filter)
|
||||||
|
- `basketball_top_leagues.json` — Basketball top league IDs
|
||||||
|
- `bet-type.json` — Bet type definitions
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 9. Build & Run Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Development
|
||||||
|
npm run start:dev # Watch mode (port 3005)
|
||||||
|
|
||||||
|
# Production
|
||||||
|
npm run build && npm run start:prod
|
||||||
|
|
||||||
|
# Feeder (Data Collection)
|
||||||
|
npm run feeder:historical # Historical scraping (2023-06→present)
|
||||||
|
npm run feeder:fill-gaps # Fill missing data
|
||||||
|
npm run feeder:basketball # Basketball data
|
||||||
|
npm run feeder:live # Live data
|
||||||
|
|
||||||
|
# Database
|
||||||
|
npx prisma generate # Regenerate Prisma client
|
||||||
|
npx prisma migrate dev # Run migrations
|
||||||
|
npx prisma db seed # Seed database
|
||||||
|
|
||||||
|
# Testing
|
||||||
|
npm run test # Unit tests
|
||||||
|
npm run test:e2e # E2E tests
|
||||||
|
npx jest src/path/to/file.spec.ts # Single test file
|
||||||
|
|
||||||
|
# Lint/Format
|
||||||
|
npm run lint # ESLint with Prettier
|
||||||
|
npm run format # Prettier write
|
||||||
|
|
||||||
|
# Docker
|
||||||
|
docker-compose up -d postgres redis # Infrastructure
|
||||||
|
docker-compose up -d # All services
|
||||||
|
|
||||||
|
# AI Engine (Python)
|
||||||
|
cd ai-engine && uvicorn main:app --host 0.0.0.0 --port 8000 --reload
|
||||||
|
|
||||||
|
# Utility
|
||||||
|
npm run swagger:summary # Export endpoint summary
|
||||||
|
npm run cleanup:live # Cleanup live matches
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 10. Code Style Guidelines
|
||||||
|
|
||||||
|
### Imports Order
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// 1. NestJS/common imports
|
||||||
|
import { Controller, Get, Post, Body } from '@nestjs/common';
|
||||||
|
|
||||||
|
// 2. External packages
|
||||||
|
import * as bcrypt from 'bcrypt';
|
||||||
|
|
||||||
|
// 3. Local imports (relative)
|
||||||
|
import { UsersService } from './users.service';
|
||||||
|
```
|
||||||
|
|
||||||
|
### Naming Conventions
|
||||||
|
|
||||||
|
- Classes/Interfaces: `PascalCase`
|
||||||
|
- Variables/Functions: `camelCase`
|
||||||
|
- Constants: `UPPER_SNAKE_CASE`
|
||||||
|
- Files: `kebab-case`
|
||||||
|
- DTOs: `Entity + Dto` suffix (CreateUserDto, UpdateUserDto)
|
||||||
|
|
||||||
|
### Types
|
||||||
|
|
||||||
|
- `strictNullChecks: true` — null/undefined checks required
|
||||||
|
- `noImplicitAny: false` — `any` allowed (Prisma dynamic access)
|
||||||
|
- Specify function return types: `async findOne(id: string): Promise<User>`
|
||||||
|
|
||||||
|
### Error Handling
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Use NestJS HTTP Exceptions with i18n keys
|
||||||
|
throw new NotFoundException('USER_NOT_FOUND');
|
||||||
|
throw new ConflictException('EMAIL_ALREADY_EXISTS');
|
||||||
|
|
||||||
|
// Reference src/i18n/{lang}/errors.json for available keys
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 11. Known Issues & Gotchas
|
||||||
|
|
||||||
|
1. **Predictions module** requires Redis. Disabled when `REDIS_ENABLED=false`.
|
||||||
|
2. **Gemini AI** is optional. Returns `null` commentary when disabled.
|
||||||
|
3. **Global Exception Filter** wraps all errors as HTTP 200 (status in body).
|
||||||
|
4. **Lineup scraping** is disabled — only Team Stats are used (V20 optimization).
|
||||||
|
5. **Feeder V17 AI feature calculation** is disabled — V20 model runs in Python.
|
||||||
|
6. **BigInt serialization**: `BigInt.prototype.toJSON = function() { return this.toString(); }` polyfill in main.ts.
|
||||||
|
7. **i18n assets** copied via `nest-cli.json` `"assets": ["i18n/**/*"]` config.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 12. Reference Files for AI Agents
|
||||||
|
|
||||||
|
When working on this project, consult:
|
||||||
|
|
||||||
|
- `project_summary.md` — Comprehensive project documentation (Turkish)
|
||||||
|
- `README.md` — Architecture decisions, quick start guide
|
||||||
|
- `prompt.md` — AI assistant reference guide with agent roles
|
||||||
|
- `AGENTS.md` — Coding guidelines, DTO patterns, test structure
|
||||||
|
- `.agent/` — Skills and agent role definitions
|
||||||
|
- `top_leagues.json` / `basketball_top_leagues.json` — League filters
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 13. Team Logos
|
||||||
|
|
||||||
|
Team logo URL template: `https://file.mackolikfeeds.com/teams/{teamId}`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 14. 🆕 VQWEN Model Integration (Since 2026-04-06)
|
||||||
|
|
||||||
|
We have integrated a new high-performance prediction engine called **VQWEN v3**.
|
||||||
|
|
||||||
|
### VQWEN Model Features
|
||||||
|
- **Accuracy:** +244.4 Units profit in Time-Series Backtest (75.1% Win Rate on BTTS/Over markets).
|
||||||
|
- **Features Used:**
|
||||||
|
- `ELO Ratings` (Real-time team strength).
|
||||||
|
- `Contextual Goals` (Home/Away specific performance).
|
||||||
|
- `Rest Days` (Fatigue factor for teams playing with fewer than 3 days of rest).
|
||||||
|
- `H2H Win Rate` (Historical dominance).
|
||||||
|
- `Form Points` (Last 5 games streak).
|
||||||
|
- `Squad Strength` (Based on starting XI participation).
|
||||||
|
- **Files:**
|
||||||
|
- `ai-engine/scripts/train_vqwen_v3.py` — Training script.
|
||||||
|
- `ai-engine/services/single_match_orchestrator.py` — Integration point.
|
||||||
|
- `ai-engine/models/vqwen/` — Pickle models (`vqwen_ms.pkl`, etc.).
|
||||||
|
|
||||||
|
### New Live Lineup/Sidelined Fetcher
|
||||||
|
- **Problem:** `lineups` and `sidelined` columns in `live_matches` were empty.
|
||||||
|
- **Fix:** Added `updateLineupsAndSidelined()` method to `src/tasks/data-fetcher.task.ts`.
|
||||||
|
- **Mechanism:** Uses `FeederScraperService.fetchStartingFormation` directly via Cron (`*/15 * * * *`).
|
||||||
|
- **Status:** Active.
|
||||||
|
|
||||||
|
### Database Schema Updates
|
||||||
|
- **`substate` Column:** Added to `matches` table to track specific match states (e.g., "penalties", "overtime", "postponed").
|
||||||
|
- **Sport Partition:** Tables are now partitioned by sport (`football_team_stats` vs `basketball_team_stats`).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 16. 🔍 HT/FT Reversal Analysis (Since 2026-04-07)
|
||||||
|
|
||||||
|
### HT/FT Reversal (1/2 & 2/1) Pattern Detection
|
||||||
|
|
||||||
|
Reversal matches (İY/MS = 1/2 or 2/1) are statistically rare events that can indicate match-fixing or unusual patterns.
|
||||||
|
|
||||||
|
#### Key Findings (147,248 matches analyzed)
|
||||||
|
|
||||||
|
| Metric | Value |
|
||||||
|
|--------|-------|
|
||||||
|
| **Total Reversal Matches** | 13,112 (8.90%) |
|
||||||
|
| **1/2 (Home leads HT, Away wins FT)** | 5,992 (4.07%) |
|
||||||
|
| **2/1 (Away leads HT, Home wins FT)** | 7,120 (4.84%) |
|
||||||
|
|
||||||
|
#### 🚨 Basketball Leagues Have Suspiciously High Reversal Rates
|
||||||
|
|
||||||
|
| League | Reversals | Total | Rate |
|
||||||
|
|--------|-----------|-------|------|
|
||||||
|
| Eurobasket U20 | 36 | 120 | **30.00%** 🔴 |
|
||||||
|
| EuroLeague 🏀 | 183 | 639 | **28.64%** 🔴 |
|
||||||
|
| PBA Commissioners 🏀 | 54 | 189 | **28.57%** 🔴 |
|
||||||
|
| Ulusal Süper Lig 🏀 | 148 | 547 | **27.06%** 🔴 |
|
||||||
|
| NBA 🏀 | 656 | 2,696 | **24.33%** 🔴 |
|
||||||
|
|
||||||
|
**All top 15 leagues by reversal rate are BASKETBALL.** Football leagues show normal rates (5-8%).
|
||||||
|
|
||||||
|
#### Suspicious Patterns
|
||||||
|
|
||||||
|
1. **Comeback Magnitude:**
|
||||||
|
- 1 goal/point: 36.1% (normal)
|
||||||
|
- 2 goals/points: 13.1% (suspicious)
|
||||||
|
- **3+ goals/points: 50.8%** 🔴 **EXTREMELY HIGH**
|
||||||
|
|
||||||
|
2. **Extreme Comebacks (Basketball):**
|
||||||
|
- Mineros vs Irapuato: HT 39-45 → FT 102-61 (trailed by 6 at HT, won by 41 — a 47-point swing!)
|
||||||
|
- Utah vs Memphis: HT 65-64 → FT 103-140 (led by 1 at HT, lost by 37 — a 38-point swing!)
|
||||||
|
- These are statistically near-impossible without manipulation
|
||||||
|
|
||||||
|
3. **Favorite Loss Rate:**
|
||||||
|
- 42.7% of reversals had the pre-match favorite lose (should be ~25-30%)
|
||||||
|
|
||||||
|
#### Impact on Model
|
||||||
|
|
||||||
|
- HT/FT model accuracy: **20.3%** (low due to reversal noise)
|
||||||
|
- Basketball reversal data creates **training noise**
|
||||||
|
- **Recommendation:** Either exclude basketball from HT/FT training or train separate basketball-specific model
|
||||||
|
|
||||||
|
#### HT/FT Model Files
|
||||||
|
|
||||||
|
- **Training script:** `ai-engine/scripts/train_htft_vqwen.py`
|
||||||
|
- **Model output:** `ai-engine/models/xgboost/xgb_ht_ft.json` + `.pkl`
|
||||||
|
- **Features:** 27 (Odds + HT/FT Tendencies + League stats)
|
||||||
|
- **Status:** Working, outputs 9-class probabilities in `market_board.HTFT.probs`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 17. 🐛 Lineup Parsing Fix (Since 2026-04-07)
|
||||||
|
|
||||||
|
### Problem
|
||||||
|
AI Engine reported `"lineup_unavailable"` and `"lineup_incomplete"` flags even when `live_matches.lineups` contained full 11/11 lineup data from Mackolik.
|
||||||
|
|
||||||
|
### Root Cause
|
||||||
|
Mackolik stores lineups in `"stats"` key format:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"stats": {
|
||||||
|
"home": [{ "personId": "...", "position": "...", ... }, ...],
|
||||||
|
"away": [{ "personId": "...", "position": "...", ... }, ...]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
But the parser expected `"xi"`, `"starting"`, or `"lineup"` keys at root level.
|
||||||
|
|
||||||
|
### Fix
|
||||||
|
Updated `_parse_lineups_json()` in `ai-engine/services/single_match_orchestrator.py`:
|
||||||
|
- Added fallback to check `lineups_json.get("stats")` for home/away arrays
|
||||||
|
- Now correctly parses Mackolik's nested format
|
||||||
|
- Result: `home_lineup_count: 11`, `away_lineup_count: 11`, `lineup_source: "confirmed_live"`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 18. Docker Deployment
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# docker-compose.yml services:
|
||||||
|
services:
|
||||||
|
app: # NestJS (port 3000→3000)
|
||||||
|
postgres: # PostgreSQL 17 Alpine (port 15432:5432)
|
||||||
|
redis: # Redis 7 Alpine (port 6379)
|
||||||
|
adminer: # Database UI (dev profile, port 8080)
|
||||||
|
ai-engine: # Python FastAPI (port 8002:8000)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
_This file is maintained for AI agent context. Update when architecture or conventions change._
|
||||||
@@ -0,0 +1,337 @@
|
|||||||
|
# 🚀 Enterprise NestJS Boilerplate (Antigravity Edition)
|
||||||
|
|
||||||
|
[NestJS](https://nestjs.com/)
|
||||||
|
[TypeScript](https://www.typescriptlang.org/)
|
||||||
|
[Prisma](https://www.prisma.io/)
|
||||||
|
[PostgreSQL](https://www.postgresql.org/)
|
||||||
|
[Docker](https://www.docker.com/)
|
||||||
|
|
||||||
|
> **FOR AI AGENTS & DEVELOPERS:** This documentation is structured to provide deep context, architectural decisions, and operational details to ensure seamless handover to any AI coding assistant (like Antigravity) or human developer.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🧠 Project Context & Architecture (Read Me First)
|
||||||
|
|
||||||
|
This is an **opinionated, production-ready** backend boilerplate built with NestJS. It is designed to be scalable, type-safe, and fully localized.
|
||||||
|
|
||||||
|
### 🏗️ Core Philosophy
|
||||||
|
|
||||||
|
- **Type Safety First:** Strict TypeScript configuration. `any` is forbidden. DTOs are the source of truth.
|
||||||
|
- **Generic Abstraction:** `BaseService` and `BaseController` handle 80% of CRUD operations, allowing developers to focus on business logic.
|
||||||
|
- **i18n-Native:** Localization is not an afterthought. It is baked into the exception filters, response interceptors, and guards.
|
||||||
|
- **Security by Default:** JWT Auth, RBAC (Role-Based Access Control), Throttling, and Helmet are pre-configured.
|
||||||
|
|
||||||
|
### 📐 Architectural Decision Records (ADR)
|
||||||
|
|
||||||
|
_To understand WHY things are the way they are:_
|
||||||
|
|
||||||
|
1. **Handling i18n Assets:**
|
||||||
|
- **Problem:** Translation JSON files are not TypeScript code, so `tsc` ignores them during build.
|
||||||
|
- **Solution:** We configured `nest-cli.json` with `"assets": ["i18n/**/*"]`. This ensures `src/i18n` is copied to `dist/i18n` automatically.
|
||||||
|
- **Note:** When running with `node`, ensure `dist/main.js` can find these files.
|
||||||
|
|
||||||
|
2. **Global Response Wrapping:**
|
||||||
|
- **Mechanism:** `ResponseInterceptor` wraps all successful responses.
|
||||||
|
- **Feature:** It automatically translates the "Operation successful" message based on the `Accept-Language` header using `I18nService`.
|
||||||
|
- **Output Format:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"success": true,
|
||||||
|
"status": 200,
|
||||||
|
"message": "İşlem başarıyla tamamlandı", // Translated
|
||||||
|
"data": { ... }
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Centralized Error Handling:**
|
||||||
|
- **Mechanism:** `GlobalExceptionFilter` catches all `HttpException` and unknown `Error` types.
|
||||||
|
- **Feature:** It accepts error keys (e.g., `AUTH_REQUIRED`) and translates them using `i18n`. If a translation is found in `errors.json`, it is returned; otherwise, the original message is shown.
|
||||||
|
|
||||||
|
4. **UUID Generation:**
|
||||||
|
- **Decision:** We use Node.js native `crypto.randomUUID()` instead of the external `uuid` package to avoid CommonJS/ESM compatibility issues.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🚀 Quick Start for AI & Humans
|
||||||
|
|
||||||
|
### 1. Prerequisites
|
||||||
|
|
||||||
|
- **Node.js:** v20.19+ (LTS)
|
||||||
|
- **Docker:** For running PostgreSQL and Redis effortlessly.
|
||||||
|
- **Package Manager:** `npm` (Lockfile: `package-lock.json`)
|
||||||
|
|
||||||
|
### 2. Environment Setup
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cp .env.example .env
|
||||||
|
# ⚠️ CRITICAL: Ensure DATABASE_URL includes the username!
|
||||||
|
# Example: postgresql://postgres:password@localhost:15432/boilerplate_db
|
||||||
|
# Required for v20 prediction flow:
|
||||||
|
# AI_ENGINE_URL=http://127.0.0.1:8000
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Installation & Database
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Install dependencies
|
||||||
|
npm ci
|
||||||
|
|
||||||
|
# Start Infrastructure (Postgres + Redis)
|
||||||
|
docker-compose up -d postgres redis
|
||||||
|
|
||||||
|
# Generate Prisma Client (REQUIRED after install)
|
||||||
|
npx prisma generate
|
||||||
|
|
||||||
|
# Run Migrations
|
||||||
|
npx prisma migrate dev
|
||||||
|
|
||||||
|
# Seed Database (Optional - Creates Admin & Roles)
|
||||||
|
npx prisma db seed
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Running the App
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Debug Mode (Watch) - Best for Development
|
||||||
|
npm run start:dev
|
||||||
|
|
||||||
|
# Production Build & Run
|
||||||
|
npm run build
|
||||||
|
npm run start:prod
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🛡️ Response Standardization & Type Safety Protocol
|
||||||
|
|
||||||
|
This boilerplate enforces a strict **"No-Leak"** policy for API responses to ensure both Security and Developer Experience.
|
||||||
|
|
||||||
|
### 1. The `unknown` Type is Forbidden
|
||||||
|
|
||||||
|
- **Rule:** Controllers must NEVER return `ApiResponse<unknown>` or raw Prisma entities.
|
||||||
|
- **Why:** Returning raw entities risks exposing sensitive fields like `password` hashes or internal metadata. It also breaks contract visibility for frontend developers.
|
||||||
|
|
||||||
|
### 2. DTO Pattern & Serialization
|
||||||
|
|
||||||
|
- **Tool:** We use `class-transformer` for all response serialization.
|
||||||
|
- **Implementation:**
|
||||||
|
- All Response DTOs must use `@Exclude()` class-level decorator.
|
||||||
|
- Only fields explicitly marked with `@Expose()` are returned to the client.
|
||||||
|
- Controllers use `plainToInstance(UserResponseDto, data)` before returning data.
|
||||||
|
|
||||||
|
**Example:**
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// ✅ Good: Secure & Typed
|
||||||
|
@Get('me')
|
||||||
|
async getMe(@CurrentUser() user: User): Promise<ApiResponse<UserResponseDto>> {
|
||||||
|
return createSuccessResponse(plainToInstance(UserResponseDto, user));
|
||||||
|
}
|
||||||
|
|
||||||
|
// ❌ Bad: Leaks password hash & Weak Types
|
||||||
|
@Get('me')
|
||||||
|
async getMe(@CurrentUser() user: User) {
|
||||||
|
return createSuccessResponse(user);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## ⚡ High-Performance Caching (Redis Strategy)
|
||||||
|
|
||||||
|
To ensure enterprise-grade performance, we utilize **Redis** for caching frequently accessed data (e.g., Roles, Permissions).
|
||||||
|
|
||||||
|
- **Library:** `@nestjs/cache-manager` with `cache-manager-redis-yet` (Supports Redis v6+ / v7).
|
||||||
|
- **Configuration:** Global Cache Module in `AppModule`.
|
||||||
|
- **Strategy:** Read-heavy endpoints use `@UseInterceptors(CacheInterceptor)`.
|
||||||
|
- **Invalidation:** Write operations (Create/Update/Delete) manually invalidate relevant cache keys.
|
||||||
|
|
||||||
|
**Usage:**
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// 1. Automatic Caching
|
||||||
|
@Get('roles')
|
||||||
|
@UseInterceptors(CacheInterceptor)
|
||||||
|
@CacheKey('roles_list') // Unique Key
|
||||||
|
@CacheTTL(60000) // 60 Seconds
|
||||||
|
async getAllRoles() { ... }
|
||||||
|
|
||||||
|
// 2. Manual Invalidation (Inject CACHE_MANAGER)
|
||||||
|
async createRole(...) {
|
||||||
|
// ... create role logic
|
||||||
|
await this.cacheManager.del('roles_list'); // Clear cache
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🤖 Gemini AI Integration (Optional)
|
||||||
|
|
||||||
|
This boilerplate includes an **optional** AI module powered by Google's Gemini API. It's disabled by default and can be enabled during CLI setup or manually.
|
||||||
|
|
||||||
|
### Configuration
|
||||||
|
|
||||||
|
Add these to your `.env` file:
|
||||||
|
|
||||||
|
```env
|
||||||
|
# Enable Gemini AI features
|
||||||
|
ENABLE_GEMINI=true
|
||||||
|
|
||||||
|
# Your Google API Key (get from https://aistudio.google.com/apikey)
|
||||||
|
GOOGLE_API_KEY=your-api-key-here
|
||||||
|
|
||||||
|
# Model to use (optional, defaults to gemini-2.5-flash)
|
||||||
|
GEMINI_MODEL=gemini-2.5-flash
|
||||||
|
```
|
||||||
|
|
||||||
|
### Usage
|
||||||
|
|
||||||
|
The `GeminiService` is globally available when enabled:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
import { GeminiService } from './modules/gemini';
|
||||||
|
|
||||||
|
@Injectable()
|
||||||
|
export class MyService {
|
||||||
|
constructor(private readonly gemini: GeminiService) {}
|
||||||
|
|
||||||
|
async generateContent() {
|
||||||
|
// Check if Gemini is available
|
||||||
|
if (!this.gemini.isAvailable()) {
|
||||||
|
throw new Error('AI features are not enabled');
|
||||||
|
}
|
||||||
|
|
||||||
|
// 1. Simple Text Generation
|
||||||
|
const { text, usage } = await this.gemini.generateText(
|
||||||
|
'Write a product description for a coffee mug',
|
||||||
|
);
|
||||||
|
|
||||||
|
// 2. With System Prompt & Options
|
||||||
|
const { text: translation } = await this.gemini.generateText('Translate: Hello World', {
|
||||||
|
systemPrompt: 'You are a professional Turkish translator',
|
||||||
|
temperature: 0.3,
|
||||||
|
maxTokens: 500,
|
||||||
|
});
|
||||||
|
|
||||||
|
// 3. Multi-turn Chat
|
||||||
|
const { text: reply } = await this.gemini.chat([
|
||||||
|
{ role: 'user', content: 'What is TypeScript?' },
|
||||||
|
{
|
||||||
|
role: 'model',
|
||||||
|
content: 'TypeScript is a typed superset of JavaScript...',
|
||||||
|
},
|
||||||
|
{ role: 'user', content: 'Give me an example' },
|
||||||
|
]);
|
||||||
|
|
||||||
|
// 4. Structured JSON Output
|
||||||
|
interface ProductData {
|
||||||
|
name: string;
|
||||||
|
price: number;
|
||||||
|
features: string[];
|
||||||
|
}
|
||||||
|
|
||||||
|
const { data } = await this.gemini.generateJSON<ProductData>(
|
||||||
|
'Generate a product entry for a wireless mouse',
|
||||||
|
'{ name: string, price: number, features: string[] }',
|
||||||
|
);
|
||||||
|
console.log(data.name, data.price); // Fully typed!
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Available Methods
|
||||||
|
|
||||||
|
| Method | Description |
|
||||||
|
| ------------------------------------------- | ------------------------------------------------ |
|
||||||
|
| `isAvailable()` | Check if Gemini is properly configured and ready |
|
||||||
|
| `generateText(prompt, options?)` | Generate text from a single prompt |
|
||||||
|
| `chat(messages, options?)` | Multi-turn conversation |
|
||||||
|
| `generateJSON<T>(prompt, schema, options?)` | Generate and parse structured JSON |
|
||||||
|
|
||||||
|
### Options
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
interface GeminiGenerateOptions {
|
||||||
|
model?: string; // Override default model
|
||||||
|
systemPrompt?: string; // System instructions
|
||||||
|
temperature?: number; // Creativity (0-1)
|
||||||
|
maxTokens?: number; // Max response length
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## 🌍 Internationalization (i18n) Guide
|
||||||
|
|
||||||
|
A distinguishing feature of this project is its deep integration of `nestjs-i18n`.
|
||||||
|
|
||||||
|
- **Location:** `src/i18n/{lang}/`
|
||||||
|
- **Files:**
|
||||||
|
- `common.json`: Generic messages (success, welcome)
|
||||||
|
- `errors.json`: Error codes (AUTH_REQUIRED, USER_NOT_FOUND)
|
||||||
|
- `validation.json`: Validation messages (IS_EMAIL)
|
||||||
|
- `auth.json`: Auth-specific success messages (LOGIN_SUCCESS)
|
||||||
|
|
||||||
|
**How to Translate a New Error:**
|
||||||
|
|
||||||
|
1. Throw an exception with a key: `throw new ConflictException('EMAIL_EXISTS');`
|
||||||
|
2. Add `"EMAIL_EXISTS": "Email already taken"` to `src/i18n/en/errors.json`.
|
||||||
|
3. Add Turkish translation to `src/i18n/tr/errors.json`.
|
||||||
|
4. Start (or restart) the server; the `GlobalExceptionFilter` handles the rest.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🧪 Testing & CI/CD
|
||||||
|
|
||||||
|
- **GitHub Actions:** `.github/workflows/ci.yml` handles build and linting checks on push.
|
||||||
|
- **Local Testing:**
|
||||||
|
```bash
|
||||||
|
npm run test # Unit tests
|
||||||
|
npm run test:e2e # End-to-End tests
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 📂 System Map (Directory Structure)
|
||||||
|
|
||||||
|
```
|
||||||
|
src/
|
||||||
|
├── app.module.ts # Root module (Redis, Config, i18n setup)
|
||||||
|
├── main.ts # Entry point
|
||||||
|
├── common/ # Shared resources
|
||||||
|
│ ├── base/ # Abstract BaseService & BaseController (CRUD)
|
||||||
|
│ ├── types/ # Interfaces (ApiResponse, PaginatedData)
|
||||||
|
│ ├── filters/ # Global Exception Filter
|
||||||
|
│ └── interceptors/ # Response Interceptor
|
||||||
|
├── config/ # Application configuration
|
||||||
|
├── database/ # Prisma Service
|
||||||
|
├── i18n/ # Localization assets
|
||||||
|
└── modules/ # Feature modules
|
||||||
|
├── admin/ # Admin capabilities (Roles, Permissions + Caching)
|
||||||
|
│ ├── admin.controller.ts
|
||||||
|
│ └── dto/ # Admin Response DTOs
|
||||||
|
├── auth/ # Authentication layer
|
||||||
|
├── gemini/ # 🤖 Optional AI module (Google Gemini)
|
||||||
|
├── health/ # Health checks
|
||||||
|
└── users/ # User management
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🛠️ Troubleshooting (Known Issues)
|
||||||
|
|
||||||
|
**1. `EADDRINUSE: address already in use`**
|
||||||
|
|
||||||
|
- **Fix:** `lsof -ti:3000 | xargs kill -9`
|
||||||
|
|
||||||
|
**2. `PrismaClientInitializationError` / Database Connection Hangs**
|
||||||
|
|
||||||
|
- **Fix:** Check that `DATABASE_URL` in `.env` is correct, and make sure the services started by `docker-compose up` are running.
|
||||||
|
|
||||||
|
**3. Cache Manager Deprecation Warnings**
|
||||||
|
|
||||||
|
- **Context:** `cache-manager-redis-yet` may show deprecation warnings regarding `Keyv`. This is expected as we wait for the ecosystem to stabilize on `cache-manager` v6/v7. The current implementation is fully functional.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 📃 License
|
||||||
|
|
||||||
|
This project is proprietary and confidential.
|
||||||
@@ -0,0 +1,43 @@
|
|||||||
|
# Python
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
*.egg-info/
|
||||||
|
*.egg
|
||||||
|
dist/
|
||||||
|
build/
|
||||||
|
.eggs/
|
||||||
|
|
||||||
|
# Virtual environment
|
||||||
|
venv/
|
||||||
|
.venv/
|
||||||
|
env/
|
||||||
|
|
||||||
|
# IDE
|
||||||
|
.idea/
|
||||||
|
.vscode/
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
|
||||||
|
# OS
|
||||||
|
.DS_Store
|
||||||
|
Thumbs.db
|
||||||
|
|
||||||
|
# Environment
|
||||||
|
.env
|
||||||
|
.env.*
|
||||||
|
|
||||||
|
# Test & Coverage
|
||||||
|
.pytest_cache/
|
||||||
|
htmlcov/
|
||||||
|
.coverage
|
||||||
|
*.cover
|
||||||
|
|
||||||
|
# Logs
|
||||||
|
*.log
|
||||||
|
|
||||||
|
# Training data (large CSVs)
|
||||||
|
data/training_data*.csv
|
||||||
|
|
||||||
|
# Reports (generated at runtime)
|
||||||
|
reports/
|
||||||
Executable
+39
@@ -0,0 +1,39 @@
|
|||||||
|
# --- AI Engine Dockerfile ---
|
||||||
|
# Python 3.11 with v20+ prediction stack (XGBoost + LightGBM)
|
||||||
|
|
||||||
|
FROM python:3.11-slim
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# System dependencies
|
||||||
|
RUN apt-get update && apt-get install -y \
|
||||||
|
gcc \
|
||||||
|
libpq-dev \
|
||||||
|
curl \
|
||||||
|
libgomp1 \
|
||||||
|
procps \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Python dependencies
|
||||||
|
# Install PyTorch CPU version separately to save space
|
||||||
|
RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu
|
||||||
|
|
||||||
|
# Copy requirements (without torch)
|
||||||
|
COPY requirements-docker.txt requirements.txt
|
||||||
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
|
# Copy application code
|
||||||
|
COPY . .
|
||||||
|
|
||||||
|
# Create models directory
|
||||||
|
RUN mkdir -p /app/models
|
||||||
|
|
||||||
|
# Expose port
|
||||||
|
EXPOSE 8000
|
||||||
|
|
||||||
|
# Health check
|
||||||
|
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
|
||||||
|
CMD python -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8000/health')" || exit 1
|
||||||
|
|
||||||
|
# Start FastAPI with uvicorn
|
||||||
|
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||||
@@ -0,0 +1,874 @@
|
|||||||
|
{
|
||||||
|
"meta":{"test_sets":["test"],"test_metrics":[{"best_value":"Min","name":"Logloss"}],"learn_metrics":[{"best_value":"Min","name":"Logloss"}],"launch_mode":"Train","parameters":"","iteration_count":2000,"learn_sets":["learn"],"name":"experiment"},
|
||||||
|
"iterations":[
|
||||||
|
{"learn":[0.692389481],"iteration":0,"passed_time":0.04679785798,"remaining_time":93.54891809,"test":[0.6924099937]},
|
||||||
|
{"learn":[0.6916338586],"iteration":1,"passed_time":0.08350330552,"remaining_time":83.41980222,"test":[0.6916660956]},
|
||||||
|
{"learn":[0.6910159214],"iteration":2,"passed_time":0.132821758,"remaining_time":88.41501689,"test":[0.691108145]},
|
||||||
|
{"learn":[0.6903417151],"iteration":3,"passed_time":0.162826233,"remaining_time":81.25029026,"test":[0.6904585078]},
|
||||||
|
{"learn":[0.6896961461],"iteration":4,"passed_time":0.1969265393,"remaining_time":78.57368918,"test":[0.689812816]},
|
||||||
|
{"learn":[0.6890979366],"iteration":5,"passed_time":0.2309352918,"remaining_time":76.74749531,"test":[0.689192261]},
|
||||||
|
{"learn":[0.6884946167],"iteration":6,"passed_time":0.2693987513,"remaining_time":76.70167304,"test":[0.6886032715]},
|
||||||
|
{"learn":[0.6879503686],"iteration":7,"passed_time":0.3199759681,"remaining_time":79.67401607,"test":[0.6880706742]},
|
||||||
|
{"learn":[0.6874528094],"iteration":8,"passed_time":0.3645802206,"remaining_time":80.65324659,"test":[0.6876192378]},
|
||||||
|
{"learn":[0.6869036785],"iteration":9,"passed_time":0.4116507506,"remaining_time":81.91849936,"test":[0.6870868859]},
|
||||||
|
{"learn":[0.6863761921],"iteration":10,"passed_time":0.4562469316,"remaining_time":82.49774064,"test":[0.6865493528]},
|
||||||
|
{"learn":[0.6859038678],"iteration":11,"passed_time":0.491541699,"remaining_time":81.43207481,"test":[0.686105086]},
|
||||||
|
{"learn":[0.685410175],"iteration":12,"passed_time":0.5221556769,"remaining_time":79.80948692,"test":[0.6856345086]},
|
||||||
|
{"learn":[0.6849483392],"iteration":13,"passed_time":0.5553110353,"remaining_time":78.77483686,"test":[0.6852027185]},
|
||||||
|
{"learn":[0.6845417792],"iteration":14,"passed_time":0.5952927147,"remaining_time":78.77706925,"test":[0.6848238481]},
|
||||||
|
{"learn":[0.6841038875],"iteration":15,"passed_time":0.6300274185,"remaining_time":78.12339989,"test":[0.6844045699]},
|
||||||
|
{"learn":[0.6836957422],"iteration":16,"passed_time":0.662600544,"remaining_time":77.29040464,"test":[0.6840077621]},
|
||||||
|
{"learn":[0.6832947461],"iteration":17,"passed_time":0.7004221698,"remaining_time":77.12426337,"test":[0.6836197496]},
|
||||||
|
{"learn":[0.6829014105],"iteration":18,"passed_time":0.7300844347,"remaining_time":76.12090869,"test":[0.6832475033]},
|
||||||
|
{"learn":[0.6825264546],"iteration":19,"passed_time":0.7641559459,"remaining_time":75.65143865,"test":[0.6829012069]},
|
||||||
|
{"learn":[0.6822106577],"iteration":20,"passed_time":0.8040792063,"remaining_time":75.77489282,"test":[0.6825880966]},
|
||||||
|
{"learn":[0.6818649349],"iteration":21,"passed_time":0.8356039756,"remaining_time":75.12839381,"test":[0.6822424968]},
|
||||||
|
{"learn":[0.6815467855],"iteration":22,"passed_time":0.8861440327,"remaining_time":76.16985881,"test":[0.6819180513]},
|
||||||
|
{"learn":[0.6812293319],"iteration":23,"passed_time":0.920219319,"remaining_time":75.76472393,"test":[0.6816384467]},
|
||||||
|
{"learn":[0.6808837443],"iteration":24,"passed_time":0.960164738,"remaining_time":75.8530143,"test":[0.6813262593]},
|
||||||
|
{"learn":[0.6805816494],"iteration":25,"passed_time":0.9895547925,"remaining_time":75.13004463,"test":[0.6810353411]},
|
||||||
|
{"learn":[0.6803209634],"iteration":26,"passed_time":1.025550161,"remaining_time":74.94112844,"test":[0.6808138172]},
|
||||||
|
{"learn":[0.6800350862],"iteration":27,"passed_time":1.060852064,"remaining_time":74.71429535,"test":[0.6805550049]},
|
||||||
|
{"learn":[0.6797703947],"iteration":28,"passed_time":1.10467538,"remaining_time":75.07983357,"test":[0.680347991]},
|
||||||
|
{"learn":[0.6794926675],"iteration":29,"passed_time":1.141766834,"remaining_time":74.97602208,"test":[0.680089679]},
|
||||||
|
{"learn":[0.6792251865],"iteration":30,"passed_time":1.180421588,"remaining_time":74.9758099,"test":[0.6798451919]},
|
||||||
|
{"learn":[0.6789670166],"iteration":31,"passed_time":1.213674604,"remaining_time":74.64098814,"test":[0.6796090443]},
|
||||||
|
{"learn":[0.678722402],"iteration":32,"passed_time":1.245848393,"remaining_time":74.26011482,"test":[0.6793890865]},
|
||||||
|
{"learn":[0.678476935],"iteration":33,"passed_time":1.287262512,"remaining_time":74.43406171,"test":[0.6791683772]},
|
||||||
|
{"learn":[0.6782297335],"iteration":34,"passed_time":1.327473991,"remaining_time":74.52818262,"test":[0.6789766369]},
|
||||||
|
{"learn":[0.6780226701],"iteration":35,"passed_time":1.3760549,"remaining_time":75.07143955,"test":[0.6787930242]},
|
||||||
|
{"learn":[0.6778291026],"iteration":36,"passed_time":1.427620019,"remaining_time":75.74102965,"test":[0.6786087714]},
|
||||||
|
{"learn":[0.6776045324],"iteration":37,"passed_time":1.468182407,"remaining_time":75.80457587,"test":[0.6784161299]},
|
||||||
|
{"learn":[0.6773969079],"iteration":38,"passed_time":1.508647379,"remaining_time":75.85788487,"test":[0.6782227897]},
|
||||||
|
{"learn":[0.6771819602],"iteration":39,"passed_time":1.549435187,"remaining_time":75.92232419,"test":[0.6780242369]},
|
||||||
|
{"learn":[0.6769816736],"iteration":40,"passed_time":1.586036608,"remaining_time":75.78160282,"test":[0.6778499631]},
|
||||||
|
{"learn":[0.6767984027],"iteration":41,"passed_time":1.621458864,"remaining_time":75.59086802,"test":[0.6776975784]},
|
||||||
|
{"learn":[0.6766201184],"iteration":42,"passed_time":1.663424818,"remaining_time":75.70517136,"test":[0.6775231674]},
|
||||||
|
{"learn":[0.6764394377],"iteration":43,"passed_time":1.70110089,"remaining_time":75.62166686,"test":[0.6773582124]},
|
||||||
|
{"learn":[0.6762698797],"iteration":44,"passed_time":1.739954496,"remaining_time":75.59135644,"test":[0.6772234666]},
|
||||||
|
{"learn":[0.6760974263],"iteration":45,"passed_time":1.776461223,"remaining_time":75.46098325,"test":[0.6770659843]},
|
||||||
|
{"learn":[0.6759245179],"iteration":46,"passed_time":1.819761638,"remaining_time":75.61690381,"test":[0.6769049529]},
|
||||||
|
{"learn":[0.6757673909],"iteration":47,"passed_time":1.869479807,"remaining_time":76.02551217,"test":[0.6767664194]},
|
||||||
|
{"learn":[0.6756172628],"iteration":48,"passed_time":1.916010121,"remaining_time":76.28848462,"test":[0.6766584917]},
|
||||||
|
{"learn":[0.675474531],"iteration":49,"passed_time":1.953635244,"remaining_time":76.19177452,"test":[0.6765507257]},
|
||||||
|
{"learn":[0.6753286933],"iteration":50,"passed_time":1.993876686,"remaining_time":76.19736591,"test":[0.6764489911]},
|
||||||
|
{"learn":[0.6751900513],"iteration":51,"passed_time":2.038943041,"remaining_time":76.38194316,"test":[0.6763947956]},
|
||||||
|
{"learn":[0.6750574835],"iteration":52,"passed_time":2.080276765,"remaining_time":76.42073325,"test":[0.6762778712]},
|
||||||
|
{"learn":[0.6749329567],"iteration":53,"passed_time":2.158576742,"remaining_time":77.78871001,"test":[0.6761865366]},
|
||||||
|
{"learn":[0.6748033265],"iteration":54,"passed_time":2.220619687,"remaining_time":78.52918711,"test":[0.6760679685]},
|
||||||
|
{"learn":[0.6746797823],"iteration":55,"passed_time":2.286959228,"remaining_time":79.39015604,"test":[0.6759774874]},
|
||||||
|
{"learn":[0.674535525],"iteration":56,"passed_time":2.328472096,"remaining_time":79.3723032,"test":[0.6758500622]},
|
||||||
|
{"learn":[0.6744256514],"iteration":57,"passed_time":2.367031568,"remaining_time":79.25474665,"test":[0.6757625065]},
|
||||||
|
{"learn":[0.674310819],"iteration":58,"passed_time":2.409161286,"remaining_time":79.25732298,"test":[0.6756876412]},
|
||||||
|
{"learn":[0.6741967947],"iteration":59,"passed_time":2.444825903,"remaining_time":79.04937087,"test":[0.6756151069]},
|
||||||
|
{"learn":[0.6740879654],"iteration":60,"passed_time":2.48484996,"remaining_time":78.98564055,"test":[0.6755303655]},
|
||||||
|
{"learn":[0.6739772476],"iteration":61,"passed_time":2.521603395,"remaining_time":78.8204416,"test":[0.6754565036]},
|
||||||
|
{"learn":[0.67388281],"iteration":62,"passed_time":2.554102332,"remaining_time":78.5285114,"test":[0.6753738983]},
|
||||||
|
{"learn":[0.6737789726],"iteration":63,"passed_time":2.593937938,"remaining_time":78.46662263,"test":[0.6752897299]},
|
||||||
|
{"learn":[0.6736812332],"iteration":64,"passed_time":2.623889155,"remaining_time":78.11116175,"test":[0.6752115539]},
|
||||||
|
{"learn":[0.6735930009],"iteration":65,"passed_time":2.660795108,"remaining_time":77.96935967,"test":[0.6751595431]},
|
||||||
|
{"learn":[0.6734947116],"iteration":66,"passed_time":2.695822592,"remaining_time":77.77649358,"test":[0.6750764658]},
|
||||||
|
{"learn":[0.6733961481],"iteration":67,"passed_time":2.725876686,"remaining_time":77.44696703,"test":[0.6750179194]},
|
||||||
|
{"learn":[0.6732990195],"iteration":68,"passed_time":2.761848366,"remaining_time":77.29172746,"test":[0.6749408803]},
|
||||||
|
{"learn":[0.6732133575],"iteration":69,"passed_time":2.791847449,"remaining_time":76.97522253,"test":[0.6748795802]},
|
||||||
|
{"learn":[0.673111539],"iteration":70,"passed_time":2.824541003,"remaining_time":76.73999429,"test":[0.674790372]},
|
||||||
|
{"learn":[0.6730080451],"iteration":71,"passed_time":2.861023716,"remaining_time":76.61185729,"test":[0.6747239773]},
|
||||||
|
{"learn":[0.6729157861],"iteration":72,"passed_time":2.897136588,"remaining_time":76.47646857,"test":[0.6746701254]},
|
||||||
|
{"learn":[0.6728347949],"iteration":73,"passed_time":2.935718661,"remaining_time":76.40802894,"test":[0.6746120937]},
|
||||||
|
{"learn":[0.6727640693],"iteration":74,"passed_time":3.040023476,"remaining_time":78.02726921,"test":[0.6745550085]},
|
||||||
|
{"learn":[0.6726808811],"iteration":75,"passed_time":3.097341794,"remaining_time":78.41165279,"test":[0.6744855074]},
|
||||||
|
{"learn":[0.6726029645],"iteration":76,"passed_time":3.152948955,"remaining_time":78.74182909,"test":[0.6744264172]},
|
||||||
|
{"learn":[0.6725356026],"iteration":77,"passed_time":3.216126808,"remaining_time":79.24866314,"test":[0.674381715]},
|
||||||
|
{"learn":[0.6724606887],"iteration":78,"passed_time":3.256861302,"remaining_time":79.19532355,"test":[0.6743331681]},
|
||||||
|
{"learn":[0.6723849561],"iteration":79,"passed_time":3.305679851,"remaining_time":79.33631641,"test":[0.67428564]},
|
||||||
|
{"learn":[0.6723050519],"iteration":80,"passed_time":3.348083566,"remaining_time":79.32064647,"test":[0.6742202413]},
|
||||||
|
{"learn":[0.6722508802],"iteration":81,"passed_time":3.38129387,"remaining_time":79.08928832,"test":[0.6741620971]},
|
||||||
|
{"learn":[0.6721773904],"iteration":82,"passed_time":3.41660066,"remaining_time":78.91112609,"test":[0.6741109453]},
|
||||||
|
{"learn":[0.6721007598],"iteration":83,"passed_time":3.48099347,"remaining_time":79.39980344,"test":[0.6740556003]},
|
||||||
|
{"learn":[0.6720353564],"iteration":84,"passed_time":3.535359896,"remaining_time":79.64957884,"test":[0.6740146772]},
|
||||||
|
{"learn":[0.6719790902],"iteration":85,"passed_time":3.581806996,"remaining_time":79.71603012,"test":[0.673983295]},
|
||||||
|
{"learn":[0.6719140024],"iteration":86,"passed_time":3.612293661,"remaining_time":79.42893993,"test":[0.6739595301]},
|
||||||
|
{"learn":[0.6718573633],"iteration":87,"passed_time":3.644530261,"remaining_time":79.18570293,"test":[0.6739336659]},
|
||||||
|
{"learn":[0.671795602],"iteration":88,"passed_time":3.67809653,"remaining_time":78.97575809,"test":[0.673890361]},
|
||||||
|
{"learn":[0.6717369134],"iteration":89,"passed_time":3.712417516,"remaining_time":78.78574951,"test":[0.673863586]},
|
||||||
|
{"learn":[0.6716711079],"iteration":90,"passed_time":3.743502971,"remaining_time":78.53128759,"test":[0.6738190616]},
|
||||||
|
{"learn":[0.6716070843],"iteration":91,"passed_time":3.775351679,"remaining_time":78.2975109,"test":[0.6737799295]},
|
||||||
|
{"learn":[0.6715517232],"iteration":92,"passed_time":3.806186247,"remaining_time":78.04728142,"test":[0.6737364374]},
|
||||||
|
{"learn":[0.6714957378],"iteration":93,"passed_time":3.83798807,"remaining_time":77.82133257,"test":[0.6737093719]},
|
||||||
|
{"learn":[0.6714364567],"iteration":94,"passed_time":3.871278973,"remaining_time":77.62933099,"test":[0.6736630475]},
|
||||||
|
{"learn":[0.6713881758],"iteration":95,"passed_time":3.913531039,"remaining_time":77.6183656,"test":[0.67364367]},
|
||||||
|
{"learn":[0.6713336502],"iteration":96,"passed_time":3.945433866,"remaining_time":77.40371802,"test":[0.6735998081]},
|
||||||
|
{"learn":[0.6712700267],"iteration":97,"passed_time":3.989716281,"remaining_time":77.43306496,"test":[0.6735526984]},
|
||||||
|
{"learn":[0.6712154424],"iteration":98,"passed_time":4.020621946,"remaining_time":77.20406384,"test":[0.6735012924]},
|
||||||
|
{"learn":[0.6711600413],"iteration":99,"passed_time":4.053732144,"remaining_time":77.02091074,"test":[0.6734818024]},
|
||||||
|
{"learn":[0.6711060533],"iteration":100,"passed_time":4.084124711,"remaining_time":76.78963194,"test":[0.6734379341]},
|
||||||
|
{"learn":[0.6710494943],"iteration":101,"passed_time":4.116434744,"remaining_time":76.59797199,"test":[0.6734059869]},
|
||||||
|
{"learn":[0.6709936897],"iteration":102,"passed_time":4.148330356,"remaining_time":76.40177365,"test":[0.6733740852]},
|
||||||
|
{"learn":[0.6709472183],"iteration":103,"passed_time":4.176511193,"remaining_time":76.14101176,"test":[0.6733330971]},
|
||||||
|
{"learn":[0.6708914508],"iteration":104,"passed_time":4.2025065,"remaining_time":75.84523636,"test":[0.6733060254]},
|
||||||
|
{"learn":[0.6708388195],"iteration":105,"passed_time":4.232975206,"remaining_time":75.63448151,"test":[0.6732755898]},
|
||||||
|
{"learn":[0.6707885854],"iteration":106,"passed_time":4.261364958,"remaining_time":75.39031649,"test":[0.6732294722]},
|
||||||
|
{"learn":[0.6707454167],"iteration":107,"passed_time":4.290824713,"remaining_time":75.1688922,"test":[0.6732035176]},
|
||||||
|
{"learn":[0.6706973013],"iteration":108,"passed_time":4.324192493,"remaining_time":75.01878903,"test":[0.673196437]},
|
||||||
|
{"learn":[0.6706577031],"iteration":109,"passed_time":4.351512102,"remaining_time":74.76688976,"test":[0.6731652709]},
|
||||||
|
{"learn":[0.67061108],"iteration":110,"passed_time":4.38641502,"remaining_time":74.64808984,"test":[0.673138808]},
|
||||||
|
{"learn":[0.6705625485],"iteration":111,"passed_time":4.424063991,"remaining_time":74.57707871,"test":[0.6731062725]},
|
||||||
|
{"learn":[0.6705146484],"iteration":112,"passed_time":4.45863849,"remaining_time":74.45531709,"test":[0.6730726625]},
|
||||||
|
{"learn":[0.6704704423],"iteration":113,"passed_time":4.497153675,"remaining_time":74.40027922,"test":[0.6730285927]},
|
||||||
|
{"learn":[0.6704155922],"iteration":114,"passed_time":4.533368584,"remaining_time":74.30782417,"test":[0.6729872702]},
|
||||||
|
{"learn":[0.6703687117],"iteration":115,"passed_time":4.564651269,"remaining_time":74.13623268,"test":[0.6729721425]},
|
||||||
|
{"learn":[0.6703324232],"iteration":116,"passed_time":4.596824343,"remaining_time":73.98136956,"test":[0.6729564624]},
|
||||||
|
{"learn":[0.6702884624],"iteration":117,"passed_time":4.628377967,"remaining_time":73.81870623,"test":[0.6729312424]},
|
||||||
|
{"learn":[0.670253478],"iteration":118,"passed_time":4.668052254,"remaining_time":73.78660748,"test":[0.6729354345]},
|
||||||
|
{"learn":[0.6702140804],"iteration":119,"passed_time":4.692108266,"remaining_time":73.50969617,"test":[0.6729085401]},
|
||||||
|
{"learn":[0.6701682529],"iteration":120,"passed_time":4.723741667,"remaining_time":73.354633,"test":[0.6728898322]},
|
||||||
|
{"learn":[0.6701320588],"iteration":121,"passed_time":4.756626425,"remaining_time":73.22085595,"test":[0.6728773638]},
|
||||||
|
{"learn":[0.6700939824],"iteration":122,"passed_time":4.788008428,"remaining_time":73.06578714,"test":[0.6728618874]},
|
||||||
|
{"learn":[0.6700655902],"iteration":123,"passed_time":4.815546648,"remaining_time":72.85456058,"test":[0.6728540413]},
|
||||||
|
{"learn":[0.6700190743],"iteration":124,"passed_time":4.843186806,"remaining_time":72.64780209,"test":[0.6728441291]},
|
||||||
|
{"learn":[0.6699792296],"iteration":125,"passed_time":4.875548614,"remaining_time":72.51411192,"test":[0.672815631]},
|
||||||
|
{"learn":[0.6699379404],"iteration":126,"passed_time":4.916953662,"remaining_time":72.51538748,"test":[0.6728082021]},
|
||||||
|
{"learn":[0.669895454],"iteration":127,"passed_time":4.952918369,"remaining_time":72.43643115,"test":[0.6727900064]},
|
||||||
|
{"learn":[0.6698563938],"iteration":128,"passed_time":4.991585558,"remaining_time":72.39733782,"test":[0.6727649552]},
|
||||||
|
{"learn":[0.6698215571],"iteration":129,"passed_time":5.028084166,"remaining_time":72.32705685,"test":[0.6727467657]},
|
||||||
|
{"learn":[0.6697857067],"iteration":130,"passed_time":5.059198996,"remaining_time":72.18048033,"test":[0.6727396032]},
|
||||||
|
{"learn":[0.6697449303],"iteration":131,"passed_time":5.096035515,"remaining_time":72.1166238,"test":[0.6727245271]},
|
||||||
|
{"learn":[0.6697052425],"iteration":132,"passed_time":5.125282589,"remaining_time":71.94663604,"test":[0.6726955143]},
|
||||||
|
{"learn":[0.6696695553],"iteration":133,"passed_time":5.156392608,"remaining_time":71.80469109,"test":[0.67269209]},
|
||||||
|
{"learn":[0.6696269265],"iteration":134,"passed_time":5.190402292,"remaining_time":71.70444647,"test":[0.672677932]},
|
||||||
|
{"learn":[0.6695969271],"iteration":135,"passed_time":5.221466142,"remaining_time":71.56480065,"test":[0.6726540285]},
|
||||||
|
{"learn":[0.6695489786],"iteration":136,"passed_time":5.251144663,"remaining_time":71.40790151,"test":[0.6726288583]},
|
||||||
|
{"learn":[0.6695173859],"iteration":137,"passed_time":5.274361693,"remaining_time":71.16566285,"test":[0.6725863431]},
|
||||||
|
{"learn":[0.6694811164],"iteration":138,"passed_time":5.309398952,"remaining_time":71.08483058,"test":[0.6725837967]},
|
||||||
|
{"learn":[0.6694477439],"iteration":139,"passed_time":5.344693175,"remaining_time":71.00806646,"test":[0.6725772977]},
|
||||||
|
{"learn":[0.6694082161],"iteration":140,"passed_time":5.377737126,"remaining_time":70.90222211,"test":[0.6725685594]},
|
||||||
|
{"learn":[0.6693679185],"iteration":141,"passed_time":5.416087925,"remaining_time":70.8668406,"test":[0.6725553829]},
|
||||||
|
{"learn":[0.6693341916],"iteration":142,"passed_time":5.452286939,"remaining_time":70.80347444,"test":[0.6725484347]},
|
||||||
|
{"learn":[0.6692933159],"iteration":143,"passed_time":5.490006789,"remaining_time":70.7600875,"test":[0.6725306172]},
|
||||||
|
{"learn":[0.6692619696],"iteration":144,"passed_time":5.521869859,"remaining_time":70.64185233,"test":[0.672543149]},
|
||||||
|
{"learn":[0.6692229289],"iteration":145,"passed_time":5.553520721,"remaining_time":70.5221056,"test":[0.6725196247]},
|
||||||
|
{"learn":[0.6691840164],"iteration":146,"passed_time":5.582178524,"remaining_time":70.3658286,"test":[0.6725226452]},
|
||||||
|
{"learn":[0.6691581406],"iteration":147,"passed_time":5.611368671,"remaining_time":70.21793769,"test":[0.6725056913]},
|
||||||
|
{"learn":[0.6691177196],"iteration":148,"passed_time":5.636941079,"remaining_time":70.02669757,"test":[0.6724771476]},
|
||||||
|
{"learn":[0.6690851126],"iteration":149,"passed_time":5.673704689,"remaining_time":69.97569117,"test":[0.6724439435]},
|
||||||
|
{"learn":[0.6690518144],"iteration":150,"passed_time":5.706346207,"remaining_time":69.87439826,"test":[0.672442532]},
|
||||||
|
{"learn":[0.6690149711],"iteration":151,"passed_time":5.738210991,"remaining_time":69.76456521,"test":[0.6724303064]},
|
||||||
|
{"learn":[0.668993877],"iteration":152,"passed_time":5.765951318,"remaining_time":69.60596133,"test":[0.6724235788]},
|
||||||
|
{"learn":[0.6689596579],"iteration":153,"passed_time":5.795573467,"remaining_time":69.47161442,"test":[0.6724294499]},
|
||||||
|
{"learn":[0.6689372651],"iteration":154,"passed_time":5.81744896,"remaining_time":69.24640858,"test":[0.6724285935]},
|
||||||
|
{"learn":[0.6689003045],"iteration":155,"passed_time":5.853529431,"remaining_time":69.19171968,"test":[0.6724172017]},
|
||||||
|
{"learn":[0.6688680182],"iteration":156,"passed_time":5.888380392,"remaining_time":69.12283479,"test":[0.6724130745]},
|
||||||
|
{"learn":[0.6688348164],"iteration":157,"passed_time":5.924601775,"remaining_time":69.07035741,"test":[0.6723860878]},
|
||||||
|
{"learn":[0.6687947046],"iteration":158,"passed_time":5.964531924,"remaining_time":69.06102687,"test":[0.6723707604]},
|
||||||
|
{"learn":[0.6687605251],"iteration":159,"passed_time":5.996805452,"remaining_time":68.9632627,"test":[0.6723566111]},
|
||||||
|
{"learn":[0.668726253],"iteration":160,"passed_time":6.022341459,"remaining_time":68.78935368,"test":[0.6723469906]},
|
||||||
|
{"learn":[0.6686862718],"iteration":161,"passed_time":6.05082584,"remaining_time":68.65072774,"test":[0.6723287161]},
|
||||||
|
{"learn":[0.668663478],"iteration":162,"passed_time":6.079027554,"remaining_time":68.51026759,"test":[0.6723155898]},
|
||||||
|
{"learn":[0.6686399521],"iteration":163,"passed_time":6.108511297,"remaining_time":68.38552891,"test":[0.6722970834]},
|
||||||
|
{"learn":[0.6686058279],"iteration":164,"passed_time":6.140719309,"remaining_time":68.29224202,"test":[0.6722872244]},
|
||||||
|
{"learn":[0.6685761282],"iteration":165,"passed_time":6.169540017,"remaining_time":68.16226742,"test":[0.6722800481]},
|
||||||
|
{"learn":[0.6685469327],"iteration":166,"passed_time":6.2020892,"remaining_time":68.07442817,"test":[0.6722550973]},
|
||||||
|
{"learn":[0.6685157003],"iteration":167,"passed_time":6.231576547,"remaining_time":67.95385854,"test":[0.6722394313]},
|
||||||
|
{"learn":[0.6684805143],"iteration":168,"passed_time":6.263261652,"remaining_time":67.85817802,"test":[0.6722204135]},
|
||||||
|
{"learn":[0.6684485765],"iteration":169,"passed_time":6.295102833,"remaining_time":67.7649305,"test":[0.6721982148]},
|
||||||
|
{"learn":[0.6684144429],"iteration":170,"passed_time":6.325415964,"remaining_time":67.65605729,"test":[0.6721971176]},
|
||||||
|
{"learn":[0.6683849752],"iteration":171,"passed_time":6.35697084,"remaining_time":67.56129474,"test":[0.6721880705]},
|
||||||
|
{"learn":[0.6683568537],"iteration":172,"passed_time":6.395913563,"remaining_time":67.5452837,"test":[0.672179176]},
|
||||||
|
{"learn":[0.6683266628],"iteration":173,"passed_time":6.437330522,"remaining_time":67.55497433,"test":[0.6721769709]},
|
||||||
|
{"learn":[0.6682937842],"iteration":174,"passed_time":6.472195712,"remaining_time":67.49575528,"test":[0.6721693215]},
|
||||||
|
{"learn":[0.6682657097],"iteration":175,"passed_time":6.503044842,"remaining_time":67.395192,"test":[0.6721581386]},
|
||||||
|
{"learn":[0.6682301443],"iteration":176,"passed_time":6.533528251,"remaining_time":67.29164972,"test":[0.6721638661]},
|
||||||
|
{"learn":[0.6681995916],"iteration":177,"passed_time":6.562589882,"remaining_time":67.17437509,"test":[0.6721598475]},
|
||||||
|
{"learn":[0.6681658267],"iteration":178,"passed_time":6.590816982,"remaining_time":67.04959623,"test":[0.6721433342]},
|
||||||
|
{"learn":[0.6681422687],"iteration":179,"passed_time":6.624646227,"remaining_time":66.98253407,"test":[0.6721335599]},
|
||||||
|
{"learn":[0.6681216601],"iteration":180,"passed_time":6.655147334,"remaining_time":66.88239227,"test":[0.6721300594]},
|
||||||
|
{"learn":[0.6680899019],"iteration":181,"passed_time":6.687788902,"remaining_time":66.80439684,"test":[0.6721153533]},
|
||||||
|
{"learn":[0.6680676394],"iteration":182,"passed_time":6.718057043,"remaining_time":66.7033314,"test":[0.6721076397]},
|
||||||
|
{"learn":[0.6680413672],"iteration":183,"passed_time":6.751300957,"remaining_time":66.6324051,"test":[0.6721009911]},
|
||||||
|
{"learn":[0.6680088406],"iteration":184,"passed_time":6.784288393,"remaining_time":66.55936991,"test":[0.6720999252]},
|
||||||
|
{"learn":[0.6679873982],"iteration":185,"passed_time":6.810905309,"remaining_time":66.42463565,"test":[0.6720953028]},
|
||||||
|
{"learn":[0.6679663544],"iteration":186,"passed_time":6.832974292,"remaining_time":66.24696466,"test":[0.6720942505]},
|
||||||
|
{"learn":[0.6679417375],"iteration":187,"passed_time":6.867184511,"remaining_time":66.18796986,"test":[0.6720856237]},
|
||||||
|
{"learn":[0.6679100197],"iteration":188,"passed_time":6.918652024,"remaining_time":66.29459691,"test":[0.6720876136]},
|
||||||
|
{"learn":[0.667881208],"iteration":189,"passed_time":6.96948149,"remaining_time":66.39348156,"test":[0.6720880182]},
|
||||||
|
{"learn":[0.6678475427],"iteration":190,"passed_time":7.018176318,"remaining_time":66.47058094,"test":[0.6720743856]},
|
||||||
|
{"learn":[0.6678310341],"iteration":191,"passed_time":7.074099623,"remaining_time":66.61443812,"test":[0.6720598415]},
|
||||||
|
{"learn":[0.6678060257],"iteration":192,"passed_time":7.117099742,"remaining_time":66.63522919,"test":[0.6720563492]},
|
||||||
|
{"learn":[0.6677789336],"iteration":193,"passed_time":7.191058554,"remaining_time":66.94356571,"test":[0.6720389527]},
|
||||||
|
{"learn":[0.6677478773],"iteration":194,"passed_time":7.2421897,"remaining_time":67.03667902,"test":[0.6720317324]},
|
||||||
|
{"learn":[0.6677212408],"iteration":195,"passed_time":7.282401129,"remaining_time":67.02781447,"test":[0.672000736]},
|
||||||
|
{"learn":[0.667704316],"iteration":196,"passed_time":7.317019235,"remaining_time":66.96744,"test":[0.6719895017]},
|
||||||
|
{"learn":[0.6676819639],"iteration":197,"passed_time":7.351194179,"remaining_time":66.90329248,"test":[0.6719725302]},
|
||||||
|
{"learn":[0.6676554448],"iteration":198,"passed_time":7.389840926,"remaining_time":66.87991712,"test":[0.6719770493]},
|
||||||
|
{"learn":[0.6676318346],"iteration":199,"passed_time":7.432994652,"remaining_time":66.89695187,"test":[0.6719667172]},
|
||||||
|
{"learn":[0.6676074705],"iteration":200,"passed_time":7.471295231,"remaining_time":66.86995085,"test":[0.6719511616]},
|
||||||
|
{"learn":[0.6675849784],"iteration":201,"passed_time":7.506377837,"remaining_time":66.8141948,"test":[0.6719427289]},
|
||||||
|
{"learn":[0.6675631744],"iteration":202,"passed_time":7.540821494,"remaining_time":66.75298633,"test":[0.6719299116]},
|
||||||
|
{"learn":[0.6675397619],"iteration":203,"passed_time":7.56808212,"remaining_time":66.62880141,"test":[0.6719106583]},
|
||||||
|
{"learn":[0.6675169086],"iteration":204,"passed_time":7.605676901,"remaining_time":66.59604896,"test":[0.6718967065]},
|
||||||
|
{"learn":[0.6674864762],"iteration":205,"passed_time":7.638300222,"remaining_time":66.51995436,"test":[0.671890967]},
|
||||||
|
{"learn":[0.6674670714],"iteration":206,"passed_time":7.665554951,"remaining_time":66.39777791,"test":[0.6718896293]},
|
||||||
|
{"learn":[0.6674375599],"iteration":207,"passed_time":7.700277678,"remaining_time":66.34085384,"test":[0.6718883534]},
|
||||||
|
{"learn":[0.6674148457],"iteration":208,"passed_time":7.734145802,"remaining_time":66.27681881,"test":[0.6718827289]},
|
||||||
|
{"learn":[0.6673974446],"iteration":209,"passed_time":7.766232144,"remaining_time":66.19788351,"test":[0.6718763224]},
|
||||||
|
{"learn":[0.6673812139],"iteration":210,"passed_time":7.796801222,"remaining_time":66.1065279,"test":[0.67187262]},
|
||||||
|
{"learn":[0.6673515687],"iteration":211,"passed_time":7.831891449,"remaining_time":66.05387693,"test":[0.6718590402]},
|
||||||
|
{"learn":[0.6673197956],"iteration":212,"passed_time":7.871259964,"remaining_time":66.0372843,"test":[0.6718455115]},
|
||||||
|
{"learn":[0.6672900754],"iteration":213,"passed_time":7.910110502,"remaining_time":66.01615587,"test":[0.6718253747]},
|
||||||
|
{"learn":[0.6672550009],"iteration":214,"passed_time":7.951342226,"remaining_time":66.01463197,"test":[0.671794877]},
|
||||||
|
{"learn":[0.6672271563],"iteration":215,"passed_time":7.989001461,"remaining_time":65.98323429,"test":[0.6717873786]},
|
||||||
|
{"learn":[0.667204521],"iteration":216,"passed_time":8.025973631,"remaining_time":65.94613357,"test":[0.6717765089]},
|
||||||
|
{"learn":[0.667181968],"iteration":217,"passed_time":8.058434478,"remaining_time":65.87215707,"test":[0.6717616726]},
|
||||||
|
{"learn":[0.6671640023],"iteration":218,"passed_time":8.087145957,"remaining_time":65.76806826,"test":[0.6717499215]},
|
||||||
|
{"learn":[0.66714351],"iteration":219,"passed_time":8.112590578,"remaining_time":65.63823286,"test":[0.6717326052]},
|
||||||
|
{"learn":[0.6671167156],"iteration":220,"passed_time":8.148644349,"remaining_time":65.59474342,"test":[0.6717161937]},
|
||||||
|
{"learn":[0.6670915937],"iteration":221,"passed_time":8.197662625,"remaining_time":65.65515382,"test":[0.6717056951]},
|
||||||
|
{"learn":[0.6670595279],"iteration":222,"passed_time":8.239228431,"remaining_time":65.65519696,"test":[0.6717021438]},
|
||||||
|
{"learn":[0.667033994],"iteration":223,"passed_time":8.268371203,"remaining_time":65.55637168,"test":[0.6716868488]},
|
||||||
|
{"learn":[0.6670008246],"iteration":224,"passed_time":8.298555216,"remaining_time":65.46638004,"test":[0.6716751909]},
|
||||||
|
{"learn":[0.6669858319],"iteration":225,"passed_time":8.327401394,"remaining_time":65.36641625,"test":[0.671670116]},
|
||||||
|
{"learn":[0.6669553964],"iteration":226,"passed_time":8.357648377,"remaining_time":65.27802014,"test":[0.6716558757]},
|
||||||
|
{"learn":[0.6669274683],"iteration":227,"passed_time":8.384989701,"remaining_time":65.16755154,"test":[0.6716559962]},
|
||||||
|
{"learn":[0.666896348],"iteration":228,"passed_time":8.418297538,"remaining_time":65.1039517,"test":[0.6716487875]},
|
||||||
|
{"learn":[0.6668698686],"iteration":229,"passed_time":8.453919972,"remaining_time":65.05842761,"test":[0.6716427451]},
|
||||||
|
{"learn":[0.6668513411],"iteration":230,"passed_time":8.49049033,"remaining_time":65.02024846,"test":[0.6716323255]},
|
||||||
|
{"learn":[0.6668309985],"iteration":231,"passed_time":8.523986676,"remaining_time":64.95865708,"test":[0.6716303547]},
|
||||||
|
{"learn":[0.6668058585],"iteration":232,"passed_time":8.550998228,"remaining_time":64.84812819,"test":[0.6716309509]},
|
||||||
|
{"learn":[0.6667845908],"iteration":233,"passed_time":8.575382398,"remaining_time":64.71848425,"test":[0.6716215401]},
|
||||||
|
{"learn":[0.6667582863],"iteration":234,"passed_time":8.607602961,"remaining_time":64.64859245,"test":[0.6716162103]},
|
||||||
|
{"learn":[0.6667332943],"iteration":235,"passed_time":8.6353786,"remaining_time":64.54579597,"test":[0.6716135097]},
|
||||||
|
{"learn":[0.6667070085],"iteration":236,"passed_time":8.66085309,"remaining_time":64.42651476,"test":[0.6716156696]},
|
||||||
|
{"learn":[0.6666907315],"iteration":237,"passed_time":8.691362456,"remaining_time":64.34529684,"test":[0.6716020054]},
|
||||||
|
{"learn":[0.6666633028],"iteration":238,"passed_time":8.719983169,"remaining_time":64.25058728,"test":[0.6715921704]},
|
||||||
|
{"learn":[0.6666406707],"iteration":239,"passed_time":8.746012652,"remaining_time":64.13742611,"test":[0.6715804466]},
|
||||||
|
{"learn":[0.6666134624],"iteration":240,"passed_time":8.773898765,"remaining_time":64.03853912,"test":[0.6715882966]},
|
||||||
|
{"learn":[0.6665850522],"iteration":241,"passed_time":8.803292064,"remaining_time":63.9511878,"test":[0.6715753942]},
|
||||||
|
{"learn":[0.6665631193],"iteration":242,"passed_time":8.833976809,"remaining_time":63.87365125,"test":[0.6715752261]},
|
||||||
|
{"learn":[0.6665412643],"iteration":243,"passed_time":8.862338006,"remaining_time":63.7797768,"test":[0.6715625509]},
|
||||||
|
{"learn":[0.6665168385],"iteration":244,"passed_time":8.892424073,"remaining_time":63.69879285,"test":[0.6715628214]},
|
||||||
|
{"learn":[0.6664904845],"iteration":245,"passed_time":8.932383667,"remaining_time":63.68862175,"test":[0.6715601629]},
|
||||||
|
{"learn":[0.6664678274],"iteration":246,"passed_time":8.962911123,"remaining_time":63.61126801,"test":[0.6715576255]},
|
||||||
|
{"learn":[0.6664539777],"iteration":247,"passed_time":8.991624872,"remaining_time":63.52147894,"test":[0.6715550274]},
|
||||||
|
{"learn":[0.6664334121],"iteration":248,"passed_time":9.021847081,"remaining_time":63.44278811,"test":[0.6715448645]},
|
||||||
|
{"learn":[0.6664121724],"iteration":249,"passed_time":9.05121341,"remaining_time":63.35849387,"test":[0.6715308166]},
|
||||||
|
{"learn":[0.666392034],"iteration":250,"passed_time":9.085113431,"remaining_time":63.30622865,"test":[0.671519334]},
|
||||||
|
{"learn":[0.666366899],"iteration":251,"passed_time":9.110250512,"remaining_time":63.19332498,"test":[0.6715184071]},
|
||||||
|
{"learn":[0.6663414098],"iteration":252,"passed_time":9.137253573,"remaining_time":63.09399997,"test":[0.6715163019]},
|
||||||
|
{"learn":[0.6663157816],"iteration":253,"passed_time":9.174559864,"remaining_time":63.06606899,"test":[0.6715096094]},
|
||||||
|
{"learn":[0.6662989799],"iteration":254,"passed_time":9.196898204,"remaining_time":62.93563673,"test":[0.6714992963]},
|
||||||
|
{"learn":[0.6662696102],"iteration":255,"passed_time":9.238149902,"remaining_time":62.9348962,"test":[0.6714917256]},
|
||||||
|
{"learn":[0.6662479711],"iteration":256,"passed_time":9.267818291,"remaining_time":62.85528125,"test":[0.671477406]},
|
||||||
|
{"learn":[0.6662231874],"iteration":257,"passed_time":9.297538986,"remaining_time":62.77640665,"test":[0.6714741542]},
|
||||||
|
{"learn":[0.6661947927],"iteration":258,"passed_time":9.324772701,"remaining_time":62.68119411,"test":[0.6714576155]},
|
||||||
|
{"learn":[0.6661669951],"iteration":259,"passed_time":9.357824574,"remaining_time":62.62544138,"test":[0.6714473645]},
|
||||||
|
{"learn":[0.6661426137],"iteration":260,"passed_time":9.388345461,"remaining_time":62.55299907,"test":[0.6714427232]},
|
||||||
|
{"learn":[0.6661216749],"iteration":261,"passed_time":9.427290804,"remaining_time":62.53676114,"test":[0.6714364275]},
|
||||||
|
{"learn":[0.6660983123],"iteration":262,"passed_time":9.461913185,"remaining_time":62.49179925,"test":[0.6714339587]},
|
||||||
|
{"learn":[0.6660803402],"iteration":263,"passed_time":9.496090562,"remaining_time":62.44398945,"test":[0.6714336287]},
|
||||||
|
{"learn":[0.6660617842],"iteration":264,"passed_time":9.524189317,"remaining_time":62.35648477,"test":[0.6714283568]},
|
||||||
|
{"learn":[0.6660443878],"iteration":265,"passed_time":9.55372419,"remaining_time":62.27878852,"test":[0.6714271895]},
|
||||||
|
{"learn":[0.6660176079],"iteration":266,"passed_time":9.590356068,"remaining_time":62.2475171,"test":[0.671413471]},
|
||||||
|
{"learn":[0.6659967546],"iteration":267,"passed_time":9.620235131,"remaining_time":62.17256436,"test":[0.6714072396]},
|
||||||
|
{"learn":[0.6659751467],"iteration":268,"passed_time":9.645948482,"remaining_time":62.0711406,"test":[0.6714002677]},
|
||||||
|
{"learn":[0.6659539329],"iteration":269,"passed_time":9.682675077,"remaining_time":62.04084401,"test":[0.6714001163]},
|
||||||
|
{"learn":[0.6659263951],"iteration":270,"passed_time":9.711914203,"remaining_time":61.96272936,"test":[0.6713933952]},
|
||||||
|
{"learn":[0.6659038921],"iteration":271,"passed_time":9.739142426,"remaining_time":61.87219894,"test":[0.6713926761]},
|
||||||
|
{"learn":[0.6658767418],"iteration":272,"passed_time":9.768751964,"remaining_time":61.79719649,"test":[0.6713836619]},
|
||||||
|
{"learn":[0.6658510507],"iteration":273,"passed_time":9.804576737,"remaining_time":61.76167682,"test":[0.6713772112]},
|
||||||
|
{"learn":[0.6658210119],"iteration":274,"passed_time":9.848653906,"remaining_time":61.77791996,"test":[0.6713603715]},
|
||||||
|
{"learn":[0.6657963011],"iteration":275,"passed_time":9.88663261,"remaining_time":61.75563268,"test":[0.6713560246]},
|
||||||
|
{"learn":[0.6657748552],"iteration":276,"passed_time":9.925808942,"remaining_time":61.74068161,"test":[0.6713837913]},
|
||||||
|
{"learn":[0.6657490013],"iteration":277,"passed_time":9.965409489,"remaining_time":61.72818396,"test":[0.6713684274]},
|
||||||
|
{"learn":[0.665732402],"iteration":278,"passed_time":9.99537326,"remaining_time":61.65604796,"test":[0.6713619356]},
|
||||||
|
{"learn":[0.6657118786],"iteration":279,"passed_time":10.02216777,"remaining_time":61.5647449,"test":[0.6713584836]},
|
||||||
|
{"learn":[0.665684467],"iteration":280,"passed_time":10.05593393,"remaining_time":61.51654955,"test":[0.6713673572]},
|
||||||
|
{"learn":[0.6656584634],"iteration":281,"passed_time":10.08025153,"remaining_time":61.41089406,"test":[0.6713625568]},
|
||||||
|
{"learn":[0.6656309991],"iteration":282,"passed_time":10.11102202,"remaining_time":61.34496401,"test":[0.6713542652]},
|
||||||
|
{"learn":[0.6656073482],"iteration":283,"passed_time":10.14714598,"remaining_time":61.31162855,"test":[0.6713512017]},
|
||||||
|
{"learn":[0.6655890957],"iteration":284,"passed_time":10.17528061,"remaining_time":61.23019734,"test":[0.671342038]},
|
||||||
|
{"learn":[0.6655665563],"iteration":285,"passed_time":10.2021403,"remaining_time":61.14149818,"test":[0.6713279798]},
|
||||||
|
{"learn":[0.6655452454],"iteration":286,"passed_time":10.23423432,"remaining_time":61.08447174,"test":[0.6713123285]},
|
||||||
|
{"learn":[0.6655255286],"iteration":287,"passed_time":10.26481698,"remaining_time":61.0186343,"test":[0.6713035326]},
|
||||||
|
{"learn":[0.6655053548],"iteration":288,"passed_time":10.29945844,"remaining_time":60.97707056,"test":[0.6713022203]},
|
||||||
|
{"learn":[0.6654893396],"iteration":289,"passed_time":10.32366496,"remaining_time":60.87402441,"test":[0.671296041]},
|
||||||
|
{"learn":[0.6654648912],"iteration":290,"passed_time":10.35344703,"remaining_time":60.80426453,"test":[0.6712829551]},
|
||||||
|
{"learn":[0.6654442759],"iteration":291,"passed_time":10.3949915,"remaining_time":60.8035804,"test":[0.6712769751]},
|
||||||
|
{"learn":[0.6654173127],"iteration":292,"passed_time":10.43148765,"remaining_time":60.77320621,"test":[0.6712702915]},
|
||||||
|
{"learn":[0.6653914518],"iteration":293,"passed_time":10.47162738,"remaining_time":60.76393303,"test":[0.6712379343]},
|
||||||
|
{"learn":[0.6653648946],"iteration":294,"passed_time":10.50360107,"remaining_time":60.70725362,"test":[0.6712192006]},
|
||||||
|
{"learn":[0.665344141],"iteration":295,"passed_time":10.53460819,"remaining_time":60.64517686,"test":[0.6712074061]},
|
||||||
|
{"learn":[0.6653140817],"iteration":296,"passed_time":10.57659448,"remaining_time":60.64626395,"test":[0.6711953324]},
|
||||||
|
{"learn":[0.665295365],"iteration":297,"passed_time":10.61260262,"remaining_time":60.61291829,"test":[0.6711891001]},
|
||||||
|
{"learn":[0.6652787488],"iteration":298,"passed_time":10.63910358,"remaining_time":60.52546889,"test":[0.6711870526]},
|
||||||
|
{"learn":[0.6652502991],"iteration":299,"passed_time":10.6681867,"remaining_time":60.45305797,"test":[0.6711812809]},
|
||||||
|
{"learn":[0.665231168],"iteration":300,"passed_time":10.70260503,"remaining_time":60.41104967,"test":[0.6711768946]},
|
||||||
|
{"learn":[0.6652136682],"iteration":301,"passed_time":10.72952096,"remaining_time":60.32690925,"test":[0.6711845012]},
|
||||||
|
{"learn":[0.6651903001],"iteration":302,"passed_time":10.76489952,"remaining_time":60.29054288,"test":[0.6711869636]},
|
||||||
|
{"learn":[0.6651697153],"iteration":303,"passed_time":10.80197155,"remaining_time":60.26363073,"test":[0.671186884]},
|
||||||
|
{"learn":[0.6651525958],"iteration":304,"passed_time":10.82922271,"remaining_time":60.18207375,"test":[0.6711890401]},
|
||||||
|
{"learn":[0.6651322685],"iteration":305,"passed_time":10.8578399,"remaining_time":60.10843394,"test":[0.6711868603]},
|
||||||
|
{"learn":[0.6651113828],"iteration":306,"passed_time":10.89228879,"remaining_time":60.06724727,"test":[0.6711900892]},
|
||||||
|
{"learn":[0.6650886807],"iteration":307,"passed_time":10.93056436,"remaining_time":60.04712628,"test":[0.6711884242]},
|
||||||
|
{"learn":[0.6650622251],"iteration":308,"passed_time":10.97231236,"remaining_time":60.04589061,"test":[0.6711837119]},
|
||||||
|
{"learn":[0.6650429987],"iteration":309,"passed_time":11.00296848,"remaining_time":59.98392494,"test":[0.6711766645]},
|
||||||
|
{"learn":[0.665015513],"iteration":310,"passed_time":11.03002276,"remaining_time":59.90259947,"test":[0.671172959]},
|
||||||
|
{"learn":[0.6650019022],"iteration":311,"passed_time":11.05828865,"remaining_time":59.82817707,"test":[0.6711740433]},
|
||||||
|
{"learn":[0.664979951],"iteration":312,"passed_time":11.09287745,"remaining_time":59.78812863,"test":[0.6711715069]},
|
||||||
|
{"learn":[0.6649549638],"iteration":313,"passed_time":11.1177757,"remaining_time":59.69608229,"test":[0.6711589843]},
|
||||||
|
{"learn":[0.6649340455],"iteration":314,"passed_time":11.14959087,"remaining_time":59.64146228,"test":[0.6711446402]},
|
||||||
|
{"learn":[0.6649162445],"iteration":315,"passed_time":11.18718772,"remaining_time":59.61779784,"test":[0.6711415366]},
|
||||||
|
{"learn":[0.6649048119],"iteration":316,"passed_time":11.21179073,"remaining_time":59.52505932,"test":[0.6711359351]},
|
||||||
|
{"learn":[0.6648796463],"iteration":317,"passed_time":11.24311165,"remaining_time":59.46828238,"test":[0.671143361]},
|
||||||
|
{"learn":[0.6648605481],"iteration":318,"passed_time":11.27486028,"remaining_time":59.41391889,"test":[0.6711353638]},
|
||||||
|
{"learn":[0.6648429084],"iteration":319,"passed_time":11.30400807,"remaining_time":59.34604237,"test":[0.6711444387]},
|
||||||
|
{"learn":[0.6648238121],"iteration":320,"passed_time":11.33488419,"remaining_time":59.28744721,"test":[0.6711487352]},
|
||||||
|
{"learn":[0.6647969527],"iteration":321,"passed_time":11.36208838,"remaining_time":59.20988915,"test":[0.67114436]},
|
||||||
|
{"learn":[0.6647854723],"iteration":322,"passed_time":11.39429642,"remaining_time":59.15862259,"test":[0.6711444722]},
|
||||||
|
{"learn":[0.6647589304],"iteration":323,"passed_time":11.4363998,"remaining_time":59.15866068,"test":[0.6711325635]},
|
||||||
|
{"learn":[0.6647429024],"iteration":324,"passed_time":11.47751019,"remaining_time":59.15332173,"test":[0.6711269403]},
|
||||||
|
{"learn":[0.6647237508],"iteration":325,"passed_time":11.5136833,"remaining_time":59.12241054,"test":[0.6711154078]},
|
||||||
|
{"learn":[0.6647059396],"iteration":326,"passed_time":11.54795566,"remaining_time":59.08174257,"test":[0.6711203043]},
|
||||||
|
{"learn":[0.664686288],"iteration":327,"passed_time":11.57245915,"remaining_time":58.99131613,"test":[0.6711241333]},
|
||||||
|
{"learn":[0.6646532527],"iteration":328,"passed_time":11.60790333,"remaining_time":58.95685857,"test":[0.6711213497]},
|
||||||
|
{"learn":[0.6646306438],"iteration":329,"passed_time":11.63787346,"remaining_time":58.89469298,"test":[0.6711231641]},
|
||||||
|
{"learn":[0.6646098516],"iteration":330,"passed_time":11.66805718,"remaining_time":58.83379887,"test":[0.6711049215]},
|
||||||
|
{"learn":[0.6645858284],"iteration":331,"passed_time":11.70070223,"remaining_time":58.78545579,"test":[0.6711031963]},
|
||||||
|
{"learn":[0.6645707188],"iteration":332,"passed_time":11.724753,"remaining_time":58.69418391,"test":[0.6710996314]},
|
||||||
|
{"learn":[0.6645485788],"iteration":333,"passed_time":11.75795297,"remaining_time":58.64895104,"test":[0.6710867309]},
|
||||||
|
{"learn":[0.6645305696],"iteration":334,"passed_time":11.78053066,"remaining_time":58.55099567,"test":[0.6710914578]},
|
||||||
|
{"learn":[0.6645108881],"iteration":335,"passed_time":11.81570271,"remaining_time":58.51586106,"test":[0.6710929585]},
|
||||||
|
{"learn":[0.6644923286],"iteration":336,"passed_time":11.8448851,"remaining_time":58.45116888,"test":[0.6710984779]},
|
||||||
|
{"learn":[0.6644805222],"iteration":337,"passed_time":11.86964023,"remaining_time":58.36491734,"test":[0.6710923199]},
|
||||||
|
{"learn":[0.6644572776],"iteration":338,"passed_time":11.90591446,"remaining_time":58.33546879,"test":[0.6710893917]},
|
||||||
|
{"learn":[0.6644320741],"iteration":339,"passed_time":11.94145444,"remaining_time":58.30239521,"test":[0.6710923306]},
|
||||||
|
{"learn":[0.6644115048],"iteration":340,"passed_time":11.98658051,"remaining_time":58.31594449,"test":[0.6710927901]},
|
||||||
|
{"learn":[0.6643949013],"iteration":341,"passed_time":12.02038848,"remaining_time":58.27428098,"test":[0.6711092802]},
|
||||||
|
{"learn":[0.6643619789],"iteration":342,"passed_time":12.06653941,"remaining_time":58.29229096,"test":[0.6711012995]},
|
||||||
|
{"learn":[0.6643389502],"iteration":343,"passed_time":12.12283646,"remaining_time":58.35877087,"test":[0.6711015305]},
|
||||||
|
{"learn":[0.6643088915],"iteration":344,"passed_time":12.17733618,"remaining_time":58.41591705,"test":[0.6710975574]},
|
||||||
|
{"learn":[0.664286972],"iteration":345,"passed_time":12.22133732,"remaining_time":58.42223099,"test":[0.6710899474]},
|
||||||
|
{"learn":[0.664274149],"iteration":346,"passed_time":12.2642467,"remaining_time":58.42305415,"test":[0.671085152]},
|
||||||
|
{"learn":[0.6642536926],"iteration":347,"passed_time":12.30091895,"remaining_time":58.39401755,"test":[0.6710814533]},
|
||||||
|
{"learn":[0.6642357634],"iteration":348,"passed_time":12.32484094,"remaining_time":58.30462002,"test":[0.6710701892]},
|
||||||
|
{"learn":[0.664207914],"iteration":349,"passed_time":12.35469303,"remaining_time":58.24355287,"test":[0.67105503]},
|
||||||
|
{"learn":[0.6641853097],"iteration":350,"passed_time":12.40148755,"remaining_time":58.26225919,"test":[0.6710527861]},
|
||||||
|
{"learn":[0.6641654917],"iteration":351,"passed_time":12.43803877,"remaining_time":58.23263605,"test":[0.6710508715]},
|
||||||
|
{"learn":[0.664143804],"iteration":352,"passed_time":12.47995438,"remaining_time":58.22800245,"test":[0.6710560803]},
|
||||||
|
{"learn":[0.6641290647],"iteration":353,"passed_time":12.51241326,"remaining_time":58.17918707,"test":[0.6710465693]},
|
||||||
|
{"learn":[0.6641117244],"iteration":354,"passed_time":12.5417829,"remaining_time":58.11614893,"test":[0.6710440741]},
|
||||||
|
{"learn":[0.6640880219],"iteration":355,"passed_time":12.5692936,"remaining_time":58.0447154,"test":[0.6710496913]},
|
||||||
|
{"learn":[0.6640669415],"iteration":356,"passed_time":12.5976392,"remaining_time":57.97737034,"test":[0.6710404659]},
|
||||||
|
{"learn":[0.6640462999],"iteration":357,"passed_time":12.62815847,"remaining_time":57.92021287,"test":[0.6710293986]},
|
||||||
|
{"learn":[0.664030296],"iteration":358,"passed_time":12.65342509,"remaining_time":57.8391938,"test":[0.6710353817]},
|
||||||
|
{"learn":[0.6640028542],"iteration":359,"passed_time":12.68233453,"remaining_time":57.77507954,"test":[0.6710271815]},
|
||||||
|
{"learn":[0.6639813347],"iteration":360,"passed_time":12.72037964,"remaining_time":57.75263774,"test":[0.6710288077]},
|
||||||
|
{"learn":[0.6639597941],"iteration":361,"passed_time":12.744473,"remaining_time":57.66698004,"test":[0.6710169894]},
|
||||||
|
{"learn":[0.6639429832],"iteration":362,"passed_time":12.77086568,"remaining_time":57.59203063,"test":[0.6710119848]},
|
||||||
|
{"learn":[0.6639222708],"iteration":363,"passed_time":12.81194554,"remaining_time":57.58335961,"test":[0.6710114775]},
|
||||||
|
{"learn":[0.6639065546],"iteration":364,"passed_time":12.84133287,"remaining_time":57.52213492,"test":[0.6710013614]},
|
||||||
|
{"learn":[0.6638823236],"iteration":365,"passed_time":12.87057337,"remaining_time":57.46042866,"test":[0.6709985657]},
|
||||||
|
{"learn":[0.6638648195],"iteration":366,"passed_time":12.8971183,"remaining_time":57.38690512,"test":[0.6709948954]},
|
||||||
|
{"learn":[0.6638436235],"iteration":367,"passed_time":12.93825161,"remaining_time":57.37833324,"test":[0.6709970591]},
|
||||||
|
{"learn":[0.6638208732],"iteration":368,"passed_time":12.97444296,"remaining_time":57.3477411,"test":[0.6709739289]},
|
||||||
|
{"learn":[0.6637956357],"iteration":369,"passed_time":13.00974924,"remaining_time":57.31321963,"test":[0.6709754911]},
|
||||||
|
{"learn":[0.6637718453],"iteration":370,"passed_time":13.03832239,"remaining_time":57.24912984,"test":[0.6709717066]},
|
||||||
|
{"learn":[0.663756918],"iteration":371,"passed_time":13.07843077,"remaining_time":57.23571316,"test":[0.67096845]},
|
||||||
|
{"learn":[0.6637353525],"iteration":372,"passed_time":13.11729124,"remaining_time":57.21671005,"test":[0.6709739445]},
|
||||||
|
{"learn":[0.6637143112],"iteration":373,"passed_time":13.14745329,"remaining_time":57.15978354,"test":[0.6709728881]},
|
||||||
|
{"learn":[0.6636956547],"iteration":374,"passed_time":13.18118022,"remaining_time":57.11844761,"test":[0.6709694284]},
|
||||||
|
{"learn":[0.663680995],"iteration":375,"passed_time":13.20539229,"remaining_time":57.03605604,"test":[0.6709604166]},
|
||||||
|
{"learn":[0.66366728],"iteration":376,"passed_time":13.23563977,"remaining_time":56.97995583,"test":[0.6709605025]},
|
||||||
|
{"learn":[0.6636487567],"iteration":377,"passed_time":13.27428255,"remaining_time":56.96001665,"test":[0.6709603727]},
|
||||||
|
{"learn":[0.6636266904],"iteration":378,"passed_time":13.30625754,"remaining_time":56.91146033,"test":[0.670944339]},
|
||||||
|
{"learn":[0.6636116064],"iteration":379,"passed_time":13.33327871,"remaining_time":56.84187241,"test":[0.6709447187]},
|
||||||
|
{"learn":[0.6635902746],"iteration":380,"passed_time":13.36632239,"remaining_time":56.79809961,"test":[0.6709538679]},
|
||||||
|
{"learn":[0.6635654896],"iteration":381,"passed_time":13.39639051,"remaining_time":56.74177969,"test":[0.6709640912]},
|
||||||
|
{"learn":[0.6635393029],"iteration":382,"passed_time":13.42189438,"remaining_time":56.66632694,"test":[0.6709534847]},
|
||||||
|
{"learn":[0.6635171734],"iteration":383,"passed_time":13.46730432,"remaining_time":56.6749057,"test":[0.6709471555]},
|
||||||
|
{"learn":[0.663500789],"iteration":384,"passed_time":13.50832777,"remaining_time":56.66480351,"test":[0.6709506783]},
|
||||||
|
{"learn":[0.663477743],"iteration":385,"passed_time":13.54029627,"remaining_time":56.61667921,"test":[0.6709546729]},
|
||||||
|
{"learn":[0.6634584806],"iteration":386,"passed_time":13.56996301,"remaining_time":56.5590448,"test":[0.670930774]},
|
||||||
|
{"learn":[0.6634337499],"iteration":387,"passed_time":13.59835745,"remaining_time":56.4962686,"test":[0.6709287322]},
|
||||||
|
{"learn":[0.6634135584],"iteration":388,"passed_time":13.6279617,"remaining_time":56.43867943,"test":[0.6709198643]},
|
||||||
|
{"learn":[0.6633868455],"iteration":389,"passed_time":13.65633448,"remaining_time":56.37615005,"test":[0.6709220389]},
|
||||||
|
{"learn":[0.6633755323],"iteration":390,"passed_time":13.68565529,"remaining_time":56.31769658,"test":[0.6709230923]},
|
||||||
|
{"learn":[0.663356103],"iteration":391,"passed_time":13.71789303,"remaining_time":56.27135714,"test":[0.670930414]},
|
||||||
|
{"learn":[0.6633337631],"iteration":392,"passed_time":13.75060752,"remaining_time":56.2270389,"test":[0.6709354296]},
|
||||||
|
{"learn":[0.663319422],"iteration":393,"passed_time":13.77167974,"remaining_time":56.13532403,"test":[0.6709351544]},
|
||||||
|
{"learn":[0.6632911566],"iteration":394,"passed_time":13.80416242,"remaining_time":56.09033084,"test":[0.6709414935]},
|
||||||
|
{"learn":[0.6632687875],"iteration":395,"passed_time":13.82525369,"remaining_time":55.9992599,"test":[0.6709445943]},
|
||||||
|
{"learn":[0.6632431997],"iteration":396,"passed_time":13.85836516,"remaining_time":55.95707646,"test":[0.6709475685]},
|
||||||
|
{"learn":[0.6632189331],"iteration":397,"passed_time":13.88898168,"remaining_time":55.90489613,"test":[0.6709533591]},
|
||||||
|
{"learn":[0.663201035],"iteration":398,"passed_time":13.91726355,"remaining_time":55.84345598,"test":[0.6709592222]},
|
||||||
|
{"learn":[0.6631898553],"iteration":399,"passed_time":13.95316828,"remaining_time":55.81267311,"test":[0.6709508704]},
|
||||||
|
{"learn":[0.6631712482],"iteration":400,"passed_time":13.99418497,"remaining_time":55.80224881,"test":[0.6709479912]},
|
||||||
|
{"learn":[0.663143025],"iteration":401,"passed_time":14.0253575,"remaining_time":55.75254052,"test":[0.6709417519]},
|
||||||
|
{"learn":[0.663121538],"iteration":402,"passed_time":14.04844239,"remaining_time":55.67087467,"test":[0.6709476082]},
|
||||||
|
{"learn":[0.6631087792],"iteration":403,"passed_time":14.0761289,"remaining_time":55.60767753,"test":[0.6709480979]},
|
||||||
|
{"learn":[0.6630859067],"iteration":404,"passed_time":14.10555105,"remaining_time":55.55149118,"test":[0.6709448724]},
|
||||||
|
{"learn":[0.663066483],"iteration":405,"passed_time":14.1427661,"remaining_time":55.52603242,"test":[0.6709421934]},
|
||||||
|
{"learn":[0.6630443652],"iteration":406,"passed_time":14.18285552,"remaining_time":55.51176619,"test":[0.6709386261]},
|
||||||
|
{"learn":[0.6630250376],"iteration":407,"passed_time":14.21458769,"remaining_time":55.46476372,"test":[0.6709461564]},
|
||||||
|
{"learn":[0.6630007822],"iteration":408,"passed_time":14.24035708,"remaining_time":55.39464088,"test":[0.670934384]},
|
||||||
|
{"learn":[0.6629768728],"iteration":409,"passed_time":14.26711915,"remaining_time":55.32858403,"test":[0.6709312987]},
|
||||||
|
{"learn":[0.6629528093],"iteration":410,"passed_time":14.29943785,"remaining_time":55.28420133,"test":[0.670931806]},
|
||||||
|
{"learn":[0.6629260936],"iteration":411,"passed_time":14.32489173,"remaining_time":55.21341763,"test":[0.6709286111]},
|
||||||
|
{"learn":[0.6629102182],"iteration":412,"passed_time":14.35119075,"remaining_time":55.14610101,"test":[0.6709224729]},
|
||||||
|
{"learn":[0.6628863488],"iteration":413,"passed_time":14.37946054,"remaining_time":55.08653242,"test":[0.6709236504]},
|
||||||
|
{"learn":[0.6628648972],"iteration":414,"passed_time":14.41005914,"remaining_time":55.03600899,"test":[0.6709245901]},
|
||||||
|
{"learn":[0.6628454339],"iteration":415,"passed_time":14.45103793,"remaining_time":55.02510598,"test":[0.6709463437]},
|
||||||
|
{"learn":[0.6628200274],"iteration":416,"passed_time":14.48428995,"remaining_time":54.98472661,"test":[0.6709567049]},
|
||||||
|
{"learn":[0.6627942591],"iteration":417,"passed_time":14.5135184,"remaining_time":54.92915339,"test":[0.670945606]},
|
||||||
|
{"learn":[0.6627744647],"iteration":418,"passed_time":14.53698524,"remaining_time":54.85196578,"test":[0.6709479298]},
|
||||||
|
{"learn":[0.662765485],"iteration":419,"passed_time":14.56542473,"remaining_time":54.79374067,"test":[0.6709464351]},
|
||||||
|
{"learn":[0.6627503257],"iteration":420,"passed_time":14.58728594,"remaining_time":54.71098455,"test":[0.6709414048]},
|
||||||
|
{"learn":[0.6627323029],"iteration":421,"passed_time":14.61501375,"remaining_time":54.65045425,"test":[0.6709414427]},
|
||||||
|
{"learn":[0.6627111509],"iteration":422,"passed_time":14.64231614,"remaining_time":54.58849302,"test":[0.6709296343]},
|
||||||
|
{"learn":[0.6626785863],"iteration":423,"passed_time":14.66665432,"remaining_time":54.51567739,"test":[0.670924721]},
|
||||||
|
{"learn":[0.6626576561],"iteration":424,"passed_time":14.69050441,"remaining_time":54.44128104,"test":[0.670906284]},
|
||||||
|
{"learn":[0.6626363113],"iteration":425,"passed_time":14.71910475,"remaining_time":54.38467341,"test":[0.6708996826]},
|
||||||
|
{"learn":[0.6626181065],"iteration":426,"passed_time":14.73941058,"remaining_time":54.2976413,"test":[0.6708987677]},
|
||||||
|
{"learn":[0.66259794],"iteration":427,"passed_time":14.77242451,"remaining_time":54.25759657,"test":[0.670909526]},
|
||||||
|
{"learn":[0.6625765658],"iteration":428,"passed_time":14.79088688,"remaining_time":54.1642967,"test":[0.6709033226]},
|
||||||
|
{"learn":[0.6625526572],"iteration":429,"passed_time":14.82430966,"remaining_time":54.12596783,"test":[0.6708750209]},
|
||||||
|
{"learn":[0.66253135],"iteration":430,"passed_time":14.84439175,"remaining_time":54.03909666,"test":[0.6708752079]},
|
||||||
|
{"learn":[0.6625035695],"iteration":431,"passed_time":14.8764415,"remaining_time":53.99597284,"test":[0.6708776566]},
|
||||||
|
{"learn":[0.662480212],"iteration":432,"passed_time":14.90666075,"remaining_time":53.94627573,"test":[0.6708736133]},
|
||||||
|
{"learn":[0.6624611632],"iteration":433,"passed_time":14.93845927,"remaining_time":53.90236684,"test":[0.6708754298]},
|
||||||
|
{"learn":[0.6624332625],"iteration":434,"passed_time":14.98024104,"remaining_time":53.89443041,"test":[0.6708751084]},
|
||||||
|
{"learn":[0.6624120584],"iteration":435,"passed_time":15.00605075,"remaining_time":53.82904442,"test":[0.6708642042]},
|
||||||
|
{"learn":[0.6623941719],"iteration":436,"passed_time":15.03384083,"remaining_time":53.77092268,"test":[0.6708610465]},
|
||||||
|
{"learn":[0.6623766304],"iteration":437,"passed_time":15.05972545,"remaining_time":53.70614417,"test":[0.6708574768]},
|
||||||
|
{"learn":[0.6623623329],"iteration":438,"passed_time":15.08505889,"remaining_time":53.63958297,"test":[0.6708557953]},
|
||||||
|
{"learn":[0.6623442925],"iteration":439,"passed_time":15.11080547,"remaining_time":53.57467393,"test":[0.670871378]},
|
||||||
|
{"learn":[0.6623212715],"iteration":440,"passed_time":15.13466304,"remaining_time":53.50326458,"test":[0.6708640187]},
|
||||||
|
{"learn":[0.6623025941],"iteration":441,"passed_time":15.16037021,"remaining_time":53.43859001,"test":[0.6708700565]},
|
||||||
|
{"learn":[0.6622749791],"iteration":442,"passed_time":15.18471062,"remaining_time":53.36928767,"test":[0.6708667534]},
|
||||||
|
{"learn":[0.6622534499],"iteration":443,"passed_time":15.21140556,"remaining_time":53.30843931,"test":[0.6708675383]},
|
||||||
|
{"learn":[0.6622305473],"iteration":444,"passed_time":15.23498219,"remaining_time":53.23684787,"test":[0.6708740175]},
|
||||||
|
{"learn":[0.6622059333],"iteration":445,"passed_time":15.26647355,"remaining_time":53.19304911,"test":[0.6708774523]},
|
||||||
|
{"learn":[0.6621871707],"iteration":446,"passed_time":15.28793136,"remaining_time":53.11444609,"test":[0.6708697231]},
|
||||||
|
{"learn":[0.6621638454],"iteration":447,"passed_time":15.31613827,"remaining_time":53.05947899,"test":[0.6708614971]},
|
||||||
|
{"learn":[0.6621511296],"iteration":448,"passed_time":15.33689091,"remaining_time":52.9788815,"test":[0.6708607946]},
|
||||||
|
{"learn":[0.6621349978],"iteration":449,"passed_time":15.36674634,"remaining_time":52.92990406,"test":[0.6708740865]},
|
||||||
|
{"learn":[0.6621120424],"iteration":450,"passed_time":15.393642,"remaining_time":52.87084582,"test":[0.6708729562]},
|
||||||
|
{"learn":[0.6620958271],"iteration":451,"passed_time":15.42984657,"remaining_time":52.84381082,"test":[0.6708674017]},
|
||||||
|
{"learn":[0.6620793528],"iteration":452,"passed_time":15.46956188,"remaining_time":52.82872456,"test":[0.6708693088]},
|
||||||
|
{"learn":[0.6620572713],"iteration":453,"passed_time":15.49032259,"remaining_time":52.74898396,"test":[0.6708712037]},
|
||||||
|
{"learn":[0.6620395025],"iteration":454,"passed_time":15.52379393,"remaining_time":52.71266289,"test":[0.6708703905]},
|
||||||
|
{"learn":[0.6620188044],"iteration":455,"passed_time":15.55053135,"remaining_time":52.65355352,"test":[0.6708577595]},
|
||||||
|
{"learn":[0.6620017347],"iteration":456,"passed_time":15.57735398,"remaining_time":52.59487352,"test":[0.6708493546]},
|
||||||
|
{"learn":[0.6619811454],"iteration":457,"passed_time":15.60434803,"remaining_time":52.53690973,"test":[0.6708523777]},
|
||||||
|
{"learn":[0.6619695569],"iteration":458,"passed_time":15.63056555,"remaining_time":52.47647387,"test":[0.6708454134]},
|
||||||
|
{"learn":[0.661952377],"iteration":459,"passed_time":15.656355,"remaining_time":52.41475368,"test":[0.6708404483]},
|
||||||
|
{"learn":[0.6619237442],"iteration":460,"passed_time":15.68232112,"remaining_time":52.35377918,"test":[0.6708274771]},
|
||||||
|
{"learn":[0.6619089407],"iteration":461,"passed_time":15.71164945,"remaining_time":52.30414904,"test":[0.6708244992]},
|
||||||
|
{"learn":[0.6618886168],"iteration":462,"passed_time":15.7361944,"remaining_time":52.23872743,"test":[0.6708344314]},
|
||||||
|
{"learn":[0.6618831383],"iteration":463,"passed_time":15.76527735,"remaining_time":52.18850433,"test":[0.6708279081]},
|
||||||
|
{"learn":[0.6618690774],"iteration":464,"passed_time":15.78652262,"remaining_time":52.11249942,"test":[0.6708258106]},
|
||||||
|
{"learn":[0.661845878],"iteration":465,"passed_time":15.81756836,"remaining_time":52.06899113,"test":[0.6708049714]},
|
||||||
|
{"learn":[0.6618290213],"iteration":466,"passed_time":15.83979966,"remaining_time":51.99660146,"test":[0.670810989]},
|
||||||
|
{"learn":[0.6618050064],"iteration":467,"passed_time":15.87342473,"remaining_time":51.9617237,"test":[0.6708212237]},
|
||||||
|
{"learn":[0.6617832833],"iteration":468,"passed_time":15.90381555,"remaining_time":51.9162934,"test":[0.6708221741]},
|
||||||
|
{"learn":[0.6617652311],"iteration":469,"passed_time":15.93502938,"remaining_time":51.87360627,"test":[0.6708259658]},
|
||||||
|
{"learn":[0.6617443144],"iteration":470,"passed_time":15.96919221,"remaining_time":51.84054117,"test":[0.6708159692]},
|
||||||
|
{"learn":[0.6617202619],"iteration":471,"passed_time":15.99477329,"remaining_time":51.77968981,"test":[0.6708136212]},
|
||||||
|
{"learn":[0.6617005831],"iteration":472,"passed_time":16.02279091,"remaining_time":51.72685354,"test":[0.6708224942]},
|
||||||
|
{"learn":[0.6616824419],"iteration":473,"passed_time":16.04763422,"remaining_time":51.66390258,"test":[0.6708363084]},
|
||||||
|
{"learn":[0.6616538226],"iteration":474,"passed_time":16.07374645,"remaining_time":51.60518598,"test":[0.670850875]},
|
||||||
|
{"learn":[0.6616314155],"iteration":475,"passed_time":16.09993591,"remaining_time":51.54685363,"test":[0.6708527236]},
|
||||||
|
{"learn":[0.6616127861],"iteration":476,"passed_time":16.12811357,"remaining_time":51.49500411,"test":[0.6708453401]},
|
||||||
|
{"learn":[0.6616029072],"iteration":477,"passed_time":16.15264086,"remaining_time":51.43163051,"test":[0.6708413844]},
|
||||||
|
{"learn":[0.6615843751],"iteration":478,"passed_time":16.17696751,"remaining_time":51.36778201,"test":[0.6708364569]},
|
||||||
|
{"learn":[0.661563216],"iteration":479,"passed_time":16.20551145,"remaining_time":51.31745293,"test":[0.6708251774]},
|
||||||
|
{"learn":[0.6615432257],"iteration":480,"passed_time":16.22860577,"remaining_time":51.2500045,"test":[0.6708154393]},
|
||||||
|
{"learn":[0.6615263324],"iteration":481,"passed_time":16.25544093,"remaining_time":51.19452144,"test":[0.6708111613]},
|
||||||
|
{"learn":[0.6615033259],"iteration":482,"passed_time":16.27729221,"remaining_time":51.12350369,"test":[0.6708102339]},
|
||||||
|
{"learn":[0.661484293],"iteration":483,"passed_time":16.30502335,"remaining_time":51.07110619,"test":[0.6707929623]},
|
||||||
|
{"learn":[0.6614678231],"iteration":484,"passed_time":16.32842702,"remaining_time":51.00529266,"test":[0.6707900226]},
|
||||||
|
{"learn":[0.6614463024],"iteration":485,"passed_time":16.36272839,"remaining_time":50.97360242,"test":[0.6707832384]},
|
||||||
|
{"learn":[0.6614155436],"iteration":486,"passed_time":16.39272506,"remaining_time":50.92852776,"test":[0.6707739118]},
|
||||||
|
{"learn":[0.6613958945],"iteration":487,"passed_time":16.42636604,"remaining_time":50.89480625,"test":[0.6707737538]},
|
||||||
|
{"learn":[0.661380611],"iteration":488,"passed_time":16.4597142,"remaining_time":50.86018027,"test":[0.6707730234]},
|
||||||
|
{"learn":[0.6613677802],"iteration":489,"passed_time":16.48056007,"remaining_time":50.78703206,"test":[0.6707796291]},
|
||||||
|
{"learn":[0.6613530086],"iteration":490,"passed_time":16.51091177,"remaining_time":50.74331132,"test":[0.670791408]},
|
||||||
|
{"learn":[0.6613248211],"iteration":491,"passed_time":16.53097438,"remaining_time":50.66810846,"test":[0.6707944906]},
|
||||||
|
{"learn":[0.6613059359],"iteration":492,"passed_time":16.56161402,"remaining_time":50.62546112,"test":[0.6707835635]},
|
||||||
|
{"learn":[0.6612729965],"iteration":493,"passed_time":16.5854633,"remaining_time":50.56216139,"test":[0.6707908928]},
|
||||||
|
{"learn":[0.6612624948],"iteration":494,"passed_time":16.61302735,"remaining_time":50.51031547,"test":[0.670796262]},
|
||||||
|
{"learn":[0.6612401679],"iteration":495,"passed_time":16.63896978,"remaining_time":50.45365029,"test":[0.6707877825]},
|
||||||
|
{"learn":[0.6612191637],"iteration":496,"passed_time":16.663707,"remaining_time":50.39346403,"test":[0.6707854132]},
|
||||||
|
{"learn":[0.6611912219],"iteration":497,"passed_time":16.69040179,"remaining_time":50.33932428,"test":[0.6707756206]},
|
||||||
|
{"learn":[0.6611773017],"iteration":498,"passed_time":16.71612789,"remaining_time":50.28238068,"test":[0.6707707899]},
|
||||||
|
{"learn":[0.6611638216],"iteration":499,"passed_time":16.74072553,"remaining_time":50.2221766,"test":[0.6707704386]},
|
||||||
|
{"learn":[0.6611450533],"iteration":500,"passed_time":16.77346538,"remaining_time":50.18647626,"test":[0.6707621465]},
|
||||||
|
{"learn":[0.6611179111],"iteration":501,"passed_time":16.80230735,"remaining_time":50.13915621,"test":[0.6707661931]},
|
||||||
|
{"learn":[0.6610959069],"iteration":502,"passed_time":16.83637769,"remaining_time":50.10747,"test":[0.6707651988]},
|
||||||
|
{"learn":[0.6610728788],"iteration":503,"passed_time":16.87382128,"remaining_time":50.08578697,"test":[0.6707607827]},
|
||||||
|
{"learn":[0.6610436668],"iteration":504,"passed_time":16.92151611,"remaining_time":50.09438927,"test":[0.670760242]},
|
||||||
|
{"learn":[0.6610188976],"iteration":505,"passed_time":16.9898618,"remaining_time":50.16374216,"test":[0.6707506008]},
|
||||||
|
{"learn":[0.6610030555],"iteration":506,"passed_time":17.03818668,"remaining_time":50.17359509,"test":[0.6707452886]},
|
||||||
|
{"learn":[0.6609831174],"iteration":507,"passed_time":17.06933058,"remaining_time":50.13275833,"test":[0.6707355189]},
|
||||||
|
{"learn":[0.6609586562],"iteration":508,"passed_time":17.1106164,"remaining_time":50.12166807,"test":[0.6707312551]},
|
||||||
|
{"learn":[0.660935882],"iteration":509,"passed_time":17.14537899,"remaining_time":50.09140137,"test":[0.6707199485]},
|
||||||
|
{"learn":[0.6609202024],"iteration":510,"passed_time":17.19066307,"remaining_time":50.09177556,"test":[0.6707131947]},
|
||||||
|
{"learn":[0.6609011137],"iteration":511,"passed_time":17.21958034,"remaining_time":50.04440537,"test":[0.6707154112]},
|
||||||
|
{"learn":[0.6608726737],"iteration":512,"passed_time":17.24756917,"remaining_time":49.99441591,"test":[0.6706982346]},
|
||||||
|
{"learn":[0.6608608849],"iteration":513,"passed_time":17.27150822,"remaining_time":49.93280391,"test":[0.6706988941]},
|
||||||
|
{"learn":[0.6608387256],"iteration":514,"passed_time":17.29800365,"remaining_time":49.87870957,"test":[0.6706989098]},
|
||||||
|
{"learn":[0.6608136063],"iteration":515,"passed_time":17.34332283,"remaining_time":49.87885868,"test":[0.670693306]},
|
||||||
|
{"learn":[0.6607946343],"iteration":516,"passed_time":17.37393636,"remaining_time":49.83664916,"test":[0.6706944515]},
|
||||||
|
{"learn":[0.6607703935],"iteration":517,"passed_time":17.4173655,"remaining_time":49.83114994,"test":[0.6706899688]},
|
||||||
|
{"learn":[0.6607509625],"iteration":518,"passed_time":17.46008645,"remaining_time":49.82348368,"test":[0.6706909374]},
|
||||||
|
{"learn":[0.6607238109],"iteration":519,"passed_time":17.4906988,"remaining_time":49.78121967,"test":[0.6706855074]},
|
||||||
|
{"learn":[0.6606999858],"iteration":520,"passed_time":17.5186435,"remaining_time":49.7314275,"test":[0.6706787779]},
|
||||||
|
{"learn":[0.6606813873],"iteration":521,"passed_time":17.54613056,"remaining_time":49.6804233,"test":[0.6706737082]},
|
||||||
|
{"learn":[0.6606610372],"iteration":522,"passed_time":17.57100039,"remaining_time":49.62211774,"test":[0.6706761225]},
|
||||||
|
{"learn":[0.660638456],"iteration":523,"passed_time":17.60084283,"remaining_time":49.5779466,"test":[0.670685455]},
|
||||||
|
{"learn":[0.6606156483],"iteration":524,"passed_time":17.62599925,"remaining_time":49.52066456,"test":[0.6706693855]},
|
||||||
|
{"learn":[0.6605968623],"iteration":525,"passed_time":17.65519625,"remaining_time":49.47482751,"test":[0.6706647216]},
|
||||||
|
{"learn":[0.6605735776],"iteration":526,"passed_time":17.67910836,"remaining_time":49.41428199,"test":[0.6706569188]},
|
||||||
|
{"learn":[0.6605517294],"iteration":527,"passed_time":17.70744827,"remaining_time":49.36621942,"test":[0.6706549134]},
|
||||||
|
{"learn":[0.6605309239],"iteration":528,"passed_time":17.72943083,"remaining_time":49.3005534,"test":[0.6706547978]},
|
||||||
|
{"learn":[0.6605086434],"iteration":529,"passed_time":17.75830336,"remaining_time":49.25416215,"test":[0.6706564214]},
|
||||||
|
{"learn":[0.6604803349],"iteration":530,"passed_time":17.78141858,"remaining_time":49.19190939,"test":[0.6706559196]},
|
||||||
|
{"learn":[0.6604566326],"iteration":531,"passed_time":17.80870208,"remaining_time":49.14130574,"test":[0.6706515072]},
|
||||||
|
{"learn":[0.6604430839],"iteration":532,"passed_time":17.82904188,"remaining_time":49.07167811,"test":[0.6706474616]},
|
||||||
|
{"learn":[0.6604273738],"iteration":533,"passed_time":17.86246645,"remaining_time":49.03815696,"test":[0.6706424204]},
|
||||||
|
{"learn":[0.6604048016],"iteration":534,"passed_time":17.90552779,"remaining_time":49.03102469,"test":[0.6706520008]},
|
||||||
|
{"learn":[0.6603845173],"iteration":535,"passed_time":18.02843143,"remaining_time":49.24183511,"test":[0.6706448306]},
|
||||||
|
{"learn":[0.6603669212],"iteration":536,"passed_time":18.07245966,"remaining_time":49.23651485,"test":[0.6706415789]},
|
||||||
|
{"learn":[0.6603488983],"iteration":537,"passed_time":18.10631942,"remaining_time":49.20341819,"test":[0.6706305359]},
|
||||||
|
{"learn":[0.6603176881],"iteration":538,"passed_time":18.13531438,"remaining_time":49.1571323,"test":[0.6706152774]},
|
||||||
|
{"learn":[0.6602953862],"iteration":539,"passed_time":18.16575265,"remaining_time":49.11481272,"test":[0.670616585]},
|
||||||
|
{"learn":[0.6602672025],"iteration":540,"passed_time":18.20025584,"remaining_time":49.08349958,"test":[0.6705963243]},
|
||||||
|
{"learn":[0.6602568636],"iteration":541,"passed_time":18.22381751,"remaining_time":49.02274158,"test":[0.6706027368]},
|
||||||
|
{"learn":[0.660235705],"iteration":542,"passed_time":18.25438575,"remaining_time":48.98092088,"test":[0.6706003522]},
|
||||||
|
{"learn":[0.6602152295],"iteration":543,"passed_time":18.28070524,"remaining_time":48.9277699,"test":[0.6706044301]},
|
||||||
|
{"learn":[0.6601897709],"iteration":544,"passed_time":18.30768805,"remaining_time":48.87648827,"test":[0.6706047241]},
|
||||||
|
{"learn":[0.6601683731],"iteration":545,"passed_time":18.33807201,"remaining_time":48.83435294,"test":[0.6706038235]},
|
||||||
|
{"learn":[0.6601472267],"iteration":546,"passed_time":18.36776304,"remaining_time":48.79041993,"test":[0.6706026913]},
|
||||||
|
{"learn":[0.6601262337],"iteration":547,"passed_time":18.41134623,"remaining_time":48.78334803,"test":[0.6705845786]},
|
||||||
|
{"learn":[0.6601119991],"iteration":548,"passed_time":18.44405381,"remaining_time":48.74739905,"test":[0.6705873967]},
|
||||||
|
{"learn":[0.6600869973],"iteration":549,"passed_time":18.47010718,"remaining_time":48.69391893,"test":[0.6705755426]},
|
||||||
|
{"learn":[0.6600667497],"iteration":550,"passed_time":18.5036553,"remaining_time":48.66024779,"test":[0.6705715731]},
|
||||||
|
{"learn":[0.6600397508],"iteration":551,"passed_time":18.53164471,"remaining_time":48.61199556,"test":[0.6705757153]},
|
||||||
|
{"learn":[0.660016863],"iteration":552,"passed_time":18.5577607,"remaining_time":48.55891452,"test":[0.6705516814]},
|
||||||
|
{"learn":[0.6599933158],"iteration":553,"passed_time":18.58492994,"remaining_time":48.50867995,"test":[0.6705530864]},
|
||||||
|
{"learn":[0.6599632649],"iteration":554,"passed_time":18.62562092,"remaining_time":48.49373376,"test":[0.6705552479]},
|
||||||
|
{"learn":[0.6599446007],"iteration":555,"passed_time":18.65010209,"remaining_time":48.43659608,"test":[0.6705563336]},
|
||||||
|
{"learn":[0.6599138126],"iteration":556,"passed_time":18.67796421,"remaining_time":48.38833458,"test":[0.6705718544]},
|
||||||
|
{"learn":[0.6598965504],"iteration":557,"passed_time":18.70319381,"remaining_time":48.33334314,"test":[0.6705688384]},
|
||||||
|
{"learn":[0.6598785723],"iteration":558,"passed_time":18.72995694,"remaining_time":48.28241136,"test":[0.6705641528]},
|
||||||
|
{"learn":[0.659860838],"iteration":559,"passed_time":18.75657945,"remaining_time":48.23120429,"test":[0.6705628467]},
|
||||||
|
{"learn":[0.6598408724],"iteration":560,"passed_time":18.78181322,"remaining_time":48.17652269,"test":[0.670558488]},
|
||||||
|
{"learn":[0.6598244857],"iteration":561,"passed_time":18.80867415,"remaining_time":48.12610931,"test":[0.6705544404]},
|
||||||
|
{"learn":[0.6598082469],"iteration":562,"passed_time":18.83488797,"remaining_time":48.0741279,"test":[0.6705617451]},
|
||||||
|
{"learn":[0.6597851673],"iteration":563,"passed_time":18.86939449,"remaining_time":48.04335193,"test":[0.6705631717]},
|
||||||
|
{"learn":[0.6597683521],"iteration":564,"passed_time":18.90235988,"remaining_time":48.00864854,"test":[0.6705636201]},
|
||||||
|
{"learn":[0.6597479006],"iteration":565,"passed_time":18.93001053,"remaining_time":47.96048604,"test":[0.6705537522]},
|
||||||
|
{"learn":[0.6597310938],"iteration":566,"passed_time":18.95858079,"remaining_time":47.91472006,"test":[0.670555083]},
|
||||||
|
{"learn":[0.6597096581],"iteration":567,"passed_time":18.9833487,"remaining_time":47.85942842,"test":[0.6705524541]},
|
||||||
|
{"learn":[0.6596862311],"iteration":568,"passed_time":19.0162481,"remaining_time":47.82469425,"test":[0.6705503132]},
|
||||||
|
{"learn":[0.6596574779],"iteration":569,"passed_time":19.03781666,"remaining_time":47.76154004,"test":[0.6705354602]},
|
||||||
|
{"learn":[0.6596385418],"iteration":570,"passed_time":19.0681355,"remaining_time":47.72043018,"test":[0.6705387012]},
|
||||||
|
{"learn":[0.6596189903],"iteration":571,"passed_time":19.09073714,"remaining_time":47.66009201,"test":[0.6705411923]},
|
||||||
|
{"learn":[0.65959275],"iteration":572,"passed_time":19.11146842,"remaining_time":47.59522765,"test":[0.6705390018]},
|
||||||
|
{"learn":[0.6595730662],"iteration":573,"passed_time":19.141368,"remaining_time":47.55329403,"test":[0.6705354939]},
|
||||||
|
{"learn":[0.6595566809],"iteration":574,"passed_time":19.16428373,"remaining_time":47.49409447,"test":[0.670531296]},
|
||||||
|
{"learn":[0.6595365076],"iteration":575,"passed_time":19.19652276,"remaining_time":47.45807015,"test":[0.6705377163]},
|
||||||
|
{"learn":[0.6595163446],"iteration":576,"passed_time":19.21727405,"remaining_time":47.39372785,"test":[0.6705248875]},
|
||||||
|
{"learn":[0.6594816637],"iteration":577,"passed_time":19.24969594,"remaining_time":47.35824848,"test":[0.6705252902]},
|
||||||
|
{"learn":[0.6594570142],"iteration":578,"passed_time":19.27445137,"remaining_time":47.30396442,"test":[0.6705181562]},
|
||||||
|
{"learn":[0.6594353055],"iteration":579,"passed_time":19.29822455,"remaining_time":47.24737734,"test":[0.6705123446]},
|
||||||
|
{"learn":[0.6594162362],"iteration":580,"passed_time":19.32403522,"remaining_time":47.19587948,"test":[0.6705128345]},
|
||||||
|
{"learn":[0.659395036],"iteration":581,"passed_time":19.35739555,"remaining_time":47.16286408,"test":[0.6705173712]},
|
||||||
|
{"learn":[0.6593798831],"iteration":582,"passed_time":19.39112791,"remaining_time":47.13075172,"test":[0.670541941]},
|
||||||
|
{"learn":[0.6593556719],"iteration":583,"passed_time":19.42704318,"remaining_time":47.1039266,"test":[0.6705463243]},
|
||||||
|
{"learn":[0.6593292627],"iteration":584,"passed_time":19.46022169,"remaining_time":47.07045077,"test":[0.6705513215]},
|
||||||
|
{"learn":[0.6592976737],"iteration":585,"passed_time":19.48332075,"remaining_time":47.01265452,"test":[0.6705455889]},
|
||||||
|
{"learn":[0.6592754841],"iteration":586,"passed_time":19.5115578,"remaining_time":46.9673444,"test":[0.6705408087]},
|
||||||
|
{"learn":[0.6592510441],"iteration":587,"passed_time":19.54275193,"remaining_time":46.92919341,"test":[0.6705510193]},
|
||||||
|
{"learn":[0.6592290326],"iteration":588,"passed_time":19.56411389,"remaining_time":46.86751222,"test":[0.6705456751]},
|
||||||
|
{"learn":[0.6592097404],"iteration":589,"passed_time":19.59700884,"remaining_time":46.8335296,"test":[0.6705402427]},
|
||||||
|
{"learn":[0.6591876204],"iteration":590,"passed_time":19.62169623,"remaining_time":46.77998306,"test":[0.6705443402]},
|
||||||
|
{"learn":[0.6591705995],"iteration":591,"passed_time":19.64747626,"remaining_time":46.72913272,"test":[0.67054441]},
|
||||||
|
{"learn":[0.6591456195],"iteration":592,"passed_time":19.67090184,"remaining_time":46.67278059,"test":[0.6705441955]},
|
||||||
|
{"learn":[0.6591107122],"iteration":593,"passed_time":19.69910949,"remaining_time":46.62785848,"test":[0.6705319356]},
|
||||||
|
{"learn":[0.6590819533],"iteration":594,"passed_time":19.72694709,"remaining_time":46.58211876,"test":[0.6705358843]},
|
||||||
|
{"learn":[0.6590551327],"iteration":595,"passed_time":19.7530808,"remaining_time":46.53242523,"test":[0.6705334396]},
|
||||||
|
{"learn":[0.6590373916],"iteration":596,"passed_time":19.77835609,"remaining_time":46.48079328,"test":[0.6705320462]},
|
||||||
|
{"learn":[0.6590177149],"iteration":597,"passed_time":19.80378809,"remaining_time":46.4296169,"test":[0.6705332043]},
|
||||||
|
{"learn":[0.6589946095],"iteration":598,"passed_time":19.83052585,"remaining_time":46.38158048,"test":[0.6705328363]},
|
||||||
|
{"learn":[0.6589697628],"iteration":599,"passed_time":19.8579153,"remaining_time":46.33513569,"test":[0.6705315638]},
|
||||||
|
{"learn":[0.6589442269],"iteration":600,"passed_time":19.89600309,"remaining_time":46.31365777,"test":[0.6705274435]},
|
||||||
|
{"learn":[0.6589182437],"iteration":601,"passed_time":19.92518872,"remaining_time":46.27145155,"test":[0.670509808]},
|
||||||
|
{"learn":[0.6588837179],"iteration":602,"passed_time":19.95754179,"remaining_time":46.23662666,"test":[0.6705077789]},
|
||||||
|
{"learn":[0.6588674101],"iteration":603,"passed_time":19.99116426,"remaining_time":46.20474388,"test":[0.6705212132]},
|
||||||
|
{"learn":[0.6588406916],"iteration":604,"passed_time":20.01900069,"remaining_time":46.15951398,"test":[0.6705098442]},
|
||||||
|
{"learn":[0.6588149945],"iteration":605,"passed_time":20.04735837,"remaining_time":46.11554053,"test":[0.6705061509]},
|
||||||
|
{"learn":[0.6587866031],"iteration":606,"passed_time":20.07232044,"remaining_time":46.06382599,"test":[0.6705003071]},
|
||||||
|
{"learn":[0.6587636648],"iteration":607,"passed_time":20.09871086,"remaining_time":46.01546959,"test":[0.6705045031]},
|
||||||
|
{"learn":[0.6587502469],"iteration":608,"passed_time":20.12348304,"remaining_time":45.96348917,"test":[0.6705083194]},
|
||||||
|
{"learn":[0.6587292784],"iteration":609,"passed_time":20.14920752,"remaining_time":45.91376797,"test":[0.6705329997]},
|
||||||
|
{"learn":[0.6587104112],"iteration":610,"passed_time":20.17662353,"remaining_time":45.86797068,"test":[0.6705269987]},
|
||||||
|
{"learn":[0.6586953782],"iteration":611,"passed_time":20.20202219,"remaining_time":45.81765818,"test":[0.6705315607]},
|
||||||
|
{"learn":[0.6586641191],"iteration":612,"passed_time":20.23050051,"remaining_time":45.77439512,"test":[0.6705142835]},
|
||||||
|
{"learn":[0.6586450136],"iteration":613,"passed_time":20.25381994,"remaining_time":45.71953492,"test":[0.6705165015]},
|
||||||
|
{"learn":[0.6586136263],"iteration":614,"passed_time":20.28518384,"remaining_time":45.68289369,"test":[0.6705001061]},
|
||||||
|
{"learn":[0.6585862768],"iteration":615,"passed_time":20.3078175,"remaining_time":45.62665489,"test":[0.6705013916]},
|
||||||
|
{"learn":[0.6585585235],"iteration":616,"passed_time":20.33878033,"remaining_time":45.5891948,"test":[0.6705037253]},
|
||||||
|
{"learn":[0.6585371631],"iteration":617,"passed_time":20.36122842,"remaining_time":45.53271469,"test":[0.67049647]},
|
||||||
|
{"learn":[0.6585092632],"iteration":618,"passed_time":20.3943397,"remaining_time":45.50013429,"test":[0.6705005632]},
|
||||||
|
{"learn":[0.6584914317],"iteration":619,"passed_time":20.42384285,"remaining_time":45.45952119,"test":[0.6704957943]},
|
||||||
|
{"learn":[0.6584662432],"iteration":620,"passed_time":20.45411533,"remaining_time":45.42065225,"test":[0.6704955333]},
|
||||||
|
{"learn":[0.6584454668],"iteration":621,"passed_time":20.488223,"remaining_time":45.39030754,"test":[0.6704961207]},
|
||||||
|
{"learn":[0.6584249408],"iteration":622,"passed_time":20.51043528,"remaining_time":45.33365872,"test":[0.6704921459]},
|
||||||
|
{"learn":[0.6583931228],"iteration":623,"passed_time":20.54384208,"remaining_time":45.30180561,"test":[0.6704751713]},
|
||||||
|
{"learn":[0.6583660767],"iteration":624,"passed_time":20.56912557,"remaining_time":45.25207624,"test":[0.6704753101]},
|
||||||
|
{"learn":[0.658354264],"iteration":625,"passed_time":20.59414123,"remaining_time":45.20183714,"test":[0.6704620888]},
|
||||||
|
{"learn":[0.6583253625],"iteration":626,"passed_time":20.61901142,"remaining_time":45.15135993,"test":[0.6704604282]},
|
||||||
|
{"learn":[0.6582968632],"iteration":627,"passed_time":20.6468542,"remaining_time":45.10745855,"test":[0.6704663192]},
|
||||||
|
{"learn":[0.6582687399],"iteration":628,"passed_time":20.67583093,"remaining_time":45.06607981,"test":[0.6704680085]},
|
||||||
|
{"learn":[0.658242535],"iteration":629,"passed_time":20.7010198,"remaining_time":45.01650336,"test":[0.670453228]},
|
||||||
|
{"learn":[0.6582199874],"iteration":630,"passed_time":20.72783977,"remaining_time":44.97054302,"test":[0.6704577785]},
|
||||||
|
{"learn":[0.6581918101],"iteration":631,"passed_time":20.75222724,"remaining_time":44.91937795,"test":[0.67046675]},
|
||||||
|
{"learn":[0.6581735218],"iteration":632,"passed_time":20.78264004,"remaining_time":44.88130954,"test":[0.6704731863]},
|
||||||
|
{"learn":[0.6581445869],"iteration":633,"passed_time":20.80459182,"remaining_time":44.82503538,"test":[0.6704811116]},
|
||||||
|
{"learn":[0.6581202427],"iteration":634,"passed_time":20.83717209,"remaining_time":44.79171637,"test":[0.6704839644]},
|
||||||
|
{"learn":[0.6580977862],"iteration":635,"passed_time":20.86231353,"remaining_time":44.74244599,"test":[0.6704854798]},
|
||||||
|
{"learn":[0.6580724179],"iteration":636,"passed_time":20.89269601,"remaining_time":44.70446572,"test":[0.6704835837]},
|
||||||
|
{"learn":[0.6580426322],"iteration":637,"passed_time":20.93117347,"remaining_time":44.68379039,"test":[0.6704736198]},
|
||||||
|
{"learn":[0.6580111256],"iteration":638,"passed_time":20.96066949,"remaining_time":44.64392985,"test":[0.6704640242]},
|
||||||
|
{"learn":[0.6579834747],"iteration":639,"passed_time":20.9941179,"remaining_time":44.61250055,"test":[0.670465663]},
|
||||||
|
{"learn":[0.6579541367],"iteration":640,"passed_time":21.0224519,"remaining_time":44.57022174,"test":[0.6704646829]},
|
||||||
|
{"learn":[0.6579254503],"iteration":641,"passed_time":21.0522529,"remaining_time":44.53108946,"test":[0.6704600961]},
|
||||||
|
{"learn":[0.657898555],"iteration":642,"passed_time":21.08260618,"remaining_time":44.49315178,"test":[0.6704643207]},
|
||||||
|
{"learn":[0.6578676875],"iteration":643,"passed_time":21.10716702,"remaining_time":44.44304112,"test":[0.6704600533]},
|
||||||
|
{"learn":[0.6578324163],"iteration":644,"passed_time":21.13594828,"remaining_time":44.40187584,"test":[0.6704614691]},
|
||||||
|
{"learn":[0.6578062223],"iteration":645,"passed_time":21.1601277,"remaining_time":44.35110357,"test":[0.6704728212]},
|
||||||
|
{"learn":[0.6577760631],"iteration":646,"passed_time":21.18552999,"remaining_time":44.30297075,"test":[0.6704758731]},
|
||||||
|
{"learn":[0.6577483474],"iteration":647,"passed_time":21.21048648,"remaining_time":44.25397797,"test":[0.6704833026]},
|
||||||
|
{"learn":[0.6577249642],"iteration":648,"passed_time":21.23686209,"remaining_time":44.20801337,"test":[0.6704767664]},
|
||||||
|
{"learn":[0.6576974966],"iteration":649,"passed_time":21.26287585,"remaining_time":44.16135753,"test":[0.6704702727]},
|
||||||
|
{"learn":[0.657675114],"iteration":650,"passed_time":21.28806218,"remaining_time":44.11305051,"test":[0.6704671372]},
|
||||||
|
{"learn":[0.6576447891],"iteration":651,"passed_time":21.31506267,"remaining_time":44.06856515,"test":[0.6704699936]},
|
||||||
|
{"learn":[0.6576102356],"iteration":652,"passed_time":21.3435081,"remaining_time":44.02711394,"test":[0.6704587989]},
|
||||||
|
{"learn":[0.6575793887],"iteration":653,"passed_time":21.37776713,"remaining_time":43.99766753,"test":[0.6704637668]},
|
||||||
|
{"learn":[0.6575543309],"iteration":654,"passed_time":21.40301154,"remaining_time":43.94969545,"test":[0.6704653717]},
|
||||||
|
{"learn":[0.6575340787],"iteration":655,"passed_time":21.44023109,"remaining_time":43.92632711,"test":[0.6704598273]},
|
||||||
|
{"learn":[0.6575061464],"iteration":656,"passed_time":21.4778965,"remaining_time":43.903828,"test":[0.6704522865]},
|
||||||
|
{"learn":[0.657476113],"iteration":657,"passed_time":21.50245582,"remaining_time":43.85455275,"test":[0.6704558586]},
|
||||||
|
{"learn":[0.6574447014],"iteration":658,"passed_time":21.53379663,"remaining_time":43.81915217,"test":[0.6704466331]},
|
||||||
|
{"learn":[0.6574247361],"iteration":659,"passed_time":21.55955041,"remaining_time":43.77242053,"test":[0.6704405886]},
|
||||||
|
{"learn":[0.6574034983],"iteration":660,"passed_time":21.58626671,"remaining_time":43.72770215,"test":[0.6704463767]},
|
||||||
|
{"learn":[0.6573783832],"iteration":661,"passed_time":21.61183918,"remaining_time":43.68072633,"test":[0.6704475216]},
|
||||||
|
{"learn":[0.657357694],"iteration":662,"passed_time":21.6373217,"remaining_time":43.63363366,"test":[0.6704572386]},
|
||||||
|
{"learn":[0.6573411592],"iteration":663,"passed_time":21.66283476,"remaining_time":43.58666753,"test":[0.6704658153]},
|
||||||
|
{"learn":[0.6573118559],"iteration":664,"passed_time":21.68841321,"remaining_time":43.5398972,"test":[0.6704600945]},
|
||||||
|
{"learn":[0.6572819076],"iteration":665,"passed_time":21.71420973,"remaining_time":43.4936273,"test":[0.6704561998]},
|
||||||
|
{"learn":[0.6572430097],"iteration":666,"passed_time":21.74213421,"remaining_time":43.45167151,"test":[0.6704535154]},
|
||||||
|
{"learn":[0.6572160391],"iteration":667,"passed_time":21.77174463,"remaining_time":43.41311953,"test":[0.6704413781]},
|
||||||
|
{"learn":[0.6571931413],"iteration":668,"passed_time":21.81895309,"remaining_time":43.40960622,"test":[0.6704450013]},
|
||||||
|
{"learn":[0.6571737099],"iteration":669,"passed_time":21.84627583,"remaining_time":43.36648784,"test":[0.6704422199]},
|
||||||
|
{"learn":[0.6571532872],"iteration":670,"passed_time":21.88834724,"remaining_time":43.35262814,"test":[0.67044342]},
|
||||||
|
{"learn":[0.6571208939],"iteration":671,"passed_time":21.93403139,"remaining_time":43.34582395,"test":[0.6704415341]},
|
||||||
|
{"learn":[0.6570887673],"iteration":672,"passed_time":21.9714274,"remaining_time":43.32256191,"test":[0.6704439539]},
|
||||||
|
{"learn":[0.6570633692],"iteration":673,"passed_time":22.01942449,"remaining_time":43.32011406,"test":[0.6704498197]},
|
||||||
|
{"learn":[0.6570454361],"iteration":674,"passed_time":22.05319867,"remaining_time":43.2896122,"test":[0.6704452194]},
|
||||||
|
{"learn":[0.6570231031],"iteration":675,"passed_time":22.09079747,"remaining_time":43.26659149,"test":[0.6704366524]},
|
||||||
|
{"learn":[0.6570052089],"iteration":676,"passed_time":22.14192346,"remaining_time":43.26996269,"test":[0.6704427124]},
|
||||||
|
{"learn":[0.6569855794],"iteration":677,"passed_time":22.17624471,"remaining_time":43.24040635,"test":[0.6704395579]},
|
||||||
|
{"learn":[0.6569579709],"iteration":678,"passed_time":22.213192,"remaining_time":43.21594497,"test":[0.6704401246]},
|
||||||
|
{"learn":[0.6569333354],"iteration":679,"passed_time":22.23966403,"remaining_time":43.17111253,"test":[0.6704415621]},
|
||||||
|
{"learn":[0.6569069617],"iteration":680,"passed_time":22.27051241,"remaining_time":43.13481039,"test":[0.6704341343]},
|
||||||
|
{"learn":[0.6568931857],"iteration":681,"passed_time":22.29625075,"remaining_time":43.08864881,"test":[0.6704369615]},
|
||||||
|
{"learn":[0.6568734532],"iteration":682,"passed_time":22.32160622,"remaining_time":43.04180877,"test":[0.6704357425]},
|
||||||
|
{"learn":[0.6568435196],"iteration":683,"passed_time":22.35059872,"remaining_time":43.00202911,"test":[0.6704294622]},
|
||||||
|
{"learn":[0.6568108038],"iteration":684,"passed_time":22.37956576,"remaining_time":42.96223208,"test":[0.6704289794]},
|
||||||
|
{"learn":[0.6567811374],"iteration":685,"passed_time":22.41993338,"remaining_time":42.94430389,"test":[0.6704272409]},
|
||||||
|
{"learn":[0.6567467284],"iteration":686,"passed_time":22.45285267,"remaining_time":42.91207504,"test":[0.6704101162]},
|
||||||
|
{"learn":[0.6567172734],"iteration":687,"passed_time":22.4848431,"remaining_time":42.8780729,"test":[0.6704069439]},
|
||||||
|
{"learn":[0.6566967606],"iteration":688,"passed_time":22.51193834,"remaining_time":42.83476221,"test":[0.6704100747]},
|
||||||
|
{"learn":[0.6566720128],"iteration":689,"passed_time":22.53798671,"remaining_time":42.78951101,"test":[0.6704122261]},
|
||||||
|
{"learn":[0.6566441608],"iteration":690,"passed_time":22.57108439,"remaining_time":42.75766928,"test":[0.6704137826]},
|
||||||
|
{"learn":[0.6566172287],"iteration":691,"passed_time":22.59836588,"remaining_time":42.7148303,"test":[0.6704207952]},
|
||||||
|
{"learn":[0.6565952549],"iteration":692,"passed_time":22.62447507,"remaining_time":42.66982528,"test":[0.6704154834]},
|
||||||
|
{"learn":[0.6565702687],"iteration":693,"passed_time":22.65349415,"remaining_time":42.63035067,"test":[0.6704253514]},
|
||||||
|
{"learn":[0.6565392213],"iteration":694,"passed_time":22.68028991,"remaining_time":42.58673141,"test":[0.6704155636]},
|
||||||
|
{"learn":[0.6565157938],"iteration":695,"passed_time":22.70844406,"remaining_time":42.54570555,"test":[0.6704141298]},
|
||||||
|
{"learn":[0.6564902789],"iteration":696,"passed_time":22.73944116,"remaining_time":42.51003133,"test":[0.6704207635]},
|
||||||
|
{"learn":[0.6564644734],"iteration":697,"passed_time":22.7613976,"remaining_time":42.45750671,"test":[0.6704268341]},
|
||||||
|
{"learn":[0.6564349549],"iteration":698,"passed_time":22.79216825,"remaining_time":42.42147482,"test":[0.6704243126]},
|
||||||
|
{"learn":[0.6564046572],"iteration":699,"passed_time":22.8167121,"remaining_time":42.37389389,"test":[0.6704235165]},
|
||||||
|
{"learn":[0.6563744107],"iteration":700,"passed_time":22.84507296,"remaining_time":42.33345189,"test":[0.6704257736]},
|
||||||
|
{"learn":[0.6563525063],"iteration":701,"passed_time":22.87088832,"remaining_time":42.28833766,"test":[0.6704247758]},
|
||||||
|
{"learn":[0.6563189867],"iteration":702,"passed_time":22.90238907,"remaining_time":42.25376759,"test":[0.6704331799]},
|
||||||
|
{"learn":[0.6562939062],"iteration":703,"passed_time":22.94246813,"remaining_time":42.23499815,"test":[0.6704252722]},
|
||||||
|
{"learn":[0.6562739297],"iteration":704,"passed_time":22.97441688,"remaining_time":42.20123385,"test":[0.6704146644]},
|
||||||
|
{"learn":[0.656256438],"iteration":705,"passed_time":23.00262167,"remaining_time":42.16061253,"test":[0.6704164122]},
|
||||||
|
{"learn":[0.6562366475],"iteration":706,"passed_time":23.033437,"remaining_time":42.12480062,"test":[0.6704118954]},
|
||||||
|
{"learn":[0.6562073096],"iteration":707,"passed_time":23.0545813,"remaining_time":42.07135458,"test":[0.6704043129]},
|
||||||
|
{"learn":[0.6561864222],"iteration":708,"passed_time":23.08699831,"remaining_time":42.03852584,"test":[0.6703978198]},
|
||||||
|
{"learn":[0.6561578826],"iteration":709,"passed_time":23.11590694,"remaining_time":41.99932387,"test":[0.6703935976]},
|
||||||
|
{"learn":[0.6561208567],"iteration":710,"passed_time":23.14362702,"remaining_time":41.9579961,"test":[0.6703839683]},
|
||||||
|
{"learn":[0.6560924703],"iteration":711,"passed_time":23.16985155,"remaining_time":41.91400112,"test":[0.6703843723]},
|
||||||
|
{"learn":[0.6560656907],"iteration":712,"passed_time":23.19510285,"remaining_time":41.86829925,"test":[0.6703879502]},
|
||||||
|
{"learn":[0.6560362588],"iteration":713,"passed_time":23.23034771,"remaining_time":41.84065429,"test":[0.6703895978]},
|
||||||
|
{"learn":[0.6560124527],"iteration":714,"passed_time":23.25923754,"remaining_time":41.80156678,"test":[0.6703894359]},
|
||||||
|
{"learn":[0.6559875055],"iteration":715,"passed_time":23.28703452,"remaining_time":41.76054794,"test":[0.6703928777]},
|
||||||
|
{"learn":[0.6559547281],"iteration":716,"passed_time":23.31161175,"remaining_time":41.71380457,"test":[0.6703933128]},
|
||||||
|
{"learn":[0.6559230866],"iteration":717,"passed_time":23.34170355,"remaining_time":41.67696929,"test":[0.6703844355]},
|
||||||
|
{"learn":[0.6558924823],"iteration":718,"passed_time":23.37263658,"remaining_time":41.64165155,"test":[0.6703825151]},
|
||||||
|
{"learn":[0.6558676469],"iteration":719,"passed_time":23.40571088,"remaining_time":41.61015268,"test":[0.6703983542]},
|
||||||
|
{"learn":[0.6558459277],"iteration":720,"passed_time":23.4389719,"remaining_time":41.57898067,"test":[0.670399556]},
|
||||||
|
{"learn":[0.6558149638],"iteration":721,"passed_time":23.48304084,"remaining_time":41.56693379,"test":[0.6703931808]},
|
||||||
|
{"learn":[0.6557812248],"iteration":722,"passed_time":23.50734531,"remaining_time":41.5198893,"test":[0.6703886918]},
|
||||||
|
{"learn":[0.6557546502],"iteration":723,"passed_time":23.54055835,"remaining_time":41.48860836,"test":[0.6703847574]},
|
||||||
|
{"learn":[0.6557274948],"iteration":724,"passed_time":23.56652491,"remaining_time":41.44457829,"test":[0.6703885941]},
|
||||||
|
{"learn":[0.6557044723],"iteration":725,"passed_time":23.59580183,"remaining_time":41.40640708,"test":[0.6703788615]},
|
||||||
|
{"learn":[0.6556751811],"iteration":726,"passed_time":23.62334313,"remaining_time":41.36522119,"test":[0.6703799906]},
|
||||||
|
{"learn":[0.6556539158],"iteration":727,"passed_time":23.64879831,"remaining_time":41.32042782,"test":[0.6703774518]},
|
||||||
|
{"learn":[0.6556182915],"iteration":728,"passed_time":23.67755213,"remaining_time":41.28143862,"test":[0.6703783496]},
|
||||||
|
{"learn":[0.6555977079],"iteration":729,"passed_time":23.70012944,"remaining_time":41.23173204,"test":[0.6703648854]},
|
||||||
|
{"learn":[0.6555667903],"iteration":730,"passed_time":23.72866102,"remaining_time":41.19243615,"test":[0.6703716654]},
|
||||||
|
{"learn":[0.6555394075],"iteration":731,"passed_time":23.75226732,"remaining_time":41.14463793,"test":[0.6703550938]},
|
||||||
|
{"learn":[0.6555122742],"iteration":732,"passed_time":23.7844108,"remaining_time":41.11166233,"test":[0.6703467057]},
|
||||||
|
{"learn":[0.6554814941],"iteration":733,"passed_time":23.80747563,"remaining_time":41.06303017,"test":[0.6703484503]},
|
||||||
|
{"learn":[0.6554517373],"iteration":734,"passed_time":23.84023587,"remaining_time":41.03115425,"test":[0.6703549183]},
|
||||||
|
{"learn":[0.655429552],"iteration":735,"passed_time":23.87042124,"remaining_time":40.99485387,"test":[0.6703501504]},
|
||||||
|
{"learn":[0.655396579],"iteration":736,"passed_time":23.9087808,"remaining_time":40.97257823,"test":[0.6703672622]},
|
||||||
|
{"learn":[0.6553735864],"iteration":737,"passed_time":23.94161529,"remaining_time":40.94081097,"test":[0.6703560249]},
|
||||||
|
{"learn":[0.6553472597],"iteration":738,"passed_time":23.97478791,"remaining_time":40.90961779,"test":[0.6703547155]},
|
||||||
|
{"learn":[0.6553252832],"iteration":739,"passed_time":24.00628859,"remaining_time":40.87557247,"test":[0.6703593236]},
|
||||||
|
{"learn":[0.6552971659],"iteration":740,"passed_time":24.03623034,"remaining_time":40.83888528,"test":[0.6703606827]},
|
||||||
|
{"learn":[0.6552763852],"iteration":741,"passed_time":24.06404686,"remaining_time":40.79861313,"test":[0.6703511404]},
|
||||||
|
{"learn":[0.6552488203],"iteration":742,"passed_time":24.09270947,"remaining_time":40.75980593,"test":[0.6703431646]},
|
||||||
|
{"learn":[0.65521229],"iteration":743,"passed_time":24.12724624,"remaining_time":40.73094258,"test":[0.6703475116]},
|
||||||
|
{"learn":[0.6551949744],"iteration":744,"passed_time":24.15397955,"remaining_time":40.68891857,"test":[0.6703483634]},
|
||||||
|
{"learn":[0.6551673797],"iteration":745,"passed_time":24.17955779,"remaining_time":40.64499392,"test":[0.6703475713]},
|
||||||
|
{"learn":[0.6551421856],"iteration":746,"passed_time":24.20715317,"remaining_time":40.60450191,"test":[0.670360457]},
|
||||||
|
{"learn":[0.6551255516],"iteration":747,"passed_time":24.23336836,"remaining_time":40.5617342,"test":[0.6703664352]},
|
||||||
|
{"learn":[0.6551019608],"iteration":748,"passed_time":24.2614437,"remaining_time":40.52211759,"test":[0.6703617612]},
|
||||||
|
{"learn":[0.6550758728],"iteration":749,"passed_time":24.29512083,"remaining_time":40.49186805,"test":[0.6703669926]},
|
||||||
|
{"learn":[0.655051966],"iteration":750,"passed_time":24.31839238,"remaining_time":40.44430371,"test":[0.6703670837]},
|
||||||
|
{"learn":[0.6550351058],"iteration":751,"passed_time":24.34977118,"remaining_time":40.41025856,"test":[0.6703706628]},
|
||||||
|
{"learn":[0.6549998756],"iteration":752,"passed_time":24.3762114,"remaining_time":40.36804198,"test":[0.670369618]},
|
||||||
|
{"learn":[0.6549721212],"iteration":753,"passed_time":24.40831154,"remaining_time":40.3352204,"test":[0.6703692351]},
|
||||||
|
{"learn":[0.6549401744],"iteration":754,"passed_time":24.44267281,"remaining_time":40.30612934,"test":[0.6703624433]},
|
||||||
|
{"learn":[0.6549207325],"iteration":755,"passed_time":24.47460721,"remaining_time":40.27303091,"test":[0.6703686285]},
|
||||||
|
{"learn":[0.6548900891],"iteration":756,"passed_time":24.50826603,"remaining_time":40.24276708,"test":[0.6703598432]},
|
||||||
|
{"learn":[0.6548682731],"iteration":757,"passed_time":24.54826542,"remaining_time":40.22288345,"test":[0.6703618766]},
|
||||||
|
{"learn":[0.6548418938],"iteration":758,"passed_time":24.57546587,"remaining_time":40.18201996,"test":[0.6703694148]},
|
||||||
|
{"learn":[0.6548234717],"iteration":759,"passed_time":24.60502723,"remaining_time":40.14504442,"test":[0.6703683652]},
|
||||||
|
{"learn":[0.6547996833],"iteration":760,"passed_time":24.63261096,"remaining_time":40.10486856,"test":[0.6703604855]},
|
||||||
|
{"learn":[0.6547726174],"iteration":761,"passed_time":24.66001655,"remaining_time":40.06443634,"test":[0.6703758987]},
|
||||||
|
{"learn":[0.6547509314],"iteration":762,"passed_time":24.68929907,"remaining_time":40.02708119,"test":[0.6703773302]},
|
||||||
|
{"learn":[0.6547168175],"iteration":763,"passed_time":24.71425118,"remaining_time":39.98274144,"test":[0.6703641028]},
|
||||||
|
{"learn":[0.6546907846],"iteration":764,"passed_time":24.74589169,"remaining_time":39.94924999,"test":[0.6703649602]},
|
||||||
|
{"learn":[0.6546671611],"iteration":765,"passed_time":24.76625006,"remaining_time":39.89758822,"test":[0.6703567811]},
|
||||||
|
{"learn":[0.6546475893],"iteration":766,"passed_time":24.79734832,"remaining_time":39.86327312,"test":[0.6703544688]},
|
||||||
|
{"learn":[0.6546206223],"iteration":767,"passed_time":24.82531049,"remaining_time":39.82393558,"test":[0.6703611821]},
|
||||||
|
{"learn":[0.6545874193],"iteration":768,"passed_time":24.85435247,"remaining_time":39.78635616,"test":[0.6703527821]},
|
||||||
|
{"learn":[0.6545620629],"iteration":769,"passed_time":24.88095966,"remaining_time":39.74490958,"test":[0.6703523616]},
|
||||||
|
{"learn":[0.6545346297],"iteration":770,"passed_time":24.90935211,"remaining_time":39.70634726,"test":[0.6703616298]},
|
||||||
|
{"learn":[0.6545172316],"iteration":771,"passed_time":24.94098876,"remaining_time":39.67297175,"test":[0.6703603551]},
|
||||||
|
{"learn":[0.6544943049],"iteration":772,"passed_time":24.97035098,"remaining_time":39.6359905,"test":[0.6703675655]},
|
||||||
|
{"learn":[0.6544632323],"iteration":773,"passed_time":25.00434422,"remaining_time":39.60636436,"test":[0.6703582411]},
|
||||||
|
{"learn":[0.6544384097],"iteration":774,"passed_time":25.03067441,"remaining_time":39.56461439,"test":[0.6703581437]},
|
||||||
|
{"learn":[0.6544084745],"iteration":775,"passed_time":25.05692652,"remaining_time":39.522781,"test":[0.6703551885]},
|
||||||
|
{"learn":[0.6543765257],"iteration":776,"passed_time":25.08660163,"remaining_time":39.48637554,"test":[0.6703608491]},
|
||||||
|
{"learn":[0.6543536123],"iteration":777,"passed_time":25.10764591,"remaining_time":39.43643098,"test":[0.6703674554]},
|
||||||
|
{"learn":[0.6543303593],"iteration":778,"passed_time":25.13940138,"remaining_time":39.40334928,"test":[0.6703679619]},
|
||||||
|
{"learn":[0.6543005831],"iteration":779,"passed_time":25.15916899,"remaining_time":39.35152074,"test":[0.6703701757]},
|
||||||
|
{"learn":[0.6542678123],"iteration":780,"passed_time":25.18841105,"remaining_time":39.31456219,"test":[0.6703603462]},
|
||||||
|
{"learn":[0.6542439303],"iteration":781,"passed_time":25.21444083,"remaining_time":39.27262012,"test":[0.670359801]},
|
||||||
|
{"learn":[0.6542100401],"iteration":782,"passed_time":25.24017824,"remaining_time":39.23026426,"test":[0.6703523669]},
|
||||||
|
{"learn":[0.6541836178],"iteration":783,"passed_time":25.2660091,"remaining_time":39.18809574,"test":[0.6703365674]},
|
||||||
|
{"learn":[0.654158129],"iteration":784,"passed_time":25.28891553,"remaining_time":39.1414425,"test":[0.6703486118]},
|
||||||
|
{"learn":[0.6541343464],"iteration":785,"passed_time":25.31589904,"remaining_time":39.10114686,"test":[0.6703450011]},
|
||||||
|
{"learn":[0.6541092921],"iteration":786,"passed_time":25.34123581,"remaining_time":39.05834694,"test":[0.6703473135]},
|
||||||
|
{"learn":[0.6540812254],"iteration":787,"passed_time":25.36728606,"remaining_time":39.01668871,"test":[0.670350998]},
|
||||||
|
{"learn":[0.654060259],"iteration":788,"passed_time":25.39177931,"remaining_time":38.97268028,"test":[0.6703417767]},
|
||||||
|
{"learn":[0.6540467253],"iteration":789,"passed_time":25.41712461,"remaining_time":38.9300263,"test":[0.6703349821]},
|
||||||
|
{"learn":[0.6540306837],"iteration":790,"passed_time":25.44804125,"remaining_time":38.89593157,"test":[0.6703457717]},
|
||||||
|
{"learn":[0.6540103667],"iteration":791,"passed_time":25.48249341,"remaining_time":38.86723743,"test":[0.6703506266]},
|
||||||
|
{"learn":[0.6539821302],"iteration":792,"passed_time":25.51450657,"remaining_time":38.83481643,"test":[0.6703596395]},
|
||||||
|
{"learn":[0.6539577914],"iteration":793,"passed_time":25.54216564,"remaining_time":38.79578307,"test":[0.6703799895]},
|
||||||
|
{"learn":[0.653923724],"iteration":794,"passed_time":25.56982738,"remaining_time":38.75678238,"test":[0.6703687687]},
|
||||||
|
{"learn":[0.6539086888],"iteration":795,"passed_time":25.59539769,"remaining_time":38.71464675,"test":[0.6703780675]},
|
||||||
|
{"learn":[0.6538798424],"iteration":796,"passed_time":25.61874122,"remaining_time":38.66919157,"test":[0.670374835]},
|
||||||
|
{"learn":[0.6538566996],"iteration":797,"passed_time":25.64394874,"remaining_time":38.62659947,"test":[0.6703831387]},
|
||||||
|
{"learn":[0.6538290752],"iteration":798,"passed_time":25.66776244,"remaining_time":38.58195581,"test":[0.670377656]},
|
||||||
|
{"learn":[0.6538051255],"iteration":799,"passed_time":25.69593415,"remaining_time":38.54390122,"test":[0.6703689741]},
|
||||||
|
{"learn":[0.6537917354],"iteration":800,"passed_time":25.71651353,"remaining_time":38.49450652,"test":[0.6703709756]},
|
||||||
|
{"learn":[0.6537684302],"iteration":801,"passed_time":25.74304126,"remaining_time":38.45406912,"test":[0.6703737517]},
|
||||||
|
{"learn":[0.6537402991],"iteration":802,"passed_time":25.77084871,"remaining_time":38.41557398,"test":[0.6703818964]},
|
||||||
|
{"learn":[0.6537165427],"iteration":803,"passed_time":25.79028824,"remaining_time":38.36465763,"test":[0.6703812173]},
|
||||||
|
{"learn":[0.6536853601],"iteration":804,"passed_time":25.82203653,"remaining_time":38.3320915,"test":[0.6703960068]},
|
||||||
|
{"learn":[0.6536681479],"iteration":805,"passed_time":25.84395064,"remaining_time":38.28495914,"test":[0.6703976729]},
|
||||||
|
{"learn":[0.6536409101],"iteration":806,"passed_time":25.87390688,"remaining_time":38.24977808,"test":[0.6704024604]},
|
||||||
|
{"learn":[0.6536120189],"iteration":807,"passed_time":25.89606204,"remaining_time":38.20310143,"test":[0.6704085008]},
|
||||||
|
{"learn":[0.6535912493],"iteration":808,"passed_time":25.92585483,"remaining_time":38.16772942,"test":[0.6704076633]},
|
||||||
|
{"learn":[0.6535617421],"iteration":809,"passed_time":25.95539059,"remaining_time":38.13199358,"test":[0.6704111719]},
|
||||||
|
{"learn":[0.6535315174],"iteration":810,"passed_time":25.98822968,"remaining_time":38.10111601,"test":[0.6704220803]},
|
||||||
|
{"learn":[0.6534972927],"iteration":811,"passed_time":26.02835773,"remaining_time":38.08089777,"test":[0.6704265011]},
|
||||||
|
{"learn":[0.6534818476],"iteration":812,"passed_time":26.0558565,"remaining_time":38.04219146,"test":[0.6704251162]},
|
||||||
|
{"learn":[0.6534498323],"iteration":813,"passed_time":26.08151817,"remaining_time":38.00083606,"test":[0.6704375472]},
|
||||||
|
{"learn":[0.6534305025],"iteration":814,"passed_time":26.10848988,"remaining_time":37.96142393,"test":[0.6704319336]},
|
||||||
|
{"learn":[0.6534081059],"iteration":815,"passed_time":26.13143346,"remaining_time":37.91619757,"test":[0.670437614]},
|
||||||
|
{"learn":[0.6533765804],"iteration":816,"passed_time":26.15923661,"remaining_time":37.87806231,"test":[0.6704554331]},
|
||||||
|
{"learn":[0.6533441549],"iteration":817,"passed_time":26.18805523,"remaining_time":37.84141966,"test":[0.6704603317]},
|
||||||
|
{"learn":[0.6533053405],"iteration":818,"passed_time":26.2140726,"remaining_time":37.8007567,"test":[0.6704548042]},
|
||||||
|
{"learn":[0.6532838469],"iteration":819,"passed_time":26.24289367,"remaining_time":37.76416405,"test":[0.6704502654]},
|
||||||
|
{"learn":[0.6532604302],"iteration":820,"passed_time":26.27260776,"remaining_time":37.72887277,"test":[0.6704512072]},
|
||||||
|
{"learn":[0.6532364412],"iteration":821,"passed_time":26.29880394,"remaining_time":37.68855358,"test":[0.6704433481]},
|
||||||
|
{"learn":[0.6532100089],"iteration":822,"passed_time":26.32785215,"remaining_time":37.65234749,"test":[0.6704095112]},
|
||||||
|
{"learn":[0.6531782515],"iteration":823,"passed_time":26.35925682,"remaining_time":37.61952188,"test":[0.6704086019]},
|
||||||
|
{"learn":[0.6531449701],"iteration":824,"passed_time":26.38596096,"remaining_time":37.580005,"test":[0.6703987131]},
|
||||||
|
{"learn":[0.653115452],"iteration":825,"passed_time":26.40854839,"remaining_time":37.53466805,"test":[0.6704019708]},
|
||||||
|
{"learn":[0.6530787602],"iteration":826,"passed_time":26.44419918,"remaining_time":37.50791492,"test":[0.6704046556]},
|
||||||
|
{"learn":[0.653052397],"iteration":827,"passed_time":26.47784276,"remaining_time":37.47829917,"test":[0.6704091961]},
|
||||||
|
{"learn":[0.6530313579],"iteration":828,"passed_time":26.51701028,"remaining_time":37.45647652,"test":[0.6704103204]},
|
||||||
|
{"learn":[0.6530010363],"iteration":829,"passed_time":26.53963123,"remaining_time":37.41128739,"test":[0.6704074257]},
|
||||||
|
{"learn":[0.6529752146],"iteration":830,"passed_time":26.57362226,"remaining_time":37.38214732,"test":[0.6704115335]},
|
||||||
|
{"learn":[0.652954801],"iteration":831,"passed_time":26.59767057,"remaining_time":37.33903754,"test":[0.6704041275]},
|
||||||
|
{"learn":[0.6529330351],"iteration":832,"passed_time":26.62378941,"remaining_time":37.29887425,"test":[0.6704004556]},
|
||||||
|
{"learn":[0.6528993709],"iteration":833,"passed_time":26.65024746,"remaining_time":37.25921887,"test":[0.6704037097]},
|
||||||
|
{"learn":[0.6528665883],"iteration":834,"passed_time":26.67774911,"remaining_time":37.22105115,"test":[0.6704035477]},
|
||||||
|
{"learn":[0.6528413041],"iteration":835,"passed_time":26.70473813,"remaining_time":37.1821952,"test":[0.6704025281]},
|
||||||
|
{"learn":[0.6528217161],"iteration":836,"passed_time":26.72833235,"remaining_time":37.13865056,"test":[0.6704024549]},
|
||||||
|
{"learn":[0.6527978782],"iteration":837,"passed_time":26.76384162,"remaining_time":37.11167537,"test":[0.670405721]},
|
||||||
|
{"learn":[0.6527789461],"iteration":838,"passed_time":26.79137369,"remaining_time":37.07364106,"test":[0.6703983189]},
|
||||||
|
{"learn":[0.6527432001],"iteration":839,"passed_time":26.82295602,"remaining_time":37.04122498,"test":[0.6704035256]},
|
||||||
|
{"learn":[0.6527139767],"iteration":840,"passed_time":26.87217031,"remaining_time":37.03310985,"test":[0.6704047613]},
|
||||||
|
{"learn":[0.6526857244],"iteration":841,"passed_time":26.92488006,"remaining_time":37.0297044,"test":[0.6704139617]},
|
||||||
|
{"learn":[0.652657086],"iteration":842,"passed_time":26.98258041,"remaining_time":37.03303147,"test":[0.6704066193]},
|
||||||
|
{"learn":[0.6526355016],"iteration":843,"passed_time":27.05424841,"remaining_time":37.05534497,"test":[0.670402892]},
|
||||||
|
{"learn":[0.6526054936],"iteration":844,"passed_time":27.09765154,"remaining_time":37.03880181,"test":[0.6704081961]},
|
||||||
|
{"learn":[0.6525793707],"iteration":845,"passed_time":27.12038959,"remaining_time":36.99400661,"test":[0.6704029862]},
|
||||||
|
{"learn":[0.6525584692],"iteration":846,"passed_time":27.14691224,"remaining_time":36.95441537,"test":[0.6704014281]},
|
||||||
|
{"learn":[0.6525279747],"iteration":847,"passed_time":27.18096334,"remaining_time":36.92508227,"test":[0.6704036115]},
|
||||||
|
{"learn":[0.6525038765],"iteration":848,"passed_time":27.20686017,"remaining_time":36.88468322,"test":[0.6704016777]},
|
||||||
|
{"learn":[0.6524849104],"iteration":849,"passed_time":27.23465701,"remaining_time":36.8468889,"test":[0.6704085392]},
|
||||||
|
{"learn":[0.6524610603],"iteration":850,"passed_time":27.26094834,"remaining_time":36.80708536,"test":[0.6704042952]},
|
||||||
|
{"learn":[0.6524357337],"iteration":851,"passed_time":27.28945577,"remaining_time":36.77029957,"test":[0.670394789]},
|
||||||
|
{"learn":[0.6524082286],"iteration":852,"passed_time":27.31865398,"remaining_time":36.73446203,"test":[0.6703885644]},
|
||||||
|
{"learn":[0.65238051],"iteration":853,"passed_time":27.34791322,"remaining_time":36.69872195,"test":[0.6703946813]},
|
||||||
|
{"learn":[0.6523557826],"iteration":854,"passed_time":27.3865535,"remaining_time":36.67555995,"test":[0.6704042137]},
|
||||||
|
{"learn":[0.6523391233],"iteration":855,"passed_time":27.41370907,"remaining_time":36.63701306,"test":[0.6704077517]},
|
||||||
|
{"learn":[0.652325347],"iteration":856,"passed_time":27.43905921,"remaining_time":36.5960848,"test":[0.6704118698]},
|
||||||
|
{"learn":[0.6522924958],"iteration":857,"passed_time":27.47159295,"remaining_time":36.56475425,"test":[0.6704114259]},
|
||||||
|
{"learn":[0.6522623584],"iteration":858,"passed_time":27.50124299,"remaining_time":36.52959052,"test":[0.6704157567]},
|
||||||
|
{"learn":[0.6522343891],"iteration":859,"passed_time":27.53509105,"remaining_time":36.50000442,"test":[0.6703837005]},
|
||||||
|
{"learn":[0.6522094424],"iteration":860,"passed_time":27.57211091,"remaining_time":36.47460432,"test":[0.6703829482]},
|
||||||
|
{"learn":[0.6521841478],"iteration":861,"passed_time":27.59555719,"remaining_time":36.43125764,"test":[0.6703818491]},
|
||||||
|
{"learn":[0.6521657946],"iteration":862,"passed_time":27.6272049,"remaining_time":36.39876242,"test":[0.6703826129]},
|
||||||
|
{"learn":[0.6521304278],"iteration":863,"passed_time":27.65462267,"remaining_time":36.36070759,"test":[0.6703834487]},
|
||||||
|
{"learn":[0.6521045712],"iteration":864,"passed_time":27.68321566,"remaining_time":36.3242194,"test":[0.6703868275]},
|
||||||
|
{"learn":[0.6520753696],"iteration":865,"passed_time":27.71151671,"remaining_time":36.28736714,"test":[0.6703853357]},
|
||||||
|
{"learn":[0.6520519528],"iteration":866,"passed_time":27.73884016,"remaining_time":36.2492571,"test":[0.670450644]},
|
||||||
|
{"learn":[0.6520216555],"iteration":867,"passed_time":27.76583897,"remaining_time":36.21074851,"test":[0.6704556991]},
|
||||||
|
{"learn":[0.6519926935],"iteration":868,"passed_time":27.79498714,"remaining_time":36.17506382,"test":[0.6704535742]},
|
||||||
|
{"learn":[0.6519734186],"iteration":869,"passed_time":27.82082723,"remaining_time":36.13509744,"test":[0.6704495915]}
|
||||||
|
]}
|
||||||
Binary file not shown.
@@ -0,0 +1,871 @@
|
|||||||
|
iter Logloss
|
||||||
|
0 0.692389481
|
||||||
|
1 0.6916338586
|
||||||
|
2 0.6910159214
|
||||||
|
3 0.6903417151
|
||||||
|
4 0.6896961461
|
||||||
|
5 0.6890979366
|
||||||
|
6 0.6884946167
|
||||||
|
7 0.6879503686
|
||||||
|
8 0.6874528094
|
||||||
|
9 0.6869036785
|
||||||
|
10 0.6863761921
|
||||||
|
11 0.6859038678
|
||||||
|
12 0.685410175
|
||||||
|
13 0.6849483392
|
||||||
|
14 0.6845417792
|
||||||
|
15 0.6841038875
|
||||||
|
16 0.6836957422
|
||||||
|
17 0.6832947461
|
||||||
|
18 0.6829014105
|
||||||
|
19 0.6825264546
|
||||||
|
20 0.6822106577
|
||||||
|
21 0.6818649349
|
||||||
|
22 0.6815467855
|
||||||
|
23 0.6812293319
|
||||||
|
24 0.6808837443
|
||||||
|
25 0.6805816494
|
||||||
|
26 0.6803209634
|
||||||
|
27 0.6800350862
|
||||||
|
28 0.6797703947
|
||||||
|
29 0.6794926675
|
||||||
|
30 0.6792251865
|
||||||
|
31 0.6789670166
|
||||||
|
32 0.678722402
|
||||||
|
33 0.678476935
|
||||||
|
34 0.6782297335
|
||||||
|
35 0.6780226701
|
||||||
|
36 0.6778291026
|
||||||
|
37 0.6776045324
|
||||||
|
38 0.6773969079
|
||||||
|
39 0.6771819602
|
||||||
|
40 0.6769816736
|
||||||
|
41 0.6767984027
|
||||||
|
42 0.6766201184
|
||||||
|
43 0.6764394377
|
||||||
|
44 0.6762698797
|
||||||
|
45 0.6760974263
|
||||||
|
46 0.6759245179
|
||||||
|
47 0.6757673909
|
||||||
|
48 0.6756172628
|
||||||
|
49 0.675474531
|
||||||
|
50 0.6753286933
|
||||||
|
51 0.6751900513
|
||||||
|
52 0.6750574835
|
||||||
|
53 0.6749329567
|
||||||
|
54 0.6748033265
|
||||||
|
55 0.6746797823
|
||||||
|
56 0.674535525
|
||||||
|
57 0.6744256514
|
||||||
|
58 0.674310819
|
||||||
|
59 0.6741967947
|
||||||
|
60 0.6740879654
|
||||||
|
61 0.6739772476
|
||||||
|
62 0.67388281
|
||||||
|
63 0.6737789726
|
||||||
|
64 0.6736812332
|
||||||
|
65 0.6735930009
|
||||||
|
66 0.6734947116
|
||||||
|
67 0.6733961481
|
||||||
|
68 0.6732990195
|
||||||
|
69 0.6732133575
|
||||||
|
70 0.673111539
|
||||||
|
71 0.6730080451
|
||||||
|
72 0.6729157861
|
||||||
|
73 0.6728347949
|
||||||
|
74 0.6727640693
|
||||||
|
75 0.6726808811
|
||||||
|
76 0.6726029645
|
||||||
|
77 0.6725356026
|
||||||
|
78 0.6724606887
|
||||||
|
79 0.6723849561
|
||||||
|
80 0.6723050519
|
||||||
|
81 0.6722508802
|
||||||
|
82 0.6721773904
|
||||||
|
83 0.6721007598
|
||||||
|
84 0.6720353564
|
||||||
|
85 0.6719790902
|
||||||
|
86 0.6719140024
|
||||||
|
87 0.6718573633
|
||||||
|
88 0.671795602
|
||||||
|
89 0.6717369134
|
||||||
|
90 0.6716711079
|
||||||
|
91 0.6716070843
|
||||||
|
92 0.6715517232
|
||||||
|
93 0.6714957378
|
||||||
|
94 0.6714364567
|
||||||
|
95 0.6713881758
|
||||||
|
96 0.6713336502
|
||||||
|
97 0.6712700267
|
||||||
|
98 0.6712154424
|
||||||
|
99 0.6711600413
|
||||||
|
100 0.6711060533
|
||||||
|
101 0.6710494943
|
||||||
|
102 0.6709936897
|
||||||
|
103 0.6709472183
|
||||||
|
104 0.6708914508
|
||||||
|
105 0.6708388195
|
||||||
|
106 0.6707885854
|
||||||
|
107 0.6707454167
|
||||||
|
108 0.6706973013
|
||||||
|
109 0.6706577031
|
||||||
|
110 0.67061108
|
||||||
|
111 0.6705625485
|
||||||
|
112 0.6705146484
|
||||||
|
113 0.6704704423
|
||||||
|
114 0.6704155922
|
||||||
|
115 0.6703687117
|
||||||
|
116 0.6703324232
|
||||||
|
117 0.6702884624
|
||||||
|
118 0.670253478
|
||||||
|
119 0.6702140804
|
||||||
|
120 0.6701682529
|
||||||
|
121 0.6701320588
|
||||||
|
122 0.6700939824
|
||||||
|
123 0.6700655902
|
||||||
|
124 0.6700190743
|
||||||
|
125 0.6699792296
|
||||||
|
126 0.6699379404
|
||||||
|
127 0.669895454
|
||||||
|
128 0.6698563938
|
||||||
|
129 0.6698215571
|
||||||
|
130 0.6697857067
|
||||||
|
131 0.6697449303
|
||||||
|
132 0.6697052425
|
||||||
|
133 0.6696695553
|
||||||
|
134 0.6696269265
|
||||||
|
135 0.6695969271
|
||||||
|
136 0.6695489786
|
||||||
|
137 0.6695173859
|
||||||
|
138 0.6694811164
|
||||||
|
139 0.6694477439
|
||||||
|
140 0.6694082161
|
||||||
|
141 0.6693679185
|
||||||
|
142 0.6693341916
|
||||||
|
143 0.6692933159
|
||||||
|
144 0.6692619696
|
||||||
|
145 0.6692229289
|
||||||
|
146 0.6691840164
|
||||||
|
147 0.6691581406
|
||||||
|
148 0.6691177196
|
||||||
|
149 0.6690851126
|
||||||
|
150 0.6690518144
|
||||||
|
151 0.6690149711
|
||||||
|
152 0.668993877
|
||||||
|
153 0.6689596579
|
||||||
|
154 0.6689372651
|
||||||
|
155 0.6689003045
|
||||||
|
156 0.6688680182
|
||||||
|
157 0.6688348164
|
||||||
|
158 0.6687947046
|
||||||
|
159 0.6687605251
|
||||||
|
160 0.668726253
|
||||||
|
161 0.6686862718
|
||||||
|
162 0.668663478
|
||||||
|
163 0.6686399521
|
||||||
|
164 0.6686058279
|
||||||
|
165 0.6685761282
|
||||||
|
166 0.6685469327
|
||||||
|
167 0.6685157003
|
||||||
|
168 0.6684805143
|
||||||
|
169 0.6684485765
|
||||||
|
170 0.6684144429
|
||||||
|
171 0.6683849752
|
||||||
|
172 0.6683568537
|
||||||
|
173 0.6683266628
|
||||||
|
174 0.6682937842
|
||||||
|
175 0.6682657097
|
||||||
|
176 0.6682301443
|
||||||
|
177 0.6681995916
|
||||||
|
178 0.6681658267
|
||||||
|
179 0.6681422687
|
||||||
|
180 0.6681216601
|
||||||
|
181 0.6680899019
|
||||||
|
182 0.6680676394
|
||||||
|
183 0.6680413672
|
||||||
|
184 0.6680088406
|
||||||
|
185 0.6679873982
|
||||||
|
186 0.6679663544
|
||||||
|
187 0.6679417375
|
||||||
|
188 0.6679100197
|
||||||
|
189 0.667881208
|
||||||
|
190 0.6678475427
|
||||||
|
191 0.6678310341
|
||||||
|
192 0.6678060257
|
||||||
|
193 0.6677789336
|
||||||
|
194 0.6677478773
|
||||||
|
195 0.6677212408
|
||||||
|
196 0.667704316
|
||||||
|
197 0.6676819639
|
||||||
|
198 0.6676554448
|
||||||
|
199 0.6676318346
|
||||||
|
200 0.6676074705
|
||||||
|
201 0.6675849784
|
||||||
|
202 0.6675631744
|
||||||
|
203 0.6675397619
|
||||||
|
204 0.6675169086
|
||||||
|
205 0.6674864762
|
||||||
|
206 0.6674670714
|
||||||
|
207 0.6674375599
|
||||||
|
208 0.6674148457
|
||||||
|
209 0.6673974446
|
||||||
|
210 0.6673812139
|
||||||
|
211 0.6673515687
|
||||||
|
212 0.6673197956
|
||||||
|
213 0.6672900754
|
||||||
|
214 0.6672550009
|
||||||
|
215 0.6672271563
|
||||||
|
216 0.667204521
|
||||||
|
217 0.667181968
|
||||||
|
218 0.6671640023
|
||||||
|
219 0.66714351
|
||||||
|
220 0.6671167156
|
||||||
|
221 0.6670915937
|
||||||
|
222 0.6670595279
|
||||||
|
223 0.667033994
|
||||||
|
224 0.6670008246
|
||||||
|
225 0.6669858319
|
||||||
|
226 0.6669553964
|
||||||
|
227 0.6669274683
|
||||||
|
228 0.666896348
|
||||||
|
229 0.6668698686
|
||||||
|
230 0.6668513411
|
||||||
|
231 0.6668309985
|
||||||
|
232 0.6668058585
|
||||||
|
233 0.6667845908
|
||||||
|
234 0.6667582863
|
||||||
|
235 0.6667332943
|
||||||
|
236 0.6667070085
|
||||||
|
237 0.6666907315
|
||||||
|
238 0.6666633028
|
||||||
|
239 0.6666406707
|
||||||
|
240 0.6666134624
|
||||||
|
241 0.6665850522
|
||||||
|
242 0.6665631193
|
||||||
|
243 0.6665412643
|
||||||
|
244 0.6665168385
|
||||||
|
245 0.6664904845
|
||||||
|
246 0.6664678274
|
||||||
|
247 0.6664539777
|
||||||
|
248 0.6664334121
|
||||||
|
249 0.6664121724
|
||||||
|
250 0.666392034
|
||||||
|
251 0.666366899
|
||||||
|
252 0.6663414098
|
||||||
|
253 0.6663157816
|
||||||
|
254 0.6662989799
|
||||||
|
255 0.6662696102
|
||||||
|
256 0.6662479711
|
||||||
|
257 0.6662231874
|
||||||
|
258 0.6661947927
|
||||||
|
259 0.6661669951
|
||||||
|
260 0.6661426137
|
||||||
|
261 0.6661216749
|
||||||
|
262 0.6660983123
|
||||||
|
263 0.6660803402
|
||||||
|
264 0.6660617842
|
||||||
|
265 0.6660443878
|
||||||
|
266 0.6660176079
|
||||||
|
267 0.6659967546
|
||||||
|
268 0.6659751467
|
||||||
|
269 0.6659539329
|
||||||
|
270 0.6659263951
|
||||||
|
271 0.6659038921
|
||||||
|
272 0.6658767418
|
||||||
|
273 0.6658510507
|
||||||
|
274 0.6658210119
|
||||||
|
275 0.6657963011
|
||||||
|
276 0.6657748552
|
||||||
|
277 0.6657490013
|
||||||
|
278 0.665732402
|
||||||
|
279 0.6657118786
|
||||||
|
280 0.665684467
|
||||||
|
281 0.6656584634
|
||||||
|
282 0.6656309991
|
||||||
|
283 0.6656073482
|
||||||
|
284 0.6655890957
|
||||||
|
285 0.6655665563
|
||||||
|
286 0.6655452454
|
||||||
|
287 0.6655255286
|
||||||
|
288 0.6655053548
|
||||||
|
289 0.6654893396
|
||||||
|
290 0.6654648912
|
||||||
|
291 0.6654442759
|
||||||
|
292 0.6654173127
|
||||||
|
293 0.6653914518
|
||||||
|
294 0.6653648946
|
||||||
|
295 0.665344141
|
||||||
|
296 0.6653140817
|
||||||
|
297 0.665295365
|
||||||
|
298 0.6652787488
|
||||||
|
299 0.6652502991
|
||||||
|
300 0.665231168
|
||||||
|
301 0.6652136682
|
||||||
|
302 0.6651903001
|
||||||
|
303 0.6651697153
|
||||||
|
304 0.6651525958
|
||||||
|
305 0.6651322685
|
||||||
|
306 0.6651113828
|
||||||
|
307 0.6650886807
|
||||||
|
308 0.6650622251
|
||||||
|
309 0.6650429987
|
||||||
|
310 0.665015513
|
||||||
|
311 0.6650019022
|
||||||
|
312 0.664979951
|
||||||
|
313 0.6649549638
|
||||||
|
314 0.6649340455
|
||||||
|
315 0.6649162445
|
||||||
|
316 0.6649048119
|
||||||
|
317 0.6648796463
|
||||||
|
318 0.6648605481
|
||||||
|
319 0.6648429084
|
||||||
|
320 0.6648238121
|
||||||
|
321 0.6647969527
|
||||||
|
322 0.6647854723
|
||||||
|
323 0.6647589304
|
||||||
|
324 0.6647429024
|
||||||
|
325 0.6647237508
|
||||||
|
326 0.6647059396
|
||||||
|
327 0.664686288
|
||||||
|
328 0.6646532527
|
||||||
|
329 0.6646306438
|
||||||
|
330 0.6646098516
|
||||||
|
331 0.6645858284
|
||||||
|
332 0.6645707188
|
||||||
|
333 0.6645485788
|
||||||
|
334 0.6645305696
|
||||||
|
335 0.6645108881
|
||||||
|
336 0.6644923286
|
||||||
|
337 0.6644805222
|
||||||
|
338 0.6644572776
|
||||||
|
339 0.6644320741
|
||||||
|
340 0.6644115048
|
||||||
|
341 0.6643949013
|
||||||
|
342 0.6643619789
|
||||||
|
343 0.6643389502
|
||||||
|
344 0.6643088915
|
||||||
|
345 0.664286972
|
||||||
|
346 0.664274149
|
||||||
|
347 0.6642536926
|
||||||
|
348 0.6642357634
|
||||||
|
349 0.664207914
|
||||||
|
350 0.6641853097
|
||||||
|
351 0.6641654917
|
||||||
|
352 0.664143804
|
||||||
|
353 0.6641290647
|
||||||
|
354 0.6641117244
|
||||||
|
355 0.6640880219
|
||||||
|
356 0.6640669415
|
||||||
|
357 0.6640462999
|
||||||
|
358 0.664030296
|
||||||
|
359 0.6640028542
|
||||||
|
360 0.6639813347
|
||||||
|
361 0.6639597941
|
||||||
|
362 0.6639429832
|
||||||
|
363 0.6639222708
|
||||||
|
364 0.6639065546
|
||||||
|
365 0.6638823236
|
||||||
|
366 0.6638648195
|
||||||
|
367 0.6638436235
|
||||||
|
368 0.6638208732
|
||||||
|
369 0.6637956357
|
||||||
|
370 0.6637718453
|
||||||
|
371 0.663756918
|
||||||
|
372 0.6637353525
|
||||||
|
373 0.6637143112
|
||||||
|
374 0.6636956547
|
||||||
|
375 0.663680995
|
||||||
|
376 0.66366728
|
||||||
|
377 0.6636487567
|
||||||
|
378 0.6636266904
|
||||||
|
379 0.6636116064
|
||||||
|
380 0.6635902746
|
||||||
|
381 0.6635654896
|
||||||
|
382 0.6635393029
|
||||||
|
383 0.6635171734
|
||||||
|
384 0.663500789
|
||||||
|
385 0.663477743
|
||||||
|
386 0.6634584806
|
||||||
|
387 0.6634337499
|
||||||
|
388 0.6634135584
|
||||||
|
389 0.6633868455
|
||||||
|
390 0.6633755323
|
||||||
|
391 0.663356103
|
||||||
|
392 0.6633337631
|
||||||
|
393 0.663319422
|
||||||
|
394 0.6632911566
|
||||||
|
395 0.6632687875
|
||||||
|
396 0.6632431997
|
||||||
|
397 0.6632189331
|
||||||
|
398 0.663201035
|
||||||
|
399 0.6631898553
|
||||||
|
400 0.6631712482
|
||||||
|
401 0.663143025
|
||||||
|
402 0.663121538
|
||||||
|
403 0.6631087792
|
||||||
|
404 0.6630859067
|
||||||
|
405 0.663066483
|
||||||
|
406 0.6630443652
|
||||||
|
407 0.6630250376
|
||||||
|
408 0.6630007822
|
||||||
|
409 0.6629768728
|
||||||
|
410 0.6629528093
|
||||||
|
411 0.6629260936
|
||||||
|
412 0.6629102182
|
||||||
|
413 0.6628863488
|
||||||
|
414 0.6628648972
|
||||||
|
415 0.6628454339
|
||||||
|
416 0.6628200274
|
||||||
|
417 0.6627942591
|
||||||
|
418 0.6627744647
|
||||||
|
419 0.662765485
|
||||||
|
420 0.6627503257
|
||||||
|
421 0.6627323029
|
||||||
|
422 0.6627111509
|
||||||
|
423 0.6626785863
|
||||||
|
424 0.6626576561
|
||||||
|
425 0.6626363113
|
||||||
|
426 0.6626181065
|
||||||
|
427 0.66259794
|
||||||
|
428 0.6625765658
|
||||||
|
429 0.6625526572
|
||||||
|
430 0.66253135
|
||||||
|
431 0.6625035695
|
||||||
|
432 0.662480212
|
||||||
|
433 0.6624611632
|
||||||
|
434 0.6624332625
|
||||||
|
435 0.6624120584
|
||||||
|
436 0.6623941719
|
||||||
|
437 0.6623766304
|
||||||
|
438 0.6623623329
|
||||||
|
439 0.6623442925
|
||||||
|
440 0.6623212715
|
||||||
|
441 0.6623025941
|
||||||
|
442 0.6622749791
|
||||||
|
443 0.6622534499
|
||||||
|
444 0.6622305473
|
||||||
|
445 0.6622059333
|
||||||
|
446 0.6621871707
|
||||||
|
447 0.6621638454
|
||||||
|
448 0.6621511296
|
||||||
|
449 0.6621349978
|
||||||
|
450 0.6621120424
|
||||||
|
451 0.6620958271
|
||||||
|
452 0.6620793528
|
||||||
|
453 0.6620572713
|
||||||
|
454 0.6620395025
|
||||||
|
455 0.6620188044
|
||||||
|
456 0.6620017347
|
||||||
|
457 0.6619811454
|
||||||
|
458 0.6619695569
|
||||||
|
459 0.661952377
|
||||||
|
460 0.6619237442
|
||||||
|
461 0.6619089407
|
||||||
|
462 0.6618886168
|
||||||
|
463 0.6618831383
|
||||||
|
464 0.6618690774
|
||||||
|
465 0.661845878
|
||||||
|
466 0.6618290213
|
||||||
|
467 0.6618050064
|
||||||
|
468 0.6617832833
|
||||||
|
469 0.6617652311
|
||||||
|
470 0.6617443144
|
||||||
|
471 0.6617202619
|
||||||
|
472 0.6617005831
|
||||||
|
473 0.6616824419
|
||||||
|
474 0.6616538226
|
||||||
|
475 0.6616314155
|
||||||
|
476 0.6616127861
|
||||||
|
477 0.6616029072
|
||||||
|
478 0.6615843751
|
||||||
|
479 0.661563216
|
||||||
|
480 0.6615432257
|
||||||
|
481 0.6615263324
|
||||||
|
482 0.6615033259
|
||||||
|
483 0.661484293
|
||||||
|
484 0.6614678231
|
||||||
|
485 0.6614463024
|
||||||
|
486 0.6614155436
|
||||||
|
487 0.6613958945
|
||||||
|
488 0.661380611
|
||||||
|
489 0.6613677802
|
||||||
|
490 0.6613530086
|
||||||
|
491 0.6613248211
|
||||||
|
492 0.6613059359
|
||||||
|
493 0.6612729965
|
||||||
|
494 0.6612624948
|
||||||
|
495 0.6612401679
|
||||||
|
496 0.6612191637
|
||||||
|
497 0.6611912219
|
||||||
|
498 0.6611773017
|
||||||
|
499 0.6611638216
|
||||||
|
500 0.6611450533
|
||||||
|
501 0.6611179111
|
||||||
|
502 0.6610959069
|
||||||
|
503 0.6610728788
|
||||||
|
504 0.6610436668
|
||||||
|
505 0.6610188976
|
||||||
|
506 0.6610030555
|
||||||
|
507 0.6609831174
|
||||||
|
508 0.6609586562
|
||||||
|
509 0.660935882
|
||||||
|
510 0.6609202024
|
||||||
|
511 0.6609011137
|
||||||
|
512 0.6608726737
|
||||||
|
513 0.6608608849
|
||||||
|
514 0.6608387256
|
||||||
|
515 0.6608136063
|
||||||
|
516 0.6607946343
|
||||||
|
517 0.6607703935
|
||||||
|
518 0.6607509625
|
||||||
|
519 0.6607238109
|
||||||
|
520 0.6606999858
|
||||||
|
521 0.6606813873
|
||||||
|
522 0.6606610372
|
||||||
|
523 0.660638456
|
||||||
|
524 0.6606156483
|
||||||
|
525 0.6605968623
|
||||||
|
526 0.6605735776
|
||||||
|
527 0.6605517294
|
||||||
|
528 0.6605309239
|
||||||
|
529 0.6605086434
|
||||||
|
530 0.6604803349
|
||||||
|
531 0.6604566326
|
||||||
|
532 0.6604430839
|
||||||
|
533 0.6604273738
|
||||||
|
534 0.6604048016
|
||||||
|
535 0.6603845173
|
||||||
|
536 0.6603669212
|
||||||
|
537 0.6603488983
|
||||||
|
538 0.6603176881
|
||||||
|
539 0.6602953862
|
||||||
|
540 0.6602672025
|
||||||
|
541 0.6602568636
|
||||||
|
542 0.660235705
|
||||||
|
543 0.6602152295
|
||||||
|
544 0.6601897709
|
||||||
|
545 0.6601683731
|
||||||
|
546 0.6601472267
|
||||||
|
547 0.6601262337
|
||||||
|
548 0.6601119991
|
||||||
|
549 0.6600869973
|
||||||
|
550 0.6600667497
|
||||||
|
551 0.6600397508
|
||||||
|
552 0.660016863
|
||||||
|
553 0.6599933158
|
||||||
|
554 0.6599632649
|
||||||
|
555 0.6599446007
|
||||||
|
556 0.6599138126
|
||||||
|
557 0.6598965504
|
||||||
|
558 0.6598785723
|
||||||
|
559 0.659860838
|
||||||
|
560 0.6598408724
|
||||||
|
561 0.6598244857
|
||||||
|
562 0.6598082469
|
||||||
|
563 0.6597851673
|
||||||
|
564 0.6597683521
|
||||||
|
565 0.6597479006
|
||||||
|
566 0.6597310938
|
||||||
|
567 0.6597096581
|
||||||
|
568 0.6596862311
|
||||||
|
569 0.6596574779
|
||||||
|
570 0.6596385418
|
||||||
|
571 0.6596189903
|
||||||
|
572 0.65959275
|
||||||
|
573 0.6595730662
|
||||||
|
574 0.6595566809
|
||||||
|
575 0.6595365076
|
||||||
|
576 0.6595163446
|
||||||
|
577 0.6594816637
|
||||||
|
578 0.6594570142
|
||||||
|
579 0.6594353055
|
||||||
|
580 0.6594162362
|
||||||
|
581 0.659395036
|
||||||
|
582 0.6593798831
|
||||||
|
583 0.6593556719
|
||||||
|
584 0.6593292627
|
||||||
|
585 0.6592976737
|
||||||
|
586 0.6592754841
|
||||||
|
587 0.6592510441
|
||||||
|
588 0.6592290326
|
||||||
|
589 0.6592097404
|
||||||
|
590 0.6591876204
|
||||||
|
591 0.6591705995
|
||||||
|
592 0.6591456195
|
||||||
|
593 0.6591107122
|
||||||
|
594 0.6590819533
|
||||||
|
595 0.6590551327
|
||||||
|
596 0.6590373916
|
||||||
|
597 0.6590177149
|
||||||
|
598 0.6589946095
|
||||||
|
599 0.6589697628
|
||||||
|
600 0.6589442269
|
||||||
|
601 0.6589182437
|
||||||
|
602 0.6588837179
|
||||||
|
603 0.6588674101
|
||||||
|
604 0.6588406916
|
||||||
|
605 0.6588149945
|
||||||
|
606 0.6587866031
|
||||||
|
607 0.6587636648
|
||||||
|
608 0.6587502469
|
||||||
|
609 0.6587292784
|
||||||
|
610 0.6587104112
|
||||||
|
611 0.6586953782
|
||||||
|
612 0.6586641191
|
||||||
|
613 0.6586450136
|
||||||
|
614 0.6586136263
|
||||||
|
615 0.6585862768
|
||||||
|
616 0.6585585235
|
||||||
|
617 0.6585371631
|
||||||
|
618 0.6585092632
|
||||||
|
619 0.6584914317
|
||||||
|
620 0.6584662432
|
||||||
|
621 0.6584454668
|
||||||
|
622 0.6584249408
|
||||||
|
623 0.6583931228
|
||||||
|
624 0.6583660767
|
||||||
|
625 0.658354264
|
||||||
|
626 0.6583253625
|
||||||
|
627 0.6582968632
|
||||||
|
628 0.6582687399
|
||||||
|
629 0.658242535
|
||||||
|
630 0.6582199874
|
||||||
|
631 0.6581918101
|
||||||
|
632 0.6581735218
|
||||||
|
633 0.6581445869
|
||||||
|
634 0.6581202427
|
||||||
|
635 0.6580977862
|
||||||
|
636 0.6580724179
|
||||||
|
637 0.6580426322
|
||||||
|
638 0.6580111256
|
||||||
|
639 0.6579834747
|
||||||
|
640 0.6579541367
|
||||||
|
641 0.6579254503
|
||||||
|
642 0.657898555
|
||||||
|
643 0.6578676875
|
||||||
|
644 0.6578324163
|
||||||
|
645 0.6578062223
|
||||||
|
646 0.6577760631
|
||||||
|
647 0.6577483474
|
||||||
|
648 0.6577249642
|
||||||
|
649 0.6576974966
|
||||||
|
650 0.657675114
|
||||||
|
651 0.6576447891
|
||||||
|
652 0.6576102356
|
||||||
|
653 0.6575793887
|
||||||
|
654 0.6575543309
|
||||||
|
655 0.6575340787
|
||||||
|
656 0.6575061464
|
||||||
|
657 0.657476113
|
||||||
|
658 0.6574447014
|
||||||
|
659 0.6574247361
|
||||||
|
660 0.6574034983
|
||||||
|
661 0.6573783832
|
||||||
|
662 0.657357694
|
||||||
|
663 0.6573411592
|
||||||
|
664 0.6573118559
|
||||||
|
665 0.6572819076
|
||||||
|
666 0.6572430097
|
||||||
|
667 0.6572160391
|
||||||
|
668 0.6571931413
|
||||||
|
669 0.6571737099
|
||||||
|
670 0.6571532872
|
||||||
|
671 0.6571208939
|
||||||
|
672 0.6570887673
|
||||||
|
673 0.6570633692
|
||||||
|
674 0.6570454361
|
||||||
|
675 0.6570231031
|
||||||
|
676 0.6570052089
|
||||||
|
677 0.6569855794
|
||||||
|
678 0.6569579709
|
||||||
|
679 0.6569333354
|
||||||
|
680 0.6569069617
|
||||||
|
681 0.6568931857
|
||||||
|
682 0.6568734532
|
||||||
|
683 0.6568435196
|
||||||
|
684 0.6568108038
|
||||||
|
685 0.6567811374
|
||||||
|
686 0.6567467284
|
||||||
|
687 0.6567172734
|
||||||
|
688 0.6566967606
|
||||||
|
689 0.6566720128
|
||||||
|
690 0.6566441608
|
||||||
|
691 0.6566172287
|
||||||
|
692 0.6565952549
|
||||||
|
693 0.6565702687
|
||||||
|
694 0.6565392213
|
||||||
|
695 0.6565157938
|
||||||
|
696 0.6564902789
|
||||||
|
697 0.6564644734
|
||||||
|
698 0.6564349549
|
||||||
|
699 0.6564046572
|
||||||
|
700 0.6563744107
|
||||||
|
701 0.6563525063
|
||||||
|
702 0.6563189867
|
||||||
|
703 0.6562939062
|
||||||
|
704 0.6562739297
|
||||||
|
705 0.656256438
|
||||||
|
706 0.6562366475
|
||||||
|
707 0.6562073096
|
||||||
|
708 0.6561864222
|
||||||
|
709 0.6561578826
|
||||||
|
710 0.6561208567
|
||||||
|
711 0.6560924703
|
||||||
|
712 0.6560656907
|
||||||
|
713 0.6560362588
|
||||||
|
714 0.6560124527
|
||||||
|
715 0.6559875055
|
||||||
|
716 0.6559547281
|
||||||
|
717 0.6559230866
|
||||||
|
718 0.6558924823
|
||||||
|
719 0.6558676469
|
||||||
|
720 0.6558459277
|
||||||
|
721 0.6558149638
|
||||||
|
722 0.6557812248
|
||||||
|
723 0.6557546502
|
||||||
|
724 0.6557274948
|
||||||
|
725 0.6557044723
|
||||||
|
726 0.6556751811
|
||||||
|
727 0.6556539158
|
||||||
|
728 0.6556182915
|
||||||
|
729 0.6555977079
|
||||||
|
730 0.6555667903
|
||||||
|
731 0.6555394075
|
||||||
|
732 0.6555122742
|
||||||
|
733 0.6554814941
|
||||||
|
734 0.6554517373
|
||||||
|
735 0.655429552
|
||||||
|
736 0.655396579
|
||||||
|
737 0.6553735864
|
||||||
|
738 0.6553472597
|
||||||
|
739 0.6553252832
|
||||||
|
740 0.6552971659
|
||||||
|
741 0.6552763852
|
||||||
|
742 0.6552488203
|
||||||
|
743 0.65521229
|
||||||
|
744 0.6551949744
|
||||||
|
745 0.6551673797
|
||||||
|
746 0.6551421856
|
||||||
|
747 0.6551255516
|
||||||
|
748 0.6551019608
|
||||||
|
749 0.6550758728
|
||||||
|
750 0.655051966
|
||||||
|
751 0.6550351058
|
||||||
|
752 0.6549998756
|
||||||
|
753 0.6549721212
|
||||||
|
754 0.6549401744
|
||||||
|
755 0.6549207325
|
||||||
|
756 0.6548900891
|
||||||
|
757 0.6548682731
|
||||||
|
758 0.6548418938
|
||||||
|
759 0.6548234717
|
||||||
|
760 0.6547996833
|
||||||
|
761 0.6547726174
|
||||||
|
762 0.6547509314
|
||||||
|
763 0.6547168175
|
||||||
|
764 0.6546907846
|
||||||
|
765 0.6546671611
|
||||||
|
766 0.6546475893
|
||||||
|
767 0.6546206223
|
||||||
|
768 0.6545874193
|
||||||
|
769 0.6545620629
|
||||||
|
770 0.6545346297
|
||||||
|
771 0.6545172316
|
||||||
|
772 0.6544943049
|
||||||
|
773 0.6544632323
|
||||||
|
774 0.6544384097
|
||||||
|
775 0.6544084745
|
||||||
|
776 0.6543765257
|
||||||
|
777 0.6543536123
|
||||||
|
778 0.6543303593
|
||||||
|
779 0.6543005831
|
||||||
|
780 0.6542678123
|
||||||
|
781 0.6542439303
|
||||||
|
782 0.6542100401
|
||||||
|
783 0.6541836178
|
||||||
|
784 0.654158129
|
||||||
|
785 0.6541343464
|
||||||
|
786 0.6541092921
|
||||||
|
787 0.6540812254
|
||||||
|
788 0.654060259
|
||||||
|
789 0.6540467253
|
||||||
|
790 0.6540306837
|
||||||
|
791 0.6540103667
|
||||||
|
792 0.6539821302
|
||||||
|
793 0.6539577914
|
||||||
|
794 0.653923724
|
||||||
|
795 0.6539086888
|
||||||
|
796 0.6538798424
|
||||||
|
797 0.6538566996
|
||||||
|
798 0.6538290752
|
||||||
|
799 0.6538051255
|
||||||
|
800 0.6537917354
|
||||||
|
801 0.6537684302
|
||||||
|
802 0.6537402991
|
||||||
|
803 0.6537165427
|
||||||
|
804 0.6536853601
|
||||||
|
805 0.6536681479
|
||||||
|
806 0.6536409101
|
||||||
|
807 0.6536120189
|
||||||
|
808 0.6535912493
|
||||||
|
809 0.6535617421
|
||||||
|
810 0.6535315174
|
||||||
|
811 0.6534972927
|
||||||
|
812 0.6534818476
|
||||||
|
813 0.6534498323
|
||||||
|
814 0.6534305025
|
||||||
|
815 0.6534081059
|
||||||
|
816 0.6533765804
|
||||||
|
817 0.6533441549
|
||||||
|
818 0.6533053405
|
||||||
|
819 0.6532838469
|
||||||
|
820 0.6532604302
|
||||||
|
821 0.6532364412
|
||||||
|
822 0.6532100089
|
||||||
|
823 0.6531782515
|
||||||
|
824 0.6531449701
|
||||||
|
825 0.653115452
|
||||||
|
826 0.6530787602
|
||||||
|
827 0.653052397
|
||||||
|
828 0.6530313579
|
||||||
|
829 0.6530010363
|
||||||
|
830 0.6529752146
|
||||||
|
831 0.652954801
|
||||||
|
832 0.6529330351
|
||||||
|
833 0.6528993709
|
||||||
|
834 0.6528665883
|
||||||
|
835 0.6528413041
|
||||||
|
836 0.6528217161
|
||||||
|
837 0.6527978782
|
||||||
|
838 0.6527789461
|
||||||
|
839 0.6527432001
|
||||||
|
840 0.6527139767
|
||||||
|
841 0.6526857244
|
||||||
|
842 0.652657086
|
||||||
|
843 0.6526355016
|
||||||
|
844 0.6526054936
|
||||||
|
845 0.6525793707
|
||||||
|
846 0.6525584692
|
||||||
|
847 0.6525279747
|
||||||
|
848 0.6525038765
|
||||||
|
849 0.6524849104
|
||||||
|
850 0.6524610603
|
||||||
|
851 0.6524357337
|
||||||
|
852 0.6524082286
|
||||||
|
853 0.65238051
|
||||||
|
854 0.6523557826
|
||||||
|
855 0.6523391233
|
||||||
|
856 0.652325347
|
||||||
|
857 0.6522924958
|
||||||
|
858 0.6522623584
|
||||||
|
859 0.6522343891
|
||||||
|
860 0.6522094424
|
||||||
|
861 0.6521841478
|
||||||
|
862 0.6521657946
|
||||||
|
863 0.6521304278
|
||||||
|
864 0.6521045712
|
||||||
|
865 0.6520753696
|
||||||
|
866 0.6520519528
|
||||||
|
867 0.6520216555
|
||||||
|
868 0.6519926935
|
||||||
|
869 0.6519734186
|
||||||
|
@@ -0,0 +1,871 @@
|
|||||||
|
iter Passed Remaining
|
||||||
|
0 46 93548
|
||||||
|
1 83 83419
|
||||||
|
2 132 88415
|
||||||
|
3 162 81250
|
||||||
|
4 196 78573
|
||||||
|
5 230 76747
|
||||||
|
6 269 76701
|
||||||
|
7 319 79674
|
||||||
|
8 364 80653
|
||||||
|
9 411 81918
|
||||||
|
10 456 82497
|
||||||
|
11 491 81432
|
||||||
|
12 522 79809
|
||||||
|
13 555 78774
|
||||||
|
14 595 78777
|
||||||
|
15 630 78123
|
||||||
|
16 662 77290
|
||||||
|
17 700 77124
|
||||||
|
18 730 76120
|
||||||
|
19 764 75651
|
||||||
|
20 804 75774
|
||||||
|
21 835 75128
|
||||||
|
22 886 76169
|
||||||
|
23 920 75764
|
||||||
|
24 960 75853
|
||||||
|
25 989 75130
|
||||||
|
26 1025 74941
|
||||||
|
27 1060 74714
|
||||||
|
28 1104 75079
|
||||||
|
29 1141 74976
|
||||||
|
30 1180 74975
|
||||||
|
31 1213 74640
|
||||||
|
32 1245 74260
|
||||||
|
33 1287 74434
|
||||||
|
34 1327 74528
|
||||||
|
35 1376 75071
|
||||||
|
36 1427 75741
|
||||||
|
37 1468 75804
|
||||||
|
38 1508 75857
|
||||||
|
39 1549 75922
|
||||||
|
40 1586 75781
|
||||||
|
41 1621 75590
|
||||||
|
42 1663 75705
|
||||||
|
43 1701 75621
|
||||||
|
44 1739 75591
|
||||||
|
45 1776 75460
|
||||||
|
46 1819 75616
|
||||||
|
47 1869 76025
|
||||||
|
48 1916 76288
|
||||||
|
49 1953 76191
|
||||||
|
50 1993 76197
|
||||||
|
51 2038 76381
|
||||||
|
52 2080 76420
|
||||||
|
53 2158 77788
|
||||||
|
54 2220 78529
|
||||||
|
55 2286 79390
|
||||||
|
56 2328 79372
|
||||||
|
57 2367 79254
|
||||||
|
58 2409 79257
|
||||||
|
59 2444 79049
|
||||||
|
60 2484 78985
|
||||||
|
61 2521 78820
|
||||||
|
62 2554 78528
|
||||||
|
63 2593 78466
|
||||||
|
64 2623 78111
|
||||||
|
65 2660 77969
|
||||||
|
66 2695 77776
|
||||||
|
67 2725 77446
|
||||||
|
68 2761 77291
|
||||||
|
69 2791 76975
|
||||||
|
70 2824 76739
|
||||||
|
71 2861 76611
|
||||||
|
72 2897 76476
|
||||||
|
73 2935 76408
|
||||||
|
74 3040 78027
|
||||||
|
75 3097 78411
|
||||||
|
76 3152 78741
|
||||||
|
77 3216 79248
|
||||||
|
78 3256 79195
|
||||||
|
79 3305 79336
|
||||||
|
80 3348 79320
|
||||||
|
81 3381 79089
|
||||||
|
82 3416 78911
|
||||||
|
83 3480 79399
|
||||||
|
84 3535 79649
|
||||||
|
85 3581 79716
|
||||||
|
86 3612 79428
|
||||||
|
87 3644 79185
|
||||||
|
88 3678 78975
|
||||||
|
89 3712 78785
|
||||||
|
90 3743 78531
|
||||||
|
91 3775 78297
|
||||||
|
92 3806 78047
|
||||||
|
93 3837 77821
|
||||||
|
94 3871 77629
|
||||||
|
95 3913 77618
|
||||||
|
96 3945 77403
|
||||||
|
97 3989 77433
|
||||||
|
98 4020 77204
|
||||||
|
99 4053 77020
|
||||||
|
100 4084 76789
|
||||||
|
101 4116 76597
|
||||||
|
102 4148 76401
|
||||||
|
103 4176 76141
|
||||||
|
104 4202 75845
|
||||||
|
105 4232 75634
|
||||||
|
106 4261 75390
|
||||||
|
107 4290 75168
|
||||||
|
108 4324 75018
|
||||||
|
109 4351 74766
|
||||||
|
110 4386 74648
|
||||||
|
111 4424 74577
|
||||||
|
112 4458 74455
|
||||||
|
113 4497 74400
|
||||||
|
114 4533 74307
|
||||||
|
115 4564 74136
|
||||||
|
116 4596 73981
|
||||||
|
117 4628 73818
|
||||||
|
118 4668 73786
|
||||||
|
119 4692 73509
|
||||||
|
120 4723 73354
|
||||||
|
121 4756 73220
|
||||||
|
122 4788 73065
|
||||||
|
123 4815 72854
|
||||||
|
124 4843 72647
|
||||||
|
125 4875 72514
|
||||||
|
126 4916 72515
|
||||||
|
127 4952 72436
|
||||||
|
128 4991 72397
|
||||||
|
129 5028 72327
|
||||||
|
130 5059 72180
|
||||||
|
131 5096 72116
|
||||||
|
132 5125 71946
|
||||||
|
133 5156 71804
|
||||||
|
134 5190 71704
|
||||||
|
135 5221 71564
|
||||||
|
136 5251 71407
|
||||||
|
137 5274 71165
|
||||||
|
138 5309 71084
|
||||||
|
139 5344 71008
|
||||||
|
140 5377 70902
|
||||||
|
141 5416 70866
|
||||||
|
142 5452 70803
|
||||||
|
143 5490 70760
|
||||||
|
144 5521 70641
|
||||||
|
145 5553 70522
|
||||||
|
146 5582 70365
|
||||||
|
147 5611 70217
|
||||||
|
148 5636 70026
|
||||||
|
149 5673 69975
|
||||||
|
150 5706 69874
|
||||||
|
151 5738 69764
|
||||||
|
152 5765 69605
|
||||||
|
153 5795 69471
|
||||||
|
154 5817 69246
|
||||||
|
155 5853 69191
|
||||||
|
156 5888 69122
|
||||||
|
157 5924 69070
|
||||||
|
158 5964 69061
|
||||||
|
159 5996 68963
|
||||||
|
160 6022 68789
|
||||||
|
161 6050 68650
|
||||||
|
162 6079 68510
|
||||||
|
163 6108 68385
|
||||||
|
164 6140 68292
|
||||||
|
165 6169 68162
|
||||||
|
166 6202 68074
|
||||||
|
167 6231 67953
|
||||||
|
168 6263 67858
|
||||||
|
169 6295 67764
|
||||||
|
170 6325 67656
|
||||||
|
171 6356 67561
|
||||||
|
172 6395 67545
|
||||||
|
173 6437 67554
|
||||||
|
174 6472 67495
|
||||||
|
175 6503 67395
|
||||||
|
176 6533 67291
|
||||||
|
177 6562 67174
|
||||||
|
178 6590 67049
|
||||||
|
179 6624 66982
|
||||||
|
180 6655 66882
|
||||||
|
181 6687 66804
|
||||||
|
182 6718 66703
|
||||||
|
183 6751 66632
|
||||||
|
184 6784 66559
|
||||||
|
185 6810 66424
|
||||||
|
186 6832 66246
|
||||||
|
187 6867 66187
|
||||||
|
188 6918 66294
|
||||||
|
189 6969 66393
|
||||||
|
190 7018 66470
|
||||||
|
191 7074 66614
|
||||||
|
192 7117 66635
|
||||||
|
193 7191 66943
|
||||||
|
194 7242 67036
|
||||||
|
195 7282 67027
|
||||||
|
196 7317 66967
|
||||||
|
197 7351 66903
|
||||||
|
198 7389 66879
|
||||||
|
199 7432 66896
|
||||||
|
200 7471 66869
|
||||||
|
201 7506 66814
|
||||||
|
202 7540 66752
|
||||||
|
203 7568 66628
|
||||||
|
204 7605 66596
|
||||||
|
205 7638 66519
|
||||||
|
206 7665 66397
|
||||||
|
207 7700 66340
|
||||||
|
208 7734 66276
|
||||||
|
209 7766 66197
|
||||||
|
210 7796 66106
|
||||||
|
211 7831 66053
|
||||||
|
212 7871 66037
|
||||||
|
213 7910 66016
|
||||||
|
214 7951 66014
|
||||||
|
215 7989 65983
|
||||||
|
216 8025 65946
|
||||||
|
217 8058 65872
|
||||||
|
218 8087 65768
|
||||||
|
219 8112 65638
|
||||||
|
220 8148 65594
|
||||||
|
221 8197 65655
|
||||||
|
222 8239 65655
|
||||||
|
223 8268 65556
|
||||||
|
224 8298 65466
|
||||||
|
225 8327 65366
|
||||||
|
226 8357 65278
|
||||||
|
227 8384 65167
|
||||||
|
228 8418 65103
|
||||||
|
229 8453 65058
|
||||||
|
230 8490 65020
|
||||||
|
231 8523 64958
|
||||||
|
232 8550 64848
|
||||||
|
233 8575 64718
|
||||||
|
234 8607 64648
|
||||||
|
235 8635 64545
|
||||||
|
236 8660 64426
|
||||||
|
237 8691 64345
|
||||||
|
238 8719 64250
|
||||||
|
239 8746 64137
|
||||||
|
240 8773 64038
|
||||||
|
241 8803 63951
|
||||||
|
242 8833 63873
|
||||||
|
243 8862 63779
|
||||||
|
244 8892 63698
|
||||||
|
245 8932 63688
|
||||||
|
246 8962 63611
|
||||||
|
247 8991 63521
|
||||||
|
248 9021 63442
|
||||||
|
249 9051 63358
|
||||||
|
250 9085 63306
|
||||||
|
251 9110 63193
|
||||||
|
252 9137 63093
|
||||||
|
253 9174 63066
|
||||||
|
254 9196 62935
|
||||||
|
255 9238 62934
|
||||||
|
256 9267 62855
|
||||||
|
257 9297 62776
|
||||||
|
258 9324 62681
|
||||||
|
259 9357 62625
|
||||||
|
260 9388 62552
|
||||||
|
261 9427 62536
|
||||||
|
262 9461 62491
|
||||||
|
263 9496 62443
|
||||||
|
264 9524 62356
|
||||||
|
265 9553 62278
|
||||||
|
266 9590 62247
|
||||||
|
267 9620 62172
|
||||||
|
268 9645 62071
|
||||||
|
269 9682 62040
|
||||||
|
270 9711 61962
|
||||||
|
271 9739 61872
|
||||||
|
272 9768 61797
|
||||||
|
273 9804 61761
|
||||||
|
274 9848 61777
|
||||||
|
275 9886 61755
|
||||||
|
276 9925 61740
|
||||||
|
277 9965 61728
|
||||||
|
278 9995 61656
|
||||||
|
279 10022 61564
|
||||||
|
280 10055 61516
|
||||||
|
281 10080 61410
|
||||||
|
282 10111 61344
|
||||||
|
283 10147 61311
|
||||||
|
284 10175 61230
|
||||||
|
285 10202 61141
|
||||||
|
286 10234 61084
|
||||||
|
287 10264 61018
|
||||||
|
288 10299 60977
|
||||||
|
289 10323 60874
|
||||||
|
290 10353 60804
|
||||||
|
291 10394 60803
|
||||||
|
292 10431 60773
|
||||||
|
293 10471 60763
|
||||||
|
294 10503 60707
|
||||||
|
295 10534 60645
|
||||||
|
296 10576 60646
|
||||||
|
297 10612 60612
|
||||||
|
298 10639 60525
|
||||||
|
299 10668 60453
|
||||||
|
300 10702 60411
|
||||||
|
301 10729 60326
|
||||||
|
302 10764 60290
|
||||||
|
303 10801 60263
|
||||||
|
304 10829 60182
|
||||||
|
305 10857 60108
|
||||||
|
306 10892 60067
|
||||||
|
307 10930 60047
|
||||||
|
308 10972 60045
|
||||||
|
309 11002 59983
|
||||||
|
310 11030 59902
|
||||||
|
311 11058 59828
|
||||||
|
312 11092 59788
|
||||||
|
313 11117 59696
|
||||||
|
314 11149 59641
|
||||||
|
315 11187 59617
|
||||||
|
316 11211 59525
|
||||||
|
317 11243 59468
|
||||||
|
318 11274 59413
|
||||||
|
319 11304 59346
|
||||||
|
320 11334 59287
|
||||||
|
321 11362 59209
|
||||||
|
322 11394 59158
|
||||||
|
323 11436 59158
|
||||||
|
324 11477 59153
|
||||||
|
325 11513 59122
|
||||||
|
326 11547 59081
|
||||||
|
327 11572 58991
|
||||||
|
328 11607 58956
|
||||||
|
329 11637 58894
|
||||||
|
330 11668 58833
|
||||||
|
331 11700 58785
|
||||||
|
332 11724 58694
|
||||||
|
333 11757 58648
|
||||||
|
334 11780 58550
|
||||||
|
335 11815 58515
|
||||||
|
336 11844 58451
|
||||||
|
337 11869 58364
|
||||||
|
338 11905 58335
|
||||||
|
339 11941 58302
|
||||||
|
340 11986 58315
|
||||||
|
341 12020 58274
|
||||||
|
342 12066 58292
|
||||||
|
343 12122 58358
|
||||||
|
344 12177 58415
|
||||||
|
345 12221 58422
|
||||||
|
346 12264 58423
|
||||||
|
347 12300 58394
|
||||||
|
348 12324 58304
|
||||||
|
349 12354 58243
|
||||||
|
350 12401 58262
|
||||||
|
351 12438 58232
|
||||||
|
352 12479 58228
|
||||||
|
353 12512 58179
|
||||||
|
354 12541 58116
|
||||||
|
355 12569 58044
|
||||||
|
356 12597 57977
|
||||||
|
357 12628 57920
|
||||||
|
358 12653 57839
|
||||||
|
359 12682 57775
|
||||||
|
360 12720 57752
|
||||||
|
361 12744 57666
|
||||||
|
362 12770 57592
|
||||||
|
363 12811 57583
|
||||||
|
364 12841 57522
|
||||||
|
365 12870 57460
|
||||||
|
366 12897 57386
|
||||||
|
367 12938 57378
|
||||||
|
368 12974 57347
|
||||||
|
369 13009 57313
|
||||||
|
370 13038 57249
|
||||||
|
371 13078 57235
|
||||||
|
372 13117 57216
|
||||||
|
373 13147 57159
|
||||||
|
374 13181 57118
|
||||||
|
375 13205 57036
|
||||||
|
376 13235 56979
|
||||||
|
377 13274 56960
|
||||||
|
378 13306 56911
|
||||||
|
379 13333 56841
|
||||||
|
380 13366 56798
|
||||||
|
381 13396 56741
|
||||||
|
382 13421 56666
|
||||||
|
383 13467 56674
|
||||||
|
384 13508 56664
|
||||||
|
385 13540 56616
|
||||||
|
386 13569 56559
|
||||||
|
387 13598 56496
|
||||||
|
388 13627 56438
|
||||||
|
389 13656 56376
|
||||||
|
390 13685 56317
|
||||||
|
391 13717 56271
|
||||||
|
392 13750 56227
|
||||||
|
393 13771 56135
|
||||||
|
394 13804 56090
|
||||||
|
395 13825 55999
|
||||||
|
396 13858 55957
|
||||||
|
397 13888 55904
|
||||||
|
398 13917 55843
|
||||||
|
399 13953 55812
|
||||||
|
400 13994 55802
|
||||||
|
401 14025 55752
|
||||||
|
402 14048 55670
|
||||||
|
403 14076 55607
|
||||||
|
404 14105 55551
|
||||||
|
405 14142 55526
|
||||||
|
406 14182 55511
|
||||||
|
407 14214 55464
|
||||||
|
408 14240 55394
|
||||||
|
409 14267 55328
|
||||||
|
410 14299 55284
|
||||||
|
411 14324 55213
|
||||||
|
412 14351 55146
|
||||||
|
413 14379 55086
|
||||||
|
414 14410 55036
|
||||||
|
415 14451 55025
|
||||||
|
416 14484 54984
|
||||||
|
417 14513 54929
|
||||||
|
418 14536 54851
|
||||||
|
419 14565 54793
|
||||||
|
420 14587 54710
|
||||||
|
421 14615 54650
|
||||||
|
422 14642 54588
|
||||||
|
423 14666 54515
|
||||||
|
424 14690 54441
|
||||||
|
425 14719 54384
|
||||||
|
426 14739 54297
|
||||||
|
427 14772 54257
|
||||||
|
428 14790 54164
|
||||||
|
429 14824 54125
|
||||||
|
430 14844 54039
|
||||||
|
431 14876 53995
|
||||||
|
432 14906 53946
|
||||||
|
433 14938 53902
|
||||||
|
434 14980 53894
|
||||||
|
435 15006 53829
|
||||||
|
436 15033 53770
|
||||||
|
437 15059 53706
|
||||||
|
438 15085 53639
|
||||||
|
439 15110 53574
|
||||||
|
440 15134 53503
|
||||||
|
441 15160 53438
|
||||||
|
442 15184 53369
|
||||||
|
443 15211 53308
|
||||||
|
444 15234 53236
|
||||||
|
445 15266 53193
|
||||||
|
446 15287 53114
|
||||||
|
447 15316 53059
|
||||||
|
448 15336 52978
|
||||||
|
449 15366 52929
|
||||||
|
450 15393 52870
|
||||||
|
451 15429 52843
|
||||||
|
452 15469 52828
|
||||||
|
453 15490 52748
|
||||||
|
454 15523 52712
|
||||||
|
455 15550 52653
|
||||||
|
456 15577 52594
|
||||||
|
457 15604 52536
|
||||||
|
458 15630 52476
|
||||||
|
459 15656 52414
|
||||||
|
460 15682 52353
|
||||||
|
461 15711 52304
|
||||||
|
462 15736 52238
|
||||||
|
463 15765 52188
|
||||||
|
464 15786 52112
|
||||||
|
465 15817 52068
|
||||||
|
466 15839 51996
|
||||||
|
467 15873 51961
|
||||||
|
468 15903 51916
|
||||||
|
469 15935 51873
|
||||||
|
470 15969 51840
|
||||||
|
471 15994 51779
|
||||||
|
472 16022 51726
|
||||||
|
473 16047 51663
|
||||||
|
474 16073 51605
|
||||||
|
475 16099 51546
|
||||||
|
476 16128 51495
|
||||||
|
477 16152 51431
|
||||||
|
478 16176 51367
|
||||||
|
479 16205 51317
|
||||||
|
480 16228 51250
|
||||||
|
481 16255 51194
|
||||||
|
482 16277 51123
|
||||||
|
483 16305 51071
|
||||||
|
484 16328 51005
|
||||||
|
485 16362 50973
|
||||||
|
486 16392 50928
|
||||||
|
487 16426 50894
|
||||||
|
488 16459 50860
|
||||||
|
489 16480 50787
|
||||||
|
490 16510 50743
|
||||||
|
491 16530 50668
|
||||||
|
492 16561 50625
|
||||||
|
493 16585 50562
|
||||||
|
494 16613 50510
|
||||||
|
495 16638 50453
|
||||||
|
496 16663 50393
|
||||||
|
497 16690 50339
|
||||||
|
498 16716 50282
|
||||||
|
499 16740 50222
|
||||||
|
500 16773 50186
|
||||||
|
501 16802 50139
|
||||||
|
502 16836 50107
|
||||||
|
503 16873 50085
|
||||||
|
504 16921 50094
|
||||||
|
505 16989 50163
|
||||||
|
506 17038 50173
|
||||||
|
507 17069 50132
|
||||||
|
508 17110 50121
|
||||||
|
509 17145 50091
|
||||||
|
510 17190 50091
|
||||||
|
511 17219 50044
|
||||||
|
512 17247 49994
|
||||||
|
513 17271 49932
|
||||||
|
514 17298 49878
|
||||||
|
515 17343 49878
|
||||||
|
516 17373 49836
|
||||||
|
517 17417 49831
|
||||||
|
518 17460 49823
|
||||||
|
519 17490 49781
|
||||||
|
520 17518 49731
|
||||||
|
521 17546 49680
|
||||||
|
522 17571 49622
|
||||||
|
523 17600 49577
|
||||||
|
524 17625 49520
|
||||||
|
525 17655 49474
|
||||||
|
526 17679 49414
|
||||||
|
527 17707 49366
|
||||||
|
528 17729 49300
|
||||||
|
529 17758 49254
|
||||||
|
530 17781 49191
|
||||||
|
531 17808 49141
|
||||||
|
532 17829 49071
|
||||||
|
533 17862 49038
|
||||||
|
534 17905 49031
|
||||||
|
535 18028 49241
|
||||||
|
536 18072 49236
|
||||||
|
537 18106 49203
|
||||||
|
538 18135 49157
|
||||||
|
539 18165 49114
|
||||||
|
540 18200 49083
|
||||||
|
541 18223 49022
|
||||||
|
542 18254 48980
|
||||||
|
543 18280 48927
|
||||||
|
544 18307 48876
|
||||||
|
545 18338 48834
|
||||||
|
546 18367 48790
|
||||||
|
547 18411 48783
|
||||||
|
548 18444 48747
|
||||||
|
549 18470 48693
|
||||||
|
550 18503 48660
|
||||||
|
551 18531 48611
|
||||||
|
552 18557 48558
|
||||||
|
553 18584 48508
|
||||||
|
554 18625 48493
|
||||||
|
555 18650 48436
|
||||||
|
556 18677 48388
|
||||||
|
557 18703 48333
|
||||||
|
558 18729 48282
|
||||||
|
559 18756 48231
|
||||||
|
560 18781 48176
|
||||||
|
561 18808 48126
|
||||||
|
562 18834 48074
|
||||||
|
563 18869 48043
|
||||||
|
564 18902 48008
|
||||||
|
565 18930 47960
|
||||||
|
566 18958 47914
|
||||||
|
567 18983 47859
|
||||||
|
568 19016 47824
|
||||||
|
569 19037 47761
|
||||||
|
570 19068 47720
|
||||||
|
571 19090 47660
|
||||||
|
572 19111 47595
|
||||||
|
573 19141 47553
|
||||||
|
574 19164 47494
|
||||||
|
575 19196 47458
|
||||||
|
576 19217 47393
|
||||||
|
577 19249 47358
|
||||||
|
578 19274 47303
|
||||||
|
579 19298 47247
|
||||||
|
580 19324 47195
|
||||||
|
581 19357 47162
|
||||||
|
582 19391 47130
|
||||||
|
583 19427 47103
|
||||||
|
584 19460 47070
|
||||||
|
585 19483 47012
|
||||||
|
586 19511 46967
|
||||||
|
587 19542 46929
|
||||||
|
588 19564 46867
|
||||||
|
589 19597 46833
|
||||||
|
590 19621 46779
|
||||||
|
591 19647 46729
|
||||||
|
592 19670 46672
|
||||||
|
593 19699 46627
|
||||||
|
594 19726 46582
|
||||||
|
595 19753 46532
|
||||||
|
596 19778 46480
|
||||||
|
597 19803 46429
|
||||||
|
598 19830 46381
|
||||||
|
599 19857 46335
|
||||||
|
600 19896 46313
|
||||||
|
601 19925 46271
|
||||||
|
602 19957 46236
|
||||||
|
603 19991 46204
|
||||||
|
604 20019 46159
|
||||||
|
605 20047 46115
|
||||||
|
606 20072 46063
|
||||||
|
607 20098 46015
|
||||||
|
608 20123 45963
|
||||||
|
609 20149 45913
|
||||||
|
610 20176 45867
|
||||||
|
611 20202 45817
|
||||||
|
612 20230 45774
|
||||||
|
613 20253 45719
|
||||||
|
614 20285 45682
|
||||||
|
615 20307 45626
|
||||||
|
616 20338 45589
|
||||||
|
617 20361 45532
|
||||||
|
618 20394 45500
|
||||||
|
619 20423 45459
|
||||||
|
620 20454 45420
|
||||||
|
621 20488 45390
|
||||||
|
622 20510 45333
|
||||||
|
623 20543 45301
|
||||||
|
624 20569 45252
|
||||||
|
625 20594 45201
|
||||||
|
626 20619 45151
|
||||||
|
627 20646 45107
|
||||||
|
628 20675 45066
|
||||||
|
629 20701 45016
|
||||||
|
630 20727 44970
|
||||||
|
631 20752 44919
|
||||||
|
632 20782 44881
|
||||||
|
633 20804 44825
|
||||||
|
634 20837 44791
|
||||||
|
635 20862 44742
|
||||||
|
636 20892 44704
|
||||||
|
637 20931 44683
|
||||||
|
638 20960 44643
|
||||||
|
639 20994 44612
|
||||||
|
640 21022 44570
|
||||||
|
641 21052 44531
|
||||||
|
642 21082 44493
|
||||||
|
643 21107 44443
|
||||||
|
644 21135 44401
|
||||||
|
645 21160 44351
|
||||||
|
646 21185 44302
|
||||||
|
647 21210 44253
|
||||||
|
648 21236 44208
|
||||||
|
649 21262 44161
|
||||||
|
650 21288 44113
|
||||||
|
651 21315 44068
|
||||||
|
652 21343 44027
|
||||||
|
653 21377 43997
|
||||||
|
654 21403 43949
|
||||||
|
655 21440 43926
|
||||||
|
656 21477 43903
|
||||||
|
657 21502 43854
|
||||||
|
658 21533 43819
|
||||||
|
659 21559 43772
|
||||||
|
660 21586 43727
|
||||||
|
661 21611 43680
|
||||||
|
662 21637 43633
|
||||||
|
663 21662 43586
|
||||||
|
664 21688 43539
|
||||||
|
665 21714 43493
|
||||||
|
666 21742 43451
|
||||||
|
667 21771 43413
|
||||||
|
668 21818 43409
|
||||||
|
669 21846 43366
|
||||||
|
670 21888 43352
|
||||||
|
671 21934 43345
|
||||||
|
672 21971 43322
|
||||||
|
673 22019 43320
|
||||||
|
674 22053 43289
|
||||||
|
675 22090 43266
|
||||||
|
676 22141 43269
|
||||||
|
677 22176 43240
|
||||||
|
678 22213 43215
|
||||||
|
679 22239 43171
|
||||||
|
680 22270 43134
|
||||||
|
681 22296 43088
|
||||||
|
682 22321 43041
|
||||||
|
683 22350 43002
|
||||||
|
684 22379 42962
|
||||||
|
685 22419 42944
|
||||||
|
686 22452 42912
|
||||||
|
687 22484 42878
|
||||||
|
688 22511 42834
|
||||||
|
689 22537 42789
|
||||||
|
690 22571 42757
|
||||||
|
691 22598 42714
|
||||||
|
692 22624 42669
|
||||||
|
693 22653 42630
|
||||||
|
694 22680 42586
|
||||||
|
695 22708 42545
|
||||||
|
696 22739 42510
|
||||||
|
697 22761 42457
|
||||||
|
698 22792 42421
|
||||||
|
699 22816 42373
|
||||||
|
700 22845 42333
|
||||||
|
701 22870 42288
|
||||||
|
702 22902 42253
|
||||||
|
703 22942 42234
|
||||||
|
704 22974 42201
|
||||||
|
705 23002 42160
|
||||||
|
706 23033 42124
|
||||||
|
707 23054 42071
|
||||||
|
708 23086 42038
|
||||||
|
709 23115 41999
|
||||||
|
710 23143 41957
|
||||||
|
711 23169 41914
|
||||||
|
712 23195 41868
|
||||||
|
713 23230 41840
|
||||||
|
714 23259 41801
|
||||||
|
715 23287 41760
|
||||||
|
716 23311 41713
|
||||||
|
717 23341 41676
|
||||||
|
718 23372 41641
|
||||||
|
719 23405 41610
|
||||||
|
720 23438 41578
|
||||||
|
721 23483 41566
|
||||||
|
722 23507 41519
|
||||||
|
723 23540 41488
|
||||||
|
724 23566 41444
|
||||||
|
725 23595 41406
|
||||||
|
726 23623 41365
|
||||||
|
727 23648 41320
|
||||||
|
728 23677 41281
|
||||||
|
729 23700 41231
|
||||||
|
730 23728 41192
|
||||||
|
731 23752 41144
|
||||||
|
732 23784 41111
|
||||||
|
733 23807 41063
|
||||||
|
734 23840 41031
|
||||||
|
735 23870 40994
|
||||||
|
736 23908 40972
|
||||||
|
737 23941 40940
|
||||||
|
738 23974 40909
|
||||||
|
739 24006 40875
|
||||||
|
740 24036 40838
|
||||||
|
741 24064 40798
|
||||||
|
742 24092 40759
|
||||||
|
743 24127 40730
|
||||||
|
744 24153 40688
|
||||||
|
745 24179 40644
|
||||||
|
746 24207 40604
|
||||||
|
747 24233 40561
|
||||||
|
748 24261 40522
|
||||||
|
749 24295 40491
|
||||||
|
750 24318 40444
|
||||||
|
751 24349 40410
|
||||||
|
752 24376 40368
|
||||||
|
753 24408 40335
|
||||||
|
754 24442 40306
|
||||||
|
755 24474 40273
|
||||||
|
756 24508 40242
|
||||||
|
757 24548 40222
|
||||||
|
758 24575 40182
|
||||||
|
759 24605 40145
|
||||||
|
760 24632 40104
|
||||||
|
761 24660 40064
|
||||||
|
762 24689 40027
|
||||||
|
763 24714 39982
|
||||||
|
764 24745 39949
|
||||||
|
765 24766 39897
|
||||||
|
766 24797 39863
|
||||||
|
767 24825 39823
|
||||||
|
768 24854 39786
|
||||||
|
769 24880 39744
|
||||||
|
770 24909 39706
|
||||||
|
771 24940 39672
|
||||||
|
772 24970 39635
|
||||||
|
773 25004 39606
|
||||||
|
774 25030 39564
|
||||||
|
775 25056 39522
|
||||||
|
776 25086 39486
|
||||||
|
777 25107 39436
|
||||||
|
778 25139 39403
|
||||||
|
779 25159 39351
|
||||||
|
780 25188 39314
|
||||||
|
781 25214 39272
|
||||||
|
782 25240 39230
|
||||||
|
783 25266 39188
|
||||||
|
784 25288 39141
|
||||||
|
785 25315 39101
|
||||||
|
786 25341 39058
|
||||||
|
787 25367 39016
|
||||||
|
788 25391 38972
|
||||||
|
789 25417 38930
|
||||||
|
790 25448 38895
|
||||||
|
791 25482 38867
|
||||||
|
792 25514 38834
|
||||||
|
793 25542 38795
|
||||||
|
794 25569 38756
|
||||||
|
795 25595 38714
|
||||||
|
796 25618 38669
|
||||||
|
797 25643 38626
|
||||||
|
798 25667 38581
|
||||||
|
799 25695 38543
|
||||||
|
800 25716 38494
|
||||||
|
801 25743 38454
|
||||||
|
802 25770 38415
|
||||||
|
803 25790 38364
|
||||||
|
804 25822 38332
|
||||||
|
805 25843 38284
|
||||||
|
806 25873 38249
|
||||||
|
807 25896 38203
|
||||||
|
808 25925 38167
|
||||||
|
809 25955 38131
|
||||||
|
810 25988 38101
|
||||||
|
811 26028 38080
|
||||||
|
812 26055 38042
|
||||||
|
813 26081 38000
|
||||||
|
814 26108 37961
|
||||||
|
815 26131 37916
|
||||||
|
816 26159 37878
|
||||||
|
817 26188 37841
|
||||||
|
818 26214 37800
|
||||||
|
819 26242 37764
|
||||||
|
820 26272 37728
|
||||||
|
821 26298 37688
|
||||||
|
822 26327 37652
|
||||||
|
823 26359 37619
|
||||||
|
824 26385 37580
|
||||||
|
825 26408 37534
|
||||||
|
826 26444 37507
|
||||||
|
827 26477 37478
|
||||||
|
828 26517 37456
|
||||||
|
829 26539 37411
|
||||||
|
830 26573 37382
|
||||||
|
831 26597 37339
|
||||||
|
832 26623 37298
|
||||||
|
833 26650 37259
|
||||||
|
834 26677 37221
|
||||||
|
835 26704 37182
|
||||||
|
836 26728 37138
|
||||||
|
837 26763 37111
|
||||||
|
838 26791 37073
|
||||||
|
839 26822 37041
|
||||||
|
840 26872 37033
|
||||||
|
841 26924 37029
|
||||||
|
842 26982 37033
|
||||||
|
843 27054 37055
|
||||||
|
844 27097 37038
|
||||||
|
845 27120 36994
|
||||||
|
846 27146 36954
|
||||||
|
847 27180 36925
|
||||||
|
848 27206 36884
|
||||||
|
849 27234 36846
|
||||||
|
850 27260 36807
|
||||||
|
851 27289 36770
|
||||||
|
852 27318 36734
|
||||||
|
853 27347 36698
|
||||||
|
854 27386 36675
|
||||||
|
855 27413 36637
|
||||||
|
856 27439 36596
|
||||||
|
857 27471 36564
|
||||||
|
858 27501 36529
|
||||||
|
859 27535 36500
|
||||||
|
860 27572 36474
|
||||||
|
861 27595 36431
|
||||||
|
862 27627 36398
|
||||||
|
863 27654 36360
|
||||||
|
864 27683 36324
|
||||||
|
865 27711 36287
|
||||||
|
866 27738 36249
|
||||||
|
867 27765 36210
|
||||||
|
868 27794 36175
|
||||||
|
869 27820 36135
|
||||||
|
Executable
+46
@@ -0,0 +1,46 @@
|
|||||||
|
import os
|
||||||
|
import yaml
|
||||||
|
from typing import Dict, Any, Optional
|
||||||
|
|
||||||
|
class EnsembleConfig:
    """Singleton wrapper around the settings in ``ensemble_config.yaml``.

    The first ``EnsembleConfig()`` call creates the instance and loads the
    YAML file located next to this module; every later call returns that
    same instance without reloading.
    """

    _instance: Optional['EnsembleConfig'] = None
    _config: Dict[str, Any] = {}

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super(EnsembleConfig, cls).__new__(cls)
            cls._instance._load_config()
        return cls._instance

    def _load_config(self):
        """Load configuration from the YAML file.

        Loading is best-effort: a failure is printed and leaves an empty
        configuration, so ``get`` then returns the caller's defaults.
        """
        config_path = os.path.join(os.path.dirname(__file__), 'ensemble_config.yaml')
        try:
            with open(config_path, 'r', encoding='utf-8') as f:
                loaded = yaml.safe_load(f)
        except Exception as e:
            print(f"❌ Failed to load ensemble config: {e}")
            loaded = None
        # safe_load yields None for an empty document and may yield a list or
        # scalar root; normalise both to {} so _config is always a mapping.
        self._config = loaded if isinstance(loaded, dict) else {}

    def get(self, key: str, default: Any = None) -> Any:
        """Get a configuration value by key (dot notation for nested keys).

        Args:
            key: Path such as ``"engine_weights.team"``. The path is split on
                every ``"."``, so a mapping key that itself contains a dot
                cannot be addressed here; fetch its parent mapping instead.
            default: Returned when any path segment is missing or when an
                intermediate value cannot be indexed by the segment.

        Returns:
            The stored value, or ``default``.
        """
        value = self._config
        try:
            for segment in key.split('.'):
                value = value[segment]
        except (KeyError, TypeError):
            return default
        return value
|
||||||
|
|
||||||
|
# Singleton accessor
|
||||||
|
def get_config() -> EnsembleConfig:
    """Return the shared EnsembleConfig instance."""
    shared = EnsembleConfig()
    return shared
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke test: print one whole section and one nested value.
    config = get_config()
    for label, dotted_key in (
        ("Weights", "engine_weights"),
        ("Team Weight", "engine_weights.team"),
    ):
        print(f"{label}: {config.get(dotted_key)}")
|
||||||
Executable
+186
@@ -0,0 +1,186 @@
|
|||||||
|
engine_weights:
|
||||||
|
team: 0.30
|
||||||
|
player: 0.25
|
||||||
|
odds: 0.30
|
||||||
|
referee: 0.15
|
||||||
|
min_weight: 0.05
|
||||||
|
|
||||||
|
weight_redistribution:
|
||||||
|
player_missing_to_team: 0.5
|
||||||
|
player_missing_to_odds: 0.5
|
||||||
|
referee_missing_to_team: 0.4
|
||||||
|
referee_missing_to_odds: 0.6
|
||||||
|
referee_min_matches: 5
|
||||||
|
|
||||||
|
match_result:
|
||||||
|
min_draw_prob: 0.15
|
||||||
|
|
||||||
|
over_under:
|
||||||
|
prob_min: 0.02
|
||||||
|
prob_max: 0.98
|
||||||
|
ou15_threshold: 0.55
|
||||||
|
ou25_threshold: 0.52
|
||||||
|
ou35_threshold: 0.48
|
||||||
|
btts_threshold: 0.58
|
||||||
|
poisson_blend_weight: 0.25
|
||||||
|
poisson_grid_max: 6
|
||||||
|
|
||||||
|
half_time:
|
||||||
|
ft_to_ht_ratio: 0.42
|
||||||
|
poisson_grid_max: 5
|
||||||
|
ht_over_05_min: 0.20
|
||||||
|
ht_over_05_max: 0.95
|
||||||
|
ht_ou_threshold: 0.55
|
||||||
|
ht_draw_floor: 0.28
|
||||||
|
low_xg_threshold: 2.0
|
||||||
|
low_xg_ratio_adjust: 0.85
|
||||||
|
|
||||||
|
confidence:
|
||||||
|
agreement_boost: 1.3
|
||||||
|
disagreement_penalty: 0.7
|
||||||
|
|
||||||
|
handicap:
|
||||||
|
xg_diff_threshold: 1.2
|
||||||
|
|
||||||
|
corners:
|
||||||
|
xg_multiplier: 3.0
|
||||||
|
baseline: 3.0
|
||||||
|
home_dominant_bonus: 1.5
|
||||||
|
away_dominant_bonus: 1.0
|
||||||
|
dominance_threshold: 0.6
|
||||||
|
line: 9.5
|
||||||
|
|
||||||
|
cards:
|
||||||
|
derby_heat_factor: 1.3
|
||||||
|
line: 4.5
|
||||||
|
|
||||||
|
score:
|
||||||
|
poisson_grid_max: 7
|
||||||
|
ms_confidence_threshold: 15.0
|
||||||
|
|
||||||
|
risk:
|
||||||
|
# Lowered thresholds for better surprise detection (was 0.20+)
|
||||||
|
# Model typically outputs 4-8% for reversals, so we need lower thresholds
|
||||||
|
surprise_threshold: 0.05
|
||||||
|
surprise_threshold_top: 0.05
|
||||||
|
surprise_threshold_non_top: 0.06
|
||||||
|
surprise_threshold_favorite_reversal: 0.06
|
||||||
|
surprise_threshold_favorite_reversal_top: 0.06
|
||||||
|
surprise_threshold_favorite_reversal_non_top: 0.08
|
||||||
|
surprise_threshold_underdog_reversal: 0.05
|
||||||
|
surprise_threshold_underdog_reversal_top: 0.05
|
||||||
|
surprise_threshold_underdog_reversal_non_top: 0.06
|
||||||
|
surprise_threshold_basketball: 0.08
|
||||||
|
surprise_threshold_basketball_top: 0.08
|
||||||
|
surprise_threshold_basketball_non_top: 0.10
|
||||||
|
surprise_min_top_gap: 0.01
|
||||||
|
surprise_min_top_gap_top: 0.01
|
||||||
|
surprise_min_top_gap_non_top: 0.015
|
||||||
|
# New: Upset alert threshold for potential upsets (lower than main threshold)
|
||||||
|
upset_alert_threshold: 0.05 # 5% - alert when reversal prob > 5%
|
||||||
|
htft_temperature: 1.25
|
||||||
|
htft_temperature_top: 1.25
|
||||||
|
htft_temperature_non_top: 1.35
|
||||||
|
htft_temperature_basketball: 1.08
|
||||||
|
htft_temperature_basketball_top: 1.08
|
||||||
|
htft_temperature_basketball_non_top: 1.15
|
||||||
|
htft_reversal_multiplier: 0.60
|
||||||
|
htft_reversal_multiplier_top: 0.60
|
||||||
|
htft_reversal_multiplier_non_top: 0.45
|
||||||
|
htft_reversal_multiplier_favorite: 0.72
|
||||||
|
htft_reversal_multiplier_favorite_top: 0.72
|
||||||
|
htft_reversal_multiplier_favorite_non_top: 0.55
|
||||||
|
htft_reversal_multiplier_underdog: 0.45
|
||||||
|
htft_reversal_multiplier_underdog_top: 0.45
|
||||||
|
htft_reversal_multiplier_underdog_non_top: 0.30
|
||||||
|
htft_reversal_multiplier_basketball: 0.90
|
||||||
|
htft_reversal_multiplier_basketball_top: 0.90
|
||||||
|
htft_reversal_multiplier_basketball_non_top: 0.75
|
||||||
|
htft_reversal_gap_medium: 0.50
|
||||||
|
htft_reversal_gap_strong: 1.00
|
||||||
|
htft_prior_min_matches: 300
|
||||||
|
htft_prior_blend_league: 0.65
|
||||||
|
htft_prior_blend_top: 0.50
|
||||||
|
htft_prior_blend_non_top: 0.58
|
||||||
|
htft_prior_odds_blend_top: 0.35
|
||||||
|
htft_prior_odds_blend_top_with_league: 0.22
|
||||||
|
htft_favorite_balance_gap: 0.20
|
||||||
|
htft_reversal_cap_factor: 2.30
|
||||||
|
extreme_upset: 0.7
|
||||||
|
high_upset: 0.5
|
||||||
|
medium_upset: 0.3
|
||||||
|
extreme_warnings: 3
|
||||||
|
high_warnings: 2
|
||||||
|
balanced_match_gap: 0.1
|
||||||
|
referee_min_data: 10
|
||||||
|
|
||||||
|
recommendations:
|
||||||
|
confidence_threshold: 45
|
||||||
|
value_confidence_min: 10
|
||||||
|
value_confidence_max: 30
|
||||||
|
value_edge_margin: 0.02
|
||||||
|
value_upgrade_edge: 5.0
|
||||||
|
|
||||||
|
# ACİL DÜZELTİLDİ: Güvenilir marketler genişletildi
|
||||||
|
safe_markets: ['ÇŞ', '1.5 Üst/Alt', '2.5 Üst/Alt']
|
||||||
|
|
||||||
|
# ACİL DÜZELTİLDİ: Market bazlı minimum confidence threshold'lar (Artık Olasılık Yüzdesi!)
|
||||||
|
market_min_confidence:
|
||||||
|
MS: 50.0 # Match result is hardest; 50%+ true probability is actually strong
|
||||||
|
ÇŞ: 65.0 # Double chance naturally has high probability (2 sides of 3)
|
||||||
|
1.5 Üst/Alt: 70.0 # 1.5 Goals needs to be highly probable to be worth playing
|
||||||
|
2.5 Üst/Alt: 55.0 # Standard threshold for 50/50 lines
|
||||||
|
3.5 Üst/Alt: 60.0 # Needs higher certainty than 2.5
|
||||||
|
BTTS: 60.0 # Both Teams To Score - raised for accuracy (was 47.7%)
|
||||||
|
|
||||||
|
risk_safe_boost: 1.2
|
||||||
|
risk_ms_penalty_high: 0.5
|
||||||
|
risk_ms_penalty_medium: 0.8
|
||||||
|
risk_other_penalty: 0.7
|
||||||
|
|
||||||
|
# ACİL DÜZELTİLDİ: Market weights güvenilir marketlere göre ayarlandı
|
||||||
|
market_weights:
|
||||||
|
MS: 0.5 # ⬇️ Düşürüldü (zayıf performans)
|
||||||
|
ÇŞ: 1.5 # ⬆️ Artırıldı (güçlü performans)
|
||||||
|
1.5 Üst/Alt: 1.6 # ⬆️ En yüksek (en güvenilir)
|
||||||
|
2.5 Üst/Alt: 1.2 # ⬆️ Artırıldı
|
||||||
|
3.5 Üst/Alt: 0.9 # ⬇️ Düşürüldü
|
||||||
|
BTTS: 0.4 # ⬇️ Düşürüldü (zayıf performans)
|
||||||
|
|
||||||
|
# Confidence Calibration (backtest-derived accuracy)
|
||||||
|
baseline_accuracy: 65.0
|
||||||
|
market_accuracy:
|
||||||
|
MS: 52.1 # ❌ Zayıf
|
||||||
|
ÇŞ: 77.9 # ✅ İyi
|
||||||
|
1.5 Üst/Alt: 82.1 # ✅ Mükemmel
|
||||||
|
2.5 Üst/Alt: 61.4 # ⚠️ Orta
|
||||||
|
3.5 Üst/Alt: 60.7 # ⚠️ Orta
|
||||||
|
BTTS: 50.7 # ❌ Zayıf
|
||||||
|
|
||||||
|
calibration_buckets:
|
||||||
|
ms_home:
|
||||||
|
heavy_fav: 1.40 # home odds <= 1.40
|
||||||
|
fav: 1.80 # home odds > 1.40 and <= 1.80
|
||||||
|
balanced: 2.50 # home odds > 1.80 and <= 2.50
|
||||||
|
underdog: 99.0 # home odds > 2.50
|
||||||
|
|
||||||
|
team_xg:
|
||||||
|
home_base: 1.35
|
||||||
|
away_base: 1.10
|
||||||
|
home_conversion_mult: 3.0
|
||||||
|
away_conversion_mult: 2.5
|
||||||
|
|
||||||
|
sidelined:
|
||||||
|
position_weights:
|
||||||
|
K: 0.35
|
||||||
|
D: 0.20
|
||||||
|
O: 0.25
|
||||||
|
F: 0.30
|
||||||
|
max_rating: 10
|
||||||
|
adaptation_threshold: 10
|
||||||
|
adaptation_discount: 0.5
|
||||||
|
goalkeeper_penalty: 0.15
|
||||||
|
confidence_boost: 10
|
||||||
|
max_impact: 0.85
|
||||||
|
key_player_threshold: 3
|
||||||
|
recent_matches_lookback: 15
|
||||||
Executable
+8
@@ -0,0 +1,8 @@
|
|||||||
|
from .base_calculator import BaseCalculator, CalculationContext
|
||||||
|
from .match_result_calculator import MatchResultCalculator
|
||||||
|
from .over_under_calculator import OverUnderCalculator
|
||||||
|
from .half_time_calculator import HalfTimeCalculator
|
||||||
|
from .score_calculator import ScoreCalculator
|
||||||
|
from .other_markets_calculator import OtherMarketsCalculator
|
||||||
|
from .risk_assessor import RiskAssessor
|
||||||
|
from .bet_recommender import BetRecommender, MarketPredictionDTO
|
||||||
+53
@@ -0,0 +1,53 @@
|
|||||||
|
"""
|
||||||
|
Base classes and context dataclass for all calculators.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class CalculationContext:
    """Bundle of inputs shared by the calculators for one match."""

    # Prediction objects, one per source; their types are not constrained here.
    team_pred: Any
    player_pred: Any
    odds_pred: Any
    referee_pred: Any
    upset_factors: Any

    # String-keyed float mappings.
    weights: dict[str, float]
    player_mods: dict[str, float]
    referee_mods: dict[str, float]

    # Match identity.
    match_id: str
    home_team_name: str
    away_team_name: str

    # Odds keyed by name, plus xG figures.
    odds_data: dict[str, float]
    home_xg: float
    away_xg: float
    total_xg: float

    # Optional league/sport information.
    league_id: str | None = None
    sport: str = "football"
    is_top_league: bool = False

    # Risk information; starts at these defaults and is populated later.
    risk_level: str = "MEDIUM"
    is_surprise: bool = False

    # XGBoost predictions; each instance gets its own empty dict by default.
    xgboost_preds: dict[str, dict[str, Any]] = field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
|
class BaseCalculator:
    """Common parent of the market calculators; it only stores the config."""

    def __init__(self, config: dict[str, Any]) -> None:
        # NOTE(review): BetRecommender calls config.get() with dotted keys such
        # as "recommendations.market_weights"; confirm whether callers pass a
        # plain dict or an object that resolves dot notation.
        self.config = config

    def calculate(self, ctx: CalculationContext) -> dict[str, Any]:
        """Compute this calculator's output for ``ctx``.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        message = "Subclasses must implement calculate()"
        raise NotImplementedError(message)
|
||||||
+210
@@ -0,0 +1,210 @@
|
|||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import List, Optional, Any
|
||||||
|
from .base_calculator import BaseCalculator, CalculationContext
|
||||||
|
from .match_result_calculator import MatchResultPrediction
|
||||||
|
from .over_under_calculator import OverUnderPrediction
|
||||||
|
from .risk_assessor import RiskAnalysis
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class MarketPredictionDTO:
    """One candidate bet for a single market, plus its classification flags."""

    # Market label (e.g. "MS") and the selection made within it.
    market_type: str
    pick: str
    # Probability of the pick, and its confidence on a 0-100 style scale.
    probability: float
    confidence: float
    # Price for the pick; 0.0 stands for "no odds available".
    odds: float = 0.0
    # Flags and edge filled in by the recommender.
    is_recommended: bool = False
    is_value_bet: bool = False
    edge: float = 0.0
    # Set when the model is unsure and the market is skipped.
    is_skip: bool = False
|
||||||
|
|
||||||
|
@dataclass
class RecommendationResult:
    """Everything BetRecommender.calculate returns for one match."""

    # Top-scoring recommended market, or None when nothing was recommended.
    best_bet: Optional[MarketPredictionDTO]
    # Markets flagged as recommended.
    recommended_bets: List[MarketPredictionDTO]
    # Optional fallback pick.
    alternative_bet: Optional[MarketPredictionDTO]
    # Markets flagged as value bets but not upgraded to recommended.
    value_bets: List[MarketPredictionDTO]
    # Markets the recommender decided NOT to predict.
    skipped_bets: List[MarketPredictionDTO]
|
||||||
|
|
||||||
|
|
||||||
|
class BetRecommender(BaseCalculator):
    """Classify market predictions into recommended, value and skipped bets."""

    def calculate(self,
                  ctx: CalculationContext,
                  ms_res: MatchResultPrediction,
                  ou_res: OverUnderPrediction,
                  risk: RiskAnalysis) -> RecommendationResult:
        """Build six market candidates, gate them, and pick the best one.

        Candidates are built for MS, ÇŞ, 1.5/2.5/3.5 Üst/Alt and BTTS. A
        candidate is skipped when its confidence is below the per-market hard
        gate, or when its probability trails the odds-implied probability by
        more than 0.03. A surviving candidate is recommended when its
        confidence reaches the configured threshold, and may also be flagged
        as a value bet (and upgraded to recommended on a large enough edge).

        Args:
            ctx: Supplies ``odds_data``, the mapping of odds keys to prices.
            ms_res: Match-result and double-chance picks, probabilities and
                confidences.
            ou_res: Over/under and BTTS picks, probabilities and confidences.
            risk: Supplies ``risk_level`` and ``is_surprise_risk``.

        Returns:
            RecommendationResult holding the best bet (highest score among the
            recommended ones, or None), the recommended list without
            duplicates, an optional 2.5 Üst/Alt alternative, the value bets
            and the skipped bets.
        """
        odds_data = ctx.odds_data

        def _odds(key):
            # A key stored with a None value is treated like a missing price,
            # so the ``> 0`` comparisons below cannot raise TypeError.
            return odds_data.get(key, 0) or 0.0

        # Market-specific minimum confidence thresholds (hard gates).
        # Below these the market is marked as SKIP.
        min_conf_thresholds = {
            "MS": 45.0,  # 3-way is hard, need at least 45%
            "ÇŞ": 40.0,  # Double chance is safer, but still need 40%
            "1.5 Üst/Alt": 50.0,
            "2.5 Üst/Alt": 45.0,
            "3.5 Üst/Alt": 45.0,
            "BTTS": 45.0,
            "HT": 40.0,
        }

        # Probability of the selected side for each market.
        if ms_res.ms_pick == "1":
            ms_prob = ms_res.ms_home_prob
        elif ms_res.ms_pick == "2":
            ms_prob = ms_res.ms_away_prob
        else:
            ms_prob = ms_res.ms_draw_prob

        if ms_res.dc_pick == "1X":
            dc_prob = ms_res.dc_1x_prob
        elif ms_res.dc_pick == "X2":
            dc_prob = ms_res.dc_x2_prob
        else:
            dc_prob = ms_res.dc_12_prob

        ou25_is_over = "Üst" in ou_res.ou25_pick
        ou25_prob = ou_res.over_25_prob if ou25_is_over else ou_res.under_25_prob
        ou25_odds_key = "ou25_o" if ou25_is_over else "ou25_u"
        btts_is_yes = "Var" in ou_res.btts_pick

        # Prepare candidates (1.5 and 3.5 lines are built without odds).
        markets = [
            MarketPredictionDTO("MS", ms_res.ms_pick, ms_prob,
                                ms_res.ms_confidence,
                                _odds(f"ms_{ms_res.ms_pick.lower()}")),
            MarketPredictionDTO("ÇŞ", ms_res.dc_pick, dc_prob,
                                ms_res.dc_confidence,
                                _odds(f"dc_{ms_res.dc_pick.lower()}")),
            MarketPredictionDTO("1.5 Üst/Alt", ou_res.ou15_pick,
                                ou_res.over_15_prob if "Üst" in ou_res.ou15_pick else ou_res.under_15_prob,
                                ou_res.ou15_confidence, 0),
            MarketPredictionDTO("2.5 Üst/Alt", ou_res.ou25_pick, ou25_prob,
                                ou_res.ou25_confidence,
                                _odds(ou25_odds_key)),
            MarketPredictionDTO("3.5 Üst/Alt", ou_res.ou35_pick,
                                ou_res.over_35_prob if "Üst" in ou_res.ou35_pick else ou_res.under_35_prob,
                                ou_res.ou35_confidence, 0),
            MarketPredictionDTO("BTTS", ou_res.btts_pick,
                                ou_res.btts_yes_prob if btts_is_yes else ou_res.btts_no_prob,
                                ou_res.btts_confidence,
                                _odds("btts_y" if btts_is_yes else "btts_n")),
        ]

        # Market weights from config (historical accuracy weighting).
        market_weights = self.config.get("recommendations.market_weights", {})
        default_weight = 1.0

        safe_markets = set(self.config.get("recommendations.safe_markets", ["ÇŞ", "1.5 Üst/Alt"]))
        risk_level = risk.risk_level

        # Confidence calibration (backtest-derived accuracy scaling).
        market_accuracy = self.config.get("recommendations.market_accuracy", {})
        baseline_accuracy = self.config.get("recommendations.baseline_accuracy", 65.0)

        def _calibrated_confidence(m):
            """Scale raw confidence by the market's historical accuracy ratio."""
            if isinstance(market_accuracy, dict):
                accuracy = market_accuracy.get(m.market_type, baseline_accuracy)
            else:
                accuracy = baseline_accuracy
            return m.confidence * (accuracy / baseline_accuracy)

        def _score(m):
            """Rank a candidate: calibrated confidence, edge bonus, risk factor."""
            if isinstance(market_weights, dict):
                mw = market_weights.get(m.market_type, default_weight)
            else:
                mw = default_weight

            # 1. Base score: calibrated confidence * market weight
            cal_conf = _calibrated_confidence(m)
            score = cal_conf * mw

            # 2. Value/edge bonus (only a positive edge adds to the score)
            if m.odds > 0:
                edge = (m.probability - 1.0 / m.odds) * 100
                if edge > 0:
                    score += edge * 4.0

            # 3. Risk adjustment
            if risk_level in ("HIGH", "EXTREME"):
                if m.market_type in safe_markets:
                    score *= self.config.get("recommendations.risk_safe_boost", 1.2)
                elif m.market_type == "MS":
                    score *= self.config.get("recommendations.risk_ms_penalty_high", 0.5)
                else:
                    score *= self.config.get("recommendations.risk_other_penalty", 0.7)
            elif risk_level == "MEDIUM":
                if m.market_type == "MS":
                    score *= self.config.get("recommendations.risk_ms_penalty_medium", 0.8)

            # 4. Extreme confidence bonus
            if cal_conf > 80:
                score *= 1.15

            return score

        recommended = []
        value_bets = []
        skipped_bets = []

        conf_thr = self.config.get("recommendations.confidence_threshold", 60)
        val_min = self.config.get("recommendations.value_confidence_min", 45)
        val_max = self.config.get("recommendations.value_confidence_max", 60)
        val_margin = self.config.get("recommendations.value_edge_margin", 0.03)
        val_upgrade = self.config.get("recommendations.value_upgrade_edge", 5.0)

        for m in markets:
            # --- SKIP LOGIC (hard gate) ---
            # 1. Confidence is below the market threshold
            if m.confidence < min_conf_thresholds.get(m.market_type, 45.0):
                m.is_skip = True
                skipped_bets.append(m)
                continue

            has_odds = m.odds > 0
            implied = 1.0 / m.odds if has_odds else 0.0

            # 2. Negative edge: our probability is more than 3 points below
            #    the probability implied by the odds
            if has_odds and (m.probability - implied) < -0.03:
                m.is_skip = True
                skipped_bets.append(m)
                continue

            # --- PROCESS BET ---
            # 1. Regular recommended
            if m.confidence >= conf_thr:
                m.is_recommended = True
                recommended.append(m)

            # 2. Value bet logic
            if has_odds and val_min <= m.confidence <= val_max:
                if m.probability > (implied + val_margin):
                    m.is_value_bet = True
                    m.edge = (m.probability - implied) * 100

                    if m.edge > val_upgrade:
                        # Upgrade to recommended, but never list a market
                        # twice when it already passed the confidence gate.
                        if not m.is_recommended:
                            m.is_recommended = True
                            recommended.append(m)
                    else:
                        value_bets.append(m)

        # Best bet: highest-scoring recommended market. ``recommended`` keeps
        # the candidate order, so ties resolve to the earliest candidate.
        best_bet = max(recommended, key=_score) if recommended else None

        # Alternative bet: offered when a surprise is flagged on a home/away
        # pick. Only the confidence gate is applied to this fresh candidate.
        alternative = None
        if risk.is_surprise_risk and ms_res.ms_pick in ["1", "2"]:
            alt_candidate = MarketPredictionDTO(
                "2.5 Üst/Alt", ou_res.ou25_pick, ou25_prob,
                ou_res.ou25_confidence,
                _odds(ou25_odds_key)
            )
            if alt_candidate.confidence >= min_conf_thresholds.get("2.5 Üst/Alt", 45.0):
                alternative = alt_candidate

        return RecommendationResult(
            best_bet=best_bet,
            recommended_bets=recommended,
            alternative_bet=alternative,
            value_bets=value_bets,
            skipped_bets=skipped_bets
        )
|
||||||
Executable
+32
@@ -0,0 +1,32 @@
|
|||||||
|
def _clamp_percent(prob: float) -> float:
    """Convert a probability to a percentage clamped to [0.0, 99.0].

    NaN and non-positive inputs yield 0.0.
    """
    pct = prob * 100
    # ``not pct > 0`` is also true for NaN. Without this guard,
    # min(99.0, nan) evaluates to 99.0 and a NaN probability would be
    # reported as maximum confidence.
    if not pct > 0:
        return 0.0
    return min(99.0, pct)


def calc_confidence_3way(top_prob: float) -> float:
    """Returns the true win probability percentage (e.g. 0.45 -> 45.0)."""
    return _clamp_percent(top_prob)


def calc_confidence_2way(prob: float) -> float:
    """Returns the true win probability percentage for the favored side."""
    # The favored side is whichever of prob / (1 - prob) is at least 0.5.
    win_prob = prob if prob >= 0.5 else (1.0 - prob)
    return _clamp_percent(win_prob)


def calc_confidence_dc(top_prob: float) -> float:
    """Returns the true win probability percentage for double chance."""
    return _clamp_percent(top_prob)


def calc_confidence_3way_with_agreement(top_prob: float, agreement_ratio: float,
                                        boost: float = 1.05, penalty: float = 0.95) -> float:
    """
    Returns the true win probability percentage, slightly adjusted by engine consensus.

    Args:
        top_prob: highest probability among options
        agreement_ratio: 0.0 to 1.0 — how many engines agree on the pick
        boost: multiplier applied when agreement_ratio >= 0.75
        penalty: multiplier applied when agreement_ratio <= 0.25

    Returns:
        The adjusted percentage, kept within [0.0, 99.0].
    """
    base = calc_confidence_3way(top_prob)

    # Slight nudge rather than massive swing, to keep it feeling like a true probability
    if agreement_ratio >= 0.75:
        return min(99.0, base * boost)
    if agreement_ratio <= 0.25:
        return max(0.0, base * penalty)
    return base
|
||||||
@@ -0,0 +1,131 @@
|
|||||||
|
"""
|
||||||
|
Expert Recommendation Engine (Senior Level)
|
||||||
|
============================================
|
||||||
|
Evaluates ALL markets, classifies by risk, and ensures NO "empty" recommendations.
|
||||||
|
Prioritizes user safety by clearly labeling risk levels.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import List, Optional, Any, Dict
|
||||||
|
from .base_calculator import BaseCalculator, CalculationContext
|
||||||
|
from .match_result_calculator import MatchResultPrediction
|
||||||
|
from .over_under_calculator import OverUnderPrediction
|
||||||
|
from .risk_assessor import RiskAnalysis
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ExpertPick:
    """One evaluated market selection with its risk label and rationale."""

    # Market label and the selection made within it.
    market_type: str
    pick: str
    # Probability of the pick and its confidence.
    probability: float
    confidence: float
    # Price used for the evaluation.
    odds: float
    # Gap between the pick's probability and the odds-implied probability,
    # in percentage points.
    edge: float

    # Risk label. The evaluate() helper visible in this module assigns
    # SAFE, VALUE, SURPRISE or MEDIUM.
    # NOTE(review): confirm whether any other label (e.g. RISKY) is produced.
    risk_level: str
    # Short explanation of where the pick comes from
    # (e.g. "Market Confirmed", "Derived (No market odd)").
    reasoning: str
|
||||||
|
|
||||||
|
@dataclass
class ExpertResult:
    """Grouped output of the expert recommender for one match."""

    # Primary pick; this field is not Optional.
    main_pick: ExpertPick
    # Optional safer fallback.
    safe_alternative: Optional[ExpertPick]
    # Additional picks, grouped by category.
    value_picks: List[ExpertPick]
    surprise_picks: List[ExpertPick]
    # {market: probability}
    market_summary: Dict[str, float]
|
||||||
|
|
||||||
|
|
||||||
|
class ExpertRecommender(BaseCalculator):
    """Evaluates the main markets, labels each pick by risk and ranks them."""

    # Match-result pick label -> odds key. ``ms_h`` / ``ms_d`` / ``ms_a`` are
    # the keys the other odds consumers in this package read from odds_data.
    _MS_ODDS_KEYS = {"1": "ms_h", "X": "ms_d", "2": "ms_a"}

    @staticmethod
    def _first_valid_odd(odds_data, keys) -> float:
        """Return the first odd above 1.01 found under ``keys``, else 0.0.

        Missing, ``None`` or non-numeric entries are skipped instead of
        raising, so one malformed odd cannot abort the whole recommendation.
        """
        for key in keys:
            if key is None:
                continue
            try:
                odd = float(odds_data.get(key, 0))
            except (TypeError, ValueError):
                continue
            if odd > 1.01:
                return odd
        return 0.0

    def calculate(self,
                  ctx: CalculationContext,
                  ms_res: MatchResultPrediction,
                  ou_res: OverUnderPrediction,
                  risk: RiskAnalysis) -> ExpertResult:
        """Build the ranked recommendation for one match.

        Args:
            ctx: Calculation context; only ``ctx.odds_data`` is read here.
            ms_res: Match-result and double-chance prediction.
            ou_res: Over/under and BTTS prediction.
            risk: Risk analysis (accepted for interface compatibility; not
                read by this method).

        Returns:
            ExpertResult with the top-ranked pick, an optional SAFE
            alternative, the VALUE and SURPRISE picks and a market summary.
        """
        odds_data = ctx.odds_data or {}
        all_picks: List[ExpertPick] = []

        # ─── 1. Helper to Evaluate Pick ───
        def evaluate(market: str, pick: str, prob: float, *odd_keys):
            odd_val = self._first_valid_odd(odds_data, odd_keys)
            # If no usable odd exists, estimate one from the probability
            # (the +0.05 keeps the estimate conservative).
            if odd_val <= 1.01:
                odd_val = round(1.0 / (prob + 0.05), 2)
                reasoning = "Derived (No market odd)"
            else:
                reasoning = "Market Confirmed"

            implied = 1.0 / odd_val
            edge = (prob - implied) * 100

            # ─── Risk Classification ───
            if prob >= 0.75 and odd_val <= 1.45:
                level = "SAFE"
            elif edge > 5.0:
                level = "VALUE"
            elif odd_val >= 2.50 and prob >= 0.35:
                level = "SURPRISE"
            else:
                level = "MEDIUM"

            all_picks.append(ExpertPick(
                market_type=market, pick=pick, probability=prob,
                confidence=prob * 100, odds=odd_val, edge=edge,
                risk_level=level, reasoning=reasoning
            ))

        # ─── 2. Evaluate All Major Markets ───
        # MS: the pick label ("1"/"X"/"2") does not match the odds keys
        # (ms_h/ms_d/ms_a), so try the label-derived key first for backward
        # compatibility and then the canonical key.
        ms_pick = ms_res.ms_pick
        if ms_pick == "1":
            ms_prob = ms_res.ms_home_prob
        elif ms_pick == "2":
            ms_prob = ms_res.ms_away_prob
        else:
            ms_prob = ms_res.ms_draw_prob
        evaluate("MS", ms_pick, ms_prob,
                 f"ms_{ms_pick.lower()}", self._MS_ODDS_KEYS.get(ms_pick))

        # Double Chance
        dc_pick = ms_res.dc_pick
        if dc_pick == "1X":
            dc_prob = ms_res.dc_1x_prob
        elif dc_pick == "X2":
            dc_prob = ms_res.dc_x2_prob
        else:
            dc_prob = ms_res.dc_12_prob
        evaluate("DC", dc_pick, dc_prob, f"dc_{dc_pick.lower()}")

        # OU25
        evaluate("OU25", ou_res.ou25_pick,
                 ou_res.over_25_prob if "Üst" in ou_res.ou25_pick else ou_res.under_25_prob,
                 "ou25_o" if "Üst" in ou_res.ou25_pick else "ou25_u")

        # BTTS
        evaluate("BTTS", ou_res.btts_pick,
                 ou_res.btts_yes_prob if "Var" in ou_res.btts_pick else ou_res.btts_no_prob,
                 "btts_y" if "Var" in ou_res.btts_pick else "btts_n")

        # OU15
        evaluate("OU15", ou_res.ou15_pick,
                 ou_res.over_15_prob if "Üst" in ou_res.ou15_pick else ou_res.under_15_prob,
                 "ou15_o" if "Üst" in ou_res.ou15_pick else "ou15_u")

        # ─── 3. Sort and Select ───
        # Rank by 60% probability + 40% positive edge.
        all_picks.sort(
            key=lambda p: (p.probability * 0.6) + (max(0, p.edge / 100) * 0.4),
            reverse=True,
        )
        main = all_picks[0]

        # Safe alternative: first SAFE pick, unless that is the main pick itself.
        safe_alt = next((p for p in all_picks if p.risk_level == "SAFE"), None)
        if safe_alt is main:
            safe_alt = None

        value_picks = [p for p in all_picks if p.risk_level == "VALUE" and p is not main]
        surprise_picks = [p for p in all_picks if p.risk_level == "SURPRISE"]

        # Market Summary for UI
        market_summary = {
            "MS_Home": ms_res.ms_home_prob,
            "MS_Draw": ms_res.ms_draw_prob,
            "MS_Away": ms_res.ms_away_prob,
            "OU25_Over": ou_res.over_25_prob,
            "BTTS_Yes": ou_res.btts_yes_prob
        }

        return ExpertResult(
            main_pick=main,
            safe_alternative=safe_alt,
            value_picks=value_picks,
            surprise_picks=surprise_picks,
            market_summary=market_summary
        )
|
||||||
+179
@@ -0,0 +1,179 @@
|
|||||||
|
import math
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from .base_calculator import BaseCalculator, CalculationContext
|
||||||
|
from .confidence import calc_confidence_3way, calc_confidence_2way
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class HalfTimePrediction:
    """First-half (HT) prediction produced by ``HalfTimeCalculator``."""

    # --- HT 1X2 ---
    ht_home_prob: float
    ht_draw_prob: float
    ht_away_prob: float
    ht_pick: str            # label of the most probable HT outcome
    ht_confidence: float

    # --- HT over/under 0.5 and 1.5 goals ---
    ht_over_05_prob: float
    ht_under_05_prob: float
    ht_over_15_prob: float
    ht_under_15_prob: float
    ht_ou_pick: str         # pick for the 0.5 line
    ht_ou15_pick: str       # pick for the 1.5 line

    # --- Expected first-half goals per side ---
    ht_home_xg: float
    ht_away_xg: float
|
||||||
|
|
||||||
|
|
||||||
|
class HalfTimeCalculator(BaseCalculator):
    """Derives first-half 1X2 and over/under predictions from full-time xG."""

    def _poisson_pmf(self, k, lam):
        """Poisson probability mass function P(X = k) for rate ``lam``.

        A non-positive rate is treated as "always zero goals".
        """
        if lam <= 0:
            return 1.0 if k == 0 else 0.0
        return (lam ** k) * math.exp(-lam) / math.factorial(k)

    def calculate(self, ctx: CalculationContext) -> HalfTimePrediction:
        """Compute the half-time prediction for the match described by ``ctx``.

        Reads ``ctx.team_pred``, ``ctx.odds_pred`` and ``ctx.xgboost_preds``.
        Full-time xG is scaled down to a first-half xG, turned into 1X2 and
        total-goal probabilities on a Poisson grid, and optionally blended
        with XGBoost outputs when they are present.
        """
        team_pred = ctx.team_pred
        odds_pred = ctx.odds_pred

        # Config
        ft_to_ht_ratio = self.config.get("half_time.ft_to_ht_ratio", 0.42)
        grid_max = self.config.get("half_time.poisson_grid_max", 5)
        draw_floor = self.config.get("half_time.ht_draw_floor", 0.35)
        low_xg_thr = self.config.get("half_time.low_xg_threshold", 2.0)
        low_xg_adj = self.config.get("half_time.low_xg_ratio_adjust", 0.85)

        # FT xG (blended team + odds)
        ft_home_xg = (team_pred.home_xg + odds_pred.poisson_home_xg) / 2
        ft_away_xg = (team_pred.away_xg + odds_pred.poisson_away_xg) / 2
        total_ft_xg = ft_home_xg + ft_away_xg

        # Dynamic HT ratio: shrink the ratio for low-xG matches, because a
        # first-half goal is even less likely in low-scoring games.
        effective_ratio = ft_to_ht_ratio
        if total_ft_xg < low_xg_thr:
            effective_ratio *= low_xg_adj

        # HT xG
        ht_home_xg = ft_home_xg * effective_ratio
        ht_away_xg = ft_away_xg * effective_ratio
        ht_total_xg = ht_home_xg + ht_away_xg

        # Compute HT 1X2 on an independent-Poisson score grid, collecting the
        # total-goals distribution for O/U in the same pass.
        ht_home = 0.0
        ht_away = 0.0
        ht_draw = 0.0
        total_goals_prob = {}

        for i in range(grid_max):
            for j in range(grid_max):
                p = self._poisson_pmf(i, ht_home_xg) * self._poisson_pmf(j, ht_away_xg)
                if i > j:
                    ht_home += p
                elif i < j:
                    ht_away += p
                else:
                    ht_draw += p

                total = i + j
                total_goals_prob[total] = total_goals_prob.get(total, 0.0) + p

        # Draw floor: raise the draw probability to the configured minimum.
        if ht_draw < draw_floor:
            deficit = draw_floor - ht_draw
            ht_draw = draw_floor
            # Take the deficit from home and away proportionally.
            total_ha = ht_home + ht_away
            if total_ha > 0:
                ht_home -= deficit * (ht_home / total_ha)
                ht_away -= deficit * (ht_away / total_ha)

        # Normalize
        total_prob = ht_home + ht_draw + ht_away
        if total_prob > 0:
            ht_home /= total_prob
            ht_draw /= total_prob
            ht_away /= total_prob

        # XGBoost Integration (HT 1X2 and HT/FT Models)
        w_xgb = self.config.get("xgboost.weight_ht", 0.60)
        xgb_ht_home, xgb_ht_draw, xgb_ht_away = None, None, None

        if "ht_result" in ctx.xgboost_preds:
            probs = ctx.xgboost_preds["ht_result"]
            xgb_ht_home, xgb_ht_draw, xgb_ht_away = probs["home"], probs["draw"], probs["away"]
        elif "ht_ft" in ctx.xgboost_preds:
            # Fallback to HT/FT marginals: sum the nine HT/FT cells by HT outcome.
            htft_payload = ctx.xgboost_preds.get("ht_ft", {})
            probs = None
            if isinstance(htft_payload, dict):
                labels = ("1/1", "1/X", "1/2", "X/1", "X/X", "X/2", "2/1", "2/X", "2/2")
                if all(label in htft_payload for label in labels):
                    probs = [float(htft_payload[label]) for label in labels]

            if probs is None:
                probs = ctx.xgboost_preds.get("ht_ft_raw")
            if probs is not None and len(probs) == 9:
                xgb_ht_home = sum(probs[0:3])
                xgb_ht_draw = sum(probs[3:6])
                xgb_ht_away = sum(probs[6:9])

        if xgb_ht_home is not None:
            ht_home = ht_home * (1 - w_xgb) + xgb_ht_home * w_xgb
            ht_draw = ht_draw * (1 - w_xgb) + xgb_ht_draw * w_xgb
            ht_away = ht_away * (1 - w_xgb) + xgb_ht_away * w_xgb

            # Re-normalize; guarded like the first normalisation so an
            # all-zero blend cannot raise ZeroDivisionError.
            total = ht_home + ht_draw + ht_away
            if total > 0:
                ht_home /= total
                ht_draw /= total
                ht_away /= total

        # HT O/U 0.5: P(at least one goal) = 1 - P(0 goals)
        ht_over_05 = 1.0 - math.exp(-ht_total_xg)
        if "ht_ou05" in ctx.xgboost_preds:
            w_xgb = self.config.get("xgboost.weight_ou", 0.60)
            xgb_ht_over_05 = float(ctx.xgboost_preds["ht_ou05"])
            ht_over_05 = ht_over_05 * (1 - w_xgb) + xgb_ht_over_05 * w_xgb

        ht_over_05_min = self.config.get("half_time.ht_over_05_min", 0.20)
        ht_over_05_max = self.config.get("half_time.ht_over_05_max", 0.95)
        ht_over_05 = max(ht_over_05_min, min(ht_over_05_max, ht_over_05))

        # HT O/U 1.5: P(total >= 2), summed from the grid distribution.
        ht_over_15 = sum(p for g, p in total_goals_prob.items() if g >= 2)
        if "ht_ou15" in ctx.xgboost_preds:
            w_xgb = self.config.get("xgboost.weight_ou", 0.60)
            xgb_ht_over_15 = float(ctx.xgboost_preds["ht_ou15"])
            ht_over_15 = ht_over_15 * (1 - w_xgb) + xgb_ht_over_15 * w_xgb

        # Clamp bounds are configurable like the 0.5 line; the defaults are
        # the previously hard-coded values.
        ht_over_15_min = self.config.get("half_time.ht_over_15_min", 0.02)
        ht_over_15_max = self.config.get("half_time.ht_over_15_max", 0.95)
        ht_over_15 = max(ht_over_15_min, min(ht_over_15_max, ht_over_15))

        # Picks
        ht_probs = [(ht_home, "İY 1"), (ht_draw, "İY X"), (ht_away, "İY 2")]
        ht_sorted = sorted(ht_probs, key=lambda x: x[0], reverse=True)
        ht_pick = ht_sorted[0][1]
        ht_confidence = calc_confidence_3way(ht_sorted[0][0])

        # HT O/U picks
        ht_ou_thr = self.config.get("half_time.ht_ou_threshold", 0.55)
        ht_ou15_thr = self.config.get("half_time.ht_ou15_threshold", 0.45)
        ht_ou_pick = "İY 0.5 Üst" if ht_over_05 > ht_ou_thr else "İY 0.5 Alt"
        ht_ou15_pick = "İY 1.5 Üst" if ht_over_15 > ht_ou15_thr else "İY 1.5 Alt"

        return HalfTimePrediction(
            ht_home_prob=ht_home,
            ht_draw_prob=ht_draw,
            ht_away_prob=ht_away,
            ht_pick=ht_pick,
            ht_confidence=ht_confidence,
            ht_over_05_prob=ht_over_05,
            ht_under_05_prob=1.0 - ht_over_05,
            ht_over_15_prob=ht_over_15,
            ht_under_15_prob=1.0 - ht_over_15,
            ht_ou_pick=ht_ou_pick,
            ht_ou15_pick=ht_ou15_pick,
            ht_home_xg=ht_home_xg,
            ht_away_xg=ht_away_xg
        )
|
||||||
+142
@@ -0,0 +1,142 @@
|
|||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Dict, Any, List
|
||||||
|
from .base_calculator import BaseCalculator, CalculationContext
|
||||||
|
from .confidence import calc_confidence_3way_with_agreement, calc_confidence_dc
|
||||||
|
|
||||||
|
@dataclass
class MatchResultPrediction:
    """Full-time 1X2 and double-chance prediction for one match."""

    # --- Match result (1X2) ---
    ms_home_prob: float
    ms_draw_prob: float
    ms_away_prob: float
    ms_pick: str           # "1", "X" or "2"
    ms_confidence: float

    # --- Double chance ---
    dc_1x_prob: float      # home win or draw
    dc_x2_prob: float      # draw or away win
    dc_12_prob: float      # home win or away win
    dc_pick: str           # "1X", "X2" or "12"
    dc_confidence: float
|
||||||
|
|
||||||
|
class MatchResultCalculator(BaseCalculator):
    """Combines the engine outputs into a 1X2 and double-chance prediction."""

    def _get_engine_winner(self, home_prob: float, draw_prob: float, away_prob: float) -> str:
        """Return "1", "X" or "2" for the outcome with the highest probability."""
        probs = {"1": home_prob, "X": draw_prob, "2": away_prob}
        return max(probs, key=probs.get)

    def calculate(self, ctx: CalculationContext) -> MatchResultPrediction:
        """Compute the match-result prediction for the match described by ``ctx``.

        Reads ``ctx.weights``, ``ctx.team_pred``, ``ctx.odds_pred``,
        ``ctx.player_mods``, ``ctx.referee_mods`` and ``ctx.xgboost_preds``.
        """
        # Weights
        w_team = ctx.weights["team"]
        w_player = ctx.weights["player"]
        w_odds = ctx.weights["odds"]
        w_referee = ctx.weights["referee"]

        # Engine predictions
        team_pred = ctx.team_pred
        odds_pred = ctx.odds_pred
        player_mods = ctx.player_mods
        referee_mods = ctx.referee_mods

        # Weighted ensemble for 1X2
        ms_home = (
            team_pred.home_win_prob * w_team +
            odds_pred.market_home_prob * w_odds +
            team_pred.home_win_prob * player_mods["home_modifier"] * w_player +
            odds_pred.market_home_prob * referee_mods["home_modifier"] * w_referee
        )

        # The referee home modifier is applied inversely to the away side.
        ms_away = (
            team_pred.away_win_prob * w_team +
            odds_pred.market_away_prob * w_odds +
            team_pred.away_win_prob * player_mods["away_modifier"] * w_player +
            odds_pred.market_away_prob / referee_mods["home_modifier"] * w_referee
        )

        ms_draw = 1.0 - ms_home - ms_away

        # XGBoost Integration
        if "ms" in ctx.xgboost_preds:
            xgb_probs = ctx.xgboost_preds["ms"]
            w_xgb = self.config.get("xgboost.weight_ms", 0.70)
            w_heuristic = 1.0 - w_xgb

            ms_home = ms_home * w_heuristic + xgb_probs["home"] * w_xgb
            ms_draw = ms_draw * w_heuristic + xgb_probs["draw"] * w_xgb
            ms_away = ms_away * w_heuristic + xgb_probs["away"] * w_xgb

            # Re-normalize (skipped for a non-positive total to avoid a
            # ZeroDivisionError).
            total = ms_home + ms_draw + ms_away
            if total > 0:
                ms_home /= total
                ms_draw /= total
                ms_away /= total

        # Minimum draw probability. Home and away are rescaled to share the
        # remaining mass so the draw equals the floor exactly; renormalising
        # all three after the clamp would leave the draw below the floor.
        min_draw = self.config.get("match_result.min_draw_prob", 0.15)
        if ms_draw < min_draw:
            win_mass = ms_home + ms_away
            if win_mass > 0:
                scale = (1.0 - min_draw) / win_mass
                ms_home *= scale
                ms_away *= scale
                ms_draw = min_draw
            else:
                # No win mass to redistribute: the draw takes everything.
                ms_home, ms_away, ms_draw = 0.0, 0.0, 1.0

        # Double Chance
        dc_1x = ms_home + ms_draw
        dc_x2 = ms_draw + ms_away
        dc_12 = ms_home + ms_away

        # MS pick
        ms_probs = [(ms_home, "1"), (ms_draw, "X"), (ms_away, "2")]
        ms_sorted = sorted(ms_probs, key=lambda x: x[0], reverse=True)
        ms_pick = ms_sorted[0][1]

        # === ENGINE AGREEMENT ===
        # Determine each engine's winner and calculate agreement ratio
        team_winner = self._get_engine_winner(
            team_pred.home_win_prob, team_pred.draw_prob, team_pred.away_win_prob
        )
        odds_winner = self._get_engine_winner(
            odds_pred.market_home_prob, odds_pred.market_draw_prob, odds_pred.market_away_prob
        )

        # Player-modified: team probs * player modifiers
        player_adj_home = team_pred.home_win_prob * player_mods["home_modifier"]
        player_adj_away = team_pred.away_win_prob * player_mods["away_modifier"]
        player_adj_draw = max(0.01, 1.0 - player_adj_home - player_adj_away)
        player_winner = self._get_engine_winner(player_adj_home, player_adj_draw, player_adj_away)

        # Referee-modified: odds probs * referee modifiers
        ref_adj_home = odds_pred.market_home_prob * referee_mods["home_modifier"]
        ref_adj_away = odds_pred.market_away_prob / referee_mods["home_modifier"]
        ref_adj_draw = max(0.01, 1.0 - ref_adj_home - ref_adj_away)
        referee_winner = self._get_engine_winner(ref_adj_home, ref_adj_draw, ref_adj_away)

        # Count how many engines agree with final pick
        engines = [team_winner, odds_winner, player_winner, referee_winner]
        agreement_count = sum(1 for e in engines if e == ms_pick)
        agreement_ratio = agreement_count / len(engines)

        # Confidence with agreement
        boost = self.config.get("confidence.agreement_boost", 1.3)
        penalty = self.config.get("confidence.disagreement_penalty", 0.7)
        ms_confidence = calc_confidence_3way_with_agreement(
            ms_sorted[0][0], agreement_ratio, boost, penalty
        )

        # DC pick
        dc_probs = [(dc_1x, "1X"), (dc_x2, "X2"), (dc_12, "12")]
        dc_sorted = sorted(dc_probs, key=lambda x: x[0], reverse=True)
        dc_pick = dc_sorted[0][1]
        dc_confidence = calc_confidence_dc(dc_sorted[0][0])

        return MatchResultPrediction(
            ms_home_prob=ms_home,
            ms_draw_prob=ms_draw,
            ms_away_prob=ms_away,
            ms_pick=ms_pick,
            ms_confidence=ms_confidence,
            dc_1x_prob=dc_1x,
            dc_x2_prob=dc_x2,
            dc_12_prob=dc_12,
            dc_pick=dc_pick,
            dc_confidence=dc_confidence
        )
|
||||||
@@ -0,0 +1,56 @@
|
|||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Dict, Tuple
|
||||||
|
|
||||||
|
@dataclass
class AnomalyResult:
    """Outcome of an odds-anomaly check."""

    is_anomaly: bool
    side: str = ""          # "H" or "A" when an anomaly was found
    severity: float = 0.0   # capped at 10.0 by the detector
    reason: str = ""        # human-readable explanation


class OddsAnomalyDetector:
    """
    Detects mismatches between bookmaker odds and underlying team metrics.
    A 'Bookmaker Trap' is when a team has very low odds (heavy favorite)
    but their xG/defense metrics are surprisingly poor.
    """

    def __init__(self, config: Dict):
        """Store ``config`` and read the detection thresholds from it."""
        self.config = config

        # Thresholds
        self.fav_odds_threshold = self.config.get("anomaly.fav_odds_threshold", 1.75)
        self.min_xg_for_fav = self.config.get("anomaly.min_xg_for_fav", 1.25)
        self.max_conceded_for_fav = self.config.get("anomaly.max_conceded_for_fav", 1.30)
        self.opp_min_xg_threat = self.config.get("anomaly.opp_min_xg_threat", 1.10)

    @staticmethod
    def _parse_odd(value) -> float:
        """Coerce a raw odds entry to float; missing/invalid values become 0.0."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    def _check_favorite(self, side: str, label: str, fav_odd: float,
                        fav_xg: float, opp_xg: float, fav_conceded: float):
        """Return an AnomalyResult if the favoured side looks like a trap, else None."""
        if not (1.0 < fav_odd <= self.fav_odds_threshold):
            return None  # this side is not priced as a clear favorite
        weak_attack = fav_xg < self.min_xg_for_fav
        under_threat = opp_xg > self.opp_min_xg_threat or fav_conceded > self.max_conceded_for_fav
        if not (weak_attack and under_threat):
            return None
        # Severity grows with how short the odd is and how far xG falls short.
        severity = (self.fav_odds_threshold - fav_odd) + (self.min_xg_for_fav - fav_xg)
        reason = (
            f"🚨 ODDS ANOMALY (TRAP): {label} odds ({fav_odd}) suspiciously low despite "
            f"poor metrics (xG: {round(fav_xg, 2)}, Conceded: {round(fav_conceded, 2)})"
        )
        return AnomalyResult(True, side, min(10.0, severity * 2), reason)

    def detect_trap(self,
                    odds_data: Dict[str, float],
                    home_xg: float,
                    away_xg: float,
                    home_conceded_avg: float,
                    away_conceded_avg: float) -> tuple[bool, AnomalyResult]:
        """
        Check if the match is a potential odds trap.

        The home side is checked first, then the away side. Odds are read
        from ``odds_data["ms_h"]`` / ``odds_data["ms_a"]``; a missing dict or
        missing, ``None`` or non-numeric odds are treated as "no odd" instead
        of raising.

        Returns: (has_trap, AnomalyResult)
        """
        odds = odds_data or {}
        ms_h = self._parse_odd(odds.get("ms_h", 0.0))
        ms_a = self._parse_odd(odds.get("ms_a", 0.0))

        checks = (
            ("H", "Home", ms_h, home_xg, away_xg, home_conceded_avg),
            ("A", "Away", ms_a, away_xg, home_xg, away_conceded_avg),
        )
        for side, label, fav_odd, fav_xg, opp_xg, fav_conceded in checks:
            result = self._check_favorite(side, label, fav_odd, fav_xg, opp_xg, fav_conceded)
            if result is not None:
                return True, result

        return False, AnomalyResult(False)
|
||||||
+115
@@ -0,0 +1,115 @@
|
|||||||
|
from dataclasses import dataclass
|
||||||
|
import math
|
||||||
|
|
||||||
|
from .base_calculator import BaseCalculator, CalculationContext
|
||||||
|
from .match_result_calculator import MatchResultPrediction
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class OtherMarketsPrediction:
|
||||||
|
total_corners_pred: float
|
||||||
|
corner_pick: str | None
|
||||||
|
|
||||||
|
total_cards_pred: float
|
||||||
|
card_pick: str
|
||||||
|
cards_over_prob: float
|
||||||
|
cards_under_prob: float
|
||||||
|
cards_confidence: float
|
||||||
|
|
||||||
|
handicap_pick: str
|
||||||
|
handicap_home_prob: float
|
||||||
|
handicap_draw_prob: float
|
||||||
|
handicap_away_prob: float
|
||||||
|
handicap_confidence: float
|
||||||
|
|
||||||
|
odd_even_pick: str
|
||||||
|
odd_prob: float
|
||||||
|
even_prob: float
|
||||||
|
|
||||||
|
|
||||||
|
class OtherMarketsCalculator(BaseCalculator):
    """Produces handicap, cards and odd/even predictions (corners are left empty)."""

    def calculate(
        self,
        ctx: CalculationContext,
        ms_result: MatchResultPrediction,
    ) -> OtherMarketsPrediction:
        """Build the secondary-market prediction for the match in ``ctx``.

        XGBoost outputs in ``ctx.xgboost_preds`` are used where present;
        otherwise each market falls back to a heuristic based on xG or the
        referee's card average. ``ms_result`` is accepted but not read here.
        """
        preds = ctx.xgboost_preds

        # ---- Handicap: model output if available, else xG-gap buckets ----
        if "handicap_ms" in preds:
            payload = preds["handicap_ms"]
            h_home = float(payload.get("h1", 0.33))
            h_draw = float(payload.get("hx", 0.34))
            h_away = float(payload.get("h2", 0.33))
        else:
            xg_gap = ctx.home_xg - ctx.away_xg
            gap_limit = float(self.config.get("handicap.xg_diff_threshold", 1.2))
            if xg_gap > gap_limit:
                h_home, h_draw, h_away = 0.58, 0.24, 0.18
            elif xg_gap < -gap_limit:
                h_home, h_draw, h_away = 0.18, 0.24, 0.58
            else:
                h_home, h_draw, h_away = 0.28, 0.44, 0.28

        handicap_confidence = max(h_home, h_draw, h_away) * 100.0

        # Ties resolve in favour of home first, then away, then the draw.
        home_on_top = h_home >= h_draw and h_home >= h_away
        away_on_top = h_away >= h_home and h_away >= h_draw
        if home_on_top:
            handicap_pick = "H 1 (Ev -1)"
        elif away_on_top:
            handicap_pick = "H 2 (Dep -1)"
        else:
            handicap_pick = "H 0 (Beraberlik)"

        # ---- Corners: no prediction is produced ----
        total_corners = 0.0
        corner_pick = None

        # ---- Cards ----
        card_line = float(self.config.get("cards.line", 4.5))
        if "cards_ou45" in preds:
            over_p = float(preds["cards_ou45"])
            total_cards = 5.0 if over_p > 0.50 else 3.5
        else:
            referee_average = float(ctx.referee_pred.avg_yellow_cards)
            reasons = ctx.upset_factors.reasoning
            # Only the first reasoning entry is inspected for the derby tag.
            is_derby = bool(reasons and "DERBY" in str(reasons[0]))
            heat = 1.0
            if is_derby:
                heat = float(self.config.get("cards.derby_heat_factor", 1.3))
            total_cards = referee_average * heat
            gap_to_line = total_cards - card_line
            # Logistic curve on the distance between expected cards and the line.
            over_p = 1.0 / (1.0 + math.exp(-gap_to_line * 0.9))

        over_p = max(0.02, min(0.98, over_p))
        under_p = 1.0 - over_p
        cards_confidence = max(over_p, under_p) * 100.0
        if over_p > 0.50:
            card_pick = f"{card_line} Ust"
        else:
            card_pick = f"{card_line} Alt"

        # ---- Odd / even: P(even) for a Poisson total with rate total_xg ----
        lam = ctx.total_xg
        p_even = math.exp(-lam) * math.cosh(lam)
        if "odd_even" in preds:
            w_model = float(self.config.get("xgboost.weight_ou", 0.60))
            model_even = float(preds["odd_even"])
            p_even = p_even * (1 - w_model) + model_even * w_model

        p_even = max(0.02, min(0.98, p_even))
        p_odd = 1.0 - p_even
        odd_even_pick = "Cift" if p_even > 0.5 else "Tek"

        return OtherMarketsPrediction(
            total_corners_pred=total_corners,
            corner_pick=corner_pick,
            total_cards_pred=total_cards,
            card_pick=card_pick,
            cards_over_prob=over_p,
            cards_under_prob=under_p,
            cards_confidence=cards_confidence,
            handicap_pick=handicap_pick,
            handicap_home_prob=h_home,
            handicap_draw_prob=h_draw,
            handicap_away_prob=h_away,
            handicap_confidence=handicap_confidence,
            odd_even_pick=odd_even_pick,
            odd_prob=p_odd,
            even_prob=p_even,
        )
|
||||||
+174
@@ -0,0 +1,174 @@
|
|||||||
|
import math
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from .base_calculator import BaseCalculator, CalculationContext
|
||||||
|
from .confidence import calc_confidence_2way
|
||||||
|
|
||||||
|
@dataclass
class OverUnderPrediction:
    """Goal-total (1.5 / 2.5 / 3.5) and both-teams-to-score predictions."""

    # --- Over/Under 1.5 ---
    over_15_prob: float
    under_15_prob: float
    ou15_pick: str
    ou15_confidence: float

    # --- Over/Under 2.5 ---
    over_25_prob: float
    under_25_prob: float
    ou25_pick: str
    ou25_confidence: float

    # --- Over/Under 3.5 ---
    over_35_prob: float
    under_35_prob: float
    ou35_pick: str
    ou35_confidence: float

    # --- Both teams to score ---
    btts_yes_prob: float
    btts_no_prob: float
    btts_pick: str
    btts_confidence: float
|
||||||
|
|
||||||
|
|
||||||
|
class OverUnderCalculator(BaseCalculator):
    """Blends Poisson, odds-engine and XGBoost signals into O/U and BTTS picks."""

    def _poisson_pmf(self, k: int, lam: float) -> float:
        """Poisson P(X = k); a non-positive rate puts all mass on zero goals."""
        if lam <= 0:
            return float(k == 0)
        return (lam ** k) * math.exp(-lam) / math.factorial(k)

    def _poisson_ou_probs(self, home_xg: float, away_xg: float, grid_max: int = 6):
        """Return (over 1.5, over 2.5, over 3.5, BTTS yes) from a Poisson score grid.

        Scores 0..grid_max-1 per side are enumerated with independent Poisson
        goal counts; the grid is not renormalised.
        """
        # Probability mass per total goal count, indexed by the total.
        mass_by_total = [0.0] * max(0, 2 * grid_max - 1)
        for home_goals in range(grid_max):
            for away_goals in range(grid_max):
                cell = self._poisson_pmf(home_goals, home_xg) * self._poisson_pmf(away_goals, away_xg)
                mass_by_total[home_goals + away_goals] += cell

        over_15 = sum(mass_by_total[2:])
        over_25 = sum(mass_by_total[3:])
        over_35 = sum(mass_by_total[4:])

        # BTTS: P(home >= 1) * P(away >= 1)
        home_scores = 1 - self._poisson_pmf(0, home_xg)
        away_scores = 1 - self._poisson_pmf(0, away_xg)
        btts_yes = home_scores * away_scores

        return over_15, over_25, over_35, btts_yes

    def calculate(self, ctx: CalculationContext) -> OverUnderPrediction:
        """Compute O/U 1.5 / 2.5 / 3.5 and BTTS predictions for ``ctx``.

        Steps: Poisson grid on the context xG, blend with the odds engine,
        blend with XGBoost where available, apply the referee modifier to the
        goal totals, clamp, then derive picks and confidences.
        """
        odds_pred = ctx.odds_pred
        referee_mods = ctx.referee_mods
        xgb = ctx.xgboost_preds
        cfg = self.config.get

        # Config
        prob_min = cfg("over_under.prob_min", 0.02)
        prob_max = cfg("over_under.prob_max", 0.98)
        blend_w = cfg("over_under.poisson_blend_weight", 0.4)
        grid_max = cfg("over_under.poisson_grid_max", 6)

        ou15_thr = cfg("over_under.ou15_threshold", 0.55)
        ou25_thr = cfg("over_under.ou25_threshold", 0.52)
        ou35_thr = cfg("over_under.ou35_threshold", 0.48)
        btts_thr = cfg("over_under.btts_threshold", 0.58)

        def mix(primary, secondary, weight):
            """Weighted average: ``primary`` gets ``weight``, ``secondary`` the rest."""
            return primary * weight + secondary * (1 - weight)

        def clip(prob):
            """Clamp a probability into [prob_min, prob_max]."""
            return max(prob_min, min(prob_max, prob))

        # 1. Poisson-based O/U from the context xG
        p_over_15, p_over_25, p_over_35, p_btts = self._poisson_ou_probs(
            ctx.home_xg, ctx.away_xg, int(grid_max)
        )

        # 2 + 3. Blend with the odds-engine probabilities. The odds engine is
        # itself Poisson-based, so the Poisson share stays low to avoid
        # double counting.
        over_15 = mix(p_over_15, odds_pred.over_15_prob, blend_w)
        over_25 = mix(p_over_25, odds_pred.over_25_prob, blend_w)
        over_35 = mix(p_over_35, odds_pred.over_35_prob, blend_w)

        # BTTS stays mostly with the odds engine; Poisson only cross-checks.
        btts_yes = mix(p_btts, odds_pred.btts_yes_prob, min(blend_w, 0.2))

        # XGBoost integration (high weight on goal totals)
        w_xgb = cfg("xgboost.weight_ou", 0.70)
        if "ou25" in xgb:
            over_25 = mix(xgb["ou25"], over_25, w_xgb)
        if "ou15" in xgb:
            over_15 = mix(xgb["ou15"], over_15, w_xgb)
        if "ou35" in xgb:
            over_35 = mix(xgb["ou35"], over_35, w_xgb)

        # BTTS uses a lower XGBoost weight than the goal totals.
        w_xgb_btts = cfg("xgboost.weight_btts", 0.45)
        if "btts" in xgb:
            btts_yes = mix(xgb["btts"], btts_yes, w_xgb_btts)

        # 4. Referee modifier (goal totals only, not BTTS)
        ou_mod = referee_mods.get("over_25_modifier", 1.0)
        over_15 *= ou_mod
        over_25 *= ou_mod
        over_35 *= ou_mod

        # 5. Clamp
        over_15 = clip(over_15)
        over_25 = clip(over_25)
        over_35 = clip(over_35)
        btts_yes = clip(btts_yes)

        # Picks & confidence
        ou15_pick = "Üst 1.5" if over_15 > ou15_thr else "Alt 1.5"
        ou25_pick = "Üst 2.5" if over_25 > ou25_thr else "Alt 2.5"
        ou35_pick = "Üst 3.5" if over_35 > ou35_thr else "Alt 3.5"
        btts_pick = "KG Var" if btts_yes > btts_thr else "KG Yok"

        ou15_conf = calc_confidence_2way(over_15)
        ou25_conf = calc_confidence_2way(over_25)
        ou35_conf = calc_confidence_2way(over_35)
        btts_conf = calc_confidence_2way(btts_yes)

        # --- Safe BTTS penalty ---
        # Cut BTTS confidence when the fundamentals do not back the pick.
        # Best-effort: any failure is reported and the confidence left as is.
        try:
            features = ctx.team_pred.raw_features
            home_conceded = features.get("home_conceded_avg", 1.0)
            away_conceded = features.get("away_conceded_avg", 1.0)

            if btts_pick == "KG Var":
                # "Var" needs both teams to score: penalise when an attack
                # is weak and a defence is solid.
                weak_attack = ctx.home_xg < 1.30 or ctx.away_xg < 1.15
                solid_defense = home_conceded < 1.15 or away_conceded < 1.15
                if weak_attack and solid_defense:
                    btts_conf *= 0.3
            elif (ctx.home_xg >= 1.30 and ctx.away_xg >= 1.15
                  and home_conceded >= 1.20 and away_conceded >= 1.20):
                # "Yok" needs one side to blank: penalise when both attacks
                # are good and both defences leak.
                btts_conf *= 0.3
        except Exception as e:
            print(f"⚠️ Safe BTTS Check Error: {e}")

        return OverUnderPrediction(
            over_15_prob=over_15, under_15_prob=1-over_15,
            ou15_pick=ou15_pick, ou15_confidence=ou15_conf,

            over_25_prob=over_25, under_25_prob=1-over_25,
            ou25_pick=ou25_pick, ou25_confidence=ou25_conf,

            over_35_prob=over_35, under_35_prob=1-over_35,
            ou35_pick=ou35_pick, ou35_confidence=ou35_conf,

            btts_yes_prob=btts_yes, btts_no_prob=1-btts_yes,
            btts_pick=btts_pick, btts_confidence=btts_conf
        )
|
||||||
Executable
+278
@@ -0,0 +1,278 @@
|
|||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Dict, Any, List, Tuple
|
||||||
|
from .base_calculator import BaseCalculator, CalculationContext
|
||||||
|
from .odds_anomaly_detector import OddsAnomalyDetector
|
||||||
|
|
||||||
|
@dataclass
class RiskAnalysis:
    """Result of a match risk assessment.

    Bundles the numeric risk score, its coarse level label, the surprise
    ("upset") flag and the human-readable explanations behind them.
    """

    risk_score: float  # numeric risk score (higher means riskier)
    risk_level: str  # coarse label derived from the score, e.g. "LOW" / "MEDIUM" / "HIGH" / "EXTREME"
    is_surprise_risk: bool  # True when at least one surprise/upset signal fired
    reasons: List[str] = field(default_factory=list)  # one explanation per signal that raised the score
    surprise_type: str = ""  # label of the surprise signal, empty when none fired
    risk_warnings: List[str] = field(default_factory=list)  # extra warnings; fresh list per instance
|
||||||
|
|
||||||
|
class RiskAssessor(BaseCalculator):
    """
    Assesses risk level of the match based on context and predictions.

    The score starts at 5.0 and is raised by three signal groups: the
    surprise flag already present on the context, the bookmaker odds anomaly
    detector, and the HT/FT reversal probabilities ("1/2" and "2/1") found in
    ``ctx.xgboost_preds["ht_ft"]``.
    """

    # Favourite-odds ladder for the "potential upset" alert, as
    # (inclusive upper bound of the favourite's odds, minimum reversal probability).
    # The stronger the favourite, the smaller the reversal probability that
    # is already considered noteworthy.
    _UPSET_ALERT_LADDER = (
        (1.25, 0.01),   # extremely strong favourite
        (1.40, 0.015),  # very strong favourite
        (1.60, 0.02),   # strong favourite
    )
    # Favourites priced above the ladder but strictly below this bound are "moderate".
    _UPSET_ALERT_MODERATE_ODDS = 2.00
    _UPSET_ALERT_MODERATE_THRESHOLD = 0.03

    def __init__(self, config: Dict):
        """Initialise the base calculator and the odds anomaly detector with ``config``."""
        super().__init__(config)
        self.anomaly_detector = OddsAnomalyDetector(config)

    @staticmethod
    def _safe_odd(value: Any) -> float:
        """Return ``value`` as a float odd, or 0.0 when it is unusable.

        Values that cannot be converted to float, or that are not greater
        than 1.01, are treated as missing.
        """
        try:
            odd = float(value)
        except (TypeError, ValueError):
            return 0.0
        return odd if odd > 1.01 else 0.0

    @staticmethod
    def _safe_prob(value: Any) -> float:
        """Return ``value`` converted to float, or 0.0 when it cannot be converted."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    def _favorite_profile_from_odds(self, odds_data: Dict[str, float]) -> Tuple[str, float]:
        """
        Returns (favorite_side, gap_to_second_favorite).
        favorite_side: H, A, D, or U (unknown)

        The favourite is the outcome with the lowest usable 1X2 odd
        (``ms_h`` / ``ms_d`` / ``ms_a``). With fewer than two usable odds the
        result is ``("U", 0.0)``.
        """
        odds = odds_data or {}
        candidates = [
            (side, odd)
            for side, odd in (
                ("H", self._safe_odd(odds.get("ms_h"))),
                ("D", self._safe_odd(odds.get("ms_d"))),
                ("A", self._safe_odd(odds.get("ms_a"))),
            )
            if odd > 0.0
        ]
        if len(candidates) < 2:
            return "U", 0.0

        # Stable sort: on equal odds the H, D, A order above decides.
        candidates.sort(key=lambda item: item[1])
        favorite_side, favorite_odd = candidates[0]
        second_odd = candidates[1][1]
        return favorite_side, max(0.0, second_odd - favorite_odd)

    def _dynamic_reversal_threshold(
        self,
        ctx: CalculationContext,
        top_label: str,
    ) -> float:
        """
        Dynamic threshold for reversal surprise flags.
        Lower threshold => easier to trigger surprise.

        Args:
            ctx: Calculation context; ``sport``, ``odds_data`` and the optional
                ``is_top_league`` attribute are read.
            top_label: Most probable HT/FT label (e.g. "1/2").

        Returns:
            The probability the top HT/FT class must exceed to count as a surprise.
        """
        base_threshold = float(self.config.get("risk.surprise_threshold", 0.20))
        sport_key = (ctx.sport or "football").lower().strip()
        is_top_league = bool(getattr(ctx, "is_top_league", False))

        if not is_top_league:
            base_threshold = float(
                self.config.get("risk.surprise_threshold_non_top", base_threshold + 0.04),
            )

        # Basketball uses its own flat thresholds regardless of the label.
        if sport_key == "basketball":
            if is_top_league:
                return float(
                    self.config.get(
                        "risk.surprise_threshold_basketball_top",
                        self.config.get("risk.surprise_threshold_basketball", 0.30),
                    ),
                )
            return float(
                self.config.get("risk.surprise_threshold_basketball_non_top", 0.34),
            )

        if top_label not in ("1/2", "2/1"):
            return base_threshold

        # "1/2" ends with an away win, "2/1" with a home win.
        winner_side = "A" if top_label == "1/2" else "H"
        favorite_side, gap = self._favorite_profile_from_odds(ctx.odds_data)

        if is_top_league:
            favorite_winner_threshold = float(
                self.config.get(
                    "risk.surprise_threshold_favorite_reversal_top",
                    self.config.get("risk.surprise_threshold_favorite_reversal", 0.26),
                ),
            )
            underdog_winner_threshold = float(
                self.config.get(
                    "risk.surprise_threshold_underdog_reversal_top",
                    self.config.get("risk.surprise_threshold_underdog_reversal", 0.20),
                ),
            )
        else:
            favorite_winner_threshold = float(
                self.config.get("risk.surprise_threshold_favorite_reversal_non_top", 0.30),
            )
            underdog_winner_threshold = float(
                self.config.get("risk.surprise_threshold_underdog_reversal_non_top", 0.24),
            )

        gap_medium = float(self.config.get("risk.htft_reversal_gap_medium", 0.50))
        gap_strong = float(self.config.get("risk.htft_reversal_gap_strong", 1.00))

        if favorite_side not in ("H", "A"):
            return base_threshold

        if winner_side == favorite_side:
            return favorite_winner_threshold

        # Underdog wins the reversal: demand more evidence the clearer the favourite is.
        threshold = underdog_winner_threshold
        if gap >= gap_strong:
            threshold += 0.03
        elif gap >= gap_medium:
            threshold += 0.015
        return threshold

    def _upset_alert_threshold(self, favorite_odd: float, base_threshold: float) -> float:
        """Map the favourite's odds to the minimum reversal probability worth an alert.

        Odds <= 1.25 -> 0.01, <= 1.40 -> 0.015, <= 1.60 -> 0.02, < 2.00 -> 0.03,
        anything else -> ``base_threshold``.
        """
        for upper_bound, threshold in self._UPSET_ALERT_LADDER:
            if favorite_odd <= upper_bound:
                return threshold
        if favorite_odd < self._UPSET_ALERT_MODERATE_ODDS:
            return self._UPSET_ALERT_MODERATE_THRESHOLD
        return base_threshold

    def calculate(self, ctx: CalculationContext, ms_result=None) -> RiskAnalysis:
        """
        Wrapper for assess_risk to match BaseCalculator interface but with extra arg.

        ``ms_result`` is accepted for signature compatibility and is not used.
        """
        return self.assess_risk(ctx)

    def assess_risk(self, ctx: CalculationContext) -> RiskAnalysis:
        """
        Calculate risk score and level.
        Returns RiskAnalysis object.

        Args:
            ctx: Calculation context; reads ``is_surprise``, ``team_pred``,
                ``odds_data``, ``home_xg``, ``away_xg`` and ``xgboost_preds``.
        """
        score = 5.0
        reasons = []
        is_surprise = ctx.is_surprise
        surprise_type = ""

        # 1. League deviation (from UpsetEngine)
        if ctx.is_surprise:
            score += 2.0
            reasons.append("High Upset Potential detected by UpsetEngine")

        # 1.5 Odds Anomaly Detection (best effort: a failure here must not
        # abort the whole risk assessment)
        try:
            home_conceded = ctx.team_pred.raw_features.get("home_conceded_avg", 1.0)
            away_conceded = ctx.team_pred.raw_features.get("away_conceded_avg", 1.0)

            has_anomaly, anomaly_res = self.anomaly_detector.detect_trap(
                ctx.odds_data,
                ctx.home_xg,
                ctx.away_xg,
                home_conceded,
                away_conceded
            )

            if has_anomaly:
                is_surprise = True
                score += anomaly_res.severity + 2.0
                surprise_type = "Bookmaker Trap"
                reasons.append(anomaly_res.reason)
        except Exception as e:
            print(f"⚠️ Odds Anomaly Detection Error: {e}")

        # 2. HT/FT Surprise Hunter (XGBoost)
        # We look for the 1/2 and 2/1 reversal classes of the HT/FT model
        if "ht_ft" in ctx.xgboost_preds:
            ht_ft = ctx.xgboost_preds["ht_ft"] or {}
            valid_items = [(k, float(v)) for k, v in ht_ft.items() if isinstance(v, (int, float))]
            if valid_items:
                ranked = sorted(valid_items, key=lambda item: item[1], reverse=True)
                top_label, top_prob = ranked[0]
                second_prob = ranked[1][1] if len(ranked) > 1 else 0.0
                top_gap = top_prob - second_prob

                threshold = self._dynamic_reversal_threshold(ctx, top_label)
                if getattr(ctx, "is_top_league", False):
                    min_gap = float(
                        self.config.get(
                            "risk.surprise_min_top_gap_top",
                            self.config.get("risk.surprise_min_top_gap", 0.02),
                        )
                    )
                else:
                    min_gap = float(self.config.get("risk.surprise_min_top_gap_non_top", 0.03))

                # Trigger surprise only when reversal class is:
                # - top HT/FT outcome
                # - above dynamic threshold
                # - separated from second class with a minimum gap
                if top_label in ("1/2", "2/1") and top_prob > threshold and top_gap > min_gap:
                    is_surprise = True
                    score += 3.0
                    surprise_type = f"{top_label} Reversal"
                    reasons.append(
                        f"🔥 Surprise Hunter: {top_label} potential ({round(top_prob*100, 1)}%, gap {round(top_gap*100, 1)}pp)"
                    )

                # Potential Upset Alert - even if reversal is not the top prediction.
                # Catches matches where the reversal class had a tiny probability
                # against a heavy favourite but is still worth flagging.
                favorite_side, gap = self._favorite_profile_from_odds(ctx.odds_data)

                # Reversal probabilities; unusable values (e.g. None) count as 0
                # instead of crashing the assessment.
                prob_12 = self._safe_prob(ht_ft.get("1/2", 0))
                prob_21 = self._safe_prob(ht_ft.get("2/1", 0))

                # DYNAMIC threshold based on odds - stronger favorite = lower threshold.
                # 1.20 odds -> 0.01, 1.50 odds -> 0.02, 1.80 odds -> 0.03, 2.0+ odds -> base threshold
                base_threshold = float(self.config.get("risk.upset_alert_threshold", 0.05))

                if favorite_side == "H":
                    dynamic_threshold = self._upset_alert_threshold(
                        float(ctx.odds_data.get("ms_h", 2.0)), base_threshold
                    )
                elif favorite_side == "A":
                    dynamic_threshold = self._upset_alert_threshold(
                        float(ctx.odds_data.get("ms_a", 2.0)), base_threshold
                    )
                else:
                    dynamic_threshold = base_threshold

                # Check for potential upset based on favorite
                if favorite_side == "H" and prob_12 > dynamic_threshold:
                    # Home favorite, but 1/2 (home leads HT, away wins FT) has potential
                    is_surprise = True
                    score += 2.0
                    surprise_type = "1/2 Potential Upset"
                    reasons.append(
                        f"⚠️ UPSET ALERT: Home favorite ({ctx.odds_data.get('ms_h', 'N/A')}) but 1/2 reversal risk ({round(prob_12*100, 1)}% > {round(dynamic_threshold*100, 1)}% threshold)"
                    )
                elif favorite_side == "A" and prob_21 > dynamic_threshold:
                    # Away favorite, but 2/1 (away leads HT, home wins FT) has potential
                    is_surprise = True
                    score += 2.0
                    surprise_type = "2/1 Potential Upset"
                    reasons.append(
                        f"⚠️ UPSET ALERT: Away favorite ({ctx.odds_data.get('ms_a', 'N/A')}) but 2/1 reversal risk ({round(prob_21*100, 1)}% > {round(dynamic_threshold*100, 1)}% threshold)"
                    )
                elif gap > 0.5 and (prob_12 > dynamic_threshold or prob_21 > dynamic_threshold):
                    # Strong favorite (big odds gap) with any reversal potential
                    reversal_type = "1/2" if prob_12 > prob_21 else "2/1"
                    reversal_prob = max(prob_12, prob_21)
                    is_surprise = True
                    score += 1.5
                    surprise_type = f"{reversal_type} Potential Upset"
                    reasons.append(
                        f"⚠️ UPSET ALERT: Strong favorite (gap {round(gap, 2)}) with {reversal_type} risk ({round(reversal_prob*100, 1)}%)"
                    )

        # Determine level
        if score < 4.0:
            level = "LOW"
        elif score < 7.0:
            level = "MEDIUM"
        elif score < 9.0:
            level = "HIGH"
        else:
            level = "EXTREME"

        return RiskAnalysis(
            risk_score=score,
            risk_level=level,
            is_surprise_risk=is_surprise,
            surprise_type=surprise_type,
            reasons=reasons
        )
|
||||||
+229
@@ -0,0 +1,229 @@
|
|||||||
|
import os
|
||||||
|
import pickle
|
||||||
|
import pandas as pd
|
||||||
|
import xgboost as xgb
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import List, Dict, Tuple
|
||||||
|
import math
|
||||||
|
from .base_calculator import BaseCalculator, CalculationContext
|
||||||
|
from .confidence import calc_confidence_3way, calc_confidence_dc
|
||||||
|
from .match_result_calculator import MatchResultPrediction
|
||||||
|
|
||||||
|
@dataclass
class ScorePrediction:
    """Score prediction output: chosen FT/HT score strings plus the most likely FT scores."""

    predicted_ft_score: str  # full-time score formatted as "H-A"
    predicted_ht_score: str  # half-time score formatted as "H-A"
    ft_scores_top5: List[Dict]  # entries of the form {"score": "H-A", "prob": <value>}

    # Reconciled MS/DC predictions (can be updated here)
    # NOTE(review): defaults to None although the annotation is not Optional.
    reconciled_ms: MatchResultPrediction = None
|
||||||
|
|
||||||
|
class ScoreCalculator(BaseCalculator):
    """Predicts FT/HT scores from a Poisson score grid.

    Goal expectations (lambdas) come from the XGBoost score models when they
    are loaded and prediction succeeds, otherwise from the context xG values.
    The grid is also used to re-derive 1X2 / double-chance probabilities when
    the incoming match-result confidence is low.
    """

    def __init__(self, config: Dict):
        """Initialise model slots and try to load the pickled XGBoost score models."""
        super().__init__(config)
        self.xgb_home = None
        self.xgb_away = None
        self.xgb_ht_home = None
        self.xgb_ht_away = None
        self.scaler = None  # If used
        self.features = []
        self._load_model()

    def _load_model(self):
        """Load ``models/xgb_score.pkl`` (two directories up) if it exists.

        Never raises: any problem is printed and the model attributes keep
        their ``None`` defaults, which makes ``calculate`` use the xG fallback.
        """
        try:
            model_path = os.path.join(
                os.path.dirname(os.path.abspath(__file__)), "..", "..", "models", "xgb_score.pkl"
            )
            if not os.path.exists(model_path):
                print(f"⚠️ XGBoost Score Model not found at {model_path}")
                return

            # NOTE(security): pickle.load executes arbitrary code embedded in the
            # file; the model file must come from a trusted source only.
            with open(model_path, "rb") as f:
                data = pickle.load(f)

            # Only the dictionary bundle format is supported.
            if not isinstance(data, dict):
                print("⚠️ Unexpected XGB score model format.")
                return  # do not report success for an unusable payload

            self.xgb_home = data.get("home_model")
            self.xgb_away = data.get("away_model")
            self.xgb_ht_home = data.get("ht_home_model")
            self.xgb_ht_away = data.get("ht_away_model")
            self.features = data.get("features", [])
            print("✅ XGBoost Score Model loaded.")
        except Exception as e:
            print(f"❌ Error loading XGBoost Score Model: {e}")

    def _poisson_pmf(self, k, lam):
        """Poisson probability mass function."""
        if lam <= 0:
            # Degenerate distribution: all mass on zero goals.
            return 1.0 if k == 0 else 0.0
        return (lam ** k) * math.exp(-lam) / math.factorial(k)

    @staticmethod
    def _score_matches_ms(score_str, pick):
        """Return True when the "H-A" score string agrees with the 1X2 pick.

        "1" means home win, "2" away win; any other pick is treated as a draw.
        """
        h, a = map(int, score_str.split("-"))
        if pick == "1":
            return h > a
        if pick == "2":
            return h < a
        return h == a

    def calculate(self, ctx: CalculationContext, ms_result: MatchResultPrediction) -> ScorePrediction:
        """Predict FT and HT scores and reconcile the match-result prediction.

        Args:
            ctx: Calculation context; reads ``home_xg``, ``away_xg``,
                ``team_pred`` and writes ``xgboost_preds["score"]`` when the
                ML models are used.
            ms_result: Upstream match-result prediction; replaced by a
                Poisson-derived one when its ``ms_confidence`` is below
                ``score.ms_confidence_threshold``.

        Returns:
            ScorePrediction with the chosen scores, the five most likely FT
            scores (probabilities in percent) and the reconciled MS result.
        """
        # Share of FT goals expected by half time; used by every HT fallback.
        ft_to_ht = self.config.get("half_time.ft_to_ht_ratio", 0.42)

        # Default Lambdas (fallback)
        lambda_home = max(0.5, ctx.home_xg)
        lambda_away = max(0.5, ctx.away_xg)
        predicted_ht = None  # stays None until an HT score has been produced

        # --- XGBOOST PREDICTION ---
        if self.xgb_home and self.xgb_away and hasattr(ctx.team_pred, "raw_features"):
            try:
                # Use unified feature adapter so the columns match training.
                from features.feature_adapter import get_feature_adapter
                df_input = get_feature_adapter().get_features(ctx)

                # Predict FT
                pred_h = float(self.xgb_home.predict(df_input)[0])
                pred_a = float(self.xgb_away.predict(df_input)[0])

                if self.xgb_ht_home and self.xgb_ht_away:
                    # HT models are independent of the FT ones, so only a
                    # non-negative clamp is applied here; HT <= FT is enforced
                    # on the final score strings below.
                    ht_h_val = max(0.0, float(self.xgb_ht_home.predict(df_input)[0]))
                    ht_a_val = max(0.0, float(self.xgb_ht_away.predict(df_input)[0]))
                else:
                    # Fallback if HT models missing
                    ht_h_val = max(0.0, pred_h * ft_to_ht)
                    ht_a_val = max(0.0, pred_a * ft_to_ht)
                ml_predicted_ht = f"{round(ht_h_val)}-{round(ht_a_val)}"

                # Update lambdas with ML predictions
                lambda_home = max(0.1, min(6.0, pred_h))
                lambda_away = max(0.1, min(6.0, pred_a))

                # Store raw XGB preds in context
                ctx.xgboost_preds["score"] = {
                    "home": lambda_home,
                    "away": lambda_away,
                    "ht_home": ht_h_val,
                    "ht_away": ht_a_val
                }
                predicted_ht = ml_predicted_ht
            except Exception as e:
                # predicted_ht stays None, so the ratio-based HT fallback below applies.
                print(f"⚠️ XGBoost Score Prediction failed: {e}. Falling back to Poisson xG.")

        # --- POISSON GRID GENERATION ---
        # Now use lambda_home/away (either ML or fallback) to generate grid.
        # At least one cell is required, otherwise there is no score to pick.
        grid_max = max(1, int(self.config.get("score.poisson_grid_max", 7)))
        away_pmf = [self._poisson_pmf(j, lambda_away) for j in range(grid_max)]

        score_probs = {}
        home_mass, draw_mass, away_mass = [], [], []
        for i in range(grid_max):
            home_p = self._poisson_pmf(i, lambda_home)
            for j in range(grid_max):
                pct = round(home_p * away_pmf[j] * 100, 2)
                score_probs[f"{i}-{j}"] = pct
                # Collect the outcome masses while building the grid instead
                # of re-parsing the score strings afterwards.
                if i > j:
                    home_mass.append(pct)
                elif i < j:
                    away_mass.append(pct)
                else:
                    draw_mass.append(pct)

        sorted_scores = sorted(score_probs.items(), key=lambda x: x[1], reverse=True)

        # --- DERIVE MS PROBS FROM SCORES (CONSISTENCY CHECK) ---
        poisson_ms_home = sum(home_mass)
        poisson_ms_away = sum(away_mass)
        poisson_ms_draw = sum(draw_mass)

        # Normalize (the truncated grid does not sum to exactly 100%)
        poisson_total = poisson_ms_home + poisson_ms_away + poisson_ms_draw
        if poisson_total > 0:
            poisson_ms_home /= poisson_total
            poisson_ms_away /= poisson_total
            poisson_ms_draw /= poisson_total

        # --- HYBRID RECONCILIATION ---
        threshold = self.config.get("score.ms_confidence_threshold", 15.0)
        reconciled_result = ms_result

        # If original confidence is low, trust new Score Model more
        if ms_result.ms_confidence < threshold:
            poisson_probs = [(poisson_ms_home, "1"), (poisson_ms_draw, "X"), (poisson_ms_away, "2")]
            best_ms_prob, new_ms_pick = sorted(poisson_probs, key=lambda x: x[0], reverse=True)[0]
            new_ms_conf = calc_confidence_3way(best_ms_prob)

            # Recalculate DC
            dc_1x = poisson_ms_home + poisson_ms_draw
            dc_x2 = poisson_ms_draw + poisson_ms_away
            dc_12 = poisson_ms_home + poisson_ms_away

            dc_probs = [(dc_1x, "1X"), (dc_x2, "X2"), (dc_12, "12")]
            best_dc_prob, new_dc_pick = sorted(dc_probs, key=lambda x: x[0], reverse=True)[0]
            new_dc_conf = calc_confidence_dc(best_dc_prob)

            reconciled_result = MatchResultPrediction(
                ms_home_prob=poisson_ms_home,
                ms_draw_prob=poisson_ms_draw,
                ms_away_prob=poisson_ms_away,
                ms_pick=new_ms_pick,
                ms_confidence=new_ms_conf,
                dc_1x_prob=dc_1x,
                dc_x2_prob=dc_x2,
                dc_12_prob=dc_12,
                dc_pick=new_dc_pick,
                dc_confidence=new_dc_conf
            )

        # Select best score that matches MS Pick.
        # The grid is filtered to the MS pick, unless the single most likely
        # score contradicts the pick AND is itself very likely (> 12%), in
        # which case the score model is trusted directly.
        ms_pick = reconciled_result.ms_pick
        matching_scores = [(s, p) for s, p in sorted_scores if self._score_matches_ms(s, ms_pick)]

        top_overall_score, top_overall_prob = sorted_scores[0]
        top_contradicts_pick = (
            top_overall_prob > 12.0 and not self._score_matches_ms(top_overall_score, ms_pick)
        )

        if matching_scores and not top_contradicts_pick:
            predicted_ft = matching_scores[0][0]
        else:
            predicted_ft = top_overall_score

        # If we didn't calculate HT via ML (models missing or failed), do it now
        if predicted_ht is None:
            ht_h = round(lambda_home * ft_to_ht)
            ht_a = round(lambda_away * ft_to_ht)
            predicted_ht = f"{ht_h}-{ht_a}"

        # --- CONSISTENCY CHECK ---
        # Ensure HT score <= FT score
        try:
            ft_h, ft_a = map(int, predicted_ft.split("-"))
            ht_h, ht_a = map(int, predicted_ht.split("-"))

            # Clamp HT values
            ht_h = min(ht_h, ft_h)
            ht_a = min(ht_a, ft_a)

            predicted_ht = f"{ht_h}-{ht_a}"
        except ValueError:
            pass  # Malformed score string, ignore correction

        ft_scores = [{"score": s, "prob": p} for s, p in sorted_scores[:5]]

        return ScorePrediction(
            predicted_ft_score=predicted_ft,
            predicted_ht_score=predicted_ht,
            ft_scores_top5=ft_scores,
            reconciled_ms=reconciled_result
        )
|
||||||
Executable
+16
@@ -0,0 +1,16 @@
|
|||||||
|
# ai-engine/core/engines/__init__.py
|
||||||
|
"""
|
||||||
|
V20 Ensemble Prediction Engines
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .team_predictor import TeamPredictorEngine, get_team_predictor
|
||||||
|
from .player_predictor import PlayerPredictorEngine, get_player_predictor
|
||||||
|
from .odds_predictor import OddsPredictorEngine, get_odds_predictor
|
||||||
|
from .referee_predictor import RefereePredictorEngine, get_referee_predictor
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"TeamPredictorEngine", "get_team_predictor",
|
||||||
|
"PlayerPredictorEngine", "get_player_predictor",
|
||||||
|
"OddsPredictorEngine", "get_odds_predictor",
|
||||||
|
"RefereePredictorEngine", "get_referee_predictor"
|
||||||
|
]
|
||||||
Executable
+237
@@ -0,0 +1,237 @@
|
|||||||
|
"""
|
||||||
|
Odds Predictor Engine - V20 Ensemble Component
|
||||||
|
Uses market odds and Poisson mathematics for predictions.
|
||||||
|
|
||||||
|
Weight: 30% in ensemble
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from typing import Dict, Optional
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||||
|
|
||||||
|
from features.poisson_engine import get_poisson_engine
|
||||||
|
from features.value_calculator import get_value_calculator
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class OddsPrediction:
    """Output container of the odds engine.

    Probabilities are stored as fractions (0-1); ``to_dict`` converts them to
    percentages for presentation.
    """

    # Market-implied 1X2 probabilities
    market_home_prob: float = 0.33
    market_draw_prob: float = 0.33
    market_away_prob: float = 0.33

    # Expected goals from the Poisson model
    poisson_home_xg: float = 1.3
    poisson_away_xg: float = 1.1

    # Over/Under probabilities
    over_15_prob: float = 0.75
    over_25_prob: float = 0.55
    over_35_prob: float = 0.30

    # Both teams to score
    btts_yes_prob: float = 0.50

    # Three most likely scores, best first
    most_likely_score: str = "1-1"
    second_likely_score: str = "1-0"
    third_likely_score: str = "2-1"

    # Value bet opportunities (None is replaced by a fresh list after init)
    value_bets: list = None

    confidence: float = 0.0

    def __post_init__(self):
        """Give every instance its own list when no value bets were supplied."""
        self.value_bets = [] if self.value_bets is None else self.value_bets

    def to_dict(self) -> dict:
        """Return a plain dict with probabilities as percentages rounded to one decimal."""

        def as_percent(fraction):
            return round(fraction * 100, 1)

        result = {
            name: as_percent(getattr(self, name))
            for name in ("market_home_prob", "market_draw_prob", "market_away_prob")
        }
        result["poisson_home_xg"] = round(self.poisson_home_xg, 2)
        result["poisson_away_xg"] = round(self.poisson_away_xg, 2)
        for name in ("over_15_prob", "over_25_prob", "over_35_prob", "btts_yes_prob"):
            result[name] = as_percent(getattr(self, name))
        for name in ("most_likely_score", "second_likely_score", "third_likely_score", "value_bets"):
            result[name] = getattr(self, name)
        result["confidence"] = round(self.confidence, 1)
        return result
|
||||||
|
|
||||||
|
|
||||||
|
class OddsPredictorEngine:
    """
    Odds-based prediction engine.

    Uses:
    - Market odds to extract implied probabilities
    - Poisson distribution for mathematical xG
    - Value calculator for EV+ opportunities
    """

    def __init__(self):
        """Acquire the Poisson engine and (optionally) the value calculator."""
        self.poisson_engine = get_poisson_engine()
        try:
            self.value_calc = get_value_calculator()
        except Exception:
            # The value calculator is optional; the engine works without it.
            self.value_calc = None
        # Neutral 1X2 odds used when a key is absent from the odds dict.
        self.default_ms_h = 2.65
        self.default_ms_d = 3.20
        self.default_ms_a = 2.65
        print("✅ OddsPredictorEngine initialized")

    def _odds_to_prob(self, odds: float) -> float:
        """Convert decimal odds to probability.

        Returns 0.0 for values that cannot be converted to float or that are
        not greater than 1.0.
        """
        try:
            odds = float(odds)
        except (TypeError, ValueError):
            return 0.0
        if odds <= 1.0:
            return 0.0
        return 1.0 / odds

    def _detect_value_bets(self, poisson_pred, market_home, market_away,
                           market_over25, ms_market_valid=True) -> list:
        """Compare Poisson probabilities with market probabilities.

        Args:
            poisson_pred: Object exposing ``home_win_prob``, ``away_win_prob``
                and ``over_25_prob``.
            market_home: Fair market probability of a home win.
            market_away: Fair market probability of an away win.
            market_over25: Market probability of over 2.5 goals; 0.0 means
                the odd was missing or invalid.
            ms_market_valid: False when no usable 1X2 odd was available.

        Returns:
            List of value-bet dicts with keys ``market``, ``edge`` (percentage
            points) and ``confidence``.
        """
        value_bets = []

        # 1X2: only meaningful when real market probabilities exist; otherwise
        # every model probability would look like a huge edge over 0.0.
        home_edge = poisson_pred.home_win_prob - market_home
        if ms_market_valid and abs(home_edge) > 0.10:
            if home_edge > 0:
                value_bets.append({
                    "market": "MS 1",
                    "edge": round(home_edge * 100, 1),
                    "confidence": "medium"
                })
            else:
                # Home is over-rated by the market; away is a value bet only
                # if the model actually rates away above the market.
                away_edge = poisson_pred.away_win_prob - market_away
                if away_edge > 0:
                    value_bets.append({
                        "market": "MS 2",
                        "edge": round(away_edge * 100, 1),
                        "confidence": "medium"
                    })

        # O/U 2.5: skipped when the market odd was unusable.
        if market_over25 > 0.0:
            ou_edge = poisson_pred.over_25_prob - market_over25
            if abs(ou_edge) > 0.08:
                edge = abs(ou_edge) * 100
                value_bets.append({
                    "market": "2.5 Üst" if ou_edge > 0 else "2.5 Alt",
                    "edge": round(edge, 1),
                    "confidence": "high" if edge > 10 else "medium"
                })

        return value_bets

    def predict(self,
                odds_data: Dict[str, float],
                home_goals_avg: float = 1.5,
                home_conceded_avg: float = 1.2,
                away_goals_avg: float = 1.2,
                away_conceded_avg: float = 1.4) -> "OddsPrediction":
        """
        Generate odds-based prediction.

        Args:
            odds_data: Dict with keys like 'ms_h', 'ms_d', 'ms_a', 'ou25_o', 'btts_y'
                (``None`` is treated as an empty dict)
            home_goals_avg: Home team's average goals scored
            home_conceded_avg: Home team's average goals conceded
            away_goals_avg: Away team's average goals scored
            away_conceded_avg: Away team's average goals conceded

        Returns:
            OddsPrediction with market and Poisson analysis
        """
        odds_data = odds_data or {}

        # 1. Extract market probabilities from odds
        raw_probs = [
            self._odds_to_prob(odds_data.get("ms_h", self.default_ms_h)),
            self._odds_to_prob(odds_data.get("ms_d", self.default_ms_d)),
            self._odds_to_prob(odds_data.get("ms_a", self.default_ms_a)),
        ]

        # Remove vig to get fair probabilities
        raw_total = sum(raw_probs)
        ms_market_valid = raw_total > 0
        total = raw_total or 1  # avoid division by zero when every odd is unusable

        market_home = raw_probs[0] / total
        market_draw = raw_probs[1] / total
        market_away = raw_probs[2] / total

        # 2. Poisson prediction
        poisson_pred = self.poisson_engine.predict(
            home_goals_avg, home_conceded_avg,
            away_goals_avg, away_conceded_avg
        )

        # 3. Get most likely scores (with static defaults when fewer are available)
        likely_scores = poisson_pred.most_likely_scores[:3] if poisson_pred.most_likely_scores else []
        score_1 = likely_scores[0]["score"] if len(likely_scores) > 0 else "1-1"
        score_2 = likely_scores[1]["score"] if len(likely_scores) > 1 else "1-0"
        score_3 = likely_scores[2]["score"] if len(likely_scores) > 2 else "2-1"

        # 4. Value bet detection
        market_over25 = self._odds_to_prob(odds_data.get("ou25_o", 1.9))
        value_bets = self._detect_value_bets(
            poisson_pred, market_home, market_away, market_over25,
            ms_market_valid=ms_market_valid,
        )

        # Calculate confidence
        # Higher when market and Poisson agree
        agreement = 1.0 - abs(poisson_pred.home_win_prob - market_home)
        confidence = 50.0 + (agreement * 40) + (len(value_bets) * 5)

        return OddsPrediction(
            market_home_prob=market_home,
            market_draw_prob=market_draw,
            market_away_prob=market_away,
            poisson_home_xg=poisson_pred.home_xg,
            poisson_away_xg=poisson_pred.away_xg,
            over_15_prob=poisson_pred.over_15_prob,
            over_25_prob=poisson_pred.over_25_prob,
            over_35_prob=poisson_pred.over_35_prob,
            btts_yes_prob=poisson_pred.btts_yes_prob,
            most_likely_score=score_1,
            second_likely_score=score_2,
            third_likely_score=score_3,
            value_bets=value_bets,
            confidence=min(99.9, confidence)
        )
|
||||||
|
|
||||||
|
|
||||||
|
# Singleton
|
||||||
|
_engine: Optional[OddsPredictorEngine] = None
|
||||||
|
|
||||||
|
|
||||||
|
def get_odds_predictor() -> OddsPredictorEngine:
    """Return the module-wide OddsPredictorEngine, creating it on first use."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = OddsPredictorEngine()
    return _engine
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke test: run one prediction with sample odds and print it.
    sample_odds = {
        "ms_h": 1.85,
        "ms_d": 3.40,
        "ms_a": 4.20,
        "ou25_o": 1.90
    }
    engine = get_odds_predictor()

    print("\n🧪 Odds Predictor Engine Test")
    print("=" * 50)

    pred = engine.predict(
        odds_data=sample_odds,
        home_goals_avg=1.8,
        home_conceded_avg=1.0,
        away_goals_avg=1.2,
        away_conceded_avg=1.5
    )

    print(f"\n📊 Prediction:")
    report = pred.to_dict()
    for k in report:
        v = report[k]
        print(f" {k}: {v}")
|
||||||
Executable
+224
@@ -0,0 +1,224 @@
|
|||||||
|
"""
|
||||||
|
Player Predictor Engine - V20 Ensemble Component
|
||||||
|
Analyzes squad quality, key players, and missing player impact.
|
||||||
|
|
||||||
|
Weight: 25% in ensemble
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from typing import Dict, Optional, List
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||||
|
|
||||||
|
from features.squad_analysis_engine import get_squad_analysis_engine
|
||||||
|
from features.sidelined_analyzer import get_sidelined_analyzer
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class PlayerPrediction:
    """Result container produced by the player/squad engine."""
    home_squad_quality: float = 50.0  # 0-100
    away_squad_quality: float = 50.0
    squad_diff: float = 0.0  # -100 to +100
    home_key_players: int = 0
    away_key_players: int = 0
    home_missing_impact: float = 0.0  # 0-1, how much weaker due to missing players
    away_missing_impact: float = 0.0
    home_goals_form: int = 0  # Goals in last 5 matches
    away_goals_form: int = 0
    lineup_available: bool = False
    confidence: float = 0.0

    def to_dict(self) -> dict:
        """Return a plain dict of all fields with the float fields rounded."""
        payload = {}
        # Quality scores and the diff keep one decimal place.
        payload["home_squad_quality"] = round(self.home_squad_quality, 1)
        payload["away_squad_quality"] = round(self.away_squad_quality, 1)
        payload["squad_diff"] = round(self.squad_diff, 1)
        payload["home_key_players"] = self.home_key_players
        payload["away_key_players"] = self.away_key_players
        # Impact values keep two decimal places.
        payload["home_missing_impact"] = round(self.home_missing_impact, 2)
        payload["away_missing_impact"] = round(self.away_missing_impact, 2)
        payload["home_goals_form"] = self.home_goals_form
        payload["away_goals_form"] = self.away_goals_form
        payload["lineup_available"] = self.lineup_available
        payload["confidence"] = round(self.confidence, 1)
        return payload
|
||||||
|
|
||||||
|
|
||||||
|
class PlayerPredictorEngine:
    """
    Player/Squad-based prediction engine.

    Analyzes:
    - Starting 11 quality
    - Key player availability (top scorers)
    - Missing player impact
    - Recent goalscoring form per player
    """

    def __init__(self):
        self.squad_engine = get_squad_analysis_engine()
        self.sidelined_analyzer = get_sidelined_analyzer()
        print("✅ PlayerPredictorEngine initialized")

    def _load_features(self,
                       match_id: Optional[str],
                       home_team_id: str,
                       away_team_id: str,
                       home_lineup: Optional[List[str]],
                       away_lineup: Optional[List[str]]) -> tuple:
        """Return ``(features, lineup_available)`` from the best available source."""
        if home_lineup and away_lineup:
            # Use provided lineups (for live matches)
            home_analysis = self.squad_engine.analyze_squad_from_list(
                home_lineup, home_team_id
            )
            away_analysis = self.squad_engine.analyze_squad_from_list(
                away_lineup, away_team_id
            )
            # Build features dict from analysis objects
            features = {
                "home_starting_11": home_analysis.starting_count or 11,
                "home_goals_last_5": home_analysis.total_goals_last_5,
                "home_assists_last_5": home_analysis.total_assists_last_5,
                "home_key_players": home_analysis.key_players_count,
                "away_starting_11": away_analysis.starting_count or 11,
                "away_goals_last_5": away_analysis.total_goals_last_5,
                "away_assists_last_5": away_analysis.total_assists_last_5,
                "away_key_players": away_analysis.key_players_count,
            }
            return features, True

        if match_id:
            # Try to get from database
            try:
                features = self.squad_engine.get_features(
                    match_id, home_team_id, away_team_id
                )
                lineup_available = (
                    features.get("home_starting_11", 0) >= 11 and
                    features.get("away_starting_11", 0) >= 11
                )
                return features, lineup_available
            except Exception as e:
                # Best-effort fallback, but report the failure instead of hiding it.
                print(f"⚠️ SquadAnalysisEngine error: {e}")

        features = self.squad_engine.get_features_without_match(
            home_team_id, away_team_id
        )
        return features, False

    def predict(self,
                match_id: Optional[str],
                home_team_id: str,
                away_team_id: str,
                home_lineup: Optional[List[str]] = None,
                away_lineup: Optional[List[str]] = None,
                sidelined_data: Optional[Dict] = None) -> "PlayerPrediction":
        """
        Generate player-based prediction.

        Args:
            match_id: Match ID for lineup lookup; may be None/empty
            home_team_id: Home team ID
            away_team_id: Away team ID
            home_lineup: Optional list of home player IDs
            away_lineup: Optional list of away player IDs
            sidelined_data: Optional data passed to the sidelined analyzer;
                when given it replaces the lineup-count estimate of missing
                player impact

        Returns:
            PlayerPrediction with squad analysis
        """
        features, lineup_available = self._load_features(
            match_id, home_team_id, away_team_id, home_lineup, away_lineup
        )

        # Extract features
        home_goals = features.get("home_goals_last_5", 0)
        away_goals = features.get("away_goals_last_5", 0)
        home_key = features.get("home_key_players", 0)
        away_key = features.get("away_key_players", 0)

        # Calculate squad quality (0-100)
        # Based on: goals scored, key players, assists
        home_quality = min(100, 50 + (home_goals * 3) + (home_key * 5) +
                           features.get("home_assists_last_5", 0) * 2)
        away_quality = min(100, 50 + (away_goals * 3) + (away_key * 5) +
                           features.get("away_assists_last_5", 0) * 2)

        # Squad difference
        squad_diff = home_quality - away_quality

        # Missing player impact
        # Priority: sidelined data (position-weighted) > lineup count (basic)
        if sidelined_data:
            home_impact, away_impact = self.sidelined_analyzer.analyze_match(sidelined_data)
            home_missing = home_impact.impact_score
            away_missing = away_impact.impact_score
            sidelined_available = True
        else:
            # Fallback: basic lineup count method
            # NOTE(review): a reported starting count of 0 yields an impact of
            # 1.0 here; confirm the feature source never reports 0 for a
            # lineup that is simply not published yet.
            expected_xi = 11
            actual_home_xi = features.get("home_starting_11", 11)
            actual_away_xi = features.get("away_starting_11", 11)
            home_missing = (expected_xi - actual_home_xi) / expected_xi if actual_home_xi < expected_xi else 0
            away_missing = (expected_xi - actual_away_xi) / expected_xi if actual_away_xi < expected_xi else 0
            sidelined_available = False

        # Confidence: more data sources = higher confidence
        confidence = 70.0 if lineup_available else 35.0
        if home_goals + away_goals > 10:
            confidence += 15
        if sidelined_available:
            confidence += self.sidelined_analyzer.config.get("sidelined.confidence_boost", 10)
        if not lineup_available:
            confidence -= 5.0

        return PlayerPrediction(
            home_squad_quality=home_quality,
            away_squad_quality=away_quality,
            squad_diff=squad_diff,
            home_key_players=home_key,
            away_key_players=away_key,
            home_missing_impact=home_missing,
            away_missing_impact=away_missing,
            home_goals_form=home_goals,
            away_goals_form=away_goals,
            lineup_available=lineup_available,
            # The boost is config-driven, so bound the result on both sides.
            confidence=min(100.0, max(5.0, confidence))
        )

    def get_1x2_modifier(self, prediction: "PlayerPrediction") -> Dict[str, float]:
        """
        Calculate 1X2 probability modifiers based on squad analysis.

        Returns modifiers to apply to base probabilities.
        """
        diff = prediction.squad_diff / 100  # -1 to +1

        return {
            "home_modifier": 1.0 + (diff * 0.3),  # Up to +/-30%
            "away_modifier": 1.0 - (diff * 0.3),
            "draw_modifier": 1.0 - abs(diff) * 0.2  # Less draw if big diff
        }
|
||||||
|
|
||||||
|
|
||||||
|
# Singleton
|
||||||
|
_engine: Optional[PlayerPredictorEngine] = None
|
||||||
|
|
||||||
|
|
||||||
|
def get_player_predictor() -> PlayerPredictorEngine:
    """Return the module-wide PlayerPredictorEngine, creating it on first call."""
    global _engine
    if _engine is not None:
        return _engine
    # First call: build the engine once and cache it in the module global.
    _engine = PlayerPredictorEngine()
    return _engine
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke run: no match ID and no lineups, so the engine uses its
    # team-only feature path.
    predictor = get_player_predictor()

    print("\n🧪 Player Predictor Engine Test")
    print("=" * 50)

    result = predictor.predict(
        match_id=None,
        home_team_id="test_home",
        away_team_id="test_away"
    )

    print(f"\n📊 Prediction:")
    for field_name, field_value in result.to_dict().items():
        print(f" {field_name}: {field_value}")
|
||||||
Executable
+188
@@ -0,0 +1,188 @@
|
|||||||
|
"""
|
||||||
|
Referee Predictor Engine - V20 Ensemble Component
|
||||||
|
Analyzes referee patterns for cards, goals, and home bias.
|
||||||
|
|
||||||
|
Weight: 15% in ensemble
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from typing import Dict, Optional
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||||
|
|
||||||
|
from features.referee_engine import get_referee_engine
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class RefereePrediction:
    """Result container produced by the referee engine."""
    referee_name: str = ""
    matches_officiated: int = 0

    # Card tendencies
    avg_yellow_cards: float = 4.0
    avg_red_cards: float = 0.2
    is_card_heavy: bool = False  # Above average cards

    # Goal tendencies
    avg_goals_per_match: float = 2.5
    over_25_rate: float = 0.50
    is_high_scoring: bool = False  # Above average goals

    # Home bias
    home_win_rate: float = 0.45
    home_bias: float = 0.0  # -1 to +1, positive = favors home

    # Penalty tendency
    penalty_rate: float = 0.15

    confidence: float = 0.0

    def to_dict(self) -> dict:
        """Return a plain dict of all fields; rate fields are scaled to percentages."""

        def as_percent(rate):
            # Rates are stored as fractions and reported with one decimal place.
            return round(rate * 100, 1)

        yellow = round(self.avg_yellow_cards, 1)
        red = round(self.avg_red_cards, 2)
        goals = round(self.avg_goals_per_match, 2)
        bias = round(self.home_bias, 2)
        return {
            "referee_name": self.referee_name,
            "matches_officiated": self.matches_officiated,
            "avg_yellow_cards": yellow,
            "avg_red_cards": red,
            "is_card_heavy": self.is_card_heavy,
            "avg_goals_per_match": goals,
            "over_25_rate": as_percent(self.over_25_rate),
            "is_high_scoring": self.is_high_scoring,
            "home_win_rate": as_percent(self.home_win_rate),
            "home_bias": bias,
            "penalty_rate": as_percent(self.penalty_rate),
            "confidence": round(self.confidence, 1)
        }
|
||||||
|
|
||||||
|
|
||||||
|
class RefereePredictorEngine:
    """
    Referee-based prediction engine.

    Analyzes:
    - Card tendency (average yellow/red cards)
    - Goal tendency (goals per match, over-2.5 rate)
    - Home bias (rate of decisions favouring the home side)
    - Penalty tendency (penalty award rate)
    """

    # League average benchmarks
    LEAGUE_AVG_GOALS = 2.65
    LEAGUE_AVG_YELLOW = 4.0
    LEAGUE_HOME_WIN_RATE = 0.45

    def __init__(self):
        self.referee_engine = get_referee_engine()
        print("✅ RefereePredictorEngine initialized")

    @staticmethod
    def _numeric(features: Dict, key: str, default):
        """Return ``features[key]``, or ``default`` when the key is absent or None."""
        value = features.get(key)
        return default if value is None else value

    def predict(self,
                match_id: Optional[str] = None,
                referee_name: Optional[str] = None,
                league_id: Optional[str] = None) -> "RefereePrediction":
        """
        Generate referee-based prediction.

        Args:
            match_id: Match ID to find referee
            referee_name: Or provide referee name directly
            league_id: League ID to scope stats (prevents name collisions)

        Returns:
            RefereePrediction with referee analysis. With neither a match ID
            nor a name, a default prediction with confidence 10.0 is returned;
            with fewer than 5 recorded matches, defaults with confidence 20.0.
        """
        # Get referee features
        if match_id:
            features = self.referee_engine.get_features(match_id, league_id=league_id)
            # Live flows may already have referee_name while match_officials table is sparse.
            # Prefer the richer profile if direct-name lookup has more history.
            if referee_name:
                name_features = self.referee_engine.get_features_by_name(referee_name, league_id=league_id)
                if self._numeric(name_features, "referee_matches", 0) > self._numeric(features, "referee_matches", 0):
                    features = name_features
        elif referee_name:
            features = self.referee_engine.get_features_by_name(referee_name, league_id=league_id)
        else:
            # Return default
            return RefereePrediction(confidence=10.0)

        ref_name = features.get("referee_name", "Unknown")
        # The match count is treated as nullable in the comparison above, so
        # coalesce it here as well; a raw None would make `matches < 5` raise.
        matches = self._numeric(features, "referee_matches", 0)

        if matches < 5:
            # Not enough data
            return RefereePrediction(
                referee_name=ref_name,
                matches_officiated=matches,
                confidence=20.0
            )

        # Extract features (None values fall back to the same defaults as missing keys)
        avg_yellow = self._numeric(features, "referee_avg_yellow", 4.0)
        avg_red = self._numeric(features, "referee_avg_red", 0.2)
        avg_goals = self._numeric(features, "referee_avg_goals", 2.5)
        over25_rate = self._numeric(features, "referee_over25_rate", 0.5)
        home_win_rate = self._numeric(features, "referee_home_win_rate", self.LEAGUE_HOME_WIN_RATE)
        home_bias = self._numeric(features, "referee_home_bias", 0.0)
        penalty_rate = self._numeric(features, "referee_penalty_rate", 0.15)

        # Determine tendencies (a red card is weighted as four yellows)
        is_card_heavy = (avg_yellow + avg_red * 4) > (self.LEAGUE_AVG_YELLOW + 1)
        is_high_scoring = avg_goals > self.LEAGUE_AVG_GOALS

        # Confidence based on matches officiated
        confidence = min(90.0, 30.0 + matches * 2)

        return RefereePrediction(
            referee_name=ref_name,
            matches_officiated=matches,
            avg_yellow_cards=avg_yellow,
            avg_red_cards=avg_red,
            is_card_heavy=is_card_heavy,
            avg_goals_per_match=avg_goals,
            over_25_rate=over25_rate,
            is_high_scoring=is_high_scoring,
            home_win_rate=home_win_rate,
            home_bias=home_bias,
            penalty_rate=penalty_rate,
            confidence=confidence
        )

    def get_modifiers(self, prediction: "RefereePrediction") -> Dict[str, float]:
        """
        Get modifiers to apply to other predictions based on referee profile.
        """
        return {
            # Home team gets slight boost if referee has home bias
            "home_modifier": 1.0 + (prediction.home_bias * 0.05),
            # O/U modifier
            "over_25_modifier": 1.0 + (prediction.avg_goals_per_match - self.LEAGUE_AVG_GOALS) * 0.1,
            # Card modifier for card markets
            "cards_modifier": 1.0 + (prediction.avg_yellow_cards - self.LEAGUE_AVG_YELLOW) * 0.05
        }
|
||||||
|
|
||||||
|
|
||||||
|
# Singleton
|
||||||
|
_engine: Optional[RefereePredictorEngine] = None
|
||||||
|
|
||||||
|
|
||||||
|
def get_referee_predictor() -> RefereePredictorEngine:
    """Return the module-wide RefereePredictorEngine, creating it on first call."""
    global _engine
    if _engine is not None:
        return _engine
    # First call: build the engine once and cache it in the module global.
    _engine = RefereePredictorEngine()
    return _engine
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke run: look a referee up by name and print the result.
    predictor = get_referee_predictor()

    print("\n🧪 Referee Predictor Engine Test")
    print("=" * 50)

    result = predictor.predict(referee_name="Cüneyt Çakır")

    print(f"\n📊 Prediction:")
    for field_name, field_value in result.to_dict().items():
        print(f" {field_name}: {field_value}")
|
||||||
Executable
+286
@@ -0,0 +1,286 @@
|
|||||||
|
"""
|
||||||
|
Team Predictor Engine - V20 Ensemble Component
|
||||||
|
Combines ELO ratings, form stats, H2H records and team statistics.
|
||||||
|
|
||||||
|
Weight: 30% in ensemble
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from typing import Dict, Optional, Tuple, Any
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
# Add parent to path
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||||
|
|
||||||
|
from features.elo_system import get_elo_system
|
||||||
|
from features.h2h_engine import get_h2h_engine
|
||||||
|
from features.momentum_engine import get_momentum_engine, MomentumData
|
||||||
|
from features.team_stats_engine import get_team_stats_engine
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class TeamPrediction:
    """Result container produced by the team engine."""
    home_win_prob: float = 0.33
    draw_prob: float = 0.33
    away_win_prob: float = 0.33
    home_xg: float = 1.3
    away_xg: float = 1.1
    form_advantage: float = 0.0  # -1 to +1, positive = home advantage
    h2h_advantage: float = 0.0  # -1 to +1
    elo_diff: float = 0.0
    confidence: float = 0.0
    # Unrounded feature values attached by the engine; not part of to_dict().
    raw_features: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return the summary fields as a dict; probabilities become percentages."""

        def as_percent(probability):
            # Probabilities are stored as fractions and reported with one decimal.
            return round(probability * 100, 1)

        summary = {
            "home_win_prob": as_percent(self.home_win_prob),
            "draw_prob": as_percent(self.draw_prob),
            "away_win_prob": as_percent(self.away_win_prob),
        }
        summary["home_xg"] = round(self.home_xg, 2)
        summary["away_xg"] = round(self.away_xg, 2)
        summary["form_advantage"] = round(self.form_advantage, 2)
        summary["h2h_advantage"] = round(self.h2h_advantage, 2)
        summary["elo_diff"] = round(self.elo_diff, 0)
        summary["confidence"] = round(self.confidence, 1)
        return summary
|
||||||
|
|
||||||
|
|
||||||
|
class TeamPredictorEngine:
    """
    Team-based prediction engine.

    Uses:
    - ELO Rating System (venue-adjusted, league-weighted)
    - H2H Engine (head-to-head history)
    - Momentum Engine (recent form)
    - Team Stats Engine (possession, shots, corners)
    """

    def __init__(self):
        self.elo_system = get_elo_system()
        self.h2h_engine = get_h2h_engine()
        self.momentum_engine = get_momentum_engine()
        self.team_stats_engine = get_team_stats_engine()

        print("✅ TeamPredictorEngine initialized")

    def predict(self,
                home_team_id: str,
                away_team_id: str,
                match_date_ms: int,
                home_team_name: str = "",
                away_team_name: str = "") -> "TeamPrediction":
        """
        Generate team-based prediction.

        Args:
            home_team_id: Home team ID
            away_team_id: Away team ID
            match_date_ms: Match date in milliseconds
            home_team_name: Home team name (accepted but not read by this method)
            away_team_name: Away team name (accepted but not read by this method)

        Returns:
            TeamPrediction with 1X2 probabilities and xG
        """
        # 1. Get ELO predictions
        elo_pred = self.elo_system.predict_match(home_team_id, away_team_id)
        elo_features = self.elo_system.get_match_features(home_team_id, away_team_id)

        # 2. Get H2H features
        try:
            h2h_features = self.h2h_engine.get_features(
                home_team_id, away_team_id, match_date_ms
            )
        except Exception as e:
            # Best-effort fallback to neutral values, reported like the
            # momentum fallback below rather than swallowed silently.
            print(f"⚠️ H2HEngine error: {e}")
            h2h_features = {
                "h2h_home_win_rate": 0.5,
                "h2h_away_win_rate": 0.5,
                "h2h_avg_goals": 2.5,
                "h2h_btts_rate": 0.5
            }

        # 3. Get Momentum/Form features
        try:
            # key: form_score should be 0-1 derived from momentum_score (-1 to 1)
            home_mom_data = self.momentum_engine.calculate_momentum(home_team_id, match_date_ms)
            away_mom_data = self.momentum_engine.calculate_momentum(away_team_id, match_date_ms)

            home_form_score = (home_mom_data.momentum_score + 1) / 2
            away_form_score = (away_mom_data.momentum_score + 1) / 2
        except Exception as e:
            print(f"⚠️ MomentumEngine error: {e}")
            home_mom_data = MomentumData()
            away_mom_data = MomentumData()
            home_form_score = 0.5
            away_form_score = 0.5

        # 4. Get Team Stats
        home_stats = self.team_stats_engine.get_features(home_team_id, match_date_ms)
        away_stats = self.team_stats_engine.get_features(away_team_id, match_date_ms)

        # 5. Combine predictions
        # ELO-based 1X2 (60% weight)
        elo_home = elo_pred.get("home_win_prob", 0.33)
        elo_draw = elo_pred.get("draw_prob", 0.33)
        elo_away = elo_pred.get("away_win_prob", 0.33)

        # Adjust based on H2H (20% weight)
        h2h_home_rate = h2h_features.get("h2h_home_win_rate", 0.5)
        h2h_away_rate = h2h_features.get("h2h_away_win_rate", 0.5)

        # Adjust based on form (20% weight)
        form_diff = home_form_score - away_form_score  # -1 to +1

        # Weighted combination
        final_home = elo_home * 0.6 + h2h_home_rate * 0.2 + (0.5 + form_diff * 0.3) * 0.2
        final_away = elo_away * 0.6 + h2h_away_rate * 0.2 + (0.5 - form_diff * 0.3) * 0.2
        # Draw takes the remainder; floor at zero so out-of-range inputs
        # cannot produce a negative probability before normalisation.
        final_draw = max(0.0, 1.0 - final_home - final_away)

        # Normalize
        total = final_home + final_draw + final_away
        if total > 0:
            final_home /= total
            final_draw /= total
            final_away /= total

        # Calculate xG based on stats and form (conservative base)
        home_conversion = home_stats.get("shot_conversion_rate", 0.1)
        away_conversion = away_stats.get("shot_conversion_rate", 0.1)

        base_home_xg = 1.35 + (home_conversion * 3.0)
        base_away_xg = 1.10 + (away_conversion * 2.5)

        # Defense weakness factor: each side's own shot accuracy is used as a
        # proxy for its defensive weakness when scaling the opponent's xG.
        away_def_weakness = away_stats.get("shot_accuracy", 0.35)
        home_def_weakness = home_stats.get("shot_accuracy", 0.35)

        # Adjust xG by form difference and the opponent's weakness proxy
        home_xg = base_home_xg * (1 + form_diff * 0.15) * (0.8 + away_def_weakness * 0.6)
        away_xg = base_away_xg * (1 - form_diff * 0.15) * (0.8 + home_def_weakness * 0.6)

        # Apply xG Underperformance Penalty directly to calculated xG
        # If a team chronically underperforms its xG, subtract up to 0.5 here
        if hasattr(home_mom_data, 'xg_underperformance') and home_mom_data.xg_underperformance > 0.2:
            home_xg -= min(0.5, home_mom_data.xg_underperformance * 0.5)

        if hasattr(away_mom_data, 'xg_underperformance') and away_mom_data.xg_underperformance > 0.2:
            away_xg -= min(0.5, away_mom_data.xg_underperformance * 0.5)

        # H2H adjustment (more conservative)
        h2h_avg_goals = h2h_features.get("h2h_avg_goals", 2.5)
        if h2h_avg_goals > 3.0:
            home_xg *= 1.05
            away_xg *= 1.05
        elif h2h_avg_goals < 2.0:
            home_xg *= 0.95
            away_xg *= 0.95

        # Clamp xG to reasonable range
        home_xg = max(0.5, min(3.5, home_xg))
        away_xg = max(0.3, min(3.0, away_xg))

        # Calculate confidence
        # Higher when ELO, H2H, and Form all agree
        elo_winner = "H" if elo_home > max(elo_draw, elo_away) else ("A" if elo_away > elo_draw else "D")
        h2h_winner = "H" if h2h_home_rate > h2h_away_rate else "A"
        form_winner = "H" if form_diff > 0.1 else ("A" if form_diff < -0.1 else "D")

        agreement = sum([
            elo_winner == h2h_winner,
            elo_winner == form_winner,
            h2h_winner == form_winner
        ])

        max_prob = max(final_home, final_draw, final_away)
        confidence = max_prob * 100 * (0.7 + agreement * 0.1)

        # Collect raw features for the downstream model
        raw_features = {
            **elo_features,

            # Form Features (need key mapping to match extract_training_data.py)
            "home_goals_avg": 1.5 + home_mom_data.goals_trend,  # Proxy
            "home_conceded_avg": 1.5 - home_mom_data.conceded_trend,  # Proxy
            "away_goals_avg": 1.5 + away_mom_data.goals_trend,
            "away_conceded_avg": 1.5 - away_mom_data.conceded_trend,

            "home_clean_sheet_rate": 0.2,  # Not in new MomentumData
            "away_clean_sheet_rate": 0.2,
            "home_scoring_rate": 0.8,
            "away_scoring_rate": 0.8,

            "home_winning_streak": home_mom_data.winning_streak,
            "away_winning_streak": away_mom_data.winning_streak,
            "home_unbeaten_streak": home_mom_data.unbeaten_streak,
            "away_unbeaten_streak": away_mom_data.unbeaten_streak,

            # H2H Features
            **h2h_features,

            # Team Stats
            "home_avg_possession": home_stats.get("avg_possession", 0.5),
            "away_avg_possession": away_stats.get("avg_possession", 0.5),
            "home_avg_shots_on_target": home_stats.get("avg_shots_on_target", 3.5),
            "away_avg_shots_on_target": away_stats.get("avg_shots_on_target", 3.5),
            "home_shot_conversion": home_stats.get("shot_conversion_rate", 0.1),
            "away_shot_conversion": away_stats.get("shot_conversion_rate", 0.1),
            "home_avg_corners": home_stats.get("avg_corners", 4.5),
            "away_avg_corners": away_stats.get("avg_corners", 4.5),

            # Derived
            "home_xga": 1.5 - home_mom_data.conceded_trend,  # reusing as proxy
            "away_xga": 1.5 - away_mom_data.conceded_trend
        }

        return TeamPrediction(
            home_win_prob=final_home,
            draw_prob=final_draw,
            away_win_prob=final_away,
            home_xg=home_xg,
            away_xg=away_xg,
            form_advantage=form_diff,
            h2h_advantage=h2h_home_rate - h2h_away_rate,
            elo_diff=elo_features.get("elo_diff", 0),
            confidence=confidence,
            raw_features=raw_features
        )
|
||||||
|
|
||||||
|
|
||||||
|
# Singleton
|
||||||
|
_engine: Optional[TeamPredictorEngine] = None
|
||||||
|
|
||||||
|
|
||||||
|
def get_team_predictor() -> TeamPredictorEngine:
    """Return the module-wide TeamPredictorEngine, creating it on first call."""
    global _engine
    if _engine is not None:
        return _engine
    # First call: build the engine once and cache it in the module global.
    _engine = TeamPredictorEngine()
    return _engine
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke run with placeholder team IDs and a fixed match timestamp.
    predictor = get_team_predictor()

    print("\n🧪 Team Predictor Engine Test")
    print("=" * 50)

    # Test with sample IDs
    result = predictor.predict(
        home_team_id="test_home",
        away_team_id="test_away",
        match_date_ms=1707393600000
    )

    print(f"\n📊 Prediction:")
    for field_name, field_value in result.to_dict().items():
        print(f" {field_name}: {field_value}")
|
||||||
@@ -0,0 +1,302 @@
|
|||||||
|
"""
|
||||||
|
Quantitative Finance Module — V2 Betting Engine
|
||||||
|
Edge calculation, Fractional Kelly Criterion staking, bet grading, and risk assessment.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import math
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
# Constants
|
||||||
|
# ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
BANKROLL_UNITS: float = 10.0 # Total bankroll in abstract units
|
||||||
|
KELLY_FRACTION: float = 0.25 # Quarter-Kelly (conservative, anti-ruin)
|
||||||
|
MIN_EDGE_PLAYABLE: float = 0.05 # 5% edge minimum to mark as playable
|
||||||
|
MIN_ODDS_PLAYABLE: float = 1.30 # Skip extreme chalk below 1.30
|
||||||
|
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
# Edge Calculation
|
||||||
|
# ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
def calculate_edge(true_prob: float, decimal_odds: float) -> float:
    """
    Return the expected edge of a bet: (true_prob × decimal_odds) - 1.0.

    The result is rounded to four decimals. A positive value means the model
    probability beats the price on offer. Odds at or below 1.0, or a
    non-positive probability, yield the sentinel -1.0.
    """
    if true_prob <= 0.0 or decimal_odds <= 1.0:
        return -1.0
    expected_return = true_prob * decimal_odds
    return round(expected_return - 1.0, 4)
|
||||||
|
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
# Kelly Criterion Staking
|
||||||
|
# ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
def kelly_stake(true_prob: float, decimal_odds: float) -> float:
    """
    Size a stake with the fractional Kelly criterion.

    Full Kelly: f* = ((b × p) - q) / b
    where b = decimal_odds - 1, p = true_prob, q = 1 - true_prob

    The full-Kelly fraction is scaled by KELLY_FRACTION and BANKROLL_UNITS,
    capped at 3 units and rounded to 0.1. Returns 0.0 when the odds are at or
    below 1.0, the probability is outside (0, 1), or Kelly is not positive.
    """
    if decimal_odds <= 1.0 or true_prob <= 0.0 or true_prob >= 1.0:
        return 0.0

    net_odds = decimal_odds - 1.0
    lose_prob = 1.0 - true_prob
    full_kelly = ((net_odds * true_prob) - lose_prob) / net_odds
    if full_kelly <= 0.0:
        # No positive expectation: do not stake.
        return 0.0

    # Scale by fraction and bankroll, then apply the 3-unit ceiling.
    units = min(full_kelly * KELLY_FRACTION * BANKROLL_UNITS, 3.0)
    return round(max(0.0, units), 1)
|
||||||
|
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
# Bet Grading
|
||||||
|
# ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
def grade_bet(edge: float, playable: bool) -> str:
    """
    Map an edge to a letter grade.

    PASS: not playable, or edge below 2%
    A: edge above 10%
    B: edge above 5% (up to and including 10%)
    C: everything else that was not passed (2% up to and including 5%)
    """
    if edge < 0.02 or not playable:
        return "PASS"
    # Tiers are checked from the highest threshold down; bounds are exclusive.
    for floor, letter in ((0.10, "A"), (0.05, "B")):
        if edge > floor:
            return letter
    return "C"
|
||||||
|
|
||||||
|
|
||||||
|
def is_playable(edge: float, decimal_odds: float) -> bool:
    """Return True when the edge and the odds both meet their module minimums."""
    if not edge >= MIN_EDGE_PLAYABLE:
        return False
    return decimal_odds >= MIN_ODDS_PLAYABLE
|
||||||
|
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
# Play Score (0-100 composite)
|
||||||
|
# ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
def calculate_play_score(
    edge: float,
    true_prob: float,
    data_quality: float,
) -> float:
    """
    Build the 0-100 composite play score used to rank and filter picks.

    Each input is multiplied by its weight and clamped to [0, cap]:
    - edge:         weight 250, cap 50
    - true_prob:    weight 30,  cap 30
    - data_quality: weight 20,  cap 20

    Returns the sum of the three components, rounded to one decimal.
    """
    weighted_inputs = (
        (edge, 250.0, 50.0),
        (true_prob, 30.0, 30.0),
        (data_quality, 20.0, 20.0),
    )
    total = 0.0
    for value, weight, cap in weighted_inputs:
        total += min(cap, max(0.0, value * weight))
    return round(total, 1)
|
||||||
|
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
# Risk Assessment
|
||||||
|
# ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class RiskResult:
|
||||||
|
level: str # LOW, MEDIUM, HIGH, EXTREME
|
||||||
|
score: float # 0.0 - 1.0
|
||||||
|
is_surprise_risk: bool
|
||||||
|
surprise_type: str | None
|
||||||
|
warnings: list[str]
|
||||||
|
|
||||||
|
|
||||||
|
def assess_risk(
|
||||||
|
missing_players_impact: float,
|
||||||
|
data_quality_score: float,
|
||||||
|
elo_diff: float,
|
||||||
|
implied_prob_fav: float,
|
||||||
|
) -> RiskResult:
|
||||||
|
"""
|
||||||
|
Multi-factor risk assessment.
|
||||||
|
|
||||||
|
Factors:
|
||||||
|
1. Missing key players (injuries/suspensions)
|
||||||
|
2. Data quality (missing stats, odds)
|
||||||
|
3. ELO closeness (tight matches are riskier)
|
||||||
|
4. Surprise potential (heavy favorite vulnerable)
|
||||||
|
"""
|
||||||
|
warnings: list[str] = []
|
||||||
|
risk_score = 0.0
|
||||||
|
|
||||||
|
# ─── Factor 1: Missing players ────────────────────────────────────
|
||||||
|
if missing_players_impact > 0.3:
|
||||||
|
risk_score += 0.35
|
||||||
|
warnings.append(
|
||||||
|
f"High missing-player impact: {missing_players_impact:.2f}"
|
||||||
|
)
|
||||||
|
elif missing_players_impact > 0.15:
|
||||||
|
risk_score += 0.15
|
||||||
|
warnings.append(
|
||||||
|
f"Moderate missing-player impact: {missing_players_impact:.2f}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# ─── Factor 2: Data quality ───────────────────────────────────────
|
||||||
|
if data_quality_score < 0.5:
|
||||||
|
risk_score += 0.25
|
||||||
|
warnings.append(
|
||||||
|
f"Low data quality: {data_quality_score:.2f}"
|
||||||
|
)
|
||||||
|
elif data_quality_score < 0.75:
|
||||||
|
risk_score += 0.10
|
||||||
|
|
||||||
|
# ─── Factor 3: ELO closeness ──────────────────────────────────────
|
||||||
|
abs_elo_diff = abs(elo_diff)
|
||||||
|
if abs_elo_diff < 50:
|
||||||
|
risk_score += 0.15
|
||||||
|
warnings.append("Very tight ELO difference — coin-flip territory")
|
||||||
|
elif abs_elo_diff < 100:
|
||||||
|
risk_score += 0.05
|
||||||
|
|
||||||
|
# ─── Factor 4: Surprise detection ─────────────────────────────────
|
||||||
|
is_surprise = False
|
||||||
|
surprise_type: str | None = None
|
||||||
|
|
||||||
|
if implied_prob_fav > 0.65 and abs_elo_diff < 80:
|
||||||
|
# Heavy favorite by odds but ELO says match is closer
|
||||||
|
is_surprise = True
|
||||||
|
surprise_type = "odds_elo_divergence"
|
||||||
|
risk_score += 0.15
|
||||||
|
warnings.append(
|
||||||
|
"Upset potential: bookmaker odds suggest heavy favorite "
|
||||||
|
"but ELO says the match is closer than the market thinks"
|
||||||
|
)
|
||||||
|
|
||||||
|
# ─── Classify ─────────────────────────────────────────────────────
|
||||||
|
risk_score = min(1.0, risk_score)
|
||||||
|
if risk_score >= 0.7:
|
||||||
|
level = "EXTREME"
|
||||||
|
elif risk_score >= 0.45:
|
||||||
|
level = "HIGH"
|
||||||
|
elif risk_score >= 0.2:
|
||||||
|
level = "MEDIUM"
|
||||||
|
else:
|
||||||
|
level = "LOW"
|
||||||
|
|
||||||
|
return RiskResult(
|
||||||
|
level=level,
|
||||||
|
score=round(risk_score, 3),
|
||||||
|
is_surprise_risk=is_surprise,
|
||||||
|
surprise_type=surprise_type,
|
||||||
|
warnings=warnings,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
# Market Analysis (orchestrates edge/kelly/grade per market)
|
||||||
|
# ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
@dataclass
class MarketPick:
    """Best selection found for one market, with its staking verdict."""

    market: str  # market code, e.g. "MS", "OU25", "BTTS"
    pick: str  # chosen outcome key; "" when no valid odds were found
    probability: float  # model probability of the chosen outcome
    odds: float  # decimal odds of the chosen outcome
    edge: float  # edge of the chosen outcome
    playable: bool  # whether the pick cleared the playability checks
    bet_grade: str  # "A", "B", "C" or "PASS"
    stake_units: float  # suggested stake in units (0.0 when not playable)
    play_score: float  # 0-100 composite ranking score
    decision_reasons: list[str]  # machine-readable reasons for the verdict
|
||||||
|
|
||||||
|
|
||||||
|
def analyze_market(
    market: str,
    probs: dict[str, float],
    odds_map: dict[str, float],
    data_quality_score: float,
) -> MarketPick:
    """
    Pick the highest-edge outcome of one market and evaluate it.

    Every outcome in ``probs`` that has decimal odds above 1.0 in
    ``odds_map`` is scored with ``calculate_edge``; the first outcome with
    the strictly highest edge wins. That outcome is then checked for
    playability, graded, staked (only when playable) and given a play score.

    Parameters:
        market: market code, e.g. "MS", "OU25", "BTTS"
        probs: outcome -> model probability, e.g. {"1": 0.55, "X": 0.25, "2": 0.20}
        odds_map: outcome -> decimal odds, e.g. {"1": 2.10, "X": 3.40, "2": 3.50}
        data_quality_score: 0.0-1.0, forwarded to the play score

    Returns:
        A MarketPick; when no outcome has usable odds it is an empty PASS
        pick with the reason "no_valid_odds_found".
    """
    # (edge, outcome, probability, odds) of the best candidate seen so far
    leader = (-99.0, "", 0.0, 0.0)
    for outcome, probability in probs.items():
        price = odds_map.get(outcome, 0.0)
        if price <= 1.0:
            continue
        candidate_edge = calculate_edge(probability, price)
        if candidate_edge > leader[0]:
            leader = (candidate_edge, outcome, probability, price)

    best_edge, best_pick, best_prob, best_odds = leader

    if not best_pick:
        return MarketPick(
            market=market,
            pick="",
            probability=0.0,
            odds=0.0,
            edge=0.0,
            playable=False,
            bet_grade="PASS",
            stake_units=0.0,
            play_score=0.0,
            decision_reasons=["no_valid_odds_found"],
        )

    playable = is_playable(best_edge, best_odds)
    grade = grade_bet(best_edge, playable)
    stake = kelly_stake(best_prob, best_odds) if playable else 0.0
    play_score = calculate_play_score(best_edge, best_prob, data_quality_score)

    # Explain the verdict
    if playable:
        reasons = [
            f"edge_{best_edge:.1%}_above_threshold",
            f"kelly_stake_{stake:.1f}_units",
        ]
    else:
        reasons = []
        if best_edge < MIN_EDGE_PLAYABLE:
            reasons.append(f"edge_{best_edge:.1%}_below_{MIN_EDGE_PLAYABLE:.0%}_threshold")
        if best_odds < MIN_ODDS_PLAYABLE:
            reasons.append(f"odds_{best_odds:.2f}_below_{MIN_ODDS_PLAYABLE:.2f}_minimum")

    return MarketPick(
        market=market,
        pick=best_pick,
        probability=round(best_prob, 4),
        odds=round(best_odds, 2),
        edge=round(best_edge, 4),
        playable=playable,
        bet_grade=grade,
        stake_units=stake,
        play_score=play_score,
        decision_reasons=reasons,
    )
|
||||||
Executable
+29
@@ -0,0 +1,29 @@
|
|||||||
|
"""
|
||||||
|
AI Engine V9 Feature Modules
|
||||||
|
Includes V8 features + new V9 engines (Upset, Momentum, Poisson, Context, Referee, Squad)
|
||||||
|
"""
|
||||||
|
|
||||||
|
# V20 Features
|
||||||
|
from .h2h_engine import H2HFeatureEngine, get_h2h_engine
|
||||||
|
from .elo_system import ELORatingSystem, get_elo_system
|
||||||
|
from .value_calculator import ValueCalculator, get_value_calculator
|
||||||
|
from .team_stats_engine import get_team_stats_engine
|
||||||
|
from .upset_engine import UpsetEngine, get_upset_engine
|
||||||
|
from .momentum_engine import MomentumEngine, get_momentum_engine
|
||||||
|
from .poisson_engine import PoissonEngine, get_poisson_engine
|
||||||
|
from .referee_engine import RefereeEngine, get_referee_engine
|
||||||
|
from .squad_analysis_engine import SquadAnalysisEngine, get_squad_analysis_engine
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
'H2HFeatureEngine', 'get_h2h_engine',
|
||||||
|
'ELORatingSystem', 'get_elo_system',
|
||||||
|
'ValueCalculator', 'get_value_calculator',
|
||||||
|
'get_team_stats_engine',
|
||||||
|
'UpsetEngine', 'get_upset_engine',
|
||||||
|
'MomentumEngine', 'get_momentum_engine',
|
||||||
|
'PoissonEngine', 'get_poisson_engine',
|
||||||
|
'RefereeEngine', 'get_referee_engine',
|
||||||
|
'SquadAnalysisEngine', 'get_squad_analysis_engine',
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
Executable
+655
@@ -0,0 +1,655 @@
|
|||||||
|
"""
|
||||||
|
ELO Rating System V2 - Venue-Adjusted & League-Weighted
|
||||||
|
V9 Model için geliştirilmiş ELO sistemi.
|
||||||
|
|
||||||
|
V1'den Farklar:
|
||||||
|
- Lig kalitesi faktörü (Premier League vs küçük lig)
|
||||||
|
- Form decay (son maçlar daha etkili)
|
||||||
|
- Venue-adjusted ELO (ev/deplasman ayrı)
|
||||||
|
- Win probability hesaplama
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
from typing import Dict, Optional, Tuple
|
||||||
|
from dataclasses import dataclass, asdict, field
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
try:
|
||||||
|
import psycopg2
|
||||||
|
except ImportError:
|
||||||
|
psycopg2 = None
|
||||||
|
|
||||||
|
MODELS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'models')
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class TeamELO:
    """ELO profile of a single team."""

    team_id: str
    team_name: str = ""

    # Core ratings
    overall_elo: float = 1500.0
    home_elo: float = 1500.0
    away_elo: float = 1500.0

    # Form rating (driven by the most recent matches)
    form_elo: float = 1500.0

    # Bookkeeping
    matches_played: int = 0
    home_matches: int = 0
    away_matches: int = 0
    wins: int = 0
    draws: int = 0
    losses: int = 0
    last_updated: Optional[str] = None

    # Results of the last five matches as a W/D/L sequence
    recent_form: str = ""

    def win_rate(self) -> float:
        """Return wins / matches_played, or 0.0 when no match was played."""
        return 0.0 if self.matches_played == 0 else self.wins / self.matches_played

    def to_features(self) -> Dict[str, float]:
        """Return the ratings, match count and win rate as a flat feature dict."""
        features: Dict[str, float] = dict(
            elo_overall=self.overall_elo,
            elo_home=self.home_elo,
            elo_away=self.away_elo,
            elo_form=self.form_elo,
            elo_matches=self.matches_played,
            elo_win_rate=self.win_rate(),
        )
        return features
|
||||||
|
|
||||||
|
|
||||||
|
# League quality factors (1.0 = average). Keys are lowercase name fragments
# in English and Turkish; "default" is the fallback for unlisted leagues.
LEAGUE_QUALITY = {
    # Top 5 European leagues
    "premier league": 1.15,
    "premier lig": 1.15,
    "la liga": 1.12,
    "bundesliga": 1.10,
    "serie a": 1.08,
    "ligue 1": 1.05,

    # Strong leagues
    "eredivisie": 1.02,
    "primeira liga": 1.02,
    "süper lig": 1.00,

    # European cups
    "champions league": 1.20,
    "şampiyonlar ligi": 1.20,
    "europa league": 1.10,
    "avrupa ligi": 1.10,
    "conference league": 1.00,

    # Mid-tier leagues
    "championship": 0.95,
    "2. bundesliga": 0.92,
    "serie b": 0.90,
    "la liga 2": 0.90,

    # Minor leagues
    "default": 0.85,
}
|
||||||
|
|
||||||
|
|
||||||
|
class ELORatingSystem:
    """
    ELO Rating System V2 - venue-adjusted and league-weighted.

    Features:
    - Separate home / away ELO tracking
    - League quality factor
    - Form ELO (weighted towards the most recent match)
    - K-factor scaled by goal difference
    """

    # ELO parameters
    K_FACTOR_BASE = 32  # base K-factor
    K_FACTOR_NEW_TEAM = 48  # higher K for new teams (first 20 matches)
    HOME_ADVANTAGE = 65  # home advantage, in ELO points
    INITIAL_ELO = 1500
    FORM_WEIGHT = 0.7  # weight of the latest match in the form ELO

    def __init__(self):
        self.ratings: Dict[str, "TeamELO"] = {}
        self.league_cache: Dict[str, str] = {}  # team_id -> league_name
        self.conn = None
        self._load_ratings()

    @staticmethod
    def _safe_rollback(conn) -> None:
        """Roll back ``conn`` and ignore any error raised while doing so."""
        try:
            conn.rollback()
        except Exception:
            # The connection may already be closed or broken; there is
            # nothing more useful to do than let the caller carry on.
            pass

    def _connect_db(self):
        """Open a new DB connection; return it, or None when unavailable."""
        if psycopg2 is None:
            return None
        try:
            from data.db import get_clean_dsn
            self.conn = psycopg2.connect(get_clean_dsn())
        except Exception as e:
            print(f"[ELO] DB connection failed: {e}")
            # Do not keep handing out a stale, closed connection.
            self.conn = None
        return self.conn

    def get_conn(self):
        """Return the live connection, reconnecting if it is missing or closed."""
        if self.conn is None or self.conn.closed:
            self._connect_db()
        return self.conn

    def _load_ratings(self):
        """Load ratings: database first, JSON file as the fallback."""
        if self._load_ratings_from_db():
            return
        self._load_ratings_from_json()

    def _load_ratings_from_db(self) -> bool:
        """Load ratings from the team_elo_ratings table.

        Returns True when at least one row was loaded, False when there is
        no connection, no rows, or the query/row conversion failed.
        """
        conn = self.get_conn()
        if conn is None:
            return False
        try:
            cur = conn.cursor()
            try:
                cur.execute("""
                    SELECT ter.team_id, t.name,
                           ter.overall_elo, ter.home_elo, ter.away_elo,
                           ter.form_elo, ter.matches_played, ter.recent_form
                    FROM team_elo_ratings ter
                    LEFT JOIN teams t ON ter.team_id = t.id
                """)
                rows = cur.fetchall()
            finally:
                cur.close()
            if not rows:
                return False

            # Build into a scratch dict so a bad row cannot leave
            # self.ratings half-populated before the JSON fallback runs.
            loaded: Dict[str, "TeamELO"] = {}
            for tid, name, overall, home, away, form, played, recent in rows:
                # recent_form must be a str: update_after_match prepends
                # 'W'/'D'/'L' to it, which fails on a list.
                if isinstance(recent, str):
                    recent_form = recent
                else:
                    recent_form = "".join(recent or ())
                loaded[str(tid)] = TeamELO(
                    team_id=str(tid),
                    team_name=name or "",
                    overall_elo=float(overall),
                    home_elo=float(home),
                    away_elo=float(away),
                    form_elo=float(form),
                    matches_played=int(played),
                    recent_form=recent_form,
                )
            self.ratings.update(loaded)
            print(f"[OK] ELO V2 ratings DB'den yuklendi ({len(self.ratings)} takim)")
            return True
        except Exception as e:
            print(f"[WARN] ELO DB yuklenemedi, JSON'a dusuyuyor: {e}")
            # A failed statement leaves the transaction aborted; clear it
            # so later queries on this connection still work.
            self._safe_rollback(conn)
            return False

    def _load_ratings_from_json(self):
        """Load ratings from the JSON file (fallback)."""
        ratings_path = os.path.join(MODELS_DIR, 'elo_ratings_v2.json')
        if not os.path.exists(ratings_path):
            return
        try:
            with open(ratings_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            for team_id, rating_data in data.items():
                self.ratings[team_id] = TeamELO(**rating_data)
            print(f"[OK] ELO V2 ratings JSON'dan yuklendi ({len(self.ratings)} takim)")
        except Exception as e:
            print(f"[WARN] ELO V2 ratings yuklenemedi: {e}")

    def save_ratings(self):
        """Write all ratings to models/elo_ratings_v2.json."""
        ratings_path = os.path.join(MODELS_DIR, 'elo_ratings_v2.json')
        os.makedirs(MODELS_DIR, exist_ok=True)

        data = {team_id: asdict(elo) for team_id, elo in self.ratings.items()}
        with open(ratings_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        print(f"💾 ELO V2 ratings kaydedildi ({len(self.ratings)} takım)")

    def get_or_create_rating(self, team_id: str, team_name: str = "") -> "TeamELO":
        """Return the team's rating, creating a default one if missing.

        The id is normalised to ``str`` so lookups agree with the keys
        produced by the DB and JSON loaders.
        """
        key = str(team_id)
        rating = self.ratings.get(key)
        if rating is None:
            rating = TeamELO(team_id=key, team_name=team_name)
            self.ratings[key] = rating
        return rating

    def get_league_quality(self, league_name: str) -> float:
        """Return the quality factor of a league.

        The longest LEAGUE_QUALITY key contained in the lowercased name
        wins, so "2. Bundesliga" resolves to its own entry rather than to
        "bundesliga". Unknown or empty names get the "default" factor.
        """
        fallback = LEAGUE_QUALITY["default"]
        if not league_name:
            return fallback

        league_lower = league_name.lower()
        best_key = max(
            (key for key in LEAGUE_QUALITY
             if key != "default" and key in league_lower),
            key=len,
            default=None,
        )
        return fallback if best_key is None else LEAGUE_QUALITY[best_key]

    def expected_score(self, rating_a: float, rating_b: float) -> float:
        """
        Expected score of A against B, between 0 and 1.
        1 = certain win, 0.5 = even, 0 = certain loss.
        """
        return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))

    def get_k_factor(self, team_elo: "TeamELO", goal_diff: int,
                     league_quality: float = 1.0) -> float:
        """
        Compute the dynamic K-factor.

        - Higher for new teams (fewer than 20 matches), for fast adaptation
        - Higher for larger goal differences
        - Scaled by the league quality factor

        Draws and one-goal results share the neutral 1.0 multiplier, so a
        draw never moves ratings more than a decisive win does.
        """
        # Base K
        if team_elo.matches_played < 20:
            k = self.K_FACTOR_NEW_TEAM
        else:
            k = self.K_FACTOR_BASE

        # Goal-difference multiplier
        if goal_diff <= 1:
            goal_mult = 1.0
        elif goal_diff == 2:
            goal_mult = 1.25
        elif goal_diff == 3:
            goal_mult = 1.5
        else:
            goal_mult = 1.75 + (goal_diff - 3) * 0.1

        # League quality multiplier
        return k * goal_mult * league_quality

    def update_after_match(
        self,
        home_id: str,
        away_id: str,
        home_goals: int,
        away_goals: int,
        home_name: str = "",
        away_name: str = "",
        league_name: str = ""
    ):
        """Update both teams' ratings and counters after a finished match."""
        home_elo = self.get_or_create_rating(home_id, home_name)
        away_elo = self.get_or_create_rating(away_id, away_name)

        # Actual result
        if home_goals > away_goals:
            actual_home, actual_away = 1.0, 0.0
            home_elo.wins += 1
            away_elo.losses += 1
            result_home, result_away = 'W', 'L'
        elif home_goals < away_goals:
            actual_home, actual_away = 0.0, 1.0
            home_elo.losses += 1
            away_elo.wins += 1
            result_home, result_away = 'L', 'W'
        else:
            actual_home, actual_away = 0.5, 0.5
            home_elo.draws += 1
            away_elo.draws += 1
            result_home, result_away = 'D', 'D'

        goal_diff = abs(home_goals - away_goals)
        league_quality = self.get_league_quality(league_name)

        # K-factors
        k_home = self.get_k_factor(home_elo, goal_diff, league_quality)
        k_away = self.get_k_factor(away_elo, goal_diff, league_quality)

        # -- Overall ELO --
        expected_home = self.expected_score(
            home_elo.overall_elo + self.HOME_ADVANTAGE,
            away_elo.overall_elo
        )
        home_elo.overall_elo += k_home * (actual_home - expected_home)
        away_elo.overall_elo += k_away * (actual_away - (1 - expected_home))

        # -- Venue-specific ELO --
        expected_home_venue = self.expected_score(home_elo.home_elo, away_elo.away_elo)
        home_elo.home_elo += k_home * (actual_home - expected_home_venue)
        away_elo.away_elo += k_away * (actual_away - (1 - expected_home_venue))

        # -- Form ELO (the latest match carries FORM_WEIGHT) --
        home_elo.form_elo = (
            home_elo.form_elo * (1 - self.FORM_WEIGHT) +
            (1500 + (actual_home - 0.5) * 100) * self.FORM_WEIGHT
        )
        away_elo.form_elo = (
            away_elo.form_elo * (1 - self.FORM_WEIGHT) +
            (1500 + (actual_away - 0.5) * 100) * self.FORM_WEIGHT
        )

        # Counters
        home_elo.matches_played += 1
        away_elo.matches_played += 1
        home_elo.home_matches += 1
        away_elo.away_matches += 1

        # Keep the five most recent results, newest first
        home_elo.recent_form = (result_home + home_elo.recent_form)[:5]
        away_elo.recent_form = (result_away + away_elo.recent_form)[:5]

        # One timestamp for both sides of the same match
        stamp = datetime.now().isoformat()
        home_elo.last_updated = stamp
        away_elo.last_updated = stamp

    def predict_match(self, home_id: str, away_id: str) -> Dict[str, float]:
        """
        Estimate home-win / draw / away-win probabilities for a match.

        Returns a dict with keys "home_win", "draw" and "away_win", each
        rounded to three decimals.
        """
        home_elo = self.get_or_create_rating(home_id)
        away_elo = self.get_or_create_rating(away_id)

        # Based on overall ELO
        exp_home_overall = self.expected_score(
            home_elo.overall_elo + self.HOME_ADVANTAGE,
            away_elo.overall_elo
        )

        # Based on venue ELO
        exp_home_venue = self.expected_score(
            home_elo.home_elo,
            away_elo.away_elo
        )

        # Combined (average)
        home_prob = (exp_home_overall + exp_home_venue) / 2

        # Draw estimate: shrinks as the ELO gap grows
        elo_diff = abs(home_elo.overall_elo - away_elo.overall_elo)
        draw_base = 0.25  # baseline draw rate
        draw_prob = draw_base * (1 - elo_diff / 800)
        draw_prob = max(0.15, min(draw_prob, 0.35))

        # Split what is left between the two sides
        remaining = 1 - draw_prob
        home_win = home_prob * remaining
        away_win = (1 - home_prob) * remaining

        return {
            "home_win": round(home_win, 3),
            "draw": round(draw_prob, 3),
            "away_win": round(away_win, 3),
        }

    def get_match_features(self, home_id: str, away_id: str) -> Dict[str, float]:
        """Return the ELO feature dict for a match, for use by the model."""
        home_elo = self.get_or_create_rating(home_id)
        away_elo = self.get_or_create_rating(away_id)

        probs = self.predict_match(home_id, away_id)

        # Encode a form string (e.g. "WWWDL") as a 0-1 score
        def form_to_score(form: str) -> float:
            if not form:
                return 0.5
            score = 0
            for char in form:
                if char == 'W':
                    score += 1
                elif char == 'D':
                    score += 0.5
            return score / max(len(form), 1)

        return {
            # Overall ELO
            'elo_home_overall': home_elo.overall_elo,
            'elo_away_overall': away_elo.overall_elo,
            'elo_diff_overall': home_elo.overall_elo - away_elo.overall_elo,

            # Venue-specific ELO
            'elo_home_venue': home_elo.home_elo,
            'elo_away_venue': away_elo.away_elo,
            'elo_diff_venue': home_elo.home_elo - away_elo.away_elo,

            # Form ELO
            'elo_home_form': home_elo.form_elo,
            'elo_away_form': away_elo.form_elo,
            'elo_diff_form': home_elo.form_elo - away_elo.form_elo,

            # Win probabilities
            'elo_prob_home': probs['home_win'],
            'elo_prob_draw': probs['draw'],
            'elo_prob_away': probs['away_win'],

            # Experience (capped at 100 matches)
            'elo_home_matches': min(home_elo.matches_played, 100),
            'elo_away_matches': min(away_elo.matches_played, 100),

            # Form score
            'elo_home_form_score': form_to_score(home_elo.recent_form),
            'elo_away_form_score': form_to_score(away_elo.recent_form),

            # Win rates
            'elo_home_win_rate': home_elo.win_rate(),
            'elo_away_win_rate': away_elo.win_rate(),
        }

    def save_ratings_to_db(self):
        """Upsert all ratings into the team_elo_ratings table.

        On any database error the transaction is rolled back and the
        exception is re-raised.
        """
        conn = self.get_conn()
        if conn is None:
            print("❌ DB bağlantısı yok, DB'ye yazılamadı!")
            return

        batch_size = 500
        teams = list(self.ratings.values())
        written = 0

        cur = conn.cursor()
        try:
            for i in range(0, len(teams), batch_size):
                batch = teams[i:i + batch_size]
                # mogrify escapes each row; the joined rows are then spliced
                # into one multi-row INSERT.
                values = [
                    cur.mogrify(
                        "(%s, %s, %s, %s, %s, %s, %s, NOW())",
                        (
                            elo.team_id,
                            round(elo.overall_elo, 2),
                            round(elo.home_elo, 2),
                            round(elo.away_elo, 2),
                            round(elo.form_elo, 2),
                            elo.matches_played,
                            elo.recent_form[:5],
                        )
                    ).decode('utf-8')
                    for elo in batch
                ]

                sql = """
                    INSERT INTO team_elo_ratings
                    (team_id, overall_elo, home_elo, away_elo, form_elo, matches_played, recent_form, updated_at)
                    VALUES {}
                    ON CONFLICT (team_id) DO UPDATE SET
                    overall_elo = EXCLUDED.overall_elo,
                    home_elo = EXCLUDED.home_elo,
                    away_elo = EXCLUDED.away_elo,
                    form_elo = EXCLUDED.form_elo,
                    matches_played = EXCLUDED.matches_played,
                    recent_form = EXCLUDED.recent_form,
                    updated_at = EXCLUDED.updated_at
                """.format(", ".join(values))

                cur.execute(sql)
                written += len(batch)

            conn.commit()
        except Exception:
            # Leave the connection usable for the next caller.
            self._safe_rollback(conn)
            raise
        finally:
            cur.close()
        print(f"💾 DB'ye {written} takım ELO yazıldı (team_elo_ratings)")

    def _load_top_league_ids(self) -> set:
        """Read the league ids from top_leagues.json; empty set if not found."""
        paths = [
            os.path.join(os.path.dirname(__file__), '..', '..', 'top_leagues.json'),
            os.path.join(os.path.dirname(__file__), '..', 'top_leagues.json'),
        ]
        for p in paths:
            if os.path.exists(p):
                with open(p, encoding='utf-8') as f:
                    ids = set(json.load(f))
                print(f"📋 {len(ids)} top lig yüklendi ({os.path.basename(p)})")
                return ids
        print("⚠️ top_leagues.json bulunamadı — tüm maçlar yazılacak")
        return set()

    def calculate_all_from_history(self, sport: str = 'football'):
        """Recompute every rating by replaying all finished matches in date order.

        For matches in a top league (or for all matches when no top-league
        list is available) the pre-match ratings are upserted into the
        sport's ai_features table. Afterwards the ratings are saved to JSON
        and to the database, and the strongest teams are printed.
        """
        print(f"\n🔄 {sport.upper()} için ELO V2 hesaplanıyor...")

        conn = self.get_conn()
        if conn is None:
            print("❌ DB bağlantısı yok!")
            return

        top_league_ids = self._load_top_league_ids()

        # Start from a clean slate: __init__ has already loaded the current
        # ratings, and replaying the full history on top of them would count
        # every match twice and contaminate the stored pre-match values.
        self.ratings.clear()

        BATCH_SIZE = 1000
        batch: list = []
        processed = 0
        written = 0

        cur = conn.cursor()
        try:
            # All finished matches in kickoff order (with match id and league id)
            cur.execute("""
                SELECT m.id, m.home_team_id, m.away_team_id,
                       m.score_home, m.score_away, m.league_id,
                       t1.name as home_name, t2.name as away_name,
                       l.name as league_name
                FROM matches m
                LEFT JOIN teams t1 ON m.home_team_id = t1.id
                LEFT JOIN teams t2 ON m.away_team_id = t2.id
                LEFT JOIN leagues l ON m.league_id = l.id
                WHERE m.sport = %s
                AND m.score_home IS NOT NULL
                AND m.score_away IS NOT NULL
                ORDER BY m.mst_utc ASC
            """, (sport,))

            matches = cur.fetchall()
            print(f"📊 {len(matches):,} maç işlenecek...")

            for match in matches:
                (match_id, home_id, away_id, score_h, score_a,
                 league_id, home_name, away_name, league) = match

                if not (home_id and away_id):
                    continue

                # Record pre-match ELO for top leagues only
                if not top_league_ids or league_id in top_league_ids:
                    home_elo_obj = self.get_or_create_rating(home_id, home_name or "")
                    away_elo_obj = self.get_or_create_rating(away_id, away_name or "")
                    batch.append((
                        match_id,
                        home_elo_obj.overall_elo,
                        away_elo_obj.overall_elo,
                        home_elo_obj.home_elo,
                        away_elo_obj.away_elo,
                        home_elo_obj.form_elo,
                        away_elo_obj.form_elo,
                    ))

                # Update ratings for every match
                self.update_after_match(
                    home_id, away_id, score_h, score_a,
                    home_name or "", away_name or "", league or ""
                )
                processed += 1

                if len(batch) >= BATCH_SIZE:
                    self._flush_elo_batch(cur, batch, sport)
                    conn.commit()
                    written += len(batch)
                    batch.clear()

                if processed % 10000 == 0:
                    print(f" İşlenen: {processed:,} / {len(matches):,}")

            # Write the remaining partial batch
            if batch:
                self._flush_elo_batch(cur, batch, sport)
                conn.commit()
                written += len(batch)
        except Exception:
            # Leave the connection usable for the next caller.
            self._safe_rollback(conn)
            raise
        finally:
            cur.close()

        print(f"✅ {processed:,} maç işlendi, {len(self.ratings)} takım")
        print(f"📝 {written:,} maç match_ai_features'a yazıldı")

        # Save to JSON
        self.save_ratings()

        # Save to DB
        self.save_ratings_to_db()

        # Show the top 20 teams
        self._show_top_teams()

    @staticmethod
    def _flush_elo_batch(cur, batch: list, sport: str = 'football') -> None:
        """Batch upsert pre-match ELO values into sport-partitioned ai_features table."""
        from psycopg2.extras import execute_values

        # Any sport other than 'football' is written to the basketball table.
        table_name = 'football_ai_features' if sport == 'football' else 'basketball_ai_features'
        sql = f"""
            INSERT INTO {table_name}
            (match_id, home_elo, away_elo,
             home_home_elo, away_away_elo,
             home_form_elo, away_form_elo,
             calculator_ver, updated_at)
            VALUES %s
            ON CONFLICT (match_id) DO UPDATE SET
            home_elo = EXCLUDED.home_elo,
            away_elo = EXCLUDED.away_elo,
            home_home_elo = EXCLUDED.home_home_elo,
            away_away_elo = EXCLUDED.away_away_elo,
            home_form_elo = EXCLUDED.home_form_elo,
            away_form_elo = EXCLUDED.away_form_elo,
            calculator_ver = EXCLUDED.calculator_ver,
            updated_at = EXCLUDED.updated_at
        """
        now = datetime.now().isoformat()
        values = [
            (mid, h_elo, a_elo, hh_elo, aa_elo, hf_elo, af_elo,
             'elo_v2_backfill', now)
            for mid, h_elo, a_elo, hh_elo, aa_elo, hf_elo, af_elo in batch
        ]
        execute_values(cur, sql, values, page_size=500)

    def _show_top_teams(self, n: int = 20):
        """Print the ``n`` teams with the highest overall ELO."""
        sorted_teams = sorted(
            self.ratings.items(),
            key=lambda x: x[1].overall_elo,
            reverse=True
        )[:n]

        print(f"\n🏆 Top {n} Takım (ELO V2):")
        for i, (team_id, elo) in enumerate(sorted_teams, 1):
            name = elo.team_name[:25] if elo.team_name else team_id[:25]
            print(f" {i:2}. {name:25} → {elo.overall_elo:.0f} (H:{elo.home_elo:.0f} A:{elo.away_elo:.0f})")
|
||||||
|
|
||||||
|
|
||||||
|
# Singleton
|
||||||
|
_system = None


def get_elo_system() -> ELORatingSystem:
    """Return the shared ELORatingSystem, creating it on the first call."""
    global _system
    if _system is not None:
        return _system
    _system = ELORatingSystem()
    return _system
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    import sys
    from pathlib import Path

    # Put the ai-engine root (two levels above this file) on sys.path so
    # that `from data.db import ...` resolves when run as a script.
    _AI_ENGINE_ROOT = Path(__file__).resolve().parent.parent
    _root_entry = str(_AI_ENGINE_ROOT)
    if _root_entry not in sys.path:
        sys.path.insert(0, _root_entry)

    system = get_elo_system()

    wants_calculation = len(sys.argv) > 1 and sys.argv[1] == 'calculate'
    if wants_calculation:
        system.calculate_all_from_history('football')
    else:
        # No sub-command: print usage plus a short summary of loaded ratings.
        print("\n🧪 ELO V2 Test")
        print("Kullanım: python elo_system.py calculate")
        print(f"\n📊 Yüklü takım sayısı: {len(system.ratings)}")

        # NOTE(review): original indentation was not recoverable; the top-team
        # listing is assumed to belong to this branch only — confirm.
        if len(system.ratings) > 0:
            system._show_top_teams(10)
|
||||||
@@ -0,0 +1,990 @@
|
|||||||
|
"""
|
||||||
|
Feature Extractor - V2 Betting Engine
|
||||||
|
Pulls historical team stats, ELO, missing-player impact and live odds from
|
||||||
|
PostgreSQL and engineers a leakage-free feature vector for the ensemble model.
|
||||||
|
|
||||||
|
CRITICAL: Only pre-match data (matches before the target match) is used.
|
||||||
|
Post-match stats of the target match are NEVER included.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from sqlalchemy import text
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Number of most recent matches averaged per team in _rolling_team_stats.
ROLLING_WINDOW: int = 5
# Maximum number of past head-to-head meetings read by _load_h2h_stats.
H2H_WINDOW: int = 10
# Upper bound, in days, applied to the value returned by _load_rest_days.
MAX_REST_DAYS: float = 14.0
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class MatchFeatures:
    """Pre-match feature record for a single fixture.

    Every field has a neutral default so a partially populated instance is
    still usable. Only the 24 fields listed by ``feature_names`` are exported
    by ``to_model_array``; the remaining fields are context carried alongside.
    """

    # --- Identity ---------------------------------------------------------
    match_id: str = ""
    home_team_id: str = ""
    away_team_id: str = ""

    # --- ELO and stored AI features ---------------------------------------
    home_elo: float = 1500.0
    away_elo: float = 1500.0
    elo_diff: float = 0.0
    missing_players_impact: float = 0.0
    home_form_score: float = 0.0
    away_form_score: float = 0.0
    h2h_home_win_rate: float = 0.5
    h2h_sample_size: int = 0
    home_rest_days: float = 7.0
    away_rest_days: float = 7.0
    rest_diff: float = 0.0
    home_lineup_availability: float = 1.0
    away_lineup_availability: float = 1.0

    # --- Home rolling averages (last 5 matches) ---------------------------
    home_avg_possession: float = 50.0
    home_avg_shots_on_target: float = 4.0
    home_avg_total_shots: float = 10.0
    home_avg_goals_scored: float = 1.3
    home_avg_goals_conceded: float = 1.1

    # --- Away rolling averages (last 5 matches) ---------------------------
    away_avg_possession: float = 50.0
    away_avg_shots_on_target: float = 4.0
    away_avg_total_shots: float = 10.0
    away_avg_goals_scored: float = 1.3
    away_avg_goals_conceded: float = 1.1

    # --- Bookmaker implied probabilities ----------------------------------
    implied_prob_home: float = 0.33
    implied_prob_draw: float = 0.33
    implied_prob_away: float = 0.33
    implied_prob_over25: float = 0.50
    implied_prob_under25: float = 0.50
    implied_prob_btts_yes: float = 0.50
    implied_prob_btts_no: float = 0.50

    # --- Raw decimal odds (used downstream for Edge/Kelly) ----------------
    odds_home: float = 2.50
    odds_draw: float = 3.20
    odds_away: float = 2.80
    odds_over25: float = 1.90
    odds_under25: float = 1.90
    odds_btts_yes: float = 1.85
    odds_btts_no: float = 1.95

    # --- Data quality -----------------------------------------------------
    data_quality_score: float = 0.5
    data_quality_flags: list[str] = field(default_factory=list)

    # --- Metadata and contextual profiles ---------------------------------
    match_name: str = ""
    home_team_name: str = ""
    away_team_name: str = ""
    league_id: str = ""
    league_name: str = ""
    referee_name: str = ""
    match_date_ms: int = 0
    league_avg_goals: float = 2.6
    referee_avg_goals: float = 2.6
    referee_home_bias: float = 0.0
    home_squad_strength: float = 0.5
    away_squad_strength: float = 0.5
    home_key_players: float = 0.0
    away_key_players: float = 0.0

    def to_model_array(self) -> np.ndarray:
        """Return the 24 model inputs as a float64 vector.

        Values appear in exactly the order given by ``feature_names``.
        """
        ordered = [getattr(self, column) for column in MatchFeatures.feature_names()]
        return np.array(ordered, dtype=np.float64)

    @staticmethod
    def feature_names() -> list[str]:
        """Return the names of the 24 model inputs, in vector order."""
        return [
            "home_elo",
            "away_elo",
            "elo_diff",
            "missing_players_impact",
            "home_avg_possession",
            "home_avg_shots_on_target",
            "home_avg_total_shots",
            "home_avg_goals_scored",
            "home_avg_goals_conceded",
            "away_avg_possession",
            "away_avg_shots_on_target",
            "away_avg_total_shots",
            "away_avg_goals_scored",
            "away_avg_goals_conceded",
            "implied_prob_home",
            "implied_prob_draw",
            "implied_prob_away",
            "implied_prob_over25",
            "implied_prob_under25",
            "implied_prob_btts_yes",
            "implied_prob_btts_no",
            "odds_home",
            "odds_draw",
            "odds_away",
        ]
|
||||||
|
|
||||||
|
|
||||||
|
async def extract_features(session: AsyncSession, match_id: str) -> MatchFeatures | None:
    """Assemble the full feature record for one match.

    The steps run in a fixed order: match header, stored AI features,
    rolling team stats, rest days, head-to-head, league profile, referee
    profile, squad profiles, lineup context and finally odds. Each source
    that yields nothing adds a flag to ``data_quality_flags`` and lowers
    ``data_quality_score`` by a per-flag penalty.

    Args:
        session: Async database session used for every query.
        match_id: Identifier of the target match.

    Returns:
        ``None`` when the match header cannot be found. A ``MatchFeatures``
        with score ``0.1`` and only header fields set when a team id is
        missing. Otherwise the fully populated record.
    """
    header = await _load_match_header(session, match_id)
    if header is None:
        logger.warning("Match %s not found in live_matches or matches.", match_id)
        return None

    feats = MatchFeatures(match_id=match_id)
    issues: list[str] = []

    # --- Header / metadata -------------------------------------------------
    feats.home_team_id = header["home_team_id"] or ""
    feats.away_team_id = header["away_team_id"] or ""
    feats.match_name = header.get("match_name", "") or ""
    feats.match_date_ms = int(header.get("mst_utc", 0) or 0)
    for attr, column in (
        ("home_team_name", "home_name"),
        ("away_team_name", "away_name"),
        ("league_id", "league_id"),
        ("league_name", "league_name"),
        ("referee_name", "referee_name"),
    ):
        setattr(feats, attr, header.get(column, "") or "")
    kickoff_ms = feats.match_date_ms

    # Without both team ids nothing else can be looked up.
    if not (feats.home_team_id and feats.away_team_id):
        logger.warning("Match %s missing team IDs.", match_id)
        issues.append("missing_team_ids")
        feats.data_quality_flags = issues
        feats.data_quality_score = 0.1
        return feats

    # --- Stored AI features (ELO, form, cached H2H) ------------------------
    stored = await _load_ai_features(session, match_id)
    if not stored:
        issues.append("missing_ai_features")
    else:
        feats.home_elo = float(stored["home_elo"] or 1500.0)
        feats.away_elo = float(stored["away_elo"] or 1500.0)
        feats.missing_players_impact = float(stored["missing_players_impact"] or 0.0)
        feats.home_form_score = float(stored["home_form_score"] or 0.0)
        feats.away_form_score = float(stored["away_form_score"] or 0.0)
        if stored.get("h2h_home_win_rate") is not None:
            feats.h2h_home_win_rate = float(stored["h2h_home_win_rate"])
            feats.h2h_sample_size = int(stored.get("h2h_total") or 0)
    feats.elo_diff = feats.home_elo - feats.away_elo

    # --- Rolling team statistics -------------------------------------------
    home_rolling = await _rolling_team_stats(session, feats.home_team_id, kickoff_ms)
    away_rolling = await _rolling_team_stats(session, feats.away_team_id, kickoff_ms)
    for side, rolling, flag in (
        ("home", home_rolling, "missing_home_stats"),
        ("away", away_rolling, "missing_away_stats"),
    ):
        if rolling is None:
            issues.append(flag)
            continue
        for stat in (
            "avg_possession",
            "avg_shots_on_target",
            "avg_total_shots",
            "avg_goals_scored",
            "avg_goals_conceded",
        ):
            setattr(feats, f"{side}_{stat}", rolling[stat])

    # A form score of (effectively) zero is replaced by the goal difference
    # of the rolling averages.
    if abs(feats.home_form_score) < 1e-6:
        feats.home_form_score = round(
            feats.home_avg_goals_scored - feats.home_avg_goals_conceded, 3
        )
    if abs(feats.away_form_score) < 1e-6:
        feats.away_form_score = round(
            feats.away_avg_goals_scored - feats.away_avg_goals_conceded, 3
        )

    # --- Rest days ----------------------------------------------------------
    home_rest = await _load_rest_days(session, feats.home_team_id, kickoff_ms)
    away_rest = await _load_rest_days(session, feats.away_team_id, kickoff_ms)
    if home_rest is None:
        issues.append("missing_home_rest")
    else:
        feats.home_rest_days = home_rest
    if away_rest is None:
        issues.append("missing_away_rest")
    else:
        feats.away_rest_days = away_rest
    feats.rest_diff = round(feats.home_rest_days - feats.away_rest_days, 3)

    # --- Head-to-head (only when the stored features gave no sample) -------
    if feats.h2h_sample_size == 0:
        h2h = await _load_h2h_stats(
            session, feats.home_team_id, feats.away_team_id, kickoff_ms
        )
        if h2h is None:
            issues.append("missing_h2h")
        else:
            feats.h2h_home_win_rate = h2h["home_win_rate"]
            feats.h2h_sample_size = h2h["sample_size"]

    # --- League profile -----------------------------------------------------
    league = await _load_league_profile(session, feats.league_id, kickoff_ms)
    if league is None:
        issues.append("missing_league_profile")
    else:
        feats.league_avg_goals = league["avg_goals"]

    # --- Referee profile ----------------------------------------------------
    referee = await _load_referee_profile(session, feats.referee_name, kickoff_ms)
    if referee is None:
        issues.append("missing_referee_profile")
    else:
        feats.referee_avg_goals = referee["avg_goals"]
        feats.referee_home_bias = referee["home_bias"]

    # --- Squad profiles -----------------------------------------------------
    home_squad = await _load_team_squad_profile(session, feats.home_team_id, kickoff_ms)
    away_squad = await _load_team_squad_profile(session, feats.away_team_id, kickoff_ms)
    for side, squad, flag in (
        ("home", home_squad, "missing_home_squad_profile"),
        ("away", away_squad, "missing_away_squad_profile"),
    ):
        if squad is None:
            issues.append(flag)
            continue
        setattr(feats, f"{side}_squad_strength", squad["squad_strength"])
        setattr(feats, f"{side}_key_players", squad["key_players"])

    # --- Lineup / sidelined context ----------------------------------------
    lineup = _extract_lineup_context(header)
    feats.home_lineup_availability = lineup["home_availability"]
    feats.away_lineup_availability = lineup["away_availability"]
    if not lineup["has_real_lineup_data"]:
        issues.append("missing_lineup_context")
    else:
        # Mean unavailability of both sides; never lowers a stored impact.
        mean_unavailable = (
            (1.0 - feats.home_lineup_availability)
            + (1.0 - feats.away_lineup_availability)
        ) / 2.0
        feats.missing_players_impact = max(
            feats.missing_players_impact,
            round(mean_unavailable, 4),
        )

    # --- Odds ---------------------------------------------------------------
    if not await _extract_odds(session, match_id, feats):
        issues.append("missing_odds")

    # --- Data-quality score -------------------------------------------------
    penalties = {
        "missing_team_ids": 0.5,
        "missing_ai_features": 0.05,
        "missing_home_stats": 0.15,
        "missing_away_stats": 0.15,
        "missing_home_rest": 0.05,
        "missing_away_rest": 0.05,
        "missing_h2h": 0.05,
        "missing_league_profile": 0.04,
        "missing_referee_profile": 0.04,
        "missing_home_squad_profile": 0.06,
        "missing_away_squad_profile": 0.06,
        "missing_lineup_context": 0.05,
        "missing_odds": 0.2,
    }
    score = 1.0
    for issue in issues:
        score -= penalties.get(issue, 0.05)
    feats.data_quality_score = max(0.0, round(score, 2))
    feats.data_quality_flags = issues

    return feats
|
||||||
|
|
||||||
|
|
||||||
|
async def _load_match_header(
    session: AsyncSession, match_id: str,
) -> dict[str, Any] | None:
    """Fetch header data for a match, preferring ``live_matches``.

    ``live_matches`` is queried first; ``matches`` is the fallback. Both
    queries return the same column set. The ``matches`` variant takes the
    referee name from ``match_officials`` (``role_id = 1``) and returns
    NULL for ``lineups`` and ``sidelined``.

    Returns:
        The first matching row as a plain dict, or ``None`` if neither
        table has the match.
    """
    live_sql = """
        SELECT
            m.id,
            m.home_team_id,
            m.away_team_id,
            m.match_name,
            m.mst_utc,
            m.sport,
            m.league_id,
            m.referee_name,
            m.lineups,
            m.sidelined,
            ht.name AS home_name,
            at.name AS away_name,
            l.name AS league_name
        FROM live_matches m
        LEFT JOIN teams ht ON ht.id = m.home_team_id
        LEFT JOIN teams at ON at.id = m.away_team_id
        LEFT JOIN leagues l ON l.id = m.league_id
        WHERE m.id = :match_id
        LIMIT 1
    """
    archive_sql = """
        SELECT
            m.id,
            m.home_team_id,
            m.away_team_id,
            m.match_name,
            m.mst_utc,
            m.sport,
            m.league_id,
            ref.name AS referee_name,
            NULL AS lineups,
            NULL AS sidelined,
            ht.name AS home_name,
            at.name AS away_name,
            l.name AS league_name
        FROM matches m
        LEFT JOIN teams ht ON ht.id = m.home_team_id
        LEFT JOIN teams at ON at.id = m.away_team_id
        LEFT JOIN leagues l ON l.id = m.league_id
        LEFT JOIN match_officials ref ON ref.match_id = m.id AND ref.role_id = 1
        WHERE m.id = :match_id
        LIMIT 1
    """
    # Order matters: the live table wins when both contain the match.
    for sql in (live_sql, archive_sql):
        outcome = await session.execute(text(sql), {"match_id": match_id})
        hit = outcome.mappings().first()
        if hit:
            return dict(hit)
    return None
|
||||||
|
|
||||||
|
|
||||||
|
async def _load_ai_features(
    session: AsyncSession, match_id: str,
) -> dict[str, Any] | None:
    """Read the stored ``football_ai_features`` row for a match.

    Returns:
        A dict with ELO, missing-player impact, form scores and cached
        head-to-head columns, or ``None`` when no row exists.
    """
    statement = text("""
        SELECT
            home_elo,
            away_elo,
            missing_players_impact,
            home_form_score,
            away_form_score,
            h2h_home_win_rate,
            h2h_total
        FROM football_ai_features
        WHERE match_id = :match_id
        LIMIT 1
    """)
    outcome = await session.execute(statement, {"match_id": match_id})
    record = outcome.mappings().first()
    if not record:
        return None
    return dict(record)
|
||||||
|
|
||||||
|
|
||||||
|
async def _rolling_team_stats(
    session: AsyncSession,
    team_id: str,
    before_mst_utc: int,
) -> dict[str, float] | None:
    """Average a team's stats over its latest ``ROLLING_WINDOW`` matches.

    Only football matches strictly before ``before_mst_utc`` with both
    scores present are used. Because of the inner join, a match also needs
    a ``football_team_stats`` row for this team. Goals scored/conceded are
    taken from the team's own perspective regardless of venue.

    Returns:
        A dict of five averages rounded to 2 decimals, or ``None`` when no
        qualifying match exists.
    """
    statement = text("""
        WITH recent AS (
            SELECT
                m.id AS match_id,
                m.home_team_id,
                m.away_team_id,
                m.score_home,
                m.score_away,
                ts.possession_percentage,
                ts.shots_on_target,
                ts.total_shots
            FROM matches m
            JOIN football_team_stats ts ON ts.match_id = m.id AND ts.team_id = :team_id
            WHERE (m.home_team_id = :team_id OR m.away_team_id = :team_id)
              AND m.mst_utc < :before_ts
              AND m.sport = 'football'
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
            ORDER BY m.mst_utc DESC
            LIMIT :window
        )
        SELECT
            COALESCE(AVG(possession_percentage), 50.0) AS avg_possession,
            COALESCE(AVG(shots_on_target), 4.0) AS avg_shots_on_target,
            COALESCE(AVG(total_shots), 10.0) AS avg_total_shots,
            COALESCE(AVG(
                CASE
                    WHEN home_team_id = :team_id THEN score_home
                    ELSE score_away
                END
            ), 1.3) AS avg_goals_scored,
            COALESCE(AVG(
                CASE
                    WHEN home_team_id = :team_id THEN score_away
                    ELSE score_home
                END
            ), 1.1) AS avg_goals_conceded,
            COUNT(*) AS match_count
        FROM recent
    """)
    bind = {"team_id": team_id, "before_ts": before_mst_utc, "window": ROLLING_WINDOW}
    summary = (await session.execute(statement, bind)).mappings().first()

    # The aggregate always yields a row; an empty window shows as count 0.
    if summary is None or int(summary["match_count"]) == 0:
        return None

    columns = (
        "avg_possession",
        "avg_shots_on_target",
        "avg_total_shots",
        "avg_goals_scored",
        "avg_goals_conceded",
    )
    return {column: round(float(summary[column]), 2) for column in columns}
|
||||||
|
|
||||||
|
|
||||||
|
async def _load_rest_days(
    session: AsyncSession,
    team_id: str,
    before_mst_utc: int,
) -> float | None:
    """Return days elapsed since the team's previous football match.

    Both timestamps are treated as milliseconds (the difference is divided
    by 86,400,000). The result is clamped to ``[0, MAX_REST_DAYS]`` and
    rounded to 3 decimals.

    Returns:
        The clamped rest days, or ``None`` when no earlier match exists.
    """
    statement = text("""
        SELECT m.mst_utc
        FROM matches m
        WHERE (m.home_team_id = :team_id OR m.away_team_id = :team_id)
          AND m.mst_utc < :before_ts
          AND m.sport = 'football'
        ORDER BY m.mst_utc DESC
        LIMIT 1
    """)
    bind = {"team_id": team_id, "before_ts": before_mst_utc}
    previous_ts = (await session.execute(statement, bind)).scalar_one_or_none()
    if previous_ts is None:
        return None

    elapsed_days = (float(before_mst_utc) - float(previous_ts)) / 86400000.0
    non_negative = max(0.0, elapsed_days)
    return round(min(non_negative, MAX_REST_DAYS), 3)
|
||||||
|
|
||||||
|
|
||||||
|
async def _load_h2h_stats(
    session: AsyncSession,
    home_team_id: str,
    away_team_id: str,
    before_mst_utc: int,
) -> dict[str, float | int] | None:
    """Summarise recent meetings between the two teams.

    Looks at up to ``H2H_WINDOW`` scored football matches before
    ``before_mst_utc`` in either venue arrangement. Results are counted
    from the perspective of ``home_team_id`` (the upcoming home side), and
    a draw counts as half a win.

    Returns:
        ``{"home_win_rate": float, "sample_size": int}`` or ``None`` when
        there are no usable meetings.
    """
    statement = text("""
        SELECT
            m.home_team_id,
            m.away_team_id,
            m.score_home,
            m.score_away
        FROM matches m
        WHERE m.sport = 'football'
          AND m.mst_utc < :before_ts
          AND m.score_home IS NOT NULL
          AND m.score_away IS NOT NULL
          AND (
                (m.home_team_id = :home_team_id AND m.away_team_id = :away_team_id)
                OR
                (m.home_team_id = :away_team_id AND m.away_team_id = :home_team_id)
          )
        ORDER BY m.mst_utc DESC
        LIMIT :window
    """)
    bind = {
        "home_team_id": home_team_id,
        "away_team_id": away_team_id,
        "before_ts": before_mst_utc,
        "window": H2H_WINDOW,
    }
    meetings = (await session.execute(statement, bind)).mappings().all()
    if not meetings:
        return None

    wins = 0.0
    ties = 0.0
    counted = 0
    for meeting in meetings:
        listed_home_goals = meeting["score_home"]
        listed_away_goals = meeting["score_away"]
        if listed_home_goals is None or listed_away_goals is None:
            continue
        counted += 1

        # Re-orient the score so "ours" is always the upcoming home side.
        if meeting["home_team_id"] == home_team_id:
            ours, theirs = float(listed_home_goals), float(listed_away_goals)
        else:
            ours, theirs = float(listed_away_goals), float(listed_home_goals)

        if ours > theirs:
            wins += 1.0
        elif ours == theirs:
            ties += 1.0

    if counted == 0:
        return None

    # Draws contribute half a win rather than being discarded.
    rate = round((wins + ties * 0.5) / counted, 4)
    return {"home_win_rate": rate, "sample_size": counted}
|
||||||
|
|
||||||
|
|
||||||
|
async def _load_league_profile(
    session: AsyncSession,
    league_id: str,
    before_mst_utc: int,
) -> dict[str, float] | None:
    """Compute the league's average total goals per match.

    Uses the latest 100 football matches in the league with status ``'FT'``,
    both scores present, and a kickoff before ``before_mst_utc``.

    Returns:
        ``{"avg_goals": float}`` rounded to 3 decimals, or ``None`` when
        ``league_id`` is empty or no match qualifies.
    """
    if not league_id:
        return None

    statement = text("""
        SELECT
            COALESCE(AVG(m.score_home + m.score_away), 2.6) AS avg_goals,
            COUNT(*) AS match_count
        FROM (
            SELECT score_home, score_away
            FROM matches
            WHERE league_id = :league_id
              AND sport = 'football'
              AND status = 'FT'
              AND score_home IS NOT NULL
              AND score_away IS NOT NULL
              AND mst_utc < :before_ts
            ORDER BY mst_utc DESC
            LIMIT 100
        ) m
    """)
    bind = {"league_id": league_id, "before_ts": before_mst_utc}
    summary = (await session.execute(statement, bind)).mappings().first()
    if summary is None:
        return None
    if int(summary["match_count"] or 0) == 0:
        return None
    return {"avg_goals": round(float(summary["avg_goals"]), 3)}
|
||||||
|
|
||||||
|
|
||||||
|
async def _load_referee_profile(
    session: AsyncSession,
    referee_name: str,
    before_mst_utc: int,
) -> dict[str, float] | None:
    """Compute goal and home-win tendencies for a referee.

    Uses the latest 30 football matches with status ``'FT'`` officiated
    (``role_id = 1``) by the named referee before ``before_mst_utc``.
    ``home_bias`` is the observed home-win share minus a 0.46 baseline.

    Returns:
        ``{"home_bias": float, "avg_goals": float}`` or ``None`` when
        ``referee_name`` is empty or no match qualifies.
    """
    if not referee_name:
        return None

    statement = text("""
        SELECT
            COALESCE(AVG(CASE WHEN score_home > score_away THEN 1.0 ELSE 0.0 END), 0.46) - 0.46 AS home_bias,
            COALESCE(AVG(score_home + score_away), 2.6) AS avg_goals,
            COUNT(*) AS match_count
        FROM (
            SELECT m.score_home, m.score_away
            FROM match_officials mo
            JOIN matches m ON m.id = mo.match_id
            WHERE mo.name = :referee_name
              AND mo.role_id = 1
              AND m.sport = 'football'
              AND m.status = 'FT'
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
              AND m.mst_utc < :before_ts
            ORDER BY m.mst_utc DESC
            LIMIT 30
        ) ref_matches
    """)
    bind = {"referee_name": referee_name, "before_ts": before_mst_utc}
    summary = (await session.execute(statement, bind)).mappings().first()
    if summary is None:
        return None
    if int(summary["match_count"] or 0) == 0:
        return None

    bias = round(float(summary["home_bias"]), 4)
    goals = round(float(summary["avg_goals"]), 3)
    return {"home_bias": bias, "avg_goals": goals}
|
||||||
|
|
||||||
|
|
||||||
|
async def _load_team_squad_profile(
    session: AsyncSession,
    team_id: str,
    before_mst_utc: int,
) -> dict[str, float] | None:
    """Score a team's squad from its last 8 finished football matches.

    Per player the SQL computes
    ``starts*1.5 + substitute_appearances*0.5 + goals*2.5 + assists*1.5``.
    The 11 highest scores are averaged; players scoring at least 6.0 are
    counted as key players.

    Returns:
        ``{"squad_strength": avg_top_score / 10 clamped to [0, 1],
        "key_players": float}``, or ``None`` when ``team_id`` is empty or
        the team has no qualifying match.
    """
    if not team_id:
        return None

    statement = text("""
        WITH recent_matches AS (
            SELECT m.id, m.mst_utc
            FROM matches m
            WHERE (m.home_team_id = :team_id OR m.away_team_id = :team_id)
              AND m.sport = 'football'
              AND m.status = 'FT'
              AND m.mst_utc < :before_ts
            ORDER BY m.mst_utc DESC
            LIMIT 8
        ),
        player_base AS (
            SELECT
                mpp.player_id,
                COUNT(*)::float AS appearances,
                COUNT(*) FILTER (WHERE mpp.is_starting = true)::float AS starts
            FROM match_player_participation mpp
            JOIN recent_matches rm ON rm.id = mpp.match_id
            WHERE mpp.team_id = :team_id
            GROUP BY mpp.player_id
        ),
        player_goals AS (
            SELECT
                mpe.player_id,
                COUNT(*) FILTER (
                    WHERE mpe.event_type = 'goal'
                      AND COALESCE(mpe.event_subtype, '') NOT ILIKE '%penaltı kaçırma%'
                )::float AS goals,
                0.0::float AS assists
            FROM match_player_events mpe
            JOIN recent_matches rm ON rm.id = mpe.match_id
            WHERE mpe.team_id = :team_id
            GROUP BY mpe.player_id
            UNION ALL
            SELECT
                mpe.assist_player_id AS player_id,
                0.0::float AS goals,
                COUNT(*) FILTER (
                    WHERE mpe.event_type = 'goal'
                      AND mpe.assist_player_id IS NOT NULL
                )::float AS assists
            FROM match_player_events mpe
            JOIN recent_matches rm ON rm.id = mpe.match_id
            WHERE mpe.team_id = :team_id
              AND mpe.assist_player_id IS NOT NULL
            GROUP BY mpe.assist_player_id
        ),
        player_events AS (
            SELECT
                player_id,
                SUM(goals) AS goals,
                SUM(assists) AS assists
            FROM player_goals
            GROUP BY player_id
        ),
        player_scores AS (
            SELECT
                pb.player_id,
                (pb.starts * 1.5)
                + ((pb.appearances - pb.starts) * 0.5)
                + (COALESCE(pe.goals, 0.0) * 2.5)
                + (COALESCE(pe.assists, 0.0) * 1.5) AS score
            FROM player_base pb
            LEFT JOIN player_events pe ON pe.player_id = pb.player_id
        )
        SELECT
            COALESCE(AVG(top_players.score), 0.0) AS avg_top_score,
            COALESCE(COUNT(*) FILTER (WHERE top_players.score >= 6.0), 0) AS key_players,
            COALESCE((SELECT COUNT(*) FROM recent_matches), 0) AS match_count
        FROM (
            SELECT score
            FROM player_scores
            ORDER BY score DESC
            LIMIT 11
        ) top_players
    """)
    bind = {"team_id": team_id, "before_ts": before_mst_utc}
    summary = (await session.execute(statement, bind)).mappings().first()
    if summary is None:
        return None
    if int(summary["match_count"] or 0) == 0:
        return None

    top_average = float(summary["avg_top_score"] or 0.0)
    # Scale to a 0..1 strength: clamp below at 0 first, then above at 1.
    strength = min(max(top_average / 10.0, 0.0), 1.0)
    return {
        "squad_strength": round(strength, 4),
        "key_players": float(summary["key_players"] or 0),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_json(value: Any) -> dict[str, Any] | None:
|
||||||
|
if value is None:
|
||||||
|
return None
|
||||||
|
if isinstance(value, dict):
|
||||||
|
return value
|
||||||
|
if isinstance(value, str):
|
||||||
|
try:
|
||||||
|
parsed = json.loads(value)
|
||||||
|
except (TypeError, json.JSONDecodeError):
|
||||||
|
return None
|
||||||
|
return parsed if isinstance(parsed, dict) else None
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_list(value: Any) -> list[Any]:
|
||||||
|
if isinstance(value, list):
|
||||||
|
return value
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_lineup_context(match_row: dict[str, Any]) -> dict[str, float | bool]:
    """Derive lineup availability for both sides from the match header.

    Reads the ``lineups`` and ``sidelined`` JSON payloads of ``match_row``.
    The starting-XI size comes from ``lineups[side]["xi"]``; the sidelined
    count is the larger of ``totalSidelined`` and the length of ``players``
    under ``sidelined["homeTeam"]`` / ``sidelined["awayTeam"]``.

    Malformed payloads are tolerated: a nested node that is null or not an
    object is treated as empty, and a non-numeric ``totalSidelined`` counts
    as 0, instead of raising.

    Args:
        match_row: Header row; only the ``lineups`` and ``sidelined`` keys
            are read.

    Returns:
        A dict with ``home_availability`` and ``away_availability`` (floats
        in ``[0, 1]``) and ``has_real_lineup_data`` (``True`` when any XI or
        sidelined count is positive).
    """

    def _as_dict(node: Any) -> dict[str, Any]:
        # JSON may carry null / list / scalar where an object is expected.
        return node if isinstance(node, dict) else {}

    def _sidelined_count(team_node: Any) -> int:
        team = _as_dict(team_node)
        try:
            declared = int(team.get("totalSidelined") or 0)
        except (TypeError, ValueError):
            declared = 0
        return max(declared, len(_safe_list(team.get("players"))))

    lineups = _safe_json(match_row.get("lineups"))
    sidelined = _safe_json(match_row.get("sidelined"))

    home_xi_count = 0
    away_xi_count = 0
    home_sidelined_count = 0
    away_sidelined_count = 0

    if lineups:
        home_xi_count = len(_safe_list(_as_dict(lineups.get("home")).get("xi")))
        away_xi_count = len(_safe_list(_as_dict(lineups.get("away")).get("xi")))

    if sidelined:
        home_sidelined_count = _sidelined_count(sidelined.get("homeTeam"))
        away_sidelined_count = _sidelined_count(sidelined.get("awayTeam"))

    has_real_lineup_data = any(
        count > 0
        for count in (
            home_xi_count,
            away_xi_count,
            home_sidelined_count,
            away_sidelined_count,
        )
    )

    return {
        "home_availability": _compute_availability(home_xi_count, home_sidelined_count),
        "away_availability": _compute_availability(away_xi_count, away_sidelined_count),
        "has_real_lineup_data": has_real_lineup_data,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _compute_availability(xi_count: int, sidelined_count: int) -> float:
|
||||||
|
xi_ratio = min(max(xi_count / 11.0, 0.0), 1.0) if xi_count > 0 else 1.0
|
||||||
|
sidelined_penalty = min(max(sidelined_count / 11.0, 0.0), 1.0) * 0.35
|
||||||
|
return round(min(max(xi_ratio - sidelined_penalty, 0.0), 1.0), 4)
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_odd(val: Any) -> float:
|
||||||
|
"""Parse an odds value that might be str, float, int, or None."""
|
||||||
|
if val is None:
|
||||||
|
return 0.0
|
||||||
|
try:
|
||||||
|
parsed = float(val)
|
||||||
|
return parsed if parsed > 1.0 else 0.0
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def _implied_prob(decimal_odd: float) -> float:
|
||||||
|
"""Convert decimal odds to implied probability, clamped [0, 1]."""
|
||||||
|
if decimal_odd <= 1.0:
|
||||||
|
return 0.0
|
||||||
|
return min(1.0, 1.0 / decimal_odd)
|
||||||
|
|
||||||
|
|
||||||
|
async def _extract_odds(
    session: AsyncSession,
    match_id: str,
    feats: MatchFeatures,
) -> bool:
    """Extract odds from live JSON first, then relational tables.

    The live-match JSON blob is tried first; the relational odds tables are
    queried only when the JSON path reports nothing found. When either
    source succeeds, the ``implied_prob_*`` attributes on ``feats`` are
    derived from the matching ``odds_*`` attributes (rounded to 4 decimals).

    Returns:
        Whether any odds source reported data for the match.
    """
    live_blob = await _load_live_odds_json(session, match_id)
    found = _parse_odds_json(live_blob, feats) if live_blob else False

    if not found:
        found = await _load_relational_odds(session, match_id, feats)

    if not found:
        return found

    # Each market has an odds_<market> attribute and an implied_prob_<market>
    # counterpart on feats.
    for market in (
        "home",
        "draw",
        "away",
        "over25",
        "under25",
        "btts_yes",
        "btts_no",
    ):
        odd = getattr(feats, f"odds_{market}")
        setattr(feats, f"implied_prob_{market}", round(_implied_prob(odd), 4))

    return found
|
||||||
|
|
||||||
|
|
||||||
|
async def _load_live_odds_json(
    session: AsyncSession, match_id: str,
) -> dict[str, Any] | None:
    """Fetch and decode the ``odds`` column of a live match.

    The column value may arrive as a JSON string or as an already-decoded
    dict/list. Returns the decoded dict or list, or None when there is no
    row, the string is not valid JSON, or the value has any other type.
    """
    stmt = text("SELECT odds FROM live_matches WHERE id = :mid AND odds IS NOT NULL")
    payload = (await session.execute(stmt, {"mid": match_id})).scalar_one_or_none()

    if isinstance(payload, str):
        try:
            payload = json.loads(payload)
        except (json.JSONDecodeError, TypeError):
            return None

    # Only container shapes are usable downstream; None and scalars are dropped.
    return payload if isinstance(payload, (dict, list)) else None
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_odds_json(odds_blob: dict[str, Any] | list[Any], feats: MatchFeatures) -> bool:
    """Parse the Mackolik-style odds JSON structure.

    Accepts either a list of category dicts, or a dict holding the
    categories under ``"categories"`` / ``"odds"`` (as a list or as a dict
    of category dicts). For the match-result, over/under 2.5 and BTTS
    categories, each valid odd is written onto the matching ``feats.odds_*``
    attribute; attributes without a valid odd keep their current value.

    Returns:
        True only when at least one valid odd was written. A category whose
        name matches but which carries no usable odds does not count, so
        the caller can still fall back to another source.
    """
    if isinstance(odds_blob, list):
        raw_categories: Any = odds_blob
    elif isinstance(odds_blob, dict):
        raw_categories = odds_blob.get("categories", odds_blob.get("odds", []))
        if isinstance(raw_categories, dict):
            raw_categories = list(raw_categories.values())
    else:
        raw_categories = []

    if not isinstance(raw_categories, list):
        raw_categories = []
    categories = [item for item in raw_categories if isinstance(item, dict)]

    # (accepted category names, ((feats attribute, selection-name aliases), ...))
    markets = (
        (
            ("mac sonucu", "match result", "1x2", "maç sonucu"),
            (("odds_home", ("1",)), ("odds_draw", ("x",)), ("odds_away", ("2",))),
        ),
        (
            ("2,5 alt/ust", "over/under 2.5", "2.5 alt/ust", "2,5 alt/üst", "2.5 alt/üst"),
            (("odds_over25", ("ust", "over", "üst")), ("odds_under25", ("alt", "under"))),
        ),
        (
            ("karsilikli gol", "both teams to score", "btts", "karşılıklı gol"),
            (("odds_btts_yes", ("var", "yes")), ("odds_btts_no", ("yok", "no"))),
        ),
    )

    found_any = False
    for cat in categories:
        # str() guards against non-string names in the payload.
        cat_name = str(cat.get("name") or cat.get("cn") or "").strip().lower()
        selections = cat.get("selections") or cat.get("s") or []

        for names, targets in markets:
            if cat_name not in names:
                continue
            sels = _selections_to_map(selections)
            for attr, aliases in targets:
                # First alias carrying a truthy value wins.
                raw_value = next((sels[a] for a in aliases if sels.get(a)), None)
                odd = _safe_odd(raw_value)
                if odd:
                    setattr(feats, attr, odd)
                    found_any = True
            break

    return found_any
|
||||||
|
|
||||||
|
|
||||||
|
def _selections_to_map(selections: list[Any] | dict[str, Any]) -> dict[str, Any]:
|
||||||
|
"""Normalize varied selection structures into {name_lower: odd_value}."""
|
||||||
|
result: dict[str, Any] = {}
|
||||||
|
if isinstance(selections, dict):
|
||||||
|
for key, value in selections.items():
|
||||||
|
result[str(key).strip().lower()] = value
|
||||||
|
elif isinstance(selections, list):
|
||||||
|
for sel in selections:
|
||||||
|
if isinstance(sel, dict):
|
||||||
|
name = (sel.get("name") or sel.get("n") or "").strip().lower()
|
||||||
|
value = sel.get("odd_value") or sel.get("ov") or sel.get("v")
|
||||||
|
if name:
|
||||||
|
result[name] = value
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
async def _load_relational_odds(
    session: AsyncSession, match_id: str, feats: MatchFeatures,
) -> bool:
    """Fallback: load odds from odd_categories + odd_selections.

    Reads the match-result, over/under 2.5 and BTTS selections for the match
    and writes every valid odd (> 1.0) onto the matching ``feats.odds_*``
    attribute.

    Returns:
        True only when at least one odd was written to ``feats``. Rows that
        carry no usable value or an unknown selection name do not count.
    """
    query = text("""
        SELECT oc.name AS cat_name, os.name AS sel_name, os.odd_value
        FROM odd_categories oc
        JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
        WHERE oc.match_id = :match_id
          AND oc.name IN ('Maç Sonucu', '2,5 Alt/Üst', 'Karşılıklı Gol')
    """)
    result = await session.execute(query, {"match_id": match_id})
    rows = result.mappings().all()
    if not rows:
        return False

    # (category name, lower-cased selection name) -> feats attribute
    targets = {
        ("Maç Sonucu", "1"): "odds_home",
        ("Maç Sonucu", "x"): "odds_draw",
        ("Maç Sonucu", "2"): "odds_away",
        ("2,5 Alt/Üst", "üst"): "odds_over25",
        ("2,5 Alt/Üst", "ust"): "odds_over25",
        ("2,5 Alt/Üst", "over"): "odds_over25",
        ("2,5 Alt/Üst", "alt"): "odds_under25",
        ("2,5 Alt/Üst", "under"): "odds_under25",
        ("Karşılıklı Gol", "var"): "odds_btts_yes",
        ("Karşılıklı Gol", "yes"): "odds_btts_yes",
        ("Karşılıklı Gol", "yok"): "odds_btts_no",
        ("Karşılıklı Gol", "no"): "odds_btts_no",
    }

    found = False
    for row in rows:
        cat = (row["cat_name"] or "").strip()
        sel = (row["sel_name"] or "").strip().lower()
        value = _safe_odd(row["odd_value"])
        if value <= 1.0:
            continue

        attr = targets.get((cat, sel))
        if attr is not None:
            setattr(feats, attr, value)
            found = True

    return found
|
||||||
Executable
+256
@@ -0,0 +1,256 @@
|
|||||||
|
"""
|
||||||
|
Feature Adapter for XGBoost Inference
|
||||||
|
=====================================
|
||||||
|
Bridges the gap between V20 Engine outputs (CalculationContext) and XGBoost Models.
|
||||||
|
Constructs the exact feature vector used in training (columns listed in FEATURES).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import psycopg2
|
||||||
|
from psycopg2.extensions import connection as PgConnection
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from data.db import get_clean_dsn
|
||||||
|
|
||||||
|
# Feature definitions (Must match train_xgboost_markets.py)
# Column order of the single-row DataFrame handed to the XGBoost models.
# NOTE(review): this list holds 66 names, not 68 — confirm against the
# feature count the trained XGBoost models expect.
FEATURES = [
    # ELO
    "home_overall_elo", "away_overall_elo", "elo_diff",
    "home_home_elo", "away_away_elo", "form_elo_diff",

    # Form
    "home_goals_avg", "home_conceded_avg",
    "away_goals_avg", "away_conceded_avg",
    "home_clean_sheet_rate", "away_clean_sheet_rate",
    "home_scoring_rate", "away_scoring_rate",
    "home_winning_streak", "away_winning_streak",

    # H2H
    "h2h_home_win_rate", "h2h_draw_rate",
    "h2h_avg_goals", "h2h_btts_rate", "h2h_over25_rate",

    # Stats
    "home_avg_possession", "away_avg_possession",
    "home_avg_shots_on_target", "away_avg_shots_on_target",
    "home_shot_conversion", "away_shot_conversion",

    # Odds (Implicit market wisdom)
    "odds_ms_h", "odds_ms_d", "odds_ms_a",
    "implied_home", "implied_draw", "implied_away",

    "odds_ht_ms_h", "odds_ht_ms_d", "odds_ht_ms_a",

    "odds_ou05_o", "odds_ou05_u",
    "odds_ou15_o", "odds_ou15_u",
    "odds_ou25_o", "odds_ou25_u",
    "odds_ou35_o", "odds_ou35_u",

    "odds_ht_ou05_o", "odds_ht_ou05_u",
    "odds_ht_ou15_o", "odds_ht_ou15_u",

    "odds_btts_y", "odds_btts_n",

    # League/Context
    "league_avg_goals", "league_zero_goal_rate",
    "home_xga", "away_xga",

    # Upset features
    "upset_atmosphere", "upset_motivation", "upset_fatigue", "upset_potential",

    # Referee features
    "referee_home_bias", "referee_avg_goals", "referee_cards_total",
    "referee_avg_yellow", "referee_experience",

    # Momentum features
    "home_momentum_score", "away_momentum_score", "momentum_diff",
]
|
||||||
|
|
||||||
|
class FeatureAdapter:
    """
    Adapter to convert V20 context into XGBoost-compatible features.

    Builds a single-row DataFrame whose columns follow the module-level
    FEATURES list. The database connection is optional and only used to
    enrich the row with per-league goal statistics.
    """

    # League statistics used when no league id, connection, or DB row is available.
    _DEFAULT_LEAGUE_STATS = {"avg_goals": 2.7, "zero_rate": 0.07}

    # Columns read from ctx.team_pred.raw_features, with the value used when
    # the raw entry is missing or falsy.
    _RAW_DEFAULTS = {
        # ELO
        "home_overall_elo": 1500,
        "away_overall_elo": 1500,
        "elo_diff": 0,
        "home_home_elo": 1500,
        "away_away_elo": 1500,
        "form_elo_diff": 0,
        # Form
        "home_goals_avg": 1.3,
        "home_conceded_avg": 1.2,
        "away_goals_avg": 1.2,
        "away_conceded_avg": 1.4,
        "home_clean_sheet_rate": 0.2,
        "away_clean_sheet_rate": 0.2,
        "home_scoring_rate": 0.8,
        "away_scoring_rate": 0.8,
        "home_winning_streak": 0,
        "away_winning_streak": 0,
        # H2H
        "h2h_home_win_rate": 0.33,
        "h2h_draw_rate": 0.33,
        "h2h_avg_goals": 2.5,
        "h2h_btts_rate": 0.5,
        "h2h_over25_rate": 0.5,
        # Stats
        "home_avg_possession": 0.5,
        "away_avg_possession": 0.5,
        "home_avg_shots_on_target": 4.0,
        "away_avg_shots_on_target": 3.5,
        "home_shot_conversion": 0.1,
        "away_shot_conversion": 0.1,
        # Defensive context
        "home_xga": 1.2,
        "away_xga": 1.4,
        # Upset features
        "upset_atmosphere": 0.0,
        "upset_motivation": 0.0,
        "upset_fatigue": 0.0,
        "upset_potential": 0.0,
        # Referee features
        "referee_home_bias": 0.0,
        "referee_avg_goals": 2.5,
        "referee_cards_total": 4.0,
        "referee_avg_yellow": 3.0,
        "referee_experience": 0,
        # Momentum features
        "home_momentum_score": 0.0,
        "away_momentum_score": 0.0,
        "momentum_diff": 0.0,
    }

    # ctx.odds_data keys copied to "odds_<key>" columns (0.0 when absent).
    _ODDS_KEYS = (
        "ht_ms_h", "ht_ms_d", "ht_ms_a",
        "ou05_o", "ou05_u",
        "ou15_o", "ou15_u",
        "ou25_o", "ou25_u",
        "ou35_o", "ou35_u",
        "ht_ou05_o", "ht_ou05_u",
        "ht_ou15_o", "ht_ou15_u",
        "btts_y", "btts_n",
    )

    def __init__(self) -> None:
        self.conn: PgConnection | None = None
        self._connect_db()
        self.league_stats_cache: dict[str, dict[str, float]] = {}

    def _connect_db(self) -> None:
        """Open the optional DB connection; leave ``self.conn`` as None on failure."""
        try:
            # FeatureAdapter uses DB only for optional league stats enrichment.
            # Keep startup non-blocking when DB/tunnel is unavailable.
            if not os.getenv("DATABASE_URL", "").strip():
                return
            self.conn = psycopg2.connect(get_clean_dsn())
        except Exception as e:
            print(f"⚠️ FeatureAdapter DB connection failed: {e}")

    def get_features(self, ctx: Any) -> pd.DataFrame:
        """
        Construct feature vector from CalculationContext.

        Reads ``ctx.team_pred.raw_features``, ``ctx.odds_data`` and
        ``ctx.league_id``. Missing or falsy inputs fall back to the defaults
        in ``_RAW_DEFAULTS`` (raw features) or 0.0 (odds).

        Returns a DataFrame with 1 row and correct columns.
        """
        raw = ctx.team_pred.raw_features
        odds = ctx.odds_data or {}
        # NOTE(review): ctx.upset_features / momentum_features / referee_features
        # are not consulted; those columns come from raw_features only —
        # confirm this is intended.
        # NOTE(review): `value or default` also replaces a genuine 0 with the
        # default — confirm this matches how the training data was built.

        # 1. Odds features
        ms_h = float(odds.get("ms_h") or 0)
        ms_d = float(odds.get("ms_d") or 0)
        ms_a = float(odds.get("ms_a") or 0)

        implied_home, implied_draw, implied_away = 0.33, 0.33, 0.33
        if ms_h > 0 and ms_d > 0 and ms_a > 0:
            # Normalise inverse odds so the three probabilities sum to 1.
            raw_sum = 1/ms_h + 1/ms_d + 1/ms_a
            implied_home = (1/ms_h) / raw_sum
            implied_draw = (1/ms_d) / raw_sum
            implied_away = (1/ms_a) / raw_sum

        # 2. League features
        league_stats = self._get_league_stats(ctx.league_id)

        # 3. Assemble the row (explicit float casting avoids XGBoost 'object' dtype errors)
        row = {
            name: float(raw.get(name) or default)
            for name, default in self._RAW_DEFAULTS.items()
        }
        row.update(
            {f"odds_{key}": float(odds.get(key) or 0.0) for key in self._ODDS_KEYS}
        )
        row.update(
            {
                "odds_ms_h": ms_h,
                "odds_ms_d": ms_d,
                "odds_ms_a": ms_a,
                "implied_home": implied_home,
                "implied_draw": implied_draw,
                "implied_away": implied_away,
                "league_avg_goals": float(league_stats.get("avg_goals") or 2.7),
                "league_zero_goal_rate": float(league_stats.get("zero_rate") or 0.07),
            }
        )

        # columns=FEATURES fixes the column order expected by the models.
        return pd.DataFrame([row], columns=FEATURES)

    def _get_league_stats(self, league_id: str | None) -> dict[str, float]:
        """Get cached league stats or default.

        Queries the average total goals and 0-0 rate of finished matches for
        the league and caches the result per league id. Falls back to the
        defaults when there is no league id, no connection, no data, or the
        query fails.
        """
        if not league_id:
            return dict(self._DEFAULT_LEAGUE_STATS)

        if league_id in self.league_stats_cache:
            return self.league_stats_cache[league_id]

        if self.conn:
            try:
                with self.conn.cursor() as cur:
                    # NOTE(review): EXTRACT(EPOCH ...) yields seconds — confirm
                    # mst_utc is stored in seconds as well.
                    cur.execute("""
                        SELECT AVG(score_home + score_away),
                        AVG(CASE WHEN score_home=0 AND score_away=0 THEN 1.0 ELSE 0.0 END)
                        FROM matches
                        WHERE league_id = %s AND status = 'FT'
                        AND mst_utc > EXTRACT(EPOCH FROM NOW() - INTERVAL '1 year')
                    """, (league_id,))
                    res = cur.fetchone()
                if res and res[0]:
                    stats = {
                        "avg_goals": float(res[0]),
                        "zero_rate": float(res[1]),
                    }
                    self.league_stats_cache[league_id] = stats
                    return stats
            except Exception as exc:
                # Enrichment is best-effort, but a failed statement leaves the
                # psycopg2 transaction aborted; roll back so later queries on
                # this connection can still run.
                print(f"⚠️ FeatureAdapter league stats query failed: {exc}")
                try:
                    self.conn.rollback()
                except Exception:
                    # Connection is unusable; stop trying to use it.
                    self.conn = None

        # Default fallback
        return dict(self._DEFAULT_LEAGUE_STATS)
|
||||||
|
|
||||||
|
# Singleton
|
||||||
|
_adapter: FeatureAdapter | None = None


def get_feature_adapter() -> FeatureAdapter:
    """Return the process-wide FeatureAdapter, creating it on first use."""
    global _adapter
    if _adapter is not None:
        return _adapter
    _adapter = FeatureAdapter()
    return _adapter
|
||||||
Executable
+316
@@ -0,0 +1,316 @@
|
|||||||
|
"""
|
||||||
|
Head-to-Head (H2H) Feature Engine
|
||||||
|
Takımların birbirine karşı geçmiş performansını analiz eder.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import psycopg2
|
||||||
|
from typing import Dict, Optional, Tuple
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from functools import lru_cache
|
||||||
|
|
||||||
|
import sys
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
from data.db import get_clean_dsn
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class H2HProfile:
    """Head-to-head record between two teams, seen from the home team's side."""
    total_matches: int
    home_wins: int
    draws: int
    away_wins: int
    home_goals_total: int
    away_goals_total: int
    btts_count: int  # Both teams to score
    over25_count: int

    def _per_match(self, amount: int, fallback: float) -> float:
        """Return ``amount / total_matches``, or ``fallback`` with no matches."""
        if self.total_matches > 0:
            return amount / self.total_matches
        return fallback

    @property
    def home_win_rate(self) -> float:
        return self._per_match(self.home_wins, 0.33)

    @property
    def draw_rate(self) -> float:
        return self._per_match(self.draws, 0.33)

    @property
    def away_win_rate(self) -> float:
        return self._per_match(self.away_wins, 0.33)

    @property
    def avg_total_goals(self) -> float:
        return self._per_match(self.home_goals_total + self.away_goals_total, 2.5)

    @property
    def btts_rate(self) -> float:
        return self._per_match(self.btts_count, 0.5)

    @property
    def over25_rate(self) -> float:
        return self._per_match(self.over25_count, 0.5)

    @property
    def home_dominance(self) -> float:
        """Home-side dominance score (between -1 and 1)."""
        if self.total_matches == 0:
            return 0
        win_gap = self.home_wins - self.away_wins
        return win_gap / self.total_matches

    def to_features(self) -> Dict[str, float]:
        """Return the profile as a feature dictionary."""
        features: Dict[str, float] = {'h2h_total_matches': self.total_matches}
        features['h2h_home_win_rate'] = self.home_win_rate
        features['h2h_draw_rate'] = self.draw_rate
        features['h2h_away_win_rate'] = self.away_win_rate
        features['h2h_avg_goals'] = self.avg_total_goals
        features['h2h_btts_rate'] = self.btts_rate
        features['h2h_over25_rate'] = self.over25_rate
        features['h2h_home_dominance'] = self.home_dominance
        return features
|
||||||
|
|
||||||
|
|
||||||
|
class H2HFeatureEngine:
    """
    Head-to-Head Feature Engine

    Analyzes the past meetings between two teams.
    """

    def __init__(self):
        self.conn = None
        # Keyed by (home_team_id, away_team_id, limit): profiles built from
        # different look-back windows must not overwrite each other.
        self._cache: Dict[Tuple[str, str, int], H2HProfile] = {}

    def get_conn(self):
        """Return the psycopg2 connection, (re)opening it when missing or closed."""
        if self.conn is None or self.conn.closed:
            self.conn = psycopg2.connect(get_clean_dsn())
        return self.conn

    def _fetch_meetings(self, home_team_id: str, away_team_id: str,
                        before_date: Optional[int], limit: int):
        """Fetch the latest scored meetings of the two teams, newest first.

        Rows are ``(home_team_id, away_team_id, score_home, score_away)`` and
        cover both venue orders. Both scores are required to be non-NULL so
        callers can compare them safely.
        """
        query = """
            SELECT
                home_team_id, away_team_id,
                score_home, score_away
            FROM matches
            WHERE (
                (home_team_id = %s AND away_team_id = %s)
                OR
                (home_team_id = %s AND away_team_id = %s)
            )
            AND score_home IS NOT NULL
            AND score_away IS NOT NULL
        """
        params = [home_team_id, away_team_id, away_team_id, home_team_id]

        if before_date:
            query += " AND mst_utc < %s"
            params.append(before_date)

        query += " ORDER BY mst_utc DESC LIMIT %s"
        params.append(limit)

        conn = self.get_conn()
        try:
            with conn.cursor() as cur:
                cur.execute(query, params)
                return cur.fetchall()
        except Exception:
            # A failed statement leaves the transaction aborted; roll back so
            # the shared connection stays usable, then surface the error.
            conn.rollback()
            raise

    @staticmethod
    def _build_profile(rows, home_team_id: str) -> H2HProfile:
        """Aggregate meeting rows into an H2HProfile from ``home_team_id``'s side."""
        home_wins = draws = away_wins = 0
        home_goals = away_goals = 0
        btts = over25 = 0

        for m_home_id, _m_away_id, score_h, score_a in rows:
            # Normalize the perspective: swap the scores when the requested
            # home team actually played away in that match.
            if m_home_id == home_team_id:
                h_score, a_score = score_h, score_a
            else:
                h_score, a_score = score_a, score_h

            if h_score > a_score:
                home_wins += 1
            elif h_score < a_score:
                away_wins += 1
            else:
                draws += 1

            home_goals += h_score
            away_goals += a_score

            if h_score > 0 and a_score > 0:
                btts += 1

            if h_score + a_score > 2.5:
                over25 += 1

        return H2HProfile(
            total_matches=len(rows),
            home_wins=home_wins,
            draws=draws,
            away_wins=away_wins,
            home_goals_total=home_goals,
            away_goals_total=away_goals,
            btts_count=btts,
            over25_count=over25,
        )

    def get_h2h_profile(self, home_team_id: str, away_team_id: str,
                        before_date: Optional[int] = None,
                        limit: int = 20) -> H2HProfile:
        """
        Analyze the past meetings between two teams.

        Args:
            home_team_id: Home team ID
            away_team_id: Away team ID
            before_date: Only matches before this time (compared to mst_utc)
            limit: How many past matches to look at

        Returns:
            H2HProfile: Head-to-head analysis result. An all-zero profile is
            returned when the teams have no scored meetings.
        """
        cache_key = (home_team_id, away_team_id, limit)

        # The cache is only used for the "latest state" query (no before_date).
        if before_date is None and cache_key in self._cache:
            return self._cache[cache_key]

        rows = self._fetch_meetings(home_team_id, away_team_id, before_date, limit)
        profile = self._build_profile(rows, home_team_id)

        # Empty results are not cached, so later data is picked up.
        if rows and before_date is None:
            self._cache[cache_key] = profile

        return profile

    def get_features(self, home_team_id: str, away_team_id: str,
                     before_date: Optional[int] = None) -> Dict[str, float]:
        """Return the head-to-head profile as a feature dictionary."""
        profile = self.get_h2h_profile(home_team_id, away_team_id, before_date)
        return profile.to_features()

    def get_momentum(self, home_team_id: str, away_team_id: str,
                     before_date: Optional[int] = None) -> Dict[str, float]:
        """
        Momentum/trend analysis over the most recent meetings.

        Looks at the last 5 meetings: the home-side dominance over them and
        the length and type of the current run of identical results,
        counted from the newest match backwards.
        """
        recent = self._fetch_meetings(home_team_id, away_team_id, before_date, 5)
        profile = self._build_profile(recent, home_team_id)

        streak = 0
        streak_type = None  # 'home', 'away', 'draw'

        for m_home_id, _m_away_id, score_h, score_a in recent:
            # Normalize the perspective to the requested home team.
            if m_home_id == home_team_id:
                result = 'home' if score_h > score_a else ('away' if score_h < score_a else 'draw')
            else:
                result = 'away' if score_h > score_a else ('home' if score_h < score_a else 'draw')

            if streak_type is None:
                streak_type = result
                streak = 1
            elif result == streak_type:
                streak += 1
            else:
                break

        return {
            'h2h_recent_home_dominance': profile.home_dominance,
            'h2h_streak_length': streak,
            'h2h_streak_home': 1 if streak_type == 'home' else 0,
            'h2h_streak_away': 1 if streak_type == 'away' else 0,
            'h2h_streak_draw': 1 if streak_type == 'draw' else 0,
        }
|
||||||
|
|
||||||
|
|
||||||
|
# Singleton
|
||||||
|
_engine = None


def get_h2h_engine() -> H2HFeatureEngine:
    """Return the shared H2HFeatureEngine, creating it on first use."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = H2HFeatureEngine()
    return _engine
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke test: pick one scored match from the database and print
    # the head-to-head profile and features for its two teams.
    engine = get_h2h_engine()

    cur = engine.get_conn().cursor()
    cur.execute("""
        SELECT home_team_id, away_team_id, match_name
        FROM matches
        WHERE score_home IS NOT NULL
        LIMIT 1
    """)
    result = cur.fetchone()

    if result:
        home_id, away_id, name = result
        for line in (
            f"\n🧪 Test: {name}",
            f" Home ID: {home_id}",
            f" Away ID: {away_id}",
        ):
            print(line)

        profile = engine.get_h2h_profile(home_id, away_id)
        report = [
            "\n📊 H2H Profil:",
            f" Toplam Maç: {profile.total_matches}",
            f" Ev Sahibi Kazanma: {profile.home_win_rate:.1%}",
            f" Beraberlik: {profile.draw_rate:.1%}",
            f" Deplasman Kazanma: {profile.away_win_rate:.1%}",
            f" Ortalama Gol: {profile.avg_total_goals:.2f}",
            f" BTTS Oranı: {profile.btts_rate:.1%}",
            f" Üst 2.5 Oranı: {profile.over25_rate:.1%}",
            f" Ev Dominance: {profile.home_dominance:+.2f}",
        ]
        for line in report:
            print(line)

        features = engine.get_features(home_id, away_id)
        print(f"\n🔧 Features: {features}")
|
||||||
@@ -0,0 +1,343 @@
|
|||||||
|
"""
|
||||||
|
HT/FT Tendency Feature Engine
|
||||||
|
================================
|
||||||
|
Produces team-level HT/FT tendency features for match prediction.
|
||||||
|
|
||||||
|
Computes ~15 features per match based on historical data:
|
||||||
|
- 1st half scoring/conceding rates
|
||||||
|
- Comeback rates
|
||||||
|
- Half-specific goal distribution
|
||||||
|
- League-level HT/FT profiles
|
||||||
|
|
||||||
|
All features are computed from the `matches` table using only data
|
||||||
|
BEFORE the match date (no future leakage).
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from typing import Dict, Optional, Tuple
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from data.db import get_clean_dsn
|
||||||
|
import psycopg2
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class TeamHtftProfile:
    """Half-time/full-time tendency counters for one team, with derived rates."""
    matches: int = 0
    ht_scored: int = 0       # Matches where team scored in 1st half
    ht_conceded: int = 0     # Matches where team conceded in 1st half
    ht_leading: int = 0      # Matches where team led at HT
    ht_trailing: int = 0     # Matches where team trailed at HT
    comeback_wins: int = 0   # Trailing at HT -> Won
    goals_1h: int = 0
    goals_2h: int = 0
    conceded_1h: int = 0
    conceded_2h: int = 0

    @property
    def ht_scoring_rate(self):
        if self.matches > 0:
            return self.ht_scored / self.matches
        return 0.5

    @property
    def ht_concede_rate(self):
        if self.matches > 0:
            return self.ht_conceded / self.matches
        return 0.5

    @property
    def ht_win_rate(self):
        if self.matches > 0:
            return self.ht_leading / self.matches
        return 0.33

    @property
    def comeback_rate(self):
        # Share of HT deficits that were turned into wins.
        if self.ht_trailing > 0:
            return self.comeback_wins / self.ht_trailing
        return 0.0

    @property
    def first_half_goal_pct(self):
        scored = self.goals_1h + self.goals_2h
        if scored > 0:
            return self.goals_1h / scored
        return 0.5

    @property
    def second_half_surge(self):
        """2H goals divided by 1H goals; above 1 means more goals after the break."""
        if self.goals_1h > 0:
            return self.goals_2h / self.goals_1h
        return 1.0
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class LeagueHtftProfile:
    """League-wide half-time/full-time counters, with derived averages."""
    matches: int = 0
    ht_goals_total: int = 0
    ft_goals_total: int = 0
    reversals: int = 0
    htft_counts: Dict[str, int] = field(default_factory=dict)

    @property
    def avg_ht_goals(self):
        if self.matches > 0:
            return self.ht_goals_total / self.matches
        return 1.0

    @property
    def avg_2h_goals(self):
        # Second-half average = full-time average minus half-time average.
        if self.matches > 0:
            full_time_avg = self.ft_goals_total / self.matches
        else:
            full_time_avg = 2.5
        return full_time_avg - self.avg_ht_goals

    @property
    def reversal_rate(self):
        if self.matches > 0:
            return self.reversals / self.matches
        return 0.05

    @property
    def first_half_pct(self):
        if self.ft_goals_total > 0:
            return self.ht_goals_total / self.ft_goals_total
        return 0.44
|
||||||
|
|
||||||
|
|
||||||
|
class HtftTendencyEngine:
    """
    Computes HT/FT tendency features for a given match.

    Uses historical rows from the `matches` table, optionally restricted to
    matches before a given timestamp to avoid future leakage.

    Features are based on team-level and league-level tendencies, which
    are DIFFERENT from the existing model features (ELO, form, H2H score).

    Profiles are memoised per engine instance; call `clear_cache()` between
    batches to drop them.
    """

    # Default number of recent matches in a team profile; also the
    # denominator used to normalise the sample-size features.
    _TEAM_MATCH_LIMIT = 30
    # Number of most recent league matches used for a league profile.
    _LEAGUE_MATCH_LIMIT = 500

    def __init__(self):
        self.conn = None
        # Keyed by (team_id, is_home, before_date, limit).
        self._team_cache: Dict[Tuple[str, bool, Optional[int], int], "TeamHtftProfile"] = {}
        # Keyed by (league_id, before_date).
        self._league_cache: Dict[Tuple[str, Optional[int]], "LeagueHtftProfile"] = {}

    def get_conn(self):
        """Return the shared connection, (re)opening it if missing or closed."""
        if self.conn is None or self.conn.closed:
            self.conn = psycopg2.connect(get_clean_dsn())
        return self.conn

    def _fetch_all(self, query: str, params: list) -> list:
        """Run a SELECT and return all rows.

        The cursor is always closed. If the statement fails, the transaction
        is rolled back so the shared connection is not left in an aborted
        state, and the error is re-raised.
        """
        conn = self.get_conn()
        try:
            with conn.cursor() as cur:
                cur.execute(query, params)
                return cur.fetchall()
        except psycopg2.Error:
            if not conn.closed:
                conn.rollback()
            raise

    def _get_team_htft_profile(
        self,
        team_id: str,
        is_home: bool,
        before_date: Optional[int] = None,
        limit: int = _TEAM_MATCH_LIMIT,
    ) -> "TeamHtftProfile":
        """
        Compute HT/FT profile for a team from their recent matches.

        Args:
            team_id: Team ID
            is_home: True = only home matches, False = only away matches
            before_date: Only use matches before this timestamp (ms UTC);
                a falsy value (None or 0) disables the filter
            limit: Number of recent matches to consider

        Returns:
            The (cached) TeamHtftProfile for this exact argument combination.
        """
        # `limit` is part of the key: a profile built from N matches must not
        # be served for a request that asks for a different sample size.
        cache_key = (team_id, is_home, before_date, limit)
        cached = self._team_cache.get(cache_key)
        if cached is not None:
            return cached

        # Column names come from these two fixed literals only, never from
        # caller input, so interpolating them into the SQL text is safe.
        mine, opp = ("home", "away") if is_home else ("away", "home")
        query = f"""
            SELECT ht_score_{mine}, ht_score_{opp}, score_{mine}, score_{opp}
            FROM matches
            WHERE {mine}_team_id = %s
              AND sport = 'football'
              AND status = 'FT'
              AND ht_score_home IS NOT NULL
              AND ht_score_away IS NOT NULL
              AND score_home IS NOT NULL
              AND score_away IS NOT NULL
        """
        params = [team_id]

        if before_date:
            query += " AND mst_utc < %s"
            params.append(before_date)

        query += " ORDER BY mst_utc DESC LIMIT %s"
        params.append(limit)

        rows = self._fetch_all(query, params)

        profile = TeamHtftProfile()
        profile.matches = len(rows)

        for ht_mine, ht_opp, ft_mine, ft_opp in rows:
            # 1st half scoring
            if ht_mine > 0:
                profile.ht_scored += 1
            if ht_opp > 0:
                profile.ht_conceded += 1

            # HT situation
            if ht_mine > ht_opp:
                profile.ht_leading += 1
            elif ht_mine < ht_opp:
                profile.ht_trailing += 1
                # Comeback: trailed at the break but won the match
                if ft_mine > ft_opp:
                    profile.comeback_wins += 1

            # Goal distribution
            profile.goals_1h += ht_mine
            profile.goals_2h += (ft_mine - ht_mine)
            profile.conceded_1h += ht_opp
            profile.conceded_2h += (ft_opp - ht_opp)

        self._team_cache[cache_key] = profile
        return profile

    def _get_league_htft_profile(
        self,
        league_id: str,
        before_date: Optional[int] = None,
    ) -> "LeagueHtftProfile":
        """Compute HT/FT profile for a league from its most recent matches.

        Args:
            league_id: League ID
            before_date: Only use matches before this timestamp (ms UTC);
                a falsy value disables the filter
        """
        cache_key = (league_id, before_date)
        cached = self._league_cache.get(cache_key)
        if cached is not None:
            return cached

        query = """
            SELECT ht_score_home, ht_score_away, score_home, score_away
            FROM matches
            WHERE league_id = %s
              AND sport = 'football'
              AND status = 'FT'
              AND ht_score_home IS NOT NULL
              AND ht_score_away IS NOT NULL
              AND score_home IS NOT NULL
              AND score_away IS NOT NULL
        """
        params = [league_id]

        if before_date:
            query += " AND mst_utc < %s"
            params.append(before_date)

        query += " ORDER BY mst_utc DESC LIMIT %s"
        params.append(self._LEAGUE_MATCH_LIMIT)

        rows = self._fetch_all(query, params)

        profile = LeagueHtftProfile()
        profile.matches = len(rows)

        for hth, hta, sh, sa in rows:
            profile.ht_goals_total += hth + hta
            profile.ft_goals_total += sh + sa

            # Classify HT/FT as "1" (home ahead), "2" (away ahead) or "X" (level)
            ht = "1" if hth > hta else ("2" if hth < hta else "X")
            ft = "1" if sh > sa else ("2" if sh < sa else "X")
            htft = f"{ht}/{ft}"

            profile.htft_counts[htft] = profile.htft_counts.get(htft, 0) + 1
            if htft in ("1/2", "2/1"):
                profile.reversals += 1

        self._league_cache[cache_key] = profile
        return profile

    def get_features(
        self,
        home_team_id: str,
        away_team_id: str,
        league_id: Optional[str] = None,
        before_date: Optional[int] = None,
    ) -> Dict[str, float]:
        """
        Get HT/FT tendency features for a match.

        The home team is profiled on its home matches and the away team on
        its away matches. Without a `league_id` the league features take the
        fallback values of an empty LeagueHtftProfile.

        Returns:
            Dict of feature name -> value (team, league and sample-size features).
        """
        home_prof = self._get_team_htft_profile(home_team_id, is_home=True, before_date=before_date)
        away_prof = self._get_team_htft_profile(away_team_id, is_home=False, before_date=before_date)

        if league_id:
            league_prof = self._get_league_htft_profile(league_id, before_date=before_date)
        else:
            league_prof = LeagueHtftProfile()

        full_sample = float(self._TEAM_MATCH_LIMIT)

        features = {
            # Home team HT/FT tendencies
            "htft_home_ht_scoring_rate": home_prof.ht_scoring_rate,
            "htft_home_ht_concede_rate": home_prof.ht_concede_rate,
            "htft_home_ht_win_rate": home_prof.ht_win_rate,
            "htft_home_comeback_rate": home_prof.comeback_rate,
            "htft_home_first_half_goal_pct": home_prof.first_half_goal_pct,
            "htft_home_second_half_surge": min(home_prof.second_half_surge, 3.0),

            # Away team HT/FT tendencies
            "htft_away_ht_scoring_rate": away_prof.ht_scoring_rate,
            "htft_away_ht_concede_rate": away_prof.ht_concede_rate,
            "htft_away_ht_win_rate": away_prof.ht_win_rate,
            "htft_away_comeback_rate": away_prof.comeback_rate,
            "htft_away_first_half_goal_pct": away_prof.first_half_goal_pct,
            "htft_away_second_half_surge": min(away_prof.second_half_surge, 3.0),

            # League-level
            "htft_league_avg_ht_goals": league_prof.avg_ht_goals,
            "htft_league_reversal_rate": league_prof.reversal_rate,
            "htft_league_first_half_pct": league_prof.first_half_pct,

            # Data quality (how many matches back these features, capped at 1.0)
            "htft_home_sample_size": min(home_prof.matches / full_sample, 1.0),
            "htft_away_sample_size": min(away_prof.matches / full_sample, 1.0),
        }

        return features

    def clear_cache(self):
        """Clear internal caches (useful between batches)."""
        self._team_cache.clear()
        self._league_cache.clear()
|
||||||
|
|
||||||
|
|
||||||
|
# Process-wide engine instance, created on first use.
_engine = None


def get_htft_tendency_engine() -> HtftTendencyEngine:
    """Return the shared HtftTendencyEngine, creating it on the first call."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = HtftTendencyEngine()
    return _engine
|
||||||
|
|
||||||
|
|
||||||
|
# ── Test ─────────────────────────────────────────────────────────────────────
|
||||||
|
if __name__ == "__main__":
    # Manual smoke test: print the features for the three latest finished matches.
    engine = get_htft_tendency_engine()

    sample_cur = engine.get_conn().cursor()
    sample_cur.execute("""
        SELECT home_team_id, away_team_id, league_id, mst_utc, match_name
        FROM matches
        WHERE sport = 'football' AND status = 'FT'
        AND home_team_id IS NOT NULL AND away_team_id IS NOT NULL
        ORDER BY mst_utc DESC LIMIT 3
    """)
    latest = sample_cur.fetchall()
    sample_cur.close()

    for row in latest:
        hid, aid, lid, mst, name = row
        print(f"\n🏟️ {name}")
        # The match's own kickoff time is the cut-off, so only earlier matches count.
        features = engine.get_features(hid, aid, lid, mst)
        for k in sorted(features):
            v = features[k]
            print(f" {k}: {v:.4f}")
|
||||||
Executable
+434
@@ -0,0 +1,434 @@
|
|||||||
|
"""
|
||||||
|
Momentum Engine - Son Maç Trendleri
|
||||||
|
V9 Model için takımların anlık form trendini analiz eder.
|
||||||
|
|
||||||
|
Faktörler:
|
||||||
|
1. Gol atma trendi (artan/azalan/stabil)
|
||||||
|
2. Yenilmezlik/yenilgi serisi
|
||||||
|
3. Son maç psikolojisi (büyük galibiyet/mağlubiyet etkisi)
|
||||||
|
4. Ev/Deplasman momentum farkı
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from typing import Dict, List, Tuple, Optional
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
try:
|
||||||
|
import psycopg2
|
||||||
|
from psycopg2.extras import RealDictCursor
|
||||||
|
except ImportError:
|
||||||
|
psycopg2 = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class MomentumData:
    """Momentum metrics for one team."""
    goals_trend: float = 0.0  # -1 (falling) to +1 (rising)
    conceded_trend: float = 0.0  # -1 (falling) to +1 (rising); negative is good
    unbeaten_streak: int = 0  # unbeaten run
    losing_streak: int = 0  # losing run
    winning_streak: int = 0  # winning run
    last_match_impact: float = 0.0  # psychological effect of the last match (-1 to +1)
    momentum_score: float = 0.0  # overall momentum (-1 to +1)
    form_direction: str = "stable"  # "improving", "declining", "stable"
    xg_underperformance: float = 0.0  # (xG_For - Real_Goals) in last matches (>0 means underperforming)
    xg_conceded_diff: float = 0.0  # (Real_Conceded - xG_Against) in last matches
|
||||||
|
|
||||||
|
|
||||||
|
class MomentumEngine:
    """
    Analyses the trend in a team's most recent matches.

    Covers scoring/conceding trends, current streaks and the psychological
    effect of the latest result, and combines them into one momentum score.
    """

    def __init__(self):
        self.conn = None
        self._connect_db()

    def _connect_db(self):
        """Connect to the database; `self.conn` stays None when that fails."""
        if psycopg2 is None:
            return

        try:
            from data.db import get_clean_dsn
            self.conn = psycopg2.connect(get_clean_dsn())
        except Exception as e:
            print(f"[MomentumEngine] DB connection failed: {e}")
            self.conn = None

    def _get_conn(self):
        """Return the connection, reconnecting first if it is missing or closed."""
        if self.conn is None or self.conn.closed:
            self._connect_db()
        return self.conn

    @staticmethod
    def _reset_transaction(conn):
        """Roll back after a failed statement so the shared connection stays usable."""
        try:
            if not conn.closed:
                conn.rollback()
        except Exception as e:
            print(f"[MomentumEngine] Rollback failed: {e}")

    @staticmethod
    def _goals_for_against(match: Dict, team_id: str) -> Tuple[int, int]:
        """Return (goals scored, goals conceded) by `team_id` in `match`."""
        if match['home_team_id'] == team_id:
            return match['score_home'], match['score_away']
        return match['score_away'], match['score_home']

    def get_recent_matches(
        self,
        team_id: str,
        before_date_ms: int,
        limit: int = 5,
        home_only: bool = False,
        away_only: bool = False
    ) -> List[Dict]:
        """
        Fetch the team's most recent matches, newest first.

        Args:
            team_id: Team ID
            before_date_ms: Only matches with `mst_utc` below this value
            limit: Maximum number of matches
            home_only: Restrict to the team's home matches (wins over `away_only`)
            away_only: Restrict to the team's away matches

        Returns:
            List of match rows (dict-like) with ids, scores and kickoff time;
            an empty list when there is no connection or the query fails.
        """
        conn = self._get_conn()
        if conn is None:
            return []

        # Both scores must be present: every consumer does arithmetic on them.
        conditions = [
            "mst_utc < %s",
            "score_home IS NOT NULL",
            "score_away IS NOT NULL",
        ]
        params = [before_date_ms]

        if home_only:
            conditions.append("home_team_id = %s")
            params.append(team_id)
        elif away_only:
            conditions.append("away_team_id = %s")
            params.append(team_id)
        else:
            conditions.append("(home_team_id = %s OR away_team_id = %s)")
            params.extend([team_id, team_id])

        # Only the fixed condition fragments above are interpolated; all
        # values are passed as bound parameters.
        query = f"""
            SELECT
                id, home_team_id, away_team_id,
                score_home, score_away, mst_utc
            FROM matches
            WHERE {' AND '.join(conditions)}
            ORDER BY mst_utc DESC
            LIMIT %s
        """
        params.append(limit)

        try:
            with conn.cursor(cursor_factory=RealDictCursor) as cursor:
                cursor.execute(query, params)
                return cursor.fetchall()
        except Exception as e:
            print(f"[MomentumEngine] Query error: {e}")
            # Without a rollback every later query on this connection would
            # fail with "current transaction is aborted".
            self._reset_transaction(conn)
            return []

    def calculate_goals_trend(self, matches: List[Dict], team_id: str) -> Tuple[float, float]:
        """
        Compute scoring and conceding trends.

        Compares the average of the 3 most recent matches with the average
        of the older matches in the list. With fewer than 3 matches, or with
        no older matches to compare against, the trend is 0.

        Args:
            matches: Matches ordered newest first
            team_id: Team whose perspective is taken

        Returns:
            (goals_trend, conceded_trend), each clamped to -1 .. +1
        """
        if len(matches) < 3:
            return 0.0, 0.0

        pairs = [self._goals_for_against(match, team_id) for match in matches]

        def trend(values):
            recent = sum(values[:3]) / 3
            older = values[3:]
            baseline = sum(older) / len(older) if older else recent
            return min(max((recent - baseline) / 2, -1), 1)

        goals_trend = trend([scored for scored, _ in pairs])
        conceded_trend = trend([conceded for _, conceded in pairs])
        return goals_trend, conceded_trend

    def calculate_streaks(self, matches: List[Dict], team_id: str) -> Tuple[int, int, int]:
        """
        Compute the team's current winning, unbeaten and losing streaks.

        Each streak is the length of the run that starts at the most recent
        match: consecutive wins, consecutive non-losses, or consecutive
        losses. Older results behind the end of the run are ignored, so at
        most one of (unbeaten, losing) is non-zero.

        Args:
            matches: Matches ordered newest first
            team_id: Team whose perspective is taken

        Returns:
            (winning_streak, unbeaten_streak, losing_streak)
        """
        winning = 0
        unbeaten = 0
        losing = 0
        wins_unbroken = True  # no draw seen yet inside the unbeaten run

        for match in matches:
            goals_for, goals_against = self._goals_for_against(match, team_id)

            if goals_for < goals_against:
                if unbeaten:
                    break  # a loss ends the current unbeaten run
                losing += 1
            else:
                if losing:
                    break  # a win or draw ends the current losing run
                unbeaten += 1
                if goals_for > goals_against and wins_unbroken:
                    winning += 1
                else:
                    wins_unbroken = False

        return winning, unbeaten, losing

    def calculate_last_match_impact(self, matches: List[Dict], team_id: str) -> float:
        """
        Score the psychological effect of the most recent match.

        Args:
            matches: Matches ordered newest first; only the first is used
            team_id: Team whose perspective is taken

        Returns:
            Impact from -1 (heavy defeat) to +1 (big win), stepped by goal
            difference; 0.0 for a draw or an empty list.
        """
        if not matches:
            return 0.0

        goals_for, goals_against = self._goals_for_against(matches[0], team_id)
        goal_diff = goals_for - goals_against

        if goal_diff >= 4:
            return 1.0  # very big win
        elif goal_diff >= 2:
            return 0.6
        elif goal_diff == 1:
            return 0.3
        elif goal_diff == 0:
            return 0.0
        elif goal_diff == -1:
            return -0.3
        elif goal_diff >= -3:
            return -0.6
        else:
            return -1.0  # very heavy defeat

    def calculate_xg_underperformance(self, matches: List[Dict], team_id: str) -> Tuple[float, float]:
        """
        Calculate if a team chronically underperforms its xG (Expected Goals).

        NOTE: no real xG data is used. The xG values are synthetic, derived
        from the scoreline itself as max(0.5, goals * 1.5 - 0.5).

        Returns:
            (xg_strike_diff, xg_defend_diff), both per match
            xg_strike_diff: > 0 means they score LESS than expected
            xg_defend_diff: > 0 means they concede MORE than expected
        """
        if not matches:
            return 0.0, 0.0

        real_scored = 0
        real_conceded = 0
        xg_created = 0.0
        xg_conceded = 0.0

        for m in matches:
            scored, conceded = self._goals_for_against(m, team_id)
            real_scored += scored
            real_conceded += conceded
            xg_created += max(0.5, scored * 1.5 - 0.5)
            xg_conceded += max(0.5, conceded * 1.5 - 0.5)

        match_count = len(matches)
        xg_strike_diff = (xg_created - real_scored) / match_count
        xg_defend_diff = (real_conceded - xg_conceded) / match_count

        return xg_strike_diff, xg_defend_diff

    def calculate_momentum(
        self,
        team_id: str,
        before_date_ms: int,
        match_limit: int = 5
    ) -> "MomentumData":
        """
        Run the full momentum analysis for a team.

        Args:
            team_id: Team ID
            before_date_ms: Only matches before this timestamp are used
            match_limit: Number of recent matches to analyse

        Returns:
            MomentumData with all metrics; all defaults when no matches are found.
        """
        data = MomentumData()

        matches = self.get_recent_matches(team_id, before_date_ms, match_limit)
        if not matches:
            return data

        # 1. Goal trends
        data.goals_trend, data.conceded_trend = self.calculate_goals_trend(matches, team_id)

        # 2. Streaks
        data.winning_streak, data.unbeaten_streak, data.losing_streak = \
            self.calculate_streaks(matches, team_id)

        # 3. Last match effect
        data.last_match_impact = self.calculate_last_match_impact(matches, team_id)

        # 4. Form direction
        if data.goals_trend > 0.3 and data.conceded_trend < 0:
            data.form_direction = "improving"
        elif data.goals_trend < -0.3 or data.conceded_trend > 0.3:
            data.form_direction = "declining"
        else:
            data.form_direction = "stable"

        # 5. xG underperformance
        data.xg_underperformance, data.xg_conceded_diff = \
            self.calculate_xg_underperformance(matches, team_id)

        # 6. Overall momentum score
        momentum = 0.0

        # Scoring trend plus conceding trend (inverted: conceding less is good)
        momentum += data.goals_trend * 0.25
        momentum += (-data.conceded_trend) * 0.20

        # Streak bonuses
        if data.winning_streak >= 3:
            momentum += 0.25
        elif data.winning_streak >= 2:
            momentum += 0.15
        elif data.unbeaten_streak >= 5:
            momentum += 0.15

        if data.losing_streak >= 3:
            momentum -= 0.30
        elif data.losing_streak >= 2:
            momentum -= 0.15

        # Last match effect
        momentum += data.last_match_impact * 0.20

        # Penalty: scoring more than 0.5 goals per match below xG
        if data.xg_underperformance > 0.5:
            momentum -= min(0.3, data.xg_underperformance * 0.2)

        # Penalty: conceding more than 0.5 goals per match above xG
        if data.xg_conceded_diff > 0.5:
            momentum -= min(0.3, data.xg_conceded_diff * 0.2)

        data.momentum_score = min(max(momentum, -1), 1)

        return data

    def get_features(
        self,
        home_team_id: str,
        away_team_id: str,
        match_date_ms: int
    ) -> Dict[str, float]:
        """
        Return the momentum feature dict for the model.

        Streak features are capped (winning/losing at 5, unbeaten at 10) and
        the form direction is encoded as 1 / 0 / -1.
        """
        home_momentum = self.calculate_momentum(home_team_id, match_date_ms)
        away_momentum = self.calculate_momentum(away_team_id, match_date_ms)

        # Form direction encoding
        direction_map = {"improving": 1, "stable": 0, "declining": -1}

        def side_features(prefix, mom):
            return {
                f"{prefix}_momentum_score": mom.momentum_score,
                f"{prefix}_goals_trend": mom.goals_trend,
                f"{prefix}_conceded_trend": mom.conceded_trend,
                f"{prefix}_winning_streak": min(mom.winning_streak, 5),
                f"{prefix}_unbeaten_streak": min(mom.unbeaten_streak, 10),
                f"{prefix}_losing_streak": min(mom.losing_streak, 5),
                f"{prefix}_last_impact": mom.last_match_impact,
                f"{prefix}_form_direction": direction_map.get(mom.form_direction, 0),
                f"{prefix}_xg_underperf": mom.xg_underperformance,
                f"{prefix}_xg_conceded_diff": mom.xg_conceded_diff,
            }

        features = side_features("home", home_momentum)
        features.update(side_features("away", away_momentum))

        # Differences (home minus away)
        features["momentum_diff"] = home_momentum.momentum_score - away_momentum.momentum_score
        features["trend_diff"] = (
            (home_momentum.goals_trend - home_momentum.conceded_trend)
            - (away_momentum.goals_trend - away_momentum.conceded_trend)
        )
        features["xg_underperf_diff"] = (
            home_momentum.xg_underperformance - away_momentum.xg_underperformance
        )
        return features
|
||||||
|
|
||||||
|
|
||||||
|
# Process-wide engine instance, created on first use.
_engine_instance = None


def get_momentum_engine() -> MomentumEngine:
    """Return the shared MomentumEngine, creating it on the first call."""
    global _engine_instance
    if _engine_instance is not None:
        return _engine_instance
    _engine_instance = MomentumEngine()
    return _engine_instance
|
||||||
|
|
||||||
|
|
||||||
|
# Test
|
||||||
|
if __name__ == "__main__":
    # Instantiating the engine also attempts the database connection.
    engine = get_momentum_engine()

    # Banner
    print("=" * 60)
    print("MOMENTUM ENGINE TEST")
    print("=" * 60)

    # Sample values printed without touching the database.
    sample_values = dict(
        goals_trend=0.5,
        conceded_trend=-0.3,
        winning_streak=3,
        unbeaten_streak=5,
        losing_streak=0,
        last_match_impact=0.6,
        form_direction="improving",
    )
    data = MomentumData(**sample_values)

    print(f"Goals Trend: {data.goals_trend}")
    print(f"Conceded Trend: {data.conceded_trend}")
    print(f"Winning Streak: {data.winning_streak}")
    print(f"Unbeaten Streak: {data.unbeaten_streak}")
    print(f"Form Direction: {data.form_direction}")
    print(f"Last Match Impact: {data.last_match_impact}")
|
||||||
File diff suppressed because it is too large
Load Diff
Executable
+371
@@ -0,0 +1,371 @@
|
|||||||
|
"""
|
||||||
|
Poisson Engine - Matematiksel Gol Modeli
|
||||||
|
V9 Model için Poisson dağılımı ile gol olasılıkları hesaplar.
|
||||||
|
|
||||||
|
Özellikler:
|
||||||
|
1. Exact score olasılıkları (0-0, 1-0, 1-1, 2-1, vb.)
|
||||||
|
2. Over/Under olasılıkları (matematiksel)
|
||||||
|
3. BTTS (Karşılıklı Gol) olasılıkları
|
||||||
|
4. Expected Goals (xG) tahmini
|
||||||
|
"""
|
||||||
|
|
||||||
|
import math
|
||||||
|
from typing import Dict, Tuple, Optional
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
|
||||||
|
def poisson_prob(lam: float, k: int) -> float:
    """
    Poisson probability mass function.

    P(X = k) = (λ^k * e^(-λ)) / k!

    The value is computed in log space, so large `lam` or `k` cannot
    overflow the intermediate power or factorial.

    Args:
        lam: Expected number of events (λ). A non-positive value is treated
            as a distribution concentrated on 0.
        k: Number of events.

    Returns:
        The probability of exactly `k` events; 0.0 for negative `k`.
    """
    if k < 0:
        return 0.0
    if lam <= 0:
        return 1.0 if k == 0 else 0.0
    # log P = k*ln(λ) - λ - ln(k!), with ln(k!) = lgamma(k + 1)
    return math.exp(k * math.log(lam) - lam - math.lgamma(k + 1))
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class PoissonPrediction:
    """Poisson prediction results."""
    home_xg: float = 0.0  # expected goals, home team
    away_xg: float = 0.0  # expected goals, away team
    total_xg: float = 0.0  # expected goals, total

    # Match result probabilities
    home_win_prob: float = 0.0
    draw_prob: float = 0.0
    away_win_prob: float = 0.0

    # Over/Under probabilities
    over_15_prob: float = 0.0
    over_25_prob: float = 0.0
    over_35_prob: float = 0.0
    under_15_prob: float = 0.0
    under_25_prob: float = 0.0
    under_35_prob: float = 0.0

    # BTTS (both teams to score)
    btts_yes_prob: float = 0.0
    btts_no_prob: float = 0.0

    # Most likely scorelines
    most_likely_scores: list = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
class PoissonEngine:
|
||||||
|
"""
|
||||||
|
Poisson dağılımı ile gol olasılıkları hesaplar.
|
||||||
|
İstatistiksel bir yaklaşım - machine learning'den bağımsız.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Lig bazlı ortalama gol verileri (varsayılan değerler)
|
||||||
|
DEFAULT_HOME_XG = 1.45
|
||||||
|
DEFAULT_AWAY_XG = 1.15
|
||||||
|
DEFAULT_LEAGUE_AVG = 2.60
|
||||||
|
|
||||||
|
def __init__(self):
    """Set the upper bound of the score grid used by the calculations."""
    self.max_goals = 7  # maximum number of goals per team considered in the calculations
|
||||||
|
|
||||||
|
def calculate_xg(
    self,
    home_goals_avg: float,
    home_conceded_avg: float,
    away_goals_avg: float,
    away_conceded_avg: float,
    league_home_avg: float = None,
    league_away_avg: float = None,
    league_total_avg: float = None
) -> Tuple[float, float]:
    """
    Estimate expected goals (xG) for both teams.

    Each side's xG is attack strength * opponent defensive weakness *
    league average, where every strength is a team average divided by a
    league average (1.0 when that league average is not positive).

    Args:
        home_goals_avg: Home team's average goals scored
        home_conceded_avg: Home team's average goals conceded
        away_goals_avg: Away team's average goals scored
        away_conceded_avg: Away team's average goals conceded
        league_home_avg: League average home goals (class default if None)
        league_away_avg: League average away goals (class default if None)
        league_total_avg: League average total goals (class default if None);
            not used by the computation below

    Returns:
        (home_xg, away_xg), clamped to 0.3..4.0 and 0.2..3.5 respectively
    """
    # Fall back to the class-level league defaults.
    if league_home_avg is None:
        league_home_avg = self.DEFAULT_HOME_XG
    if league_away_avg is None:
        league_away_avg = self.DEFAULT_AWAY_XG
    if league_total_avg is None:
        league_total_avg = self.DEFAULT_LEAGUE_AVG

    def strength(team_avg, league_avg):
        # Relative strength; neutral when the league average is unusable.
        if league_avg > 0:
            return team_avg / league_avg
        return 1.0

    # NOTE(review): defensive weakness is normalised by the same side's
    # league average as the attack (away by away, home by home) — confirm
    # this is the intended model.
    raw_home = (
        strength(home_goals_avg, league_home_avg)
        * strength(away_conceded_avg, league_away_avg)
        * league_home_avg
    )
    raw_away = (
        strength(away_goals_avg, league_away_avg)
        * strength(home_conceded_avg, league_home_avg)
        * league_away_avg
    )

    # Clamp extreme values.
    bounded_home = max(0.3, min(raw_home, 4.0))
    bounded_away = max(0.2, min(raw_away, 3.5))
    return bounded_home, bounded_away
|
||||||
|
|
||||||
|
def calculate_score_matrix(
    self,
    home_xg: float,
    away_xg: float
) -> Dict[Tuple[int, int], float]:
    """
    Compute the probability of every scoreline on the grid.

    Home and away goals are treated as independent Poisson variables, each
    running from 0 to `self.max_goals` inclusive.

    Returns:
        Dict[(home_goals, away_goals)] = probability
    """
    goal_counts = range(self.max_goals + 1)
    home_pmf = [poisson_prob(home_xg, g) for g in goal_counts]
    away_pmf = [poisson_prob(away_xg, g) for g in goal_counts]

    return {
        (h, a): home_pmf[h] * away_pmf[a]
        for h in goal_counts
        for a in goal_counts
    }
|
||||||
|
|
||||||
|
def calculate_match_odds(
    self,
    home_xg: float,
    away_xg: float
) -> Tuple[float, float, float]:
    """
    Compute the 1X2 probabilities.

    Sums the score grid by outcome and rescales the three sums so they add
    up to 1 (the grid is finite, so its raw total can be below 1).

    Returns:
        (home_win, draw, away_win) probabilities
    """
    grid = self.calculate_score_matrix(home_xg, away_xg)

    # Keyed by the sign of the goal difference: 1 home win, 0 draw, -1 away win.
    by_outcome = {1: 0.0, 0: 0.0, -1: 0.0}
    for (h, a), prob in grid.items():
        by_outcome[(h > a) - (h < a)] += prob

    mass = by_outcome[1] + by_outcome[0] + by_outcome[-1]
    if mass > 0:
        for outcome in (1, 0, -1):
            by_outcome[outcome] /= mass

    return by_outcome[1], by_outcome[0], by_outcome[-1]
|
||||||
|
|
||||||
|
def calculate_over_under(
    self,
    home_xg: float,
    away_xg: float
) -> Dict[str, float]:
    """
    Compute over/under probabilities for the 1.5, 2.5 and 3.5 goal lines.

    The score matrix only covers scorelines up to ``self.max_goals`` per
    side, so its total mass can fall short of 1. The "over" sums are divided
    by the mass the matrix actually covers (the same normalization
    ``calculate_match_odds`` applies); otherwise the missing tail would be
    silently credited to "under" through ``1 - over``.

    Args:
        home_xg: Expected goals of the home side.
        away_xg: Expected goals of the away side.

    Returns:
        Dict with keys ``over_15``, ``over_25``, ``over_35``, ``under_15``,
        ``under_25`` and ``under_35``; each over/under pair sums to 1.
    """
    matrix = self.calculate_score_matrix(home_xg, away_xg)
    covered_mass = sum(matrix.values())

    def over(line: float) -> float:
        """Normalized probability that total goals exceed ``line``."""
        mass = sum(prob for (h, a), prob in matrix.items() if h + a > line)
        # An empty / zero-mass matrix is returned as-is to avoid dividing by 0.
        return mass / covered_mass if covered_mass > 0 else mass

    over_15 = over(1.5)
    over_25 = over(2.5)
    over_35 = over(3.5)

    return {
        "over_15": over_15,
        "over_25": over_25,
        "over_35": over_35,
        "under_15": 1 - over_15,
        "under_25": 1 - over_25,
        "under_35": 1 - over_35,
    }
|
||||||
|
|
||||||
|
def calculate_btts(
    self,
    home_xg: float,
    away_xg: float
) -> Tuple[float, float]:
    """
    Probability that both teams score (BTTS) and its complement.

    Args:
        home_xg: Expected goals of the home side.
        away_xg: Expected goals of the away side.

    Returns:
        (btts_yes, btts_no)
    """
    # P(side scores at least once) = 1 - P(side scores 0)
    home_blank = poisson_prob(home_xg, 0)
    away_blank = poisson_prob(away_xg, 0)

    # P(BTTS) = P(home scores) * P(away scores)
    both_score = (1 - home_blank) * (1 - away_blank)
    return both_score, 1 - both_score
|
||||||
|
|
||||||
|
def get_most_likely_scores(
    self,
    home_xg: float,
    away_xg: float,
    top_n: int = 5
) -> list:
    """
    Return the ``top_n`` most probable scorelines.

    Args:
        home_xg: Expected goals of the home side.
        away_xg: Expected goals of the away side.
        top_n: Number of scorelines to return.

    Returns:
        List of ``{"score": "H-A", "probability": <percent, 1 decimal>}``
        dicts, most probable first.
    """
    # Rank every scoreline by probability, highest first
    ranked = sorted(
        self.calculate_score_matrix(home_xg, away_xg).items(),
        key=lambda cell: cell[1],
        reverse=True,
    )

    top_scores = []
    for (home_goals, away_goals), likelihood in ranked[:top_n]:
        top_scores.append({
            "score": f"{home_goals}-{away_goals}",
            "probability": round(likelihood * 100, 1),
        })
    return top_scores
|
||||||
|
|
||||||
|
def predict(
    self,
    home_goals_avg: float,
    home_conceded_avg: float,
    away_goals_avg: float,
    away_conceded_avg: float,
    league_home_avg: float = None,
    league_away_avg: float = None,
    league_total_avg: float = None
) -> PoissonPrediction:
    """
    Run the full prediction pipeline and return a populated PoissonPrediction.

    Steps: expected goals -> 1X2 -> over/under -> BTTS -> most likely scores.
    xG values are rounded to 2 decimals, probabilities to 3.

    Args:
        home_goals_avg: Home side goals-scored average.
        home_conceded_avg: Home side goals-conceded average.
        away_goals_avg: Away side goals-scored average.
        away_conceded_avg: Away side goals-conceded average.
        league_home_avg: Optional league average, forwarded to ``calculate_xg``.
        league_away_avg: Optional league average, forwarded to ``calculate_xg``.
        league_total_avg: Optional league average, forwarded to ``calculate_xg``.
    """
    prediction = PoissonPrediction()

    # 1. Expected goals
    home_xg, away_xg = self.calculate_xg(
        home_goals_avg, home_conceded_avg,
        away_goals_avg, away_conceded_avg,
        league_home_avg, league_away_avg, league_total_avg
    )
    prediction.home_xg = round(home_xg, 2)
    prediction.away_xg = round(away_xg, 2)
    prediction.total_xg = round(home_xg + away_xg, 2)

    # 2. Match result (1X2)
    p_home, p_draw, p_away = self.calculate_match_odds(home_xg, away_xg)
    for attr, value in (
        ("home_win_prob", p_home),
        ("draw_prob", p_draw),
        ("away_win_prob", p_away),
    ):
        setattr(prediction, attr, round(value, 3))

    # 3. Over/Under
    goal_lines = self.calculate_over_under(home_xg, away_xg)
    for market in ("over_15", "over_25", "over_35", "under_15", "under_25", "under_35"):
        setattr(prediction, f"{market}_prob", round(goal_lines[market], 3))

    # 4. BTTS
    p_btts_yes, p_btts_no = self.calculate_btts(home_xg, away_xg)
    prediction.btts_yes_prob = round(p_btts_yes, 3)
    prediction.btts_no_prob = round(p_btts_no, 3)

    # 5. Most likely scorelines
    prediction.most_likely_scores = self.get_most_likely_scores(home_xg, away_xg)

    return prediction
|
||||||
|
|
||||||
|
def get_features(
    self,
    home_goals_avg: float,
    home_conceded_avg: float,
    away_goals_avg: float,
    away_conceded_avg: float,
    league_home_avg: float = None,
    league_away_avg: float = None,
    league_total_avg: float = None
) -> Dict[str, float]:
    """
    Build the model feature dict from a full ``predict`` run.

    Takes the same arguments as ``predict`` and forwards them unchanged.

    Returns:
        Dict of ``poisson_*`` features: xG values, 1X2 probabilities,
        over 1.5 / 2.5 / 3.5 probabilities and BTTS-yes probability.
    """
    pred = self.predict(
        home_goals_avg, home_conceded_avg,
        away_goals_avg, away_conceded_avg,
        league_home_avg, league_away_avg, league_total_avg
    )

    # (feature name, attribute on the prediction object)
    feature_sources = (
        ("poisson_home_xg", "home_xg"),
        ("poisson_away_xg", "away_xg"),
        ("poisson_total_xg", "total_xg"),
        ("poisson_home_win", "home_win_prob"),
        ("poisson_draw", "draw_prob"),
        ("poisson_away_win", "away_win_prob"),
        ("poisson_over_15", "over_15_prob"),
        ("poisson_over_25", "over_25_prob"),
        ("poisson_over_35", "over_35_prob"),
        ("poisson_btts_yes", "btts_yes_prob"),
    )
    return {feature: getattr(pred, attr) for feature, attr in feature_sources}
|
||||||
|
|
||||||
|
|
||||||
|
# Singleton
|
||||||
|
_engine_instance = None


def get_poisson_engine() -> PoissonEngine:
    """Return the module-wide PoissonEngine, creating it on first use."""
    global _engine_instance
    if _engine_instance is not None:
        return _engine_instance
    _engine_instance = PoissonEngine()
    return _engine_instance
|
||||||
|
|
||||||
|
|
||||||
|
# Test
|
||||||
|
if __name__ == "__main__":
    engine = get_poisson_engine()

    # Example: strong home side vs weak away side
    banner = "=" * 60
    print(banner)
    print("POISSON ENGINE TEST")
    print("Galatasaray (ev) vs Antalyaspor (deplasman)")
    print(banner)

    sample_inputs = {
        "home_goals_avg": 2.1,     # home side: goals scored at home
        "home_conceded_avg": 0.8,  # home side: goals conceded at home
        "away_goals_avg": 0.9,     # away side: goals scored away
        "away_conceded_avg": 1.8,  # away side: goals conceded away
        "league_home_avg": 1.5,
        "league_away_avg": 1.1,
    }
    pred = engine.predict(**sample_inputs)

    print(f"\n📊 Expected Goals:")
    print(f" Ev Sahibi xG: {pred.home_xg}")
    print(f" Deplasman xG: {pred.away_xg}")
    print(f" Toplam xG: {pred.total_xg}")

    print(f"\n🎯 Maç Sonucu:")
    print(f" 1 (Ev): {pred.home_win_prob*100:.1f}%")
    print(f" X (Beraberlik): {pred.draw_prob*100:.1f}%")
    print(f" 2 (Deplasman): {pred.away_win_prob*100:.1f}%")

    print(f"\n⚽ Alt/Üst:")
    print(f" 2.5 Üst: {pred.over_25_prob*100:.1f}%")
    print(f" 2.5 Alt: {pred.under_25_prob*100:.1f}%")

    print(f"\n🤝 Karşılıklı Gol:")
    print(f" KG Var: {pred.btts_yes_prob*100:.1f}%")
    print(f" KG Yok: {pred.btts_no_prob*100:.1f}%")

    print(f"\n📈 En Olası Skorlar:")
    for score_data in pred.most_likely_scores:
        print(f" {score_data['score']}: {score_data['probability']}%")
|
||||||
Executable
+368
@@ -0,0 +1,368 @@
|
|||||||
|
"""
|
||||||
|
Referee Engine - V9 Feature
|
||||||
|
Hakem profilleri ve maç etki analizi.
|
||||||
|
|
||||||
|
Analiz Edilen Metrikler:
|
||||||
|
- Ortalama kart sayısı (sarı/kırmızı)
|
||||||
|
- Penaltı verme eğilimi
|
||||||
|
- Ev sahibi lehine karar oranı
|
||||||
|
- Maç başına toplam gol ortalaması
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from typing import Dict, Optional, List
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
try:
|
||||||
|
import psycopg2
|
||||||
|
from psycopg2.extras import RealDictCursor
|
||||||
|
except ImportError:
|
||||||
|
psycopg2 = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class RefereeProfile:
    """Aggregated statistics for one referee."""
    referee_name: str
    matches_count: int = 0

    # --- Cards (per-match averages) ---
    avg_yellow_cards: float = 0.0
    avg_red_cards: float = 0.0
    total_cards_per_match: float = 0.0

    # --- Penalties ---
    penalty_rate: float = 0.0  # share of matches in which a penalty was given

    # --- Home-side tendency ---
    home_win_rate: float = 0.0
    home_bias: float = 0.0  # -1 (away bias) to +1 (home bias)

    # --- Goals ---
    avg_goals_per_match: float = 0.0
    over_25_rate: float = 0.0
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class RefereeFeatures:
    """Referee features as consumed by the model."""
    referee_name: str = ""
    referee_matches: int = 0
    referee_avg_yellow: float = 0.0
    referee_avg_red: float = 0.0
    referee_cards_total: float = 0.0
    referee_penalty_rate: float = 0.0
    referee_home_bias: float = 0.0
    referee_avg_goals: float = 0.0
    referee_over25_rate: float = 0.0
    referee_experience: float = 0.0  # 0-1 normalized

    def to_dict(self) -> Dict[str, float]:
        """Return the numeric features as a dict (``referee_name`` is left out)."""
        numeric = {'referee_matches': float(self.referee_matches)}
        for feature in (
            'referee_avg_yellow',
            'referee_avg_red',
            'referee_cards_total',
            'referee_penalty_rate',
            'referee_home_bias',
            'referee_avg_goals',
            'referee_over25_rate',
            'referee_experience',
        ):
            numeric[feature] = getattr(self, feature)
        return numeric
|
||||||
|
|
||||||
|
|
||||||
|
class RefereeEngine:
    """
    Referee analysis engine.

    Analyses a referee's past matches to derive:
    - card tendencies
    - home-side bias
    - goal averages

    Database access is best-effort: when no connection is available or a
    query fails, the public methods log the problem and return default
    (zeroed) values instead of raising.
    """

    # Fallback id of the main-referee role (usually 1 / "Hakem"), used when
    # the role cannot be resolved from the official_roles table.
    MAIN_REFEREE_ROLE_ID = 1

    def __init__(self):
        self.conn = None
        # Keyed by (referee_name, league_id): the same name gets a separate
        # profile for every league scope.
        self._referee_cache: Dict[tuple, RefereeProfile] = {}
        self._cache_loaded = False
        # Role id found in the database; stays None until a lookup succeeds.
        self._main_role_id: Optional[int] = None

    def _connect_db(self):
        """Open a new connection; return it, or None if unavailable."""
        if psycopg2 is None:
            return None
        try:
            from data.db import get_clean_dsn
            self.conn = psycopg2.connect(get_clean_dsn())
            return self.conn
        except Exception as e:
            print(f"[RefereeEngine] DB connection failed: {e}")
            return None

    def get_conn(self):
        """Return a usable connection, reconnecting if needed; None on failure."""
        if self.conn is None or self.conn.closed:
            # Forget a closed handle first, so a failed reconnect yields None
            # instead of handing the dead connection back to the caller.
            self.conn = None
            self._connect_db()
        return self.conn

    def _reset_transaction(self, conn) -> None:
        """Roll back after a failed statement so later queries can run."""
        try:
            conn.rollback()
        except Exception:
            # The connection itself is unusable; drop it so get_conn()
            # reconnects on the next call.
            self.conn = None

    def _get_main_referee_role_id(self) -> int:
        """Resolve the main-referee role id, falling back to the class default."""
        if self._main_role_id is not None:
            return self._main_role_id

        conn = self.get_conn()
        if conn is None:
            return self.MAIN_REFEREE_ROLE_ID

        try:
            with conn.cursor() as cur:
                cur.execute("""
                    SELECT id FROM official_roles
                    WHERE LOWER(name) LIKE '%%hakem%%'
                    AND LOWER(name) NOT LIKE '%%yardımcı%%'
                    AND LOWER(name) NOT LIKE '%%dördüncü%%'
                    LIMIT 1
                """)
                result = cur.fetchone()
        except Exception as e:
            print(f"[RefereeEngine] Error resolving referee role: {e}")
            self._reset_transaction(conn)
            return self.MAIN_REFEREE_ROLE_ID

        if result:
            # Remember the resolved id so it is not re-queried on every call.
            self._main_role_id = result[0]
            return result[0]

        return self.MAIN_REFEREE_ROLE_ID

    def get_referee_for_match(self, match_id: str) -> Optional[str]:
        """Return the name of the match's main referee, or None if unknown."""
        conn = self.get_conn()
        if conn is None:
            return None

        try:
            main_role_id = self._get_main_referee_role_id()

            with conn.cursor() as cur:
                cur.execute("""
                    SELECT name FROM match_officials
                    WHERE match_id = %s AND role_id = %s
                    LIMIT 1
                """, (match_id, main_role_id))
                result = cur.fetchone()
                return result[0] if result else None
        except Exception as e:
            print(f"[RefereeEngine] Error getting referee: {e}")
            self._reset_transaction(conn)
            return None

    def _fetch_referee_matches(self, cur, referee_name, main_role_id, league_id):
        """Fetch up to 100 of the referee's most recent matches that have a score."""
        params = [referee_name, main_role_id]
        league_filter = ""
        if league_id:
            # Static SQL fragment only; the value is still bound as a parameter.
            league_filter = "AND m.league_id = %s"
            params.append(league_id)

        cur.execute(f"""
            SELECT m.id, m.score_home, m.score_away, m.home_team_id, m.away_team_id
            FROM matches m
            JOIN match_officials mo ON m.id = mo.match_id
            WHERE mo.name = %s
            AND mo.role_id = %s
            {league_filter}
            AND m.score_home IS NOT NULL
            AND m.score_away IS NOT NULL
            ORDER BY m.mst_utc DESC
            LIMIT 100
        """, params)
        return cur.fetchall()

    @staticmethod
    def _apply_card_stats(cur, match_ids, profile) -> None:
        """Fill the per-match yellow/red card averages on ``profile``."""
        cur.execute("""
            SELECT
            COUNT(*) FILTER (WHERE event_subtype ILIKE '%%yellow%%') as yellow_count,
            COUNT(*) FILTER (WHERE event_subtype ILIKE '%%red%%' OR event_subtype ILIKE '%%second%%') as red_count
            FROM match_player_events
            WHERE match_id = ANY(%s) AND event_type = 'card'
        """, (match_ids,))

        card_stats = cur.fetchone()
        if card_stats:
            profile.avg_yellow_cards = (card_stats['yellow_count'] or 0) / profile.matches_count
            profile.avg_red_cards = (card_stats['red_count'] or 0) / profile.matches_count
            profile.total_cards_per_match = profile.avg_yellow_cards + profile.avg_red_cards

    @staticmethod
    def _apply_penalty_stats(cur, match_ids, profile) -> None:
        """Fill the share of matches that contain a penalty goal event."""
        cur.execute("""
            SELECT COUNT(DISTINCT match_id) as penalty_matches
            FROM match_player_events
            WHERE match_id = ANY(%s)
            AND event_type = 'goal'
            AND event_subtype ILIKE '%%penaltı%%'
        """, (match_ids,))

        penalty_stats = cur.fetchone()
        if penalty_stats:
            profile.penalty_rate = (penalty_stats['penalty_matches'] or 0) / profile.matches_count

    @staticmethod
    def _apply_result_stats(matches, profile) -> None:
        """Fill goal averages, over-2.5 rate, home win rate and home bias."""
        home_wins = 0
        total_goals = 0
        over_25_count = 0

        for m in matches:
            # Both scores are non-NULL: the match query filters on that.
            goals = m['score_home'] + m['score_away']
            total_goals += goals
            if goals > 2.5:
                over_25_count += 1
            if m['score_home'] > m['score_away']:
                home_wins += 1

        profile.avg_goals_per_match = total_goals / profile.matches_count
        profile.over_25_rate = over_25_count / profile.matches_count
        profile.home_win_rate = home_wins / profile.matches_count

        # Home bias: -1 (favours away) to +1 (favours home), measured against
        # a typical league home-win rate of ~46% and clamped to that range.
        expected_home_rate = 0.46
        profile.home_bias = (profile.home_win_rate - expected_home_rate) * 2
        profile.home_bias = max(-1, min(1, profile.home_bias))

    def calculate_referee_profile(self, referee_name: str, league_id: str = None) -> RefereeProfile:
        """
        Analyse the referee's matches and return the resulting profile.

        Args:
            referee_name: Referee name as stored in match_officials.
            league_id: When given, only matches of that league are used.

        Returns:
            The profile. Only profiles with at least one match that were
            computed without error are cached; with no connection or no
            matches a fresh default profile is returned.
        """
        # Composite cache key: same name, different league -> different profile
        cache_key = (referee_name, league_id)
        if cache_key in self._referee_cache:
            return self._referee_cache[cache_key]

        profile = RefereeProfile(referee_name=referee_name)

        conn = self.get_conn()
        if conn is None:
            return profile

        try:
            main_role_id = self._get_main_referee_role_id()

            with conn.cursor(cursor_factory=RealDictCursor) as cur:
                matches = self._fetch_referee_matches(cur, referee_name, main_role_id, league_id)
                profile.matches_count = len(matches)
                if profile.matches_count == 0:
                    return profile

                match_ids = [m['id'] for m in matches]
                self._apply_card_stats(cur, match_ids, profile)
                self._apply_penalty_stats(cur, match_ids, profile)

            self._apply_result_stats(matches, profile)

            self._referee_cache[cache_key] = profile
            return profile

        except Exception as e:
            print(f"[RefereeEngine] Error calculating profile: {e}")
            # Without a rollback the shared connection would stay in an
            # aborted transaction and reject every following query.
            self._reset_transaction(conn)
            return profile

    def _features_for_referee(self, referee_name: str, league_id: str = None) -> Dict[str, float]:
        """Compute the profile for ``referee_name`` and map it to model features."""
        profile = self.calculate_referee_profile(referee_name, league_id=league_id)
        features = RefereeFeatures(
            referee_name=referee_name,
            referee_matches=profile.matches_count,
            referee_avg_yellow=profile.avg_yellow_cards,
            referee_avg_red=profile.avg_red_cards,
            referee_cards_total=profile.total_cards_per_match,
            referee_penalty_rate=profile.penalty_rate,
            referee_home_bias=profile.home_bias,
            referee_avg_goals=profile.avg_goals_per_match,
            referee_over25_rate=profile.over_25_rate,
            # Experience: 50+ matches = 1.0, 0 matches = 0.0
            referee_experience=min(profile.matches_count / 50, 1.0),
        )
        return features.to_dict()

    def get_features(self, match_id: str, league_id: str = None) -> Dict[str, float]:
        """
        Compute the referee features for a match.

        Args:
            match_id: Match id.
            league_id: League id (optional; scopes the profile to one league
                to avoid name collisions).

        Returns:
            Referee features as a dict; default values when the match has no
            known main referee.
        """
        referee_name = self.get_referee_for_match(match_id)
        if referee_name is None:
            return RefereeFeatures().to_dict()

        return self._features_for_referee(referee_name, league_id=league_id)

    def get_features_by_name(self, referee_name: str, league_id: str = None) -> Dict[str, float]:
        """
        Compute the referee features from a referee name.

        Args:
            referee_name: Referee name.
            league_id: League id (optional; scopes the profile to one league
                to avoid name collisions).

        Returns:
            Referee features as a dict; default values when the name is empty.
        """
        if not referee_name:
            return RefereeFeatures().to_dict()

        return self._features_for_referee(referee_name, league_id=league_id)
|
||||||
|
|
||||||
|
|
||||||
|
# Singleton instance
|
||||||
|
_engine: Optional[RefereeEngine] = None


def get_referee_engine() -> RefereeEngine:
    """Return the module-wide RefereeEngine, creating it on first use."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = RefereeEngine()
    return _engine
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke test: print the features of one known referee name.
    engine = get_referee_engine()

    print("\n🧪 Referee Engine Test")
    print("=" * 50)

    test_referee = "Cüneyt Çakır"
    features = engine.get_features_by_name(test_referee)

    print(f"\n📊 Hakem: {test_referee}")
    for key in features:
        value = features[key]
        print(f" {key}: {value:.3f}")
|
||||||
@@ -0,0 +1,243 @@
|
|||||||
|
"""
|
||||||
|
V27 Rolling Window Feature Calculator
|
||||||
|
======================================
|
||||||
|
Computes rolling averages over 5/10/20 match windows,
|
||||||
|
with home/away splits and trend detection.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
from typing import Dict, List, Tuple
|
||||||
|
import math
|
||||||
|
|
||||||
|
|
||||||
|
def calc_rolling_features(
    team_matches: List[Tuple],  # [(mst, is_home, team_goals, opp_goals, opp_id), ...]
    before_date: int,
    team_is_home: bool,
) -> Dict[str, float]:
    """
    Calculate rolling-window features for a team before a given date.

    Only matches with ``mst < before_date`` are used. With fewer than three
    such matches the fixed default values are returned.

    NOTE(review): the "last N" slices assume ``team_matches`` is ordered
    oldest-first — confirm against the caller.

    Returns:
        Goals / conceded averages over the last 5, 10 and 20 matches, the
        clean-sheet rate over the last 5, venue-specific averages over the
        last 10 matches played at the same venue type, and ``goal_trend``
        (last 3 vs. previous 3 goal averages).
    """
    fallback = {
        "rolling5_goals_avg": 1.3, "rolling5_conceded_avg": 1.2,
        "rolling10_goals_avg": 1.3, "rolling10_conceded_avg": 1.2,
        "rolling20_goals_avg": 1.3, "rolling20_conceded_avg": 1.2,
        "rolling5_clean_sheets": 0.25,
        "venue_goals_avg": 1.3, "venue_conceded_avg": 1.2,
        "goal_trend": 0.0,
    }

    history = [row for row in team_matches if row[0] < before_date]
    if len(history) < 3:
        return fallback

    features = {}

    # A negative slice already yields the whole list when it is shorter
    # than the window.
    for span in (5, 10, 20):
        window_rows = history[-span:]
        played = len(window_rows)
        features[f"rolling{span}_goals_avg"] = sum(row[2] for row in window_rows) / played
        features[f"rolling{span}_conceded_avg"] = sum(row[3] for row in window_rows) / played

    # Clean sheet rate (last 5)
    last_five = history[-5:]
    shutouts = sum(1 for row in last_five if row[3] == 0)
    features["rolling5_clean_sheets"] = shutouts / len(last_five)

    # Venue-specific (home-only or away-only), last 10 at most
    same_venue = [row for row in history if row[1] == team_is_home][-10:]
    if not same_venue:
        features["venue_goals_avg"] = fallback["venue_goals_avg"]
        features["venue_conceded_avg"] = fallback["venue_conceded_avg"]
    else:
        features["venue_goals_avg"] = sum(row[2] for row in same_venue) / len(same_venue)
        features["venue_conceded_avg"] = sum(row[3] for row in same_venue) / len(same_venue)

    # Goal trend: compare last 3 vs previous 3
    if len(history) < 6:
        features["goal_trend"] = 0.0
    else:
        newest_avg = sum(row[2] for row in history[-3:]) / 3
        older_avg = sum(row[2] for row in history[-6:-3]) / 3
        features["goal_trend"] = newest_avg - older_avg

    return features
|
||||||
|
|
||||||
|
|
||||||
|
def calc_league_quality(
    all_matches: List[Tuple],  # all FT matches in this league
) -> Dict[str, float]:
    """
    Calculate league-level quality features.

    With fewer than 20 matches the fixed default values are returned.
    Index 2 and index 3 of each row are compared as the two sides' goals
    (index 2 greater counts as a home win).
    NOTE(review): presumably index 2 is the home score and index 3 the away
    score — confirm against the caller.

    Returns:
        Home-win, draw, BTTS and over-2.5 rates plus a reliability score
        clamped to [0.2, 0.95]; all rounded to 4 decimals.
    """
    match_total = len(all_matches)
    if match_total < 20:
        return {
            "league_home_win_rate": 0.45,
            "league_draw_rate": 0.25,
            "league_btts_rate": 0.50,
            "league_ou25_rate": 0.50,
            "league_reliability_score": 0.50,
        }

    home_win_count = 0
    draw_count = 0
    btts_count = 0
    over25_count = 0
    for row in all_matches:
        first, second = row[2], row[3]
        if first > second:
            home_win_count += 1
        if first == second:
            draw_count += 1
        if first > 0 and second > 0:
            btts_count += 1
        if (first + second) > 2.5:
            over25_count += 1

    hw_rate = home_win_count / match_total
    dr_rate = draw_count / match_total
    btts_rate = btts_count / match_total
    ou25_rate = over25_count / match_total

    # Reliability: leagues closer to averages are more predictable
    predictability = 1.0 - abs(hw_rate - 0.45) - abs(dr_rate - 0.27) * 0.5
    reliability = max(0.2, min(0.95, predictability))

    rates = (
        ("league_home_win_rate", hw_rate),
        ("league_draw_rate", dr_rate),
        ("league_btts_rate", btts_rate),
        ("league_ou25_rate", ou25_rate),
        ("league_reliability_score", reliability),
    )
    return {name: round(value, 4) for name, value in rates}
|
||||||
|
|
||||||
|
|
||||||
|
def calc_time_features(
    team_matches: List[Tuple],
    match_mst: int,
) -> Dict[str, float]:
    """
    Calculate time-based features for a match.

    Args:
        team_matches: Team match rows whose first element is the match
            timestamp in milliseconds.
            NOTE(review): the "last match" lookup assumes the rows are
            ordered oldest-first — confirm against the caller.
        match_mst: Timestamp of the match being evaluated, in milliseconds.

    Returns:
        ``days_rest`` (days since the previous match, capped at 60; 14 when
        there is no earlier match), ``match_month`` (UTC month) and the
        ``is_season_start`` / ``is_season_end`` flags.
    """
    # Local import: datetime.utcfromtimestamp() is deprecated since Python
    # 3.12, so the month is derived from a timezone-aware UTC datetime.
    from datetime import datetime, timezone

    # Days since last match
    earlier = [m for m in team_matches if m[0] < match_mst]
    if earlier:
        last_mst = earlier[-1][0]
        days_rest = (match_mst - last_mst) / 86_400_000  # ms to days
        days_rest = min(days_rest, 60.0)  # cap at 60 days
    else:
        days_rest = 14.0

    # Month and season flags
    try:
        month = datetime.fromtimestamp(match_mst / 1000, tz=timezone.utc).month
    except (OverflowError, OSError, ValueError, TypeError):
        # Timestamp cannot be converted: fall back to neutral values.
        month = 6
        is_season_start = 0.0
        is_season_end = 0.0
    else:
        is_season_start = 1.0 if month in (7, 8) else 0.0
        is_season_end = 1.0 if month in (5, 6) else 0.0

    return {
        "days_rest": round(days_rest, 2),
        "match_month": month,
        "is_season_start": is_season_start,
        "is_season_end": is_season_end,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def calc_advanced_h2h(
    team_matches: List[Tuple],
    home_id: int,
    away_id: int,
    before_date: int,
) -> Dict[str, float]:
    """
    Calculate advanced head-to-head (H2H) features.

    Uses the rows of ``team_matches`` whose opponent id (index 4) equals
    ``away_id`` and whose timestamp (index 0) is before ``before_date``.
    ``home_id`` is accepted but not used by the calculation.
    NOTE(review): presumably ``team_matches`` holds the home team's matches,
    oldest-first — confirm against the caller.

    Returns:
        Default values when there is no earlier meeting; otherwise goal
        averages per side over the last 10 meetings, the team's win rate in
        the meetings it hosted (0.5 if it hosted none) and the recent trend
        (points rate of the last 3 meetings minus the overall rate, only
        with 4+ meetings).
    """
    meetings = [
        row for row in team_matches
        if row[4] == away_id and row[0] < before_date
    ]
    if not meetings:
        return {
            "h2h_home_goals_avg": 1.3,
            "h2h_away_goals_avg": 1.1,
            "h2h_recent_trend": 0.0,
            "h2h_venue_advantage": 0.0,
        }

    latest = meetings[-10:]
    home_side_goals = 0
    away_side_goals = 0
    hosted_total = 0
    hosted_wins = 0

    for _mst, played_at_home, scored, conceded, _opp in latest:
        if not played_at_home:
            # Team was the visitor: its opponent's goals are the home goals.
            home_side_goals += conceded
            away_side_goals += scored
            continue
        home_side_goals += scored
        away_side_goals += conceded
        hosted_total += 1
        if scored > conceded:
            hosted_wins += 1

    meeting_count = len(latest)
    features = {
        "h2h_home_goals_avg": home_side_goals / meeting_count,
        "h2h_away_goals_avg": away_side_goals / meeting_count,
        "h2h_venue_advantage": hosted_wins / hosted_total if hosted_total > 0 else 0.5,
    }

    def points(row):
        """Win = 1.0, draw = 0.5, loss = 0.0 from the team's perspective."""
        if row[2] > row[3]:
            return 1.0
        return 0.5 if row[2] == row[3] else 0.0

    # Recent trend: last 3 vs overall
    if len(meetings) < 4:
        features["h2h_recent_trend"] = 0.0
    else:
        recent_rate = sum(points(row) for row in meetings[-3:]) / 3
        overall_rate = sum(points(row) for row in meetings) / len(meetings)
        features["h2h_recent_trend"] = round(recent_rate - overall_rate, 4)

    return features
|
||||||
|
|
||||||
|
|
||||||
|
def calc_strength_diff(
    home_form: Dict[str, float],
    away_form: Dict[str, float],
    home_elo: Dict[str, float],
    away_elo: Dict[str, float],
    home_momentum: float,
    away_momentum: float,
    upset_potential: float,
) -> Dict[str, float]:
    """
    Calculate strength-differential features between the two sides.

    Missing form keys default to 1.3 (``goals_avg``), 1.2 (``conceded_avg``)
    and 0.75 (``scoring_rate``); a missing ``overall`` ELO defaults to 1500.

    Returns:
        Attack-vs-defense gaps for both sides, an xG-difference
        approximation, a form x momentum interaction, an ELO/form agreement
        flag (1.0 when both point the same way) and upset potential scaled
        by the ELO gap. All values except the flag are rounded to 4 decimals.
    """
    home_scored = home_form.get("goals_avg", 1.3)
    away_conceded = away_form.get("conceded_avg", 1.2)
    away_scored = away_form.get("goals_avg", 1.3)
    home_conceded = home_form.get("conceded_avg", 1.2)

    # Attack vs defense mismatches (positive = attack outweighs the defense)
    home_edge = home_scored - away_conceded
    away_edge = away_scored - home_conceded

    # xG diff approximation
    xg_gap = (home_scored + away_conceded) / 2 - (away_scored + home_conceded) / 2

    # Form x momentum interaction
    momentum_gap = home_momentum - away_momentum
    scoring_gap = home_form.get("scoring_rate", 0.75) - away_form.get("scoring_rate", 0.75)
    interaction = momentum_gap * scoring_gap

    # ELO-form consistency: do the ELO gap and the scoring gap agree in sign?
    elo_delta = home_elo.get("overall", 1500) - away_elo.get("overall", 1500)
    form_delta = home_scored - away_scored
    if (elo_delta > 0 and form_delta > 0) or (elo_delta < 0 and form_delta < 0):
        agreement = 1.0
    else:
        agreement = 0.0

    # Upset x ELO gap
    upset_scaled = upset_potential * (abs(elo_delta) / 400.0)

    return {
        "attack_vs_defense_home": round(home_edge, 4),
        "attack_vs_defense_away": round(away_edge, 4),
        "xg_diff": round(xg_gap, 4),
        "form_momentum_interaction": round(interaction, 4),
        "elo_form_consistency": agreement,
        "upset_x_elo_gap": round(upset_scaled, 4),
    }
|
||||||
Executable
+408
@@ -0,0 +1,408 @@
|
|||||||
|
"""
|
||||||
|
Sidelined Analyzer — Injury & Suspension Impact Calculator
|
||||||
|
==========================================================
|
||||||
|
Parses sidelined JSON from live_matches and calculates
|
||||||
|
position-weighted missing player impact using ACTUAL player
|
||||||
|
statistics from the database (goals, assists, starting frequency).
|
||||||
|
|
||||||
|
Senior ML Engineer Principle: No magic numbers — all weights from config.
|
||||||
|
Data Quality: Cross-reference sidelined IDs with DB for real impact.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Dict, List, Optional, Any, Tuple
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
try:
|
||||||
|
import psycopg2
|
||||||
|
from psycopg2.extras import RealDictCursor
|
||||||
|
except ImportError:
|
||||||
|
psycopg2 = None
|
||||||
|
|
||||||
|
from config.config_loader import get_config
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class PlayerImpactDetail:
    """Impact breakdown for one sidelined player."""
    # Identity and computed impact (required)
    player_id: str
    player_name: str
    position: str
    impact_score: float

    # Statistics looked up in the database
    db_goals: int = 0
    db_assists: int = 0
    db_starts: int = 0
    db_rating: float = 0.0  # Calculated from DB stats

    # Flags
    is_key_player: bool = False
    adaptation_applied: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class SidelinedImpact:
    """Sidelined-player impact analysis for a single team."""
    total_sidelined: int = 0
    impact_score: float = 0.0  # 0.0 - 1.0 (normalized)
    key_position_missing: bool = False  # GK or 2+ same position missing
    key_players_missing: int = 0  # How many key players are missing

    # Containers use default_factory so every instance gets its own object.
    position_breakdown: Dict[str, int] = field(default_factory=dict)
    player_details: List[PlayerImpactDetail] = field(default_factory=list)
    details: List[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
class SidelinedAnalyzer:
    """
    Analyzes sidelined player data with DB-backed statistics.

    Impact formula per player:
        player_impact = position_weight × rating_factor × adaptation_factor × type_factor

    Where:
    - position_weight: from config, keyed by the player's ``positionShort``
    - rating_factor: rating computed from DB goals + assists + starts, or the
      normalized ``average`` field of the sidelined entry when the DB has no
      row for the player; never lower than 0.15
    - adaptation_factor: 1.0 normally, ``adaptation_discount`` once
      ``matchesMissed`` reaches ``adaptation_threshold``
    - type_factor: 1.0 when ``type`` is "injury", 0.8 otherwise

    DB Query: cross-references sidelined player IDs with match_player_events
    and match_player_participation (finished matches only).

    NOTE(review): ``confidence_boost`` and ``recent_matches_lookback`` are
    loaded from config but not used by any method of this class; in
    particular the SQL below is not limited to recent matches.
    """

    def __init__(self):
        """Load configuration and try to open the DB connection right away."""
        self.config = get_config()
        self.conn = None
        self._load_config()
        self._connect_db()

    def _load_config(self):
        """Load all config values once at init."""
        cfg = self.config
        self.position_weights = cfg.get("sidelined.position_weights", {
            "K": 0.35, "D": 0.20, "O": 0.25, "F": 0.30
        })
        self.max_rating = cfg.get("sidelined.max_rating", 10)
        self.adaptation_threshold = cfg.get("sidelined.adaptation_threshold", 10)
        self.adaptation_discount = cfg.get("sidelined.adaptation_discount", 0.5)
        self.goalkeeper_penalty = cfg.get("sidelined.goalkeeper_penalty", 0.15)
        self.confidence_boost = cfg.get("sidelined.confidence_boost", 10)
        self.max_impact = cfg.get("sidelined.max_impact", 0.85)
        self.key_player_threshold = cfg.get("sidelined.key_player_threshold", 3)
        self.recent_matches_lookback = cfg.get("sidelined.recent_matches_lookback", 15)

    @staticmethod
    def _safe_int(value: Any, default: int = 0) -> int:
        """Convert *value* to int (via float, truncating), or return *default*.

        ``None``, ``""``, unparsable text, NaN and infinities / out-of-range
        numbers all yield *default* instead of raising.
        """
        try:
            if value is None or value == "":
                return default
            return int(float(value))
        except (TypeError, ValueError, OverflowError):
            # OverflowError: int(float("inf")) or float() of an enormous int.
            return default

    @staticmethod
    def _safe_float(value: Any, default: float = 0.0) -> float:
        """Convert *value* to a finite float, or return *default*.

        NaN and ±inf are rejected as well: a NaN would otherwise pass
        straight through the ``min``/``max`` clamps used in ``analyze`` and
        turn the whole impact score into NaN.
        """
        try:
            if value is None or value == "":
                return default
            result = float(value)
        except (TypeError, ValueError, OverflowError):
            return default
        # `result != result` is only true for NaN.
        if result != result or abs(result) == float("inf"):
            return default
        return result

    def _connect_db(self):
        """Open the DB connection; leave ``self.conn`` as None on failure.

        Does nothing when psycopg2 is not installed.
        """
        if psycopg2 is None:
            return
        try:
            from data.db import get_clean_dsn
            self.conn = psycopg2.connect(get_clean_dsn())
        except Exception as e:
            print(f"[SidelinedAnalyzer] DB connection failed: {e}")
            self.conn = None

    def _get_conn(self):
        """Get or reconnect DB connection."""
        if self.conn is None or self.conn.closed:
            self._connect_db()
        return self.conn

    def _fetch_player_stats(self, player_ids: List[str]) -> Dict[str, Dict]:
        """
        Fetch real player statistics from DB for given player IDs.

        Returns dict keyed by player_id with:
            goals: int, assists: int, starts: int, matches: int

        On a query error the error is printed and whatever was collected so
        far is returned. The cursor is always closed and the (read-only)
        transaction is always ended, so the long-lived connection is not
        left "idle in transaction" between calls.
        """
        conn = self._get_conn()
        if not conn or not player_ids:
            return {}

        to_int = self._safe_int
        stats: Dict[str, Dict] = {}
        cur = None
        try:
            cur = conn.cursor(cursor_factory=RealDictCursor)

            # 1. Goals from match_player_events + Assists via assist_player_id
            cur.execute("""
                SELECT
                    sub.player_id,
                    SUM(sub.goals) AS goals,
                    SUM(sub.assists) AS assists
                FROM (
                    -- Goals: player scored
                    SELECT mpe.player_id,
                           COUNT(*) AS goals,
                           0 AS assists
                    FROM match_player_events mpe
                    JOIN matches m ON mpe.match_id = m.id
                    WHERE mpe.player_id = ANY(%s)
                      AND mpe.event_type = 'goal'
                      AND m.status = 'FT'
                    GROUP BY mpe.player_id

                    UNION ALL

                    -- Assists: player assisted
                    SELECT mpe.assist_player_id AS player_id,
                           0 AS goals,
                           COUNT(*) AS assists
                    FROM match_player_events mpe
                    JOIN matches m ON mpe.match_id = m.id
                    WHERE mpe.assist_player_id = ANY(%s)
                      AND mpe.event_type = 'goal'
                      AND m.status = 'FT'
                    GROUP BY mpe.assist_player_id
                ) sub
                GROUP BY sub.player_id
            """, (player_ids, player_ids))

            for row in cur.fetchall():
                # SUM() over bigint comes back as numeric (Decimal in
                # psycopg2); normalize to plain ints as documented.
                stats[row["player_id"]] = {
                    "goals": to_int(row["goals"]),
                    "assists": to_int(row["assists"]),
                    "starts": 0,
                    "matches": 0
                }

            # 2. Starting frequency from match_player_participation
            cur.execute("""
                SELECT
                    mpp.player_id,
                    COUNT(*) AS total_matches,
                    COUNT(*) FILTER (WHERE mpp.is_starting = true) AS starts
                FROM match_player_participation mpp
                JOIN matches m ON mpp.match_id = m.id
                WHERE mpp.player_id = ANY(%s)
                  AND m.status = 'FT'
                GROUP BY mpp.player_id
            """, (player_ids,))

            for row in cur.fetchall():
                entry = stats.setdefault(
                    row["player_id"],
                    {"goals": 0, "assists": 0, "starts": 0, "matches": 0},
                )
                entry["starts"] = to_int(row["starts"])
                entry["matches"] = to_int(row["total_matches"])
        except Exception as e:
            print(f"[SidelinedAnalyzer] DB query error: {e}")
        finally:
            # Best-effort cleanup: neither step may mask the result/error.
            if cur is not None:
                try:
                    cur.close()
                except Exception:
                    pass
            try:
                # Ends the transaction on both the success and error path
                # (nothing was written, so rollback is equivalent to commit).
                conn.rollback()
            except Exception:
                pass

        return stats

    def _calculate_db_rating(self, db_stats: Dict, position: str) -> float:
        """
        Calculate player rating from DB statistics.

        Rating is 0.0 - 1.0, where 1.0 = absolute key player.

        Factors:
        - Goals (weighted by position: Forwards value more, Defenders less)
        - Assists
        - Starting frequency (regulars > squad players)

        Args:
            db_stats: mapping with optional "goals", "assists", "starts",
                "matches" entries; missing or non-numeric values count as 0.
            position: position code ("F", "O", "D", "K"); any other value
                uses the fallback weights.
        """
        goals = self._safe_float(db_stats.get("goals", 0))
        assists = self._safe_float(db_stats.get("assists", 0))
        starts = self._safe_float(db_stats.get("starts", 0))
        matches = self._safe_float(db_stats.get("matches", 0))

        # Goal contribution weight by position
        # Forwards: goals matter most
        # Midfielders: balanced
        # Defenders: starts matter more than goals
        # Goalkeeper: starts are everything
        goal_weight = {"F": 0.5, "O": 0.35, "D": 0.15, "K": 0.05}.get(position, 0.25)
        assist_weight = {"F": 0.2, "O": 0.3, "D": 0.15, "K": 0.0}.get(position, 0.15)
        start_weight = {"F": 0.3, "O": 0.35, "D": 0.7, "K": 0.95}.get(position, 0.5)

        # Normalize each component to 0-1
        # Goals: 5+ goals = max
        goal_factor = min(goals / 5.0, 1.0) if goals > 0 else 0.0
        # Assists: 4+ assists = max
        assist_factor = min(assists / 4.0, 1.0) if assists > 0 else 0.0
        # Starts: 80%+ start rate = max regular
        start_rate = starts / max(matches, 1)
        start_factor = min(start_rate / 0.8, 1.0)

        rating = (goal_factor * goal_weight +
                  assist_factor * assist_weight +
                  start_factor * start_weight)

        return round(min(rating, 1.0), 4)

    def analyze(self, team_data: Optional[Dict[str, Any]]) -> SidelinedImpact:
        """
        Analyze sidelined data for a single team using DB-backed stats.

        Args:
            team_data: dict with 'players' list and 'totalSidelined' count.
                Entries of 'players' that are not dicts are ignored.

        Returns:
            SidelinedImpact with calculated impact score and breakdown.
        """
        if not team_data or not isinstance(team_data, dict):
            return SidelinedImpact()

        raw_players = team_data.get("players") or []
        if not isinstance(raw_players, (list, tuple)):
            raw_players = []
        # Filter malformed entries up front so the ID collection below
        # cannot fail on them.
        players = [p for p in raw_players if isinstance(p, dict)]
        if not players:
            return SidelinedImpact(
                total_sidelined=team_data.get("totalSidelined", 0)
            )

        # Collect player IDs for batch DB query
        player_ids = [p["playerId"] for p in players if p.get("playerId")]

        # Batch fetch DB stats (two queries for the whole team, not per player)
        db_stats = self._fetch_player_stats(player_ids) if player_ids else {}

        total_impact = 0.0
        position_counts: Dict[str, int] = {}
        player_details: List[PlayerImpactDetail] = []
        details: List[str] = []
        has_gk_missing = False
        key_players_count = 0

        for player in players:
            pos = player.get("positionShort", "O")
            name = player.get("playerName", "Unknown")
            pid = player.get("playerId", "")
            matches_missed = self._safe_int(player.get("matchesMissed", 0), 0)
            player_type = player.get("type", "other")
            mackolik_avg = self._safe_float(player.get("average", 0), 0.0)

            position_counts[pos] = position_counts.get(pos, 0) + 1

            if pos == "K":
                has_gk_missing = True

            # === Rating: DB first, mackolik fallback ===
            p_db_stats = db_stats.get(pid, {})
            # Plain ints even if the stats provider handed back Decimals.
            db_goals = self._safe_int(p_db_stats.get("goals", 0), 0)
            db_assists = self._safe_int(p_db_stats.get("assists", 0), 0)
            db_starts = self._safe_int(p_db_stats.get("starts", 0), 0)

            if p_db_stats:
                # Use real DB stats
                db_rating = self._calculate_db_rating(p_db_stats, pos)
            else:
                # Fallback to mackolik average (normalized)
                db_rating = min(mackolik_avg / self.max_rating, 1.0) if self.max_rating > 0 else 0.3
                db_rating = max(db_rating, 0.15)  # Minimum floor

            # Key player check
            is_key = db_rating >= 0.5 or db_goals >= self.key_player_threshold
            if is_key:
                key_players_count += 1

            # === Impact Calculation ===
            pos_weight = self.position_weights.get(pos, 0.20)

            # Rating factor: higher rated = bigger loss
            rating_factor = max(db_rating, 0.15)  # Even unknown players have minimum impact

            # Adaptation: team has coped if player missed many matches
            adapted = matches_missed >= self.adaptation_threshold
            adapt_factor = self.adaptation_discount if adapted else 1.0

            # Type factor
            type_factor = 1.0 if player_type == "injury" else 0.8

            player_impact = pos_weight * rating_factor * adapt_factor * type_factor
            total_impact += player_impact

            detail = PlayerImpactDetail(
                player_id=pid,
                player_name=name,
                position=pos,
                impact_score=round(player_impact, 4),
                db_goals=db_goals,
                db_assists=db_assists,
                db_starts=db_starts,
                db_rating=db_rating,
                is_key_player=is_key,
                adaptation_applied=adapted
            )
            player_details.append(detail)

            db_info = f"G:{detail.db_goals} A:{detail.db_assists} S:{detail.db_starts}" if p_db_stats else "no DB data"
            details.append(
                f"{name} ({pos}, db_rating:{db_rating:.2f}, {db_info}) → impact:{player_impact:.3f}"
                + (" ⭐ KEY" if is_key else "")
                + (f" [adapted, {matches_missed} missed]" if adapted else "")
            )

        # GK penalty bonus
        if has_gk_missing:
            total_impact += self.goalkeeper_penalty

        key_position_missing = has_gk_missing or any(v >= 2 for v in position_counts.values())

        # Normalize to 0-1 range
        normalization_cap = 1.5
        normalized_impact = min(total_impact / normalization_cap, self.max_impact)

        return SidelinedImpact(
            total_sidelined=len(players),
            impact_score=round(normalized_impact, 4),
            key_position_missing=key_position_missing,
            key_players_missing=key_players_count,
            position_breakdown=position_counts,
            player_details=player_details,
            details=details
        )

    def analyze_match(self, sidelined_json: Optional[Dict[str, Any]]) -> Tuple[SidelinedImpact, SidelinedImpact]:
        """
        Analyze sidelined data for both teams.

        Args:
            sidelined_json: dict with optional 'homeTeam' and 'awayTeam'
                entries, each in the shape accepted by ``analyze``.

        Returns:
            (home_impact, away_impact)
        """
        if not sidelined_json or not isinstance(sidelined_json, dict):
            return SidelinedImpact(), SidelinedImpact()

        home_impact = self.analyze(sidelined_json.get("homeTeam"))
        away_impact = self.analyze(sidelined_json.get("awayTeam"))
        return home_impact, away_impact
|
||||||
|
|
||||||
|
|
||||||
|
# Singleton
|
||||||
|
# Module-wide cached instance; created on first request.
_analyzer: Optional[SidelinedAnalyzer] = None


def get_sidelined_analyzer() -> SidelinedAnalyzer:
    """Return the shared SidelinedAnalyzer, constructing it on first use."""
    global _analyzer
    if _analyzer is not None:
        return _analyzer
    _analyzer = SidelinedAnalyzer()
    return _analyzer
|
||||||
@@ -0,0 +1,357 @@
|
|||||||
|
"""
|
||||||
|
Smart Bet Recommender
|
||||||
|
=====================
|
||||||
|
|
||||||
|
Skor tahminine göre akıllı bahis önerileri yapan sistem.
|
||||||
|
|
||||||
|
Örnek: Beşiktaş-Galatasaray için model 3-1 tahmin ediyor
|
||||||
|
→ DÜŞÜK RİSK: 1.5 Üst (yüksek ihtimal tutar)
|
||||||
|
→ ORTA RİSK: MS 1 + 2.5 Üst (orta ihtimal)
|
||||||
|
→ YÜKSEK RİSK: 3.5 Üst veya skor 3-1 (düşük ihtimal, yüksek kazanç)
|
||||||
|
|
||||||
|
Ayrıca kombinasyonlar:
|
||||||
|
- MS 1 + 1.5 Üst
|
||||||
|
- MS 1 + KG Var
|
||||||
|
- Her iki takım skor > 0.5 (her takım en az 1 gol atar)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Dict, List, Optional, Tuple
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
|
||||||
|
class RiskLevel(Enum):
    """Risk tier attached to a bet recommendation."""

    # High probability, low odds (safe)
    LOW = "LOW"
    # Medium probability, medium odds
    MEDIUM = "MEDIUM"
    # Low probability, high payout
    HIGH = "HIGH"
    # Very low probability, very high payout
    EXTREME = "EXTREME"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class BetRecommendation:
    """A single bet recommendation."""

    market: str          # Market name (e.g. "MS 1", "2.5 Üst")
    pick: str            # Selection (e.g. "1", "OVER", "YES")
    odds: float          # Odds for the selection
    probability: float   # Model probability (0-1)
    confidence: float    # Confidence level (0-100)
    risk_level: RiskLevel

    def to_dict(self) -> dict:
        """Return a plain-dict view of the recommendation.

        ``probability`` is emitted as a percentage and, like ``confidence``,
        rounded to one decimal; ``risk_level`` is emitted as its enum value.
        """
        probability_pct = round(self.probability * 100, 1)
        confidence_rounded = round(self.confidence, 1)
        payload = {
            "market": self.market,
            "pick": self.pick,
            "odds": self.odds,
            "probability": probability_pct,
            "confidence": confidence_rounded,
            "risk_level": self.risk_level.value,
        }
        return payload
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class MatchPredictionSet:
    """Full prediction bundle for one match: probabilities plus tiered bets."""

    match_name: str
    predicted_score: Tuple[int, int]  # (home, away)
    home_win_prob: float
    draw_prob: float
    away_win_prob: float
    over_15_prob: float
    over_25_prob: float
    over_35_prob: float
    btts_yes_prob: float

    # Recommendations, grouped by risk tier
    low_risk_bets: List[BetRecommendation]
    medium_risk_bets: List[BetRecommendation]
    high_risk_bets: List[BetRecommendation]
    extreme_risk_bets: List[BetRecommendation]

    def to_dict(self) -> dict:
        """Return a plain-dict view of the prediction set.

        Probabilities are emitted as percentages rounded to one decimal, the
        score as "H-A", and each bet through its own ``to_dict``.
        """

        def as_pct(p):
            return round(p * 100, 1)

        def dump(bets):
            return [bet.to_dict() for bet in bets]

        home_goals, away_goals = self.predicted_score[0], self.predicted_score[1]
        probabilities = {
            "home_win": as_pct(self.home_win_prob),
            "draw": as_pct(self.draw_prob),
            "away_win": as_pct(self.away_win_prob),
            "over_15": as_pct(self.over_15_prob),
            "over_25": as_pct(self.over_25_prob),
            "over_35": as_pct(self.over_35_prob),
            "btts": as_pct(self.btts_yes_prob),
        }
        return {
            "match_name": self.match_name,
            "predicted_score": f"{home_goals}-{away_goals}",
            "probs": probabilities,
            "low_risk": dump(self.low_risk_bets),
            "medium_risk": dump(self.medium_risk_bets),
            "high_risk": dump(self.high_risk_bets),
            "extreme_risk": dump(self.extreme_risk_bets),
        }
|
||||||
|
|
||||||
|
|
||||||
|
class SmartBetRecommender:
    """
    Smart bet recommendation system.

    Suggests bets in several risk tiers from a predicted score and a set of
    market probabilities.

    Tiers produced by ``recommend``:
    1. LOW: 1.5 Over and Double Chance, each only at probability >= 70%.
    2. MEDIUM: match result on the favourite, 2.5 Over and BTTS Yes when
       their probability lies in [50%, 70%), plus the favourite + 2.5 Over
       combination.
    3. HIGH: 3.5 Over at [30%, 50%), exact score for high-scoring
       predictions, and the favourite + 3.5 Over combination.
    4. EXTREME: favourite + BTTS + 2.5 Over triple combination.
    """

    # Probability thresholds
    PROB_LOW_RISK = 0.70     # >= 70% probability
    PROB_MEDIUM_RISK = 0.50  # 50-70% probability
    PROB_HIGH_RISK = 0.30    # 30-50% probability
    # below 30% = EXTREME

    def __init__(self):
        """No state to set up."""

    def _determine_risk(self, probability: float) -> RiskLevel:
        """Map a probability to its risk tier using the class thresholds."""
        bands = (
            (self.PROB_LOW_RISK, RiskLevel.LOW),
            (self.PROB_MEDIUM_RISK, RiskLevel.MEDIUM),
            (self.PROB_HIGH_RISK, RiskLevel.HIGH),
        )
        for floor, level in bands:
            if probability >= floor:
                return level
        return RiskLevel.EXTREME

    def _get_favorite(self, home_prob: float, draw_prob: float, away_prob: float) -> Tuple[str, float]:
        """Return the most likely outcome code ("1", "2" or "X") and its probability.

        Ties resolve in favour of home first, then away.
        """
        if home_prob >= draw_prob and home_prob >= away_prob:
            return "1", home_prob
        if away_prob >= home_prob and away_prob >= draw_prob:
            return "2", away_prob
        return "X", draw_prob

    def _calculate_expected_goals(self, predicted_score: Tuple[int, int]) -> float:
        """Total goals implied by the predicted score (home + away)."""
        home_goals, away_goals = predicted_score[0], predicted_score[1]
        return home_goals + away_goals

    def recommend(
        self,
        match_name: str,
        predicted_score: Tuple[int, int],
        probs: Dict[str, float],
        odds: Dict[str, float]
    ) -> MatchPredictionSet:
        """
        Build every bet recommendation for a match.

        Args:
            match_name: Match name
            predicted_score: (home_goals, away_goals)
            probs: {"home_win": 0.55, "draw": 0.25, "away_win": 0.20,
                    "over_15": 0.85, "over_25": 0.65, "over_35": 0.35,
                    "btts_yes": 0.55}
            odds: {"1": 1.80, "X": 3.50, "2": 4.20,
                   "ou15_o": 1.25, "ou15_u": 3.80,
                   "ou25_o": 1.90, "ou25_u": 1.85,
                   "ou35_o": 3.20, "ou35_u": 1.30,
                   "btts_y": 1.75, "btts_n": 2.00}

        Missing probability or odds keys fall back to built-in defaults.

        Returns:
            MatchPredictionSet with all recommendations
        """
        p_home = probs.get("home_win", 0.33)
        p_draw = probs.get("draw", 0.33)
        p_away = probs.get("away_win", 0.33)
        p_over15 = probs.get("over_15", 0.70)
        p_over25 = probs.get("over_25", 0.50)
        p_over35 = probs.get("over_35", 0.30)
        p_btts = probs.get("btts_yes", 0.50)

        total_goals = self._calculate_expected_goals(predicted_score)
        fav_pick, fav_prob = self._get_favorite(p_home, p_draw, p_away)

        low_band = self.PROB_LOW_RISK
        mid_band = self.PROB_MEDIUM_RISK
        high_band = self.PROB_HIGH_RISK

        def make_bet(market, pick, price, prob, level):
            # Confidence is always the probability expressed as a percentage.
            return BetRecommendation(
                market=market,
                pick=pick,
                odds=price,
                probability=prob,
                confidence=prob * 100,
                risk_level=level,
            )

        low_risk = []
        medium_risk = []
        high_risk = []
        extreme_risk = []

        # ---------- LOW RISK ----------
        # 1.5 Over (safest)
        if p_over15 >= low_band:
            low_risk.append(
                make_bet("1.5 Üst/Alt", "OVER", odds.get("ou15_o", 1.25), p_over15, RiskLevel.LOW)
            )

        # Double Chance on whichever side is ahead; skipped on an exact tie
        double_chance = None
        if p_home > p_away:
            double_chance = ("1X", "dc_1x", p_home + p_draw)
        elif p_away > p_home:
            double_chance = ("X2", "dc_x2", p_away + p_draw)
        if double_chance is not None:
            dc_pick, dc_key, dc_prob = double_chance
            if dc_prob >= low_band:
                low_risk.append(
                    make_bet("Double Chance", dc_pick, odds.get(dc_key, 1.30), dc_prob, RiskLevel.LOW)
                )

        # ---------- MEDIUM RISK ----------
        # Single markets, each only inside the [medium, low) probability band
        medium_singles = (
            ("Maç Sonucu", fav_pick, fav_pick, 2.00, fav_prob),
            ("2.5 Üst/Alt", "OVER", "ou25_o", 1.90, p_over25),
            ("Karşılıklı Gol", "YES", "btts_y", 1.75, p_btts),
        )
        for market, pick, odds_key, fallback, prob in medium_singles:
            if mid_band <= prob < low_band:
                medium_risk.append(
                    make_bet(market, pick, odds.get(odds_key, fallback), prob, RiskLevel.MEDIUM)
                )

        # Favourite + 2.5 Over combination
        if fav_prob >= 0.45 and p_over25 >= 0.50:
            joint_prob = fav_prob * p_over25  # simple product
            joint_odds = odds.get(fav_pick, 2.00) * odds.get("ou25_o", 1.90)
            if joint_prob >= 0.30:  # at least 30% probability
                medium_risk.append(
                    make_bet(
                        f"MS {fav_pick} + 2.5 Üst",
                        f"{fav_pick} & OVER",
                        joint_odds,
                        joint_prob,
                        RiskLevel.MEDIUM,
                    )
                )

        # ---------- HIGH RISK ----------
        # 3.5 Over
        if high_band <= p_over35 < mid_band:
            high_risk.append(
                make_bet("3.5 Üst/Alt", "OVER", odds.get("ou35_o", 3.20), p_over35, RiskLevel.HIGH)
            )

        # Exact score (for high-scoring predictions)
        if total_goals >= 3.5:
            score_label = f"{predicted_score[0]}-{predicted_score[1]}"
            # Rough score-probability estimate (simple model)
            if total_goals <= 4:
                score_prob = 0.15
            else:
                score_prob = 0.10
            # 8.0 is an estimated price, not taken from `odds`
            high_risk.append(make_bet("Tam Skor", score_label, 8.0, score_prob, RiskLevel.HIGH))

        # Favourite + 3.5 Over combination
        if fav_prob >= 0.40 and p_over35 >= 0.30:
            joint_prob = fav_prob * p_over35
            joint_odds = odds.get(fav_pick, 2.00) * odds.get("ou35_o", 3.20)
            high_risk.append(
                make_bet(
                    f"MS {fav_pick} + 3.5 Üst",
                    f"{fav_pick} & OVER",
                    joint_odds,
                    joint_prob,
                    RiskLevel.HIGH,
                )
            )

        # ---------- EXTREME RISK ----------
        # Long combination
        if fav_prob >= 0.50 and p_btts >= 0.50 and p_over25 >= 0.60:
            joint_prob = fav_prob * p_btts * p_over25
            joint_odds = odds.get(fav_pick, 2.00) * odds.get("btts_y", 1.75) * odds.get("ou25_o", 1.90)
            if joint_prob >= 0.15:  # at least 15% probability
                extreme_risk.append(
                    make_bet(
                        f"MS {fav_pick} + KG Var + 2.5 Üst",
                        f"{fav_pick} & BTTS & OVER",
                        joint_odds,
                        joint_prob,
                        RiskLevel.EXTREME,
                    )
                )

        return MatchPredictionSet(
            match_name=match_name,
            predicted_score=predicted_score,
            home_win_prob=p_home,
            draw_prob=p_draw,
            away_win_prob=p_away,
            over_15_prob=p_over15,
            over_25_prob=p_over25,
            over_35_prob=p_over35,
            btts_yes_prob=p_btts,
            low_risk_bets=low_risk,
            medium_risk_bets=medium_risk,
            high_risk_bets=high_risk,
            extreme_risk_bets=extreme_risk
        )
|
||||||
|
|
||||||
|
|
||||||
|
# Singleton
|
||||||
|
# Module-wide cached instance; created on first request.
_recommender = None


def get_smart_bet_recommender() -> SmartBetRecommender:
    """Return the shared SmartBetRecommender, constructing it on first use."""
    global _recommender
    if _recommender is not None:
        return _recommender
    _recommender = SmartBetRecommender()
    return _recommender
|
||||||
Executable
+582
@@ -0,0 +1,582 @@
|
|||||||
|
"""
|
||||||
|
Squad Analysis Engine - V9 Feature
|
||||||
|
Kadro ve oyuncu bazlı analiz.
|
||||||
|
|
||||||
|
Analiz Edilen Metrikler:
|
||||||
|
- İlk 11 kalitesi (golcü formu, key player)
|
||||||
|
- Yedek gücü
|
||||||
|
- Eksik oyuncu etkisi
|
||||||
|
- Pozisyon bazlı güç
|
||||||
|
- Takım içi golcü dağılımı
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from typing import Dict, Optional, List, Tuple
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from datetime import datetime
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
try:
|
||||||
|
import psycopg2
|
||||||
|
from psycopg2.extras import RealDictCursor
|
||||||
|
except ImportError:
|
||||||
|
psycopg2 = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class PlayerForm:
    """Form information for a single player."""

    # --- identity (required) ---
    player_id: str
    player_name: str

    # --- last-5 counters ---
    goals_last_5: int = 0
    assists_last_5: int = 0
    minutes_last_5: int = 0
    cards_last_5: int = 0

    # Scorer or frequently playing player
    is_key_player: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class SquadAnalysis:
    """Squad analysis for one team."""

    team_id: str
    team_name: str = ""

    # --- Starting 11 info ---
    starting_count: int = 0
    sub_count: int = 0
    total_squad: int = 0

    # --- Position distribution ---
    goalkeeper_count: int = 0
    defender_count: int = 0
    midfielder_count: int = 0
    forward_count: int = 0

    # --- Form metrics ---
    # Goals scored by the squad's players in the last 5 matches
    total_goals_last_5: int = 0
    total_assists_last_5: int = 0
    # Number of scorers
    key_players_count: int = 0
    # Missing scorers
    key_player_missing: int = 0

    # --- Quality metrics ---
    # Average playing time
    avg_minutes_per_player: float = 0.0
    # 0-1, experience playing with the team
    squad_experience: float = 0.0
    # Squad rotation rate
    rotation_rate: float = 0.0
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class SquadFeatures:
    """Squad features fed to the model (home side, away side, differences)."""

    # Home team features
    home_starting_11: int = 11
    home_sub_count: int = 7
    home_total_squad: int = 18
    home_goalkeepers: int = 1
    home_defenders: int = 4
    home_midfielders: int = 4
    home_forwards: int = 2
    home_goals_last_5: int = 0
    home_assists_last_5: int = 0
    home_key_players: int = 0
    home_squad_experience: float = 0.5

    # Away team features
    away_starting_11: int = 11
    away_sub_count: int = 7
    away_total_squad: int = 18
    away_goalkeepers: int = 1
    away_defenders: int = 4
    away_midfielders: int = 4
    away_forwards: int = 2
    away_goals_last_5: int = 0
    away_assists_last_5: int = 0
    away_key_players: int = 0
    away_squad_experience: float = 0.5

    # Comparison features
    squad_strength_diff: float = 0.0  # + = home stronger
    goals_form_diff: float = 0.0
    key_players_diff: int = 0

    def to_dict(self) -> Dict[str, float]:
        """Return the features as a flat ``name -> value`` mapping.

        Integer-typed fields are converted with ``float()``; the
        float-typed fields (squad experience, strength/goals-form diffs)
        are passed through unchanged. Keys come out home block first, then
        away block, then the comparison features.
        """
        int_suffixes = (
            "starting_11",
            "sub_count",
            "total_squad",
            "goalkeepers",
            "defenders",
            "midfielders",
            "forwards",
            "goals_last_5",
            "assists_last_5",
            "key_players",
        )

        features: Dict[str, float] = {}
        for side in ("home", "away"):
            for suffix in int_suffixes:
                name = f"{side}_{suffix}"
                features[name] = float(getattr(self, name))
            experience = f"{side}_squad_experience"
            features[experience] = getattr(self, experience)

        # Diffs
        features["squad_strength_diff"] = self.squad_strength_diff
        features["goals_form_diff"] = self.goals_form_diff
        features["key_players_diff"] = float(self.key_players_diff)
        return features
|
||||||
|
|
||||||
|
|
||||||
|
class SquadAnalysisEngine:
    """
    Squad and player analysis engine.

    For a fixture such as Beşiktaş-Galatasaray it computes:
    - goals/assists of the starting eleven over their last 5 matches
    - key player detection (players who score a lot)
    - position distribution (4-3-3, 4-4-2, etc.)
    - bench quality
    """

    # Position mapping: keyword (English or Turkish) -> normalized position code.
    # _normalize_position checks these keywords as substrings, in this order.
    POSITION_MAP = {
        'goalkeeper': 'GK',
        'gk': 'GK',
        'kaleci': 'GK',
        'defender': 'DEF',
        'def': 'DEF',
        'defans': 'DEF',
        'savunma': 'DEF',
        'midfielder': 'MID',
        'mid': 'MID',
        'orta saha': 'MID',
        'forward': 'FWD',
        'fwd': 'FWD',
        'forvet': 'FWD',
        'striker': 'FWD',
    }
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.conn = None
|
||||||
|
self._player_form_cache: Dict[str, PlayerForm] = {}
|
||||||
|
|
||||||
|
def _connect_db(self):
    """Open a new database connection and store it on the engine.

    Returns:
        The new connection, or None when psycopg2 is unavailable or the
        connection attempt fails (the failure is printed, not raised).
    """
    if psycopg2 is None:
        return None

    try:
        from data.db import get_clean_dsn
        connection = psycopg2.connect(get_clean_dsn())
    except Exception as e:
        print(f"[SquadEngine] DB connection failed: {e}")
        return None

    self.conn = connection
    return connection
|
||||||
|
|
||||||
|
def get_conn(self):
|
||||||
|
if self.conn is None or self.conn.closed:
|
||||||
|
self._connect_db()
|
||||||
|
return self.conn
|
||||||
|
|
||||||
|
def _normalize_position(self, position: Optional[str]) -> str:
|
||||||
|
"""Pozisyonu normalize et"""
|
||||||
|
if not position:
|
||||||
|
return 'UNK'
|
||||||
|
|
||||||
|
pos_lower = position.lower().strip()
|
||||||
|
for key, val in self.POSITION_MAP.items():
|
||||||
|
if key in pos_lower:
|
||||||
|
return val
|
||||||
|
return 'UNK'
|
||||||
|
|
||||||
|
def get_player_form(self, player_id: str, before_date_ms: Optional[int] = None) -> PlayerForm:
    """Compute a player's recent form from match events.

    Goals, assists and cards are counted over the five participation rows
    with the highest ``match_id`` for this player. ``is_key_player`` is set
    when the player has at least three goal events in the whole events
    table (no match window is applied to that query). Successful results
    are cached per ``player_id`` for the lifetime of the engine.

    Args:
        player_id: Id of the player.
        before_date_ms: Currently unused; no query is restricted by date.
            NOTE(review): callers building historical features probably
            expect a cut-off here -- confirm before relying on it.

    Returns:
        A PlayerForm. When the database is unavailable or a query fails,
        the form holds whatever was filled in before the failure and is
        not cached.
    """
    if player_id in self._player_form_cache:
        return self._player_form_cache[player_id]

    form = PlayerForm(player_id=player_id, player_name="")

    conn = self.get_conn()
    if conn is None:
        return form

    # Sub-select shared by the goal, assist and card queries. It is joined
    # by concatenation (not formatting) so the %s / %% markers reach
    # psycopg2 untouched.
    recent_matches = """
        SELECT match_id FROM match_player_participation
        WHERE player_id = %s
        ORDER BY match_id DESC LIMIT 5
    """

    try:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            # Player name
            cur.execute("SELECT name FROM players WHERE id = %s", (player_id,))
            player_row = cur.fetchone()
            if player_row:
                form.player_name = player_row['name']

            # Goals in the recent matches, excluding missed penalties
            cur.execute("""
                SELECT
                    COUNT(*) FILTER (WHERE event_type = 'goal' AND event_subtype NOT ILIKE '%%penaltı kaçırma%%') as goals
                FROM match_player_events
                WHERE player_id = %s
                AND match_id IN (""" + recent_matches + ")", (player_id, player_id))
            stats = cur.fetchone()
            if stats:
                form.goals_last_5 = stats['goals'] or 0

            # Assists: events where this player is recorded as the assister
            cur.execute("""
                SELECT COUNT(*) as assists
                FROM match_player_events
                WHERE assist_player_id = %s
                AND match_id IN (""" + recent_matches + ")", (player_id, player_id))
            assist_row = cur.fetchone()
            if assist_row:
                form.assists_last_5 = assist_row['assists'] or 0

            # Cards in the recent matches
            cur.execute("""
                SELECT COUNT(*) as cards
                FROM match_player_events
                WHERE player_id = %s AND event_type = 'card'
                AND match_id IN (""" + recent_matches + ")", (player_id, player_id))
            card_row = cur.fetchone()
            if card_row:
                form.cards_last_5 = card_row['cards'] or 0

            # Key player: 3+ goals over all recorded events for the player
            cur.execute("""
                SELECT COUNT(*) as total_goals
                FROM match_player_events
                WHERE player_id = %s
                AND event_type = 'goal'
                AND event_subtype NOT ILIKE '%%penaltı kaçırma%%'
            """, (player_id,))
            total_row = cur.fetchone()
            form.is_key_player = bool(total_row) and (total_row['total_goals'] or 0) >= 3

        self._player_form_cache[player_id] = form
        return form

    except Exception as e:
        import traceback
        traceback.print_exc()
        print(f"[SquadEngine] Error getting player form: {e}")
        # A failed statement leaves the transaction aborted; without a
        # rollback every later query on the cached connection fails too.
        try:
            conn.rollback()
        except Exception:
            pass  # best effort: the connection itself is unusable
        return form
|
||||||
|
|
||||||
|
def analyze_squad(self, match_id: str, team_id: str) -> SquadAnalysis:
    """Analyse the squad a team fielded in one match.

    Counts starters and substitutes, tallies positions over the whole
    match-day squad, sums the recent form of the starters and derives an
    experience score capped at 1.0.

    Args:
        match_id: Match whose participation rows are read.
        team_id: Team to analyse.

    Returns:
        A SquadAnalysis. If the database is unavailable or a query fails,
        the analysis contains whatever was filled in before the failure.
    """
    analysis = SquadAnalysis(team_id=team_id)

    conn = self.get_conn()
    if conn is None:
        return analysis

    # Normalized position code -> counter attribute on SquadAnalysis.
    position_fields = {
        'GK': 'goalkeeper_count',
        'DEF': 'defender_count',
        'MID': 'midfielder_count',
        'FWD': 'forward_count',
    }

    try:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            # Team name
            cur.execute("SELECT name FROM teams WHERE id = %s", (team_id,))
            team_row = cur.fetchone()
            if team_row:
                analysis.team_name = team_row['name']

            # Match-day squad
            cur.execute("""
                SELECT player_id, position, is_starting
                FROM match_player_participation
                WHERE match_id = %s AND team_id = %s
            """, (match_id, team_id))

            for p in cur.fetchall():
                counter = position_fields.get(self._normalize_position(p['position']))
                if counter:
                    setattr(analysis, counter, getattr(analysis, counter) + 1)

                if not p['is_starting']:
                    analysis.sub_count += 1
                    continue

                # Only starters contribute to the form totals.
                analysis.starting_count += 1
                form = self.get_player_form(p['player_id'])
                analysis.total_goals_last_5 += form.goals_last_5
                analysis.total_assists_last_5 += form.assists_last_5
                if form.is_key_player:
                    analysis.key_players_count += 1

            analysis.total_squad = analysis.starting_count + analysis.sub_count

            # Team experience: average number of starts per player, taken
            # over every player who has a starting row for this team (the
            # query is not limited to the players of this match).
            if analysis.starting_count > 0:
                cur.execute("""
                    SELECT AVG(match_count) as avg_exp
                    FROM (
                        SELECT player_id, COUNT(*) as match_count
                        FROM match_player_participation
                        WHERE team_id = %s AND is_starting = true
                        GROUP BY player_id
                    ) sub
                """, (team_id,))

                exp_row = cur.fetchone()
                if exp_row and exp_row['avg_exp']:
                    # AVG() arrives as Decimal; convert first so the result
                    # is always a float and never mixes Decimal with the
                    # float cap. Normalized: 50+ matches = 1.0.
                    analysis.squad_experience = min(float(exp_row['avg_exp']) / 50, 1.0)

        return analysis

    except Exception as e:
        print(f"[SquadEngine] Error analyzing squad: {e}")
        # Clear the aborted transaction so the cached connection stays usable.
        try:
            conn.rollback()
        except Exception:
            pass  # best effort: the connection itself is unusable
        return analysis
|
||||||
|
|
||||||
|
def analyze_squad_from_list(self, player_ids: List[str], team_id: str) -> SquadAnalysis:
    """Analyse a squad given as an in-memory list of player ids.

    Intended for live matches that have no participation rows in the
    database yet. Every id in the list is treated as a starter; substitutes
    are unknown, so ``total_squad`` equals the list length.

    Args:
        player_ids: Ids of the players considered to be starting.
        team_id: Team the players are fielded for; used to look up each
            player's most frequent position and match count for that team.

    Returns:
        A SquadAnalysis. ``squad_experience`` is the mean of the per-player
        ``min(matches / 50, 1)`` values over the list length, so it stays
        within 0-1 even if a database error interrupts the loop.
    """
    analysis = SquadAnalysis(team_id=team_id)

    if not player_ids:
        return analysis

    analysis.starting_count = len(player_ids)
    analysis.total_squad = len(player_ids)  # Subs unknown usually unless separate list

    conn = self.get_conn()
    if conn is None:
        return analysis

    # Normalized position code -> counter attribute on SquadAnalysis.
    position_fields = {
        'GK': 'goalkeeper_count',
        'DEF': 'defender_count',
        'MID': 'midfielder_count',
        'FWD': 'forward_count',
    }
    # Kept in a local so a failure mid-loop cannot leave a raw, un-averaged
    # sum on the analysis.
    experience_sum = 0.0

    try:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            for pid in player_ids:
                # Recent form
                form = self.get_player_form(pid)
                analysis.total_goals_last_5 += form.goals_last_5
                analysis.total_assists_last_5 += form.assists_last_5
                if form.is_key_player:
                    analysis.key_players_count += 1

                # Most frequent position for this team and its match count
                cur.execute("""
                    SELECT position, COUNT(*) as match_count
                    FROM match_player_participation
                    WHERE player_id = %s AND team_id = %s
                    GROUP BY position
                    ORDER BY match_count DESC LIMIT 1
                """, (pid, team_id))
                row = cur.fetchone()
                if not row:
                    continue

                counter = position_fields.get(self._normalize_position(row.get('position', 'UNK')))
                if counter:
                    setattr(analysis, counter, getattr(analysis, counter) + 1)

                # Experience contribution, capped at 50 matches
                experience_sum += min(row['match_count'] / 50.0, 1.0)

    except Exception as e:
        print(f"[SquadEngine] Live analyze error: {e}")
        # Clear the aborted transaction so the cached connection stays usable.
        try:
            conn.rollback()
        except Exception:
            pass  # best effort: the connection itself is unusable

    # starting_count is len(player_ids), which is non-zero here.
    analysis.squad_experience = experience_sum / analysis.starting_count
    return analysis
|
||||||
|
|
||||||
|
def get_features(
    self,
    match_id: str,
    home_team_id: str,
    away_team_id: str
) -> Dict[str, float]:
    """Compute the squad features for a match.

    Args:
        match_id: Match id.
        home_team_id: Home team id.
        away_team_id: Away team id.

    Returns:
        The SquadFeatures of the fixture as a dict.
    """
    features = SquadFeatures()

    home = self.analyze_squad(match_id, home_team_id)
    away = self.analyze_squad(match_id, away_team_id)

    # SquadFeatures suffix -> SquadAnalysis attribute.
    field_map = (
        ('starting_11', 'starting_count'),
        ('sub_count', 'sub_count'),
        ('total_squad', 'total_squad'),
        ('goalkeepers', 'goalkeeper_count'),
        ('defenders', 'defender_count'),
        ('midfielders', 'midfielder_count'),
        ('forwards', 'forward_count'),
        ('goals_last_5', 'total_goals_last_5'),
        ('assists_last_5', 'total_assists_last_5'),
        ('key_players', 'key_players_count'),
    )

    def experience(analysis):
        # The experience score can come back from the database as Decimal;
        # make it a float before it meets float arithmetic.
        return float(analysis.squad_experience)

    def strength(analysis):
        # Weighted sum: goals x2, assists x1, key players x3, experience x10.
        return (
            analysis.total_goals_last_5 * 2 +
            analysis.total_assists_last_5 +
            analysis.key_players_count * 3 +
            experience(analysis) * 10
        )

    for prefix, analysis in (('home', home), ('away', away)):
        for suffix, attr in field_map:
            setattr(features, f"{prefix}_{suffix}", getattr(analysis, attr))
        setattr(features, f"{prefix}_squad_experience", experience(analysis))

    # Comparison features (positive = home ahead)
    features.squad_strength_diff = strength(home) - strength(away)
    features.goals_form_diff = float(home.total_goals_last_5 - away.total_goals_last_5)
    features.key_players_diff = home.key_players_count - away.key_players_count

    return features.to_dict()
|
||||||
|
|
||||||
|
def get_features_without_match(
    self,
    home_team_id: str,
    away_team_id: str
) -> Dict[str, float]:
    """Compute team-based squad features without a match id.

    For each team the most recent match (by ``matches.mst_utc``) that has
    participation rows is used as the reference squad. A team with no such
    match keeps the SquadFeatures defaults. Position counts and
    ``squad_strength_diff`` are filled the same way get_features fills
    them, so both entry points produce comparable vectors.

    Args:
        home_team_id: Home team id.
        away_team_id: Away team id.

    Returns:
        The SquadFeatures as a dict; defaults (plus anything computed
        before a failure) when the database is unavailable or a query fails.
    """
    features = SquadFeatures()

    conn = self.get_conn()
    if conn is None:
        return features.to_dict()

    # SquadFeatures suffix -> SquadAnalysis attribute.
    field_map = (
        ('starting_11', 'starting_count'),
        ('sub_count', 'sub_count'),
        ('total_squad', 'total_squad'),
        ('goalkeepers', 'goalkeeper_count'),
        ('defenders', 'defender_count'),
        ('midfielders', 'midfielder_count'),
        ('forwards', 'forward_count'),
        ('goals_last_5', 'total_goals_last_5'),
        ('assists_last_5', 'total_assists_last_5'),
        ('key_players', 'key_players_count'),
    )

    def strength(prefix):
        # Same weights as get_features: goals x2, assists x1, key players
        # x3, experience x10.
        return (
            getattr(features, f"{prefix}_goals_last_5") * 2 +
            getattr(features, f"{prefix}_assists_last_5") +
            getattr(features, f"{prefix}_key_players") * 3 +
            getattr(features, f"{prefix}_squad_experience") * 10
        )

    try:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            for team_id, prefix in ((home_team_id, 'home'), (away_team_id, 'away')):
                # Most recent match with a recorded squad
                cur.execute("""
                    SELECT mpp.match_id
                    FROM match_player_participation mpp
                    JOIN matches m ON mpp.match_id = m.id
                    WHERE mpp.team_id = %s
                    ORDER BY m.mst_utc DESC
                    LIMIT 1
                """, (team_id,))

                row = cur.fetchone()
                if not row:
                    continue

                analysis = self.analyze_squad(row['match_id'], team_id)
                for suffix, attr in field_map:
                    setattr(features, f"{prefix}_{suffix}", getattr(analysis, attr))
                # May be a Decimal coming from the database; store a float.
                setattr(features, f"{prefix}_squad_experience", float(analysis.squad_experience))

        # Comparison features (positive = home ahead)
        features.squad_strength_diff = strength('home') - strength('away')
        features.goals_form_diff = float(features.home_goals_last_5 - features.away_goals_last_5)
        features.key_players_diff = features.home_key_players - features.away_key_players

        return features.to_dict()

    except Exception as e:
        print(f"[SquadEngine] Error: {e}")
        # Clear the aborted transaction so the cached connection stays usable.
        try:
            conn.rollback()
        except Exception:
            pass  # best effort: the connection itself is unusable
        return features.to_dict()
|
||||||
|
|
||||||
|
|
||||||
|
# Singleton instance
|
||||||
|
_engine: Optional[SquadAnalysisEngine] = None
|
||||||
|
|
||||||
|
|
||||||
|
def get_squad_analysis_engine() -> SquadAnalysisEngine:
    """Return the shared module-level SquadAnalysisEngine, creating it on first use."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = SquadAnalysisEngine()
    return _engine
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke test: build the shared engine and print one feature vector.
    squad_engine = get_squad_analysis_engine()

    print("\n🧪 Squad Analysis Engine Test")
    print("=" * 50)

    # Placeholder team ids standing in for Galatasaray and Fenerbahce.
    result = squad_engine.get_features_without_match(
        home_team_id="test_gs",
        away_team_id="test_fb",
    )

    print("\n📊 Features:")
    for name, val in result.items():
        print(f" {name}: {val:.2f}")
|
||||||
Executable
+194
@@ -0,0 +1,194 @@
|
|||||||
|
"""
|
||||||
|
Team Stats Engine
|
||||||
|
Takımların oyun tarzı istatistiklerini analiz eder.
|
||||||
|
football_team_stats tablosundaki kayıtlardan possession, şut, korner verilerini kullanır.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import psycopg2
|
||||||
|
from typing import Dict
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
from data.db import get_clean_dsn
|
||||||
|
|
||||||
|
|
||||||
|
class TeamStatsEngine:
    """
    Feature engine for team playing-style statistics.

    Metrics derived from ``football_team_stats`` joined with ``matches``:
    - average possession
    - average shots on target and total shots
    - average corners and fouls
    - shot-to-goal conversion rate (an xG-like signal)
    - shot accuracy and attacking intensity
    """

    def __init__(self):
        # Opened on demand by get_conn.
        self.conn = None

    def get_conn(self):
        """Return the cached connection, opening a new one when missing or closed."""
        if self.conn is None or self.conn.closed:
            self.conn = psycopg2.connect(get_clean_dsn())
        return self.conn

    def get_features(self, team_id: str, before_date: int,
                     limit: int = 10, max_days: int = 180) -> Dict[str, float]:
        """
        Compute the playing-style features of a team.

        Args:
            team_id: Team id; ids that are empty or shorter than 5
                characters get the default features.
            before_date: Only matches before this time are used (ms timestamp).
            limit: Maximum number of matches to analyse.
            max_days: How many days to look back from ``before_date``.

        Returns:
            Dict of float features. The defaults from ``_default_features``
            are returned when no matches are found or on any error.
        """
        if not team_id or len(team_id) < 5:
            return self._default_features()

        conn = None
        try:
            conn = self.get_conn()
            min_date = before_date - (max_days * 24 * 60 * 60 * 1000)

            # The with-block closes the cursor even when the query raises.
            with conn.cursor() as cur:
                # Stats of the team's last N scored football matches in the window
                cur.execute("""
                    SELECT
                        mts.possession_percentage,
                        mts.shots_on_target,
                        mts.shots_off_target,
                        mts.total_shots,
                        mts.corners,
                        mts.fouls,
                        m.score_home,
                        m.score_away,
                        m.home_team_id
                    FROM football_team_stats mts
                    JOIN matches m ON mts.match_id = m.id
                    WHERE mts.team_id = %s
                    AND m.mst_utc < %s
                    AND m.mst_utc > %s
                    AND m.score_home IS NOT NULL
                    AND m.sport = 'football'
                    ORDER BY m.mst_utc DESC
                    LIMIT %s
                """, (team_id, before_date, min_date, limit))
                stats = cur.fetchall()

            if not stats:
                return self._default_features()

            total_matches = len(stats)  # non-zero from here on

            possession_sum = 0
            valid_possession_count = 0
            shots_on_target_sum = 0
            shots_total_sum = 0
            corners_sum = 0
            fouls_sum = 0
            goals_scored = 0

            for poss, sot, _soff, total_shots, corners, fouls, sh, sa, home_id in stats:
                # Possession is averaged only over matches that report it.
                if poss and poss > 0:
                    possession_sum += poss
                    valid_possession_count += 1

                # Missing (NULL) counters count as zero.
                shots_on_target_sum += sot or 0
                shots_total_sum += total_shots or 0
                corners_sum += corners or 0
                fouls_sum += fouls or 0

                # Goals scored by this team. The query only guarantees
                # score_home is set, so a NULL score is treated as 0
                # instead of failing the whole computation.
                goals_scored += (sh if home_id == team_id else sa) or 0

            avg_possession = possession_sum / valid_possession_count if valid_possession_count > 0 else 50.0
            avg_shots_on_target = float(shots_on_target_sum / total_matches)
            avg_shots_total = float(shots_total_sum / total_matches)
            avg_corners = float(corners_sum / total_matches)
            avg_fouls = float(fouls_sum / total_matches)

            # Shot conversion rate (xG-like) and shot accuracy
            shot_conversion = goals_scored / shots_total_sum if shots_total_sum > 0 else 0.1
            shot_accuracy = shots_on_target_sum / shots_total_sum if shots_total_sum > 0 else 0.35

            # float() keeps the result plain floats even if the database
            # returns numeric columns as Decimal.
            return {
                'avg_possession': float(avg_possession) / 100,  # Normalize to 0-1
                'avg_shots_on_target': avg_shots_on_target,
                'avg_shots_total': avg_shots_total,
                'avg_corners': avg_corners,
                'avg_fouls': avg_fouls,
                'shot_conversion_rate': float(shot_conversion),
                'shot_accuracy': float(shot_accuracy),
                'attacking_intensity': (avg_shots_total + avg_corners) / 2
            }

        except Exception as e:
            print(f"[TeamStatsEngine] Error: {e}")
            # A failed statement leaves the transaction aborted; roll back
            # so the cached connection can serve the next call.
            if conn is not None:
                try:
                    conn.rollback()
                except Exception:
                    pass  # best effort: the connection itself is unusable
            return self._default_features()

    def _default_features(self) -> Dict[str, float]:
        """Return the neutral feature values used when no data is available."""
        return {
            'avg_possession': 0.50,
            'avg_shots_on_target': 3.5,
            'avg_shots_total': 11.0,
            'avg_corners': 4.5,
            'avg_fouls': 12.0,
            'shot_conversion_rate': 0.10,
            'shot_accuracy': 0.35,
            'attacking_intensity': 7.5
        }
|
||||||
|
|
||||||
|
|
||||||
|
# Singleton
|
||||||
|
_engine = None
|
||||||
|
|
||||||
|
def get_team_stats_engine() -> TeamStatsEngine:
    """Return the shared module-level TeamStatsEngine, creating it on first use."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = TeamStatsEngine()
    return _engine
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke test: pick any team that has stats and print its features.
    import time

    engine = get_team_stats_engine()

    print("\n🧪 Team Stats Engine Test")
    print("=" * 50)

    # Sample a team id from the same table get_features reads, so the
    # chosen team has rows the engine can actually find.
    conn = engine.get_conn()
    with conn.cursor() as cur:
        cur.execute("""
            SELECT DISTINCT mts.team_id, t.name
            FROM football_team_stats mts
            JOIN teams t ON mts.team_id = t.id
            LIMIT 1
        """)
        result = cur.fetchone()

    if result:
        team_id, team_name = result
        print(f"Test Takımı: {team_name}")

        features = engine.get_features(team_id, int(time.time() * 1000))

        print(f"\n📊 Feature'lar:")
        for k, v in features.items():
            print(f" {k}: {v:.3f}")
|
||||||
Executable
+419
@@ -0,0 +1,419 @@
|
|||||||
|
"""
|
||||||
|
Upset Engine - Dev Avcısı Tespit Sistemi
|
||||||
|
V9 Model için Galatasaray-Liverpool tarzı sürpriz maçları tespit eder.
|
||||||
|
|
||||||
|
Faktörler:
|
||||||
|
1. Atmosfer (Avrupa gecesi, taraftar baskısı)
|
||||||
|
2. Motivasyon asimetrisi (küme düşme vs şampiyon)
|
||||||
|
3. Yorgunluk (maç yoğunluğu, seyahat)
|
||||||
|
4. Tarihsel upset pattern
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from typing import Dict, Any, Optional, Tuple
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
# Add parent directory to path for imports
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
try:
|
||||||
|
import psycopg2
|
||||||
|
from psycopg2.extras import RealDictCursor
|
||||||
|
except ImportError:
|
||||||
|
psycopg2 = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class UpsetFactors:
    """Factors that influence the upset potential of a match."""
    atmosphere_score: float = 0.0  # Atmosphere effect (0-1)
    motivation_score: float = 0.0  # Motivation asymmetry (0-1)
    fatigue_score: float = 0.0  # Fatigue difference (0-1)
    historical_upset_rate: float = 0.0  # Historical upset rate (0-1)
    total_upset_potential: float = 0.0  # Overall upset potential (0-1)
    # Presumably the reason strings returned by the calculate_* methods -- verify against the caller.
    reasoning: list = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
class UpsetEngine:
    """
    Detects matches in which the favourite is likely to lose.
    Aims to catch Galatasaray-Liverpool style surprises.
    """

    # Teams with high-atmosphere stadiums (manually defined; could also be computed)
    HIGH_ATMOSPHERE_TEAMS = {
        # Turkey
        "galatasaray", "fenerbahce", "besiktas", "trabzonspor",
        # England
        "liverpool", "newcastle", "leeds",
        # Germany
        "dortmund", "union berlin",
        # Greece
        "olympiacos", "panathinaikos", "aek athens",
        # Argentina
        "boca juniors", "river plate",
        # Other
        "celtic", "rangers", "red star belgrade"
    }

    # European cups (high motivation)
    EUROPEAN_COMPETITIONS = {
        "şampiyonlar ligi", "champions league", "uefa champions league",
        "avrupa ligi", "europa league", "uefa europa league",
        "konferans ligi", "conference league", "uefa conference league"
    }
|
||||||
|
|
||||||
|
def __init__(self):
    """Create the engine and immediately try to open the database connection."""
    self.conn = None
    # _connect_db leaves self.conn as None when psycopg2 is missing or the
    # connection attempt fails.
    self._connect_db()
|
||||||
|
|
||||||
|
def _connect_db(self):
    """Open a database connection and store it in ``self.conn``.

    Does nothing when psycopg2 is unavailable. A failed connection attempt
    is printed and leaves ``self.conn`` set to None.
    """
    if psycopg2 is None:
        return

    connection = None
    try:
        from data.db import get_clean_dsn
        connection = psycopg2.connect(get_clean_dsn())
    except Exception as e:
        print(f"[UpsetEngine] DB connection failed: {e}")

    self.conn = connection
|
||||||
|
|
||||||
|
def _get_conn(self):
|
||||||
|
"""Bağlantıyı kontrol et ve döndür"""
|
||||||
|
if self.conn is None or self.conn.closed:
|
||||||
|
self._connect_db()
|
||||||
|
return self.conn
|
||||||
|
|
||||||
|
def calculate_atmosphere_score(
|
||||||
|
self,
|
||||||
|
home_team_name: str,
|
||||||
|
league_name: str,
|
||||||
|
is_cup_match: bool = False
|
||||||
|
) -> Tuple[float, list]:
|
||||||
|
"""
|
||||||
|
Atmosfer skorunu hesapla.
|
||||||
|
Yüksek atmosferli stadyumlar upset potansiyelini artırır.
|
||||||
|
"""
|
||||||
|
score = 0.0
|
||||||
|
reasons = []
|
||||||
|
|
||||||
|
# Yüksek atmosferli takım mı?
|
||||||
|
home_lower = home_team_name.lower()
|
||||||
|
for team in self.HIGH_ATMOSPHERE_TEAMS:
|
||||||
|
if team in home_lower:
|
||||||
|
score += 0.25
|
||||||
|
reasons.append(f"🔥 {home_team_name} yüksek atmosferli stadyum")
|
||||||
|
break
|
||||||
|
|
||||||
|
# Avrupa kupası mı?
|
||||||
|
league_lower = league_name.lower()
|
||||||
|
for comp in self.EUROPEAN_COMPETITIONS:
|
||||||
|
if comp in league_lower:
|
||||||
|
score += 0.20
|
||||||
|
reasons.append("🌟 Avrupa gecesi - ekstra motivasyon")
|
||||||
|
break
|
||||||
|
|
||||||
|
# Kupa maçı mı? (tek maç eliminasyon)
|
||||||
|
if is_cup_match:
|
||||||
|
score += 0.10
|
||||||
|
reasons.append("🏆 Kupa maçı - her şey olabilir")
|
||||||
|
|
||||||
|
return min(score, 1.0), reasons
|
||||||
|
|
||||||
|
def calculate_motivation_score(
|
||||||
|
self,
|
||||||
|
home_position: int,
|
||||||
|
away_position: int,
|
||||||
|
home_points_to_safety: Optional[int] = None,
|
||||||
|
away_already_champion: bool = False,
|
||||||
|
total_teams: int = 20
|
||||||
|
) -> Tuple[float, list]:
|
||||||
|
"""
|
||||||
|
Motivasyon asimetrisini hesapla.
|
||||||
|
Alt sıradaki takımın üst sıradakine karşı ekstra motivasyonu.
|
||||||
|
"""
|
||||||
|
score = 0.0
|
||||||
|
reasons = []
|
||||||
|
|
||||||
|
# Pozisyon farkı
|
||||||
|
position_diff = 0
|
||||||
|
if away_position is not None and home_position is not None:
|
||||||
|
position_diff = away_position - home_position # Negatif = deplasman daha iyi sırada
|
||||||
|
|
||||||
|
# Küme düşme hattı vs üst sıra (en güçlü upset faktörü)
|
||||||
|
relegation_zone = total_teams - 3 # Son 3 takım
|
||||||
|
if home_position is not None and away_position is not None:
|
||||||
|
if home_position >= relegation_zone and away_position <= 3:
|
||||||
|
score += 0.30
|
||||||
|
reasons.append("⚔️ Hayatta kalma savaşı vs şampiyonluk adayı")
|
||||||
|
elif home_position >= relegation_zone:
|
||||||
|
score += 0.15
|
||||||
|
reasons.append("🔥 Ev sahibi küme düşme hattında - ekstra motivasyon")
|
||||||
|
elif home_position is not None and home_position >= relegation_zone:
|
||||||
|
score += 0.15
|
||||||
|
reasons.append("🔥 Ev sahibi küme düşme hattında - ekstra motivasyon")
|
||||||
|
|
||||||
|
# Deplasman takımı zaten şampiyon mu?
|
||||||
|
if away_already_champion:
|
||||||
|
score += 0.20
|
||||||
|
reasons.append("😴 Deplasman takımı zaten şampiyon - motivasyon düşük")
|
||||||
|
|
||||||
|
# Büyük pozisyon farkı (underdog evinde)
|
||||||
|
if position_diff < -10:
|
||||||
|
score += 0.15
|
||||||
|
reasons.append(f"📊 {abs(position_diff)} sıra fark - büyük maç heyecanı")
|
||||||
|
elif position_diff < -5:
|
||||||
|
score += 0.08
|
||||||
|
|
||||||
|
return min(score, 1.0), reasons
|
||||||
|
|
||||||
|
def calculate_fatigue_score(
    self,
    home_matches_last_14d: int = 0,
    away_matches_last_14d: int = 0,
    home_days_rest: int = 7,
    away_days_rest: int = 7,
    away_travel_km: float = 0
) -> Tuple[float, list]:
    """
    Score how much more tired the away side is than the hosts.

    Three independent signals are summed: fixture congestion, rest-day
    difference and away travel distance. Only the upper tier of each
    signal produces an explanation string.

    Returns:
        (score, reasons) with score capped at 1.0.
    """
    total = 0.0
    notes = []

    # Fixture congestion: extra matches the visitors played in 14 days.
    extra_away_matches = away_matches_last_14d - home_matches_last_14d
    if extra_away_matches >= 3:
        total += 0.20
        notes.append(f"🏃 Deplasman {extra_away_matches} maç daha fazla oynamış")
    elif extra_away_matches >= 2:
        total += 0.10

    # Rest advantage of the hosts, in days.
    extra_home_rest = home_days_rest - away_days_rest
    if extra_home_rest >= 4:
        total += 0.15
        notes.append(f"💤 Ev sahibi {extra_home_rest} gün daha fazla dinlenmiş")
    elif extra_home_rest >= 2:
        total += 0.08

    # Long away trip.
    if away_travel_km > 3000:
        total += 0.15
        notes.append(f"✈️ Uzun deplasman ({int(away_travel_km)} km)")
    elif away_travel_km > 1500:
        total += 0.08

    return min(total, 1.0), notes
|
||||||
|
|
||||||
|
def get_historical_upset_rate(
    self,
    home_team_id: str,
    before_date_ms: int,
    lookback_matches: int = 20
) -> Tuple[float, list]:
    """
    Derive a historical bonus from the home team's recent home results.

    The query looks at the most recent `lookback_matches` finished home
    matches played before `before_date_ms` and computes the plain home
    win rate (it does not filter by the opponent's league position).
    A win rate above 50% is mapped to a bonus of at most 0.3.

    Args:
        home_team_id: Identifier matched against matches.home_team_id.
        before_date_ms: Upper bound compared with matches.mst_utc.
        lookback_matches: Maximum number of matches to consider.

    Returns:
        (rate, reasons). (0.0, []) when no DB connection is available,
        when there is no data, or when the query fails.
    """
    reasons = []

    conn = self._get_conn()
    if conn is None:
        return 0.0, reasons

    query = """
        WITH home_matches AS (
            SELECT
                m.id,
                m.score_home,
                m.score_away,
                m.home_team_id,
                m.away_team_id
            FROM matches m
            WHERE m.home_team_id = %s
              AND m.mst_utc < %s
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
            ORDER BY m.mst_utc DESC
            LIMIT %s
        )
        SELECT
            COUNT(*) as total,
            SUM(CASE WHEN score_home > score_away THEN 1 ELSE 0 END) as wins
        FROM home_matches
    """

    try:
        cursor = conn.cursor(cursor_factory=RealDictCursor)
        try:
            cursor.execute(query, (home_team_id, before_date_ms, lookback_matches))
            result = cursor.fetchone()
        finally:
            # Always release the cursor, even when the query raised.
            cursor.close()

        if result and result['total'] > 0:
            win_rate = result['wins'] / result['total']
            # A strong home record is treated as higher upset potential.
            if win_rate > 0.5:
                rate = min((win_rate - 0.4) * 0.5, 0.3)
                reasons.append(f"📈 Güçlü ev sahibi performansı (%{int(win_rate*100)} kazanma)")
                return rate, reasons

        return 0.0, reasons

    except Exception as e:
        print(f"[UpsetEngine] Historical query error: {e}")
        # A failed statement leaves the connection's transaction aborted;
        # roll back so later queries on the shared connection still work.
        try:
            conn.rollback()
        except Exception as rollback_error:
            print(f"[UpsetEngine] Rollback failed: {rollback_error}")
        return 0.0, reasons
|
||||||
|
|
||||||
|
def calculate_upset_potential(
    self,
    home_team_name: str,
    home_team_id: str,
    away_team_name: str,
    league_name: str,
    home_position: int,
    away_position: int,
    match_date_ms: int,
    is_cup_match: bool = False,
    home_matches_last_14d: int = 2,
    away_matches_last_14d: int = 2,
    home_days_rest: int = 7,
    away_days_rest: int = 7,
    away_travel_km: float = 0,
    total_teams: int = 20
) -> UpsetFactors:
    """
    Combine every factor into one upset-potential estimate.

    Runs the atmosphere, motivation, fatigue and historical calculations
    in that order, stores each score on a fresh UpsetFactors object and
    blends them with fixed weights (0.25 / 0.35 / 0.25 / 0.15).

    Returns:
        UpsetFactors: individual factor scores, the weighted total
        (capped at 1.0) and the concatenated explanation strings.
    """
    result = UpsetFactors()

    # 1. Atmosphere
    atmosphere, atmosphere_notes = self.calculate_atmosphere_score(
        home_team_name, league_name, is_cup_match
    )
    result.atmosphere_score = atmosphere

    # 2. Motivation
    motivation, motivation_notes = self.calculate_motivation_score(
        home_position, away_position, total_teams=total_teams
    )
    result.motivation_score = motivation

    # 3. Fatigue
    fatigue, fatigue_notes = self.calculate_fatigue_score(
        home_matches_last_14d, away_matches_last_14d,
        home_days_rest, away_days_rest,
        away_travel_km
    )
    result.fatigue_score = fatigue

    # 4. Historical (returns 0.0 when no DB connection is available)
    historical, historical_notes = self.get_historical_upset_rate(
        home_team_id, match_date_ms
    )
    result.historical_upset_rate = historical

    # Weighted blend of the four factors, capped at 1.0.
    weighted = (
        result.atmosphere_score * 0.25 +
        result.motivation_score * 0.35 +
        result.fatigue_score * 0.25 +
        result.historical_upset_rate * 0.15
    )
    result.total_upset_potential = min(weighted, 1.0)

    result.reasoning = [
        *atmosphere_notes,
        *motivation_notes,
        *fatigue_notes,
        *historical_notes,
    ]
    return result
|
||||||
|
|
||||||
|
def get_features(
    self,
    home_team_name: str,
    home_team_id: str,
    away_team_name: str,
    league_name: str,
    home_position: int,
    away_position: int,
    match_date_ms: int,
    **kwargs
) -> Dict[str, float]:
    """
    Return the upset factors as a flat feature dict for the model.

    Delegates to calculate_upset_potential (extra keyword arguments are
    forwarded unchanged) and exposes each score under an `upset_*` key.
    """
    computed = self.calculate_upset_potential(
        home_team_name=home_team_name,
        home_team_id=home_team_id,
        away_team_name=away_team_name,
        league_name=league_name,
        home_position=home_position,
        away_position=away_position,
        match_date_ms=match_date_ms,
        **kwargs
    )

    # feature name -> attribute on the computed factors object
    feature_sources = (
        ("upset_atmosphere", "atmosphere_score"),
        ("upset_motivation", "motivation_score"),
        ("upset_fatigue", "fatigue_score"),
        ("upset_historical", "historical_upset_rate"),
        ("upset_potential", "total_upset_potential"),
    )
    return {feature: getattr(computed, attr) for feature, attr in feature_sources}
|
||||||
|
|
||||||
|
|
||||||
|
# Singleton instance
|
||||||
|
_engine_instance = None
|
||||||
|
|
||||||
|
def get_upset_engine() -> UpsetEngine:
    """Return the shared UpsetEngine, creating it on the first call."""
    global _engine_instance
    if _engine_instance is not None:
        return _engine_instance
    # First use: build the engine once and cache it at module level.
    _engine_instance = UpsetEngine()
    return _engine_instance
|
||||||
|
|
||||||
|
|
||||||
|
# Test
|
||||||
|
if __name__ == "__main__":
    # Manual smoke test: Galatasaray vs Liverpool example.
    demo_engine = get_upset_engine()

    demo_match = dict(
        home_team_name="Galatasaray",
        home_team_id="test-gs-id",
        away_team_name="Liverpool",
        league_name="UEFA Champions League",
        home_position=12,
        away_position=1,
        match_date_ms=1700000000000,
        is_cup_match=False,
        away_matches_last_14d=5,
        home_matches_last_14d=2,
        away_days_rest=3,
        home_days_rest=7,
        away_travel_km=2800,
        total_teams=20,
    )
    report = demo_engine.calculate_upset_potential(**demo_match)

    separator = "=" * 60
    print(separator)
    print("GALATASARAY vs LIVERPOOL - UPSET ANALİZİ")
    print(separator)
    print(f"🏟️ Atmosfer Skoru: {report.atmosphere_score:.2f}")
    print(f"💪 Motivasyon Skoru: {report.motivation_score:.2f}")
    print(f"😓 Yorgunluk Skoru: {report.fatigue_score:.2f}")
    print(f"📊 Tarihsel Skor: {report.historical_upset_rate:.2f}")
    print(f"\n🎯 TOPLAM UPSET POTANSİYELİ: {report.total_upset_potential:.2f}")
    print("\n📝 Sebepler:")
    for line in report.reasoning:
        print(f" {line}")
|
||||||
@@ -0,0 +1,511 @@
|
|||||||
|
"""
|
||||||
|
Upset Engine v2 - GLM-5 Tespitleri ile Geliştirilmiş Sürpriz Tespiti
|
||||||
|
====================================================================
|
||||||
|
|
||||||
|
Yeni Eklenen Faktörler (GLM-5 Analizinden):
|
||||||
|
1. MARGIN_ANALIZI - Bookmaker margin > %18 = sürpriz riski
|
||||||
|
2. FAVORI_ORAN_TUZAGI - 1.40-1.60 arası en yüksek sürpriz oranı
|
||||||
|
3. HAKEM_SURPRIZ_ORANI - Hakemin geçmiş maçlarında ev kayıp oranı
|
||||||
|
4. FORM_FARKI_TUZAGI - Form farkı > 40 = "çok iyi görünen" favori tuzak
|
||||||
|
|
||||||
|
Orijinal Faktörler:
|
||||||
|
- Atmosfer (Avrupa gecesi, taraftar baskısı)
|
||||||
|
- Motivasyon asimetrisi (küme düşme vs şampiyon)
|
||||||
|
- Yorgunluk (maç yoğunluğu, seyahat)
|
||||||
|
- Tarihsel upset pattern
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from typing import Dict, Any, Optional, Tuple, List
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
try:
|
||||||
|
import psycopg2
|
||||||
|
from psycopg2.extras import RealDictCursor
|
||||||
|
except ImportError:
|
||||||
|
psycopg2 = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class UpsetFactorsV2:
    """Container for the individual upset factors and the combined result (v2)."""

    # --- original factors ---
    atmosphere_score: float = 0.0
    motivation_score: float = 0.0
    fatigue_score: float = 0.0
    historical_upset_rate: float = 0.0

    # --- factors added in v2 ---
    margin_score: float = 0.0         # bookmaker margin analysis
    favorite_odds_trap: float = 0.0   # favourite-odds trap
    referee_upset_score: float = 0.0  # referee upset rate
    form_trap_score: float = 0.0      # form-difference trap

    # --- combined result ---
    total_upset_potential: float = 0.0
    reasoning: List[str] = field(default_factory=list)

    # Upset score on a 0-100 scale and its label.
    upset_score: int = 0
    upset_level: str = "LOW"  # LOW, MEDIUM, HIGH, EXTREME
|
||||||
|
|
||||||
|
|
||||||
|
class UpsetEngineV2:
|
||||||
|
"""
|
||||||
|
Favori takımın kaybedeceği maçları tespit eder.
|
||||||
|
v2: GLM-5 analizlerinden elde edilen yeni faktörler eklendi.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Yüksek atmosferli stadyumlar
|
||||||
|
HIGH_ATMOSPHERE_TEAMS = {
|
||||||
|
"galatasaray", "fenerbahce", "besiktas", "trabzonspor",
|
||||||
|
"liverpool", "newcastle", "leeds",
|
||||||
|
"dortmund", "union berlin",
|
||||||
|
"olympiacos", "panathinaikos", "aek athens",
|
||||||
|
"boca juniors", "river plate",
|
||||||
|
"celtic", "rangers", "red star belgrade"
|
||||||
|
}
|
||||||
|
|
||||||
|
EUROPEAN_COMPETITIONS = {
|
||||||
|
"şampiyonlar ligi", "champions league", "uefa champions league",
|
||||||
|
"avrupa ligi", "europa league", "uefa europa league",
|
||||||
|
"konferans ligi", "conference league", "uefa conference league"
|
||||||
|
}
|
||||||
|
|
||||||
|
# YENİ: Sürpriz oranları (veritabanı analizinden)
|
||||||
|
# Favori oran aralığına göre sürpriz oranları
|
||||||
|
FAVORITE_ODDS_UPSET_RATES = {
|
||||||
|
(1.10, 1.20): 0.111, # %11.1 sürpriz
|
||||||
|
(1.20, 1.30): 0.150, # %15.0 sürpriz
|
||||||
|
(1.30, 1.40): 0.235, # %23.5 sürpriz
|
||||||
|
(1.40, 1.50): 0.333, # %33.3 sürpriz ← DİKKAT!
|
||||||
|
(1.50, 1.60): 0.350, # %35.0 sürpriz ← EN YÜKSEK!
|
||||||
|
}
|
||||||
|
|
||||||
|
def __init__(self):
    """Start without a connection, then try to open one via _connect_db."""
    self.conn = None
    self._connect_db()
|
||||||
|
|
||||||
|
def _connect_db(self):
    """
    Try to open the PostgreSQL connection and store it on self.conn.

    Does nothing when psycopg2 could not be imported. Any failure is
    printed and leaves self.conn set to None.
    """
    if psycopg2 is not None:
        try:
            # Imported lazily, inside the guarded block, so an import
            # failure is handled like a connection failure.
            from data.db import get_clean_dsn
            self.conn = psycopg2.connect(get_clean_dsn())
        except Exception as e:
            print(f"[UpsetEngineV2] DB connection failed: {e}")
            self.conn = None
|
||||||
|
|
||||||
|
def _get_conn(self):
|
||||||
|
if self.conn is None or self.conn.closed:
|
||||||
|
self._connect_db()
|
||||||
|
return self.conn
|
||||||
|
|
||||||
|
# ═════════════════════════════════════════════════════════════════
|
||||||
|
# YENİ FAKTÖRLER (GLM-5 Analizinden)
|
||||||
|
# ═════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
def calculate_margin_score(
    self,
    odds_data: Dict[str, float]
) -> Tuple[float, List[str]]:
    """
    Score the bookmaker margin of the 1X2 market.

    The margin is the sum of the inverse odds minus one, read from the
    keys "ms_h", "ms_d" and "ms_a". A margin above 18% scores 0.15 and
    a margin above 20% scores 0.25. Missing or non-positive odds give
    (0.0, []).
    """
    home_odds, draw_odds, away_odds = (
        odds_data.get(key, 0) for key in ("ms_h", "ms_d", "ms_a")
    )
    if not (home_odds > 0 and draw_odds > 0 and away_odds > 0):
        return 0.0, []

    margin = (1/home_odds + 1/draw_odds + 1/away_odds) - 1

    if margin > 0.20:
        return 0.25, [f"⚠️ Margin çok yüksek (%{margin*100:.1f}) - Bookmaker risk görüyor!"]
    if margin > 0.18:
        return 0.15, [f"⚠️ Margin yüksek (%{margin*100:.1f}) - Dikkat!"]
    return 0.0, []
|
||||||
|
|
||||||
|
def calculate_favorite_odds_trap(
    self,
    favorite_odds: float,
    favorite_side: str  # 'home' or 'away'
) -> Tuple[float, List[str]]:
    """
    Score the favourite's odds against the FAVORITE_ODDS_UPSET_RATES table.

    The score is the upset rate of the [low, high) bracket that contains
    the odds. Odds below 1.20 are additionally floored at 0.20 as a
    suspected "trap" price. `favorite_side` is accepted but not used.
    Non-positive odds give (0.0, []).
    """
    if favorite_odds <= 0:
        return 0.0, []

    trap_score = 0.0
    notes = []

    # Rate of the first bracket containing the odds, if any.
    bracket_rate = next(
        (
            rate
            for (low, high), rate in self.FAVORITE_ODDS_UPSET_RATES.items()
            if low <= favorite_odds < high
        ),
        None,
    )
    if bracket_rate is not None:
        trap_score = bracket_rate  # used directly as the upset probability
        if bracket_rate >= 0.30:
            notes.append(f"🔴 Favori oran {favorite_odds:.2f} - %{bracket_rate*100:.0f} sürpriz oranı!")
        elif bracket_rate >= 0.20:
            notes.append(f"⚠️ Favori oran {favorite_odds:.2f} - %{bracket_rate*100:.0f} sürpriz riski")

    # Very short price: suspected trap odds.
    if favorite_odds < 1.20:
        trap_score = max(trap_score, 0.20)
        notes.append(f"⚠️ Favori oran çok düşük ({favorite_odds:.2f}) - Tuzak oranı şüphesi")

    return trap_score, notes
|
||||||
|
|
||||||
|
def calculate_referee_upset_score(
    self,
    referee_name: str
) -> Tuple[float, List[str]]:
    """
    Score how often matches under this referee end without a home win.

    The upset rate is (away wins + 0.5 * draws) / total over all scored
    matches where the official has role_id = 1 and at least 3 matches
    exist. A rate above 30% scores 0.15; above 40% scores 0.25.

    Args:
        referee_name: Name matched against match_officials.name.

    Returns:
        (score, reasons). (0.0, []) when the name is empty, no DB
        connection is available, there is too little data, or the
        query fails (the failure is printed, not raised).
    """
    score = 0.0
    reasons = []

    if not referee_name:
        return score, reasons

    conn = self._get_conn()
    if not conn:
        return score, reasons

    try:
        cur = conn.cursor()
        try:
            cur.execute("""
                SELECT
                    COUNT(*) as total,
                    SUM(CASE WHEN m.score_home < m.score_away THEN 1 ELSE 0 END) as away_wins,
                    SUM(CASE WHEN m.score_home = m.score_away THEN 1 ELSE 0 END) as draws
                FROM match_officials mo
                JOIN matches m ON m.id = mo.match_id
                WHERE mo.name = %s AND mo.role_id = 1
                  AND m.score_home IS NOT NULL
            """, (referee_name,))
            row = cur.fetchone()
        finally:
            # Release the cursor even when the query raised.
            cur.close()

        if row and row[0] and row[0] >= 3:
            total = row[0]
            away_wins = row[1] or 0
            draws = row[2] or 0

            # Draws count as half an upset.
            upset_rate = (away_wins + draws * 0.5) / total

            if upset_rate > 0.40:
                score = 0.25
                reasons.append(f"👨⚖️ {referee_name}: %{upset_rate*100:.0f} sürpriz oranı (YÜKSEK!)")
            elif upset_rate > 0.30:
                score = 0.15
                reasons.append(f"👨⚖️ {referee_name}: %{upset_rate*100:.0f} sürpriz oranı")

    except Exception as e:
        # Best-effort factor: report the failure instead of hiding it.
        print(f"[UpsetEngineV2] Referee query error: {e}")
        # A failed statement aborts the transaction on the shared
        # connection; roll back so later queries still work.
        try:
            conn.rollback()
        except Exception as rollback_error:
            print(f"[UpsetEngineV2] Rollback failed: {rollback_error}")

    return score, reasons
|
||||||
|
|
||||||
|
def calculate_form_trap_score(
    self,
    home_form_score: float,
    away_form_score: float,
    favorite_side: str
) -> Tuple[float, List[str]]:
    """
    Score the "form trap": a form picture that disagrees with the favourite.

    A form gap above 40 points (in either direction) scores 0.20, with an
    explanation only when the in-form side is not the favourite. A
    favourite whose own form is below 50 raises the score to at least 0.15.
    """
    trap_score = 0.0
    notes = []

    form_gap = home_form_score - away_form_score

    # Very large form gap.
    if abs(form_gap) > 40:
        trap_score = 0.20
        if favorite_side == 'away' and form_gap > 0:
            notes.append(f"🔴 Form tuzağı! Ev sahibi formda ({home_form_score:.0f}) ama deplasman favori")
        elif favorite_side == 'home' and form_gap < 0:
            notes.append(f"🔴 Form tuzağı! Deplasman formda ({away_form_score:.0f}) ama ev sahibi favori")

    # Favourite in poor form. Any other side value has no favourite to check.
    if favorite_side == 'home':
        side_label, favorite_form = "ev sahibi", home_form_score
    elif favorite_side == 'away':
        side_label, favorite_form = "deplasman", away_form_score
    else:
        return trap_score, notes

    if favorite_form < 50:
        trap_score = max(trap_score, 0.15)
        notes.append(f"⚠️ Favori {side_label} formu düşük ({favorite_form:.0f})")

    return trap_score, notes
|
||||||
|
|
||||||
|
# ═════════════════════════════════════════════════════════════════
|
||||||
|
# ORİJİNAL FAKTÖRLER
|
||||||
|
# ═════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
def calculate_atmosphere_score(
    self,
    home_team_name: str,
    league_name: str,
    is_cup_match: bool = False
) -> Tuple[float, List[str]]:
    """
    Score the home atmosphere.

    Adds 0.25 when the home team name contains an entry of
    HIGH_ATMOSPHERE_TEAMS, 0.20 when the league name contains an entry
    of EUROPEAN_COMPETITIONS and 0.10 for cup matches.

    Matching is case- and diacritic-insensitive, so native spellings
    such as "Beşiktaş" or "Fenerbahçe" match the ASCII entries in the
    team list. A missing (None) name is treated as empty.

    Returns:
        (score, reasons) with score capped at 1.0.
    """
    import unicodedata

    def _fold(text):
        # Lower-case, map dotless i, then drop combining marks so that
        # "ş" -> "s", "ç" -> "c", "İ" -> "i", and so on.
        lowered = (text or "").lower().replace("ı", "i")
        decomposed = unicodedata.normalize("NFKD", lowered)
        return "".join(ch for ch in decomposed if not unicodedata.combining(ch))

    score = 0.0
    reasons = []

    home_folded = _fold(home_team_name)
    if any(_fold(team) in home_folded for team in self.HIGH_ATMOSPHERE_TEAMS):
        score += 0.25
        reasons.append(f"🔥 {home_team_name} yüksek atmosferli stadyum")

    league_folded = _fold(league_name)
    if any(_fold(comp) in league_folded for comp in self.EUROPEAN_COMPETITIONS):
        score += 0.20
        reasons.append("🌟 Avrupa gecesi - ekstra motivasyon")

    if is_cup_match:
        score += 0.10
        reasons.append("🏆 Kupa maçı - her şey olabilir")

    return min(score, 1.0), reasons
|
||||||
|
|
||||||
|
def calculate_motivation_score(
    self,
    home_position: int,
    away_position: int,
    total_teams: int = 20
) -> Tuple[float, List[str]]:
    """
    Score the motivation asymmetry from league positions.

    Adds 0.30 when a home side at the relegation line hosts a top-three
    visitor, 0.15 for any other home side at the relegation line, and
    0.15 when the visitors are ranked more than ten places better.
    Returns (0.0, []) when either position is None.
    """
    total = 0.0
    notes = []

    if home_position is None or away_position is None:
        return min(total, 1.0), notes

    # Negative gap = the away side is ranked better than the hosts.
    rank_gap = away_position - home_position
    danger_rank = total_teams - 3

    if home_position >= danger_rank:
        if away_position <= 3:
            total += 0.30
            notes.append("⚔️ Hayatta kalma savaşı vs şampiyonluk adayı")
        else:
            total += 0.15
            notes.append("🔥 Ev sahibi küme düşme hattında")

    if rank_gap < -10:
        total += 0.15
        notes.append(f"📊 {abs(rank_gap)} sıra fark")

    return min(total, 1.0), notes
|
||||||
|
|
||||||
|
# ═════════════════════════════════════════════════════════════════
|
||||||
|
# ANA FONKSİYON
|
||||||
|
# ═════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
def calculate_upset_potential(
    self,
    home_team_name: str,
    home_team_id: str,
    away_team_name: str,
    league_name: str,
    home_position: int = None,
    away_position: int = None,
    match_date_ms: int = None,
    odds_data: Dict[str, float] = None,
    referee_name: str = None,
    home_form_score: float = 50.0,
    away_form_score: float = 50.0,
    favorite_side: str = None,  # 'home', 'away', or 'draw'
    favorite_odds: float = None
) -> UpsetFactorsV2:
    """
    Run the full v2 upset analysis for one match.

    Each available input feeds its factor calculator; the factor scores
    are then translated into points on a 0-100 scale (capped at 100),
    a level label (LOW / MEDIUM / HIGH / EXTREME) and a combined
    potential in [0, 1]. Factors whose inputs are missing stay at 0.0.

    Returns:
        UpsetFactorsV2 with all factor scores, upset_score, upset_level,
        total_upset_potential and the collected explanation strings.
    """
    def _ladder_points(value, ladder):
        # First rung whose threshold is reached wins; 0 when none is.
        for threshold, points in ladder:
            if value >= threshold:
                return points
        return 0

    result = UpsetFactorsV2()
    notes = []

    # 1. Margin analysis
    if odds_data:
        margin, margin_notes = self.calculate_margin_score(odds_data)
        result.margin_score = margin
        notes.extend(margin_notes)

    # 2. Favourite-odds trap
    if favorite_odds and favorite_side:
        trap, trap_notes = self.calculate_favorite_odds_trap(
            favorite_odds, favorite_side
        )
        result.favorite_odds_trap = trap
        notes.extend(trap_notes)

    # 3. Referee upset rate
    if referee_name:
        referee, referee_notes = self.calculate_referee_upset_score(referee_name)
        result.referee_upset_score = referee
        notes.extend(referee_notes)

    # 4. Form trap (an unknown favourite is treated as 'home')
    form_trap, form_notes = self.calculate_form_trap_score(
        home_form_score, away_form_score, favorite_side or 'home'
    )
    result.form_trap_score = form_trap
    notes.extend(form_notes)

    # 5. Atmosphere
    atmosphere, atmosphere_notes = self.calculate_atmosphere_score(
        home_team_name, league_name
    )
    result.atmosphere_score = atmosphere
    notes.extend(atmosphere_notes)

    # 6. Motivation
    if home_position is not None and away_position is not None:
        motivation, motivation_notes = self.calculate_motivation_score(
            home_position, away_position
        )
        result.motivation_score = motivation
        notes.extend(motivation_notes)

    # ---- Upset score (0-100) ----
    points = 0

    # Margin is the only ladder that also adds an explanation.
    if result.margin_score >= 0.25:
        points += 30
        notes.append("🔴 Margin > %20: Bookmaker büyük risk görüyor!")
    elif result.margin_score >= 0.15:
        points += 20
        notes.append("⚠️ Margin > %18: Dikkatli ol!")

    points += _ladder_points(result.favorite_odds_trap, ((0.30, 30), (0.20, 25), (0.15, 20)))
    points += _ladder_points(result.referee_upset_score, ((0.25, 20), (0.15, 10)))
    points += _ladder_points(result.form_trap_score, ((0.20, 20), (0.15, 15)))
    points += _ladder_points(result.atmosphere_score, ((0.40, 20), (0.25, 15)))
    points += _ladder_points(result.motivation_score, ((0.30, 15), (0.15, 10)))

    # ---- Extra risk factors ----

    # Away favourite.
    if favorite_side == 'away':
        points += 10
        notes.append("📍 Deplasman favorisi - ekstra risk!")

    # Favourite in very poor form (< 40).
    if favorite_side == 'home' and home_form_score < 40:
        points += 15
        notes.append(f"🔴 Favori ev sahibi formu ÇOK DÜŞÜK ({home_form_score:.0f})")
    elif favorite_side == 'away' and away_form_score < 40:
        points += 15
        notes.append(f"🔴 Favori deplasman formu ÇOK DÜŞÜK ({away_form_score:.0f})")

    # Very short favourite price combined with a high margin.
    if favorite_odds and favorite_odds < 1.30 and result.margin_score >= 0.15:
        points += 10
        notes.append(f"⚠️ Düşük oran ({favorite_odds:.2f}) + yüksek margin = TUZAK ŞÜPHESİ!")

    result.upset_score = min(points, 100)

    # Level label from the capped score.
    for floor, label in ((60, "EXTREME"), (45, "HIGH"), (30, "MEDIUM")):
        if result.upset_score >= floor:
            result.upset_level = label
            break
    else:
        result.upset_level = "LOW"

    # Combined potential: atmosphere and motivation count half.
    combined = (
        result.margin_score + result.favorite_odds_trap +
        result.referee_upset_score + result.form_trap_score +
        result.atmosphere_score * 0.5 + result.motivation_score * 0.5
    )
    result.total_upset_potential = min(combined / 1.5, 1.0)

    result.reasoning = notes
    return result
|
||||||
|
|
||||||
|
|
||||||
|
# Cached engine shared by every caller of get_upset_engine_v2().
_engine_v2_instance = None


def get_upset_engine_v2():
    """
    Return the shared UpsetEngineV2, creating it on the first call.

    Constructing an engine opens a database connection, so the instance
    is cached at module level instead of being rebuilt on every call.
    """
    global _engine_v2_instance
    if _engine_v2_instance is None:
        _engine_v2_instance = UpsetEngineV2()
    return _engine_v2_instance
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke test: Real Madrid vs Getafe.
    demo_engine = get_upset_engine_v2()

    demo_match = dict(
        home_team_name="Real Madrid",
        home_team_id="test",
        away_team_name="Getafe",
        league_name="LaLiga",
        odds_data={"ms_h": 1.25, "ms_d": 3.92, "ms_a": 6.86},
        referee_name="A. Muniz Ruiz",
        home_form_score=80.0,
        away_form_score=56.7,
        favorite_side="home",
        favorite_odds=1.25,
    )
    report = demo_engine.calculate_upset_potential(**demo_match)

    separator = "=" * 60
    print(f"\n{separator}")
    print("Real Madrid vs Getafe - Sürpriz Analizi")
    print(separator)
    print(f"Sürpriz Skoru: {report.upset_score}/100")
    print(f"Seviye: {report.upset_level}")
    print("\nNedenler:")
    for line in report.reasoning:
        print(f" {line}")
|
||||||
Executable
+249
@@ -0,0 +1,249 @@
|
|||||||
|
"""
|
||||||
|
Value Betting Calculator
|
||||||
|
Expected Value (EV) ve stake önerileri hesaplar.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Dict, Optional
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ValueBet:
    """Result of a value analysis for a single bet."""
    bet_type: str               # e.g. MS_1, AU25_Üst, KG_Var
    my_probability: float       # our own estimate
    market_odds: float          # bookmaker odds
    implied_probability: float  # probability implied by the odds
    edge: float                 # our estimate minus the implied probability
    expected_value: float       # EV = (prob × odds) - 1
    is_value: bool              # True when the edge reaches the value threshold
    kelly_fraction: float       # suggested Kelly stake fraction
    confidence_tier: str        # "banker", "strong", "value", "skip"

    def to_dict(self) -> Dict:
        """Return the fields as a plain dict, rounding the float metrics to 4 decimals."""
        def four_dp(value):
            return round(value, 4)

        return dict(
            bet_type=self.bet_type,
            my_probability=four_dp(self.my_probability),
            market_odds=self.market_odds,
            implied_probability=four_dp(self.implied_probability),
            edge=four_dp(self.edge),
            expected_value=four_dp(self.expected_value),
            is_value=self.is_value,
            kelly_fraction=four_dp(self.kelly_fraction),
            confidence_tier=self.confidence_tier,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class ValueCalculator:
|
||||||
|
"""
|
||||||
|
Value Betting Calculator
|
||||||
|
|
||||||
|
Tahminleri oranlarla karşılaştırarak EV hesaplar.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Eşikler
|
||||||
|
MIN_EDGE_FOR_VALUE = 0.05 # Minimum %5 edge
|
||||||
|
MIN_EDGE_FOR_STRONG = 0.10 # %10+ edge = strong value
|
||||||
|
MIN_EDGE_FOR_BANKER = 0.15 # %15+ edge = banker
|
||||||
|
|
||||||
|
KELLY_FRACTION = 0.25 # 1/4 Kelly (güvenli)
|
||||||
|
MAX_STAKE_PERCENT = 0.10 # Maksimum bank'ın %10'u
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def calculate_implied_probability(self, odds: float) -> float:
    """Return the probability implied by decimal odds (1 / odds); odds of 1 or less map to 1.0."""
    return 1.0 if odds <= 1 else 1 / odds
|
||||||
|
|
||||||
|
def calculate_ev(self, probability: float, odds: float) -> float:
    """
    Compute the expected value of a unit stake.

    EV = (probability × odds) - 1

    Positive EV means profit in the long run, negative EV means loss.
    """
    expected_return = probability * odds
    return expected_return - 1
|
||||||
|
|
||||||
|
def calculate_kelly_stake(self, probability: float, odds: float) -> float:
    """
    Compute a fractional Kelly stake.

    Kelly = (p × b - q) / b, where
      - p = win probability
      - q = loss probability (1 - p)
      - b = odds - 1 (net profit per unit staked)

    The full-Kelly value is clamped to [0, MAX_STAKE_PERCENT] and then
    multiplied by KELLY_FRACTION. Odds of 1 or less give 0.
    """
    if odds <= 1:
        return 0

    net_odds = odds - 1
    loss_probability = 1 - probability
    full_kelly = (probability * net_odds - loss_probability) / net_odds

    # Clamp negative or very large values.
    # NOTE(review): the cap is applied before the fractional scaling, so
    # the largest stake returned is MAX_STAKE_PERCENT * KELLY_FRACTION -
    # confirm this is intended rather than capping the final stake.
    bounded = max(0, min(full_kelly, self.MAX_STAKE_PERCENT))

    # Fractional Kelly (more conservative).
    return bounded * self.KELLY_FRACTION
|
||||||
|
|
||||||
|
def analyze_bet(self, bet_type: str, my_probability: float,
|
||||||
|
market_odds: float) -> ValueBet:
|
||||||
|
"""
|
||||||
|
Tek bir bahis için value analizi yap.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
bet_type: Bahis türü (MS_1, AU25_Üst, KG_Var vb.)
|
||||||
|
my_probability: Bizim tahminimiz (0-1 arası)
|
||||||
|
market_odds: Bahis oranı
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
ValueBet: Analiz sonucu
|
||||||
|
"""
|
||||||
|
if market_odds <= 1:
|
||||||
|
return ValueBet(
|
||||||
|
bet_type=bet_type,
|
||||||
|
my_probability=my_probability,
|
||||||
|
market_odds=market_odds,
|
||||||
|
implied_probability=1.0,
|
||||||
|
edge=0,
|
||||||
|
expected_value=-1,
|
||||||
|
is_value=False,
|
||||||
|
kelly_fraction=0,
|
||||||
|
confidence_tier="skip"
|
||||||
|
)
|
||||||
|
|
||||||
|
implied = self.calculate_implied_probability(market_odds)
|
||||||
|
edge = my_probability - implied
|
||||||
|
ev = self.calculate_ev(my_probability, market_odds)
|
||||||
|
kelly = self.calculate_kelly_stake(my_probability, market_odds)
|
||||||
|
|
||||||
|
# Tier belirleme
|
||||||
|
if edge >= self.MIN_EDGE_FOR_BANKER and my_probability >= 0.70:
|
||||||
|
tier = "banker"
|
||||||
|
elif edge >= self.MIN_EDGE_FOR_STRONG:
|
||||||
|
tier = "strong"
|
||||||
|
elif edge >= self.MIN_EDGE_FOR_VALUE:
|
||||||
|
tier = "value"
|
||||||
|
else:
|
||||||
|
tier = "skip"
|
||||||
|
|
||||||
|
return ValueBet(
|
||||||
|
bet_type=bet_type,
|
||||||
|
my_probability=my_probability,
|
||||||
|
market_odds=market_odds,
|
||||||
|
implied_probability=implied,
|
||||||
|
edge=edge,
|
||||||
|
expected_value=ev,
|
||||||
|
is_value=edge >= self.MIN_EDGE_FOR_VALUE,
|
||||||
|
kelly_fraction=kelly,
|
||||||
|
confidence_tier=tier
|
||||||
|
)
|
||||||
|
|
||||||
|
def analyze_match_predictions(self, predictions: Dict[str, float],
|
||||||
|
odds: Dict[str, float]) -> Dict[str, ValueBet]:
|
||||||
|
"""
|
||||||
|
Maç için tüm tahminleri analiz et.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
predictions: Tahminler {'MS_1': 0.55, 'MS_X': 0.25, ...}
|
||||||
|
odds: Oranlar {'MS_1': 1.80, 'MS_X': 3.50, ...}
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict[str, ValueBet]: Her bahis için value analizi
|
||||||
|
"""
|
||||||
|
results = {}
|
||||||
|
|
||||||
|
for bet_type, probability in predictions.items():
|
||||||
|
if bet_type in odds and odds[bet_type] > 1:
|
||||||
|
results[bet_type] = self.analyze_bet(
|
||||||
|
bet_type=bet_type,
|
||||||
|
my_probability=probability,
|
||||||
|
market_odds=odds[bet_type]
|
||||||
|
)
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
def get_best_value_bets(self, value_bets: Dict[str, ValueBet],
|
||||||
|
top_n: int = 3) -> list:
|
||||||
|
"""En iyi value bet'leri döndür"""
|
||||||
|
valid_bets = [vb for vb in value_bets.values() if vb.is_value]
|
||||||
|
sorted_bets = sorted(valid_bets, key=lambda x: x.expected_value, reverse=True)
|
||||||
|
return sorted_bets[:top_n]
|
||||||
|
|
||||||
|
def calculate_stake(self, value_bet: ValueBet, bankroll: float,
|
||||||
|
use_kelly: bool = True) -> float:
|
||||||
|
"""
|
||||||
|
Önerilen stake miktarını hesapla.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
value_bet: Value bet analizi
|
||||||
|
bankroll: Toplam bütçe
|
||||||
|
use_kelly: Kelly criterion kullan mı?
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
float: Önerilen stake miktarı
|
||||||
|
"""
|
||||||
|
if not value_bet.is_value:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
if use_kelly:
|
||||||
|
return bankroll * value_bet.kelly_fraction
|
||||||
|
else:
|
||||||
|
# Tier bazlı sabit stake
|
||||||
|
tier_stakes = {
|
||||||
|
"banker": 0.05,
|
||||||
|
"strong": 0.03,
|
||||||
|
"value": 0.02,
|
||||||
|
"skip": 0
|
||||||
|
}
|
||||||
|
return bankroll * tier_stakes.get(value_bet.confidence_tier, 0)
|
||||||
|
|
||||||
|
|
||||||
|
# Singleton
|
||||||
|
_calculator = None


def get_value_calculator() -> ValueCalculator:
    """Return the module-wide ValueCalculator, creating it on first use."""
    global _calculator
    if _calculator is not None:
        return _calculator
    _calculator = ValueCalculator()
    return _calculator
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke test: print the value analysis for a few sample bets.
    calc = get_value_calculator()

    print("\n🧪 Value Calculator Test")
    print("=" * 50)

    # Marker shown next to each confidence tier (same for every scenario).
    tier_emoji = {"banker": "🎯", "strong": "💪", "value": "✓", "skip": "⏭️"}

    # Test scenarios
    test_cases = [
        {"bet": "MS_1", "prob": 0.70, "odds": 1.60},  # High prob, low odds
        {"bet": "MS_1", "prob": 0.55, "odds": 1.90},  # Medium prob, good odds
        {"bet": "MS_1", "prob": 0.60, "odds": 2.10},  # VALUE!
        {"bet": "AU25_Üst", "prob": 0.65, "odds": 1.85},  # VALUE!
        {"bet": "KG_Var", "prob": 0.50, "odds": 1.70},  # No value
    ]

    for tc in test_cases:
        bet_name, prob, price = tc["bet"], tc["prob"], tc["odds"]
        result = calc.analyze_bet(bet_name, prob, price)

        status_emoji = "✅" if result.is_value else "❌"

        print(f"\n{status_emoji} {bet_name}")
        print(f" Tahmin: {prob:.0%} | Oran: {price:.2f} | Implied: {result.implied_probability:.0%}")
        print(f" Edge: {result.edge:+.1%} | EV: {result.expected_value:+.1%}")
        print(f" Tier: {tier_emoji.get(result.confidence_tier, '')} {result.confidence_tier.upper()}")
        print(f" Kelly Stake: {result.kelly_fraction:.2%} of bankroll")

        if not result.is_value:
            continue
        stake = calc.calculate_stake(result, 1000)
        print(f" 💰 Önerilen Stake (1000 TL bank): {stake:.2f} TL")
|
||||||
@@ -0,0 +1,415 @@
|
|||||||
|
"""
|
||||||
|
Value Detection Engine
|
||||||
|
======================
|
||||||
|
The Smart Way to Beat the Bookmakers
|
||||||
|
|
||||||
|
This engine doesn't just predict winners - it finds VALUE.
|
||||||
|
The key insight: We don't need to predict the winner, we need to find
|
||||||
|
where the bookmaker made a mistake in their odds.
|
||||||
|
|
||||||
|
Core Philosophy:
|
||||||
|
- High Margin = High Uncertainty = Potential Value
|
||||||
|
- Model Probability > Implied Probability = Value Bet
|
||||||
|
- The goal is NOT to predict correctly, but to find +EV bets
|
||||||
|
|
||||||
|
Author: AI Engine V21
|
||||||
|
"""
|
||||||
|
|
||||||
|
import math
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Dict, List, Optional, Tuple
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ValueBet:
    """
    A single value-bet opportunity.

    Probabilities, edge, EV and Kelly are stored as fractions; ``to_dict``
    converts them to percentages rounded to one decimal.
    """

    outcome: str  # "1", "X", "2"
    model_probability: float  # our model's probability (0-1)
    implied_probability: float  # bookmaker's implied probability (0-1)
    odds: float  # bookmaker's odds
    edge: float  # model probability minus implied probability (fraction)
    expected_value: float  # EV = (prob * odds) - 1
    kelly_fraction: float  # suggested bet size as a bankroll fraction
    confidence: str  # "HIGH", "MEDIUM", "LOW"
    reasons: List[str]  # why this is considered value

    def to_dict(self) -> dict:
        """Serialize to a plain dict with percentage-scaled numeric fields."""

        def as_percent(fraction: float) -> float:
            return round(fraction * 100, 1)

        payload = {"outcome": self.outcome}
        payload["model_prob"] = as_percent(self.model_probability)
        payload["implied_prob"] = as_percent(self.implied_probability)
        payload["odds"] = self.odds
        payload["edge"] = as_percent(self.edge)
        payload["ev"] = as_percent(self.expected_value)
        payload["kelly"] = as_percent(self.kelly_fraction)
        payload["confidence"] = self.confidence
        # The list is shared with the instance, not copied.
        payload["reasons"] = self.reasons
        return payload
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class MarginAnalysis:
    """
    Bookmaker margin summary for a 1X2 market.

    Margins are stored as fractions; ``to_dict`` reports them as percentages
    rounded to one decimal.
    """

    raw_margin: float  # sum of raw implied probabilities - 1
    true_margin: float  # adjusted for favorite-longshot bias
    favorite_outcome: str
    favorite_odds: float
    uncertainty_level: str  # "LOW", "MEDIUM", "HIGH", "EXTREME"

    def to_dict(self) -> dict:
        """Serialize to a plain dict with shortened key names."""
        raw_pct = round(self.raw_margin * 100, 1)
        true_pct = round(self.true_margin * 100, 1)
        return dict(
            raw_margin=raw_pct,
            true_margin=true_pct,
            favorite=self.favorite_outcome,
            favorite_odds=self.favorite_odds,
            uncertainty=self.uncertainty_level,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class ValueDetectionEngine:
    """
    Value-bet detector for 1X2 (home / draw / away) markets.

    Compares model probabilities with the probabilities implied by the
    bookmaker odds and layers a few heuristics on top:

    1. Raw margin above ``MARGIN_HIGH`` -> bonus edge for non-favorites.
    2. Favorite priced 1.40-1.60 -> extra bonus edge for the other outcomes.
    3. Away favorite -> small bonus edge for the home outcome.
    """

    # Upset rates keyed by half-open favorite-odds range [low, high).
    UPSET_RATES = {
        (1.00, 1.25): 0.08,  # 8% upset rate
        (1.25, 1.40): 0.18,  # 18% upset rate
        (1.40, 1.60): 0.33,  # 33% upset rate - DANGER ZONE
        (1.60, 1.80): 0.28,  # 28% upset rate
        (1.80, 2.00): 0.35,  # 35% upset rate
        (2.00, 2.50): 0.42,  # 42% upset rate
        (2.50, 3.00): 0.45,  # 45% upset rate
        (3.00, 5.00): 0.55,  # 55% upset rate
    }

    # Margin thresholds
    MARGIN_LOW = 0.06  # 6% - bookmaker very confident
    MARGIN_MEDIUM = 0.12  # 12% - normal margin
    MARGIN_HIGH = 0.18  # 18% - bookmaker unsure
    # NOTE(review): MARGIN_EXTREME is not referenced by calculate_margin, which
    # labels everything at or above MARGIN_HIGH as "EXTREME" - confirm intent.
    MARGIN_EXTREME = 0.22  # 22% - bookmaker very unsure

    def __init__(self):
        self.historical_data = []  # For learning
        self.value_threshold = 0.03  # Minimum 3% edge to consider value

    def calculate_margin(self, odds_1: float, odds_x: float, odds_2: float) -> MarginAnalysis:
        """
        Calculate the bookmaker margin of a 1X2 market and grade its uncertainty.

        Args:
            odds_1: Decimal odds for the home win.
            odds_x: Decimal odds for the draw.
            odds_2: Decimal odds for the away win.

        Returns:
            MarginAnalysis. If any price is not greater than 1 a placeholder
            is returned (margins 0, favorite "X" with odds 0, level "UNKNOWN").
        """
        if not all([odds_1 > 1, odds_x > 1, odds_2 > 1]):
            return MarginAnalysis(0, 0, "X", 0, "UNKNOWN")

        # Raw implied probabilities; their sum exceeds 1 by the margin.
        imp_1 = 1 / odds_1
        imp_x = 1 / odds_x
        imp_2 = 1 / odds_2
        raw_margin = imp_1 + imp_x + imp_2 - 1

        # The favorite is the lowest-priced outcome (ties: home, away, draw).
        if odds_1 <= odds_x and odds_1 <= odds_2:
            favorite_outcome, favorite_odds = "1", odds_1
        elif odds_2 <= odds_1 and odds_2 <= odds_x:
            favorite_outcome, favorite_odds = "2", odds_2
        else:
            favorite_outcome, favorite_odds = "X", odds_x

        # Simplified adjustment for favorite-longshot bias.
        true_margin = raw_margin * 0.85

        if raw_margin < self.MARGIN_LOW:
            uncertainty = "LOW"
        elif raw_margin < self.MARGIN_MEDIUM:
            uncertainty = "MEDIUM"
        elif raw_margin < self.MARGIN_HIGH:
            uncertainty = "HIGH"
        else:
            uncertainty = "EXTREME"

        return MarginAnalysis(
            raw_margin=raw_margin,
            true_margin=true_margin,
            favorite_outcome=favorite_outcome,
            favorite_odds=favorite_odds,
            uncertainty_level=uncertainty,
        )

    def get_historical_upset_rate(self, favorite_odds: float) -> float:
        """Return the UPSET_RATES entry for the odds, or 0.40 outside all ranges."""
        for (low, high), rate in self.UPSET_RATES.items():
            if low <= favorite_odds < high:
                return rate
        return 0.40  # Default for very high odds

    def calculate_edge(
        self,
        model_prob: float,
        odds: float,
        margin: float
    ) -> Tuple[float, float]:
        """
        Calculate the edge and expected value of one outcome.

        Edge = model probability - implied probability (1 / odds).
        EV = (probability * odds) - 1.

        Args:
            model_prob: Model probability of the outcome (0-1).
            odds: Decimal odds for the outcome.
            margin: Bookmaker margin; accepted for interface compatibility
                but currently not used in the calculation.

        Returns:
            (edge, expected_value); (0, -1) when odds <= 1.
        """
        if odds <= 1:
            return 0, -1

        # Simplified: the raw implied probability is used as-is, without
        # removing the bookmaker margin.
        true_implied = 1 / odds

        edge = model_prob - true_implied
        ev = (model_prob * odds) - 1
        return edge, ev

    def calculate_kelly_fraction(
        self,
        probability: float,
        odds: float,
        half_kelly: bool = True
    ) -> float:
        """
        Calculate the bet size with the Kelly criterion.

        Kelly = (p * b - q) / b, with b = odds - 1 and q = 1 - p, which
        simplifies to (p * odds - 1) / b.

        Args:
            probability: Win probability (0-1).
            odds: Decimal odds.
            half_kelly: Halve the Kelly fraction for safety (default).

        Returns:
            Fraction of the bankroll to stake, capped at 0.10; 0 when
            odds <= 1 or when the Kelly fraction is negative.
        """
        if odds <= 1:
            return 0

        b = odds - 1
        # p*b - q == p*odds - 1. The numerator must subtract the loss
        # probability q, not 1, otherwise nearly every bet comes out negative.
        kelly = (probability * odds - 1) / b

        # Don't bet if negative
        if kelly < 0:
            return 0

        # Use half Kelly for safety
        if half_kelly:
            kelly = kelly / 2

        # Cap at 10% of bankroll
        return min(kelly, 0.10)

    def find_value_bets(
        self,
        model_probs: Dict[str, float],
        odds: Dict[str, float],
        match_context: Optional[Dict] = None
    ) -> List[ValueBet]:
        """
        Find all value bets in a match.

        Args:
            model_probs: {"1": 0.55, "X": 0.25, "2": 0.20}
            odds: {"1": 1.25, "X": 4.50, "2": 8.00}
            match_context: Additional context (form, h2h, etc.); currently
                not used by this method.

        Returns:
            List of ValueBet objects, sorted by (bonus-adjusted) edge,
            highest first. Outcomes with probability <= 0 or odds <= 1 are
            skipped.
        """
        value_bets = []

        margin_analysis = self.calculate_margin(
            odds.get("1", 0),
            odds.get("X", 0),
            odds.get("2", 0)
        )
        fav_outcome = margin_analysis.favorite_outcome
        fav_odds = margin_analysis.favorite_odds
        # calculate_margin returns a placeholder favorite with odds 0 when the
        # 1X2 prices are incomplete; favorite-based heuristics must not fire
        # on that placeholder.
        favorite_known = fav_odds > 1

        for outcome in ["1", "X", "2"]:
            prob = model_probs.get(outcome, 0)
            odd = odds.get(outcome, 0)

            if prob <= 0 or odd <= 1:
                continue

            edge, ev = self.calculate_edge(prob, odd, margin_analysis.raw_margin)
            kelly = self.calculate_kelly_fraction(prob, odd)

            reasons = []

            # 1. Basic edge
            if edge > self.value_threshold:
                reasons.append(f"Edge: +{round(edge*100, 1)}% over bookmaker")

            # 2. High margin bonus
            if margin_analysis.raw_margin > self.MARGIN_HIGH:
                reasons.append(f"High margin ({round(margin_analysis.raw_margin*100, 1)}%) = uncertainty")

                # Boost edge for underdogs in high margin matches
                if outcome != fav_outcome:
                    edge += 0.02  # 2% bonus
                    reasons.append("Underdog in high-margin match = bonus value")

            # 3. Favorite odds trap
            if favorite_known and fav_outcome != outcome:
                upset_rate = self.get_historical_upset_rate(fav_odds)
                if upset_rate > 0.25:
                    reasons.append(f"Favorite odds {fav_odds} has {round(upset_rate*100)}% upset rate")

                    # Extra bonus for 1.40-1.60 range
                    if 1.40 <= fav_odds <= 1.60:
                        edge += 0.03
                        reasons.append("DANGER ZONE: 1.40-1.60 odds = highest upset risk")

            # 4. Away favorite risk
            if fav_outcome == "2" and outcome == "1":
                edge += 0.015
                reasons.append("Away favorite = extra home value")

            # 5. EV positive
            if ev > 0:
                reasons.append(f"Positive EV: +{round(ev*100, 1)}%")

            # Only add if we have reasons (value detected)
            if reasons and edge > 0:
                if edge > 0.08 or (edge > 0.05 and kelly > 0.03):
                    confidence = "HIGH"
                elif edge > 0.05:
                    confidence = "MEDIUM"
                else:
                    confidence = "LOW"

                value_bets.append(ValueBet(
                    outcome=outcome,
                    model_probability=prob,
                    implied_probability=1/odd,
                    odds=odd,
                    edge=edge,
                    expected_value=ev,
                    kelly_fraction=kelly,
                    confidence=confidence,
                    reasons=reasons
                ))

        # Sort by edge (highest first)
        value_bets.sort(key=lambda x: x.edge, reverse=True)

        return value_bets

    def predict_with_value(
        self,
        model_probs: Dict[str, float],
        odds: Dict[str, float],
        match_context: Optional[Dict] = None
    ) -> Dict:
        """
        Build a recommendation based on value rather than raw probability.

        - Clear value on one outcome -> ``BET_<outcome>``
        - Moderate value -> ``CONSIDER_<outcome>``
        - No value -> ``NO_BET``

        Returns:
            {
                "margin_analysis": dict (MarginAnalysis.to_dict()),
                "value_bets": list of dicts (ValueBet.to_dict()),
                "best_value": dict or None,
                "alternative_value": dict or None,
                "recommendation": str,
                "confidence": str,
                "reasoning": list of str
            }
        """
        margin_analysis = self.calculate_margin(
            odds.get("1", 0),
            odds.get("X", 0),
            odds.get("2", 0)
        )

        value_bets = self.find_value_bets(model_probs, odds, match_context)

        result = {
            "margin_analysis": margin_analysis.to_dict(),
            "value_bets": [vb.to_dict() for vb in value_bets],
            "best_value": None,
            "alternative_value": None,
            "recommendation": "NO_BET",
            "confidence": "LOW",
            "reasoning": []
        }

        if not value_bets:
            result["reasoning"].append("No value detected in any outcome")
            result["reasoning"].append("Bookmaker odds are efficient for this match")
            return result

        # Get best value bet
        best = value_bets[0]
        result["best_value"] = best.to_dict()

        if len(value_bets) > 1:
            result["alternative_value"] = value_bets[1].to_dict()

        # Determine recommendation. "reasoning" starts from a COPY of the
        # bet's reasons: to_dict() shares best.reasons with the serialized
        # entries above, so appending to the original list would leak the
        # summary lines into "best_value" and "value_bets".
        if best.confidence == "HIGH" and best.edge > 0.05:
            result["recommendation"] = f"BET_{best.outcome}"
            result["confidence"] = "HIGH"
            result["reasoning"] = list(best.reasons)
            result["reasoning"].append(f"Strong value on {best.outcome} with {round(best.edge*100, 1)}% edge")

        elif best.confidence == "MEDIUM" or best.edge > 0.03:
            result["recommendation"] = f"CONSIDER_{best.outcome}"
            result["confidence"] = "MEDIUM"
            result["reasoning"] = list(best.reasons)
            result["reasoning"].append(f"Moderate value on {best.outcome}")

        else:
            result["recommendation"] = "NO_BET"
            result["confidence"] = "LOW"
            result["reasoning"].append("Edge too small to justify bet")
            result["reasoning"].append(f"Best edge: {round(best.edge*100, 1)}% (need >3%)")

        # Add margin context
        if margin_analysis.uncertainty_level == "EXTREME":
            result["reasoning"].append("⚠️ EXTREME margin - high volatility match")
        elif margin_analysis.uncertainty_level == "HIGH":
            result["reasoning"].append("⚠️ High margin - bookmaker sees risk")

        return result
|
||||||
|
|
||||||
|
|
||||||
|
# Singleton instance
|
||||||
|
_engine_instance = None


def get_value_detection_engine() -> ValueDetectionEngine:
    """Return the shared ValueDetectionEngine, building it on the first call."""
    global _engine_instance
    if _engine_instance is not None:
        return _engine_instance
    _engine_instance = ValueDetectionEngine()
    return _engine_instance
|
||||||
@@ -0,0 +1,167 @@
|
|||||||
|
"""
|
||||||
|
Shared VQWEN feature contract
|
||||||
|
=============================
|
||||||
|
|
||||||
|
One place defines how VQWEN features are produced.
|
||||||
|
Both training and runtime inference must use this module so the model sees
|
||||||
|
the same feature semantics in historical data and live analysis.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
# Ordered feature names. row_to_array() emits values in exactly this order,
# and build_vqwen_feature_row() produces one value per name.
FEATURE_COLUMNS = [
    "elo_diff",
    "h_xg",
    "a_xg",
    "total_xg",
    "pow_diff",
    "rest_diff",
    "h_fat",
    "a_fat",
    "imp_h",
    "imp_d",
    "imp_a",
    "h_xi",
    "a_xi",
    "h2h_h_wr",
    "form_diff",
]
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
class VqwenFeatureInput:
    """Raw per-match inputs consumed by build_vqwen_feature_row()."""

    # Ratings; only their difference is used ("elo_diff").
    home_elo: float
    away_elo: float
    # Per-team averages feeding the xG and power features.
    home_avg_goals_scored: float
    away_avg_goals_scored: float
    home_avg_goals_conceded: float
    away_avg_goals_conceded: float
    home_avg_shots_on_target: float
    away_avg_shots_on_target: float
    home_avg_possession: float
    away_avg_possession: float
    # Rest days drive fatigue_multiplier() and "rest_diff".
    home_rest_days: float
    away_rest_days: float
    # Implied probabilities; clamped to [0.01, 0.98] in the feature row.
    implied_prob_home: float
    implied_prob_draw: float
    implied_prob_away: float
    # Optional inputs; the feature builder clamps these three to [0, 1].
    home_lineup_availability: float = 1.0
    away_lineup_availability: float = 1.0
    h2h_home_win_rate: float = 0.5
    home_form_score: float = 0.0
    away_form_score: float = 0.0
    # Goal environment: the mean of these two is divided by 2.6 and clamped.
    league_avg_goals: float = 2.6
    referee_avg_goals: float = 2.6
    # Clamped to [-0.25, 0.25] by the feature builder.
    referee_home_bias: float = 0.0
    home_squad_strength: float = 0.5
    away_squad_strength: float = 0.5
    home_key_players: float = 0.0
    away_key_players: float = 0.0
    # Clamped to [0, 1]; lowers both teams' squad multipliers and "form_diff".
    missing_players_impact: float = 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def fatigue_multiplier(rest_days: float) -> float:
    """Return the fatigue factor: 0.85 below 3 rest days, 0.95 below 5, else 1.0."""
    for limit, factor in ((3.0, 0.85), (5.0, 0.95)):
        if rest_days < limit:
            return factor
    return 1.0
|
||||||
|
|
||||||
|
|
||||||
|
def clamp(value: float, lower: float, upper: float) -> float:
    """Convert ``value`` to float and limit it to the range [lower, upper]."""
    number = float(value)
    # Raise to the lower bound first, then cut at the upper bound.
    floored = lower if lower > number else number
    return upper if upper < floored else floored
|
||||||
|
|
||||||
|
|
||||||
|
def build_vqwen_feature_row(values: VqwenFeatureInput) -> dict[str, float]:
    """
    Turn raw match inputs into the feature mapping keyed by FEATURE_COLUMNS names.

    Expected goals per side are the mean of the team's scoring average and the
    opponent's conceding average (floored at 0.05), scaled by fatigue, the goal
    environment and a squad multiplier. "pow_diff" is the difference of two
    weighted team-strength sums.
    """
    h_fatigue = fatigue_multiplier(values.home_rest_days)
    a_fatigue = fatigue_multiplier(values.away_rest_days)

    # League and referee goal averages, relative to a 2.6-goal baseline.
    env_goals = (float(values.league_avg_goals) + float(values.referee_avg_goals)) / 2.0
    env_multiplier = clamp(env_goals / 2.6, 0.85, 1.2)

    squad_gap = float(values.home_squad_strength) - float(values.away_squad_strength)
    key_gap = float(values.home_key_players) - float(values.away_key_players)
    absence = clamp(float(values.missing_players_impact), 0.0, 1.0)
    ref_bias = clamp(float(values.referee_home_bias), -0.25, 0.25)

    # Squad, key-player and referee terms are mirrored between the sides;
    # the missing-players penalty lowers both.
    h_squad_raw = 1.0 + squad_gap * 0.08 + key_gap * 0.025 - absence * 0.08 + ref_bias * 0.03
    h_squad_mult = clamp(h_squad_raw, 0.82, 1.18)
    a_squad_raw = 1.0 - squad_gap * 0.08 - key_gap * 0.025 - absence * 0.08 - ref_bias * 0.03
    a_squad_mult = clamp(a_squad_raw, 0.82, 1.18)

    h_base = (float(values.home_avg_goals_scored) + float(values.away_avg_goals_conceded)) / 2.0
    home_xg = max(0.05, h_base) * h_fatigue * env_multiplier * h_squad_mult
    a_base = (float(values.away_avg_goals_scored) + float(values.home_avg_goals_conceded)) / 2.0
    away_xg = max(0.05, a_base) * a_fatigue * env_multiplier * a_squad_mult

    home_power = (
        float(values.home_avg_goals_scored) * 5.0
        - float(values.home_avg_goals_conceded) * 5.0
        + float(values.home_avg_shots_on_target) * 2.0
        + float(values.home_avg_possession) * 0.1
        + float(values.home_squad_strength) * 3.0
        + float(values.home_key_players) * 0.8
        + ref_bias * 6.0
    )
    away_power = (
        float(values.away_avg_goals_scored) * 5.0
        - float(values.away_avg_goals_conceded) * 5.0
        + float(values.away_avg_shots_on_target) * 2.0
        + float(values.away_avg_possession) * 0.1
        + float(values.away_squad_strength) * 3.0
        + float(values.away_key_players) * 0.8
        - ref_bias * 6.0
    )

    form_diff = (
        float(values.home_form_score)
        - float(values.away_form_score)
        + squad_gap * 1.5
        + key_gap * 0.35
        + ref_bias * 2.0
        - absence * 1.75
    )

    features = {
        "elo_diff": float(values.home_elo) - float(values.away_elo),
        "h_xg": home_xg,
        "a_xg": away_xg,
        "total_xg": home_xg + away_xg,
        "pow_diff": home_power - away_power,
        "rest_diff": float(values.home_rest_days) - float(values.away_rest_days),
        "h_fat": h_fatigue,
        "a_fat": a_fatigue,
        "imp_h": clamp(values.implied_prob_home, 0.01, 0.98),
        "imp_d": clamp(values.implied_prob_draw, 0.01, 0.98),
        "imp_a": clamp(values.implied_prob_away, 0.01, 0.98),
        # Column names are kept for artifact compatibility. They now mean
        # "pre-match lineup availability" rather than the leaked post-match
        # starting-XI counts.
        "h_xi": clamp(values.home_lineup_availability, 0.0, 1.0),
        "a_xi": clamp(values.away_lineup_availability, 0.0, 1.0),
        "h2h_h_wr": clamp(values.h2h_home_win_rate, 0.0, 1.0),
        "form_diff": form_diff,
    }
    return features
|
||||||
|
|
||||||
|
|
||||||
|
def row_to_array(row: dict[str, float]) -> np.ndarray:
    """Return the row as a 1 x len(FEATURE_COLUMNS) float64 array in column order."""
    ordered = [float(row[name]) for name in FEATURE_COLUMNS]
    return np.array([ordered], dtype=np.float64)
|
||||||
Executable
+269
@@ -0,0 +1,269 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import asyncio
|
||||||
|
import time
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import uvicorn
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from fastapi import FastAPI, HTTPException, Request
|
||||||
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
from fastapi.responses import JSONResponse
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from models.basketball_v25 import get_basketball_v25_predictor
|
||||||
|
from services.single_match_orchestrator import get_single_match_orchestrator
|
||||||
|
from services.v26_shadow_engine import get_v26_shadow_engine
|
||||||
|
from data.database import dispose_engine
|
||||||
|
|
||||||
|
load_dotenv()

# Switch the standard streams to UTF-8 where they support reconfigure(), so
# the emoji in the log messages below can be printed.
for _stream in (sys.stdout, sys.stderr):
    if _stream and hasattr(_stream, "reconfigure"):
        _stream.reconfigure(encoding="utf-8")
del _stream
|
||||||
|
|
||||||
|
|
||||||
|
# Request body model for coupon generation (the root endpoint lists
# "POST /v20plus/coupon"). Documented with comments rather than a docstring
# so the generated API schema is unchanged.
class CouponRequest(BaseModel):
    # Identifiers of the matches to consider.
    match_ids: list[str]
    # Strategy name; defaults to "BALANCED".
    strategy: str | None = "BALANCED"
    # Optional limits; None leaves the choice to the consumer of the request.
    max_matches: int | None = None
    min_confidence: float | None = None
|
||||||
|
|
||||||
|
|
||||||
|
@asynccontextmanager
async def lifespan(_: FastAPI):
    """
    Application lifespan: warm up the engines on startup, release DB on shutdown.

    Startup failures are logged and swallowed on purpose, so the API still
    comes up (the health endpoint reports the state). The database engine is
    disposed in a ``finally`` block so cleanup also runs when an exception or
    cancellation is thrown into the generator at ``yield``.
    """
    try:
        print("🚀 Initializing V28 orchestrator...", flush=True)
        get_single_match_orchestrator()
        get_v26_shadow_engine()
        print("✅ V28 orchestrator ready", flush=True)
    except Exception as error:
        print(f"❌ Failed to initialize orchestrator: {error}", flush=True)
        import traceback

        traceback.print_exc()

    try:
        yield
    finally:
        # Cleanup async DB connections on shutdown
        await dispose_engine()
|
||||||
|
|
||||||
|
|
||||||
|
# ASGI application; startup and shutdown work is delegated to lifespan().
app = FastAPI(
    title="Suggest-Bet AI Engine",
    version="28.0.0",
    description="V28 Single Match Prediction Package API",
    lifespan=lifespan,
)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_cors_origins() -> list[str]:
|
||||||
|
raw = os.getenv("CORS_ALLOW_ORIGINS", "").strip()
|
||||||
|
if raw:
|
||||||
|
return [item.strip() for item in raw.split(",") if item.strip()]
|
||||||
|
# Dev-safe defaults + production domains.
|
||||||
|
return [
|
||||||
|
"http://localhost:3000",
|
||||||
|
"http://127.0.0.1:3000",
|
||||||
|
"http://localhost:3001",
|
||||||
|
"http://127.0.0.1:3001",
|
||||||
|
"http://localhost:3005",
|
||||||
|
"http://127.0.0.1:3005",
|
||||||
|
"https://ui-suggestbet.bilgich.com",
|
||||||
|
"https://suggestbet.bilgich.com",
|
||||||
|
"https://iddaai.com",
|
||||||
|
"https://www.iddaai.com",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
# CORS: explicit origins come from _parse_cors_origins(); in addition, any
# localhost / 127.0.0.1 origin on any port is accepted through the regex.
app.add_middleware(
    CORSMiddleware,
    allow_origins=_parse_cors_origins(),
    allow_origin_regex=r"^https?://(localhost|127\.0\.0\.1)(:\d+)?$",
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
||||||
|
|
||||||
|
@app.exception_handler(Exception)
async def global_exception_handler(_: Request, exc: Exception):
    """Log an unhandled exception with its traceback and answer with HTTP 500."""
    import traceback

    print(f"💥 ERROR: {exc}", flush=True)
    traceback.print_exc()

    # NOTE(review): the exception text is returned to the client - confirm
    # that exposing internal error details is acceptable.
    payload = {"message": f"Internal Server Error: {str(exc)}"}
    return JSONResponse(status_code=500, content=payload)
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/")
def read_root() -> dict[str, Any]:
    # Service banner: engine name, configured mode and the advertised routes.
    # (Comments instead of a docstring keep the OpenAPI description unchanged.)
    advertised_routes = [
        "POST /v20plus/analyze/{match_id}",
        "GET /v20plus/analyze-htms/{match_id}",
        "GET /v20plus/analyze-htft/{match_id}",
        "GET /v20plus/reversal-watchlist",
        "POST /v20plus/coupon",
        "GET /v20plus/daily-banker",
    ]
    engine_mode = os.getenv("AI_ENGINE_MODE", "v28")
    return {
        "status": "Suggest-Bet AI Engine v28",
        "engine": "V28 Single Match Orchestrator",
        "mode": engine_mode,
        "routes": advertised_routes,
    }
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/health")
def health_check() -> dict[str, Any]:
    # Readiness report. "ready" follows the basketball predictor's
    # "fully_loaded" flag; any exception while collecting the report is
    # returned as an "unhealthy" payload instead of being raised.
    try:
        orchestrator = get_single_match_orchestrator()
        shadow = get_v26_shadow_engine()
        basketball_state = get_basketball_v25_predictor().readiness_summary()
        is_ready = bool(basketball_state["fully_loaded"])
        report = {
            "status": "healthy" if is_ready else "degraded",
            "engine": "v28.main",
            "mode": os.getenv("AI_ENGINE_MODE", "v28"),
            "ready": is_ready,
            "basketball_v25": basketball_state,
            "v26_shadow": shadow.readiness_summary(),
            "prediction_service_ready": True,
            "model_loaded": is_ready,
            "orchestrator_mode": getattr(orchestrator, "engine_mode", "v28"),
        }
    except Exception as error:
        return {"status": "unhealthy", "ready": False, "error": str(error)}
    return report
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/v20plus/analyze/{match_id}")
async def analyze_match_v20plus(match_id: str) -> dict[str, Any]:
    """Run the single-match analysis for ``match_id`` and return its result.

    Raises:
        HTTPException: 404 when the orchestrator returns an empty result.
    """
    orchestrator = get_single_match_orchestrator()
    # analyze_match is a synchronous call; run it in a worker thread so this
    # coroutine does not block the event loop (the HT/FT endpoint in this
    # file offloads the same method the same way).
    result = await asyncio.to_thread(orchestrator.analyze_match, match_id)
    if not result:
        raise HTTPException(status_code=404, detail=f"Match not found: {match_id}")
    return result
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/v20plus/analyze-htms/{match_id}")
async def analyze_match_htms_v20plus(match_id: str) -> dict[str, Any]:
    """Return the orchestrator's ``analyze_match_htms`` result for ``match_id``.

    Raises:
        HTTPException: 404 when the orchestrator returns an empty result.
    """
    # NOTE(review): this is a direct (non-awaited) call inside a coroutine;
    # if analyze_match_htms is slow it will hold the event loop — confirm.
    analysis = get_single_match_orchestrator().analyze_match_htms(match_id)
    if analysis:
        return analysis
    raise HTTPException(status_code=404, detail=f"Match not found: {match_id}")
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/v20plus/analyze-htft/{match_id}")
async def analyze_match_htft_v20plus(match_id: str, timeout_sec: int = 30) -> dict[str, Any]:
    """Analyze a match and return an HT/FT-focused slice of the result.

    Small, explicit endpoint for HT/FT inspection and debugging in
    FE/Postman. The analysis runs in a worker thread and is abandoned (from
    the caller's point of view) after ``timeout_sec`` seconds.

    Args:
        match_id: Identifier passed to the orchestrator.
        timeout_sec: Maximum wait in seconds; must be within 3..120.

    Returns:
        A dict with the HT/FT probabilities, the more likely of the two
        reversal outcomes ("1/2" vs "2/1", ties go to "2/1"), the overall
        most likely HT/FT outcome, timing, and several sections copied from
        the full analysis.

    Raises:
        HTTPException: 400 for an out-of-range ``timeout_sec``, 504 when the
            analysis times out, 404 when the result is empty.
    """
    if not 3 <= timeout_sec <= 120:
        raise HTTPException(status_code=400, detail="timeout_sec must be between 3 and 120")

    orchestrator = get_single_match_orchestrator()
    # Monotonic clock: the reported duration must not be skewed by
    # wall-clock adjustments.
    started_at = time.monotonic()

    try:
        result = await asyncio.wait_for(
            asyncio.to_thread(orchestrator.analyze_match, match_id),
            timeout=float(timeout_sec),
        )
    except asyncio.TimeoutError as error:
        raise HTTPException(
            status_code=504,
            detail=f"Analyze timeout after {timeout_sec}s for match_id={match_id}",
        ) from error

    if not result:
        raise HTTPException(status_code=404, detail=f"Match not found: {match_id}")

    # `or {}` keeps explicit None sections from raising AttributeError below.
    risk = result.get("risk") or {}
    market_board = result.get("market_board") or {}
    # Prefer the market board's HT/FT probabilities; fall back to the risk section.
    htft_probs = (
        (market_board.get("HTFT") or {}).get("probs")
        or risk.get("ht_ft_probs")
        or {}
    )

    top_reversal_pick = None
    top_reversal_prob = 0.0
    overall_htft_pick = None
    overall_htft_prob = 0.0

    if htft_probs:
        prob_12 = float(htft_probs.get("1/2") or 0.0)
        prob_21 = float(htft_probs.get("2/1") or 0.0)
        if prob_21 >= prob_12:
            top_reversal_pick, top_reversal_prob = "2/1", prob_21
        else:
            top_reversal_pick, top_reversal_prob = "1/2", prob_12

        overall_htft_pick, overall_htft_prob = max(
            htft_probs.items(),
            key=lambda item: float(item[1] or 0.0),
        )

    return {
        "engine": "v28.main",
        "match_info": result.get("match_info", {}),
        "timing_ms": int((time.monotonic() - started_at) * 1000),
        "ht_ft_probs": htft_probs,
        "top_reversal_pick": top_reversal_pick,
        "top_reversal_prob": round(float(top_reversal_prob), 4),
        "overall_htft_pick": overall_htft_pick,
        "overall_htft_pick_prob": round(float(overall_htft_prob or 0.0), 4),
        "surprise_hunter": result.get("surprise_hunter", {}),
        "ht_ft_reversal_radar": result.get("ht_ft_reversal_radar", {}),
        "first_half_result": market_board.get("first_half_result", {}),
        "main_pick": result.get("main_pick", {}),
        "bet_summary": result.get("bet_summary", {}),
    }
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/v20plus/coupon")
async def generate_coupon_v20plus(request: CouponRequest) -> dict[str, Any]:
    """Build a coupon from the request and return the orchestrator's output.

    A missing or empty ``strategy`` falls back to ``"BALANCED"``; the other
    request fields are forwarded unchanged.
    """
    strategy = request.strategy or "BALANCED"
    coupon = get_single_match_orchestrator().build_coupon(
        match_ids=request.match_ids,
        strategy=strategy,
        max_matches=request.max_matches,
        min_confidence=request.min_confidence,
    )
    return coupon
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/v20plus/daily-banker")
async def get_daily_banker_v20plus(count: int = 3) -> dict[str, Any]:
    """Return up to ``count`` daily bankers together with how many were found.

    Raises:
        HTTPException: 400 when ``count`` is below 1.
    """
    if not count >= 1:
        raise HTTPException(status_code=400, detail="count must be >= 1")

    picks = get_single_match_orchestrator().get_daily_bankers(count=count)
    response: dict[str, Any] = {"count": len(picks), "bankers": picks}
    return response
|
||||||
|
|
||||||
|
@app.get("/v20plus/reversal-watchlist")
async def get_reversal_watchlist_v20plus(
    count: int = 20,
    horizon_hours: int = 72,
    min_score: float = 45.0,
    top_leagues_only: bool = False,
) -> dict[str, Any]:
    """Validate the query parameters and return the reversal watchlist.

    Raises:
        HTTPException: 400 for the first parameter found out of range,
            checked in the order ``count``, ``horizon_hours``, ``min_score``.
    """
    # (violated?, message) pairs, evaluated in the order they are reported.
    range_checks = (
        (count < 1 or count > 100, "count must be between 1 and 100"),
        (
            horizon_hours < 6 or horizon_hours > 168,
            "horizon_hours must be between 6 and 168",
        ),
        (min_score < 0 or min_score > 100, "min_score must be between 0 and 100"),
    )
    for violated, message in range_checks:
        if violated:
            raise HTTPException(status_code=400, detail=message)

    watchlist = get_single_match_orchestrator().get_reversal_watchlist(
        count=count,
        horizon_hours=horizon_hours,
        min_score=min_score,
        top_leagues_only=top_leagues_only,
    )
    return watchlist
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Direct execution: serve on all interfaces with auto-reload enabled;
    # the port comes from PORT (default 8000).
    uvicorn.run(
        "main:app",
        host="0.0.0.0",
        port=int(os.getenv("PORT", "8000")),
        reload=True,
    )
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
{
|
||||||
|
"executionEnvironments": [
|
||||||
|
{
|
||||||
|
"root": ".",
|
||||||
|
"extraPaths": ["."]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"reportMissingImports": "warning",
|
||||||
|
"pythonVersion": "3.14"
|
||||||
|
}
|
||||||
@@ -0,0 +1,69 @@
|
|||||||
|
{
|
||||||
|
"trained_at": "2026-04-15T10:15:30.114795Z",
|
||||||
|
"rows": 1760,
|
||||||
|
"markets": {
|
||||||
|
"ml": {
|
||||||
|
"skipped": false,
|
||||||
|
"samples": 1760,
|
||||||
|
"train_samples": 1232,
|
||||||
|
"val_samples": 264,
|
||||||
|
"test_samples": 264,
|
||||||
|
"xgb": {
|
||||||
|
"accuracy": 0.6515,
|
||||||
|
"logloss": 0.6106
|
||||||
|
},
|
||||||
|
"lgb": {
|
||||||
|
"accuracy": 0.6288,
|
||||||
|
"logloss": 0.63
|
||||||
|
},
|
||||||
|
"ensemble": {
|
||||||
|
"accuracy": 0.6477,
|
||||||
|
"logloss": 0.615
|
||||||
|
},
|
||||||
|
"xgb_path": "/Users/piton/Documents/iddaai.com/Suggest-Bet-BE/ai-engine/models/basketball_v25/xgb_basketball_v25_ml.json",
|
||||||
|
"lgb_path": "/Users/piton/Documents/iddaai.com/Suggest-Bet-BE/ai-engine/models/basketball_v25/lgb_basketball_v25_ml.txt"
|
||||||
|
},
|
||||||
|
"total": {
|
||||||
|
"skipped": false,
|
||||||
|
"samples": 1760,
|
||||||
|
"train_samples": 1232,
|
||||||
|
"val_samples": 264,
|
||||||
|
"test_samples": 264,
|
||||||
|
"xgb": {
|
||||||
|
"accuracy": 0.5417,
|
||||||
|
"logloss": 0.7011
|
||||||
|
},
|
||||||
|
"lgb": {
|
||||||
|
"accuracy": 0.5114,
|
||||||
|
"logloss": 0.6929
|
||||||
|
},
|
||||||
|
"ensemble": {
|
||||||
|
"accuracy": 0.5492,
|
||||||
|
"logloss": 0.6905
|
||||||
|
},
|
||||||
|
"xgb_path": "/Users/piton/Documents/iddaai.com/Suggest-Bet-BE/ai-engine/models/basketball_v25/xgb_basketball_v25_total.json",
|
||||||
|
"lgb_path": "/Users/piton/Documents/iddaai.com/Suggest-Bet-BE/ai-engine/models/basketball_v25/lgb_basketball_v25_total.txt"
|
||||||
|
},
|
||||||
|
"spread": {
|
||||||
|
"skipped": false,
|
||||||
|
"samples": 1760,
|
||||||
|
"train_samples": 1232,
|
||||||
|
"val_samples": 264,
|
||||||
|
"test_samples": 264,
|
||||||
|
"xgb": {
|
||||||
|
"accuracy": 0.5644,
|
||||||
|
"logloss": 0.6953
|
||||||
|
},
|
||||||
|
"lgb": {
|
||||||
|
"accuracy": 0.5341,
|
||||||
|
"logloss": 0.6903
|
||||||
|
},
|
||||||
|
"ensemble": {
|
||||||
|
"accuracy": 0.5417,
|
||||||
|
"logloss": 0.6821
|
||||||
|
},
|
||||||
|
"xgb_path": "/Users/piton/Documents/iddaai.com/Suggest-Bet-BE/ai-engine/models/basketball_v25/xgb_basketball_v25_spread.json",
|
||||||
|
"lgb_path": "/Users/piton/Documents/iddaai.com/Suggest-Bet-BE/ai-engine/models/basketball_v25/lgb_basketball_v25_spread.txt"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,19 @@
|
|||||||
|
{
|
||||||
|
"version": "v26.shadow.0",
|
||||||
|
"calibration_version": "v26.shadow.calib.0",
|
||||||
|
"train_rows": 6853,
|
||||||
|
"validation_rows": 1469,
|
||||||
|
"label_priors": {
|
||||||
|
"MS": 0.4404,
|
||||||
|
"OU25": 0.5214,
|
||||||
|
"BTTS": 0.5398,
|
||||||
|
"HT": 0.4275,
|
||||||
|
"HTFT": 0.26,
|
||||||
|
"CARDS": 0.6052
|
||||||
|
},
|
||||||
|
"artifact_path": "/Users/piton/Documents/GitHub/iddaai/iddaai-be/ai-engine/models/v26_shadow/market_profiles.json",
|
||||||
|
"notes": [
|
||||||
|
"v26.shadow runtime currently uses artifact-based calibration and ROI gating",
|
||||||
|
"market profile JSON remains the source of truth for runtime thresholds"
|
||||||
|
]
|
||||||
|
}
|
||||||
Executable
+20
@@ -0,0 +1,20 @@
|
|||||||
|
fastapi==0.110.0
|
||||||
|
uvicorn==0.27.1
|
||||||
|
pandas>=2.2.0
|
||||||
|
scikit-learn>=1.4.1.post1
|
||||||
|
psycopg2-binary>=2.9.9
|
||||||
|
python-dotenv==1.0.1
|
||||||
|
numpy>=1.26.4
|
||||||
|
# PyTorch CPU version will be installed manually in Dockerfile
|
||||||
|
requests==2.31.0
|
||||||
|
sqlalchemy>=2.0.25
|
||||||
|
joblib>=1.3.0
|
||||||
|
xgboost>=2.0.0
|
||||||
|
# V20+ model dependencies
|
||||||
|
lightgbm>=4.0.0
|
||||||
|
tqdm>=4.66.0
|
||||||
|
tabulate>=0.9.0
|
||||||
|
pyyaml>=6.0
|
||||||
|
# V2 async database
|
||||||
|
asyncpg>=0.29.0
|
||||||
|
pydantic>=2.5.0
|
||||||
Executable
+20
@@ -0,0 +1,20 @@
|
|||||||
|
fastapi==0.110.0
|
||||||
|
uvicorn==0.27.1
|
||||||
|
pandas>=2.2.0
|
||||||
|
scikit-learn>=1.4.1.post1
|
||||||
|
psycopg2-binary>=2.9.9
|
||||||
|
python-dotenv==1.0.1
|
||||||
|
numpy>=1.26.4
|
||||||
|
requests==2.31.0
|
||||||
|
sqlalchemy>=2.0.25
|
||||||
|
joblib>=1.3.0
|
||||||
|
xgboost>=2.0.0
|
||||||
|
# V20+ model dependencies
|
||||||
|
lightgbm>=4.0.0
|
||||||
|
tqdm>=4.66.0
|
||||||
|
tabulate>=0.9.0
|
||||||
|
pyyaml>=6.0
|
||||||
|
# V2 async database
|
||||||
|
asyncpg>=0.29.0
|
||||||
|
pydantic>=2.5.0
|
||||||
|
pytest>=8.0.0
|
||||||
@@ -0,0 +1,125 @@
|
|||||||
|
"""
|
||||||
|
Pydantic v2 response schemas for the V2 Betting Engine.
|
||||||
|
Strictly mirrors the NestJS DTO contract for SingleMatchPredictionPackage.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
# ── Sub-models ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
# Identifying details of the match a prediction refers to.
# Only `match_id` is required; every other field has an empty/zero default.
class MatchInfo(BaseModel):
    match_id: str
    match_name: str = ""
    home_team: str = ""
    away_team: str = ""
    league: str = ""
    # presumably the kickoff time as epoch milliseconds — TODO confirm
    match_date_ms: int = 0
|
||||||
|
|
||||||
|
|
||||||
|
# Quality of the input data behind a prediction: a label, a score
# constrained to 0.0-1.0, free-form flags and the two lineup counts.
class DataQuality(BaseModel):
    label: str = Field("MEDIUM", description="HIGH | MEDIUM | LOW")
    score: float = Field(0.5, ge=0.0, le=1.0)
    flags: list[str] = Field(default_factory=list)
    home_lineup_count: int = 0
    away_lineup_count: int = 0
|
||||||
|
|
||||||
|
|
||||||
|
# Risk summary for a prediction: a level, a score constrained to 0.0-1.0,
# an optional surprise classification and a list of warnings.
class RiskAssessment(BaseModel):
    level: str = Field("MEDIUM", description="LOW | MEDIUM | HIGH | EXTREME")
    score: float = Field(0.0, ge=0.0, le=1.0)
    is_surprise_risk: bool = False
    surprise_type: str | None = None
    warnings: list[str] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
# One betting pick. `market`, `pick` and `probability` are required;
# the remaining fields default to a non-playable "PASS" pick.
class PickDetail(BaseModel):
    # Required fields (no default supplied).
    market: str = Field(description="MS, OU25, BTTS, DC, HT, HTFT, etc.")
    pick: str = Field(description="1, X, 2, Over, Under, Yes, No, 1/1, etc.")
    probability: float = Field(ge=0.0, le=1.0)

    # Confidence figures and pricing.
    confidence: float = Field(0.0, description="Percentage 0-100")
    odds: float | None = Field(None, gt=0.0)
    raw_confidence: float = 0.0
    calibrated_confidence: float = 0.0
    min_required_confidence: float = 0.0
    edge: float = Field(0.0, description="Model prob minus implied prob")

    # Betting decision.
    play_score: float = Field(0.0, ge=0.0, le=100.0)
    playable: bool = False
    bet_grade: str = Field("PASS", description="A | B | C | PASS")
    stake_units: float = Field(0.0, ge=0.0)
    decision_reasons: list[str] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
# Overall betting advice; the defaults describe "nothing playable".
class BetAdvice(BaseModel):
    playable: bool = False
    suggested_stake_units: float = 0.0
    reason: str = "no_playable_pick"
|
||||||
|
|
||||||
|
|
||||||
|
# One row of the bet summary. `market` and `pick` are required;
# the remaining fields default to a non-playable "PASS" row.
class BetSummaryRow(BaseModel):
    market: str
    pick: str
    raw_confidence: float = 0.0
    calibrated_confidence: float = 0.0
    bet_grade: str = "PASS"
    playable: bool = False
    stake_units: float = 0.0
    play_score: float = 0.0
    reasons: list[str] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
# A candidate scoreline paired with its probability; both fields are required.
class ScoreScenario(BaseModel):
    score: str
    prob: float
|
||||||
|
|
||||||
|
|
||||||
|
# Predicted scorelines plus xG figures; defaults are "0-0" and 0.0.
class ScorePrediction(BaseModel):
    # presumably full-time and half-time scorelines — TODO confirm
    ft: str = "0-0"
    ht: str = "0-0"
    xg_home: float = 0.0
    xg_away: float = 0.0
    xg_total: float = 0.0
|
||||||
|
|
||||||
|
|
||||||
|
# Per-component figures (team, player, odds, referee), all defaulting to 0.0.
# NOTE(review): whether these are weights or scores is not visible here — confirm.
class EngineBreakdown(BaseModel):
    team: float = 0.0
    player: float = 0.0
    odds: float = 0.0
    referee: float = 0.0
|
||||||
|
|
||||||
|
|
||||||
|
# Pick, confidence and an outcome -> probability mapping for one market.
class MarketProbs(BaseModel):
    pick: str = ""
    confidence: float = 0.0
    probs: dict[str, float] = Field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Root Response ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class PredictionResponse(BaseModel):
    """
    Root API contract. Every field matches the NestJS
    `SingleMatchPredictionPackage` DTO exactly.
    """

    # Only `match_info` is required; every other section falls back to its
    # model's defaults, an empty container, or None.
    model_version: str = "v2.betting_engine"
    match_info: MatchInfo
    data_quality: DataQuality = Field(default_factory=DataQuality)
    risk: RiskAssessment = Field(default_factory=RiskAssessment)
    engine_breakdown: EngineBreakdown = Field(default_factory=EngineBreakdown)
    # Optional picks: None when no such pick is provided.
    main_pick: PickDetail | None = None
    value_pick: PickDetail | None = None
    bet_advice: BetAdvice = Field(default_factory=BetAdvice)
    bet_summary: list[BetSummaryRow] = Field(default_factory=list)
    supporting_picks: list[PickDetail] = Field(default_factory=list)
    aggressive_pick: PickDetail | None = None
    scenario_top5: list[ScoreScenario] = Field(default_factory=list)
    score_prediction: ScorePrediction = Field(default_factory=ScorePrediction)
    # Untyped per-market payload; its structure is not constrained by this schema.
    market_board: dict[str, Any] = Field(default_factory=dict)
    reasoning_factors: list[str] = Field(default_factory=list)
|
||||||
@@ -0,0 +1,77 @@
|
|||||||
|
"""
|
||||||
|
Analyze a single match by ID using VQWEN v3
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import pickle
|
||||||
|
import psycopg2
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from psycopg2.extras import RealDictCursor
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
# NOTE(review): database credentials are hard-coded in this DSN — consider
# loading it from the environment instead of committing it.
DSN = "postgresql://suggestbet:SuGGesT2026SecuRe@localhost:15432/boilerplate_db"
# ID of the single match this script inspects.
MATCH_ID = "9vjazyxahh8wxlmqfjfkgfqxg"
|
||||||
|
|
||||||
|
def analyze():
    """Print what raw data is available for ``MATCH_ID``.

    Looks the match up in ``live_matches`` and then ``matches``, prints its
    teams, score and status, and reports whether ELO features and
    "Maç Sonucu" odds exist for it. No model is run: the closing lines are
    fixed explanatory text. The cursor and connection are always closed,
    including on the early "match not found" return and on errors.
    """
    print(f"🔍 Analyzing Match: {MATCH_ID}")
    conn = psycopg2.connect(DSN)
    try:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            # Fetch Match: live table first, historical table as fallback.
            cur.execute("SELECT * FROM live_matches WHERE id = %s", (MATCH_ID,))
            match = cur.fetchone()
            if not match:
                cur.execute("SELECT * FROM matches WHERE id = %s", (MATCH_ID,))
                match = cur.fetchone()

            if not match:
                print("❌ Match not found.")
                return

            print(f"⚽ Match Found: {match.get('home_team_id')} vs {match.get('away_team_id')}")
            print(f"📊 Score: {match.get('score_home')} - {match.get('score_away')}")
            print(f"⏱️ Status: {match.get('status')}")

            # Full feature computation (ELO, xG, rest, ...) is not performed
            # here; this script only checks raw data availability.

            # Check ELO
            cur.execute(
                "SELECT home_elo, away_elo FROM football_ai_features WHERE match_id = %s",
                (MATCH_ID,),
            )
            elo = cur.fetchone()
            if elo:
                print(f"🧠 ELO: Home {elo['home_elo']} | Away {elo['away_elo']}")
            else:
                print("⚠️ No ELO data found for this match.")

            # Check Odds (%% is a literal % because the query is parameterized)
            cur.execute("""
                SELECT oc.name, os.name as sel, os.odd_value
                FROM odd_categories oc
                JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
                WHERE oc.match_id = %s AND oc.name ILIKE '%%Maç Sonucu%%'
            """, (MATCH_ID,))
            odds = cur.fetchall()
            if odds:
                print("💰 Odds found:")
                for o in odds:
                    print(f" {o['sel']}: {o['odd_value']}")
            else:
                print("❌ No Odds found. Cannot predict.")

        # Conclusion
        # NOTE(review): the text below is hard-coded (including the 1-0
        # score) and does not depend on the data fetched above.
        print("\n🔮 VQWEN Prediction Logic:")
        print("Since this match is already in progress/finished with score 1-0,")
        print("the model would have predicted this BEFORE kickoff based on historical stats.")

        # Hypothetical check
        print("\n👉 If the model predicted 'Home Win (1)' or 'Under 2.5', it would be CORRECT ✅")
        print("👉 If it predicted 'Away Win' or 'Over 2.5', it would be WRONG ❌")
    finally:
        conn.close()


if __name__ == "__main__":
    analyze()
|
||||||
@@ -0,0 +1,64 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Standalone ELO computation script.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python scripts/compute_elo.py # football only
|
||||||
|
python scripts/compute_elo.py --sport basketball
|
||||||
|
python scripts/compute_elo.py --sport all # football + basketball
|
||||||
|
|
||||||
|
Designed for cron or manual execution.
|
||||||
|
Calculates ELO ratings from match history and persists to both JSON and DB.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
# Add ai-engine root to path
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from features.elo_system import ELORatingSystem
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Parse ``--sport`` and compute ELO ratings for each selected sport.

    For every sport a fresh ``ELORatingSystem`` is built, its ratings are
    calculated from history, and the elapsed time, number of rated teams
    and the five highest overall ratings are printed.
    """
    parser = argparse.ArgumentParser(description="Compute ELO ratings from match history")
    parser.add_argument(
        "--sport",
        choices=["football", "basketball", "all"],
        default="football",
        help="Sport to compute ELO for (default: football)",
    )
    args = parser.parse_args()

    # "all" expands to both sports, football first.
    if args.sport == "all":
        sports = ["football", "basketball"]
    else:
        sports = [args.sport]

    banner = "=" * 60
    for sport in sports:
        print(f"\n{banner}")
        print(f"🏆 Computing ELO ratings for: {sport.upper()}")
        print(f"{banner}")

        started = time.time()
        system = ELORatingSystem()
        system.calculate_all_from_history(sport)
        elapsed = time.time() - started

        print(f"\n✅ {sport} ELO computation completed in {elapsed:.1f}s")
        print(f" Teams rated: {len(system.ratings)}")

        if not system.ratings:
            continue

        ranked = sorted(
            system.ratings.values(),
            key=lambda r: r.overall_elo,
            reverse=True,
        )
        print(" Top 5:")
        for i, t in enumerate(ranked[:5], start=1):
            print(f" {i}. {t.team_name:25} → {t.overall_elo:.0f}")


if __name__ == "__main__":
    main()
|
||||||
@@ -0,0 +1,248 @@
|
|||||||
|
"""
|
||||||
|
League Odds Reliability Calculator
|
||||||
|
===================================
|
||||||
|
Computes per-league Brier Score from historical match results + odds,
|
||||||
|
then derives an odds_reliability factor (0.0 – 1.0) for each league.
|
||||||
|
|
||||||
|
Output: ai-engine/data/league_reliability.json
|
||||||
|
Used by: SingleMatchOrchestrator to weight odds-based edge calculations.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python3 scripts/compute_league_reliability.py
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
|
import psycopg2
|
||||||
|
import psycopg2.extras
|
||||||
|
|
||||||
|
# ─── Config ──────────────────────────────────────────────────────────────
|
||||||
|
# Paths are resolved relative to this script: the parent directory is
# treated as the AI engine root and the JSON is written under its data/ folder.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
AI_ENGINE_DIR = os.path.join(SCRIPT_DIR, "..")
OUTPUT_PATH = os.path.join(AI_ENGINE_DIR, "data", "league_reliability.json")

MIN_MATCHES = 50  # Minimum completed matches to compute reliability
# NOTE(review): the query in compute_league_reliability sums squared error
# over the home and away outcomes only (no draw term) — confirm the two
# bounds below were chosen for that two-term score.
BRIER_BASELINE = 0.50  # Random-guess Brier Score for 3-way (worst case)
BRIER_PERFECT = 0.33  # Theoretical best for well-calibrated 3-way odds
|
||||||
|
|
||||||
|
|
||||||
|
def get_dsn() -> str:
    """Build the PostgreSQL DSN from the environment.

    Loads the ``.env`` file one directory above the AI engine root, then:

    * if ``DATABASE_URL`` is a PostgreSQL URI (``postgresql://`` or the
      equivalent ``postgres://`` scheme), returns it with any ``?query``
      suffix removed;
    * otherwise assembles a URI from ``DB_HOST``, ``DB_PORT``, ``DB_USER``,
      ``DB_PASS`` and ``DB_NAME``, using the built-in defaults for any that
      are unset.
    """
    from dotenv import load_dotenv

    env_path = os.path.join(AI_ENGINE_DIR, "..", ".env")
    load_dotenv(env_path)

    raw = os.getenv("DATABASE_URL", "")
    # Both scheme spellings are valid PostgreSQL connection URIs; accepting
    # only one would silently fall through to the defaults below.
    if raw.startswith(("postgresql://", "postgres://")):
        # Drop the query string (everything from the first "?").
        return raw.split("?")[0]

    host = os.getenv("DB_HOST", "localhost")
    port = os.getenv("DB_PORT", "15432")
    user = os.getenv("DB_USER", "suggestbet")
    # NOTE(review): a real-looking password is committed as the default —
    # consider requiring DB_PASS to be set instead.
    pw = os.getenv("DB_PASS", "SuGGesT2026SecuRe")
    db = os.getenv("DB_NAME", "boilerplate_db")
    return f"postgresql://{user}:{pw}@{host}:{port}/{db}"
|
||||||
|
|
||||||
|
|
||||||
|
def compute_league_reliability(conn: Any) -> List[Dict[str, Any]]:
    """Compute a composite odds-reliability score for every eligible league.

    For each football league with at least ``MIN_MATCHES`` finished matches
    that have "Maç Sonucu" odds, the query yields:

    - brier_score: calibration quality of the odds
    - heavy_fav_win_rate: how often <1.50 favorites actually win
    - fav_win_rate: how often the favorite (lower of home/away odds) wins

    These are blended into ``odds_reliability`` (0.0-1.0): 60% Brier-based
    reliability, 20% sample-size confidence, 20% heavy-favorite
    predictability.

    Args:
        conn: An open psycopg2 connection; it is not closed here.

    Returns:
        One dict per league, sorted by ``odds_reliability`` descending.
    """
    import math

    cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
    try:
        print("📊 Computing per-league Brier Scores from match results + odds...")

        cur.execute("""
            WITH ms_odds AS (
                SELECT
                    oc.match_id,
                    MAX(CASE WHEN os.name = '1' THEN os.odd_value::float END) AS odds_h,
                    MAX(CASE WHEN os.name = 'X' THEN os.odd_value::float END) AS odds_d,
                    MAX(CASE WHEN os.name = '2' THEN os.odd_value::float END) AS odds_a
                FROM odd_categories oc
                JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
                WHERE oc.name = 'Maç Sonucu'
                GROUP BY oc.match_id
                HAVING MAX(CASE WHEN os.name = '1' THEN os.odd_value::float END) > 1.0
                AND MAX(CASE WHEN os.name = '2' THEN os.odd_value::float END) > 1.0
            ),
            match_results AS (
                SELECT
                    m.league_id,
                    l.name AS league_name,
                    CASE
                        WHEN m.score_home > m.score_away THEN '1'
                        WHEN m.score_home = m.score_away THEN 'X'
                        ELSE '2'
                    END AS result,
                    o.odds_h, o.odds_d, o.odds_a,
                    -- Normalized implied probabilities
                    (1.0 / o.odds_h) / (
                        (1.0 / o.odds_h) +
                        (1.0 / COALESCE(o.odds_d, 3.3)) +
                        (1.0 / o.odds_a)
                    ) AS ip_home,
                    (1.0 / o.odds_a) / (
                        (1.0 / o.odds_h) +
                        (1.0 / COALESCE(o.odds_d, 3.3)) +
                        (1.0 / o.odds_a)
                    ) AS ip_away,
                    CASE WHEN o.odds_h < o.odds_a THEN 'H' ELSE 'A' END AS fav_side,
                    LEAST(o.odds_h, o.odds_a) AS fav_odds
                FROM matches m
                JOIN ms_odds o ON o.match_id = m.id
                JOIN leagues l ON m.league_id = l.id
                WHERE m.status = 'FT'
                AND m.score_home IS NOT NULL
                AND m.sport = 'football'
            )
            SELECT
                league_id,
                league_name,
                COUNT(*) AS match_count,

                -- Brier Score (lower = better odds calibration)
                AVG(
                    POWER(ip_home - CASE WHEN result = '1' THEN 1.0 ELSE 0.0 END, 2) +
                    POWER(ip_away - CASE WHEN result = '2' THEN 1.0 ELSE 0.0 END, 2)
                ) AS brier_score,

                -- Heavy favorite metrics
                COUNT(CASE WHEN fav_odds < 1.50 THEN 1 END) AS heavy_fav_count,
                AVG(CASE
                    WHEN fav_odds < 1.50
                    AND ((fav_side = 'H' AND result = '1') OR (fav_side = 'A' AND result = '2'))
                    THEN 1.0
                    WHEN fav_odds < 1.50 THEN 0.0
                END) AS heavy_fav_win_rate,

                -- Overall favorite win rate
                AVG(CASE
                    WHEN (fav_side = 'H' AND result = '1') OR (fav_side = 'A' AND result = '2')
                    THEN 1.0 ELSE 0.0
                END) AS fav_win_rate,

                -- Chaos metric
                STDDEV(
                    CASE WHEN result = '1' THEN 1 WHEN result = '2' THEN -1 ELSE 0 END
                ) AS result_volatility

            FROM match_results
            GROUP BY league_id, league_name
            HAVING COUNT(*) >= %s
            ORDER BY COUNT(*) DESC
        """, (MIN_MATCHES,))

        rows = cur.fetchall()
    finally:
        # Release the cursor even when the query fails.
        cur.close()

    print(f" ✅ Found {len(rows)} leagues with >= {MIN_MATCHES} matches")

    # ── Compute composite odds_reliability ──────────────────────────────
    results: List[Dict[str, Any]] = []

    for row in rows:
        brier = float(row["brier_score"])
        match_count = int(row["match_count"])
        fav_win = float(row["fav_win_rate"])

        # The rate is NULL only when the league had no heavy favorites; a
        # genuine 0.0 (heavy favorites never won) must NOT be replaced by
        # the 0.65 fallback, so test for None rather than truthiness.
        heavy_raw = row["heavy_fav_win_rate"]
        heavy_fav_win = 0.65 if heavy_raw is None else float(heavy_raw)

        # Component 1: Brier-based reliability (0-1, higher = better)
        # Maps [BRIER_BASELINE .. BRIER_PERFECT] → [0.0 .. 1.0]
        brier_reliability = max(0.0, min(1.0,
            (BRIER_BASELINE - brier) / (BRIER_BASELINE - BRIER_PERFECT)
        ))

        # Component 2: Sample size confidence (log scale, caps at 500 matches)
        sample_confidence = min(1.0, math.log(max(1, match_count)) / math.log(500))

        # Component 3: Heavy favorite predictability
        # If heavy fav wins 80%+ → odds are very reliable; if 55% → chaotic
        fav_reliability = max(0.0, min(1.0, (heavy_fav_win - 0.55) / (0.80 - 0.55)))

        # Composite: weighted blend
        # Brier is the primary signal (60%), sample size (20%), fav reliability (20%)
        odds_reliability = (
            brier_reliability * 0.60 +
            sample_confidence * 0.20 +
            fav_reliability * 0.20
        )

        results.append({
            "league_id": row["league_id"],
            "league_name": row["league_name"],
            "match_count": match_count,
            "brier_score": round(brier, 4),
            "heavy_fav_win_pct": round(heavy_fav_win * 100, 1),
            "fav_win_pct": round(fav_win * 100, 1),
            "odds_reliability": round(odds_reliability, 4),
        })

    # Sort by reliability descending
    results.sort(key=lambda x: x["odds_reliability"], reverse=True)

    return results
|
||||||
|
|
||||||
|
|
||||||
|
def build_lookup(results: List[Dict[str, Any]]) -> Dict[str, float]:
    """Map each league_id to its odds_reliability for the orchestrator.

    When a league_id appears more than once, the last entry wins.
    """
    lookup: Dict[str, float] = {}
    for entry in results:
        lookup[entry["league_id"]] = entry["odds_reliability"]
    return lookup
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Compute league reliability scores, save them as JSON and print a summary.

    The JSON written to ``OUTPUT_PATH`` holds the full league_id lookup plus
    the first 50 detail rows. The database connection is closed on every
    exit path.
    """

    def _print_ranking(heading: str, rows: List[Dict[str, Any]]) -> None:
        # Print a heading followed by one numbered summary line per league.
        print(heading)
        for i, r in enumerate(rows, 1):
            print(f" {i:2d}. {r['league_name']:25s} | Brier: {r['brier_score']:.4f} | "
                  f"Reliability: {r['odds_reliability']:.4f} | "
                  f"Heavy Fav: {r['heavy_fav_win_pct']:.1f}% | "
                  f"N={r['match_count']}")

    dsn = get_dsn()
    print("🔗 Connecting to database...")
    conn = psycopg2.connect(dsn)

    try:
        results = compute_league_reliability(conn)

        # Build output structure
        output = {
            "version": "v1",
            "description": "Per-league odds reliability scores computed from Brier Score analysis",
            "min_matches_threshold": MIN_MATCHES,
            "total_leagues": len(results),
            "default_reliability": 0.35,  # fallback for unknown leagues
            "lookup": build_lookup(results),
            "details": results[:50],  # top 50 for human reference
        }

        # Ensure output directory exists
        target_dir = os.path.dirname(OUTPUT_PATH)
        os.makedirs(target_dir, exist_ok=True)

        with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
            json.dump(output, f, indent=2, ensure_ascii=False)

        print(f"\n✅ Saved {len(results)} league reliability scores to {OUTPUT_PATH}")
        _print_ranking("\n📈 Top 10 most reliable leagues:", results[:10])
        _print_ranking("\n📉 Bottom 10 (least reliable):", results[-10:])
    finally:
        conn.close()


if __name__ == "__main__":
    main()
|
||||||
@@ -0,0 +1,312 @@
|
|||||||
|
"""
|
||||||
|
V28 — CONDITIONAL FREQUENCY ENGINE
|
||||||
|
====================================
|
||||||
|
User's strategy automated at scale:
|
||||||
|
|
||||||
|
For every match (e.g. Beşiktaş vs Konya):
|
||||||
|
1. Look at Beşiktaş's HOME history when their MS1 odds were in the same band (e.g. 1.30-1.40)
|
||||||
|
→ What % of those matches ended OU 1.5 over? OU 2.5 over? MS1?
|
||||||
|
2. Look at Konya's AWAY history when their MS2 odds were in the same band (e.g. 2.00-2.20)
|
||||||
|
→ Same questions
|
||||||
|
3. COMBINE both signals:
|
||||||
|
→ If BOTH teams historically produce >80% OU1.5 over at these odds → BET OU1.5 over
|
||||||
|
→ This is the user's exact Excel strategy, now running on 104K matches
|
||||||
|
|
||||||
|
CRITICAL: Only uses PAST matches for each prediction (no future leakage)
|
||||||
|
"""
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from collections import defaultdict
|
||||||
|
import warnings
|
||||||
|
warnings.filterwarnings('ignore')
|
||||||
|
|
||||||
|
# ─── Load Data ───
print("Loading data...")
df = pd.read_csv('data/training_data_v27.csv', low_memory=False)

# Identifier / text columns kept as read; every other column is coerced to
# numeric, with unparseable cells becoming NaN.
KEEP_STR = ['match_id', 'league_name', 'home_team', 'away_team',
            'home_team_id', 'away_team_id', 'league_id', 'mst_utc']
numeric_cols = [name for name in df.columns if name not in KEEP_STR]
for name in numeric_cols:
    df[name] = pd.to_numeric(df[name], errors='coerce')

# Put matches in chronological order when a kickoff timestamp is available
if 'mst_utc' in df.columns:
    kickoff = pd.to_datetime(df['mst_utc'], errors='coerce')
    df = (
        df.assign(mst_utc=kickoff)
          .sort_values('mst_utc')
          .reset_index(drop=True)
    )

# Keep only rows that have both 1X2 odds, a final score, team ids and a label
required_cols = ['odds_ms_h', 'odds_ms_a', 'score_home', 'score_away',
                 'home_team_id', 'away_team_id', 'label_ms']
df = df.dropna(subset=required_cols)

# Derive the realised outcome of each goal market from the final score
goals_total = df['score_home'] + df['score_away']
df['total_goals'] = goals_total
for goal_line, label_name in ((1.5, 'ou15_actual'),
                              (2.5, 'ou25_actual'),
                              (3.5, 'ou35_actual')):
    df[label_name] = goals_total.gt(goal_line).astype(int)
both_scored = df['score_home'].gt(0) & df['score_away'].gt(0)
df['btts_actual'] = both_scored.astype(int)
df['ms_result'] = df['label_ms'].astype(int)  # 0=H, 1=D, 2=A

N = len(df)
print(f"Total matches: {N}")
print(f"Unique home teams: {df['home_team_id'].nunique()}")
print(f"Unique away teams: {df['away_team_id'].nunique()}")
|
||||||
|
|
||||||
|
# ─── Odds Band Helper ───
|
||||||
|
def get_odds_band(odds, band_width=0.10):
    """Return the ``(lower, upper)`` band of width *band_width* containing *odds*.

    The band is found by flooring, not rounding to nearest, and is
    half-open: 1.35 → (1.30, 1.40) and 1.40 → (1.40, 1.50). Both edges are
    rounded to two decimals.
    """
    # Binary floats make 1.40 / 0.10 evaluate to 13.999999999999998, which a
    # bare floor would turn into band 13 (1.30-1.40). Rounding the quotient
    # first snaps such near-integers back before flooring.
    steps = np.floor(np.round(odds / band_width, 9))
    lower = round(steps * band_width, 2)
    upper = round(lower + band_width, 2)
    return (lower, upper)
|
||||||
|
|
||||||
|
def get_odds_band_wide(odds):
    """Return the coarse half-open odds band ``(lower, upper)`` containing *odds*.

    Bands are 1.01-1.50, 1.50-2.00, 2.00-2.50, 2.50-3.00, 3.00-4.00,
    4.00-6.00 and 6.00 upwards, e.g. 1.35 → (1.01, 1.50). Anything below
    1.50 is placed in the first band.

    The last band is open-ended. A finite cap of 20.00 would hand odds of 20
    or more a band they do not fall inside, so a ``lower <= odds < upper``
    membership test would reject them.
    """
    upper_edges = (1.50, 2.00, 2.50, 3.00, 4.00, 6.00)
    lower = 1.01
    for upper in upper_edges:
        if odds < upper:
            return (lower, upper)
        lower = upper
    return (6.00, float("inf"))
|
||||||
|
|
||||||
|
# ─── Build Conditional Frequency Lookup (Expanding Window) ───
print("\nBuilding conditional frequency features (expanding window)...")

MIN_MATCHES = 5  # minimum historical matches to generate a signal

# Column order of the feature matrix. Later sections of this script index the
# matrix by position, so the order must not change.
feat_names = [
    'home_ou15_rate_at_band', 'home_ou25_rate_at_band', 'home_ou35_rate_at_band',
    'home_btts_rate_at_band', 'home_win_rate_at_band', 'home_n_at_band',
    'away_ou15_rate_at_band', 'away_ou25_rate_at_band', 'away_ou35_rate_at_band',
    'away_btts_rate_at_band', 'away_win_rate_at_band', 'away_n_at_band',
    'combined_ou15', 'combined_ou25', 'combined_ou35', 'combined_btts',
    'home_goals_at_band', 'away_goals_at_band', 'combined_goals_at_band',
    'home_conceded_at_band', 'away_conceded_at_band',
]
_COL = {name: j for j, name in enumerate(feat_names)}


def _side_columns(side):
    """Return the feature-matrix column indices for *side* ('home' or 'away')."""
    return {
        'rates': [_COL[f'{side}_{m}_rate_at_band']
                  for m in ('ou15', 'ou25', 'ou35', 'btts', 'win')],
        'n': _COL[f'{side}_n_at_band'],
        'goals': _COL[f'{side}_goals_at_band'],
        'conceded': _COL[f'{side}_conceded_at_band'],
    }


def _write_side(feats, i, cols, totals, min_matches):
    """Write one side's rates for row *i*; return True when a signal was written.

    *totals* is the running list
    [n, ou15, ou25, ou35, btts, wins, goals_for, goals_against], or None when
    the team has no history in the band.
    """
    if totals is None or totals[0] < min_matches:
        return False
    n = totals[0]
    feats[i, cols['rates']] = [t / n for t in totals[1:6]]
    feats[i, cols['n']] = n
    feats[i, cols['goals']] = totals[6] / n
    feats[i, cols['conceded']] = totals[7] / n
    return True


def _add_to_totals(store, key, outcome):
    """Add one finished match (7 outcome values) to the running totals under *key*."""
    totals = store.get(key)
    if totals is None:
        totals = store[key] = [0.0] * 8
    totals[0] += 1
    for slot, value in enumerate(outcome, 1):
        totals[slot] += value


def build_conditional_features(frame, min_matches=MIN_MATCHES):
    """Compute per-match conditional-frequency features from earlier rows only.

    *frame* must already be in chronological order. For each row, the home
    team's earlier HOME matches and the away team's earlier AWAY matches in
    the same wide odds band are summarised. The row's own result is added to
    the history only after its features are written, so nothing leaks from
    the future.

    History is kept as running totals per (team, band) rather than rescanned
    per match. Because each historical match is filed under the band that
    ``get_odds_band_wide`` assigns to its own odds, matches priced at 20.00
    or more count towards the top band instead of being dropped.

    Returns a float array of shape (len(frame), len(feat_names)); cells
    without enough history are NaN.
    """
    n_rows = len(frame)
    feats = np.full((n_rows, len(feat_names)), np.nan)

    # Pull columns out once; positional row access on a DataFrame is slow.
    home_ids = frame['home_team_id'].to_numpy()
    away_ids = frame['away_team_id'].to_numpy()
    home_odds = frame['odds_ms_h'].to_numpy(dtype=float)
    away_odds = frame['odds_ms_a'].to_numpy(dtype=float)
    home_goals = frame['score_home'].to_numpy(dtype=float)
    away_goals = frame['score_away'].to_numpy(dtype=float)
    total_goals = frame['total_goals'].to_numpy(dtype=float)
    result = frame['label_ms'].to_numpy(dtype=float)

    home_cols = _side_columns('home')
    away_cols = _side_columns('away')
    combined_rate_cols = [_COL[f'combined_{m}'] for m in ('ou15', 'ou25', 'ou35', 'btts')]
    combined_goals_col = _COL['combined_goals_at_band']

    home_history = {}  # (team_id, band) -> totals when playing HOME
    away_history = {}  # (team_id, band) -> totals when playing AWAY
    with_signal = 0

    for i in range(n_rows):
        h_odds = home_odds[i]
        a_odds = away_odds[i]
        if np.isnan(h_odds) or np.isnan(a_odds):
            continue

        h_key = (home_ids[i], get_odds_band_wide(h_odds))
        a_key = (away_ids[i], get_odds_band_wide(a_odds))

        # ── Look up each team's past performance at this odds band ──
        home_ok = _write_side(feats, i, home_cols, home_history.get(h_key), min_matches)
        away_ok = _write_side(feats, i, away_cols, away_history.get(a_key), min_matches)

        # ── Combined signals (only when both sides have enough history) ──
        if home_ok and away_ok:
            h_rates = feats[i, home_cols['rates'][:4]]
            a_rates = feats[i, away_cols['rates'][:4]]
            feats[i, combined_rate_cols] = (h_rates + a_rates) / 2
            feats[i, combined_goals_col] = (feats[i, home_cols['goals']]
                                            + feats[i, away_cols['goals']])
            with_signal += 1

        # ── Add THIS match to history (for future lookups) ──
        ou15 = float(total_goals[i] > 1.5)
        ou25 = float(total_goals[i] > 2.5)
        ou35 = float(total_goals[i] > 3.5)
        btts = float(home_goals[i] > 0 and away_goals[i] > 0)
        h_won = float(result[i] == 0)
        a_won = float(result[i] == 2)

        _add_to_totals(home_history, h_key,
                       (ou15, ou25, ou35, btts, h_won, home_goals[i], away_goals[i]))
        _add_to_totals(away_history, a_key,
                       (ou15, ou25, ou35, btts, a_won, away_goals[i], home_goals[i]))

        if (i + 1) % 20000 == 0:
            print(f" Processed {i+1}/{n_rows} matches, {with_signal} with combined signals")

    return feats


features = build_conditional_features(df)

# Count valid features
valid_mask = ~np.isnan(features[:, _COL['combined_ou15']])
print(f"\nMatches with combined conditional signals: {valid_mask.sum()} / {N}")
|
||||||
|
|
||||||
|
# ─── BACKTEST: Walk-Forward ───
print("\n" + "="*70)
print(" CONDITIONAL FREQUENCY BACKTEST")
print("="*70)


def _simulate_flat_stakes(indices, signal, odds, won, threshold, min_odds,
                          max_odds=float('inf')):
    """Place a 1-unit bet on every row in *indices* that passes the filters.

    A row is bet when its signal is not NaN and at least *threshold*, its
    odds are not NaN and lie within [*min_odds*, *max_odds*], and its outcome
    is known. *signal*, *odds* and *won* are float arrays indexed by row
    position; ``won`` is 1.0 for a winning bet.

    Returns ``(bets, wins, pnl)`` with pnl in stake units.
    """
    bets = wins = 0
    pnl = 0.0
    for i in indices:
        strength = signal[i]
        if np.isnan(strength) or strength < threshold:
            continue
        price = odds[i]
        if np.isnan(price) or price < min_odds or price > max_odds:
            continue
        outcome = won[i]
        if np.isnan(outcome):
            continue
        bets += 1
        if outcome == 1:
            wins += 1
            pnl += price - 1
        else:
            pnl -= 1
    return bets, wins, pnl


# Only test on the last 30% of data (to avoid early sparse data)
test_start = int(N * 0.7)
test_idx = range(test_start, N)
test_valid = [i for i in test_idx if valid_mask[i]]
print(f"Test window: matches {test_start}-{N} ({len(test_valid)} with signals)")

# Strategy: bet on a market when its combined historical rate > threshold
markets = [
    ('OU 1.5 Over', 'combined_ou15', 12, 'ou15_actual', 'odds_ou15_o'),
    ('OU 2.5 Over', 'combined_ou25', 13, 'ou25_actual', 'odds_ou25_o'),
    ('OU 3.5 Over', 'combined_ou35', 14, 'ou35_actual', 'odds_ou35_o'),
    ('BTTS Yes', 'combined_btts', 15, 'btts_actual', 'odds_btts_y'),
]

for market_name, feat_key, feat_idx, label_col, odds_col in markets:
    print(f"\n ── {market_name} ──")

    if odds_col not in df.columns:
        print(f" No odds column '{odds_col}', skipping")
        continue

    # Extract the columns once per market instead of once per candidate bet.
    market_signal = features[:, feat_idx]
    market_odds = df[odds_col].to_numpy(dtype=float)
    market_won = df[label_col].to_numpy(dtype=float)

    for threshold in [0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90]:
        bets, wins, pnl = _simulate_flat_stakes(
            test_valid, market_signal, market_odds, market_won,
            threshold, min_odds=1.05)

        if bets >= 20:
            roi = pnl / bets * 100
            hit = wins / bets * 100
            marker = " *** PROFITABLE ***" if roi > 0 else ""
            print(f" Threshold>{threshold:.2f}: {bets:5d} bets, "
                  f"hit={hit:.1f}%, ROI={roi:+.1f}%{marker}")

# Also test MS (1X2) market
print(f"\n ── Maç Sonucu (1X2) ──")
# Back the home side when the mean of its win rate at this band and the away
# side's non-win rate at its band clears the threshold. NaN on either side
# propagates into the signal and the row is skipped.
home_win_signal = (features[:, 4] + (1 - features[:, 10])) / 2
home_odds_ms = df['odds_ms_h'].to_numpy(dtype=float)
home_won = (df['label_ms'] == 0).to_numpy(dtype=float)

for threshold in [0.50, 0.55, 0.60, 0.65, 0.70, 0.75, 0.80]:
    bets, wins, pnl = _simulate_flat_stakes(
        test_valid, home_win_signal, home_odds_ms, home_won,
        threshold, min_odds=1.10, max_odds=5.0)
    if bets >= 20:
        roi = pnl / bets * 100
        hit = wins / bets * 100
        marker = " *** PROFITABLE ***" if roi > 0 else ""
        print(f" Home win comb>{threshold:.2f}: {bets:5d} bets, "
              f"hit={hit:.1f}%, ROI={roi:+.1f}%{marker}")

# ─── DEEP DIVE: Best performing niches ───
print("\n" + "="*70)
print(" DEEP DIVE: Combined OU15 + Odds Value Filter")
print("="*70)

# The user's strategy: high confidence + the odds must pay enough
ou15_signal = features[:, 12]  # combined ou15
if 'odds_ou15_o' in df.columns:
    ou15_odds = df['odds_ou15_o'].to_numpy(dtype=float)
else:
    # Without the odds column no bet can qualify; all-NaN odds yield no output.
    ou15_odds = np.full(len(df), np.nan)
ou15_won = df['ou15_actual'].to_numpy(dtype=float)

for threshold in [0.75, 0.80, 0.85, 0.90]:
    for min_odds in [1.10, 1.20, 1.30, 1.40]:
        bets, wins, pnl = _simulate_flat_stakes(
            test_valid, ou15_signal, ou15_odds, ou15_won,
            threshold, min_odds=min_odds)

        if bets >= 30:
            roi = pnl / bets * 100
            hit = wins / bets * 100
            if roi > -5:  # show near-profitable too
                marker = " *** PROFITABLE ***" if roi > 0 else ""
                print(f" OU15 sig>{threshold:.2f} odds>{min_odds}: "
                      f"{bets:5d} bets, hit={hit:.1f}%, ROI={roi:+.1f}%{marker}")
|
||||||
|
|
||||||
|
# ─── Additional: Goal expectation accuracy ───
print("\n" + "="*70)
print(" GOAL PREDICTION ACCURACY")
print("="*70)
valid_goals = [i for i in test_valid if not np.isnan(features[i, 18])]
if valid_goals:
    # Column 18 is combined_goals_at_band: home avg scored + away avg scored.
    pred_goals = features[valid_goals, 18]
    actual_goals = df['total_goals'].to_numpy(dtype=float)[valid_goals]

    # Mean absolute error computed directly, so the report does not depend on
    # scikit-learn being importable at the very end of a long run.
    mae = np.mean(np.abs(actual_goals - pred_goals))
    corr = np.corrcoef(pred_goals, actual_goals)[0, 1]
    print(f" Combined goal prediction MAE: {mae:.3f}")
    print(f" Correlation: {corr:.4f}")
    print(f" Avg predicted: {np.mean(pred_goals):.2f}, Avg actual: {np.mean(actual_goals):.2f}")

    # Bucket analysis. Predictions of 5.0 or more fall outside every bucket
    # and are not reported.
    print("\n Goal prediction buckets:")
    for low, high in [(0, 1.5), (1.5, 2.0), (2.0, 2.5), (2.5, 3.0), (3.0, 3.5), (3.5, 5.0)]:
        in_bucket = (pred_goals >= low) & (pred_goals < high)
        bucket_size = int(in_bucket.sum())
        if bucket_size >= 20:
            bucket_actual = actual_goals[in_bucket]
            avg_actual = bucket_actual.mean()
            ou25_rate = (bucket_actual > 2.5).mean()
            print(f" Predicted {low:.1f}-{high:.1f}: n={bucket_size}, "
                  f"actual_avg={avg_actual:.2f}, OU25%={ou25_rate*100:.1f}%")

print("\nDone!")
|
||||||
@@ -0,0 +1,228 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
ELO Backfill Script — Chronological Replay
|
||||||
|
|
||||||
|
Replays all finished matches in chronological order, computes ELO ratings,
|
||||||
|
and persists:
|
||||||
|
1. Per-match pre-match ELO snapshots → football_ai_features / basketball_ai_features
|
||||||
|
2. Final team ELO state → team_elo_ratings
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python scripts/elo_backfill.py # football (default)
|
||||||
|
python scripts/elo_backfill.py --sport basketball
|
||||||
|
python scripts/elo_backfill.py --sport all
|
||||||
|
python scripts/elo_backfill.py --dry-run # no DB writes
|
||||||
|
python scripts/elo_backfill.py --batch-size 2000
|
||||||
|
|
||||||
|
Designed to be idempotent: uses ON CONFLICT upserts everywhere.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
# Add ai-engine root to path
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
import psycopg2
|
||||||
|
from psycopg2.extras import execute_values
|
||||||
|
from data.db import get_clean_dsn
|
||||||
|
from features.elo_system import ELORatingSystem
|
||||||
|
|
||||||
|
# ────────────────────────── constants ──────────────────────────
|
||||||
|
|
||||||
|
# Version tag stored in the calculator_ver column of every feature row this
# script upserts.
CALCULATOR_VER = "elo_backfill_v1"
# Default for --batch-size: number of feature rows buffered before a flush.
DEFAULT_BATCH_SIZE = 1000
|
||||||
|
|
||||||
|
|
||||||
|
# ────────────────────────── helpers ────────────────────────────
|
||||||
|
|
||||||
|
def fetch_matches(conn, sport: str):
    """Fetch every scored match of *sport* in chronological order.

    Returns a list of tuples
    ``(id, home_team_id, away_team_id, score_home, score_away,
    home_name, away_name, league_name)``. Team and league names come from
    LEFT JOINs, so they are None when the referenced row is missing.

    Matches sharing the same ``mst_utc`` are ordered by id so that repeated
    runs replay them in the same order and produce the same ratings.
    """
    query = """
        SELECT m.id, m.home_team_id, m.away_team_id,
               m.score_home, m.score_away,
               t1.name AS home_name, t2.name AS away_name,
               l.name AS league_name
        FROM matches m
        LEFT JOIN teams t1 ON m.home_team_id = t1.id
        LEFT JOIN teams t2 ON m.away_team_id = t2.id
        LEFT JOIN leagues l ON m.league_id = l.id
        WHERE m.sport = %s
          AND m.score_home IS NOT NULL
          AND m.score_away IS NOT NULL
        ORDER BY m.mst_utc ASC, m.id ASC
    """
    with conn.cursor() as cur:
        cur.execute(query, (sport,))
        return cur.fetchall()
|
||||||
|
|
||||||
|
|
||||||
|
def flush_features_batch(conn, rows, dry_run: bool, sport: str = 'football'):
    """Bulk upsert pre-match ELO feature rows into the sport's ai_features table.

    Args:
        conn: open psycopg2 connection; committed on success, rolled back on
            failure.
        rows: sequence of 6-tuples ``(match_id, home_elo, away_elo,
            home_form_score, away_form_score, calculator_ver)``.
        dry_run: when true nothing is written.
        sport: ``'football'`` or ``'basketball'``; selects the target table.

    Raises:
        ValueError: if *sport* has no feature table. Checked only when there
            is something to write, so empty and dry-run calls stay no-ops.

    New rows get ``missing_players_impact = 0.0``; on conflict that column is
    left untouched and the other columns are overwritten.
    """
    if not rows or dry_run:
        return

    # Explicit mapping, so an unexpected sport cannot silently land in the
    # basketball table.
    tables = {
        'football': 'football_ai_features',
        'basketball': 'basketball_ai_features',
    }
    try:
        table_name = tables[sport]
    except KeyError:
        raise ValueError(f"Unsupported sport for ELO feature upsert: {sport!r}") from None

    try:
        with conn.cursor() as cur:
            # table_name comes from the fixed mapping above, never from input.
            execute_values(
                cur,
                f"""
                INSERT INTO {table_name}
                    (match_id, home_elo, away_elo,
                     home_form_score, away_form_score,
                     missing_players_impact, calculator_ver, updated_at)
                VALUES %s
                ON CONFLICT (match_id) DO UPDATE SET
                    home_elo = EXCLUDED.home_elo,
                    away_elo = EXCLUDED.away_elo,
                    home_form_score = EXCLUDED.home_form_score,
                    away_form_score = EXCLUDED.away_form_score,
                    calculator_ver = EXCLUDED.calculator_ver,
                    updated_at = EXCLUDED.updated_at
                """,
                rows,
                template="(%s, %s, %s, %s, %s, 0.0, %s, NOW())",
                page_size=500,
            )
        conn.commit()
    except Exception:
        # Leave the connection usable for the caller instead of stuck in an
        # aborted transaction, then surface the original error.
        conn.rollback()
        raise
|
||||||
|
|
||||||
|
|
||||||
|
# ────────────────────────── main ───────────────────────────────
|
||||||
|
|
||||||
|
def _form_to_score(form: str) -> float:
    """Convert a W/D/L form string such as ``"WDLWW"`` to a 0-100 score.

    A win counts 1, a draw 0.5, anything else 0. An empty or missing form
    maps to the neutral 50.0.
    """
    if not form:
        return 50.0
    points = sum(1.0 if c == 'W' else 0.5 if c == 'D' else 0.0 for c in form)
    return (points / len(form)) * 100.0


def backfill(sport: str, batch_size: int, dry_run: bool):
    """Replay all scored matches of *sport* chronologically and persist ELO data.

    For every match the pre-match ELO and form of both teams are buffered as
    a feature row, then the ratings are updated with the result. Buffered
    rows are flushed to the sport's ai_features table every *batch_size*
    rows; the final ratings are saved through the ELO system unless
    *dry_run* is set. The database connection is closed on every exit path,
    including errors.
    """
    dsn = get_clean_dsn()
    conn = psycopg2.connect(dsn)

    try:
        print(f"\n{'='*60}")
        print(f"🏆 ELO Backfill — {sport.upper()}")
        print(f" batch_size={batch_size} dry_run={dry_run}")
        print(f"{'='*60}")

        # ── 1. Fetch matches ──
        t0 = time.time()
        matches = fetch_matches(conn, sport)
        print(f"📊 {len(matches):,} matches fetched in {time.time()-t0:.1f}s")

        if not matches:
            print("⚠️ No matches found — nothing to do.")
            return

        # ── 2. Fresh ELO system (no preloaded ratings) ──
        # __new__ bypasses ELORatingSystem.__init__, so the attributes used
        # below are set by hand and the replay starts from empty ratings.
        elo = ELORatingSystem.__new__(ELORatingSystem)
        elo.ratings = {}
        elo.league_cache = {}
        elo.conn = conn

        # ── 3. Chronological replay ──
        feature_buf = []
        processed = 0
        features_buffered = 0
        t_start = time.time()

        for row in matches:
            match_id, home_id, away_id, score_h, score_a, h_name, a_name, league = row

            if not home_id or not away_id:
                continue

            # Snapshot PRE-match ELO, before this result touches the ratings
            home_rating = elo.get_or_create_rating(home_id, h_name or "")
            away_rating = elo.get_or_create_rating(away_id, a_name or "")

            feature_buf.append((
                match_id,
                round(home_rating.overall_elo, 2),
                round(away_rating.overall_elo, 2),
                round(_form_to_score(home_rating.recent_form), 2),
                round(_form_to_score(away_rating.recent_form), 2),
                CALCULATOR_VER,
            ))

            # Update ELO after the match
            elo.update_after_match(
                home_id, away_id, score_h, score_a,
                h_name or "", a_name or "", league or "",
            )

            processed += 1

            # Flush batch
            if len(feature_buf) >= batch_size:
                flush_features_batch(conn, feature_buf, dry_run, sport)
                features_buffered += len(feature_buf)
                feature_buf.clear()

            if processed % 10_000 == 0:
                elapsed = time.time() - t_start
                rate = processed / elapsed if elapsed > 0 else 0
                print(f" {processed:>8,} / {len(matches):,} processed "
                      f"({rate:,.0f} matches/s) "
                      f"teams={len(elo.ratings)}")

        # Flush remaining
        if feature_buf:
            flush_features_batch(conn, feature_buf, dry_run, sport)
            features_buffered += len(feature_buf)

        elapsed = time.time() - t_start
        print(f"\n✅ Replay complete: {processed:,} matches in {elapsed:.1f}s")
        table_name = 'football_ai_features' if sport == 'football' else 'basketball_ai_features'
        # In dry-run mode nothing reached the table, so do not call it "written".
        outcome = "prepared (dry-run, not written)" if dry_run else "written"
        print(f" {features_buffered:,} {table_name} rows {outcome}")
        print(f" {len(elo.ratings):,} teams rated")

        # ── 4. Persist final team ELO state ──
        if not dry_run:
            elo.save_ratings_to_db()
            elo.save_ratings()
            print("💾 team_elo_ratings + JSON saved")
        else:
            print("🔸 DRY-RUN: no DB writes performed")

        # ── 5. Show top teams ──
        elo._show_top_teams(10)
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _positive_int(text: str) -> int:
    """argparse ``type=`` callable that accepts only integers >= 1."""
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(f"expected an integer, got {text!r}") from None
    if value < 1:
        raise argparse.ArgumentTypeError(f"must be >= 1, got {value}")
    return value


def main():
    """Parse command-line options and run the backfill for each requested sport.

    ``--sport all`` expands to football followed by basketball.
    ``--batch-size`` must be at least 1: zero or a negative value would make
    every single row trigger its own flush and commit.
    """
    parser = argparse.ArgumentParser(
        description="ELO Backfill — chronological replay → match_ai_features & team_elo_ratings"
    )
    parser.add_argument(
        "--sport",
        choices=["football", "basketball", "all"],
        default="football",
        help="Sport to compute ELO for (default: football)",
    )
    parser.add_argument(
        "--batch-size",
        type=_positive_int,
        default=DEFAULT_BATCH_SIZE,
        help=f"DB insert batch size (default: {DEFAULT_BATCH_SIZE})",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Run replay without writing to DB",
    )
    args = parser.parse_args()

    sports = ["football", "basketball"] if args.sport == "all" else [args.sport]

    for sport in sports:
        backfill(sport, args.batch_size, args.dry_run)


if __name__ == "__main__":
    main()
|
||||||
@@ -0,0 +1,519 @@
|
|||||||
|
"""
|
||||||
|
XGBoost Training Data Extraction (Advanced Basketball V21)
|
||||||
|
============================================================
|
||||||
|
Batch feature extraction for top-league basketball matches.
|
||||||
|
Extracts 60+ features per match including deep team stats (FG%, Rebounds, Qrt pacing).
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python3 scripts/extract_advanced_basketball_data.py
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import csv
|
||||||
|
import math
|
||||||
|
import time
|
||||||
|
from datetime import datetime
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
import psycopg2
|
||||||
|
from psycopg2.extras import RealDictCursor
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# CONFIG
|
||||||
|
# =============================================================================
|
||||||
|
# Parent of the directory that contains this script.
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Put that directory first on the import path.
# NOTE(review): presumably so sibling packages resolve when run as a script — confirm.
sys.path.insert(0, AI_ENGINE_DIR)

# The league list lives one level above AI_ENGINE_DIR; the CSV goes in its data/ folder.
TOP_LEAGUES_PATH = os.path.join(AI_ENGINE_DIR, "..", "basketball_top_leagues.json")
OUTPUT_CSV = os.path.join(AI_ENGINE_DIR, "data", "advanced_basketball_training_data.csv")

# Import-time side effect: make sure the output directory exists.
os.makedirs(os.path.dirname(OUTPUT_CSV), exist_ok=True)
|
||||||
|
|
||||||
|
def get_conn():
    """Open a psycopg2 connection using the ``DATABASE_URL`` environment variable.

    Everything from the first ``?schema=`` onwards is cut off before
    connecting. An unset variable results in an empty connection string.
    """
    raw_url = os.getenv("DATABASE_URL", "")
    dsn, _, _ = raw_url.partition("?schema=")
    return psycopg2.connect(dsn)
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# FEATURE COLUMNS (ORDER MATTERS)
|
||||||
|
# =============================================================================
|
||||||
|
# Ordered column names for one extracted training row: identifiers first, then
# per-team features, H2H and odds features, and finally the labels.
FEATURE_COLS = [
    "match_id", "home_team_id", "away_team_id", "league_id", "mst_utc",

    # Form & Winning
    "home_winning_streak", "away_winning_streak",
    "home_win_rate", "away_win_rate",

    # Home Team Offense (Averages of last 5)
    "home_pts_avg", "home_reb_avg", "home_ast_avg", "home_stl_avg", "home_blk_avg", "home_tov_avg",
    "home_fg_pct", "home_3pt_pct", "home_ft_pct",
    "home_q1_avg", "home_q2_avg", "home_q3_avg", "home_q4_avg",

    # Home Team Defense (Averages of opponent stats in last 5)
    "home_conc_pts", "home_conc_reb", "home_conc_ast", "home_conc_tov",
    "home_conc_fg_pct", "home_conc_3pt_pct",

    # Away Team Offense (Averages of last 5)
    "away_pts_avg", "away_reb_avg", "away_ast_avg", "away_stl_avg", "away_blk_avg", "away_tov_avg",
    "away_fg_pct", "away_3pt_pct", "away_ft_pct",
    "away_q1_avg", "away_q2_avg", "away_q3_avg", "away_q4_avg",

    # Away Team Defense (Averages of opponent stats in last 5)
    "away_conc_pts", "away_conc_reb", "away_conc_ast", "away_conc_tov",
    "away_conc_fg_pct", "away_conc_3pt_pct",

    # H2H Features
    "h2h_total_matches", "h2h_home_win_rate",
    "h2h_avg_points", "h2h_over140_rate",

    # Odds Features
    "odds_ml_h", "odds_ml_a",
    "odds_tot_o", "odds_tot_u", "odds_tot_line",
    "odds_spread_h", "odds_spread_a", "odds_spread_line",

    # Labels
    "score_home", "score_away", "total_points",
    "label_ml", # 0=Home, 1=Away
    "label_tot", # 0=Under, 1=Over (dynamic line)
    "label_spread", # 0=Away Cover, 1=Home Cover (dynamic line)
]
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# BATCH LOADERS
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
class AdvancedDataLoader:
|
||||||
|
def __init__(self, conn, top_league_ids: list) -> None:
    """Bind the loader to a database connection and start with empty caches.

    Args:
        conn: open database connection; one RealDictCursor is created on it
            and reused by every loader method.
        top_league_ids: league ids to restrict matches to; an empty list
            means no league filter.
    """
    self.conn = conn
    # RealDictCursor: rows are read by column name throughout this class.
    self.cur = conn.cursor(cursor_factory=RealDictCursor)
    self.top_league_ids = top_league_ids

    self.matches = []
    # str(match_id) -> dict of parsed odds for that match
    self.odds_cache = {}
    self.team_stats_cache = {} # (match_id, team_id) -> stats dict
    # NOTE(review): presumably filled by _build_advanced_history — confirm.
    self.form_cache = {}
    self.h2h_cache = {}
|
||||||
|
|
||||||
|
def load_all(self):
    """Run every loading stage in order, printing a timed line after each.

    Stages: matches, team stats, odds, then the advanced history build. A
    final line reports the total elapsed time.
    """
    overall_start = time.time()

    # (stage to run, description evaluated after the stage has finished)
    stages = (
        (self._load_matches, lambda: f"Matches: {len(self.matches)}"),
        (self._load_team_stats, lambda: f"Team Stats: {len(self.team_stats_cache)} records"),
        (self._load_odds, lambda: f"Odds: {len(self.odds_cache)} matches"),
        (self._build_advanced_history, lambda: "Advanced History & Stats cache built"),
    )
    for run_stage, describe in stages:
        stage_start = time.time()
        run_stage()
        print(f" ✅ {describe()} ({time.time()-stage_start:.1f}s)", flush=True)

    print(f" 📊 Total load time: {time.time()-overall_start:.1f}s", flush=True)
|
||||||
|
|
||||||
|
def _load_matches(self):
    """Load finished basketball matches, oldest first, into ``self.matches``.

    Only rows with status 'FT', both scores present and ``mst_utc`` above
    1640995200000 are selected. When ``self.top_league_ids`` is non-empty the
    result is further restricted to those leagues.
    """
    # NOTE(review): if mst_utc is epoch milliseconds, 1640995200000 is
    # 2022-01-01 00:00 UTC — confirm the column's unit.
    sql = """
        SELECT
            id, mst_utc, league_id, home_team_id, away_team_id,
            score_home, score_away
        FROM matches
        WHERE sport = 'basketball'
          AND status = 'FT'
          AND score_home IS NOT NULL
          AND score_away IS NOT NULL
          AND mst_utc > 1640995200000
    """
    order_clause = " ORDER BY mst_utc ASC"
    league_ids = self.top_league_ids

    if not league_ids:
        self.cur.execute(sql + order_clause)
    else:
        # One placeholder per league id; the values are bound by the driver.
        placeholders = ",".join("%s" for _ in league_ids)
        sql += f" AND league_id IN ({placeholders})"
        self.cur.execute(sql + order_clause, tuple(league_ids))

    self.matches = self.cur.fetchall()
|
||||||
|
|
||||||
|
def _load_team_stats(self):
    """Cache the team stats rows of every finished basketball match.

    Rows are stored in ``self.team_stats_cache`` under
    ``(str(match_id), str(team_id))``.
    """
    sql = """
        SELECT
            match_id, team_id,
            points, rebounds, assists, steals, blocks, turnovers,
            fg_made, fg_attempted,
            three_pt_made, three_pt_attempted,
            ft_made, ft_attempted,
            q1_score, q2_score, q3_score, q4_score
        FROM basketball_team_stats
        WHERE match_id IN (
            SELECT id FROM matches WHERE sport = 'basketball' AND status = 'FT'
        )
    """
    self.cur.execute(sql)
    # Ids are stringified so lookups do not depend on the id column's type.
    self.team_stats_cache.update({
        (str(record['match_id']), str(record['team_id'])): record
        for record in self.cur.fetchall()
    })
|
||||||
|
|
||||||
|
def _load_odds(self):
    """Load odds selections for finished basketball matches into ``self.odds_cache``.

    First reads every odds category of those matches, then fetches the
    selections in chunks of 50,000 category ids and passes each one to
    ``_parse_single_odd``. ``self.odds_cache`` is keyed by ``str(match_id)``.

    Selections whose odd value is NULL or not numeric are skipped instead of
    aborting the whole load, and a NULL category name is passed on as "".
    """
    query = """
        SELECT match_id, name as category_name, db_id as category_id
        FROM odd_categories
        WHERE match_id IN (
            SELECT id FROM matches WHERE sport = 'basketball' AND status = 'FT'
        )
    """
    self.cur.execute(query)
    cats = self.cur.fetchall()
    if not cats:
        return

    cat_to_match = {}
    cat_id_to_name = {}
    for cat in cats:
        cat_to_match[cat['category_id']] = cat['match_id']
        cat_id_to_name[cat['category_id']] = cat['category_name']

    # Query in chunks so the IN (...) list stays bounded.
    chunk_size = 50000
    cat_ids = list(cat_to_match)

    for start in range(0, len(cat_ids), chunk_size):
        chunk = tuple(cat_ids[start:start + chunk_size])
        self.cur.execute("SELECT odd_category_db_id, name, odd_value FROM odd_selections WHERE odd_category_db_id IN %s", (chunk,))

        for row in self.cur.fetchall():
            try:
                odd_value = float(row['odd_value'])
            except (TypeError, ValueError):
                # NULL or malformed price: nothing usable in this selection.
                continue

            c_id = row['odd_category_db_id']
            m_id = str(cat_to_match[c_id])
            c_name = cat_id_to_name.get(c_id) or ""

            self.odds_cache.setdefault(m_id, {})
            self._parse_single_odd(m_id, c_name, str(row['name']), odd_value)
|
||||||
|
|
||||||
|
def _parse_single_odd(self, match_id, category_name, sel_name, odd_value):
|
||||||
|
if odd_value <= 1.0: return
|
||||||
|
cat_lower = category_name.lower()
|
||||||
|
sel_lower = sel_name.lower()
|
||||||
|
target = self.odds_cache[match_id]
|
||||||
|
|
||||||
|
# ML
|
||||||
|
if cat_lower in ("maç sonucu (uzt. dahil)", "mac sonucu (uzt. dahil)", "maç sonucu", "mac sonucu"):
|
||||||
|
if sel_lower == "1": target["ml_h"] = odd_value
|
||||||
|
elif sel_lower == "2": target["ml_a"] = odd_value
|
||||||
|
|
||||||
|
# Totals
|
||||||
|
if "alt/üst" in cat_lower or "alt/ust" in cat_lower:
|
||||||
|
line = None
|
||||||
|
try:
|
||||||
|
left = cat_lower.find("(")
|
||||||
|
right = cat_lower.find(")", left + 1)
|
||||||
|
if left > -1 and right > -1:
|
||||||
|
line = float(cat_lower[left+1:right].replace(",", "."))
|
||||||
|
except: pass
|
||||||
|
if line and "tot_line" not in target: target["tot_line"] = line
|
||||||
|
|
||||||
|
if "üst" in sel_lower or "ust" in sel_lower or "over" in sel_lower:
|
||||||
|
target.setdefault("tot_o", odd_value)
|
||||||
|
elif "alt" in sel_lower or "under" in sel_lower:
|
||||||
|
target.setdefault("tot_u", odd_value)
|
||||||
|
|
||||||
|
# Spread
|
||||||
|
if "hnd. ms" in cat_lower or "hand. ms" in cat_lower or "hnd ms" in cat_lower:
|
||||||
|
line = None
|
||||||
|
try:
|
||||||
|
left = cat_lower.find("(")
|
||||||
|
right = cat_lower.find(")", left + 1)
|
||||||
|
if left > -1 and right > -1:
|
||||||
|
payload = cat_lower[left+1:right].replace(",", ".")
|
||||||
|
if ":" in payload:
|
||||||
|
home_hcp = float(payload.split(":")[0])
|
||||||
|
away_hcp = float(payload.split(":")[1])
|
||||||
|
if abs(home_hcp) < 1e-6 and away_hcp > 0: line = -away_hcp
|
||||||
|
elif home_hcp > 0 and abs(away_hcp) < 1e-6: line = home_hcp
|
||||||
|
elif abs(home_hcp - away_hcp) < 1e-6 and home_hcp > 0: line = 0.0
|
||||||
|
except: pass
|
||||||
|
if line is not None and "spread_line" not in target:
|
||||||
|
target["spread_line"] = line
|
||||||
|
|
||||||
|
if sel_lower == "1": target.setdefault("spread_h", odd_value)
|
||||||
|
elif sel_lower == "2": target.setdefault("spread_a", odd_value)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_advanced_history(self):
|
||||||
|
team_matches = defaultdict(list)
|
||||||
|
for m in self.matches:
|
||||||
|
mid = str(m['id'])
|
||||||
|
hid = str(m['home_team_id'])
|
||||||
|
aid = str(m['away_team_id'])
|
||||||
|
|
||||||
|
# Fetch stats from cache
|
||||||
|
h_stat = self.team_stats_cache.get((mid, hid))
|
||||||
|
a_stat = self.team_stats_cache.get((mid, aid))
|
||||||
|
|
||||||
|
if h_stat and a_stat:
|
||||||
|
m_data = {
|
||||||
|
"utc": int(m['mst_utc']),
|
||||||
|
"mid": mid,
|
||||||
|
}
|
||||||
|
# For Home Team History (it stores what THEY did, and what Opp did)
|
||||||
|
team_matches[hid].append({
|
||||||
|
"utc": int(m['mst_utc']),
|
||||||
|
"scored": m['score_home'], "conceded": m['score_away'],
|
||||||
|
"offense": h_stat, "defense": a_stat
|
||||||
|
})
|
||||||
|
# For Away Team History
|
||||||
|
team_matches[aid].append({
|
||||||
|
"utc": int(m['mst_utc']),
|
||||||
|
"scored": m['score_away'], "conceded": m['score_home'],
|
||||||
|
"offense": a_stat, "defense": h_stat
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
# If advanced stats are missing, we still push the scores to maintain streak tracking
|
||||||
|
team_matches[hid].append({
|
||||||
|
"utc": int(m['mst_utc']),
|
||||||
|
"scored": m['score_home'], "conceded": m['score_away'],
|
||||||
|
"offense": None, "defense": None
|
||||||
|
})
|
||||||
|
team_matches[aid].append({
|
||||||
|
"utc": int(m['mst_utc']),
|
||||||
|
"scored": m['score_away'], "conceded": m['score_home'],
|
||||||
|
"offense": None, "defense": None
|
||||||
|
})
|
||||||
|
|
||||||
|
for team_id, hist in team_matches.items():
|
||||||
|
hist.sort(key=lambda x: x["utc"])
|
||||||
|
|
||||||
|
for i, match_info in enumerate(hist):
|
||||||
|
mst_utc = match_info["utc"]
|
||||||
|
past = [x for x in hist[:i] if x["utc"] < mst_utc]
|
||||||
|
|
||||||
|
if not past:
|
||||||
|
self.form_cache[(team_id, mst_utc)] = self._empty_form()
|
||||||
|
continue
|
||||||
|
|
||||||
|
last_5 = past[-5:]
|
||||||
|
|
||||||
|
wins = sum(1 for x in past if x["scored"] > x["conceded"])
|
||||||
|
win_rate = wins / len(past) if len(past) > 0 else 0.5
|
||||||
|
|
||||||
|
streak = 0
|
||||||
|
for x in reversed(past):
|
||||||
|
if x["scored"] > x["conceded"]: streak += 1
|
||||||
|
else: break
|
||||||
|
|
||||||
|
# Averages
|
||||||
|
off_pts, off_reb, off_ast, off_stl, off_blk, off_tov = 0,0,0,0,0,0
|
||||||
|
off_fg_m, off_fg_a, off_3pt_m, off_3pt_a, off_ft_m, off_ft_a = 0,0,0,0,0,0
|
||||||
|
off_q1, off_q2, off_q3, off_q4 = 0,0,0,0
|
||||||
|
|
||||||
|
def_pts, def_reb, def_ast, def_tov = 0,0,0,0
|
||||||
|
def_fg_m, def_fg_a, def_3pt_m, def_3pt_a = 0,0,0,0
|
||||||
|
|
||||||
|
valid_stats_count = sum(1 for x in last_5 if x["offense"] is not None)
|
||||||
|
|
||||||
|
if valid_stats_count > 0:
|
||||||
|
for x in last_5:
|
||||||
|
o = x["offense"]
|
||||||
|
d = x["defense"]
|
||||||
|
if o and d:
|
||||||
|
off_pts += (o["points"] or 0)
|
||||||
|
off_reb += (o["rebounds"] or 0)
|
||||||
|
off_ast += (o["assists"] or 0)
|
||||||
|
off_stl += (o["steals"] or 0)
|
||||||
|
off_blk += (o["blocks"] or 0)
|
||||||
|
off_tov += (o["turnovers"] or 0)
|
||||||
|
off_fg_m += (o["fg_made"] or 0)
|
||||||
|
off_fg_a += (o["fg_attempted"] or 0)
|
||||||
|
off_3pt_m += (o["three_pt_made"] or 0)
|
||||||
|
off_3pt_a += (o["three_pt_attempted"] or 0)
|
||||||
|
off_ft_m += (o["ft_made"] or 0)
|
||||||
|
off_ft_a += (o["ft_attempted"] or 0)
|
||||||
|
off_q1 += (o["q1_score"] or 0)
|
||||||
|
off_q2 += (o["q2_score"] or 0)
|
||||||
|
off_q3 += (o["q3_score"] or 0)
|
||||||
|
off_q4 += (o["q4_score"] or 0)
|
||||||
|
|
||||||
|
def_pts += (d["points"] or 0) # Conceded points based on opponents "offense" data
|
||||||
|
def_reb += (d["rebounds"] or 0)
|
||||||
|
def_ast += (d["assists"] or 0)
|
||||||
|
def_tov += (d["turnovers"] or 0)
|
||||||
|
def_fg_m += (d["fg_made"] or 0)
|
||||||
|
def_fg_a += (d["fg_attempted"] or 0)
|
||||||
|
def_3pt_m += (d["three_pt_made"] or 0)
|
||||||
|
def_3pt_a += (d["three_pt_attempted"] or 0)
|
||||||
|
|
||||||
|
avg_c = float(valid_stats_count)
|
||||||
|
self.form_cache[(team_id, mst_utc)] = {
|
||||||
|
"winning_streak": streak, "win_rate": win_rate,
|
||||||
|
"pts_avg": off_pts/avg_c, "reb_avg": off_reb/avg_c,
|
||||||
|
"ast_avg": off_ast/avg_c, "stl_avg": off_stl/avg_c,
|
||||||
|
"blk_avg": off_blk/avg_c, "tov_avg": off_tov/avg_c,
|
||||||
|
"fg_pct": (off_fg_m / off_fg_a) if off_fg_a > 0 else 0.45,
|
||||||
|
"3pt_pct": (off_3pt_m / off_3pt_a) if off_3pt_a > 0 else 0.35,
|
||||||
|
"ft_pct": (off_ft_m / off_ft_a) if off_ft_a > 0 else 0.75,
|
||||||
|
"q1_avg": off_q1/avg_c, "q2_avg": off_q2/avg_c,
|
||||||
|
"q3_avg": off_q3/avg_c, "q4_avg": off_q4/avg_c,
|
||||||
|
|
||||||
|
"conc_pts": def_pts/avg_c, "conc_reb": def_reb/avg_c,
|
||||||
|
"conc_ast": def_ast/avg_c, "conc_tov": def_tov/avg_c,
|
||||||
|
"conc_fg_pct": (def_fg_m / def_fg_a) if def_fg_a > 0 else 0.45,
|
||||||
|
"conc_3pt_pct": (def_3pt_m / def_3pt_a) if def_3pt_a > 0 else 0.35,
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
self.form_cache[(team_id, mst_utc)] = self._empty_form()
|
||||||
|
self.form_cache[(team_id, mst_utc)]["winning_streak"] = streak
|
||||||
|
self.form_cache[(team_id, mst_utc)]["win_rate"] = win_rate
|
||||||
|
|
||||||
|
# Build H2H similarly
|
||||||
|
h2h_map = defaultdict(list)
|
||||||
|
for m in self.matches:
|
||||||
|
directional_pair = (str(m['home_team_id']), str(m['away_team_id']))
|
||||||
|
h2h_map[directional_pair].append((m['mst_utc'], m['score_home'], m['score_away']))
|
||||||
|
|
||||||
|
for (h_id, a_id), hist in h2h_map.items():
|
||||||
|
hist.sort(key=lambda x: x[0])
|
||||||
|
for i, (mst_utc, sh, sa) in enumerate(hist):
|
||||||
|
past = [x for x in hist[:i] if x[0] < mst_utc]
|
||||||
|
if not past:
|
||||||
|
self.h2h_cache[(h_id, a_id, mst_utc)] = {
|
||||||
|
"total": 0, "home_win_rate": 0.5,
|
||||||
|
"avg_points": 160.0, "over140_rate": 0.5
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
home_wins = sum(1 for x in past if x[1] > x[2])
|
||||||
|
total_pts = sum(x[1] + x[2] for x in past)
|
||||||
|
over140 = sum(1 for x in past if x[1] + x[2] > 140)
|
||||||
|
self.h2h_cache[(h_id, a_id, mst_utc)] = {
|
||||||
|
"total": len(past), "home_win_rate": home_wins / len(past),
|
||||||
|
"avg_points": total_pts / len(past), "over140_rate": over140 / len(past)
|
||||||
|
}
|
||||||
|
|
||||||
|
def _empty_form(self):
|
||||||
|
return {
|
||||||
|
"winning_streak": 0, "win_rate": 0.5,
|
||||||
|
"pts_avg": 80.0, "reb_avg": 35.0, "ast_avg": 20.0,
|
||||||
|
"stl_avg": 7.0, "blk_avg": 3.0, "tov_avg": 13.0,
|
||||||
|
"fg_pct": 0.45, "3pt_pct": 0.35, "ft_pct": 0.75,
|
||||||
|
"q1_avg": 20.0, "q2_avg": 20.0, "q3_avg": 20.0, "q4_avg": 20.0,
|
||||||
|
|
||||||
|
"conc_pts": 80.0, "conc_reb": 35.0, "conc_ast": 20.0, "conc_tov": 13.0,
|
||||||
|
"conc_fg_pct": 0.45, "conc_3pt_pct": 0.35,
|
||||||
|
}
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# FEATURE EXTRACTION PIPELINE
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
def process_matches(loader: AdvancedDataLoader):
    """Write one CSV row of features and labels per match that has moneyline odds.

    Reads ``loader.matches`` and the loader's odds/form/h2h caches, skips
    matches lacking either moneyline price, writes the rest to ``OUTPUT_CSV``
    under the ``FEATURE_COLS`` header, and prints a summary.

    Labels: ``label_ml`` is 0 when the home score is higher, else 1;
    ``label_tot`` is 1 when total points exceed the totals line (160.0 when no
    line is cached); ``label_spread`` is 1 when home score plus the spread
    line (0.0 when absent) exceeds the away score.

    Exits the process with status 1 if a row's length differs from the header.
    """
    # (form key, fallback) pairs in CSV column order: offense block, then defense block.
    form_defaults = (
        ("pts_avg", 80), ("reb_avg", 35), ("ast_avg", 20),
        ("stl_avg", 7), ("blk_avg", 3), ("tov_avg", 13),
        ("fg_pct", 0.45), ("3pt_pct", 0.35), ("ft_pct", 0.75),
        ("q1_avg", 20), ("q2_avg", 20), ("q3_avg", 20), ("q4_avg", 20),
        ("conc_pts", 80), ("conc_reb", 35), ("conc_ast", 20), ("conc_tov", 13),
        ("conc_fg_pct", 0.45), ("conc_3pt_pct", 0.35),
    )

    extracted_count = 0
    missing_odds_count = 0

    # The context manager closes the file even when the length guard below
    # exits the process or an unexpected error is raised.
    with open(OUTPUT_CSV, "w", newline='') as f:
        writer = csv.writer(f)
        writer.writerow(FEATURE_COLS)

        for match in loader.matches:
            mid = str(match['id'])
            mst = int(match['mst_utc'])
            hid = str(match['home_team_id'])
            aid = str(match['away_team_id'])

            s_home = int(match['score_home'])
            s_away = int(match['score_away'])
            total_pts = s_home + s_away

            c_odds = loader.odds_cache.get(mid, {})
            c_form_h = loader.form_cache.get((hid, mst), {})
            c_form_a = loader.form_cache.get((aid, mst), {})
            c_h2h = loader.h2h_cache.get((hid, aid, mst), {})

            if "ml_h" not in c_odds or "ml_a" not in c_odds:
                missing_odds_count += 1
                continue

            label_ml = 0 if s_home > s_away else 1

            line_tot = c_odds.get("tot_line", 160.0)
            label_tot = 1 if total_pts > line_tot else 0

            line_spread = c_odds.get("spread_line", 0.0)
            hc_score = float(s_home) + float(line_spread)
            label_spread = 1 if hc_score > float(s_away) else 0

            row = [
                mid, hid, aid, match.get('league_id', ''), mst,
                c_form_h.get("winning_streak", 0), c_form_a.get("winning_streak", 0),
                # 0.5 is the neutral win rate used by the form cache placeholders.
                c_form_h.get("win_rate", 0.5), c_form_a.get("win_rate", 0.5),
            ]
            # Home offense + defense, then away offense + defense.
            row.extend(c_form_h.get(key, fallback) for key, fallback in form_defaults)
            row.extend(c_form_a.get(key, fallback) for key, fallback in form_defaults)
            row.extend([
                c_h2h.get("total", 0), c_h2h.get("home_win_rate", 0.5),
                c_h2h.get("avg_points", 160.0), c_h2h.get("over140_rate", 0.5),

                c_odds.get("ml_h", 1.9), c_odds.get("ml_a", 1.9),
                c_odds.get("tot_o", 1.9), c_odds.get("tot_u", 1.9), line_tot,
                c_odds.get("spread_h", 1.9), c_odds.get("spread_a", 1.9), line_spread,

                s_home, s_away, total_pts,
                label_ml, label_tot, label_spread,
            ])

            if len(row) != len(FEATURE_COLS):
                print(f"Error: Row length mismatch {len(row)} != {len(FEATURE_COLS)}")
                sys.exit(1)

            writer.writerow(row)
            extracted_count += 1

    print("\nExtraction Summary")
    print("=========================")
    print(f"Total Matches in Scope: {len(loader.matches)}")
    print(f"Filtered (Missing ML Odds): {missing_odds_count}")
    print(f"✅ Successfully Extracted: {extracted_count}")
    print(f"📂 Saved to: {OUTPUT_CSV}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: load the top-league list, pre-load all data through
    # the loader, write the training CSV, and report the total run time.
    t_start = time.time()

    if not os.path.exists(TOP_LEAGUES_PATH):
        print(f"Error: file not found {TOP_LEAGUES_PATH}")
        sys.exit(1)

    with open(TOP_LEAGUES_PATH, "r") as f:
        top_leagues = json.load(f)

    print("🏀 Extracting Advanced Basketball Training Data (V21)")
    print("=====================================================")
    print(f"Loaded {len(top_leagues)} top leagues.")

    conn = get_conn()
    try:
        loader = AdvancedDataLoader(conn, top_leagues)

        loader.load_all()
        process_matches(loader)
    finally:
        # Release the connection even when loading or extraction fails or exits.
        conn.close()

    print(f"Total Script Run Time: {time.time()-t_start:.1f}s")
|
||||||
@@ -0,0 +1,428 @@
|
|||||||
|
"""
|
||||||
|
XGBoost Training Data Extraction (Basketball)
|
||||||
|
==============================================
|
||||||
|
Batch feature extraction for top-league basketball matches.
|
||||||
|
Extracts features + labels per match for XGBoost model training.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python3 scripts/extract_basketball_data.py
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import csv
|
||||||
|
import math
|
||||||
|
import time
|
||||||
|
from datetime import datetime
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
import psycopg2
|
||||||
|
from psycopg2.extras import RealDictCursor
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# CONFIG
|
||||||
|
# =============================================================================
|
||||||
|
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
sys.path.insert(0, AI_ENGINE_DIR)
|
||||||
|
|
||||||
|
TOP_LEAGUES_PATH = os.path.join(AI_ENGINE_DIR, "..", "basketball_top_leagues.json")
|
||||||
|
OUTPUT_CSV = os.path.join(AI_ENGINE_DIR, "data", "basketball_training_data.csv")
|
||||||
|
|
||||||
|
os.makedirs(os.path.dirname(OUTPUT_CSV), exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
def get_conn():
    """Open a psycopg2 connection using the ``DATABASE_URL`` environment variable.

    Everything from ``?schema=`` onwards is dropped from the URL before it is
    passed to ``psycopg2.connect``.

    Raises:
        RuntimeError: If ``DATABASE_URL`` is unset or empty, instead of
            handing an empty connection string to ``psycopg2.connect``.
    """
    db_url = os.getenv("DATABASE_URL", "").split("?schema=")[0]
    if not db_url:
        raise RuntimeError("DATABASE_URL is required")
    return psycopg2.connect(db_url)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# FEATURE COLUMNS (ORDER MATTERS — matches CSV header)
|
||||||
|
# =============================================================================
|
||||||
|
FEATURE_COLS = [
|
||||||
|
# Match identifiers
|
||||||
|
"match_id", "home_team_id", "away_team_id", "league_id", "mst_utc",
|
||||||
|
|
||||||
|
# Form Features (8)
|
||||||
|
"home_points_avg", "home_conceded_avg",
|
||||||
|
"away_points_avg", "away_conceded_avg",
|
||||||
|
"home_winning_streak", "away_winning_streak",
|
||||||
|
"home_win_rate", "away_win_rate",
|
||||||
|
|
||||||
|
# H2H Features (4)
|
||||||
|
"h2h_total_matches", "h2h_home_win_rate",
|
||||||
|
"h2h_avg_points", "h2h_over140_rate",
|
||||||
|
|
||||||
|
# Odds Features (8)
|
||||||
|
"odds_ml_h", "odds_ml_a",
|
||||||
|
"odds_tot_o", "odds_tot_u", "odds_tot_line",
|
||||||
|
"odds_spread_h", "odds_spread_a", "odds_spread_line",
|
||||||
|
|
||||||
|
# Labels
|
||||||
|
"score_home", "score_away", "total_points",
|
||||||
|
"label_ml", # 0=Home, 1=Away
|
||||||
|
"label_tot", # 0=Under, 1=Over (dynamic line)
|
||||||
|
"label_spread", # 0=Away Cover, 1=Home Cover (dynamic line)
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# BATCH LOADERS — Pre-load data to avoid N+1 queries
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
class BatchDataLoader:
    """Pre-loads all necessary data in bulk, then serves features per match.

    Every cache is keyed with stringified ids and ``int`` kickoff timestamps,
    so lookups built from ``str(match['id'])`` / ``int(match['mst_utc'])``
    hit the cache whatever Python types the driver returns for those columns.
    """

    def __init__(self, conn, top_league_ids: list):
        """Store the connection, open a dict-row cursor and create empty caches."""
        self.conn = conn
        self.cur = conn.cursor(cursor_factory=RealDictCursor)
        self.top_league_ids = top_league_ids

        # Pre-loaded data caches
        self.matches = []
        self.odds_cache = {}  # str(match_id) → {ml_h, ml_a, ...}
        self.form_cache = {}  # (str(team_id), mst_utc) → form features
        self.h2h_cache = {}   # (str(home_id), str(away_id), mst_utc) → h2h features

    def load_all(self):
        """Load all data in batch."""
        t0 = time.time()

        self._load_matches()
        print(f" ✅ Matches: {len(self.matches)} ({time.time()-t0:.1f}s)", flush=True)

        t1 = time.time()
        self._load_odds()
        print(f" ✅ Odds: {len(self.odds_cache)} matches ({time.time()-t1:.1f}s)", flush=True)

        t3 = time.time()
        self._load_team_history()
        print(f" ✅ Team History & Stats cache built ({time.time()-t3:.1f}s)", flush=True)

        print(f" 📊 Total load time: {time.time()-t0:.1f}s", flush=True)

    def _load_matches(self):
        """Load finished basketball matches with scores into ``self.matches``.

        Rows are ordered by ``mst_utc`` ascending and restricted to
        ``self.top_league_ids`` when that list is non-empty.
        """
        # 1640995200000 ms = since Jan 1, 2022. Kept out of the SQL text so
        # appending further clauses never depends on where a "--" comment ends.
        query = """
            SELECT
                id,
                mst_utc,
                league_id,
                home_team_id,
                away_team_id,
                score_home,
                score_away,
                status
            FROM matches
            WHERE sport = 'basketball'
              AND status = 'FT'
              AND score_home IS NOT NULL
              AND score_away IS NOT NULL
              AND mst_utc > 1640995200000
        """
        params = None
        if self.top_league_ids:
            format_strings = ",".join(["%s"] * len(self.top_league_ids))
            query += f" AND league_id IN ({format_strings})"
            params = tuple(self.top_league_ids)

        self.cur.execute(query + " ORDER BY mst_utc ASC", params)
        self.matches = self.cur.fetchall()

    def _load_odds(self):
        """Fill ``self.odds_cache`` with parsed odds for finished basketball matches.

        Selections are fetched in chunks of category ids and passed to
        ``_parse_single_odd``. A selection whose ``odd_value`` is missing or
        non-numeric is skipped.
        """
        query = """
            SELECT match_id, name as category_name, db_id as category_id
            FROM odd_categories
            WHERE match_id IN (
                SELECT id FROM matches WHERE sport = 'basketball' AND status = 'FT'
            )
        """
        self.cur.execute(query)
        cats = self.cur.fetchall()
        if not cats:
            return

        # map cat -> match
        cat_to_match = {c['category_id']: c['match_id'] for c in cats}
        cat_id_to_name = {c['category_id']: c['category_name'] for c in cats}

        chunk_size = 50000
        cats_list = list(cat_to_match)
        # Ceiling division: an exact multiple of chunk_size must not report an extra chunk.
        total_chunks = (len(cats_list) + chunk_size - 1) // chunk_size
        print(f" Fetching {len(cats_list)} categories in {total_chunks} chunks...", flush=True)

        for idx, i in enumerate(range(0, len(cats_list), chunk_size)):
            chunk = tuple(cats_list[i:i+chunk_size])
            self.cur.execute("SELECT odd_category_db_id, name, odd_value FROM odd_selections WHERE odd_category_db_id IN %s", (chunk,))
            rows = self.cur.fetchall()

            for row in rows:
                c_id = row['odd_category_db_id']
                # Stringified so the consumer's str(match['id']) lookup finds the entry.
                m_id = str(cat_to_match[c_id])
                self.odds_cache.setdefault(m_id, {})

                try:
                    odd_value = float(row['odd_value'])
                except (TypeError, ValueError):
                    # NULL / malformed price: nothing usable for this selection.
                    continue

                self._parse_single_odd(m_id, cat_id_to_name.get(c_id, ""), str(row['name']), odd_value)
            print(f" Processed chunk {idx+1}/{total_chunks} ({len(rows)} selections).", flush=True)

    def _parse_single_odd(self, match_id, category_name, sel_name, odd_value):
        """Fold one odds selection into ``self.odds_cache[match_id]``.

        Prices <= 1.0 are ignored. Match-result markets set ``ml_h``/``ml_a``;
        over/under markets set ``tot_line``/``tot_o``/``tot_u`` and handicap
        markets set ``spread_line``/``spread_h``/``spread_a`` (first value wins
        for totals and spread keys).
        """
        if odd_value <= 1.0:
            return

        cat_lower = category_name.lower()
        sel_lower = sel_name.lower()
        target = self.odds_cache[match_id]

        # Text inside the first "(...)" with decimal commas normalised; None when absent.
        left = cat_lower.find("(")
        right = cat_lower.find(")", left + 1)
        payload = None
        if left > -1 and right > -1:
            payload = cat_lower[left + 1:right].replace(",", ".")

        # ML
        if cat_lower in ("maç sonucu (uzt. dahil)", "mac sonucu (uzt. dahil)", "maç sonucu", "mac sonucu"):
            if sel_lower == "1":
                target["ml_h"] = odd_value
            elif sel_lower == "2":
                target["ml_a"] = odd_value

        # Totals
        if "alt/üst" in cat_lower or "alt/ust" in cat_lower:
            line = None
            if payload is not None:
                try:
                    line = float(payload)
                except ValueError:
                    # Parenthesised text is not a number: no line for this category.
                    line = None

            if line and "tot_line" not in target:
                target["tot_line"] = line

            if "üst" in sel_lower or "ust" in sel_lower or "over" in sel_lower:
                target.setdefault("tot_o", odd_value)
            elif "alt" in sel_lower or "under" in sel_lower:
                target.setdefault("tot_u", odd_value)

        # Spread
        if "hnd. ms" in cat_lower or "hand. ms" in cat_lower or "hnd ms" in cat_lower:
            line = None
            if payload is not None and ":" in payload:
                home_txt, away_txt = payload.split(":")[:2]
                try:
                    home_hcp = float(home_txt)
                    away_hcp = float(away_txt)
                except ValueError:
                    # Malformed "home:away" handicap: leave the line unset.
                    pass
                else:
                    if abs(home_hcp) < 1e-6 and away_hcp > 0:
                        line = -away_hcp
                    elif home_hcp > 0 and abs(away_hcp) < 1e-6:
                        line = home_hcp
                    elif abs(home_hcp - away_hcp) < 1e-6 and home_hcp > 0:
                        line = 0.0

            if line is not None and "spread_line" not in target:
                target["spread_line"] = line

            if sel_lower == "1":
                target.setdefault("spread_h", odd_value)
            elif sel_lower == "2":
                target.setdefault("spread_a", odd_value)

    def _load_team_history(self):
        """Build ``self.form_cache`` and ``self.h2h_cache`` from ``self.matches``.

        Form uses only a team's matches with a strictly earlier kickoff:
        scoring averages over the last five, win rate and current winning
        streak over all of them. Head-to-head entries are directional
        (same home team and same away team).
        """
        # We need historical form (avg points scored/conceded, win rate).
        team_matches = defaultdict(list)
        for m in self.matches:
            utc = int(m['mst_utc'])
            # Entries are (kickoff, points scored, points conceded) from the team's view.
            team_matches[str(m['home_team_id'])].append((utc, m['score_home'], m['score_away']))
            team_matches[str(m['away_team_id'])].append((utc, m['score_away'], m['score_home']))

        for team_id, hist in team_matches.items():
            hist.sort(key=lambda x: x[0])  # Sort by time

            for i, (mst_utc, scored, conceded) in enumerate(hist):
                # Filter past matches
                past = [x for x in hist[:i] if x[0] < mst_utc]
                if not past:
                    self.form_cache[(team_id, mst_utc)] = {
                        "points_avg": 80.0,
                        "conceded_avg": 80.0,
                        "winning_streak": 0,
                        "win_rate": 0.5
                    }
                    continue

                last_5 = past[-5:]
                pts = sum(x[1] for x in last_5) / len(last_5)
                conc = sum(x[2] for x in last_5) / len(last_5)

                wins = sum(1 for x in past if x[1] > x[2])
                win_rate = wins / len(past)

                streak = 0
                for x in reversed(past):
                    if x[1] > x[2]:
                        streak += 1
                    else:
                        break

                self.form_cache[(team_id, mst_utc)] = {
                    "points_avg": pts,
                    "conceded_avg": conc,
                    "winning_streak": streak,
                    "win_rate": win_rate
                }

        # Build H2H
        h2h_map = defaultdict(list)
        for m in self.matches:
            directional_pair = (str(m['home_team_id']), str(m['away_team_id']))
            h2h_map[directional_pair].append((int(m['mst_utc']), m['score_home'], m['score_away']))

        for (h_id, a_id), hist in h2h_map.items():
            hist.sort(key=lambda x: x[0])
            for i, (mst_utc, sh, sa) in enumerate(hist):
                past = [x for x in hist[:i] if x[0] < mst_utc]

                if not past:
                    self.h2h_cache[(h_id, a_id, mst_utc)] = {
                        "total": 0, "home_win_rate": 0.5,
                        "avg_points": 160.0, "over140_rate": 0.5
                    }
                else:
                    home_wins = sum(1 for x in past if x[1] > x[2])
                    total_pts = sum(x[1] + x[2] for x in past)
                    over140 = sum(1 for x in past if x[1] + x[2] > 140)

                    self.h2h_cache[(h_id, a_id, mst_utc)] = {
                        "total": len(past),
                        "home_win_rate": home_wins / len(past),
                        "avg_points": total_pts / len(past),
                        "over140_rate": over140 / len(past)
                    }
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# FEATURE EXTRACTION PIPELINE
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
def process_matches(loader: BatchDataLoader):
    """Processes loaded matches, maps to features, handles implicit fallbacks, saves to CSV.

    Matches lacking either moneyline price are skipped. Each remaining match
    becomes one row under the ``FEATURE_COLS`` header in ``OUTPUT_CSV``, and a
    summary is printed at the end.

    Labels: ``label_ml`` is 0 when the home score is higher, else 1;
    ``label_tot`` is 1 when total points exceed the totals line (160.0 when no
    line is cached); ``label_spread`` is 1 when home score plus the spread
    line (0.0 when absent) exceeds the away score.

    Exits the process with status 1 if a row's length differs from the header.
    """
    extracted_count = 0
    missing_odds_count = 0

    # The context manager closes the file even when the length guard below
    # exits the process or an unexpected error is raised.
    with open(OUTPUT_CSV, "w", newline='') as f:
        writer = csv.writer(f)
        writer.writerow(FEATURE_COLS)

        for match in loader.matches:
            mid = str(match['id'])
            mst = int(match['mst_utc'])
            hid = str(match['home_team_id'])
            aid = str(match['away_team_id'])

            # True Results
            s_home = int(match['score_home'])
            s_away = int(match['score_away'])
            total_pts = s_home + s_away

            c_odds = loader.odds_cache.get(mid, {})
            c_form_h = loader.form_cache.get((hid, mst), {})
            c_form_a = loader.form_cache.get((aid, mst), {})
            c_h2h = loader.h2h_cache.get((hid, aid, mst), {})

            # Basic validation: ensure we have at least ML odds
            if "ml_h" not in c_odds or "ml_a" not in c_odds:
                missing_odds_count += 1
                continue

            # Target Variables (Labels)
            label_ml = 0 if s_home > s_away else 1  # Home Win vs Away Win

            # Totals label (evaluate against dynamic line)
            line_tot = c_odds.get("tot_line", 160.0)
            label_tot = 1 if total_pts > line_tot else 0  # Over = 1, Under = 0

            # Spread label (evaluate against dynamic line)
            # Home Spread Coverage. Example: line= -5.5. s_home + line = s_home - 5.5.
            line_spread = c_odds.get("spread_line", 0.0)
            hc_score = float(s_home) + float(line_spread)
            label_spread = 1 if hc_score > float(s_away) else 0  # Spread Coverage: 1=Home, 0=Away

            # Compile Row
            row = [
                # Identifiers
                mid, hid, aid, match.get('league_id', ''), mst,

                # Form cache
                c_form_h.get("points_avg", 80), c_form_h.get("conceded_avg", 80),
                c_form_a.get("points_avg", 80), c_form_a.get("conceded_avg", 80),
                c_form_h.get("winning_streak", 0), c_form_a.get("winning_streak", 0),
                # 0.5 is the neutral win rate the form cache uses for teams without history.
                c_form_h.get("win_rate", 0.5), c_form_a.get("win_rate", 0.5),

                # H2H cache
                c_h2h.get("total", 0), c_h2h.get("home_win_rate", 0.5),
                c_h2h.get("avg_points", 160.0), c_h2h.get("over140_rate", 0.5),

                # Odds
                c_odds.get("ml_h", 1.9), c_odds.get("ml_a", 1.9),
                c_odds.get("tot_o", 1.9), c_odds.get("tot_u", 1.9), line_tot,
                c_odds.get("spread_h", 1.9), c_odds.get("spread_a", 1.9), line_spread,

                # Labels
                s_home, s_away, total_pts,
                label_ml,
                label_tot,
                label_spread,
            ]

            # Safeguard length
            if len(row) != len(FEATURE_COLS):
                print(f"Error: Row length mismatch {len(row)} != {len(FEATURE_COLS)}")
                sys.exit(1)

            writer.writerow(row)
            extracted_count += 1

    print("\nExtraction Summary")
    print("=========================")
    print(f"Total Matches in Scope: {len(loader.matches)}")
    print(f"Filtered (Missing ML Odds): {missing_odds_count}")
    print(f"✅ Successfully Extracted: {extracted_count}")
    print(f"📂 Saved to: {OUTPUT_CSV}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: load the top-league list, pre-load everything into
    # memory, write the training CSV, and report the total run time.
    t_start = time.time()

    # Load leagues
    if not os.path.exists(TOP_LEAGUES_PATH):
        print(f"Error: file not found {TOP_LEAGUES_PATH}")
        sys.exit(1)

    with open(TOP_LEAGUES_PATH, "r") as f:
        top_leagues = json.load(f)

    print("🏀 Extracting Basketball Training Data (XGBoost)")
    print("==================================================")
    print(f"Loaded {len(top_leagues)} top leagues.")

    conn = get_conn()
    try:
        loader = BatchDataLoader(conn, top_leagues)

        # 1. Pre-load everything into memory
        loader.load_all()

        # 2. Extract and match features, then write CSV
        process_matches(loader)
    finally:
        # Release the connection even when loading or extraction fails or exits.
        conn.close()

    print(f"Total Script Run Time: {time.time()-t_start:.1f}s")
|
||||||
@@ -0,0 +1,765 @@
|
|||||||
|
"""
|
||||||
|
Extract basketball V25-style training data.
|
||||||
|
|
||||||
|
Scope:
|
||||||
|
- top leagues from basketball_top_leagues.json
|
||||||
|
- finished basketball matches
|
||||||
|
- pre-match features only
|
||||||
|
- labels for moneyline / total / spread markets
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import csv
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from collections import defaultdict
|
||||||
|
from typing import Any, Dict, List, Tuple
|
||||||
|
|
||||||
|
import psycopg2
|
||||||
|
from psycopg2.extras import RealDictCursor
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
sys.path.insert(0, AI_ENGINE_DIR)
|
||||||
|
|
||||||
|
from models.basketball_v25_features import DEFAULT_FEATURE_COLS
|
||||||
|
|
||||||
|
# JSON whitelist of league ids, located one directory above the AI engine.
TOP_LEAGUES_PATH = os.path.join(AI_ENGINE_DIR, "..", "basketball_top_leagues.json")
# Destination of the generated training set.
OUTPUT_CSV = os.path.join(AI_ENGINE_DIR, "data", "basketball_training_data_v25.csv")

# Columns identifying a row; written before the model features.
IDENTIFIER_COLS = [
    "match_id",
    "home_team_id",
    "away_team_id",
    "league_id",
    "mst_utc",
]
# Final scores and the three market labels; written after the model features.
LABEL_COLS = ["score_home", "score_away", "total_points", "label_ml", "label_total", "label_spread"]
# Full CSV header: identifiers, then model features, then labels.
CSV_COLS = [*IDENTIFIER_COLS, *DEFAULT_FEATURE_COLS, *LABEL_COLS]
|
||||||
|
|
||||||
|
|
||||||
|
def get_conn():
    """Open a PostgreSQL connection described by ``DATABASE_URL``.

    Everything from the first ``?schema=`` onwards is cut off the URL
    before it is handed to ``psycopg2.connect``.

    Raises:
        RuntimeError: if the variable is unset or empty after trimming.
    """
    raw_url = os.getenv("DATABASE_URL", "")
    dsn, _, _ = raw_url.partition("?schema=")
    if dsn:
        return psycopg2.connect(dsn)
    raise RuntimeError("DATABASE_URL is required")
|
||||||
|
|
||||||
|
|
||||||
|
def safe_float(value: Any, default: float = 0.0) -> float:
    """Convert *value* to ``float``, returning *default* when that is impossible.

    ``None`` and anything for which ``float()`` raises ``TypeError`` or
    ``ValueError`` map to *default*.
    """
    if value is None:
        return default
    try:
        return float(value)
    except (TypeError, ValueError):
        return default
|
||||||
|
|
||||||
|
|
||||||
|
def pct(num: float, den: float, default: float = 0.0) -> float:
    """Return ``num / den`` as a float, or *default* when *den* is zero or negative."""
    return default if den <= 0 else float(num) / float(den)
|
||||||
|
|
||||||
|
|
||||||
|
def default_recent_stats() -> Dict[str, float]:
    """Return the fallback form profile used for a team without any history.

    A fresh dict is built on every call, so callers may mutate the result.
    """
    return dict(
        points_avg=82.0,
        conceded_avg=80.0,
        net_rating=2.0,
        win_rate=0.5,
        winning_streak=0.0,
        rest_days=3.0,
        rebounds_avg=35.0,
        assists_avg=18.0,
        steals_avg=6.5,
        blocks_avg=3.0,
        turnovers_avg=13.0,
        fg_pct=0.45,
        three_pt_pct=0.34,
        ft_pct=0.75,
        q1_avg=20.0,
        q4_avg=21.0,
        conc_rebounds_avg=35.0,
        conc_assists_avg=18.0,
        conc_turnovers_avg=13.0,
        conc_fg_pct=0.45,
        conc_three_pt_pct=0.34,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def summarize_team_history(history: List[Dict[str, Any]], match_date_ms: int) -> Dict[str, float]:
    """Condense a team's earlier games into the rolling form profile.

    Args:
        history: the team's previous game records; the slicing below treats
            the end of the list as the most recent games.
        match_date_ms: kick-off of the match being described, epoch milliseconds.

    Returns:
        A dict with the same keys as ``default_recent_stats()``. Averages use
        the last 8 games; win rate and winning streak use the last 12.
        An empty history yields the defaults.
    """
    if not history:
        return default_recent_stats()

    last_eight = history[-8:]
    last_twelve = history[-12:]

    # One flag per game in the form window: did the team outscore its opponent?
    won_flags = [safe_float(game["scored"]) > safe_float(game["conceded"]) for game in last_twelve]

    # Count consecutive wins walking back from the latest game.
    streak = 0
    for won in reversed(won_flags):
        if not won:
            break
        streak += 1

    # Days since the previous game, never negative; 3.0 when its timestamp is missing.
    previous_ms = safe_float(history[-1].get("mst_utc"), 0.0)
    if previous_ms:
        rest_days = max(0.0, (float(match_date_ms) - previous_ms) / 86_400_000.0)
    else:
        rest_days = 3.0

    games = max(len(last_eight), 1)

    def mean_of(key: str, fallback: float) -> float:
        # Missing or unparsable values count as the fallback.
        return sum([safe_float(game.get(key), fallback) for game in last_eight]) / games

    points_avg = sum([safe_float(game["scored"]) for game in last_eight]) / games
    conceded_avg = sum([safe_float(game["conceded"]) for game in last_eight]) / games

    summary: Dict[str, float] = {
        "points_avg": points_avg,
        "conceded_avg": conceded_avg,
        "net_rating": points_avg - conceded_avg,
        "win_rate": sum(won_flags) / max(len(last_twelve), 1),
        "winning_streak": float(streak),
        "rest_days": rest_days,
    }
    # (output key, record key, fallback) for every plain box-score average.
    averaged = (
        ("rebounds_avg", "rebounds", 35.0),
        ("assists_avg", "assists", 18.0),
        ("steals_avg", "steals", 6.5),
        ("blocks_avg", "blocks", 3.0),
        ("turnovers_avg", "turnovers", 13.0),
        ("fg_pct", "fg_pct", 0.45),
        ("three_pt_pct", "three_pt_pct", 0.34),
        ("ft_pct", "ft_pct", 0.75),
        ("q1_avg", "q1_score", 20.0),
        ("q4_avg", "q4_score", 21.0),
        ("conc_rebounds_avg", "opp_rebounds", 35.0),
        ("conc_assists_avg", "opp_assists", 18.0),
        ("conc_turnovers_avg", "opp_turnovers", 13.0),
        ("conc_fg_pct", "opp_fg_pct", 0.45),
        ("conc_three_pt_pct", "opp_three_pt_pct", 0.34),
    )
    for out_key, record_key, fallback in averaged:
        summary[out_key] = mean_of(record_key, fallback)
    return summary
|
||||||
|
|
||||||
|
|
||||||
|
def summarize_h2h(
    history: List[Dict[str, Any]],
    current_home_id: str,
    total_line: float,
    spread_home_line: float,
) -> Dict[str, float]:
    """Summarise the last 10 meetings of a pairing from the current home side's view.

    Args:
        history: earlier meetings of the two teams; the last 10 entries are used.
        current_home_id: id of the team that is at home in the match being described.
        total_line: points total the past results are compared against;
            a value ``<= 0`` makes the over rate fall back to 0.5.
        spread_home_line: handicap added to the home score for the cover rate.

    Returns:
        The six ``h2h_*`` features; neutral defaults when there is no history.
    """
    if not history:
        return {
            "h2h_total_matches": 0.0,
            "h2h_home_win_rate": 0.5,
            "h2h_avg_points": 160.0,
            "h2h_avg_margin": 0.0,
            "h2h_over_total_rate": 0.5,
            "h2h_home_cover_rate": 0.5,
        }

    window = history[-10:]

    # Re-orient every past result so "home" always means the current home team.
    oriented = []
    for game in window:
        same_venue = game["home_team_id"] == current_home_id
        listed_home = safe_float(game["score_home"])
        listed_away = safe_float(game["score_away"])
        oriented.append((listed_home, listed_away) if same_venue else (listed_away, listed_home))

    meetings = float(len(window))
    home_wins = sum(1 for ours, theirs in oriented if ours > theirs)
    points_sum = sum((ours + theirs for ours, theirs in oriented), 0.0)
    margin_sum = sum((ours - theirs for ours, theirs in oriented), 0.0)
    covers = sum(1 for ours, theirs in oriented if (ours + spread_home_line) > theirs)

    if total_line > 0:
        over_rate = sum(1 for ours, theirs in oriented if (ours + theirs) > total_line) / meetings
    else:
        over_rate = 0.5

    return {
        "h2h_total_matches": meetings,
        "h2h_home_win_rate": home_wins / meetings,
        "h2h_avg_points": points_sum / meetings,
        "h2h_avg_margin": margin_sum / meetings,
        "h2h_over_total_rate": over_rate,
        "h2h_home_cover_rate": covers / meetings,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def summarize_league(
    history: List[Dict[str, Any]],
    total_line: float,
    spread_home_line: float,
) -> Dict[str, float]:
    """Summarise the last 200 results recorded for a league.

    Args:
        history: earlier results of the league; the last 200 entries are used.
        total_line: points total the past results are compared against;
            a value ``<= 0`` makes the over rate fall back to 0.5.
        spread_home_line: handicap added to the home score for the cover rate.

    Returns:
        The four ``league_*`` features; fixed defaults when there is no history.
    """
    if not history:
        return {
            "league_avg_points": 160.0,
            "league_home_win_rate": 0.56,
            "league_over_total_rate": 0.5,
            "league_home_cover_rate": 0.5,
        }

    window = history[-200:]
    results = [(safe_float(game["score_home"]), safe_float(game["score_away"])) for game in window]
    games = float(len(window))

    points_sum = sum((home + away for home, away in results), 0.0)
    home_wins = sum(1 for home, away in results if home > away)
    covers = sum(1 for home, away in results if (home + spread_home_line) > away)

    if total_line > 0:
        over_rate = sum(1 for home, away in results if (home + away) > total_line) / games
    else:
        over_rate = 0.5

    return {
        "league_avg_points": points_sum / games,
        "league_home_win_rate": home_wins / games,
        "league_over_total_rate": over_rate,
        "league_home_cover_rate": covers / games,
    }
|
||||||
|
|
||||||
|
|
||||||
|
# Turkish letters folded to their ASCII look-alikes. U+0307 (combining dot
# above) is deleted because ``"İ".lower()`` produces ``"i"`` followed by that
# mark, which would otherwise keep "PERİYOT" from matching "periyot".
_TURKISH_ASCII_FOLD = str.maketrans(
    {
        "ı": "i",
        "ç": "c",
        "ş": "s",
        "ğ": "g",
        "ö": "o",
        "ü": "u",
        "\u0307": None,
    }
)


def normalize_text(value: Any) -> str:
    """Return *value* trimmed, lower-cased and folded to ASCII-like Turkish.

    Falsy input (``None``, ``""``, ``0``) yields an empty string. Used to
    compare market and selection names regardless of case or diacritics.
    """
    return str(value or "").strip().lower().translate(_TURKISH_ASCII_FOLD)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_parenthesized_number(category_name: str) -> float | None:
    """Parse the number inside the first ``(...)`` of a market name.

    A decimal comma is accepted. Returns ``None`` when there is no
    parenthesised part, when it contains ``:`` (a handicap pair rather than
    a single line), or when it is not a valid number.
    """
    open_idx = category_name.find("(")
    close_idx = category_name.find(")", open_idx + 1)
    if min(open_idx, close_idx) < 0:
        return None

    inner = category_name[open_idx + 1 : close_idx].replace(",", ".")
    if ":" in inner:
        return None
    try:
        return float(inner)
    except ValueError:
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def parse_handicap_home_line(category_name: str) -> float | None:
    """Turn a ``(home:away)`` handicap in a market name into a home-side line.

    Rules, in order:
      * ``(0:N)`` with ``N > 0``  -> ``-N``
      * ``(N:0)`` with ``N > 0``  -> ``N``
      * ``(N:N)`` with ``N > 0``  -> ``0.0``
      * anything else             -> the home number unchanged

    Returns ``None`` when there is no parenthesised ``home:away`` pair or
    either side is not a number. A decimal comma is accepted.
    """
    open_idx = category_name.find("(")
    close_idx = category_name.find(")", open_idx + 1)
    if min(open_idx, close_idx) < 0:
        return None

    inner = category_name[open_idx + 1 : close_idx].replace(",", ".")
    if ":" not in inner:
        return None

    home_text, _, away_text = inner.partition(":")
    try:
        home_line, away_line = float(home_text), float(away_text)
    except ValueError:
        return None

    home_is_zero = abs(home_line) < 1e-9
    away_is_zero = abs(away_line) < 1e-9
    if home_is_zero and away_line > 0:
        return -away_line
    if home_line > 0 and away_is_zero:
        return home_line
    if home_line > 0 and abs(home_line - away_line) < 1e-9:
        return 0.0
    return home_line
|
||||||
|
|
||||||
|
|
||||||
|
# Normalised market names treated as the match-winner (moneyline) market.
_MONEYLINE_CATEGORIES = ("mac sonucu", "mac sonucu (uzt. dahil)")
# Totals markets containing any of these are half / period / single-team
# totals, not the full-game total. Tokens are written in normalised form.
_PARTIAL_TOTAL_TOKENS = ("1. yari", "periyot", "ev sahibi", "deplasman")
# Spellings of the full-game handicap market.
_HANDICAP_TOKENS = ("hnd. ms", "hand. ms", "hnd ms")


def parse_odds(categories: List[Dict[str, Any]], selections: List[Dict[str, Any]]) -> Dict[str, Dict[str, float]]:
    """Collapse raw odds rows into one flat odds dict per match.

    Args:
        categories: rows with ``category_id``, ``match_id`` and ``category_name``.
        selections: rows with ``odd_category_db_id``, ``name`` and ``odd_value``.

    Returns:
        A mapping ``match_id -> odds`` where ``odds`` may contain ``ml_h``,
        ``ml_a``, ``tot_line``, ``tot_o``, ``tot_u``, ``spread_home_line``,
        ``spread_h`` and ``spread_a``. Selections priced ``<= 1.0`` and
        selections of unknown categories are ignored.

    A match can carry several total / handicap categories, each with its own
    line, and the selections are not grouped by category. The first category
    that yields a usable line is therefore remembered per match, and only its
    selections may fill the prices. This keeps ``tot_o`` / ``tot_u`` (and
    ``spread_h`` / ``spread_a``) paired with the line that is stored.
    """
    match_odds: Dict[str, Dict[str, float]] = defaultdict(dict)
    category_map = {
        row["category_id"]: (str(row["match_id"]), str(row["category_name"]))
        for row in categories
    }
    # match_id -> category id that supplied the stored total / handicap line.
    total_source: Dict[str, Any] = {}
    spread_source: Dict[str, Any] = {}

    for row in selections:
        category_id = row["odd_category_db_id"]
        if category_id not in category_map:
            continue
        match_id, category_name = category_map[category_id]
        category_norm = normalize_text(category_name)
        selection_norm = normalize_text(row["name"])
        odd_value = safe_float(row["odd_value"], 0.0)
        if odd_value <= 1.0:
            continue

        target = match_odds[match_id]

        if category_norm in _MONEYLINE_CATEGORIES:
            if selection_norm == "1":
                target["ml_h"] = odd_value
            elif selection_norm == "2":
                target["ml_a"] = odd_value

        is_full_game_total = "alt/ust" in category_norm and not any(
            token in category_norm for token in _PARTIAL_TOTAL_TOKENS
        )
        if is_full_game_total:
            total_line = extract_parenthesized_number(category_name)
            if total_line is not None and total_source.setdefault(match_id, category_id) == category_id:
                target.setdefault("tot_line", total_line)
                if any(token in selection_norm for token in ("ust", "over")):
                    target.setdefault("tot_o", odd_value)
                elif any(token in selection_norm for token in ("alt", "under")):
                    target.setdefault("tot_u", odd_value)

        if any(token in category_norm for token in _HANDICAP_TOKENS):
            home_line = parse_handicap_home_line(category_name)
            if home_line is not None and spread_source.setdefault(match_id, category_id) == category_id:
                target.setdefault("spread_home_line", home_line)
                if selection_norm == "1":
                    target.setdefault("spread_h", odd_value)
                elif selection_norm == "2":
                    target.setdefault("spread_a", odd_value)

    return match_odds
|
||||||
|
|
||||||
|
|
||||||
|
class ExtractionContext:
    """Holds the database rows needed for extraction, loaded once into memory.

    After ``load()``:
      * ``matches``     - match rows, ordered by ``mst_utc`` ascending
      * ``team_stats``  - box-score rows keyed by ``(match_id, team_id)`` strings
      * ``ai_features`` - ``basketball_ai_features`` rows keyed by match id string
      * ``odds_cache``  - parsed odds per match id, as returned by ``parse_odds``
    """

    def __init__(self, conn, league_ids: List[str]):
        self.conn = conn
        # Dict-style rows so columns can be addressed by name.
        self.cur = conn.cursor(cursor_factory=RealDictCursor)
        self.league_ids = league_ids
        self.matches: List[Dict[str, Any]] = []
        self.team_stats: Dict[Tuple[str, str], Dict[str, Any]] = {}
        self.ai_features: Dict[str, Dict[str, Any]] = {}
        self.odds_cache: Dict[str, Dict[str, float]] = {}

    def load(self) -> None:
        """Run every loader, in the order matches, team stats, AI features, odds."""
        for step in (
            self._load_matches,
            self._load_team_stats,
            self._load_ai_features,
            self._load_odds,
        ):
            step()

    def _load_matches(self) -> None:
        """Fetch finished basketball matches with scores, oldest first.

        Only matches with ``mst_utc >= 1640995200000`` are selected. When
        ``league_ids`` is non-empty the query is limited to those leagues.
        """
        base_sql = """
            SELECT id, league_id, home_team_id, away_team_id, mst_utc, score_home, score_away
            FROM matches
            WHERE sport = 'basketball'
              AND status = 'FT'
              AND score_home IS NOT NULL
              AND score_away IS NOT NULL
              AND mst_utc >= 1640995200000
        """
        league_filter = ""
        params: Tuple[Any, ...] = ()
        if self.league_ids:
            markers = ",".join("%s" for _ in self.league_ids)
            league_filter = f" AND league_id IN ({markers})"
            params = tuple(self.league_ids)

        self.cur.execute(base_sql + league_filter + " ORDER BY mst_utc ASC", params)
        self.matches = self.cur.fetchall()

    def _load_team_stats(self) -> None:
        """Index every ``basketball_team_stats`` row by ``(match_id, team_id)``."""
        self.cur.execute(
            """
            SELECT
                match_id,
                team_id,
                points,
                rebounds,
                assists,
                steals,
                blocks,
                turnovers,
                fg_made,
                fg_attempted,
                three_pt_made,
                three_pt_attempted,
                ft_made,
                ft_attempted,
                q1_score,
                q4_score
            FROM basketball_team_stats
            """
        )
        for stats_row in self.cur.fetchall():
            self.team_stats[(str(stats_row["match_id"]), str(stats_row["team_id"]))] = stats_row

    def _load_ai_features(self) -> None:
        """Index every ``basketball_ai_features`` row by its match id."""
        self.cur.execute("SELECT * FROM basketball_ai_features")
        for feature_row in self.cur.fetchall():
            self.ai_features[str(feature_row["match_id"])] = feature_row

    def _load_odds(self) -> None:
        """Load odds categories and selections, then parse them into ``odds_cache``.

        Categories are read for all finished basketball matches. Their
        selections are fetched in batches of 50,000 category ids per query.
        """
        self.cur.execute(
            """
            SELECT db_id AS category_id, match_id, name AS category_name
            FROM odd_categories
            WHERE match_id IN (
                SELECT id
                FROM matches
                WHERE sport = 'basketball'
                  AND status = 'FT'
            )
            """
        )
        categories = self.cur.fetchall()
        category_ids = [category["category_id"] for category in categories]
        if not category_ids:
            return

        batch = 50000
        selections: List[Dict[str, Any]] = []
        for start in range(0, len(category_ids), batch):
            id_batch = tuple(category_ids[start : start + batch])
            self.cur.execute(
                """
                SELECT odd_category_db_id, name, odd_value
                FROM odd_selections
                WHERE odd_category_db_id IN %s
                """,
                (id_batch,),
            )
            selections.extend(self.cur.fetchall())

        self.odds_cache = parse_odds(categories, selections)
|
||||||
|
|
||||||
|
|
||||||
|
def build_match_feature_row(
    match: Dict[str, Any],
    ctx: ExtractionContext,
    team_history: Dict[str, List[Dict[str, Any]]],
    pair_history: Dict[Tuple[str, str], List[Dict[str, Any]]],
    league_history: Dict[str, List[Dict[str, Any]]],
) -> Dict[str, Any] | None:
    """Build one CSV row (identifiers, features, labels) for *match*.

    Features come from the histories passed in, the odds cached in *ctx* and,
    where a column is present, the pre-computed ``ai_features`` row, which
    takes precedence over the history-derived value.

    Returns:
        The row dict, or ``None`` when either moneyline price is missing or
        not above 1.0.

    Labels: ``label_ml`` is 0 when the home score is higher, otherwise 1;
    ``label_total`` is 1 when the points total exceeds the total line;
    ``label_spread`` is 1 when home score plus the home line exceeds the
    away score.
    """
    match_id = str(match["id"])
    home_id = str(match["home_team_id"])
    away_id = str(match["away_team_id"])
    league_id = str(match["league_id"] or "")
    mst_utc = int(match["mst_utc"])

    # A row without a usable moneyline is not extracted at all.
    odds = ctx.odds_cache.get(match_id, {})
    ml_h = safe_float(odds.get("ml_h"), 0.0)
    ml_a = safe_float(odds.get("ml_a"), 0.0)
    if ml_h <= 1.0 or ml_a <= 1.0:
        return None

    ai_row = ctx.ai_features.get(match_id, {})
    home_recent = summarize_team_history(team_history[home_id], mst_utc)
    away_recent = summarize_team_history(team_history[away_id], mst_utc)
    recent = {"home": home_recent, "away": away_recent}

    total_line = safe_float(odds.get("tot_line"), 160.0)
    spread_home_line = safe_float(odds.get("spread_home_line"), 0.0)
    pair_key = tuple(sorted((home_id, away_id)))
    h2h = summarize_h2h(pair_history[pair_key], home_id, total_line, spread_home_line)
    league = summarize_league(league_history[league_id], total_line, spread_home_line)

    tot_o = safe_float(odds.get("tot_o"), 1.90)
    tot_u = safe_float(odds.get("tot_u"), 1.90)
    spr_h = safe_float(odds.get("spread_h"), 1.90)
    spr_a = safe_float(odds.get("spread_a"), 1.90)

    def ai(column: str, fallback: float) -> float:
        # Value from the pre-computed feature row, else the given fallback.
        return safe_float(ai_row.get(column), fallback)

    def normalised_pair(first_odds: float, second_odds: float) -> Tuple[float, float]:
        # Inverse prices rescaled to sum to 1; 0.5 / 0.5 when neither price is usable.
        first_raw = 1.0 / first_odds if first_odds > 1.0 else 0.0
        second_raw = 1.0 / second_odds if second_odds > 1.0 else 0.0
        both = first_raw + second_raw
        if both > 0:
            return first_raw / both, second_raw / both
        return 0.5, 0.5

    # Moneyline probabilities; the raw sum is also needed for the overround.
    raw_home = 1.0 / ml_h
    raw_away = 1.0 / ml_a
    raw_total = raw_home + raw_away
    if raw_total > 0:
        implied_home, implied_away = raw_home / raw_total, raw_away / raw_total
    else:
        implied_home = implied_away = 0.5

    implied_total_over, implied_total_under = normalised_pair(tot_o, tot_u)
    implied_spread_home, implied_spread_away = normalised_pair(spr_h, spr_a)

    projected_total_form = (
        home_recent["points_avg"]
        + away_recent["points_avg"]
        + home_recent["conceded_avg"]
        + away_recent["conceded_avg"]
    ) / 2.0
    projected_margin_form = home_recent["net_rating"] - away_recent["net_rating"]

    features: Dict[str, float] = {}

    # --- Elo block: venue and form Elo fall back to the overall Elo. ---
    overall_elo = {"home": ai("home_elo", 1500.0), "away": ai("away_elo", 1500.0)}
    features["home_overall_elo"] = overall_elo["home"]
    features["away_overall_elo"] = overall_elo["away"]
    features["elo_diff"] = ai("elo_diff", 0.0)
    features["home_home_elo"] = ai("home_home_elo", overall_elo["home"])
    features["away_away_elo"] = ai("away_away_elo", overall_elo["away"])
    features["home_form_elo"] = ai("home_form_elo", overall_elo["home"])
    features["away_form_elo"] = ai("away_form_elo", overall_elo["away"])

    # --- Form score: falls back to the win rate on a 0-100 scale. ---
    home_form = ai("home_form_score", home_recent["win_rate"] * 100.0)
    away_form = ai("away_form_score", away_recent["win_rate"] * 100.0)
    features["home_form_score"] = home_form
    features["away_form_score"] = away_form
    features["form_score_diff"] = home_form - away_form

    # --- Stats where the AI feature row overrides the rolling history. ---
    # (history stat, AI column suffix, name of the home-minus-away feature)
    overridable = (
        ("points_avg", "pts_avg_5", "points_avg_diff"),
        ("conceded_avg", "conceded_avg_5", "conceded_avg_diff"),
        ("winning_streak", "win_streak", "streak_diff"),
        ("rebounds_avg", "avg_rebounds", "rebounds_diff"),
        ("turnovers_avg", "avg_turnovers", "turnovers_diff"),
        ("fg_pct", "fg_pct", "fg_pct_diff"),
    )
    for stat, ai_suffix, diff_name in overridable:
        home_value = ai(f"home_{ai_suffix}", home_recent[stat])
        away_value = ai(f"away_{ai_suffix}", away_recent[stat])
        features[f"home_{stat}"] = home_value
        features[f"away_{stat}"] = away_value
        features[diff_name] = home_value - away_value

    # --- Three-point rate: the AI column holds makes, divided here by 25. ---
    three_pt = {
        side: pct(
            ai(f"{side}_avg_three_pt_made", recent[side]["three_pt_pct"] * 25.0),
            25.0,
            recent[side]["three_pt_pct"],
        )
        for side in ("home", "away")
    }
    features["home_three_pt_pct"] = three_pt["home"]
    features["away_three_pt_pct"] = three_pt["away"]
    features["three_pt_pct_diff"] = three_pt["home"] - three_pt["away"]

    # --- Stats taken straight from the rolling history, with a diff. ---
    history_with_diff = (
        ("net_rating", "net_rating_diff"),
        ("win_rate", "win_rate_diff"),
        ("rest_days", "rest_diff"),
        ("assists_avg", "assists_diff"),
        ("steals_avg", "steals_diff"),
        ("blocks_avg", "blocks_diff"),
        ("ft_pct", "ft_pct_diff"),
    )
    for stat, diff_name in history_with_diff:
        features[f"home_{stat}"] = home_recent[stat]
        features[f"away_{stat}"] = away_recent[stat]
        features[diff_name] = home_recent[stat] - away_recent[stat]

    # --- Stats taken straight from the rolling history, no diff. ---
    history_only = (
        "q1_avg",
        "q4_avg",
        "conc_rebounds_avg",
        "conc_assists_avg",
        "conc_turnovers_avg",
        "conc_fg_pct",
        "conc_three_pt_pct",
    )
    for stat in history_only:
        features[f"home_{stat}"] = home_recent[stat]
        features[f"away_{stat}"] = away_recent[stat]

    features.update(h2h)
    features.update(league)

    # --- Market block. ---
    features.update(
        {
            "ml_home_odds": ml_h,
            "ml_away_odds": ml_a,
            "implied_home": ai("implied_home", implied_home),
            "implied_away": ai("implied_away", implied_away),
            "total_line": total_line,
            "total_over_odds": tot_o,
            "total_under_odds": tot_u,
            "implied_total_over": ai("implied_over_total", implied_total_over),
            "implied_total_under": implied_total_under,
            "spread_home_line": spread_home_line,
            "spread_home_odds": spr_h,
            "spread_away_odds": spr_a,
            "implied_spread_home": ai("implied_spread_home", implied_spread_home),
            "implied_spread_away": implied_spread_away,
            "odds_overround": ai("odds_overround", raw_total - 1.0),
            "home_sidelined_count": 0.0,
            "away_sidelined_count": 0.0,
            "sidelined_diff": 0.0,
            "missing_players_impact": ai("missing_players_impact", 0.0),
            "total_points_form": projected_total_form,
            "total_points_allowed_form": home_recent["conceded_avg"] + away_recent["conceded_avg"],
            "projected_total_delta_vs_line": projected_total_form - total_line,
            "projected_margin_vs_spread": projected_margin_form + spread_home_line,
        }
    )

    score_home = int(match["score_home"])
    score_away = int(match["score_away"])
    total_points = score_home + score_away

    row: Dict[str, Any] = {
        "match_id": match_id,
        "home_team_id": home_id,
        "away_team_id": away_id,
        "league_id": league_id,
        "mst_utc": mst_utc,
    }
    # Only the configured feature columns are written; any feature not
    # computed above is emitted as 0.0.
    for feature in DEFAULT_FEATURE_COLS:
        row[feature] = safe_float(features.get(feature), 0.0)
    row["score_home"] = score_home
    row["score_away"] = score_away
    row["total_points"] = total_points
    row["label_ml"] = 0 if score_home > score_away else 1
    row["label_total"] = 1 if total_points > total_line else 0
    row["label_spread"] = 1 if (score_home + spread_home_line) > score_away else 0
    return row
|
||||||
|
|
||||||
|
|
||||||
|
def _team_game_record(
    own_stats: Dict[str, Any],
    opp_stats: Dict[str, Any],
    scored: int,
    conceded: int,
    mst_utc: int,
) -> Dict[str, Any]:
    """Build one team's history record for a finished game.

    Missing box-score values fall back to the same constants on both sides,
    so the home and away records cannot drift apart.
    """
    return {
        "mst_utc": mst_utc,
        "scored": scored,
        "conceded": conceded,
        "rebounds": safe_float(own_stats.get("rebounds"), 35.0),
        "assists": safe_float(own_stats.get("assists"), 18.0),
        "steals": safe_float(own_stats.get("steals"), 6.5),
        "blocks": safe_float(own_stats.get("blocks"), 3.0),
        "turnovers": safe_float(own_stats.get("turnovers"), 13.0),
        "fg_pct": pct(safe_float(own_stats.get("fg_made")), safe_float(own_stats.get("fg_attempted")), 0.45),
        "three_pt_pct": pct(
            safe_float(own_stats.get("three_pt_made")),
            safe_float(own_stats.get("three_pt_attempted")),
            0.34,
        ),
        "ft_pct": pct(safe_float(own_stats.get("ft_made")), safe_float(own_stats.get("ft_attempted")), 0.75),
        "q1_score": safe_float(own_stats.get("q1_score"), 20.0),
        "q4_score": safe_float(own_stats.get("q4_score"), 21.0),
        "opp_rebounds": safe_float(opp_stats.get("rebounds"), 35.0),
        "opp_assists": safe_float(opp_stats.get("assists"), 18.0),
        "opp_turnovers": safe_float(opp_stats.get("turnovers"), 13.0),
        "opp_fg_pct": pct(safe_float(opp_stats.get("fg_made")), safe_float(opp_stats.get("fg_attempted")), 0.45),
        "opp_three_pt_pct": pct(
            safe_float(opp_stats.get("three_pt_made")),
            safe_float(opp_stats.get("three_pt_attempted")),
            0.34,
        ),
    }


def update_histories(
    match: Dict[str, Any],
    ctx: ExtractionContext,
    team_history: Dict[str, List[Dict[str, Any]]],
    pair_history: Dict[Tuple[str, str], List[Dict[str, Any]]],
    league_history: Dict[str, List[Dict[str, Any]]],
) -> None:
    """Append the result of *match* to the team, pairing and league histories.

    The three history mappings are mutated in place. The pairing key is the
    sorted ``(home_id, away_id)`` tuple, so both fixtures of a pairing share
    one list.
    """
    match_id = str(match["id"])
    home_id = str(match["home_team_id"])
    away_id = str(match["away_team_id"])
    league_id = str(match["league_id"] or "")
    score_home = int(match["score_home"])
    score_away = int(match["score_away"])
    mst_utc = int(match["mst_utc"])

    # Box scores may be absent; an empty dict makes every stat use its fallback.
    home_stats = ctx.team_stats.get((match_id, home_id), {})
    away_stats = ctx.team_stats.get((match_id, away_id), {})

    team_history[home_id].append(
        _team_game_record(home_stats, away_stats, score_home, score_away, mst_utc)
    )
    team_history[away_id].append(
        _team_game_record(away_stats, home_stats, score_away, score_home, mst_utc)
    )
    pair_history[tuple(sorted((home_id, away_id)))].append(
        {
            "home_team_id": home_id,
            "away_team_id": away_id,
            "score_home": score_home,
            "score_away": score_away,
        }
    )
    league_history[league_id].append(
        {
            "score_home": score_home,
            "score_away": score_away,
        }
    )
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Build the basketball training CSV from the matches in ``ExtractionContext``.

    Reads the league id list from ``TOP_LEAGUES_PATH``, loads the matches via
    ``ExtractionContext`` and walks ``ctx.matches`` in order. For every match a
    feature row is built from the histories accumulated so far; only afterwards
    are the histories updated with that match, so a row never sees its own
    result. Rows for which ``build_match_feature_row`` returns ``None`` are
    counted as skipped.

    Raises:
        FileNotFoundError: if ``TOP_LEAGUES_PATH`` does not exist.
    """
    started_at = time.time()
    if not os.path.exists(TOP_LEAGUES_PATH):
        raise FileNotFoundError(TOP_LEAGUES_PATH)

    with open(TOP_LEAGUES_PATH, "r", encoding="utf-8") as handle:
        league_ids = json.load(handle)

    os.makedirs(os.path.dirname(OUTPUT_CSV), exist_ok=True)
    conn = get_conn()
    try:
        ctx = ExtractionContext(conn, league_ids)
        ctx.load()

        # Rolling state shared by the feature builder and the history updater.
        team_history: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
        pair_history: Dict[Tuple[str, str], List[Dict[str, Any]]] = defaultdict(list)
        league_history: Dict[str, List[Dict[str, Any]]] = defaultdict(list)

        extracted = 0
        skipped = 0
        with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as handle:
            writer = csv.DictWriter(handle, fieldnames=CSV_COLS)
            writer.writeheader()

            for idx, match in enumerate(ctx.matches, start=1):
                row = build_match_feature_row(match, ctx, team_history, pair_history, league_history)
                if row is None:
                    skipped += 1
                else:
                    writer.writerow(row)
                    extracted += 1
                # Update histories for every match, including skipped ones, so
                # later matches can still use this result as history.
                update_histories(match, ctx, team_history, pair_history, league_history)

                if idx % 2000 == 0:
                    print(
                        f"[INFO] processed={idx} extracted={extracted} skipped={skipped}",
                        flush=True,
                    )
    finally:
        # Release the connection even when loading or extraction fails.
        conn.close()

    print("[OK] Basketball V25 extraction complete", flush=True)
    print(f"[INFO] matches={len(ctx.matches)} extracted={extracted} skipped={skipped}", flush=True)
    print(f"[INFO] output={OUTPUT_CSV}", flush=True)
    print(f"[INFO] duration_sec={time.time() - started_at:.1f}", flush=True)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
|
||||||
Executable
+1180
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,93 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
AI_ENGINE_DIR = Path(__file__).resolve().parents[1]
|
||||||
|
SOURCE_CSV = AI_ENGINE_DIR / "data" / "training_data.csv"
|
||||||
|
TARGET_DIR = AI_ENGINE_DIR / "data" / "v26_shadow"
|
||||||
|
TARGET_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
def _rolling_windows(frame: pd.DataFrame) -> list[dict[str, int]]:
|
||||||
|
ordered = frame.sort_values("mst_utc").reset_index(drop=True)
|
||||||
|
windows: list[dict[str, int]] = []
|
||||||
|
if ordered.empty:
|
||||||
|
return windows
|
||||||
|
|
||||||
|
size = len(ordered)
|
||||||
|
cuts = [0.55, 0.7, 0.85]
|
||||||
|
for idx, cut in enumerate(cuts, start=1):
|
||||||
|
end_ix = max(int(size * cut), 1)
|
||||||
|
test_end = min(size - 1, end_ix + max(int(size * 0.10), 1))
|
||||||
|
windows.append(
|
||||||
|
{
|
||||||
|
"window": idx,
|
||||||
|
"train_end_ix": end_ix - 1,
|
||||||
|
"test_start_ix": end_ix,
|
||||||
|
"test_end_ix": test_end,
|
||||||
|
"train_end_mst_utc": int(ordered.iloc[end_ix - 1]["mst_utc"]),
|
||||||
|
"test_end_mst_utc": int(ordered.iloc[test_end]["mst_utc"]),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return windows
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Split ``training_data.csv`` chronologically into the V26 shadow dataset.

    The source CSV is sorted by ``mst_utc``, four derived columns are added,
    and the rows are cut into train (first ~70 %), validation (up to ~85 %)
    and holdout (the rest). The three CSVs plus a ``dataset_meta.json``
    describing the split are written into ``TARGET_DIR``.

    Raises:
        SystemExit: if the source CSV is missing or has no ``mst_utc`` column.
    """
    if not SOURCE_CSV.exists():
        raise SystemExit(f"Missing source CSV: {SOURCE_CSV}")

    frame = pd.read_csv(SOURCE_CSV)
    if "mst_utc" not in frame.columns:
        raise SystemExit("training_data.csv must include mst_utc")

    ordered = frame.sort_values("mst_utc").reset_index(drop=True)
    ordered["lineup_completeness"] = 1.0

    # Optional source columns: fall back to an all-zero Series. A scalar
    # default would break the chained ``.fillna`` call below.
    if "referee_experience" in ordered.columns:
        referee_experience = ordered["referee_experience"].fillna(0)
    else:
        referee_experience = pd.Series(0, index=ordered.index)
    ordered["referee_available"] = (referee_experience > 0).astype(float)

    if "league_zero_goal_rate" in ordered.columns:
        zero_goal_rate = ordered["league_zero_goal_rate"].fillna(0)
    else:
        zero_goal_rate = pd.Series(0.0, index=ordered.index)
    # Reliability is 0.85 minus the zero-goal rate, clamped to [0.25, 0.95].
    ordered["league_reliability"] = zero_goal_rate.apply(
        lambda value: round(max(0.25, min(0.95, 0.85 - float(value))), 4)
    )
    ordered["odds_snapshot_freshness"] = 1.0

    train_end = max(int(len(ordered) * 0.70), 1)
    validation_end = max(int(len(ordered) * 0.85), train_end + 1)
    validation_end = min(validation_end, len(ordered) - 1)
    # Never let the holdout slice start before the end of the training slice;
    # otherwise a one-row input lands in both train.csv and holdout.csv.
    validation_end = max(validation_end, train_end)

    train_df = ordered.iloc[:train_end].copy()
    validation_df = ordered.iloc[train_end:validation_end].copy()
    holdout_df = ordered.iloc[validation_end:].copy()

    train_df.to_csv(TARGET_DIR / "train.csv", index=False)
    validation_df.to_csv(TARGET_DIR / "validation.csv", index=False)
    holdout_df.to_csv(TARGET_DIR / "holdout.csv", index=False)

    meta = {
        "source": str(SOURCE_CSV),
        "rows": int(len(ordered)),
        "train_rows": int(len(train_df)),
        "validation_rows": int(len(validation_df)),
        "holdout_rows": int(len(holdout_df)),
        "rolling_windows": _rolling_windows(ordered),
        "derived_columns": [
            "lineup_completeness",
            "referee_available",
            "league_reliability",
            "odds_snapshot_freshness",
        ],
        "feature_policy": "prediction_time_only",
    }
    (TARGET_DIR / "dataset_meta.json").write_text(
        json.dumps(meta, indent=2),
        encoding="utf-8",
    )

    print(f"[OK] V26 dataset written to {TARGET_DIR}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,305 @@
|
|||||||
|
"""
|
||||||
|
V27 Training Data Extraction - Value Sniper
|
||||||
|
Extends V25 to ALL matches with odds (~104K).
|
||||||
|
Adds rolling window, league quality, time, H2H, strength features.
|
||||||
|
Usage: python3 scripts/extract_training_data_v27.py
|
||||||
|
"""
|
||||||
|
import os, sys, csv, time
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
AI_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
sys.path.insert(0, AI_DIR)
|
||||||
|
|
||||||
|
from scripts.extract_training_data import (
|
||||||
|
BatchDataLoader as V25Loader,
|
||||||
|
FeatureExtractor as V25Extractor,
|
||||||
|
FEATURE_COLS as V25_COLS,
|
||||||
|
get_conn,
|
||||||
|
)
|
||||||
|
from features.rolling_features import (
|
||||||
|
calc_rolling_features, calc_league_quality,
|
||||||
|
calc_time_features, calc_advanced_h2h, calc_strength_diff,
|
||||||
|
)
|
||||||
|
|
||||||
|
OUTPUT = os.path.join(AI_DIR, "data", "training_data_v27.csv")
|
||||||
|
os.makedirs(os.path.dirname(OUTPUT), exist_ok=True)
|
||||||
|
|
||||||
|
# Feature columns added by V27 on top of the V25 column list.
# NOTE(review): "home_rolling20_*" is listed but "away_rolling20_*" is not,
# although V27Extractor writes both; the away values are dropped by the
# DictWriter (extrasaction='ignore'). Confirm whether this is intentional
# before changing it -- the column list defines the model's feature schema.
V27_NEW = [
    # rolling goal averages
    "home_rolling5_goals",
    "home_rolling5_conceded",
    "home_rolling10_goals",
    "home_rolling10_conceded",
    "home_rolling20_goals",
    "home_rolling20_conceded",
    "away_rolling5_goals",
    "away_rolling5_conceded",
    "away_rolling10_goals",
    "away_rolling10_conceded",
    # clean sheets, venue splits, trend
    "home_rolling5_cs",
    "away_rolling5_cs",
    "home_venue_goals",
    "home_venue_conceded",
    "away_venue_goals",
    "away_venue_conceded",
    "home_goal_trend",
    "away_goal_trend",
    # league quality
    "league_home_win_rate",
    "league_draw_rate",
    "league_btts_rate",
    "league_ou25_rate",
    "league_reliability_score",
    # time
    "home_days_rest",
    "away_days_rest",
    "match_month",
    "is_season_start",
    "is_season_end",
    # head-to-head
    "h2h_home_goals_avg",
    "h2h_away_goals_avg",
    "h2h_recent_trend",
    "h2h_venue_advantage",
    # strength differentials / interactions
    "attack_vs_defense_home",
    "attack_vs_defense_away",
    "xg_diff",
    "form_momentum_interaction",
    "elo_form_consistency",
    "upset_x_elo_gap",
]
|
||||||
|
ALL_COLS = V25_COLS + V27_NEW
|
||||||
|
|
||||||
|
|
||||||
|
class V27Loader(V25Loader):
    """Load ALL finished football matches that have odds, not just top leagues.

    Overrides the V25 loading steps so that no league filter is applied and
    adds ``league_matches_cache`` (league id -> list of per-match tuples) for
    the league-quality features.
    """

    # 1/X/2 markets: normalised category name -> odds-cache key prefix.
    _RESULT_MARKETS = {
        'maç sonucu': 'ms',
        '1. yarı sonucu': 'ht_ms',
    }
    # Over/under markets: normalised category name -> odds-cache key prefix
    # (stored with the suffix "_u" for under and "_o" for over).
    _OVER_UNDER_MARKETS = {
        '2,5 alt/üst': 'ou25',
        '1,5 alt/üst': 'ou15',
        '3,5 alt/üst': 'ou35',
        '0,5 alt/üst': 'ou05',
        '1. yarı 0,5 alt/üst': 'ht_ou05',
        '1. yarı 1,5 alt/üst': 'ht_ou15',
    }

    def __init__(self, conn):
        # An empty league list is passed to the parent; the overridden
        # queries below do not filter by league.
        super().__init__(conn, [])
        self.league_matches_cache = {}

    def _load_matches(self):
        """Fetch every finished football match that has at least one odds category.

        Each row is (id, home_team_id, away_team_id, score_home, score_away,
        ht_score_home, ht_score_away, mst_utc, league_id, home name,
        away name, league name), ordered by ``mst_utc`` ascending.
        """
        self.cur.execute("""
            SELECT m.id, m.home_team_id, m.away_team_id,
                   m.score_home, m.score_away,
                   m.ht_score_home, m.ht_score_away,
                   m.mst_utc, m.league_id,
                   ht.name, at.name, l.name
            FROM matches m
            JOIN teams ht ON m.home_team_id = ht.id
            JOIN teams at ON m.away_team_id = at.id
            JOIN leagues l ON m.league_id = l.id
            WHERE m.status='FT' AND m.score_home IS NOT NULL
              AND m.sport='football'
              AND EXISTS(SELECT 1 FROM odd_categories oc WHERE oc.match_id=m.id)
            ORDER BY m.mst_utc ASC
        """)
        self.matches = self.cur.fetchall()

    def _load_odds(self):
        """Fill ``self.odds_cache`` (match id -> {odds key: value}).

        Category and selection names are lower-cased and stripped before
        matching. Rows with a missing/non-numeric/non-positive odd value or an
        empty category/selection are skipped.
        """
        self.cur.execute("""
            SELECT oc.match_id, oc.name, os.name, os.odd_value
            FROM odd_selections os
            JOIN odd_categories oc ON os.odd_category_db_id=oc.db_id
            JOIN matches m ON oc.match_id=m.id
            WHERE m.status='FT' AND m.sport='football'
        """)
        for mid, cat, sel, val in self.cur.fetchall():
            # Only the numeric conversion is expected to fail; keep the
            # best-effort skip but do not hide unrelated errors.
            try:
                v = float(val) if val else 0
            except (ValueError, TypeError):
                continue
            if v <= 0 or not cat or not sel:
                continue

            # A match gets an (possibly empty) entry as soon as it has one
            # valid odds row, even if the category is not one we map.
            o = self.odds_cache.setdefault(mid, {})
            c = cat.lower().strip()
            s = sel.lower().strip()

            if c in self._RESULT_MARKETS:
                prefix = self._RESULT_MARKETS[c]
                # Compare the normalised selection so " 1" or a lowercase
                # "x" are recognised like their canonical forms.
                if s == '1':
                    o[f'{prefix}_h'] = v
                elif s in ('0', 'x'):
                    o[f'{prefix}_d'] = v
                elif s == '2':
                    o[f'{prefix}_a'] = v
            elif c == 'karşılıklı gol':
                if 'var' in s:
                    o['btts_y'] = v
                elif 'yok' in s:
                    o['btts_n'] = v
            elif c in self._OVER_UNDER_MARKETS:
                prefix = self._OVER_UNDER_MARKETS[c]
                if 'alt' in s:
                    o[f'{prefix}_u'] = v
                elif 'üst' in s:
                    o[f'{prefix}_o'] = v

    def _load_league_stats(self):
        """Fill ``self.league_stats_cache`` with per-league aggregates.

        Stores the average total goals, the share of 0-0 results and the
        match count over all finished football matches of the league.
        """
        self.cur.execute("""
            SELECT league_id,
                   AVG(score_home+score_away), AVG(CASE WHEN score_home=0 AND score_away=0 THEN 1.0 ELSE 0.0 END),
                   COUNT(*)
            FROM matches WHERE status='FT' AND score_home IS NOT NULL AND sport='football'
            GROUP BY league_id
        """)
        for lid, ag, zr, cnt in self.cur.fetchall():
            # NOTE(review): the truthiness checks also replace a genuine 0
            # (e.g. a league without any 0-0 result) by the default value;
            # confirm whether that is intended before switching to
            # ``is not None``.
            self.league_stats_cache[lid] = {
                "avg_goals": float(ag) if ag else 2.5,
                "zero_rate": float(zr) if zr else 0.07,
                "match_count": cnt,
            }

    def _load_squad_data(self):
        """Fill ``self.squad_cache`` keyed by ``(match_id, team_id)``.

        Combines line-up counts, goal/assist events and "key players"
        (players with at least three goals for the team across all loaded
        matches) into ``squad_quality``, ``key_players``, ``missing_impact``
        and ``goals_form``.

        NOTE(review): the goal/assist counts come from the same match the
        entry is keyed by -- verify how the extractor consumes this cache to
        rule out target leakage.
        """
        # Line-up counts per match and team.
        self.cur.execute("""
            SELECT mpp.match_id, mpp.team_id,
                   COUNT(*) FILTER(WHERE mpp.is_starting=true),
                   COUNT(*),
                   COUNT(*) FILTER(WHERE mpp.is_starting=true
                       AND LOWER(COALESCE(mpp.position::TEXT,''))~'(forward|fwd|forvet|striker)')
            FROM match_player_participation mpp
            JOIN matches m ON mpp.match_id=m.id
            WHERE m.status='FT' AND m.sport='football'
            GROUP BY mpp.match_id, mpp.team_id
        """)
        part = {}
        for mid, tid, st, tot, fwd in self.cur.fetchall():
            part[(mid, tid)] = {
                'starting_count': st or 0,
                'total_squad': tot or 0,
                'fwd_count': fwd or 0,
            }

        # Goals, distinct assisting players and distinct scorers per match and team.
        self.cur.execute("""
            SELECT mpe.match_id, mpe.team_id,
                   COUNT(*) FILTER(WHERE mpe.event_type='goal' AND COALESCE(mpe.event_subtype,'') NOT ILIKE '%%penaltı kaçırma%%'),
                   COUNT(DISTINCT mpe.assist_player_id) FILTER(WHERE mpe.event_type='goal' AND mpe.assist_player_id IS NOT NULL),
                   COUNT(DISTINCT mpe.player_id) FILTER(WHERE mpe.event_type='goal' AND COALESCE(mpe.event_subtype,'') NOT ILIKE '%%penaltı kaçırma%%')
            FROM match_player_events mpe
            JOIN matches m ON mpe.match_id=m.id
            WHERE m.status='FT' AND m.sport='football'
            GROUP BY mpe.match_id, mpe.team_id
        """)
        evts = {}
        for mid, tid, g, a, sc in self.cur.fetchall():
            evts[(mid, tid)] = {'goals': g or 0, 'assists': a or 0, 'unique_scorers': sc or 0}

        # Key players: at least three goals for the team.
        self.cur.execute("""
            SELECT mpe.team_id, mpe.player_id, COUNT(*)
            FROM match_player_events mpe JOIN matches m ON mpe.match_id=m.id
            WHERE m.status='FT' AND m.sport='football' AND mpe.event_type='goal'
              AND COALESCE(mpe.event_subtype,'') NOT ILIKE '%%penaltı kaçırma%%'
            GROUP BY mpe.team_id, mpe.player_id HAVING COUNT(*)>=3
        """)
        kp_by_team = defaultdict(set)
        for tid, pid, _ in self.cur.fetchall():
            kp_by_team[tid].add(pid)

        # Starting players per match and team.
        self.cur.execute("""
            SELECT mpp.match_id, mpp.team_id, mpp.player_id
            FROM match_player_participation mpp JOIN matches m ON mpp.match_id=m.id
            WHERE mpp.is_starting=true AND m.status='FT' AND m.sport='football'
        """)
        starters = defaultdict(list)
        for mid, tid, pid in self.cur.fetchall():
            starters[(mid, tid)].append(pid)

        empty_part = {'starting_count': 0, 'total_squad': 0, 'fwd_count': 0}
        empty_evts = {'goals': 0, 'assists': 0, 'unique_scorers': 0}
        for key in set(part) | set(evts):
            mid, tid = key
            p = part.get(key, empty_part)
            e = evts.get(key, empty_evts)
            # ``.get`` avoids creating defaultdict entries for unseen keys.
            team_key_players = kp_by_team.get(tid, set())
            kp_in = sum(1 for x in starters.get(key, []) if x in team_key_players)
            kp_tot = len(team_key_players)
            kp_miss = max(0, kp_tot - kp_in)
            sq = p['starting_count']*0.3 + e['goals']*2.0 + e['assists']*1.0 + kp_in*3.0 + p['fwd_count']*1.5
            # Share of the team's key players that did not start, capped at 1.
            mi = min(kp_miss/max(kp_tot, 1), 1.0)
            self.squad_cache[key] = {
                'squad_quality': sq,
                'key_players': kp_in,
                'missing_impact': mi,
                'goals_form': e['goals'],
            }

    def _load_cards_data(self):
        """Fill ``self.cards_cache`` (match id -> weighted card count).

        Yellow-card events weigh 1, red-card events 2, any other event whose
        type contains "card" weighs 1.
        """
        self.cur.execute("""
            SELECT mpe.match_id,
                   SUM(CASE WHEN mpe.event_type::text LIKE '%%yellow_card%%' THEN 1
                            WHEN mpe.event_type::text LIKE '%%red_card%%' THEN 2 ELSE 1 END)
            FROM match_player_events mpe JOIN matches m ON mpe.match_id=m.id
            WHERE m.status='FT' AND m.sport='football' AND mpe.event_type::text LIKE '%%card%%'
            GROUP BY mpe.match_id
        """)
        for mid, cw in self.cur.fetchall():
            self.cards_cache[mid] = float(cw) if cw else 0.0

    def load_league_matches(self):
        """Group the loaded matches by league into ``league_matches_cache``.

        Each entry is ``(mst_utc, None, score_home, score_away, None)``, using
        the column positions of the ``_load_matches`` query.
        """
        for m in self.matches:
            lid = m[8]
            self.league_matches_cache.setdefault(lid, []).append((m[7], None, m[3], m[4], None))
|
||||||
|
|
||||||
|
|
||||||
|
class V27Extractor(V25Extractor):
    """Extend each V25 feature row with the V27 feature groups."""

    def _extract_one(self, mid, hid, aid, sh, sa, hth, hta, mst, lid,
                     hn, an, ln):
        """Build the V25 row for one match and add the V27 columns to it.

        Returns ``None`` when the V25 extractor produced no row; otherwise the
        same row object enriched with rolling, league-quality, time,
        head-to-head and strength-difference features.
        """
        row = super()._extract_one(mid, hid, aid, sh, sa, hth, hta, mst, lid, hn, an, ln)
        if not row:
            return None

        home_matches = self.loader.team_matches.get(hid, [])
        away_matches = self.loader.team_matches.get(aid, [])

        # Rolling features: (row column suffix, key returned by calc_rolling_features).
        rolling_columns = (
            ("rolling5_goals", "rolling5_goals_avg"),
            ("rolling5_conceded", "rolling5_conceded_avg"),
            ("rolling10_goals", "rolling10_goals_avg"),
            ("rolling10_conceded", "rolling10_conceded_avg"),
            ("rolling20_goals", "rolling20_goals_avg"),
            ("rolling20_conceded", "rolling20_conceded_avg"),
            ("rolling5_cs", "rolling5_clean_sheets"),
            ("venue_goals", "venue_goals_avg"),
            ("venue_conceded", "venue_conceded_avg"),
            ("goal_trend", "goal_trend"),
        )
        home_rolling = calc_rolling_features(home_matches, mst, True)
        away_rolling = calc_rolling_features(away_matches, mst, False)
        for side, rolling in (("home", home_rolling), ("away", away_rolling)):
            for suffix, source_key in rolling_columns:
                row[f"{side}_{suffix}"] = rolling[source_key]

        # League quality from league matches strictly before this kick-off.
        league_entries = self.loader.league_matches_cache.get(lid, [])
        earlier_entries = [entry for entry in league_entries if entry[0] < mst]
        row.update(calc_league_quality(earlier_entries))

        # Rest days per side; the calendar fields are taken from the home call.
        home_time = calc_time_features(home_matches, mst)
        away_time = calc_time_features(away_matches, mst)
        row["home_days_rest"] = home_time["days_rest"]
        row["away_days_rest"] = away_time["days_rest"]
        for calendar_key in ("match_month", "is_season_start", "is_season_end"):
            row[calendar_key] = home_time[calendar_key]

        # Head-to-head features computed from the home team's match list.
        row.update(calc_advanced_h2h(home_matches, hid, aid, mst))

        home_profile = {
            "goals_avg": row.get("home_goals_avg", 1.3),
            "conceded_avg": row.get("home_conceded_avg", 1.2),
            "scoring_rate": row.get("home_scoring_rate", 0.75),
        }
        away_profile = {
            "goals_avg": row.get("away_goals_avg", 1.3),
            "conceded_avg": row.get("away_conceded_avg", 1.2),
            "scoring_rate": row.get("away_scoring_rate", 0.75),
        }
        strength = calc_strength_diff(
            home_profile,
            away_profile,
            self.elo_ratings[hid], self.elo_ratings[aid],
            row.get("home_momentum_score", 0.5), row.get("away_momentum_score", 0.5),
            row.get("upset_potential", 0.0),
        )
        row.update(strength)
        return row
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the V27 extraction and write ``training_data_v27.csv``.

    Loads every finished football match with odds through ``V27Loader``,
    extracts the rows with ``V27Extractor``, writes them to ``OUTPUT`` using
    ``ALL_COLS`` (extra row keys are ignored) and prints a short summary.
    The database connection is closed on every exit path.
    """
    print("🚀 V27 Value Sniper — Training Data Extraction")
    print("="*60)
    t0 = time.time()
    conn = get_conn()
    try:
        print("\n📦 Loading ALL odds-bearing matches...")
        loader = V27Loader(conn)
        loader.load_all()
        loader.load_league_matches()
        print(f" Matches: {len(loader.matches)}")
        print(f" Leagues: {len(loader.league_stats_cache)}")
        print(f" Odds: {len(loader.odds_cache)}")

        ext = V27Extractor(conn, loader)
        rows = ext.extract_all()
        if not rows:
            # The finally clause below closes the connection, which the
            # original early return skipped.
            print("❌ No data!")
            return

        print(f"\n💾 Writing {len(rows)} rows...")
        with open(OUTPUT, "w", newline="", encoding="utf-8") as f:
            w = csv.DictWriter(f, fieldnames=ALL_COLS, extrasaction='ignore')
            w.writeheader()
            w.writerows(rows)

        n = len(rows)
        # Rows that carry a positive home-win odd.
        wo = sum(1 for r in rows if r.get("odds_ms_h", 0) > 0)
        # Match-result label distribution (0 = home, 1 = draw, 2 = away per the print below).
        md = defaultdict(int)
        for r in rows:
            md[r["label_ms"]] += 1
        print(f"\n📊 Summary:")
        print(f" Rows: {n}")
        print(f" With odds: {wo} ({wo/n*100:.1f}%)")
        print(f" Features: {len(ALL_COLS)} ({len(V25_COLS)} V25 + {len(V27_NEW)} new)")
        print(f" MS: H={md[0]/n*100:.1f}% D={md[1]/n*100:.1f}% A={md[2]/n*100:.1f}%")
        print(f" Time: {(time.time()-t0)/60:.1f}min")
        print(f"\n✅ Done! → {OUTPUT}")
    finally:
        conn.close()
|
||||||
|
|
||||||
|
if __name__=="__main__":
|
||||||
|
main()
|
||||||
Executable
+48
@@ -0,0 +1,48 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
|
||||||
|
MODEL_DIR="${XGB_MODEL_DIR:-$ROOT_DIR/ai-engine/models/xgboost}"
|
||||||
|
|
||||||
|
mkdir -p "$MODEL_DIR"
|
||||||
|
|
||||||
|
# download_model FILE_NAME [URL] [EXPECTED_SHA256]
#
# Downloads URL into "$MODEL_DIR/FILE_NAME". The file is fetched to a ".tmp"
# sibling first and only moved into place after the download (and, when a
# checksum is given, the SHA-256 verification) succeeded. An empty URL skips
# the model without failing. A failed download or checksum mismatch removes
# the temporary file and terminates the script.
#
# NOTE: the models are pickle files; loading a pickle executes code, so
# providing EXPECTED_SHA256 for every model is strongly recommended.
download_model() {
  local file_name="$1"
  local url="${2:-}"
  local expected_sha="${3:-}"

  if [[ -z "$url" ]]; then
    echo "⚠️ Skip ${file_name}: URL not provided"
    return 0
  fi

  local target_path="${MODEL_DIR}/${file_name}"
  local tmp_path="${target_path}.tmp"

  echo "⬇️ Downloading ${file_name}..."
  curl -fL --retry 3 --retry-delay 2 "$url" -o "$tmp_path" || {
    # Keep curl's exit status but do not leave a partial file behind.
    local status=$?
    rm -f "$tmp_path"
    echo "❌ Download failed for ${file_name}" >&2
    exit "$status"
  }

  if [[ -n "$expected_sha" ]]; then
    local actual_sha
    actual_sha="$(sha256sum "$tmp_path" | awk '{print $1}')"
    # Hex digests are case-insensitive; compare both sides in lower case.
    local wanted_sha
    wanted_sha="$(printf '%s' "$expected_sha" | tr 'A-F' 'a-f')"
    if [[ "$actual_sha" != "$wanted_sha" ]]; then
      echo "❌ SHA256 mismatch for ${file_name}"
      echo " expected: ${expected_sha}"
      echo " actual : ${actual_sha}"
      rm -f "$tmp_path"
      exit 1
    fi
  fi

  mv "$tmp_path" "$target_path"
  echo "✅ Ready: ${file_name}"
}
|
||||||
|
|
||||||
|
download_model "xgb_ht_ft.pkl" "${MODEL_XGB_HT_FT_URL:-}" "${MODEL_XGB_HT_FT_SHA256:-}"
|
||||||
|
download_model "xgb_ms.pkl" "${MODEL_XGB_MS_URL:-}" "${MODEL_XGB_MS_SHA256:-}"
|
||||||
|
download_model "xgb_ou25.pkl" "${MODEL_XGB_OU25_URL:-}" "${MODEL_XGB_OU25_SHA256:-}"
|
||||||
|
download_model "xgb_btts.pkl" "${MODEL_XGB_BTTS_URL:-}" "${MODEL_XGB_BTTS_SHA256:-}"
|
||||||
|
download_model "xgb_ou15.pkl" "${MODEL_XGB_OU15_URL:-}" "${MODEL_XGB_OU15_SHA256:-}"
|
||||||
|
download_model "xgb_ou35.pkl" "${MODEL_XGB_OU35_URL:-}" "${MODEL_XGB_OU35_SHA256:-}"
|
||||||
|
|
||||||
|
echo "📦 XGBoost model bootstrap completed."
|
||||||
@@ -0,0 +1,79 @@
|
|||||||
|
"""
|
||||||
|
List Matches for Sept 13, 2025 (Top Leagues)
|
||||||
|
============================================
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import psycopg2
|
||||||
|
from psycopg2.extras import RealDictCursor
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
sys.path.insert(0, project_root)
|
||||||
|
|
||||||
|
def get_clean_dsn() -> str:
    """Return the PostgreSQL DSN used by this script.

    The ``IDDAAI_DB_DSN`` environment variable takes precedence so that
    credentials can be supplied without editing the source. When it is unset
    or empty, the previous built-in DSN is returned unchanged.
    """
    # SECURITY: the fallback embeds a password in source control. Rotate it
    # and drop the literal once all callers provide IDDAAI_DB_DSN.
    return os.environ.get("IDDAAI_DB_DSN") or (
        "postgresql://suggestbet:SuGGesT2026SecuRe@localhost:15432/boilerplate_db"
    )
|
||||||
|
|
||||||
|
def list_matches():
    """Print the matches of the leagues in ``top_leagues.json`` for 13 Sept 2025.

    Loads the league ids from ``<project_root>/top_leagues.json``, queries the
    ``matches`` table for rows whose ``mst_utc`` lies within that day and
    prints kick-off time, league, teams, score, status and id for each match.
    Returns early (after printing a message) when the league file cannot be
    read or contains no league ids.
    """
    print("📅 Matches on Sept 13, 2025 (Top Leagues)")
    print("="*60)

    # Load Top Leagues
    leagues_path = os.path.join(project_root, "top_leagues.json")
    try:
        with open(leagues_path, 'r', encoding="utf-8") as f:
            top_leagues = json.load(f)
        league_ids = tuple(str(lid) for lid in top_leagues)
        print(f"📋 Loaded {len(top_leagues)} top leagues.")
    except Exception as e:
        print(f"❌ Error loading top_leagues.json: {e}")
        return

    if not league_ids:
        # An empty tuple would be rendered as "IN ()", which is invalid SQL.
        print("⚠️ top_leagues.json contains no league ids.")
        return

    # Date range in epoch milliseconds.
    # NOTE(review): the naive datetimes are interpreted in the machine's local
    # time zone; confirm that this matches how mst_utc is stored.
    start_dt = datetime(2025, 9, 13, 0, 0, 0)
    end_dt = datetime(2025, 9, 13, 23, 59, 59)
    start_ts = int(start_dt.timestamp() * 1000)
    end_ts = int(end_dt.timestamp() * 1000)

    query = """
        SELECT m.id, m.match_name, m.home_team_id, m.away_team_id,
               m.mst_utc, m.league_id, m.status, m.score_home, m.score_away,
               t1.name as home_team, t2.name as away_team,
               l.name as league_name
        FROM matches m
        LEFT JOIN teams t1 ON m.home_team_id = t1.id
        LEFT JOIN teams t2 ON m.away_team_id = t2.id
        LEFT JOIN leagues l ON m.league_id = l.id
        WHERE m.mst_utc BETWEEN %s AND %s
          AND m.league_id IN %s
        ORDER BY m.mst_utc ASC
    """

    dsn = get_clean_dsn()
    conn = psycopg2.connect(dsn)
    try:
        # The cursor context manager closes the cursor; the connection is
        # closed explicitly because psycopg2's connection context manager
        # only ends the transaction.
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(query, (start_ts, end_ts, league_ids))
            rows = cur.fetchall()
    finally:
        conn.close()

    print(f"📊 Found {len(rows)} matches.")
    print("-" * 60)

    for r in rows:
        time_str = datetime.fromtimestamp(r['mst_utc']/1000).strftime('%H:%M')
        score = f"{r['score_home']} - {r['score_away']}" if r['score_home'] is not None else "v"
        status = r['status']

        print(f"⚽ {time_str} | {r['league_name']}")
        print(f" {r['home_team']} {score} {r['away_team']} ({status})")
        print(f" ID: {r['id']}")
        print("-" * 40)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
list_matches()
|
||||||
@@ -0,0 +1,250 @@
|
|||||||
|
"""
|
||||||
|
VQWEN Live Prediction Tracker
|
||||||
|
=============================
|
||||||
|
Predicts today's upcoming matches (from live_matches) and tracks results.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
import pickle
|
||||||
|
import psycopg2
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from psycopg2.extras import RealDictCursor
|
||||||
|
|
||||||
|
AI_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
ROOT_DIR = os.path.dirname(AI_DIR)
|
||||||
|
PROJECT_ROOT = os.path.dirname(ROOT_DIR)
|
||||||
|
|
||||||
|
def get_clean_dsn() -> str:
    """Return the PostgreSQL DSN used by the live prediction tracker.

    The ``IDDAAI_DB_DSN`` environment variable takes precedence so that
    credentials can be supplied without editing the source. When it is unset
    or empty, the previous built-in DSN is returned unchanged.
    """
    # SECURITY: the fallback embeds a password in source control. Rotate it
    # and drop the literal once all callers provide IDDAAI_DB_DSN.
    return os.environ.get("IDDAAI_DB_DSN") or (
        "postgresql://suggestbet:SuGGesT2026SecuRe@localhost:15432/boilerplate_db"
    )
|
||||||
|
|
||||||
|
def run_live_predictions():
|
||||||
|
print("🔴 VQWEN LIVE PREDICTION TRACKER")
|
||||||
|
print("="*60)
|
||||||
|
|
||||||
|
# Load Models
|
||||||
|
mdir = os.path.join(ROOT_DIR, 'models', 'vqwen')
|
||||||
|
try:
|
||||||
|
with open(os.path.join(mdir, 'vqwen_ms.pkl'), 'rb') as f: model_ms = pickle.load(f)
|
||||||
|
with open(os.path.join(mdir, 'vqwen_ou25.pkl'), 'rb') as f: model_ou = pickle.load(f)
|
||||||
|
with open(os.path.join(mdir, 'vqwen_btts.pkl'), 'rb') as f: model_btts = pickle.load(f)
|
||||||
|
print("✅ VQWEN v3 modelleri yüklendi.")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"❌ Model hatası: {e}")
|
||||||
|
return
|
||||||
|
|
||||||
|
dsn = get_clean_dsn()
|
||||||
|
conn = psycopg2.connect(dsn)
|
||||||
|
cur = conn.cursor(cursor_factory=RealDictCursor)
|
||||||
|
|
||||||
|
# 1. Bugünün Maçlarını Çek (NS veya oynanıyor ama henüz bitmemiş olanlar)
|
||||||
|
# mst_utc bugün olan maçlar
|
||||||
|
start_of_day = int(time.mktime(time.strptime(time.strftime("%Y-%m-%d"), "%Y-%m-%d")) * 1000)
|
||||||
|
end_of_day = start_of_day + (24 * 60 * 60 * 1000)
|
||||||
|
|
||||||
|
print(f"📅 Bugünün maçları taranıyor...")
|
||||||
|
|
||||||
|
# live_matches veya matches tablosundan bugünkü maçları alıyoruz
|
||||||
|
# Önce odds olanları alalım
|
||||||
|
cur.execute("""
|
||||||
|
SELECT m.id, m.home_team_id, m.away_team_id, m.score_home, m.score_away,
|
||||||
|
m.mst_utc, m.status,
|
||||||
|
t1.name as home_team, t2.name as away_team,
|
||||||
|
l.name as league_name,
|
||||||
|
maf.home_elo, maf.away_elo
|
||||||
|
FROM live_matches m
|
||||||
|
LEFT JOIN teams t1 ON m.home_team_id = t1.id
|
||||||
|
LEFT JOIN teams t2 ON m.away_team_id = t2.id
|
||||||
|
LEFT JOIN leagues l ON m.league_id = l.id
|
||||||
|
LEFT JOIN football_ai_features maf ON maf.match_id = m.id
|
||||||
|
WHERE m.mst_utc >= %s AND m.mst_utc <= %s
|
||||||
|
ORDER BY m.mst_utc ASC
|
||||||
|
""", (start_of_day, end_of_day))
|
||||||
|
|
||||||
|
rows = cur.fetchall()
|
||||||
|
print(f"📊 Bugün için {len(rows)} maç bulundu.")
|
||||||
|
|
||||||
|
if not rows:
|
||||||
|
print("⚠️ Bugün için oranı olan maç bulunamadı.")
|
||||||
|
cur.close()
|
||||||
|
conn.close()
|
||||||
|
return
|
||||||
|
|
||||||
|
results = []
|
||||||
|
total_profit = 0.0
|
||||||
|
total_bet = 0
|
||||||
|
total_won = 0
|
||||||
|
|
||||||
|
for i, row in enumerate(rows):
|
||||||
|
match_id = str(row['id'])
|
||||||
|
home = row['home_team'] or "Home"
|
||||||
|
away = row['away_team'] or "Away"
|
||||||
|
league = row['league_name'] or "Unknown"
|
||||||
|
|
||||||
|
# Maç bitmiş mi kontrol et
|
||||||
|
is_finished = row['status'] in ['FT', 'AET', 'PEN', 'post', 'postGame'] or (
|
||||||
|
row['score_home'] is not None and row['score_away'] is not None and
|
||||||
|
row['status'] not in ['NS', 'pre', 'preGame', 'live', 'liveGame']
|
||||||
|
)
|
||||||
|
|
||||||
|
# Oranları al (odd_categories)
|
||||||
|
cur.execute("""
|
||||||
|
SELECT oc.name as category, os.name as selection, os.odd_value
|
||||||
|
FROM odd_categories oc
|
||||||
|
JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
|
||||||
|
WHERE oc.match_id = %s AND oc.name ILIKE ANY (ARRAY['%%Maç Sonucu%%', '%%2,5 Alt/Üst%%', '%%Karşılıklı Gol%%'])
|
||||||
|
""", (match_id,))
|
||||||
|
odds_rows = cur.fetchall()
|
||||||
|
|
||||||
|
odds_dict = {}
|
||||||
|
for o in odds_rows:
|
||||||
|
cat = o['category'].lower()
|
||||||
|
sel = o['selection'].lower()
|
||||||
|
val = float(o['odd_value'])
|
||||||
|
if 'maç sonucu' in cat or 'mac sonucu' in cat:
|
||||||
|
if sel == '1': odds_dict['ms_h'] = val
|
||||||
|
elif sel == 'x': odds_dict['ms_d'] = val
|
||||||
|
elif sel == '2': odds_dict['ms_a'] = val
|
||||||
|
elif '2,5 alt' in cat or '2.5 alt' in cat:
|
||||||
|
if 'alt' in sel: odds_dict['ou25_u'] = val
|
||||||
|
elif 'üst' in sel or 'ust' in sel: odds_dict['ou25_o'] = val
|
||||||
|
elif 'karşılıklı gol' in cat:
|
||||||
|
if 'var' in sel: odds_dict['btts_y'] = val
|
||||||
|
elif 'yok' in sel: odds_dict['btts_n'] = val
|
||||||
|
|
||||||
|
# Eğer oranlar yoksa atla
|
||||||
|
if not all(k in odds_dict for k in ['ms_h', 'ms_d', 'ms_a', 'ou25_o', 'btts_y']):
|
||||||
|
# print(f"⚠️ {home} vs {away} - Oranlar eksik.")
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Özellikleri Hesapla
|
||||||
|
# Form, Rest, Contextual Goals veritabanından çekilmeli (canlı maç için)
|
||||||
|
cur.execute("""
|
||||||
|
SELECT
|
||||||
|
COALESCE((SELECT AVG(m2.score_home) FROM matches m2 WHERE m2.home_team_id = %s AND m2.status = 'FT' AND m2.mst_utc < %s), 1.2) as h_home_goals,
|
||||||
|
COALESCE((SELECT AVG(m2.score_away) FROM matches m2 WHERE m2.away_team_id = %s AND m2.status = 'FT' AND m2.mst_utc < %s), 1.2) as a_away_goals,
|
||||||
|
COALESCE(EXTRACT(EPOCH FROM (to_timestamp(%s/1000) - (SELECT MAX(to_timestamp(m2.mst_utc/1000)) FROM matches m2 WHERE m2.home_team_id = %s AND m2.status = 'FT' AND m2.mst_utc < %s)) / 86400), 7) as h_rest,
|
||||||
|
COALESCE(EXTRACT(EPOCH FROM (to_timestamp(%s/1000) - (SELECT MAX(to_timestamp(m2.mst_utc/1000)) FROM matches m2 WHERE m2.away_team_id = %s AND m2.status = 'FT' AND m2.mst_utc < %s)) / 86400), 7) as a_rest,
|
||||||
|
COALESCE((SELECT COUNT(*) FROM match_player_participation mp WHERE mp.match_id = %s AND mp.team_id = %s AND mp.is_starting = true), 11) as h_xi,
|
||||||
|
COALESCE((SELECT COUNT(*) FROM match_player_participation mp WHERE mp.match_id = %s AND mp.team_id = %s AND mp.is_starting = true), 11) as a_xi,
|
||||||
|
COALESCE((SELECT COUNT(*) FILTER (WHERE m2.score_home > m2.score_away)::float / NULLIF(COUNT(*), 0) FROM matches m2 WHERE m2.home_team_id = %s AND m2.away_team_id = m2.away_team_id AND m2.status = 'FT' AND m2.mst_utc < %s), 0.5) as h2h_h_wr,
|
||||||
|
COALESCE((SELECT SUM(pts) FROM (SELECT CASE WHEN m2.score_home > m2.score_away THEN 3 WHEN m2.score_home = m2.score_away THEN 1 ELSE 0 END as pts FROM matches m2 WHERE m2.home_team_id = %s AND m2.status = 'FT' AND m2.mst_utc < %s ORDER BY m2.mst_utc DESC LIMIT 5) sub), 0) as h_form_pts,
|
||||||
|
COALESCE((SELECT SUM(pts) FROM (SELECT CASE WHEN m2.score_away > m2.score_home THEN 3 WHEN m2.score_away = m2.score_home THEN 1 ELSE 0 END as pts FROM matches m2 WHERE m2.away_team_id = %s AND m2.status = 'FT' AND m2.mst_utc < %s ORDER BY m2.mst_utc DESC LIMIT 5) sub), 0) as a_form_pts
|
||||||
|
""", (
|
||||||
|
row['home_team_id'], row['mst_utc'],
|
||||||
|
row['away_team_id'], row['mst_utc'],
|
||||||
|
row['mst_utc'], row['home_team_id'], row['mst_utc'],
|
||||||
|
row['mst_utc'], row['away_team_id'], row['mst_utc'],
|
||||||
|
match_id, row['home_team_id'],
|
||||||
|
match_id, row['away_team_id'],
|
||||||
|
row['home_team_id'], row['away_team_id'], row['mst_utc'],
|
||||||
|
row['home_team_id'], row['mst_utc'],
|
||||||
|
row['away_team_id'], row['mst_utc']
|
||||||
|
))
|
||||||
|
stats = cur.fetchone()
|
||||||
|
|
||||||
|
h_elo = float(row['home_elo'] or 1500)
|
||||||
|
a_elo = float(row['away_elo'] or 1500)
|
||||||
|
h_home_goals = float(stats['h_home_goals'] or 1.2)
|
||||||
|
a_away_goals = float(stats['a_away_goals'] or 1.2)
|
||||||
|
h_rest = float(stats['h_rest'] or 7)
|
||||||
|
a_rest = float(stats['a_rest'] or 7)
|
||||||
|
h_xi = float(stats['h_xi'] or 11)
|
||||||
|
a_xi = float(stats['a_xi'] or 11)
|
||||||
|
h2h_h_wr = float(stats['h2h_h_wr'] or 0.5)
|
||||||
|
h_pts = float(stats['h_form_pts'] or 0)
|
||||||
|
a_pts = float(stats['a_form_pts'] or 0)
|
||||||
|
|
||||||
|
def fatigue(rest):
    """Return the fatigue multiplier for a given rest value.

    ``rest`` below 3 yields 0.85, below 5 yields 0.95, anything else 1.0.
    NOTE(review): ``rest`` is presumably days since the previous match;
    confirm against the query that produces ``h_rest`` / ``a_rest``.
    """
    # Ordered (exclusive upper bound, multiplier) pairs; first match wins.
    for limit, factor in ((3, 0.85), (5, 0.95)):
        if rest < limit:
            return factor
    return 1.0
|
||||||
|
|
||||||
|
h_fat = fatigue(h_rest)
|
||||||
|
a_fat = fatigue(a_rest)
|
||||||
|
h_xg = h_home_goals * h_fat
|
||||||
|
a_xg = a_away_goals * a_fat
|
||||||
|
margin = (1/odds_dict['ms_h']) + (1/odds_dict['ms_d']) + (1/odds_dict['ms_a'])
|
||||||
|
|
||||||
|
features = pd.DataFrame([{
|
||||||
|
'elo_diff': h_elo - a_elo,
|
||||||
|
'h_xg': h_xg, 'a_xg': a_xg,
|
||||||
|
'total_xg': h_xg + a_xg,
|
||||||
|
'pow_diff': (h_elo/100)*h_fat - (a_elo/100)*a_fat,
|
||||||
|
'rest_diff': h_rest - a_rest,
|
||||||
|
'h_fatigue': h_fat, 'a_fatigue': a_fat,
|
||||||
|
'imp_h': (1/odds_dict['ms_h'])/margin,
|
||||||
|
'imp_d': (1/odds_dict['ms_d'])/margin,
|
||||||
|
'imp_a': (1/odds_dict['ms_a'])/margin,
|
||||||
|
'h_xi': h_xi, 'a_xi': a_xi,
|
||||||
|
'h2h_h_wr': h2h_h_wr,
|
||||||
|
'form_diff': h_pts - a_pts
|
||||||
|
}])
|
||||||
|
|
||||||
|
# --- TAHMİNLER ---
|
||||||
|
ms_probs = model_ms.predict(features)[0]
|
||||||
|
p_over = float(model_ou.predict(features)[0])
|
||||||
|
p_btts = float(model_btts.predict(features)[0])
|
||||||
|
|
||||||
|
# --- EN İYİ VALUE PICK ---
|
||||||
|
picks = []
|
||||||
|
for pick, prob, odd in zip(['1', 'X', '2'], ms_probs, [odds_dict['ms_h'], odds_dict['ms_d'], odds_dict['ms_a']]):
|
||||||
|
edge = prob - (1/odd)
|
||||||
|
if edge > 0.05 and prob > 0.45:
|
||||||
|
picks.append({"market": "MS", "pick": pick, "prob": prob, "odds": odd})
|
||||||
|
|
||||||
|
if p_over > 0.55: picks.append({"market": "OU2.5", "pick": "Over", "prob": p_over, "odds": odds_dict.get('ou25_o', 1.85)})
|
||||||
|
if p_btts > 0.55: picks.append({"market": "BTTS", "pick": "Var", "prob": p_btts, "odds": odds_dict.get('btts_y', 1.85)})
|
||||||
|
|
||||||
|
picks.sort(key=lambda x: (x['prob'] + max(0, x['prob'] - 1/x['odds'])*100), reverse=True)
|
||||||
|
best_pick = picks[0] if picks else None
|
||||||
|
|
||||||
|
# --- SONUÇ KONTROLÜ ---
|
||||||
|
res_str = "⏳ Oynanıyor/Bekleniyor"
|
||||||
|
won = None
|
||||||
|
h_score = row['score_home']
|
||||||
|
a_score = row['score_away']
|
||||||
|
|
||||||
|
if is_finished and h_score is not None and a_score is not None:
|
||||||
|
res_str = f"🏁 SONUÇ: {h_score}-{a_score}"
|
||||||
|
if best_pick:
|
||||||
|
p = best_pick['pick']
|
||||||
|
if p == '1': won = h_score > a_score
|
||||||
|
elif p == 'X': won = h_score == a_score
|
||||||
|
elif p == '2': won = a_score > h_score
|
||||||
|
elif p == 'Over': won = (h_score + a_score) > 2.5
|
||||||
|
elif p == 'Var': won = h_score > 0 and a_score > 0
|
||||||
|
|
||||||
|
res_str += " | " + ("✅ KAZANDI" if won else "❌ KAYBETTİ")
|
||||||
|
if won: total_profit += (best_pick['odds'] - 1.0)
|
||||||
|
else: total_profit -= 1.0
|
||||||
|
total_bet += 1
|
||||||
|
if won: total_won += 1
|
||||||
|
|
||||||
|
# Çıktı
|
||||||
|
match_time = time.strftime("%H:%M", time.gmtime(row['mst_utc']/1000))
|
||||||
|
pick_info = f"{best_pick['market']} - {best_pick['pick']} (%{best_pick['prob']*100:.0f} @ {best_pick['odds']:.2f})" if best_pick else "💤 Önerilen Bahis Yok"
|
||||||
|
|
||||||
|
print(f"\n⚽ [{match_time}] {home} vs {away} ({league})")
|
||||||
|
print(f" 🧠 Tahmin: {pick_info}")
|
||||||
|
print(f" {res_str}")
|
||||||
|
|
||||||
|
print("\n" + "="*60)
|
||||||
|
print("📊 GÜNLÜK ÖZET")
|
||||||
|
print("="*60)
|
||||||
|
if total_bet > 0:
|
||||||
|
print(f"🎲 Oynanan Bahis: {total_bet}")
|
||||||
|
print(f"✅ Kazanan: {total_won}")
|
||||||
|
print(f"💰 Toplam Kâr: {total_profit:.2f} Units")
|
||||||
|
print(f"📈 ROI: {(total_profit/total_bet)*100:.1f}%")
|
||||||
|
else:
|
||||||
|
print("📝 Bugün için Value Bahis bulunamadı veya maçlar bitmedi.")
|
||||||
|
|
||||||
|
cur.close()
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
run_live_predictions()
|
||||||
@@ -0,0 +1,22 @@
|
|||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
|
||||||
|
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
sys.path.insert(0, AI_ENGINE_DIR)
|
||||||
|
|
||||||
|
from services.single_match_orchestrator import get_single_match_orchestrator
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# Command-line entry: expects the match id as the first argument, runs the
# single-match orchestrator on it and prints the analysis as JSON.
cli_args = sys.argv[1:]
if not cli_args:
    print("Match ID needed.")
    sys.exit(1)

match_id = cli_args[0].strip()
orch = get_single_match_orchestrator()
result = orch.analyze_match(match_id)

# ensure_ascii=False keeps non-ASCII characters readable in the output.
print(json.dumps(result, indent=2, ensure_ascii=False))
|
||||||
@@ -0,0 +1,317 @@
|
|||||||
|
"""
|
||||||
|
Strategy Generator — Senin Excel mantığını DB üzerinde otomatize eder.
|
||||||
|
|
||||||
|
Mantık:
|
||||||
|
1. Ev sahibi takım X, evinde oran bandı Y'de oynadığında → OU1.5/OU2.5/BTTS oranları
|
||||||
|
2. Deplasman takım Z, deplasmanda oran bandı W'de oynadığında → OU1.5/OU2.5/BTTS oranları
|
||||||
|
3. İkisi de yüksekse → STRATEJİ ÜRET
|
||||||
|
|
||||||
|
Çıktı: Her maç için hangi bahis oynanabilir, neden, ve geçmiş başarı oranı
|
||||||
|
"""
|
||||||
|
import psycopg2
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from collections import defaultdict
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
# DB connection
|
||||||
|
# Connection settings can be overridden through environment variables; the
# previously hard-coded values remain as fallbacks so existing runs keep
# working unchanged.
# NOTE(review): the fallback password is a credential committed to source
# control. Rotate it and drop the default once DB_PASSWORD is provisioned.
import os

conn = psycopg2.connect(
    host=os.environ.get("DB_HOST", "localhost"),
    port=int(os.environ.get("DB_PORT", "15432")),
    dbname=os.environ.get("DB_NAME", "boilerplate_db"),
    user=os.environ.get("DB_USER", "suggestbet"),
    password=os.environ.get("DB_PASSWORD", "SuGGesT2026SecuRe"),
)
|
||||||
|
|
||||||
|
print("=" * 70)
|
||||||
|
print(" STRATEGY GENERATOR — Veritabanından Strateji Üretimi")
|
||||||
|
print("=" * 70)
|
||||||
|
|
||||||
|
# 1. Tüm biten maçları, takım adları ve MS oranlarıyla çek
|
||||||
|
query = """
|
||||||
|
SELECT
|
||||||
|
m.id as match_id,
|
||||||
|
m.home_team_id,
|
||||||
|
m.away_team_id,
|
||||||
|
m.league_id,
|
||||||
|
m.score_home,
|
||||||
|
m.score_away,
|
||||||
|
m.mst_utc,
|
||||||
|
ht.name as home_team,
|
||||||
|
at.name as away_team,
|
||||||
|
l.name as league_name
|
||||||
|
FROM matches m
|
||||||
|
JOIN teams ht ON m.home_team_id = ht.id
|
||||||
|
JOIN teams at ON m.away_team_id = at.id
|
||||||
|
JOIN leagues l ON m.league_id = l.id
|
||||||
|
WHERE m.status = 'FT'
|
||||||
|
AND m.score_home IS NOT NULL
|
||||||
|
ORDER BY m.mst_utc ASC
|
||||||
|
"""
|
||||||
|
df = pd.read_sql(query, conn)
|
||||||
|
print(f"\nToplam biten maç: {len(df):,}")
|
||||||
|
|
||||||
|
# 2. Tüm oranları çek (MS, OU25, BTTS, OU15)
|
||||||
|
odds_query = """
|
||||||
|
SELECT
|
||||||
|
oc.match_id,
|
||||||
|
oc.name as market,
|
||||||
|
os.name as selection,
|
||||||
|
CAST(os.odd_value AS DECIMAL) as odds
|
||||||
|
FROM odd_categories oc
|
||||||
|
JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
|
||||||
|
WHERE oc.name IN (
|
||||||
|
'Maç Sonucu',
|
||||||
|
'2,5 Alt/Üst',
|
||||||
|
'1,5 Alt/Üst',
|
||||||
|
'3,5 Alt/Üst',
|
||||||
|
'Karşılıklı Gol'
|
||||||
|
)
|
||||||
|
"""
|
||||||
|
odds_df = pd.read_sql(odds_query, conn)
|
||||||
|
print(f"Toplam oran kaydı: {len(odds_df):,}")
|
||||||
|
|
||||||
|
# Pivot: her maç için oranları sütunlara çevir
|
||||||
|
def get_odds(match_id, market, selection):
    """Look up one price by scanning the module-level ``odds_df`` frame.

    Returns the first matching ``odds`` value as a float, or None when no row
    matches the (match_id, market, selection) triple.
    """
    same_match = odds_df.match_id == match_id
    same_market = odds_df.market == market
    same_selection = odds_df.selection == selection
    hits = odds_df.loc[same_match & same_market & same_selection, 'odds']
    if len(hits) == 0:
        return None
    return float(hits.iloc[0])
|
||||||
|
|
||||||
|
# Daha verimli: oran lookup dict oluştur
|
||||||
|
print("Oran lookup oluşturuluyor...")
# Build the (match_id, market, selection) -> odds dict in a single pass over
# the columns. Zipping the columns avoids the per-row Series construction that
# DataFrame.iterrows() performs. For a repeated key the later row still wins.
odds_lookup = {
    (mid, market, selection): float(price)
    for mid, market, selection, price in zip(
        odds_df.match_id, odds_df.market, odds_df.selection, odds_df.odds
    )
}
|
||||||
|
|
||||||
|
def get_o(mid, market, sel):
    """Return the odds stored in ``odds_lookup`` for the triple, or None if absent."""
    lookup_key = (mid, market, sel)
    return odds_lookup.get(lookup_key)
|
||||||
|
|
||||||
|
# 3. Her maça oranları ekle
|
||||||
|
print("Maçlara oranlar ekleniyor...")
|
||||||
|
df['odds_ms_h'] = df.match_id.map(lambda x: get_o(x, 'Maç Sonucu', '1'))
|
||||||
|
df['odds_ms_a'] = df.match_id.map(lambda x: get_o(x, 'Maç Sonucu', '2'))
|
||||||
|
df['odds_ms_d'] = df.match_id.map(lambda x: get_o(x, 'Maç Sonucu', '0'))
|
||||||
|
df['odds_ou25_o'] = df.match_id.map(lambda x: get_o(x, '2,5 Alt/Üst', 'Üst'))
|
||||||
|
df['odds_ou25_u'] = df.match_id.map(lambda x: get_o(x, '2,5 Alt/Üst', 'Alt'))
|
||||||
|
df['odds_ou15_o'] = df.match_id.map(lambda x: get_o(x, '1,5 Alt/Üst', 'Üst'))
|
||||||
|
df['odds_ou15_u'] = df.match_id.map(lambda x: get_o(x, '1,5 Alt/Üst', 'Alt'))
|
||||||
|
df['odds_ou35_o'] = df.match_id.map(lambda x: get_o(x, '3,5 Alt/Üst', 'Üst'))
|
||||||
|
df['odds_ou35_u'] = df.match_id.map(lambda x: get_o(x, '3,5 Alt/Üst', 'Alt'))
|
||||||
|
df['odds_btts_y'] = df.match_id.map(lambda x: get_o(x, 'Karşılıklı Gol', 'Var'))
|
||||||
|
df['odds_btts_n'] = df.match_id.map(lambda x: get_o(x, 'Karşılıklı Gol', 'Yok'))
|
||||||
|
|
||||||
|
# Sonuç hesapla
|
||||||
|
df['total_goals'] = df.score_home + df.score_away
|
||||||
|
df['ou15'] = (df.total_goals > 1).astype(int)
|
||||||
|
df['ou25'] = (df.total_goals > 2).astype(int)
|
||||||
|
df['ou35'] = (df.total_goals > 3).astype(int)
|
||||||
|
df['btts'] = ((df.score_home > 0) & (df.score_away > 0)).astype(int)
|
||||||
|
|
||||||
|
print(f"Oranı olan maç sayısı: {df.odds_ms_h.notna().sum():,}")
|
||||||
|
|
||||||
|
# 4. ORAN BANDI fonksiyonu
|
||||||
|
def odds_band(odds):
    """Map a price to its odds-band label.

    Returns None when ``odds`` is missing according to ``pd.isna``. Each band
    includes its lower bound and excludes its upper bound. Values below 1.30
    (including any below 1.00) map to '1.00-1.30'; 6.00 and above map to
    '6.00+'.
    """
    if pd.isna(odds):
        return None

    # (exclusive upper bound, label), in ascending order; first match wins.
    bands = (
        (1.30, '1.00-1.30'),
        (1.50, '1.30-1.50'),
        (1.80, '1.50-1.80'),
        (2.20, '1.80-2.20'),
        (2.80, '2.20-2.80'),
        (4.00, '2.80-4.00'),
        (6.00, '4.00-6.00'),
    )
    for upper, label in bands:
        if odds < upper:
            return label
    return '6.00+'
|
||||||
|
|
||||||
|
# 5. STRATEJİ: Expanding window — sadece geçmiş veriye bakarak tahmin
|
||||||
|
print("\n" + "=" * 70)
|
||||||
|
print(" STRATEJİ BACKTEST — Expanding Window")
|
||||||
|
print("=" * 70)
|
||||||
|
|
||||||
|
# Ev sahibi geçmişi: {team_id: {odds_band: [(ou15, ou25, ou35, btts, ev_kazandi), ...]}}
|
||||||
|
home_history = defaultdict(lambda: defaultdict(list))
|
||||||
|
away_history = defaultdict(lambda: defaultdict(list))
|
||||||
|
|
||||||
|
MIN_MATCHES = 8 # Minimum geçmiş maç sayısı
|
||||||
|
TEST_PCT = 0.30 # Son %30 test
|
||||||
|
N = len(df)
|
||||||
|
test_start = int(N * (1 - TEST_PCT))
|
||||||
|
|
||||||
|
results = {
|
||||||
|
'ou15_over': [], 'ou25_over': [], 'ou35_over': [],
|
||||||
|
'btts_yes': [], 'btts_no': [],
|
||||||
|
'ou25_under': [], 'ou15_under': [],
|
||||||
|
'ms_home': []
|
||||||
|
}
|
||||||
|
|
||||||
|
for i in range(N):
|
||||||
|
row = df.iloc[i]
|
||||||
|
h_odds = row.odds_ms_h
|
||||||
|
a_odds = row.odds_ms_a
|
||||||
|
|
||||||
|
if pd.isna(h_odds) or pd.isna(a_odds):
|
||||||
|
continue
|
||||||
|
|
||||||
|
h_band = odds_band(h_odds)
|
||||||
|
a_band = odds_band(a_odds)
|
||||||
|
|
||||||
|
# TEST: sadece test bölümünde bahis yap
|
||||||
|
if i >= test_start:
|
||||||
|
h_hist = home_history[row.home_team_id][h_band]
|
||||||
|
a_hist = away_history[row.away_team_id][a_band]
|
||||||
|
|
||||||
|
if len(h_hist) >= MIN_MATCHES and len(a_hist) >= MIN_MATCHES:
|
||||||
|
# Ev sahibi bu oran bandında ne yapmış?
|
||||||
|
h_ou15 = np.mean([x[0] for x in h_hist])
|
||||||
|
h_ou25 = np.mean([x[1] for x in h_hist])
|
||||||
|
h_ou35 = np.mean([x[2] for x in h_hist])
|
||||||
|
h_btts = np.mean([x[3] for x in h_hist])
|
||||||
|
h_win = np.mean([x[4] for x in h_hist])
|
||||||
|
|
||||||
|
# Deplasman bu oran bandında ne yapmış?
|
||||||
|
a_ou15 = np.mean([x[0] for x in a_hist])
|
||||||
|
a_ou25 = np.mean([x[1] for x in a_hist])
|
||||||
|
a_ou35 = np.mean([x[2] for x in a_hist])
|
||||||
|
a_btts = np.mean([x[3] for x in a_hist])
|
||||||
|
a_loss = np.mean([x[4] for x in a_hist]) # deplasman kaybetme oranı
|
||||||
|
|
||||||
|
# KOMBİNE SİNYAL
|
||||||
|
sig_ou15 = (h_ou15 + a_ou15) / 2
|
||||||
|
sig_ou25 = (h_ou25 + a_ou25) / 2
|
||||||
|
sig_ou35 = (h_ou35 + a_ou35) / 2
|
||||||
|
sig_btts = (h_btts + a_btts) / 2
|
||||||
|
sig_hw = (h_win + a_loss) / 2 # ev kazanma + deplasman kaybetme
|
||||||
|
|
||||||
|
base = {
|
||||||
|
'match': f"{row.home_team} vs {row.away_team}",
|
||||||
|
'league': row.league_name,
|
||||||
|
'home_team': row.home_team,
|
||||||
|
'away_team': row.away_team,
|
||||||
|
'h_band': h_band,
|
||||||
|
'a_band': a_band,
|
||||||
|
'h_n': len(h_hist),
|
||||||
|
'a_n': len(a_hist),
|
||||||
|
}
|
||||||
|
|
||||||
|
# OU 1.5 OVER
|
||||||
|
if sig_ou15 >= 0.85 and row.odds_ou15_o and row.odds_ou15_o > 1.01:
|
||||||
|
results['ou15_over'].append({
|
||||||
|
**base, 'signal': sig_ou15, 'odds': row.odds_ou15_o,
|
||||||
|
'won': row.ou15 == 1, 'actual_goals': row.total_goals,
|
||||||
|
'h_sig': h_ou15, 'a_sig': a_ou15
|
||||||
|
})
|
||||||
|
|
||||||
|
# OU 2.5 OVER
|
||||||
|
if sig_ou25 >= 0.70 and row.odds_ou25_o and row.odds_ou25_o > 1.10:
|
||||||
|
results['ou25_over'].append({
|
||||||
|
**base, 'signal': sig_ou25, 'odds': row.odds_ou25_o,
|
||||||
|
'won': row.ou25 == 1, 'actual_goals': row.total_goals,
|
||||||
|
'h_sig': h_ou25, 'a_sig': a_ou25
|
||||||
|
})
|
||||||
|
|
||||||
|
# OU 3.5 OVER
|
||||||
|
if sig_ou35 >= 0.60 and row.odds_ou35_o and row.odds_ou35_o > 1.20:
|
||||||
|
results['ou35_over'].append({
|
||||||
|
**base, 'signal': sig_ou35, 'odds': row.odds_ou35_o,
|
||||||
|
'won': row.ou35 == 1, 'actual_goals': row.total_goals,
|
||||||
|
'h_sig': h_ou35, 'a_sig': a_ou35
|
||||||
|
})
|
||||||
|
|
||||||
|
# BTTS YES
|
||||||
|
if sig_btts >= 0.70 and row.odds_btts_y and row.odds_btts_y > 1.10:
|
||||||
|
results['btts_yes'].append({
|
||||||
|
**base, 'signal': sig_btts, 'odds': row.odds_btts_y,
|
||||||
|
'won': row.btts == 1, 'actual_goals': row.total_goals,
|
||||||
|
'h_sig': h_btts, 'a_sig': a_btts
|
||||||
|
})
|
||||||
|
|
||||||
|
# OU 2.5 UNDER (düşük gol beklentisi)
|
||||||
|
if sig_ou25 <= 0.30 and row.odds_ou25_u and row.odds_ou25_u > 1.10:
|
||||||
|
results['ou25_under'].append({
|
||||||
|
**base, 'signal': 1-sig_ou25, 'odds': row.odds_ou25_u,
|
||||||
|
'won': row.ou25 == 0, 'actual_goals': row.total_goals,
|
||||||
|
'h_sig': 1-h_ou25, 'a_sig': 1-a_ou25
|
||||||
|
})
|
||||||
|
|
||||||
|
# MS HOME WIN (ev sahibi kazanma)
|
||||||
|
if sig_hw >= 0.75 and row.odds_ms_h and 1.10 < row.odds_ms_h < 3.50:
|
||||||
|
results['ms_home'].append({
|
||||||
|
**base, 'signal': sig_hw, 'odds': row.odds_ms_h,
|
||||||
|
'won': row.score_home > row.score_away,
|
||||||
|
'actual_goals': row.total_goals,
|
||||||
|
'h_sig': h_win, 'a_sig': a_loss
|
||||||
|
})
|
||||||
|
|
||||||
|
# History güncelle (her zaman)
|
||||||
|
home_history[row.home_team_id][h_band].append((
|
||||||
|
row.ou15, row.ou25, row.ou35, row.btts,
|
||||||
|
int(row.score_home > row.score_away)
|
||||||
|
))
|
||||||
|
away_history[row.away_team_id][a_band].append((
|
||||||
|
row.ou15, row.ou25, row.ou35, row.btts,
|
||||||
|
int(row.score_away < row.score_home) # deplasman kaybetme
|
||||||
|
))
|
||||||
|
|
||||||
|
# 6. SONUÇLARI YAZDIR
|
||||||
|
print(f"\nTest bölümü: son {TEST_PCT*100:.0f}% ({N - test_start:,} maç)")
|
||||||
|
print(f"Minimum geçmiş: {MIN_MATCHES} maç\n")
|
||||||
|
|
||||||
|
for market_name, bets in results.items():
|
||||||
|
if not bets:
|
||||||
|
print(f"\n {market_name}: sinyal yok")
|
||||||
|
continue
|
||||||
|
|
||||||
|
bdf = pd.DataFrame(bets)
|
||||||
|
total = len(bdf)
|
||||||
|
wins = bdf.won.sum()
|
||||||
|
hit = wins / total * 100
|
||||||
|
pnl = (bdf.won * (bdf.odds - 1) - (~bdf.won) * 1).sum()
|
||||||
|
roi = pnl / total * 100
|
||||||
|
avg_odds = bdf.odds.mean()
|
||||||
|
|
||||||
|
print(f"\n{'='*60}")
|
||||||
|
print(f" {market_name.upper()}")
|
||||||
|
print(f"{'='*60}")
|
||||||
|
print(f" Toplam bahis: {total}")
|
||||||
|
print(f" Kazanan: {wins} ({hit:.1f}%)")
|
||||||
|
print(f" Ortalama odds: {avg_odds:.2f}")
|
||||||
|
print(f" PnL: {pnl:+.1f} birim")
|
||||||
|
print(f" ROI: {roi:+.1f}%")
|
||||||
|
|
||||||
|
# Farklı sinyal eşiklerinde performans
|
||||||
|
print(f"\n Sinyal eşik analizi:")
|
||||||
|
for threshold in [0.70, 0.75, 0.80, 0.85, 0.90, 0.95]:
|
||||||
|
sub = bdf[bdf.signal >= threshold]
|
||||||
|
if len(sub) < 5: continue
|
||||||
|
w = sub.won.sum()
|
||||||
|
p = (sub.won * (sub.odds - 1) - (~sub.won) * 1).sum()
|
||||||
|
r = p / len(sub) * 100
|
||||||
|
star = ' ✅ PROFIT' if r > 0 else (' ⚖️ BE' if r > -3 else '')
|
||||||
|
print(f" ≥{threshold:.2f}: {len(sub):5d} bahis, hit={w/len(sub)*100:.1f}%, ROI={r:+.1f}%{star}")
|
||||||
|
|
||||||
|
# En iyi 5 örnek (kazanan)
|
||||||
|
if wins > 0:
|
||||||
|
best = bdf[bdf.won].nlargest(min(5, wins), 'signal')
|
||||||
|
print(f"\n Örnek kazanan bahisler:")
|
||||||
|
for _, b in best.iterrows():
|
||||||
|
print(f" {b.home_team} vs {b.away_team} ({b.league})")
|
||||||
|
print(f" Ev {b.h_band} ({b.h_sig:.0%}) + Dep {b.a_band} ({b.a_sig:.0%}) → sinyal={b.signal:.0%}, odds={b.odds:.2f}, gol={b.actual_goals:.0f}")
|
||||||
|
|
||||||
|
# 7. ÖZET TABLO
|
||||||
|
print("\n\n" + "=" * 70)
|
||||||
|
print(" ÖZET TABLO")
|
||||||
|
print("=" * 70)
|
||||||
|
print(f"{'Market':<15} {'Bahis':>6} {'Hit':>7} {'ROI':>8} {'Avg Odds':>9}")
|
||||||
|
print("-" * 50)
|
||||||
|
for market_name, bets in results.items():
|
||||||
|
if not bets: continue
|
||||||
|
bdf = pd.DataFrame(bets)
|
||||||
|
total = len(bdf)
|
||||||
|
wins = bdf.won.sum()
|
||||||
|
hit = wins / total * 100
|
||||||
|
pnl = (bdf.won * (bdf.odds - 1) - (~bdf.won) * 1).sum()
|
||||||
|
roi = pnl / total * 100
|
||||||
|
avg_odds = bdf.odds.mean()
|
||||||
|
print(f"{market_name:<15} {total:>6} {hit:>6.1f}% {roi:>+7.1f}% {avg_odds:>8.2f}")
|
||||||
|
|
||||||
|
conn.close()
|
||||||
|
print("\n✅ Tamamlandı!")
|
||||||
@@ -0,0 +1,188 @@
|
|||||||
|
"""
|
||||||
|
XGBoost Model Training (Advanced Basketball V21)
|
||||||
|
================================================
|
||||||
|
Trains XGBoost models for Match Winner (ML), Totals (O/U), and Spread.
|
||||||
|
Builds upon 54 deep tactical features (Rebounds, FG%, Q1-Q4 pacing, advanced odds).
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python3 scripts/train_advanced_basketball.py
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import xgboost as xgb
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
# Configuration
|
||||||
|
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
sys.path.insert(0, AI_ENGINE_DIR)
|
||||||
|
|
||||||
|
DATA_PATH = os.path.join(AI_ENGINE_DIR, "data", "advanced_basketball_training_data.csv")
|
||||||
|
MODEL_DIR = os.path.join(AI_ENGINE_DIR, "models", "bin")
|
||||||
|
|
||||||
|
os.makedirs(MODEL_DIR, exist_ok=True)
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# Deep Statistical Feature Matrix (54 Features)
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
FEATURES = [
|
||||||
|
# Form
|
||||||
|
"home_winning_streak", "away_winning_streak",
|
||||||
|
"home_win_rate", "away_win_rate",
|
||||||
|
|
||||||
|
# Home Team Offense
|
||||||
|
"home_pts_avg", "home_reb_avg", "home_ast_avg", "home_stl_avg", "home_blk_avg", "home_tov_avg",
|
||||||
|
"home_fg_pct", "home_3pt_pct", "home_ft_pct",
|
||||||
|
"home_q1_avg", "home_q2_avg", "home_q3_avg", "home_q4_avg",
|
||||||
|
|
||||||
|
# Home Team Defense
|
||||||
|
"home_conc_pts", "home_conc_reb", "home_conc_ast", "home_conc_tov",
|
||||||
|
"home_conc_fg_pct", "home_conc_3pt_pct",
|
||||||
|
|
||||||
|
# Away Team Offense
|
||||||
|
"away_pts_avg", "away_reb_avg", "away_ast_avg", "away_stl_avg", "away_blk_avg", "away_tov_avg",
|
||||||
|
"away_fg_pct", "away_3pt_pct", "away_ft_pct",
|
||||||
|
"away_q1_avg", "away_q2_avg", "away_q3_avg", "away_q4_avg",
|
||||||
|
|
||||||
|
# Away Team Defense
|
||||||
|
"away_conc_pts", "away_conc_reb", "away_conc_ast", "away_conc_tov",
|
||||||
|
"away_conc_fg_pct", "away_conc_3pt_pct",
|
||||||
|
|
||||||
|
# H2H Features
|
||||||
|
"h2h_total_matches", "h2h_home_win_rate",
|
||||||
|
"h2h_avg_points", "h2h_over140_rate",
|
||||||
|
|
||||||
|
# Odds Features
|
||||||
|
"odds_ml_h", "odds_ml_a",
|
||||||
|
"odds_tot_o", "odds_tot_u", "odds_tot_line",
|
||||||
|
"odds_spread_h", "odds_spread_a", "odds_spread_line",
|
||||||
|
]
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# Core Training Function
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
def train_model(df, target_col, model_name, params=None):
    """Train, evaluate and save one XGBoost classifier.

    Args:
        df: Training frame holding every column in ``FEATURES`` plus
            ``target_col`` (and the odds columns used by the filters below).
        target_col: Label column. ``"label_tot"`` and ``"label_spread"``
            apply an odds-line filter before training; any other value
            trains on all rows.
        model_name: Used in log output and as the saved file stem.
        params: Keyword arguments for ``xgb.XGBClassifier``. When None, a
            default binary-logistic configuration is used.

    Returns:
        The fitted ``xgb.XGBClassifier``. The model is also written to
        ``MODEL_DIR/<model_name>.json``.
    """
    print(f"\n--- Training {model_name} ---")

    # For Totals and Spread, drop rows whose odds line was not populated.
    if target_col == "label_tot":
        line = df["odds_tot_line"]
        df_filtered = df[(line > 50) & (line < 300)].copy()
    elif target_col == "label_spread":
        # NOTE(review): 0.0 and 1.9 look like placeholder defaults written
        # when no spread odds were matched; confirm against the extractor.
        df_filtered = df[(abs(df["odds_spread_line"]) > 0.0) | (df["odds_spread_h"] != 1.9)].copy()
    else:
        df_filtered = df.copy()

    X = df_filtered[FEATURES]
    y = df_filtered[target_col]

    print(f"Data Shape: {X.shape}")

    # Random (non-chronological) hold-out of 15% of the rows.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)

    # Defaults for XGBoost
    if params is None:
        params = {
            'objective': 'binary:logistic',
            'eval_metric': 'logloss',
            'max_depth': 6,
            'learning_rate': 0.05,
            'n_estimators': 300,
            'subsample': 0.8,
            'colsample_bytree': 0.8,
            'random_state': 42
        }

    clf = xgb.XGBClassifier(**params)
    clf.fit(
        X_train, y_train,
        eval_set=[(X_train, y_train), (X_test, y_test)],
        verbose=50
    )

    y_pred = clf.predict(X_test)
    acc = accuracy_score(y_test, y_pred)

    print(f"\n[{model_name}] Metrics:")
    print(f"Accuracy : {acc:.4f}")
    if len(np.unique(y_train)) == 2:
        # Binary-averaged precision/recall raise on non-binary targets, so
        # compute them only inside the guard that is meant to protect them.
        prec = precision_score(y_test, y_pred, zero_division=0)
        rec = recall_score(y_test, y_pred, zero_division=0)
        print(f"Precision: {prec:.4f}")
        print(f"Recall : {rec:.4f}")

    # Display Top 10 Feature Importances (fewer if the feature list is shorter).
    importances = clf.feature_importances_
    sorted_idx = np.argsort(importances)[::-1]
    print("\nTop 10 Feature Importances:")
    for i in range(min(10, len(sorted_idx))):
        print(f" {i+1}. {FEATURES[sorted_idx[i]]}: {importances[sorted_idx[i]]:.4f}")

    # Save
    save_path = os.path.join(MODEL_DIR, f"{model_name}.json")
    clf.save_model(save_path)
    print(f"Saved to: {save_path}")
    return clf
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
if not os.path.exists(DATA_PATH):
|
||||||
|
print(f"ERROR: Training data not found at {DATA_PATH}")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
print(f"Loading data from {DATA_PATH}")
|
||||||
|
df = pd.read_csv(DATA_PATH)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# 1. Match Winner (Moneyline)
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
ml_params = {
|
||||||
|
'objective': 'binary:logistic',
|
||||||
|
'eval_metric': 'logloss',
|
||||||
|
'max_depth': 5,
|
||||||
|
'learning_rate': 0.03,
|
||||||
|
'n_estimators': 250,
|
||||||
|
'subsample': 0.85,
|
||||||
|
'colsample_bytree': 0.8,
|
||||||
|
'random_state': 42
|
||||||
|
}
|
||||||
|
train_model(df, "label_ml", "basketball_v21_ml", ml_params)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# 2. Match Totals (Over / Under)
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Finding O/U against dynamic line needs complex relationships
|
||||||
|
tot_params = {
|
||||||
|
'objective': 'binary:logistic',
|
||||||
|
'eval_metric': 'logloss',
|
||||||
|
'max_depth': 6,
|
||||||
|
'learning_rate': 0.05,
|
||||||
|
'n_estimators': 350,
|
||||||
|
'subsample': 0.8,
|
||||||
|
'colsample_bytree': 0.8,
|
||||||
|
'random_state': 42
|
||||||
|
}
|
||||||
|
train_model(df, "label_tot", "basketball_v21_tot", tot_params)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# 3. Spread (Handicap Cover)
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
spread_params = {
|
||||||
|
'objective': 'binary:logistic',
|
||||||
|
'eval_metric': 'logloss',
|
||||||
|
'max_depth': 6,
|
||||||
|
'learning_rate': 0.04,
|
||||||
|
'n_estimators': 300,
|
||||||
|
'subsample': 0.8,
|
||||||
|
'colsample_bytree': 0.8,
|
||||||
|
'random_state': 42
|
||||||
|
}
|
||||||
|
train_model(df, "label_spread", "basketball_v21_spread", spread_params)
|
||||||
|
|
||||||
|
print("\n🏁 Advanced V21 Basketball Models trained successfully.")
|
||||||
@@ -0,0 +1,135 @@
|
|||||||
|
"""
|
||||||
|
XGBoost Market Model Trainer (Basketball)
|
||||||
|
=========================================
|
||||||
|
Trains specialized XGBoost models for basketball betting markets.
|
||||||
|
Models:
|
||||||
|
1. ML (Match Result) - Binary (Home Win / Away Win)
|
||||||
|
2. Totals (Over/Under) - Binary (Over / Under dynamic line)
|
||||||
|
3. Spread (Handicap) - Binary (Home Cover / Away Cover)
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python3 scripts/train_basketball_markets.py
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import pickle
|
||||||
|
import pandas as pd
|
||||||
|
import xgboost as xgb
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
|
||||||
|
|
||||||
|
# Config
|
||||||
|
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
DATA_PATH = os.path.join(AI_ENGINE_DIR, "data", "basketball_training_data.csv")
|
||||||
|
MODELS_DIR = os.path.join(AI_ENGINE_DIR, "models", "xgboost", "basketball")
|
||||||
|
|
||||||
|
os.makedirs(MODELS_DIR, exist_ok=True)
|
||||||
|
|
||||||
|
# Feature Columns
|
||||||
|
FEATURES = [
|
||||||
|
# Form
|
||||||
|
"home_points_avg", "home_conceded_avg",
|
||||||
|
"away_points_avg", "away_conceded_avg",
|
||||||
|
"home_winning_streak", "away_winning_streak",
|
||||||
|
"home_win_rate", "away_win_rate",
|
||||||
|
|
||||||
|
# H2H
|
||||||
|
"h2h_total_matches", "h2h_home_win_rate",
|
||||||
|
"h2h_avg_points", "h2h_over140_rate",
|
||||||
|
|
||||||
|
# Odds
|
||||||
|
"odds_ml_h", "odds_ml_a",
|
||||||
|
"odds_tot_o", "odds_tot_u", "odds_tot_line",
|
||||||
|
"odds_spread_h", "odds_spread_a", "odds_spread_line"
|
||||||
|
]
|
||||||
|
|
||||||
|
def load_data():
    """Load the basketball training CSV and zero-fill missing feature values.

    Exits the process with status 1 when no file exists at ``DATA_PATH``.
    Only columns listed in ``FEATURES`` are filled. Label columns keep their
    NaNs so the ``notna()`` filter in ``train_binary_model`` can drop
    unlabeled rows instead of training on them as class 0.

    Returns:
        pandas.DataFrame with the loaded rows.
    """
    if not os.path.exists(DATA_PATH):
        print(f"❌ Data file not found: {DATA_PATH}")
        sys.exit(1)

    print(f"📦 Loading data from {DATA_PATH}...")
    df = pd.read_csv(DATA_PATH)

    # A blanket df.fillna(0) would also turn missing labels into zeros.
    feature_cols = [col for col in FEATURES if col in df.columns]
    if feature_cols:
        df[feature_cols] = df[feature_cols].fillna(0)

    print(f" Shape: {df.shape}")
    return df
|
||||||
|
|
||||||
|
def train_binary_model(df, target_col, model_name):
    """Generic trainer for Binary XGBoost models (ML, Totals, Spread).

    Args:
        df: Frame containing every column in ``FEATURES`` plus ``target_col``.
        target_col: Label column; rows where it is NaN are excluded.
        model_name: Used in log output and as the pickle file stem.

    Returns:
        None. The fitted model is pickled to ``MODELS_DIR/<model_name>.pkl``.
        Nothing is trained or saved when no row has a label.
    """
    print(f"\n🚀 Training {model_name} (Target: {target_col})...")

    valid_df = df[df[target_col].notna()].copy()
    if valid_df.empty:
        print(f" ⚠️ No valid data for {target_col}, skipping.")
        return

    X = valid_df[FEATURES]
    y = valid_df[target_col].astype(int)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    params = {
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'eta': 0.05,
        'max_depth': 6,
        'subsample': 0.8,
        'colsample_bytree': 0.8,
        'nthread': 4,
        'seed': 42
    }

    model = xgb.XGBClassifier(**params, n_estimators=1000, early_stopping_rounds=50)

    # NOTE(review): the same hold-out drives early stopping and the metrics
    # printed below, so those metrics are likely optimistic.
    model.fit(
        X_train, y_train,
        eval_set=[(X_test, y_test)],
        verbose=False
    )

    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)[:, 1]

    acc = accuracy_score(y_test, y_pred)
    try:
        auc = roc_auc_score(y_test, y_prob)
    except ValueError:
        # roc_auc_score rejects a hold-out that contains a single class;
        # report 0.0 in that case instead of swallowing every exception.
        auc = 0.0

    print(f" ✅ Finished! Best Iteration: {model.best_iteration}")
    print(f" 📊 Accuracy: {acc:.4f} | ROC AUC: {auc:.4f}")
    print(classification_report(y_test, y_pred, zero_division=0))

    # Save Model
    model_path = os.path.join(MODELS_DIR, f"{model_name}.pkl")
    with open(model_path, "wb") as f:
        pickle.dump(model, f)
    print(f" 💾 Saved to {model_path}")

    # Print the top features; best-effort, a failure here must not abort training.
    try:
        booster = model.get_booster()
        importance = booster.get_score(importance_type="gain")
        sorted_imp = sorted(importance.items(), key=lambda x: x[1], reverse=True)[:5]
        print(" 🔍 Top 5 Features (Gain):")
        for ft, score in sorted_imp:
            print(f" - {ft}: {score:.2f}")
    except Exception as e:
        print(f" ⚠️ Could not extract feature importance: {e}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
df = load_data()
|
||||||
|
|
||||||
|
# 1. Moneyline (ML) Model -> Targets Home Win (0) vs Away Win (1)
|
||||||
|
train_binary_model(df, "label_ml", "basketball_ml_v1")
|
||||||
|
|
||||||
|
# 2. Totals (Over/Under) Model -> Targets Under (0) vs Over (1) against 'odds_tot_line'
|
||||||
|
train_binary_model(df, "label_tot", "basketball_tot_v1")
|
||||||
|
|
||||||
|
# 3. Spread (Handicap) Model -> Targets Away Cover (0) vs Home Cover (1) against 'odds_spread_line'
|
||||||
|
train_binary_model(df, "label_spread", "basketball_spread_v1")
|
||||||
|
|
||||||
|
print("\n🎉 All Basketball Models Trained Successfully!")
|
||||||
@@ -0,0 +1,204 @@
|
|||||||
|
"""
|
||||||
|
Train basketball V25-style market models.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Any, Dict, List, Tuple
|
||||||
|
|
||||||
|
import lightgbm as lgb
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import xgboost as xgb
|
||||||
|
from sklearn.metrics import accuracy_score, classification_report, log_loss
|
||||||
|
|
||||||
|
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
sys.path.insert(0, AI_ENGINE_DIR)
|
||||||
|
|
||||||
|
from models.basketball_v25_features import DEFAULT_FEATURE_COLS
|
||||||
|
|
||||||
|
DATA_PATH = os.path.join(AI_ENGINE_DIR, "data", "basketball_training_data_v25.csv")
|
||||||
|
MODELS_DIR = os.path.join(AI_ENGINE_DIR, "models", "basketball_v25")
|
||||||
|
REPORTS_DIR = os.path.join(AI_ENGINE_DIR, "reports", "training_basketball_v25")
|
||||||
|
|
||||||
|
os.makedirs(MODELS_DIR, exist_ok=True)
|
||||||
|
os.makedirs(REPORTS_DIR, exist_ok=True)
|
||||||
|
|
||||||
|
MARKETS = [
|
||||||
|
{"target": "label_ml", "name": "ml"},
|
||||||
|
{"target": "label_total", "name": "total"},
|
||||||
|
{"target": "label_spread", "name": "spread"},
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def load_data() -> pd.DataFrame:
    """Read the basketball V25 training CSV and normalise its feature columns.

    Returns:
        The CSV at ``DATA_PATH`` as a DataFrame in which every column named in
        ``DEFAULT_FEATURE_COLS`` exists and contains no missing values
        (absent columns are created as 0.0, NaNs are replaced by 0.0).
        Columns outside ``DEFAULT_FEATURE_COLS`` are left untouched.

    Raises:
        FileNotFoundError: if ``DATA_PATH`` does not exist.
    """
    if os.path.exists(DATA_PATH):
        dataset = pd.read_csv(DATA_PATH)
    else:
        raise FileNotFoundError(DATA_PATH)

    # Create any feature the CSV lacks so the column selection below is valid.
    absent = [name for name in DEFAULT_FEATURE_COLS if name not in dataset.columns]
    for name in absent:
        dataset[name] = 0.0

    dataset[DEFAULT_FEATURE_COLS] = dataset[DEFAULT_FEATURE_COLS].fillna(0.0)
    return dataset
|
||||||
|
|
||||||
|
|
||||||
|
def temporal_split(frame: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Split ``frame`` chronologically into train / validation / test parts.

    Rows are ordered by the ``mst_utc`` column; roughly the first 70% become
    the training set, the next 15% the validation set and the remainder the
    test set.

    Args:
        frame: DataFrame containing an ``mst_utc`` column to order by.

    Returns:
        ``(train, val, test)`` as independent copies. The three parts are
        always disjoint and together cover every row exactly once.
    """
    # A stable sort keeps rows with identical timestamps in their input order,
    # so repeated runs produce the same partitions.
    ordered = frame.sort_values("mst_utc", kind="mergesort").reset_index(drop=True)
    size = len(ordered)

    train_end = max(int(size * 0.70), 1)
    val_end = max(int(size * 0.85), train_end + 1)
    # Keep at least one row for the test set when possible, but never let the
    # validation boundary fall behind the training boundary: for very small
    # frames that would make the test slice overlap the training slice.
    val_end = max(min(val_end, size - 1), train_end)

    return (
        ordered.iloc[:train_end].copy(),
        ordered.iloc[train_end:val_end].copy(),
        ordered.iloc[val_end:].copy(),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def train_xgb(X_train, y_train, X_val, y_val):
    """Fit a binary-logistic XGBoost booster with early stopping.

    Args:
        X_train, y_train: training features and labels.
        X_val, y_val: validation features and labels, passed in the
            evaluation list under the name ``"val"``.

    Returns:
        The object returned by ``xgb.train`` (up to 1200 boosting rounds,
        early stopping after 60 rounds, progress logged every 100 rounds).
    """
    booster_params = dict(
        objective="binary:logistic",
        eval_metric="logloss",
        max_depth=6,
        eta=0.04,
        subsample=0.84,
        colsample_bytree=0.82,
        min_child_weight=4,
        gamma=0.08,
        n_jobs=4,
        random_state=42,
    )

    fit_matrix = xgb.DMatrix(X_train, label=y_train)
    holdout_matrix = xgb.DMatrix(X_val, label=y_val)
    watchlist = [(fit_matrix, "train"), (holdout_matrix, "val")]

    booster = xgb.train(
        booster_params,
        fit_matrix,
        num_boost_round=1200,
        evals=watchlist,
        early_stopping_rounds=60,
        verbose_eval=100,
    )
    return booster
|
||||||
|
|
||||||
|
|
||||||
|
def train_lgb(X_train, y_train, X_val, y_val):
    """Fit a binary LightGBM booster with early stopping.

    Args:
        X_train, y_train: training features and labels.
        X_val, y_val: validation features and labels; the validation dataset
            is built with the training dataset as its ``reference``.

    Returns:
        The object returned by ``lgb.train`` (up to 1200 boosting rounds,
        early-stopping callback with 60 rounds, evaluation logged every 100).
    """
    booster_params = dict(
        objective="binary",
        metric="binary_logloss",
        learning_rate=0.04,
        max_depth=6,
        feature_fraction=0.82,
        bagging_fraction=0.84,
        bagging_freq=5,
        min_child_samples=24,
        n_jobs=4,
        seed=42,
        verbose=-1,
    )

    fit_set = lgb.Dataset(X_train, label=y_train)
    holdout_set = lgb.Dataset(X_val, label=y_val, reference=fit_set)

    stop_and_log = [
        lgb.early_stopping(stopping_rounds=60),
        lgb.log_evaluation(period=100),
    ]

    booster = lgb.train(
        booster_params,
        fit_set,
        num_boost_round=1200,
        valid_sets=[fit_set, holdout_set],
        valid_names=["train", "val"],
        callbacks=stop_and_log,
    )
    return booster
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate_binary(model: Any, X_test, y_test, model_type: str) -> Tuple[np.ndarray, Dict[str, float]]:
    """Score a trained binary model on the test set.

    Args:
        model: trained booster; treated as XGBoost when ``model_type`` is
            ``"xgb"`` and as a LightGBM-style model otherwise.
        X_test: test features.
        y_test: test labels.
        model_type: ``"xgb"`` selects the DMatrix prediction path; any other
            value selects ``model.predict(X, num_iteration=best_iteration)``.

    Returns:
        ``(probs, metrics)`` where ``probs`` are the predicted probabilities
        clipped to ``[1e-6, 1 - 1e-6]`` and ``metrics`` holds ``accuracy``
        and ``logloss`` rounded to 4 decimals.

    Side effects:
        Prints a classification report for the 0.5-threshold predictions.
    """
    # NOTE(review): the LightGBM path limits prediction to best_iteration,
    # while the XGBoost path calls predict() without an iteration range —
    # confirm whether both are meant to use the early-stopped iteration.
    if model_type != "xgb":
        raw_scores = model.predict(X_test, num_iteration=model.best_iteration)
    else:
        raw_scores = model.predict(xgb.DMatrix(X_test))

    # Clip away exact 0/1 so log-loss stays finite.
    probs = np.clip(np.asarray(raw_scores, dtype=float), 1e-6, 1.0 - 1e-6)
    preds = (probs >= 0.5).astype(int)

    accuracy = round(float(accuracy_score(y_test, preds)), 4)
    logloss = round(float(log_loss(y_test, probs)), 4)
    metrics = {"accuracy": accuracy, "logloss": logloss}

    print(classification_report(y_test, preds, zero_division=0))
    return probs, metrics
|
||||||
|
|
||||||
|
|
||||||
|
def train_market(frame: pd.DataFrame, market_name: str, target_col: str) -> Dict[str, Any]:
    """Train, evaluate and persist the XGBoost + LightGBM pair for one market.

    Args:
        frame: full training DataFrame (features plus label columns).
        market_name: short market identifier used in logs and file names.
        target_col: label column for this market; rows where it is missing
            are ignored.

    Returns:
        A report dict. When fewer than 400 labelled rows exist the market is
        skipped (``{"skipped": True, "reason": ..., "samples": ...}``).
        Otherwise it contains sample counts, per-model and ensemble metrics
        (accuracy / logloss on the test split) and the saved model paths.

    Side effects:
        Writes ``xgb_basketball_v25_<market>.json`` and
        ``lgb_basketball_v25_<market>.txt`` into ``MODELS_DIR``.
    """
    labelled = frame[frame[target_col].notna()].copy()
    labelled_count = int(len(labelled))
    if labelled_count < 400:
        return {"skipped": True, "reason": "not_enough_samples", "samples": labelled_count}

    def _features_and_labels(part: pd.DataFrame):
        # Feature matrix and integer label vector for one split.
        return part[DEFAULT_FEATURE_COLS].values, part[target_col].astype(int).values

    train_df, val_df, test_df = temporal_split(labelled)
    X_train, y_train = _features_and_labels(train_df)
    X_val, y_val = _features_and_labels(val_df)
    X_test, y_test = _features_and_labels(test_df)

    print(f"\n[MARKET] {market_name.upper()} samples={labelled_count}")
    xgb_model = train_xgb(X_train, y_train, X_val, y_val)
    lgb_model = train_lgb(X_train, y_train, X_val, y_val)

    xgb_probs, xgb_metrics = evaluate_binary(xgb_model, X_test, y_test, "xgb")
    lgb_probs, lgb_metrics = evaluate_binary(lgb_model, X_test, y_test, "lgb")

    # Simple equal-weight average of the two models' probabilities.
    blended = np.clip((xgb_probs + lgb_probs) / 2.0, 1e-6, 1.0 - 1e-6)
    blended_labels = (blended >= 0.5).astype(int)
    ensemble_metrics = {
        "accuracy": round(float(accuracy_score(y_test, blended_labels)), 4),
        "logloss": round(float(log_loss(y_test, blended)), 4),
    }

    xgb_path = os.path.join(MODELS_DIR, f"xgb_basketball_v25_{market_name}.json")
    lgb_path = os.path.join(MODELS_DIR, f"lgb_basketball_v25_{market_name}.txt")
    xgb_model.save_model(xgb_path)
    lgb_model.save_model(lgb_path)

    return {
        "skipped": False,
        "samples": labelled_count,
        "train_samples": int(len(train_df)),
        "val_samples": int(len(val_df)),
        "test_samples": int(len(test_df)),
        "xgb": xgb_metrics,
        "lgb": lgb_metrics,
        "ensemble": ensemble_metrics,
        "xgb_path": xgb_path,
        "lgb_path": lgb_path,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Train every market in ``MARKETS`` and write the run artefacts.

    Writes two JSON files: the feature column list
    (``MODELS_DIR/feature_cols.json``) and the per-market metrics report
    (``REPORTS_DIR/basketball_v25_market_metrics.json``).
    """
    print("[INFO] training basketball_v25 started", flush=True)
    frame = load_data()

    # Timestamp is taken before training starts, as part of the report header.
    started_at = datetime.utcnow().isoformat() + "Z"
    row_count = int(len(frame))

    market_results: Dict[str, Any] = {}
    for spec in MARKETS:
        market_results[spec["name"]] = train_market(frame, spec["name"], spec["target"])

    report: Dict[str, Any] = {
        "trained_at": started_at,
        "rows": row_count,
        "markets": market_results,
    }

    feature_path = os.path.join(MODELS_DIR, "feature_cols.json")
    report_path = os.path.join(REPORTS_DIR, "basketball_v25_market_metrics.json")

    for target_path, payload in ((feature_path, DEFAULT_FEATURE_COLS), (report_path, report)):
        with open(target_path, "w", encoding="utf-8") as handle:
            json.dump(payload, handle, indent=2)

    print(f"[OK] feature_cols={feature_path}", flush=True)
    print(f"[OK] report={report_path}", flush=True)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
|
||||||
@@ -0,0 +1,423 @@
|
|||||||
|
"""
|
||||||
|
Calibration Training Script
|
||||||
|
===========================
|
||||||
|
Trains Isotonic Regression calibration models for all betting markets.
|
||||||
|
|
||||||
|
This script:
|
||||||
|
1. Fetches historical match data with predictions and actual results
|
||||||
|
2. Trains Isotonic Regression models for each market
|
||||||
|
3. Calculates calibration metrics (Brier Score, ECE)
|
||||||
|
4. Saves models to ai-engine/models/calibration/
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
# Train on last 90 days of data
|
||||||
|
python3 ai-engine/scripts/train_calibration.py
|
||||||
|
|
||||||
|
# Train on specific date range
|
||||||
|
python3 ai-engine/scripts/train_calibration.py --start 2026-01-01 --end 2026-02-15
|
||||||
|
|
||||||
|
# Train only specific markets
|
||||||
|
python3 ai-engine/scripts/train_calibration.py --markets ou25 btts ms_home
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import argparse
|
||||||
|
import psycopg2
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from typing import Dict, List, Tuple, Any, Optional
|
||||||
|
|
||||||
|
# Setup path for ai-engine imports
|
||||||
|
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
sys.path.insert(0, AI_ENGINE_DIR)
|
||||||
|
|
||||||
|
from models.calibration import get_calibrator, SUPPORTED_MARKETS
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# CONFIG
|
||||||
|
# =============================================================================
|
||||||
|
TOP_LEAGUES_PATH = os.path.join(
|
||||||
|
os.path.dirname(os.path.dirname(AI_ENGINE_DIR)),
|
||||||
|
"top_leagues.json"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Default: last 90 days
|
||||||
|
DEFAULT_START_DATE = (datetime.utcnow() - timedelta(days=90)).strftime("%Y-%m-%d")
|
||||||
|
DEFAULT_END_DATE = (datetime.utcnow() - timedelta(days=1)).strftime("%Y-%m-%d")
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# DB CONNECTION
|
||||||
|
# =============================================================================
|
||||||
|
def get_conn():
    """Open a PostgreSQL connection using the ``DATABASE_URL`` env variable.

    Everything from the first ``?schema=`` onwards is cut off the URL before
    it is handed to ``psycopg2.connect``.

    Raises:
        ValueError: if ``DATABASE_URL`` is unset or empty.
    """
    dsn = os.environ.get("DATABASE_URL")
    if not dsn:
        raise ValueError("DATABASE_URL not set")

    # partition() returns the whole string unchanged when the marker is absent.
    dsn, _, _ = dsn.partition("?schema=")
    return psycopg2.connect(dsn)
|
||||||
|
|
||||||
|
|
||||||
|
def load_top_league_ids() -> List[str]:
    """Read the top-league ID list from ``TOP_LEAGUES_PATH``.

    Returns:
        The parsed JSON when it is not a dict; the value under the
        ``"football"`` key (default ``[]``) when it is a dict; an empty list
        (after printing a warning) when the file does not exist.
    """
    if os.path.exists(TOP_LEAGUES_PATH):
        with open(TOP_LEAGUES_PATH, "r") as handle:
            payload = json.load(handle)
        # The file may be either a bare list or a dict keyed by sport.
        return payload.get("football", []) if isinstance(payload, dict) else payload

    print(f"[Warning] top_leagues.json not found at {TOP_LEAGUES_PATH}")
    return []
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# DATA EXTRACTION
|
||||||
|
# =============================================================================
|
||||||
|
def fetch_training_data(
    cur,
    start_date: str,
    end_date: str,
    league_ids: Optional[List[str]] = None,
) -> pd.DataFrame:
    """
    Fetch finished matches with their odds and results for calibration training.

    Args:
        cur: open DB cursor (``execute`` / ``fetchall`` / ``description``).
        start_date: first day to include, ``YYYY-MM-DD``, interpreted as UTC.
        end_date: last day to include (inclusive), ``YYYY-MM-DD``, UTC.
        league_ids: optional league filter; ``None`` or an empty list means
            no league filtering.

    Returns DataFrame with columns:
        - match_id, home_team_id, away_team_id
        - score_home, score_away, ht_score_home, ht_score_away
        - mst_utc
        - ms_h, ms_d, ms_a (match result odds)
        - ou25_over, ou25_under, ou15_over, ou35_over
        - btts_yes, btts_no
    """
    from datetime import timezone

    def _utc_midnight_ms(day: str) -> int:
        # Parse the day as UTC midnight. A naive datetime would be converted
        # using the machine's local zone and shift the window on non-UTC hosts,
        # while the bounds are compared against the `mst_utc` column.
        parsed = datetime.strptime(day, "%Y-%m-%d").replace(tzinfo=timezone.utc)
        return int(parsed.timestamp() * 1000)

    start_ms = _utc_midnight_ms(start_date)
    end_ms = _utc_midnight_ms(end_date) + 86400000  # +1 day: end_date is inclusive

    # Build league filter (values are always passed as bound parameters).
    league_filter = ""
    params = [start_ms, end_ms]
    if league_ids:
        placeholders = ",".join(["%s"] * len(league_ids))
        league_filter = f"AND m.league_id IN ({placeholders})"
        params.extend(league_ids)

    query = f"""
        SELECT
            m.id as match_id,
            m.home_team_id,
            m.away_team_id,
            m.score_home,
            m.score_away,
            m.ht_score_home,
            m.ht_score_away,
            m.mst_utc,
            -- Odds from odd_categories/selections
            MAX(CASE WHEN oc.name = 'Maç Sonucu' AND os.name = '1' THEN os.odd_value END) as ms_h,
            MAX(CASE WHEN oc.name = 'Maç Sonucu' AND os.name = 'X' THEN os.odd_value END) as ms_d,
            MAX(CASE WHEN oc.name = 'Maç Sonucu' AND os.name = '2' THEN os.odd_value END) as ms_a,
            MAX(CASE WHEN oc.name = '2,5 Alt/Üst' AND os.name = 'Üst' THEN os.odd_value END) as ou25_over,
            MAX(CASE WHEN oc.name = '2,5 Alt/Üst' AND os.name = 'Alt' THEN os.odd_value END) as ou25_under,
            MAX(CASE WHEN oc.name = '1,5 Alt/Üst' AND os.name = 'Üst' THEN os.odd_value END) as ou15_over,
            MAX(CASE WHEN oc.name = '3,5 Alt/Üst' AND os.name = 'Üst' THEN os.odd_value END) as ou35_over,
            MAX(CASE WHEN oc.name = 'Karşılıklı Gol' AND os.name = 'Var' THEN os.odd_value END) as btts_yes,
            MAX(CASE WHEN oc.name = 'Karşılıklı Gol' AND os.name = 'Yok' THEN os.odd_value END) as btts_no
        FROM matches m
        LEFT JOIN odd_categories oc ON oc.match_id = m.id
        LEFT JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
        WHERE m.mst_utc >= %s
          AND m.mst_utc < %s
          AND m.status = 'FT'
          AND m.score_home IS NOT NULL
          AND m.score_away IS NOT NULL
          {league_filter}
        GROUP BY m.id, m.home_team_id, m.away_team_id, m.score_home, m.score_away,
                 m.ht_score_home, m.ht_score_away, m.mst_utc
        ORDER BY m.mst_utc DESC
    """

    cur.execute(query, params)
    rows = cur.fetchall()
    columns = [desc[0] for desc in cur.description]

    df = pd.DataFrame(rows, columns=columns)
    print(f"[Data] Fetched {len(df)} matches from {start_date} to {end_date}")

    return df
|
||||||
|
|
||||||
|
|
||||||
|
def calculate_actual_outcomes(df: pd.DataFrame) -> pd.DataFrame:
    """
    Calculate actual binary outcomes for each market.

    The frame is modified in place and also returned.

    Adds columns:
        - total_goals, ht_total_goals (missing half-time scores count as 0)
        - ms_home_actual: 1 if home won, 0 otherwise
        - ms_draw_actual: 1 if draw, 0 otherwise
        - ms_away_actual: 1 if away won, 0 otherwise
        - ou25_over_actual: 1 if total goals > 2.5, 0 otherwise
        - ou15_over_actual: 1 if total goals > 1.5, 0 otherwise
        - ou35_over_actual: 1 if total goals > 3.5, 0 otherwise
        - btts_yes_actual: 1 if both teams scored, 0 otherwise
        - ht_home_actual / ht_draw_actual / ht_away_actual: half-time result,
          NaN when either half-time score is missing
    """
    home = df["score_home"]
    away = df["score_away"]

    # Total goals
    df["total_goals"] = home + away
    df["ht_total_goals"] = df["ht_score_home"].fillna(0) + df["ht_score_away"].fillna(0)

    # Match result outcomes
    df["ms_home_actual"] = (home > away).astype(int)
    df["ms_draw_actual"] = (home == away).astype(int)
    df["ms_away_actual"] = (home < away).astype(int)

    # Over/Under outcomes
    df["ou25_over_actual"] = (df["total_goals"] > 2.5).astype(int)
    df["ou15_over_actual"] = (df["total_goals"] > 1.5).astype(int)
    df["ou35_over_actual"] = (df["total_goals"] > 3.5).astype(int)

    # BTTS outcome
    df["btts_yes_actual"] = ((home > 0) & (away > 0)).astype(int)

    # Half-Time result. Comparisons against NaN are always False, which would
    # label a match with an unknown half-time score as "not home, not draw,
    # not away". Mark those rows as missing instead of as negative samples.
    ht_home = df["ht_score_home"]
    ht_away = df["ht_score_away"]
    ht_known = ht_home.notna() & ht_away.notna()
    df["ht_home_actual"] = (ht_home > ht_away).astype(int).where(ht_known)
    df["ht_draw_actual"] = (ht_home == ht_away).astype(int).where(ht_known)
    df["ht_away_actual"] = (ht_home < ht_away).astype(int).where(ht_known)

    return df
|
||||||
|
|
||||||
|
|
||||||
|
def calculate_implied_probabilities(df: pd.DataFrame) -> pd.DataFrame:
    """
    Derive odds-implied probabilities and home-odds bucket columns.

    The frame is modified in place and also returned. It must already
    contain ``ms_home_actual``.

    Adds columns:
        - ms_home_prob, ms_draw_prob, ms_away_prob
        - ou25_over_prob, ou15_over_prob, ou35_over_prob
        - btts_yes_prob
        - ms_h_num: numeric home-win odds (unparseable values become NaN)
        - ms_home_<bucket>_prob / ms_home_<bucket>_actual for the buckets
          heavy_fav, fav, balanced and underdog; populated only on rows whose
          home odds fall inside the bucket, NaN elsewhere
    """
    def _implied(raw_odd) -> float:
        """Return 1/odd, or NaN for missing, unparseable or <= 1.0 odds."""
        if pd.isna(raw_odd) or raw_odd is None:
            return np.nan
        try:
            decimal_odd = float(raw_odd)
        except (ValueError, TypeError):
            return np.nan
        return np.nan if decimal_odd <= 1.0 else 1.0 / decimal_odd

    # (odds column, implied-probability column), in the order they are added.
    odds_columns = (
        ("ms_h", "ms_home_prob"),
        ("ms_d", "ms_draw_prob"),
        ("ms_a", "ms_away_prob"),
        ("ou25_over", "ou25_over_prob"),
        ("ou15_over", "ou15_over_prob"),
        ("ou35_over", "ou35_over_prob"),
        ("btts_yes", "btts_yes_prob"),
    )
    for odds_col, prob_col in odds_columns:
        df[prob_col] = df[odds_col].apply(_implied)

    # Context-aware buckets keyed on the bookmaker's home-win odds (ms_h).
    df['ms_h_num'] = pd.to_numeric(df['ms_h'], errors='coerce')
    home_odds = df['ms_h_num']

    bucket_masks = (
        ("heavy_fav", home_odds <= 1.40),                         # odds <= 1.40
        ("fav", (home_odds > 1.40) & (home_odds <= 1.80)),        # 1.40 < odds <= 1.80
        ("balanced", (home_odds > 1.80) & (home_odds <= 2.50)),   # 1.80 < odds <= 2.50
        ("underdog", home_odds > 2.50),                           # odds > 2.50
    )
    for bucket, in_bucket in bucket_masks:
        df.loc[in_bucket, f"ms_home_{bucket}_prob"] = df.loc[in_bucket, 'ms_home_prob']
        df.loc[in_bucket, f"ms_home_{bucket}_actual"] = df.loc[in_bucket, 'ms_home_actual']

    return df
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# MODEL PREDICTIONS (Optional - if you want to calibrate model outputs)
|
||||||
|
# =============================================================================
|
||||||
|
def get_model_predictions(
    df: pd.DataFrame,
    cur,
) -> pd.DataFrame:
    """
    Placeholder hook for attaching model predictions to each match.

    Currently a no-op: the frame is returned unchanged and ``cur`` is not
    used, so the odds-implied probabilities already present in ``df`` stand
    in for model predictions.

    TODO: Implement if calibration should run on raw model outputs instead.
    A full implementation would:
        1. Load the V20 predictor
        2. Run predictions for each match
        3. Store raw model probabilities
    """
    return df
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# MAIN TRAINING
|
||||||
|
# =============================================================================
|
||||||
|
def train_calibration_models(
    df: pd.DataFrame,
    markets: List[str] = None,
    min_samples: int = 100,
) -> Dict[str, Any]:
    """
    Train calibration models for the requested markets.

    Args:
        df: DataFrame with probability and actual-outcome columns
        markets: Markets to train (default: ``SUPPORTED_MARKETS``); names not
            present in the internal market table are ignored
        min_samples: Minimum samples required per market, forwarded to the
            calibrator

    Returns:
        Whatever ``calibrator.train_all_markets`` returns
    """
    requested = SUPPORTED_MARKETS if markets is None else markets
    calibrator = get_calibrator()

    # market -> column stem; the calibrator receives (<stem>_prob, <stem>_actual).
    # NOTE(review): no ht_*_prob columns are created by the visible data prep
    # code (original note: "need to add ht probs") — confirm how the
    # calibrator treats missing columns.
    column_stems = {
        "ms_home": "ms_home",
        "ms_home_heavy_fav": "ms_home_heavy_fav",
        "ms_home_fav": "ms_home_fav",
        "ms_home_balanced": "ms_home_balanced",
        "ms_home_underdog": "ms_home_underdog",
        "ms_draw": "ms_draw",
        "ms_away": "ms_away",
        "ou15": "ou15_over",
        "ou25": "ou25_over",
        "ou35": "ou35_over",
        "btts": "btts_yes",
        "ht_home": "ht_home",
        "ht_draw": "ht_draw",
        "ht_away": "ht_away",
    }

    # Keep only the requested markets, in table order.
    market_config = {}
    for market, stem in column_stems.items():
        if market in requested:
            market_config[market] = (f"{stem}_prob", f"{stem}_actual")

    return calibrator.train_all_markets(
        df=df,
        market_config=market_config,
        min_samples=min_samples,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def print_calibration_report(results: Dict[str, Any]):
    """Print a fixed-width table of calibration metrics, one row per market.

    Args:
        results: mapping of market name to a metrics object exposing
            ``brier_score``, ``calibration_error`` and ``sample_count``.
            A market is reported as trained when it has at least 100 samples.
    """
    rule = "=" * 70

    print("\n" + rule)
    print("CALIBRATION TRAINING REPORT")
    print(rule)

    print(f"\n{'Market':<15} {'Brier':<10} {'ECE':<10} {'Samples':<10} {'Status'}")
    print("-" * 60)

    for market, metrics in results.items():
        if metrics.sample_count >= 100:
            status = "✓ Trained"
        else:
            status = "⚠ Insufficient"
        row = (
            f"{market:<15} {metrics.brier_score:<10.4f} {metrics.calibration_error:<10.4f} "
            f"{metrics.sample_count:<10} {status}"
        )
        print(row)

    print("\n" + rule)
    for footer_line in (
        "Interpretation:",
        " - Brier Score: Lower is better (0 = perfect, 0.25 = random)",
        " - ECE (Expected Calibration Error): Lower is better (0 = perfect)",
        " - Models saved to: ai-engine/models/calibration/",
    ):
        print(footer_line)
    print(rule)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# CLI
|
||||||
|
# =============================================================================
|
||||||
|
def main():
    """CLI entry point: fetch matches, derive labels/probabilities, train, report.

    Options: ``--start`` / ``--end`` (YYYY-MM-DD), ``--markets``,
    ``--min-samples`` and ``--top-leagues-only``.
    """
    from contextlib import closing

    parser = argparse.ArgumentParser(description="Train calibration models")
    parser.add_argument("--start", type=str, default=DEFAULT_START_DATE,
                        help="Start date (YYYY-MM-DD)")
    parser.add_argument("--end", type=str, default=DEFAULT_END_DATE,
                        help="End date (YYYY-MM-DD)")
    parser.add_argument("--markets", nargs="+", default=None,
                        help="Markets to train (default: all)")
    parser.add_argument("--min-samples", type=int, default=100,
                        help="Minimum samples per market")
    parser.add_argument("--top-leagues-only", action="store_true",
                        help="Only use top leagues data")

    args = parser.parse_args()

    print(f"\n[Calibration Training] {args.start} to {args.end}")

    # Load top leagues if requested
    league_ids = None
    if args.top_leagues_only:
        league_ids = load_top_league_ids()
        if league_ids:
            print(f"[Data] Filtering to {len(league_ids)} top leagues")
        else:
            # An empty ID list disables the league filter downstream, so say
            # so explicitly instead of claiming a filter to "0 top leagues".
            print("[Warning] --top-leagues-only was requested but no league IDs "
                  "were loaded; training on ALL leagues")

    # closing() guarantees the cursor and then the connection are closed,
    # including when cursor creation itself fails.
    with closing(get_conn()) as conn, closing(conn.cursor()) as cur:
        df = fetch_training_data(cur, args.start, args.end, league_ids)

        if len(df) == 0:
            print("[Error] No data found for the specified date range")
            return

        # Calculate outcomes and probabilities
        df = calculate_actual_outcomes(df)
        df = calculate_implied_probabilities(df)

        # Train models
        results = train_calibration_models(
            df=df,
            markets=args.markets,
            min_samples=args.min_samples,
        )

        # Print report
        print_calibration_report(results)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
Executable
+192
@@ -0,0 +1,192 @@
|
|||||||
|
"""
|
||||||
|
Card Market XGBoost Model Trainer
|
||||||
|
==================================
|
||||||
|
Trains XGBoost models for the card markets (3.5, 4.5 and 5.5 Over/Under).
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python3 scripts/train_cards_model.py
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import pickle
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import xgboost as xgb
|
||||||
|
from sklearn.model_selection import train_test_split, StratifiedKFold
|
||||||
|
from sklearn.metrics import accuracy_score, log_loss, roc_auc_score, classification_report
|
||||||
|
|
||||||
|
# Config
|
||||||
|
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
DATA_PATH = os.path.join(AI_ENGINE_DIR, "data", "training_data_cards.csv")
|
||||||
|
MODELS_DIR = os.path.join(AI_ENGINE_DIR, "models", "xgboost")
|
||||||
|
|
||||||
|
os.makedirs(MODELS_DIR, exist_ok=True)
|
||||||
|
|
||||||
|
# Feature columns
|
||||||
|
FEATURES = [
|
||||||
|
# Referee features
|
||||||
|
"ref_matches",
|
||||||
|
"ref_avg_yellow",
|
||||||
|
"ref_avg_red",
|
||||||
|
"ref_avg_total",
|
||||||
|
|
||||||
|
# Team features
|
||||||
|
"home_team_matches",
|
||||||
|
"home_team_avg_cards",
|
||||||
|
"away_team_matches",
|
||||||
|
"away_team_avg_cards",
|
||||||
|
|
||||||
|
# League features
|
||||||
|
"league_avg_cards",
|
||||||
|
"league_match_count",
|
||||||
|
|
||||||
|
# Derived
|
||||||
|
"combined_team_avg",
|
||||||
|
"ref_team_combined",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def load_data():
    """Load the card training CSV, zero-filling missing feature values.

    Only the columns listed in ``FEATURES`` are filled. Label columns keep
    their missing values so that rows without a label can be excluded by the
    trainer's ``notna()`` filter instead of being silently treated as class 0.

    Returns:
        The training DataFrame. Exits the process with status 1 when
        ``DATA_PATH`` does not exist.
    """
    if not os.path.exists(DATA_PATH):
        print(f"❌ Data file not found: {DATA_PATH}")
        print(" Run extract_card_training_data.py first!")
        sys.exit(1)

    print(f"📦 Loading data from {DATA_PATH}...")
    df = pd.read_csv(DATA_PATH)

    # Fill features only; a blanket fillna(0) would also rewrite missing labels.
    feature_cols = [col for col in FEATURES if col in df.columns]
    df[feature_cols] = df[feature_cols].fillna(0)

    print(f" Shape: {df.shape}")
    return df
|
||||||
|
|
||||||
|
|
||||||
|
def train_card_model(df, target_col, model_name):
    """Train, evaluate and save one binary XGBoost card-market model.

    Steps: drop rows without a label, make a stratified 80/20 train/test
    split, report 5-fold stratified CV AUC on the training part, fit a final
    300-round model on the full training part, print test metrics and the
    top-5 gain features, then save the model as JSON.

    Args:
        df: training DataFrame containing ``FEATURES`` and ``target_col``.
        target_col: name of the binary label column.
        model_name: used (lower-cased) in the saved file name
            ``xgb_<model_name>.json`` under ``MODELS_DIR``.

    Returns:
        The final trained booster, or ``None`` when no labelled rows exist.
    """
    print(f"\n🚀 Training {model_name} (Target: {target_col})...")

    # Keep only rows that actually carry a label.
    labelled = df[df[target_col].notna()].copy()
    if labelled.empty:
        print(f" ⚠️ No valid data for {target_col}, skipping.")
        return None

    features = labelled[FEATURES]
    labels = labelled[target_col].astype(int)
    print(f" Target distribution: {dict(labels.value_counts())}")

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=42, stratify=labels
    )

    booster_params = {
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'eta': 0.05,
        'max_depth': 5,
        'subsample': 0.8,
        'colsample_bytree': 0.8,
        'min_child_weight': 3,
        'nthread': 4,
        'seed': 42
    }

    def _matrix(frame, target):
        # DMatrix with explicit feature names so importance output is readable.
        return xgb.DMatrix(frame, label=target, feature_names=FEATURES)

    # Cross-validation on the training part only (diagnostic; the fold
    # models are discarded).
    splitter = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    fold_aucs = []
    for fold_no, (fit_idx, holdout_idx) in enumerate(splitter.split(X_train, y_train), start=1):
        fit_X, holdout_X = X_train.iloc[fit_idx], X_train.iloc[holdout_idx]
        fit_y, holdout_y = y_train.iloc[fit_idx], y_train.iloc[holdout_idx]

        fit_matrix = _matrix(fit_X, fit_y)
        holdout_matrix = _matrix(holdout_X, holdout_y)

        fold_model = xgb.train(
            booster_params,
            fit_matrix,
            num_boost_round=500,
            evals=[(holdout_matrix, 'eval')],
            early_stopping_rounds=30,
            verbose_eval=False
        )

        fold_auc = roc_auc_score(holdout_y, fold_model.predict(holdout_matrix))
        fold_aucs.append(fold_auc)
        print(f" Fold {fold_no} AUC: {fold_auc:.4f}")

    print(f" Mean CV AUC: {np.mean(fold_aucs):.4f} (+/- {np.std(fold_aucs):.4f})")

    # Final model: fixed 300 rounds on the whole training part.
    train_matrix = _matrix(X_train, y_train)
    test_matrix = _matrix(X_test, y_test)
    final_model = xgb.train(
        booster_params,
        train_matrix,
        num_boost_round=300,
        verbose_eval=False
    )

    # Held-out evaluation.
    test_probs = final_model.predict(test_matrix)
    test_labels = (test_probs > 0.5).astype(int)
    acc = accuracy_score(y_test, test_labels)
    auc = roc_auc_score(y_test, test_probs)

    print(f"\n📊 Test Results:")
    print(f" Accuracy: {acc:.4f}")
    print(f" AUC: {auc:.4f}")
    print(classification_report(y_test, test_labels))

    # Five features with the highest gain.
    gain_by_feature = final_model.get_score(importance_type='gain')
    print(f"\n🔍 Top Features:")
    ranked = sorted(gain_by_feature.items(), key=lambda item: item[1], reverse=True)
    for feature_name, gain in ranked[:5]:
        print(f" {feature_name}: {gain:.2f}")

    model_path = os.path.join(MODELS_DIR, f"xgb_{model_name.lower()}.json")
    final_model.save_model(model_path)
    print(f"\n💾 Model saved to: {model_path}")

    return final_model
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Train the Over 4.5 / 3.5 / 5.5 card models and summarise the outcome.

    Prints which markets were skipped (the trainer returned ``None``) instead
    of always claiming success, then lists the card model files found in
    ``MODELS_DIR``.
    """
    import glob

    df = load_data()

    # (market key, label column, model name) in training order.
    card_markets = (
        ("cards_over_45", "label_cards_over45", "cards45"),
        ("cards_over_35", "label_cards_over35", "cards35"),
        ("cards_over_55", "label_cards_over55", "cards55"),
    )

    models = []
    for market_key, label_col, model_name in card_markets:
        models.append((market_key, train_card_model(df, label_col, model_name)))

    skipped = [market_key for market_key, model in models if model is None]

    print("\n" + "="*60)
    if skipped:
        # Do not report success for markets that produced no model.
        print(f"⚠️ Card models skipped (no valid data): {', '.join(skipped)}")
    else:
        print("✅ All card models trained successfully!")
    print(f"📁 Models saved to: {MODELS_DIR}")

    # List saved files (this may include files from earlier runs).
    card_files = glob.glob(os.path.join(MODELS_DIR, "xgb_cards*.json"))
    for f in card_files:
        print(f" - {os.path.basename(f)}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,396 @@
|
|||||||
|
"""
|
||||||
|
HT/FT (İY/MS) Model Training Script - VQWEN v3
|
||||||
|
|
||||||
|
Bu script İY/MS (Half Time / Full Time) tahmini için XGBoost modeli eğitir.
|
||||||
|
9 sınıf: 1/1, 1/X, 1/2, X/1, X/X, X/2, 2/1, 2/X, 2/2
|
||||||
|
|
||||||
|
Features:
|
||||||
|
- Odds (MS + HT)
|
||||||
|
- HT/FT Tendency Engine (takımların ilk yarı/ikinci yarı performansları)
|
||||||
|
- League-level stats
|
||||||
|
- Data quality metrics
|
||||||
|
|
||||||
|
Output:
|
||||||
|
- ai-engine/models/xgboost/xgb_ht_ft.json (V20 + V25 compatible)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import pickle
|
||||||
|
import psycopg2
|
||||||
|
from psycopg2.extras import RealDictCursor
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import xgboost as xgb
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
|
||||||
|
from sklearn.calibration import CalibratedClassifierCV
|
||||||
|
|
||||||
|
# Add the parent directory to sys.path so sibling packages can be imported
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
from features.htft_tendency_engine import HtftTendencyEngine
|
||||||
|
|
||||||
|
# Database connection
|
||||||
|
# SECURITY NOTE(review): the fallback DSN below embeds a database password in source
# control. Prefer supplying DATABASE_URL through the environment and rotate this credential.
DB_URL = os.getenv('DATABASE_URL', 'postgresql://suggestbet:SuGGesT2026SecuRe@localhost:15432/boilerplate_db')
# Drop any query string (e.g. ?schema=public): psycopg2 doesn't accept it.
# partition() returns the whole string unchanged when no '?' is present.
DB_URL = DB_URL.partition('?')[0]
|
||||||
|
|
||||||
|
# HT/FT Labels
|
||||||
|
HTFT_LABELS = ["1/1", "1/X", "1/2", "X/1", "X/X", "X/2", "2/1", "2/X", "2/2"]
|
||||||
|
|
||||||
|
# Save path
|
||||||
|
MODEL_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'models', 'xgboost')
|
||||||
|
MODEL_PATH_JSON = os.path.join(MODEL_DIR, 'xgb_ht_ft.json')
|
||||||
|
MODEL_PATH_PKL = os.path.join(MODEL_DIR, 'xgb_ht_ft.pkl')
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_matches():
    """Fetch completed football matches that have both HT and FT scores.

    Returns:
        The rows returned by the query (dict-like rows, via RealDictCursor),
        ordered by ``mst_utc`` ascending.

    The connection and cursor are always released, even when the query fails.
    """
    print("📊 Fetching completed football matches...")

    conn = psycopg2.connect(DB_URL)
    try:
        cur = conn.cursor(cursor_factory=RealDictCursor)
        try:
            cur.execute("""
                SELECT
                m.id,
                m.home_team_id,
                m.away_team_id,
                m.league_id,
                m.sport,
                m.mst_utc,
                m.ht_score_home,
                m.ht_score_away,
                m.score_home,
                m.score_away
                FROM matches m
                WHERE m.sport = 'football'
                AND m.status = 'FT'
                AND m.ht_score_home IS NOT NULL
                AND m.ht_score_away IS NOT NULL
                AND m.score_home IS NOT NULL
                AND m.score_away IS NOT NULL
                AND m.mst_utc IS NOT NULL
                ORDER BY m.mst_utc ASC
            """)
            matches = cur.fetchall()
        finally:
            cur.close()
    finally:
        # Previously skipped when execute()/fetchall() raised, leaking the connection.
        conn.close()

    print(f"✅ Fetched {len(matches)} matches")
    return matches
|
||||||
|
|
||||||
|
|
||||||
|
def compute_htft_label(ht_home, ht_away, ft_home, ft_away):
    """
    Encode a half-time / full-time outcome pair as a single integer in 0..8.

    Each period is reduced to 0 (home ahead), 1 (level) or 2 (away ahead);
    the label is ``ht_result * 3 + ft_result``.
    """
    def _outcome(home_goals, away_goals):
        # 0 = home, 1 = draw, 2 = away
        if home_goals > away_goals:
            return 0
        return 1 if home_goals == away_goals else 2

    half_time = _outcome(ht_home, ht_away)
    full_time = _outcome(ft_home, ft_away)
    return half_time * 3 + full_time
|
||||||
|
|
||||||
|
|
||||||
|
# (engine feature name, neutral default) in the column order the model is trained on.
_HTFT_ENGINE_FEATURES = (
    ('home_ht_scoring_rate', 0.5),
    ('home_ht_concede_rate', 0.5),
    ('home_ht_win_rate', 0.33),
    ('home_comeback_rate', 0.0),
    ('home_first_half_goal_pct', 0.5),
    ('home_second_half_surge', 1.0),
    ('away_ht_scoring_rate', 0.5),
    ('away_ht_concede_rate', 0.5),
    ('away_ht_win_rate', 0.33),
    ('away_comeback_rate', 0.0),
    ('away_first_half_goal_pct', 0.5),
    ('away_second_half_surge', 1.0),
    ('league_avg_ht_goals', 1.0),
    ('league_reversal_rate', 0.05),
    ('league_first_half_pct', 0.44),
    ('home_sample_size', 0.0),
    ('away_sample_size', 0.0),
)


def _parse_match_odds(odds_rows):
    """Split raw odds rows into full-time (MS) and half-time (HT) 1/X/2 odds dicts."""
    odds = {}
    ht_odds = {}

    for row in odds_rows:
        cat = (row['category_name'] or '').lower()
        sel = (row['selection_name'] or '').lower()

        # Ignore missing / unparsable / non-positive odds: they cannot be turned
        # into an implied probability (1 / odd) later on.
        try:
            val = float(row['odd_value'])
        except (TypeError, ValueError):
            continue
        if val <= 0:
            continue

        # Accept both '1.yarı sonucu' and '1. yarı sonucu': the sibling training
        # script queries the category as '1. Yarı Sonucu' (with a space), which the
        # previous no-space substring check would never match.
        if '1.yarısonucu' in cat.replace(' ', ''):
            target, prefix = ht_odds, 'ht_ms_'
        elif 'maç sonucu' in cat:
            target, prefix = odds, 'ms_'
        else:
            continue

        if sel == '1':
            target[prefix + 'h'] = val
        elif sel in ('x', '0'):
            target[prefix + 'd'] = val
        elif sel == '2':
            target[prefix + 'a'] = val

    return odds, ht_odds


def _build_htft_feature_row(odds, ht_odds, htft_feats):
    """Assemble one training row from MS odds, HT odds and tendency-engine output."""
    feat = {
        # MS Odds (callers guarantee all three are present)
        'odds_ms_h': odds['ms_h'],
        'odds_ms_d': odds['ms_d'],
        'odds_ms_a': odds['ms_a'],
        'implied_home': 1.0 / odds['ms_h'],
        'implied_draw': 1.0 / odds['ms_d'],
        'implied_away': 1.0 / odds['ms_a'],
        'fav_gap': abs(odds['ms_h'] - odds['ms_a']),

        # HT Odds (fixed fallback odds when the HT market is missing)
        'ht_implied_home': 1.0 / ht_odds.get('ht_ms_h', 3.0),
        'ht_implied_draw': 1.0 / ht_odds.get('ht_ms_d', 2.1),
        'ht_implied_away': 1.0 / ht_odds.get('ht_ms_a', 3.5),
    }

    # HT/FT tendencies, league-level stats and data quality (from engine).
    # NOTE(review): the sibling script merges engine output directly under
    # 'htft_'-prefixed names, so accept either spelling here; otherwise real
    # values could silently be replaced by defaults. TODO confirm the engine's key format.
    for name, default in _HTFT_ENGINE_FEATURES:
        prefixed = 'htft_' + name
        feat[prefixed] = htft_feats.get(prefixed, htft_feats.get(name, default))

    return feat


def extract_features_and_labels(matches):
    """Extract features using HT/FT Tendency Engine + Odds.

    Args:
        matches: iterable of dict-like match rows with ``id``, team/league ids,
            ``mst_utc`` and HT/FT scores.

    Returns:
        ``(features_list, labels, match_ids)`` — parallel lists containing one
        entry per match that has complete full-time 1/X/2 odds.
    """
    print("\n🔧 Extracting features...")

    features_list = []
    labels = []
    match_ids = []
    engine_failures = 0

    conn = psycopg2.connect(DB_URL)
    try:
        cur = conn.cursor(cursor_factory=RealDictCursor)
        try:
            htft_engine = HtftTendencyEngine()

            for idx, match in enumerate(matches):
                if idx % 1000 == 0:
                    print(f" Processing {idx}/{len(matches)}...")

                mid = match['id']
                hid = str(match['home_team_id'])
                aid = str(match['away_team_id'])
                lid = str(match['league_id']) if match['league_id'] else None
                mst = int(match['mst_utc'])

                # Fetch odds (MS and HT)
                cur.execute("""
                    SELECT oc.name as category_name, os.name as selection_name, os.odd_value
                    FROM odd_categories oc
                    JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
                    WHERE oc.match_id = %s
                """, (mid,))
                odds, ht_odds = _parse_match_odds(cur.fetchall())

                # Skip if no odds
                if 'ms_h' not in odds or 'ms_d' not in odds or 'ms_a' not in odds:
                    continue

                label = compute_htft_label(
                    match['ht_score_home'],
                    match['ht_score_away'],
                    match['score_home'],
                    match['score_away'],
                )

                # Best effort: a failing engine lookup falls back to neutral
                # defaults, but the failures are counted so they are not invisible.
                try:
                    htft_feats = htft_engine.get_features(hid, aid, lid, mst)
                except Exception:
                    engine_failures += 1
                    htft_feats = htft_engine._empty_features()

                features_list.append(_build_htft_feature_row(odds, ht_odds, htft_feats))
                labels.append(label)
                match_ids.append(mid)
        finally:
            cur.close()
    finally:
        # Always release the connection, including when a query or the engine raises.
        conn.close()

    if engine_failures:
        print(f"⚠️ Tendency engine failed for {engine_failures} matches (neutral defaults used)")
    print(f"✅ Extracted {len(features_list)} samples with features")

    return features_list, labels, match_ids
|
||||||
|
|
||||||
|
|
||||||
|
def train_model(features_list, labels):
    """Train XGBoost classifier with class weights.

    Args:
        features_list: list of feature dicts (one per match, chronological order).
        labels: HT/FT class ids (0-8) aligned with ``features_list``.

    Returns:
        ``(model, feature_names)`` — the fitted ``XGBClassifier`` and the ordered
        list of feature column names.

    NOTE(review): the last 20% of rows serve both as the early-stopping
    validation set and as the reported test set, so the printed accuracy is
    likely optimistic.
    """
    from sklearn.utils.class_weight import compute_class_weight

    print("\n🎯 Training HT/FT XGBoost model...")

    # Convert to DataFrame
    X = pd.DataFrame(features_list)
    y = np.array(labels)

    n_classes = len(HTFT_LABELS)
    class_ids = np.arange(n_classes)

    # Print class distribution
    print("\n📊 Class distribution:")
    for i, label_name in enumerate(HTFT_LABELS):
        count = np.sum(y == i)
        print(f" {label_name}: {count} ({count/len(y)*100:.1f}%)")

    # Time-based split (80/20): rows are expected to arrive in chronological order.
    split_idx = int(len(X) * 0.8)
    X_train = X.iloc[:split_idx]
    X_test = X.iloc[split_idx:]
    y_train = y[:split_idx]
    y_test = y[split_idx:]

    print(f"\n📈 Train size: {len(X_train)}, Test size: {len(X_test)}")

    # Compute class weights (handle imbalance) and expand them to one weight per row.
    class_weights = compute_class_weight('balanced', classes=class_ids, y=y_train)
    sample_weights = class_weights[y_train]

    print(f"\n⚖️ Class weights: {dict(zip(HTFT_LABELS, [round(w, 2) for w in class_weights]))}")

    # Train XGBoost
    model = xgb.XGBClassifier(
        n_estimators=400,
        max_depth=7,
        learning_rate=0.05,
        objective='multi:softprob',
        num_class=n_classes,
        eval_metric='mlogloss',
        subsample=0.8,
        colsample_bytree=0.8,
        min_child_weight=5,
        gamma=0.1,
        reg_alpha=0.1,
        reg_lambda=1.0,
        random_state=42,
        n_jobs=-1,
        early_stopping_rounds=20,  # Passed to the constructor for newer XGBoost versions
    )

    model.fit(
        X_train, y_train,
        sample_weight=sample_weights,
        eval_set=[(X_test, y_test)],
        verbose=False,
    )

    # Evaluate
    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print(f"\n✅ Test Accuracy: {accuracy:.4f} ({accuracy*100:.1f}%)")

    # Pass the full label set explicitly: without it, a test split that lacks one
    # of the rare classes makes target_names mismatch and the report raise.
    print("\n📊 Classification Report:")
    print(classification_report(
        y_test, y_pred,
        labels=class_ids,
        target_names=HTFT_LABELS,
        zero_division=0,
    ))

    # Confusion matrix (always n_classes x n_classes thanks to explicit labels)
    print("\n🔲 Confusion Matrix:")
    cm = confusion_matrix(y_test, y_pred, labels=class_ids)
    print(cm)

    # Feature importance
    print("\n🔝 Top 15 Features:")
    importance = model.feature_importances_
    feat_importance = sorted(zip(X.columns, importance), key=lambda x: x[1], reverse=True)[:15]
    for feat, imp in feat_importance:
        print(f" {feat}: {imp:.4f}")

    return model, X.columns.tolist()
|
||||||
|
|
||||||
|
|
||||||
|
def save_model(model, feature_names):
    """Persist the trained model as JSON and pickle, plus the feature-name list as JSON."""
    print("\n💾 Saving model...")

    # Make sure the target directory is there before writing anything.
    os.makedirs(MODEL_DIR, exist_ok=True)

    # Raw booster in JSON form (for V25 + V20).
    booster = model.get_booster()
    booster.save_model(MODEL_PATH_JSON)
    print(f"✅ Saved JSON model: {MODEL_PATH_JSON}")

    # Whole sklearn wrapper as a pickle (for V20).
    with open(MODEL_PATH_PKL, 'wb') as pkl_file:
        pickle.dump(model, pkl_file)
    print(f"✅ Saved PKL model: {MODEL_PATH_PKL}")

    # Ordered feature names, so consumers can rebuild the input vector.
    features_path = os.path.join(MODEL_DIR, 'htft_features.json')
    serialized_names = json.dumps(feature_names, indent=2)
    with open(features_path, 'w') as names_file:
        names_file.write(serialized_names)
    print(f"✅ Saved features: {features_path}")
|
||||||
|
|
||||||
|
|
||||||
|
def test_model_loading():
    """Smoke-check that the saved JSON and PKL artifacts can be loaded back."""
    print("\n🧪 Testing model loading...")

    # V25 path: a raw xgb.Booster restored from the JSON file.
    import xgboost as xgb
    json_booster = xgb.Booster()
    json_booster.load_model(MODEL_PATH_JSON)
    booster_feature_count = len(json_booster.feature_names)
    print(f"✅ V25 booster loaded from JSON, features: {booster_feature_count}")

    # V20 path: the sklearn wrapper restored from the pickle written by this script.
    with open(MODEL_PATH_PKL, 'rb') as pkl_file:
        wrapper = pickle.load(pkl_file)
    wrapper_feature_count = len(wrapper.feature_names_in_)
    print(f"✅ V20 model loaded from PKL, features: {wrapper_feature_count}")

    print("\n✅ All model loading tests passed!")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the pipeline: fetch matches, build features, train, save and reload-check."""
    rule = "="*80
    print(rule)
    print("🚀 HT/FT (İY/MS) MODEL TRAINING - VQWEN v3")
    print(rule)

    # Step 1: completed matches with HT and FT scores.
    matches = fetch_matches()
    if not matches:
        print("❌ No matches found")
        return

    # Step 2: feature rows and labels (match ids are returned but not used here).
    features_list, labels, match_ids = extract_features_and_labels(matches)
    if not features_list:
        print("❌ No features extracted")
        return

    # Steps 3-5: train, persist, then verify the artifacts load.
    model, feature_names = train_model(features_list, labels)
    save_model(model, feature_names)
    test_model_loading()

    print("\n" + rule)
    print("🎉 TRAINING COMPLETE")
    print(rule)

    summary_lines = (
        f"\n📊 Model files:",
        f" JSON (V25+V20): {MODEL_PATH_JSON}",
        f" PKL (V20): {MODEL_PATH_PKL}",
        f" Features: {MODEL_DIR}/htft_features.json",
        f"\n📈 Total samples: {len(features_list)}",
        f"🎯 Classes: {len(HTFT_LABELS)}",
    )
    for line in summary_lines:
        print(line)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
||||||
@@ -0,0 +1,423 @@
|
|||||||
|
"""
|
||||||
|
HT/FT Model Training with New Features + Backtest
|
||||||
|
=====================================================
|
||||||
|
Extracts training data with the new HT/FT tendency features,
|
||||||
|
trains a new XGBoost model, and compares it against the old model.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python ai-engine/scripts/train_htft_with_tendencies.py
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import json
|
||||||
|
import pickle
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from collections import defaultdict
|
||||||
|
from tabulate import tabulate
|
||||||
|
|
||||||
|
import psycopg2
|
||||||
|
import xgboost as xgb
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
|
||||||
|
|
||||||
|
from data.db import get_clean_dsn
|
||||||
|
from features.htft_tendency_engine import HtftTendencyEngine
|
||||||
|
|
||||||
|
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
TOP_LEAGUES_PATH = os.path.join(AI_ENGINE_DIR, "..", "top_leagues.json")
|
||||||
|
OUTPUT_DIR = os.path.join(AI_ENGINE_DIR, "data")
|
||||||
|
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
||||||
|
|
||||||
|
HTFT_LABELS = ["1/1", "1/X", "1/2", "X/1", "X/X", "X/2", "2/1", "2/X", "2/2"]
|
||||||
|
|
||||||
|
|
||||||
|
def get_conn():
    """Open a new psycopg2 connection using the DSN returned by ``get_clean_dsn``."""
    return psycopg2.connect(get_clean_dsn())
|
||||||
|
|
||||||
|
|
||||||
|
def load_top_leagues():
    """Load top league IDs from top_leagues.json.

    Each entry may be a dict (``id`` or ``league_id`` key), a string, or an
    integer id; all ids are normalised to strings.

    Returns:
        A set of league-id strings, or ``None`` when the file cannot be read or
        parsed (callers then fall back to all leagues).
    """
    try:
        with open(TOP_LEAGUES_PATH, "r", encoding="utf-8") as f:
            data = json.load(f)

        ids = set()
        for entry in data:
            if isinstance(entry, dict):
                lid = entry.get("id") or entry.get("league_id")
                if lid:
                    ids.add(str(lid))
            elif isinstance(entry, str):
                ids.add(entry)
            elif isinstance(entry, int) and not isinstance(entry, bool):
                # Bare numeric ids were previously dropped silently.
                ids.add(str(entry))
    except (OSError, ValueError, TypeError) as e:
        # Missing/unreadable file, invalid JSON, or a non-iterable top-level value.
        print(f"⚠️ Could not load top_leagues.json: {e}. Using all leagues.")
        return None

    print(f"✅ Loaded {len(ids)} top leagues")
    return ids
|
||||||
|
|
||||||
|
|
||||||
|
def load_matches_with_odds(conn, top_league_ids=None):
    """Load FT football matches that have both full-time and half-time scores.

    Args:
        conn: open DB-API connection.
        top_league_ids: optional collection of league ids; when non-empty the
            result is restricted to those leagues.

    Returns:
        ``pandas.DataFrame`` with one row per match, ordered by ``mst_utc``.
        Odds themselves are not loaded here (see ``load_odds_for_matches``).
    """
    query = """
        SELECT
        m.id,
        m.home_team_id,
        m.away_team_id,
        m.league_id,
        m.score_home,
        m.score_away,
        m.ht_score_home,
        m.ht_score_away,
        m.mst_utc
        FROM matches m
        WHERE m.sport = 'football'
        AND m.status = 'FT'
        AND m.score_home IS NOT NULL
        AND m.score_away IS NOT NULL
        AND m.ht_score_home IS NOT NULL
        AND m.ht_score_away IS NOT NULL
        AND m.home_team_id IS NOT NULL
        AND m.away_team_id IS NOT NULL
    """

    params = list(top_league_ids) if top_league_ids else []
    if params:
        # Only the placeholders are interpolated; the ids are bound as parameters.
        placeholders = ",".join(["%s"] * len(params))
        query += f" AND m.league_id IN ({placeholders})"

    query += " ORDER BY m.mst_utc ASC"

    cur = conn.cursor()
    try:
        cur.execute(query, params)
        rows = cur.fetchall()
    finally:
        # Release the cursor even when the query fails.
        cur.close()

    cols = ["id", "home_team_id", "away_team_id", "league_id",
            "score_home", "score_away", "ht_score_home", "ht_score_away", "mst_utc"]
    return pd.DataFrame(rows, columns=cols)
|
||||||
|
|
||||||
|
|
||||||
|
def load_odds_for_matches(conn, match_ids):
    """Fetch odds for the given matches and index the MS / HT 1-X-2 prices by match id.

    Returns a dict ``{match_id: {"ms_h": ..., "ms_d": ..., "ms_a": ...,
    "ht_ms_h": ..., "ht_ms_d": ..., "ht_ms_a": ...}}``; a match appears (possibly
    with an empty dict) as soon as any queried category row exists for it.
    """
    if not match_ids:
        return {}

    # category name -> selection name -> output key (exact, case-sensitive match)
    key_by_selection = {
        "Maç Sonucu": {
            "1": "ms_h", "Ev Sahibi": "ms_h",
            "X": "ms_d", "Berabere": "ms_d",
            "2": "ms_a", "Deplasman": "ms_a",
        },
        "1. Yarı Sonucu": {
            "1": "ht_ms_h", "Ev Sahibi": "ht_ms_h",
            "X": "ht_ms_d", "Berabere": "ht_ms_d",
            "2": "ht_ms_a", "Deplasman": "ht_ms_a",
        },
    }

    odds_map = {}
    batch_size = 5000
    all_ids = list(match_ids)

    # Query in batches to keep the IN (...) list bounded.
    for offset in range(0, len(all_ids), batch_size):
        chunk = all_ids[offset:offset + batch_size]
        placeholders = ",".join("%s" for _ in chunk)

        cur = conn.cursor()
        cur.execute(f"""
            SELECT oc.match_id, oc.name, os.name as sel_name, os.odd_value
            FROM odd_categories oc
            JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
            WHERE oc.match_id IN ({placeholders})
            AND oc.name IN (
            'Maç Sonucu',
            '1. Yarı Sonucu',
            '2,5 Alt/Üst',
            'Karşılıklı Gol',
            'Çifte Şans'
            )
        """, chunk)
        fetched = cur.fetchall()
        cur.close()

        for mid, cat_name, sel_name, odd_value in fetched:
            per_match = odds_map.setdefault(mid, {})

            # Missing or unparsable odds count as 0 and are dropped below.
            try:
                val = float(odd_value) if odd_value else 0.0
            except (ValueError, TypeError):
                val = 0.0
            if val <= 0:
                continue

            out_key = key_by_selection.get(cat_name, {}).get(sel_name)
            if out_key is not None:
                per_match[out_key] = val

    return odds_map
|
||||||
|
|
||||||
|
|
||||||
|
def compute_labels(df):
    """Return the HT/FT class id (0-8) for every row of ``df``.

    A period's result is 0 when home leads, 2 when away leads and 1 otherwise;
    the class id is ``ht_result * 3 + ft_result``.
    """
    def _result(home_goals, away_goals):
        if home_goals > away_goals:
            return 0
        if home_goals < away_goals:
            return 2
        return 1

    return [
        _result(match["ht_score_home"], match["ht_score_away"]) * 3
        + _result(match["score_home"], match["score_away"])
        for _, match in df.iterrows()
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def extract_features(df, conn, odds_map, htft_engine):
    """Extract all features for each match.

    Args:
        df: matches DataFrame (``id``, team/league ids, ``mst_utc``).
        conn: unused here; kept for interface compatibility.
        odds_map: ``{match_id: {"ms_h": ..., ...}}`` as built by ``load_odds_for_matches``.
        htft_engine: object exposing ``get_features(home_id, away_id, league_id, mst)``.

    Returns:
        A list aligned with ``df`` rows: a feature dict per match, or ``None``
        for matches without complete full-time 1/X/2 odds.
    """
    print(f"\n⏳ Extracting features for {len(df):,} matches...")
    start_time = time.time()

    # Neutral values used when the tendency engine cannot produce features.
    neutral_htft = {
        "htft_home_ht_scoring_rate": 0.5,
        "htft_home_ht_concede_rate": 0.5,
        "htft_home_ht_win_rate": 0.33,
        "htft_home_comeback_rate": 0.0,
        "htft_home_first_half_goal_pct": 0.5,
        "htft_home_second_half_surge": 1.0,
        "htft_away_ht_scoring_rate": 0.5,
        "htft_away_ht_concede_rate": 0.5,
        "htft_away_ht_win_rate": 0.33,
        "htft_away_comeback_rate": 0.0,
        "htft_away_first_half_goal_pct": 0.5,
        "htft_away_second_half_surge": 1.0,
        "htft_league_avg_ht_goals": 1.0,
        "htft_league_reversal_rate": 0.05,
        "htft_league_first_half_pct": 0.44,
        "htft_home_sample_size": 0.0,
        "htft_away_sample_size": 0.0,
    }

    all_features = []
    processed = 0
    skipped = 0
    engine_failures = 0

    for _, row in df.iterrows():
        mid = row["id"]
        hid = row["home_team_id"]
        aid = row["away_team_id"]
        lid = row["league_id"]
        mst = row["mst_utc"]

        # Odds features
        odds = odds_map.get(mid, {})
        ms_h = odds.get("ms_h", 0.0)
        ms_d = odds.get("ms_d", 0.0)
        ms_a = odds.get("ms_a", 0.0)

        # Skip matches without complete MS odds (too noisy); keep a None
        # placeholder so the output stays aligned with df.
        if ms_h <= 0 or ms_d <= 0 or ms_a <= 0:
            skipped += 1
            all_features.append(None)
            continue

        # Implied probs (vig-free): normalise inverse odds so they sum to 1.
        raw_sum = 1/ms_h + 1/ms_d + 1/ms_a
        implied_home = (1/ms_h) / raw_sum
        implied_draw = (1/ms_d) / raw_sum
        implied_away = (1/ms_a) / raw_sum

        ht_ms_h = odds.get("ht_ms_h", 0.0)
        ht_ms_d = odds.get("ht_ms_d", 0.0)
        ht_ms_a = odds.get("ht_ms_a", 0.0)

        # HT implied probs, or a flat 0.33 each when the HT market is incomplete.
        if ht_ms_h > 0 and ht_ms_d > 0 and ht_ms_a > 0:
            ht_raw = 1/ht_ms_h + 1/ht_ms_d + 1/ht_ms_a
            ht_implied_home = (1/ht_ms_h) / ht_raw
            ht_implied_draw = (1/ht_ms_d) / ht_raw
            ht_implied_away = (1/ht_ms_a) / ht_raw
        else:
            ht_implied_home = ht_implied_draw = ht_implied_away = 0.33

        feat = {
            # Odds features (core)
            "odds_ms_h": ms_h,
            "odds_ms_d": ms_d,
            "odds_ms_a": ms_a,
            "implied_home": implied_home,
            "implied_draw": implied_draw,
            "implied_away": implied_away,
            "fav_gap": abs(implied_home - implied_away),

            # HT odds
            "ht_implied_home": ht_implied_home,
            "ht_implied_draw": ht_implied_draw,
            "ht_implied_away": ht_implied_away,
        }

        # HT/FT tendency features. Best effort: fall back to neutral values, but
        # count the failures so a broken engine does not go unnoticed.
        try:
            feat.update(htft_engine.get_features(hid, aid, lid, mst))
        except Exception:
            engine_failures += 1
            feat.update(neutral_htft)

        all_features.append(feat)
        processed += 1

        if processed % 2000 == 0:
            elapsed = time.time() - start_time
            # Guard against a zero elapsed time on coarse clocks.
            rate = processed / elapsed if elapsed > 0 else 0
            remaining = (len(df) - processed - skipped) / rate if rate > 0 else 0
            print(f" Processed: {processed:,} / {len(df):,} "
                  f"(skipped: {skipped:,}) "
                  f"[{elapsed:.0f}s elapsed, ~{remaining:.0f}s remaining]")

    elapsed = time.time() - start_time
    print(f" ✅ Features extracted: {processed:,} (skipped {skipped:,}) in {elapsed:.1f}s")
    if engine_failures:
        print(f" ⚠️ Tendency engine failed for {engine_failures:,} matches (neutral values used)")

    return all_features
|
||||||
|
|
||||||
|
|
||||||
|
def train_and_evaluate(X_train, y_train, X_test, y_test, feature_names, label=""):
    """Fit a 9-class XGBoost classifier and print accuracy, per-class and importance tables.

    Returns ``(model, accuracy)`` where ``accuracy`` is measured on the test arrays.
    """
    hyperparams = dict(
        n_estimators=300,
        max_depth=6,
        learning_rate=0.05,
        num_class=9,
        objective="multi:softprob",
        eval_metric="mlogloss",
        subsample=0.8,
        colsample_bytree=0.8,
        min_child_weight=5,
        random_state=42,
        verbosity=0,
        n_jobs=-1,
    )
    model = xgb.XGBClassifier(**hyperparams)

    print(f"\n🏋️ Training {label} model...")
    model.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=False)

    # Score on the held-out arrays.
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    print(f"\n📊 {label} Results:")
    print(f" Overall Accuracy: {accuracy:.4f} ({accuracy*100:.1f}%)")

    # One table row per class that actually occurs in the test labels.
    print(f"\n Per-class breakdown:")
    table = []
    for class_id, class_name in enumerate(HTFT_LABELS):
        in_class = y_test == class_id
        if not in_class.sum() > 0:
            continue
        hit_rate = accuracy_score(y_test[in_class], y_pred[in_class])
        table.append([class_name, in_class.sum(), f"{hit_rate*100:.1f}%"])

    print(tabulate(table, headers=["HT/FT", "Count", "Accuracy"], tablefmt="pretty"))

    # Rank features by importance and show the top 15 with a text bar.
    ranked = sorted(
        zip(feature_names, model.feature_importances_),
        key=lambda pair: pair[1],
        reverse=True,
    )
    print(f"\n Top 15 Features:")
    for fname, imp in ranked[:15]:
        bar = "█" * int(imp * 100)
        print(f" {fname:40s} {imp:.4f} {bar}")

    return model, accuracy
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Train the HT/FT model with tendency features, compare to an odds-only baseline, save it.

    The database connection is always closed, and the run stops early with a
    message when there are no matches or no match has usable odds.
    """
    print("🚀 HT/FT Model Training with New Tendency Features")
    print("=" * 70)

    conn = get_conn()
    try:
        top_league_ids = load_top_leagues()

        # Load matches
        print("\n📊 Loading matches...")
        df = load_matches_with_odds(conn, top_league_ids)
        print(f" ✅ {len(df):,} matches loaded")
        if df.empty:
            # Avoids a division by zero in the label distribution below.
            print("❌ No matches found")
            return

        # Load odds
        print("\n📊 Loading odds...")
        match_ids = set(df["id"].tolist())
        odds_map = load_odds_for_matches(conn, match_ids)
        print(f" ✅ Odds loaded for {len(odds_map):,} matches")

        # Compute labels
        print("\n📊 Computing HT/FT labels...")
        df["label"] = compute_labels(df)
        label_dist = df["label"].value_counts().sort_index()
        for i, label in enumerate(HTFT_LABELS):
            c = label_dist.get(i, 0)
            print(f" {label}: {c:,} ({c/len(df)*100:.1f}%)")

        # Initialize HT/FT tendency engine
        htft_engine = HtftTendencyEngine()

        # Extract features
        all_features = extract_features(df, conn, odds_map, htft_engine)

        # Filter: keep only matches with features
        valid_mask = [f is not None for f in all_features]
        df_valid = df[valid_mask].reset_index(drop=True)
        features_valid = [f for f in all_features if f is not None]

        print(f"\n📊 Valid matches with features: {len(df_valid):,}")
        if not features_valid:
            # features_valid[0] below would raise IndexError otherwise.
            print("❌ No features extracted")
            return

        # Convert to arrays; column order follows the first feature dict.
        feature_names = list(features_valid[0].keys())
        X = np.array([[f[k] for k in feature_names] for f in features_valid], dtype=np.float32)
        y = np.array(df_valid["label"].tolist(), dtype=np.int32)

        # Split: time-based (last 20% as test); df is ordered by mst_utc.
        split_idx = int(len(X) * 0.8)
        X_train, X_test = X[:split_idx], X[split_idx:]
        y_train, y_test = y[:split_idx], y[split_idx:]
        print(f" Train: {len(X_train):,}, Test: {len(X_test):,}")

        # ─── Train WITH new features ─────────────────────────────────────
        model_new, acc_new = train_and_evaluate(
            X_train, y_train, X_test, y_test, feature_names,
            label="NEW (with HT/FT tendencies)"
        )

        # ─── Train WITHOUT new features (baseline) ───────────────────────
        # Remove htft_ features for comparison
        baseline_cols = [i for i, n in enumerate(feature_names) if not n.startswith("htft_")]
        baseline_names = [feature_names[i] for i in baseline_cols]
        X_train_base = X_train[:, baseline_cols]
        X_test_base = X_test[:, baseline_cols]

        model_base, acc_base = train_and_evaluate(
            X_train_base, y_train, X_test_base, y_test, baseline_names,
            label="BASELINE (without HT/FT tendencies)"
        )

        # ─── Comparison ──────────────────────────────────────────────────
        print("\n" + "=" * 70)
        print("📈 COMPARISON")
        print("=" * 70)
        print(f" Baseline accuracy: {acc_base*100:.2f}%")
        print(f" New accuracy: {acc_new*100:.2f}%")
        delta = (acc_new - acc_base) * 100
        direction = "📈 IMPROVEMENT" if delta > 0 else "📉 REGRESSION"
        print(f" Delta: {delta:+.2f}% {direction}")

        # Save new model; create the directory first so a long training run is
        # not lost to a missing folder.
        model_path = os.path.join(AI_ENGINE_DIR, "models", "xgboost", "xgb_ht_ft_v2.pkl")
        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        with open(model_path, "wb") as f:
            pickle.dump(model_new, f)
        print(f"\n💾 New model saved: {model_path}")

        print("\n✅ Done!")
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
Executable
+183
@@ -0,0 +1,183 @@
|
|||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import xgboost as xgb
|
||||||
|
import pickle
|
||||||
|
import os
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.metrics import mean_absolute_error, r2_score
|
||||||
|
|
||||||
|
# Paths
|
||||||
|
DATA_PATH = os.path.join(os.path.dirname(__file__), "../data/training_data.csv")
|
||||||
|
MODEL_PATH = os.path.join(os.path.dirname(__file__), "../models/xgb_score.pkl")
|
||||||
|
|
||||||
|
# Import unified 56-feature array from markets trainer
|
||||||
|
from train_xgboost_markets import FEATURES
|
||||||
|
|
||||||
|
TARGETS = ["score_home", "score_away", "ht_score_home", "ht_score_away"]
|
||||||
|
|
||||||
|
def _fit_goal_regressor(X_train, y_train, X_test, y_test):
    """Fit one XGBoost goal-count regressor and return it with its test predictions.

    The held-out split is passed as ``eval_set`` only so per-round metrics are
    recorded; no early stopping is configured, so all 1000 trees are built.
    """
    model = xgb.XGBRegressor(
        objective='reg:squarederror',
        n_estimators=1000,
        learning_rate=0.01,
        max_depth=5,
        subsample=0.7,
        colsample_bytree=0.7,
        n_jobs=-1,
        random_state=42,
    )
    model.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=False)
    return model, model.predict(X_test)


def train():
    """Train four XGBoost regressors (FT/HT home and away goals) and pickle them.

    Reads ``DATA_PATH``, drops rows with a missing target or a home-win odd
    that is not above 1.0, does a random 80/20 split, fits one regressor per
    entry in ``TARGETS``, reports MAE (and R2 for full time) plus rounded
    exact-score accuracy, then writes a bundle to ``MODEL_PATH``.

    Returns ``None``; prints a message and returns early when the data file
    is missing.
    """
    print("🚀 Training Score Prediction Model (XGBoost) - Full Time & Half Time")
    print("=" * 60)

    if not os.path.exists(DATA_PATH):
        print(f"❌ Data file not found: {DATA_PATH}")
        return

    print(f"📦 Loading data from {DATA_PATH}...")
    df = pd.read_csv(DATA_PATH)

    # Rows without all four score targets cannot be used for any of the models.
    df = df.dropna(subset=TARGETS)
    # NOTE: this count is taken after the target filter above, not on the raw CSV.
    print(f" Original rows: {len(df)}")

    # Keep only matches that have a usable home-win odd (> 1.0).
    df = df[df["odds_ms_h"] > 1.0].copy()
    print(f" Rows with valid odds: {len(df)}")

    X = df[FEATURES]
    y_home = df["score_home"]
    y_away = df["score_away"]
    y_ht_home = df["ht_score_home"]
    y_ht_away = df["ht_score_away"]

    # One shared random split so all four models see the same train/test rows.
    (
        X_train, X_test,
        y_h_train, y_h_test,
        y_a_train, y_a_test,
        y_ht_h_train, y_ht_h_test,
        y_ht_a_train, y_ht_a_test,
    ) = train_test_split(
        X, y_home, y_away, y_ht_home, y_ht_away, test_size=0.2, random_state=42
    )

    print(f" Training set: {len(X_train)} matches")
    print(f" Test set: {len(X_test)} matches")

    # --- HOME GOALS MODEL ---
    print("\n🏠 Training Home Goals Model...")
    xgb_home, home_preds = _fit_goal_regressor(X_train, y_h_train, X_test, y_h_test)
    mae_home = mean_absolute_error(y_h_test, home_preds)
    r2_home = r2_score(y_h_test, home_preds)
    print(f" ✅ FT Home MAE: {mae_home:.4f} goals")
    print(f" ✅ FT Home R2: {r2_home:.4f}")

    # --- AWAY GOALS MODEL ---
    print("\n✈️ Training FT Away Goals Model...")
    xgb_away, away_preds = _fit_goal_regressor(X_train, y_a_train, X_test, y_a_test)
    mae_away = mean_absolute_error(y_a_test, away_preds)
    r2_away = r2_score(y_a_test, away_preds)
    print(f" ✅ FT Away MAE: {mae_away:.4f} goals")
    print(f" ✅ FT Away R2: {r2_away:.4f}")

    # --- HT HOME GOALS MODEL ---
    print("\n🏠 Training HT Home Goals Model...")
    xgb_ht_home, ht_home_preds = _fit_goal_regressor(
        X_train, y_ht_h_train, X_test, y_ht_h_test
    )
    mae_ht_home = mean_absolute_error(y_ht_h_test, ht_home_preds)
    print(f" ✅ HT Home MAE: {mae_ht_home:.4f} goals")

    # --- HT AWAY GOALS MODEL ---
    print("\n✈️ Training HT Away Goals Model...")
    xgb_ht_away, ht_away_preds = _fit_goal_regressor(
        X_train, y_ht_a_train, X_test, y_ht_a_test
    )
    mae_ht_away = mean_absolute_error(y_ht_a_test, ht_away_preds)
    print(f" ✅ HT Away MAE: {mae_ht_away:.4f} goals")

    # --- EVALUATE EXACT SCORE ACCURACY (ROUNDED) ---
    print("\n🎯 Exact FT Score Accuracy (Test Set):")
    correct = 0
    close = 0  # both sides within one goal of the true score

    for h_true, a_true, h_pred, a_pred in zip(y_h_test, y_a_test, home_preds, away_preds):
        h_p = round(h_pred)
        a_p = round(a_pred)
        if h_p == h_true and a_p == a_true:
            correct += 1
        if abs(h_p - h_true) <= 1 and abs(a_p - a_true) <= 1:
            close += 1

    acc = correct / len(X_test) * 100
    close_acc = close / len(X_test) * 100
    print(f" Exact Match: {acc:.2f}%")
    print(f" Close Match (+/- 1 goal): {close_acc:.2f}%")

    # Save
    print(f"\n💾 Saving models to {MODEL_PATH}...")
    model_data = {
        "home_model": xgb_home,
        "away_model": xgb_away,
        "ht_home_model": xgb_ht_home,
        "ht_away_model": xgb_ht_away,
        "features": FEATURES,
        "meta": {
            "mae_home": mae_home,
            "mae_away": mae_away,
            "mae_ht_home": mae_ht_home,
            "mae_ht_away": mae_ht_away,
            "acc": acc,
        },
    }
    # Make sure the target directory exists so open() cannot fail on a fresh checkout.
    os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
    with open(MODEL_PATH, "wb") as f:
        pickle.dump(model_data, f)

    print("✅ Done.")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
train()
|
||||||
@@ -0,0 +1,451 @@
|
|||||||
|
"""
|
||||||
|
V25 Model Trainer - NO TARGET LEAKAGE
|
||||||
|
=====================================
|
||||||
|
Training script for V25 ensemble model.
|
||||||
|
|
||||||
|
CRITICAL: This version removes total_goals and ht_total_goals features
|
||||||
|
to prevent target leakage. These features are only known AFTER the match ends.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python scripts/train_v25_clean.py
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import pickle
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import xgboost as xgb
|
||||||
|
import lightgbm as lgb
|
||||||
|
from datetime import datetime
|
||||||
|
from sklearn.metrics import accuracy_score, log_loss, classification_report
|
||||||
|
|
||||||
|
# Add parent directory to path
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
# Config
|
||||||
|
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
DATA_PATH = os.path.join(AI_ENGINE_DIR, "data", "training_data.csv")
|
||||||
|
MODELS_DIR = os.path.join(AI_ENGINE_DIR, "models", "v25")
|
||||||
|
REPORTS_DIR = os.path.join(AI_ENGINE_DIR, "reports", "training_v25")
|
||||||
|
|
||||||
|
os.makedirs(MODELS_DIR, exist_ok=True)
|
||||||
|
os.makedirs(REPORTS_DIR, exist_ok=True)
|
||||||
|
|
||||||
|
# Feature Columns - NO TARGET LEAKAGE
|
||||||
|
# These features are available BEFORE the match starts
|
||||||
|
FEATURES = [
|
||||||
|
# ELO Features (8)
|
||||||
|
"home_overall_elo", "away_overall_elo", "elo_diff",
|
||||||
|
"home_home_elo", "away_away_elo",
|
||||||
|
"home_form_elo", "away_form_elo", "form_elo_diff",
|
||||||
|
|
||||||
|
# Form Features (12)
|
||||||
|
"home_goals_avg", "home_conceded_avg",
|
||||||
|
"away_goals_avg", "away_conceded_avg",
|
||||||
|
"home_clean_sheet_rate", "away_clean_sheet_rate",
|
||||||
|
"home_scoring_rate", "away_scoring_rate",
|
||||||
|
"home_winning_streak", "away_winning_streak",
|
||||||
|
"home_unbeaten_streak", "away_unbeaten_streak",
|
||||||
|
|
||||||
|
# H2H Features (6)
|
||||||
|
"h2h_total_matches", "h2h_home_win_rate", "h2h_draw_rate",
|
||||||
|
"h2h_avg_goals", "h2h_btts_rate", "h2h_over25_rate",
|
||||||
|
|
||||||
|
# Team Stats Features (8)
|
||||||
|
"home_avg_possession", "away_avg_possession",
|
||||||
|
"home_avg_shots_on_target", "away_avg_shots_on_target",
|
||||||
|
"home_shot_conversion", "away_shot_conversion",
|
||||||
|
"home_avg_corners", "away_avg_corners",
|
||||||
|
|
||||||
|
# Odds Features (23 prices/implied probabilities + 20 availability flags) - Market wisdom
|
||||||
|
"odds_ms_h", "odds_ms_d", "odds_ms_a",
|
||||||
|
"implied_home", "implied_draw", "implied_away",
|
||||||
|
"odds_ht_ms_h", "odds_ht_ms_d", "odds_ht_ms_a",
|
||||||
|
"odds_ou05_o", "odds_ou05_u",
|
||||||
|
"odds_ou15_o", "odds_ou15_u",
|
||||||
|
"odds_ou25_o", "odds_ou25_u",
|
||||||
|
"odds_ou35_o", "odds_ou35_u",
|
||||||
|
"odds_ht_ou05_o", "odds_ht_ou05_u",
|
||||||
|
"odds_ht_ou15_o", "odds_ht_ou15_u",
|
||||||
|
"odds_btts_y", "odds_btts_n",
|
||||||
|
"odds_ms_h_present", "odds_ms_d_present", "odds_ms_a_present",
|
||||||
|
"odds_ht_ms_h_present", "odds_ht_ms_d_present", "odds_ht_ms_a_present",
|
||||||
|
"odds_ou05_o_present", "odds_ou05_u_present",
|
||||||
|
"odds_ou15_o_present", "odds_ou15_u_present",
|
||||||
|
"odds_ou25_o_present", "odds_ou25_u_present",
|
||||||
|
"odds_ou35_o_present", "odds_ou35_u_present",
|
||||||
|
"odds_ht_ou05_o_present", "odds_ht_ou05_u_present",
|
||||||
|
"odds_ht_ou15_o_present", "odds_ht_ou15_u_present",
|
||||||
|
"odds_btts_y_present", "odds_btts_n_present",
|
||||||
|
|
||||||
|
# League Features (4)
|
||||||
|
"home_xga", "away_xga",
|
||||||
|
"league_avg_goals", "league_zero_goal_rate",
|
||||||
|
|
||||||
|
# Upset Engine (4)
|
||||||
|
"upset_atmosphere", "upset_motivation", "upset_fatigue", "upset_potential",
|
||||||
|
|
||||||
|
# Referee Engine (5)
|
||||||
|
"referee_home_bias", "referee_avg_goals", "referee_cards_total",
|
||||||
|
"referee_avg_yellow", "referee_experience",
|
||||||
|
|
||||||
|
# Momentum Engine (3)
|
||||||
|
"home_momentum_score", "away_momentum_score", "momentum_diff",
|
||||||
|
|
||||||
|
# Squad Features (9)
|
||||||
|
"home_squad_quality", "away_squad_quality", "squad_diff",
|
||||||
|
"home_key_players", "away_key_players",
|
||||||
|
"home_missing_impact", "away_missing_impact",
|
||||||
|
"home_goals_form", "away_goals_form",
|
||||||
|
]
|
||||||
|
|
||||||
|
# REMOVED: total_goals, ht_total_goals (TARGET LEAKAGE!)
|
||||||
|
# These are only known AFTER the match ends
|
||||||
|
|
||||||
|
print(f"[INFO] Total features: {len(FEATURES)}")
|
||||||
|
|
||||||
|
MARKET_CONFIGS = [
|
||||||
|
{"target": "label_ms", "name": "MS", "num_class": 3},
|
||||||
|
{"target": "label_ou15", "name": "OU15", "num_class": 2},
|
||||||
|
{"target": "label_ou25", "name": "OU25", "num_class": 2},
|
||||||
|
{"target": "label_ou35", "name": "OU35", "num_class": 2},
|
||||||
|
{"target": "label_btts", "name": "BTTS", "num_class": 2},
|
||||||
|
{"target": "label_ht_result", "name": "HT_RESULT", "num_class": 3},
|
||||||
|
{"target": "label_ht_ou05", "name": "HT_OU05", "num_class": 2},
|
||||||
|
{"target": "label_ht_ou15", "name": "HT_OU15", "num_class": 2},
|
||||||
|
{"target": "label_ht_ft", "name": "HTFT", "num_class": 9},
|
||||||
|
{"target": "label_odd_even", "name": "ODD_EVEN", "num_class": 2},
|
||||||
|
{"target": "label_cards_ou45", "name": "CARDS_OU45", "num_class": 2},
|
||||||
|
{"target": "label_handicap_ms", "name": "HANDICAP_MS", "num_class": 3},
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def load_data():
    """Load the training CSV and prepare feature columns.

    Exits the process with status 1 when ``DATA_PATH`` does not exist.
    Feature columns present in the CSV get their NaNs replaced with 0.
    For older CSVs that lack the ``*_present`` availability flags, each
    flag is derived as 1.0 where the matching odds value is above 1.01
    and 0.0 otherwise (including when the odds column itself is absent).

    Returns:
        The prepared ``pandas.DataFrame``.
    """
    if not os.path.exists(DATA_PATH):
        print(f"[ERROR] Data file not found: {DATA_PATH}")
        print("[INFO] Run extract_training_data.py first to generate training data")
        sys.exit(1)

    print(f"[INFO] Loading data from {DATA_PATH}...")
    df = pd.read_csv(DATA_PATH)

    # Fill NaN values
    for col in FEATURES:
        if col in df.columns:
            df[col] = df[col].fillna(0)

    # Backward-compatible derivation for older CSVs without odds availability flags.
    odds_columns = (
        "odds_ms_h", "odds_ms_d", "odds_ms_a",
        "odds_ht_ms_h", "odds_ht_ms_d", "odds_ht_ms_a",
        "odds_ou05_o", "odds_ou05_u",
        "odds_ou15_o", "odds_ou15_u",
        "odds_ou25_o", "odds_ou25_u",
        "odds_ou35_o", "odds_ou35_u",
        "odds_ht_ou05_o", "odds_ht_ou05_u",
        "odds_ht_ou15_o", "odds_ht_ou15_u",
        "odds_btts_y", "odds_btts_n",
    )
    for odds_col in odds_columns:
        flag_col = f"{odds_col}_present"
        if flag_col in df.columns:
            continue
        if odds_col in df.columns:
            odds = pd.to_numeric(df[odds_col], errors="coerce").fillna(0)
            df[flag_col] = (odds > 1.01).astype(float)
        else:
            # No odds column at all: the odd is never available.
            # (pd.to_numeric on a scalar default returns a scalar without
            # .fillna, which made the previous one-liner raise here.)
            df[flag_col] = 0.0

    print(f"[INFO] Shape: {df.shape}")
    print(f"[INFO] Columns: {list(df.columns)}")
    return df
|
||||||
|
|
||||||
|
|
||||||
|
def temporal_split(valid_df: pd.DataFrame):
    """Split rows chronologically into train / validation / test frames.

    Rows are ordered by ``mst_utc`` with a fresh 0..n-1 index. The first
    ~70% become train, the next slice up to ~85% becomes validation and
    the remainder is test. The boundaries are clamped so train holds at
    least one row and the validation end never exceeds ``n - 1``.

    Returns:
        Tuple ``(train_df, val_df, test_df)`` of independent copies.
    """
    chronological = valid_df.sort_values("mst_utc").reset_index(drop=True)
    total = len(chronological)

    first_cut = max(int(total * 0.70), 1)
    second_cut = min(max(int(total * 0.85), first_cut + 1), total - 1)

    head = chronological.iloc[:first_cut].copy()
    middle = chronological.iloc[first_cut:second_cut].copy()
    tail = chronological.iloc[second_cut:].copy()
    return head, middle, tail
|
||||||
|
|
||||||
|
|
||||||
|
def train_xgboost_model(X_train, y_train, X_val, y_val, num_class=3, market_name="MS"):
    """Fit a native XGBoost booster with early stopping on the validation set.

    Uses a softprob multi-class objective when ``num_class`` exceeds 2 and
    a binary logistic objective otherwise. Training runs for at most 1000
    rounds and stops after 50 rounds without improvement on the last
    evaluation set (``val``).

    Returns:
        The trained ``xgboost.Booster``.
    """
    print(f"\n[INFO] Training XGBoost for {market_name}...")

    is_multiclass = num_class > 2
    if is_multiclass:
        objective, metric = "multi:softprob", "mlogloss"
    else:
        objective, metric = "binary:logistic", "logloss"

    booster_params = dict(
        objective=objective,
        eval_metric=metric,
        max_depth=6,
        eta=0.05,
        subsample=0.8,
        colsample_bytree=0.8,
        min_child_weight=3,
        gamma=0.1,
        n_jobs=4,
        random_state=42,
    )
    if is_multiclass:
        booster_params["num_class"] = num_class

    train_matrix = xgb.DMatrix(X_train, label=y_train)
    val_matrix = xgb.DMatrix(X_val, label=y_val)

    history = {}
    booster = xgb.train(
        booster_params,
        train_matrix,
        num_boost_round=1000,
        # The validation set is listed last so it drives early stopping.
        evals=[(train_matrix, 'train'), (val_matrix, 'val')],
        early_stopping_rounds=50,
        evals_result=history,
        verbose_eval=100,
    )

    print(f"[OK] Best iteration: {booster.best_iteration}")
    print(f"[OK] Best score: {booster.best_score:.4f}")
    return booster
|
||||||
|
|
||||||
|
|
||||||
|
def train_lightgbm_model(X_train, y_train, X_val, y_val, num_class=3, market_name="MS"):
    """Fit a LightGBM booster with early stopping on the validation set.

    Uses the multiclass objective when ``num_class`` exceeds 2 and the
    binary objective otherwise. Training runs for at most 1000 rounds with
    a 50-round early-stopping callback, logging every 100 rounds.

    Returns:
        The trained ``lightgbm.Booster``.
    """
    print(f"\n[INFO] Training LightGBM for {market_name}...")

    is_multiclass = num_class > 2
    metric_name = "multi_logloss" if is_multiclass else "binary_logloss"

    booster_params = dict(
        objective="multiclass" if is_multiclass else "binary",
        metric=metric_name,
        max_depth=6,
        learning_rate=0.05,
        feature_fraction=0.8,
        bagging_fraction=0.8,
        bagging_freq=5,
        min_child_samples=20,
        n_jobs=4,
        random_state=42,
        verbose=-1,
    )
    if is_multiclass:
        booster_params["num_class"] = num_class

    fit_set = lgb.Dataset(X_train, label=y_train)
    holdout_set = lgb.Dataset(X_val, label=y_val, reference=fit_set)

    stop_cb = lgb.early_stopping(stopping_rounds=50)
    log_cb = lgb.log_evaluation(period=100)

    booster = lgb.train(
        booster_params,
        fit_set,
        num_boost_round=1000,
        valid_sets=[fit_set, holdout_set],
        valid_names=['train', 'val'],
        callbacks=[stop_cb, log_cb],
    )

    print(f"[OK] Best iteration: {booster.best_iteration}")
    print(f"[OK] Best score: {booster.best_score['val'][metric_name]:.4f}")
    return booster
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate_model(model, X_test, y_test, model_type='xgb', num_class=3):
    """Score a trained booster on the test set and print a report.

    Args:
        model: Trained XGBoost booster (``model_type='xgb'``) or LightGBM
            booster (any other value).
        X_test: Test feature matrix.
        y_test: Integer class labels for the test rows.
        model_type: ``'xgb'`` wraps ``X_test`` in a ``DMatrix``; anything
            else predicts with LightGBM at its best iteration.
        num_class: Kept for interface compatibility; the class count is
            taken from the width of the predicted probability matrix.

    Returns:
        Tuple ``(probs, acc, loss)`` where ``probs`` is an
        ``(n_rows, n_classes)`` probability array.
    """
    if model_type == 'xgb':
        probs = model.predict(xgb.DMatrix(X_test))
    else:  # lgb
        probs = model.predict(X_test, num_iteration=model.best_iteration)

    if probs.ndim == 1:
        # Binary models return P(class 1) only; expand to two columns.
        probs = np.column_stack([1 - probs, probs])

    preds = np.argmax(probs, axis=1)

    acc = accuracy_score(y_test, preds)
    # Pass the full label set explicitly: without it log_loss raises when a
    # class (e.g. a rare HT/FT outcome) is absent from the test window.
    loss = log_loss(y_test, probs, labels=list(range(probs.shape[1])))

    print(f"\n[RESULTS] Test Results:")
    print(f" Accuracy: {acc:.4f}")
    print(f" Log Loss: {loss:.4f}")

    # Per-class metrics
    print("\n[REPORT] Classification Report:")
    print(classification_report(y_test, preds))

    return probs, acc, loss
|
||||||
|
|
||||||
|
|
||||||
|
def train_market(df, target_col, market_name, num_class=3):
    """Train, evaluate and save the XGBoost + LightGBM pair for one market.

    Rows with a missing or empty target are dropped, the remainder is split
    chronologically, both boosters are trained and evaluated, an averaged
    ensemble is scored, and both models are written under ``MODELS_DIR``.

    Args:
        df: Full training frame as returned by ``load_data``.
        target_col: Name of the label column for this market.
        market_name: Short market name used in logs and file names.
        num_class: Number of classes of the target.

    Returns:
        ``(xgb_model, lgb_model, metrics)``; all three are ``None`` when
        fewer than 100 usable rows exist.
    """
    print(f"\n{'='*60}")
    print(f"[MARKET] Training {market_name}")
    print(f"{'='*60}")

    # Filter valid rows
    valid_df = df[df[target_col].notna()].copy()
    valid_df = valid_df[valid_df[target_col].astype(str) != ""].copy()
    print(f"[INFO] Valid samples: {len(valid_df)}")

    if len(valid_df) < 100:
        print(f"[ERROR] Not enough data for {market_name}")
        # Same arity as the success path so callers can always unpack three values.
        return None, None, None

    # Prepare features
    available_features = [f for f in FEATURES if f in valid_df.columns]
    print(f"[INFO] Available features: {len(available_features)}/{len(FEATURES)}")

    train_df, val_df, test_df = temporal_split(valid_df)
    X_train = train_df[available_features].values
    X_val = val_df[available_features].values
    X_test = test_df[available_features].values
    y_train = train_df[target_col].astype(int).values
    y_val = val_df[target_col].astype(int).values
    y_test = test_df[target_col].astype(int).values

    print(
        f"[INFO] Temporal split -> Train: {len(X_train)},"
        f" Val: {len(X_val)}, Test: {len(X_test)}"
    )
    print(
        f"[INFO] Time windows -> train_end={int(train_df['mst_utc'].max())},"
        f" val_end={int(val_df['mst_utc'].max())},"
        f" test_end={int(test_df['mst_utc'].max())}"
    )

    # Train XGBoost
    xgb_model = train_xgboost_model(X_train, y_train, X_val, y_val, num_class, market_name)

    # Train LightGBM
    lgb_model = train_lightgbm_model(X_train, y_train, X_val, y_val, num_class, market_name)

    # Evaluate
    print("\n[INFO] XGBoost Evaluation:")
    xgb_probs, xgb_acc, xgb_loss = evaluate_model(xgb_model, X_test, y_test, 'xgb', num_class)

    print("\n[INFO] LightGBM Evaluation:")
    lgb_probs, lgb_acc, lgb_loss = evaluate_model(lgb_model, X_test, y_test, 'lgb', num_class)

    # Ensemble evaluation: plain average of the two probability matrices.
    ensemble_probs = (xgb_probs + lgb_probs) / 2
    ensemble_preds = np.argmax(ensemble_probs, axis=1)
    ensemble_acc = accuracy_score(y_test, ensemble_preds)
    # Explicit labels keep log_loss working when a class is missing from the test window.
    ensemble_loss = log_loss(
        y_test, ensemble_probs, labels=list(range(ensemble_probs.shape[1]))
    )

    print(f"\n[INFO] Ensemble Evaluation:")
    print(f" Accuracy: {ensemble_acc:.4f}")
    print(f" Log Loss: {ensemble_loss:.4f}")

    # Save models
    xgb_path = os.path.join(MODELS_DIR, f"xgb_v25_{market_name.lower()}.json")
    xgb_model.save_model(xgb_path)
    print(f"[OK] XGBoost saved: {xgb_path}")

    lgb_path = os.path.join(MODELS_DIR, f"lgb_v25_{market_name.lower()}.txt")
    lgb_model.save_model(lgb_path)
    print(f"[OK] LightGBM saved: {lgb_path}")

    metrics = {
        "samples": int(len(valid_df)),
        "features_used": available_features,
        "train_samples": int(len(X_train)),
        "val_samples": int(len(X_val)),
        "test_samples": int(len(X_test)),
        "xgb_accuracy": round(float(xgb_acc), 4),
        "xgb_logloss": round(float(xgb_loss), 4),
        "lgb_accuracy": round(float(lgb_acc), 4),
        "lgb_logloss": round(float(lgb_loss), 4),
        "ensemble_accuracy": round(float(ensemble_acc), 4),
        "ensemble_logloss": round(float(ensemble_loss), 4),
        "class_count": int(num_class),
    }

    return xgb_model, lgb_model, metrics
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the V25 training pipeline for every market in ``MARKET_CONFIGS``.

    Loads the data, trains each market whose target column exists, then
    writes the feature list to ``MODELS_DIR/feature_cols.json`` and the
    per-market metrics to ``REPORTS_DIR/v25_market_metrics.json`` and
    prints a summary.
    """
    print("="*60)
    print("V25 Model Training - NO TARGET LEAKAGE")
    print("="*60)
    print(f"[INFO] Started at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    # Load data
    df = load_data()

    target_cols = [col for col in df.columns if col.startswith('label_')]
    print(f"\n[INFO] Available targets: {target_cols}")

    results = {}
    reports = {
        "trained_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "market_results": {},
    }

    for config in MARKET_CONFIGS:
        target = config["target"]
        market_name = config["name"]
        num_class = config["num_class"]

        if target not in df.columns:
            print(f"[SKIP] {market_name}: missing target column {target}")
            continue

        # train_market may bail out early with fewer values than the success
        # path; a strict three-way unpack would then abort the whole run.
        xgb_model, lgb_model, *extra = train_market(
            df, target, market_name, num_class=num_class
        )
        metrics = extra[0] if extra else None

        results[market_name] = {
            'xgb': xgb_model is not None,
            'lgb': lgb_model is not None,
        }
        reports["market_results"][market_name] = metrics

    # Save feature list
    # NOTE(review): this writes the full FEATURES list, while each market is
    # trained only on the features present in the CSV (see "features_used"
    # in the metrics report) — confirm inference reads the right one.
    feature_path = os.path.join(MODELS_DIR, "feature_cols.json")
    with open(feature_path, 'w') as f:
        json.dump(FEATURES, f, indent=2)
    print(f"\n[OK] Feature list saved: {feature_path}")

    report_path = os.path.join(REPORTS_DIR, "v25_market_metrics.json")
    with open(report_path, "w") as f:
        json.dump(reports, f, indent=2)
    print(f"[OK] Metrics report saved: {report_path}")

    # Summary
    print("\n" + "="*60)
    print("[SUMMARY] Training Results")
    print("="*60)
    for market, status in results.items():
        print(f" {market}: XGB={status['xgb']}, LGB={status['lgb']}")

    print(f"\n[INFO] Completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("[OK] V25 Training Complete!")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,58 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
AI_ENGINE_DIR = Path(__file__).resolve().parents[1]
|
||||||
|
DATA_DIR = AI_ENGINE_DIR / "data" / "v26_shadow"
|
||||||
|
CONFIG_PATH = AI_ENGINE_DIR / "models" / "v26_shadow" / "market_profiles.json"
|
||||||
|
REPORT_PATH = AI_ENGINE_DIR / "reports" / "training_v26_shadow.json"
|
||||||
|
REPORT_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
def _market_accuracy(frame: pd.DataFrame, target_col: str) -> float:
|
||||||
|
if target_col not in frame.columns or frame.empty:
|
||||||
|
return 0.0
|
||||||
|
counts = frame[target_col].value_counts(normalize=True)
|
||||||
|
if counts.empty:
|
||||||
|
return 0.0
|
||||||
|
return round(float(counts.max()), 4)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Write the v26 shadow training report.

    Requires ``train.csv`` and ``validation.csv`` under ``DATA_DIR``
    (exits with a hint otherwise), reads the market profile JSON at
    ``CONFIG_PATH``, and writes row counts plus majority-class shares of
    the validation labels to ``REPORT_PATH``.
    """
    train_csv = DATA_DIR / "train.csv"
    validation_csv = DATA_DIR / "validation.csv"
    if not (train_csv.exists() and validation_csv.exists()):
        raise SystemExit("Run extract_training_data_v26.py first")

    train_df = pd.read_csv(train_csv)
    validation_df = pd.read_csv(validation_csv)
    config = json.loads(CONFIG_PATH.read_text(encoding="utf-8"))

    # Report key -> label column; insertion order fixes the JSON key order.
    prior_columns = {
        "MS": "label_ms",
        "OU25": "label_ou25",
        "BTTS": "label_btts",
        "HT": "label_ht_result",
        "HTFT": "label_ht_ft",
        "CARDS": "label_cards_ou45",
    }
    label_priors = {
        market: _market_accuracy(validation_df, column)
        for market, column in prior_columns.items()
    }

    report = {
        "version": config.get("version"),
        "calibration_version": config.get("calibration_version"),
        "train_rows": int(len(train_df)),
        "validation_rows": int(len(validation_df)),
        "label_priors": label_priors,
        "artifact_path": str(CONFIG_PATH),
        "notes": [
            "v26.shadow runtime currently uses artifact-based calibration and ROI gating",
            "market profile JSON remains the source of truth for runtime thresholds",
        ],
    }

    serialized = json.dumps(report, indent=2)
    REPORT_PATH.write_text(serialized, encoding="utf-8")
    print(f"[OK] Shadow training report written to {REPORT_PATH}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,480 @@
|
|||||||
|
"""
|
||||||
|
V27 Value Sniper — PRO Training Script
|
||||||
|
========================================
|
||||||
|
KEY INSIGHT: Train model WITHOUT odds to get independent probability.
|
||||||
|
Then compare with market odds to find genuine value edges.
|
||||||
|
|
||||||
|
Strategy:
|
||||||
|
Stage A: "Fundamentals Model" — odds-free, learns from ELO/form/rolling/H2H
|
||||||
|
Stage B: "Value Model" — uses fundamentals + odds disagreement as features
|
||||||
|
Stage C: Multi-market — 1X2, O/U 2.5, BTTS
|
||||||
|
Stage D: Walk-forward backtest with Kelly sizing
|
||||||
|
"""
|
||||||
|
import os, sys, json, pickle, time, warnings
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from pathlib import Path
|
||||||
|
from sklearn.metrics import accuracy_score, log_loss
|
||||||
|
from sklearn.isotonic import IsotonicRegression
|
||||||
|
|
||||||
|
warnings.filterwarnings("ignore")
|
||||||
|
|
||||||
|
AI_DIR = Path(__file__).resolve().parent.parent
|
||||||
|
DATA_CSV = AI_DIR / "data" / "training_data_v27.csv"
|
||||||
|
MODELS_DIR = AI_DIR / "models" / "v27"
|
||||||
|
MODELS_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# ── Leakage & category definitions ──
|
||||||
|
LEAKAGE_COLS = [
|
||||||
|
"total_goals", "goal_diff", "ht_total_goals", "ht_goal_diff",
|
||||||
|
"score_home", "score_away", "ht_score_home", "ht_score_away",
|
||||||
|
"home_goals_form", "away_goals_form",
|
||||||
|
"home_squad_quality", "away_squad_quality", "squad_diff",
|
||||||
|
"home_key_players", "away_key_players",
|
||||||
|
"home_missing_impact", "away_missing_impact",
|
||||||
|
"referee_home_bias", "referee_avg_goals", "referee_cards_total",
|
||||||
|
"referee_avg_yellow", "referee_avg_red", "referee_penalty_rate",
|
||||||
|
"referee_over25_rate", "referee_experience", "referee_matches",
|
||||||
|
]
|
||||||
|
LABEL_COLS = [c for c in [] ] # populated dynamically
|
||||||
|
META_COLS = ["match_id", "league_name", "home_team", "away_team"]
|
||||||
|
ODDS_COLS_PATTERNS = ["odds_", "implied_"]
|
||||||
|
|
||||||
|
|
||||||
|
def get_odds_cols(df):
    """Return the columns of ``df`` whose name starts with any odds prefix."""
    prefixes = tuple(ODDS_COLS_PATTERNS)
    return [name for name in df.columns if name.startswith(prefixes)]
|
||||||
|
|
||||||
|
|
||||||
|
def get_label_cols(df):
    """Return the columns of ``df`` whose name starts with ``label_``."""
    return list(filter(lambda name: name.startswith("label_"), df.columns))
|
||||||
|
|
||||||
|
|
||||||
|
def get_clean_features(df):
    """Select fundamentals-only feature columns (no odds, no leakage).

    Drops odds/implied columns, label columns, the ``LEAKAGE_COLS`` and
    ``META_COLS`` lists and every ``*_id`` column, then keeps only columns
    where more than 30% of the rows parse as numeric.
    """
    banned = set(LEAKAGE_COLS) | set(META_COLS)
    banned.update(get_odds_cols(df))
    banned.update(get_label_cols(df))
    # Also exclude ID columns
    banned.update(c for c in df.columns if c.endswith("_id") and c != "match_id")

    numeric_floor = len(df)*0.3
    selected = []
    for col in df.columns:
        if col in banned:
            continue
        # Keep only numeric
        parsed_count = pd.to_numeric(df[col], errors="coerce").notna().sum()
        if parsed_count > numeric_floor:
            selected.append(col)
    return selected
|
||||||
|
|
||||||
|
|
||||||
|
def load_data():
    """Load the V27 training CSV and attach margin-free 1X2 probabilities.

    Rows are kept only when all three match-result odds parse as numbers
    above 1.01. OU 2.5 odds columns, when present, are coerced to numeric.
    ``implied_h`` / ``implied_d`` / ``implied_a`` are the inverse odds
    normalised to sum to one.

    Returns:
        The filtered ``pandas.DataFrame`` with the implied columns added.
    """
    print(f"Loading {DATA_CSV}...")
    df = pd.read_csv(DATA_CSV, low_memory=False)
    print(f" Raw: {len(df)} rows")

    # Ensure odds exist for value comparison
    ms_odds = ["odds_ms_h", "odds_ms_d", "odds_ms_a"]
    for c in ms_odds:
        df[c] = pd.to_numeric(df[c], errors="coerce")
    df = df.dropna(subset=ms_odds)
    # Copy after filtering so the column assignments below write to an
    # independent frame rather than to a slice of the raw one.
    df = df[(df.odds_ms_h > 1.01) & (df.odds_ms_d > 1.01) & (df.odds_ms_a > 1.01)].copy()

    # OU25 odds
    for c in ["odds_ou25_over", "odds_ou25_under"]:
        if c in df.columns:
            df[c] = pd.to_numeric(df[c], errors="coerce")

    # Implied probabilities: divide by the overround to strip the bookmaker margin.
    margin = 1/df.odds_ms_h + 1/df.odds_ms_d + 1/df.odds_ms_a
    df["implied_h"] = (1/df.odds_ms_h)/margin
    df["implied_d"] = (1/df.odds_ms_d)/margin
    df["implied_a"] = (1/df.odds_ms_a)/margin

    print(f" After filter: {len(df)} rows")
    return df
|
||||||
|
|
||||||
|
|
||||||
|
def temporal_split(df, val_ratio=0.15, test_ratio=0.10):
    """Cut ``df`` by row position into train / validation / test copies.

    The trailing ``test_ratio`` share is test, the ``val_ratio`` share
    before it is validation and everything earlier is train. Rows are not
    re-sorted here.
    NOTE(review): presumably the caller passes rows already in
    chronological order — confirm.
    """
    total = len(df)
    train_stop = int(total*(1-val_ratio-test_ratio))
    val_stop = int(total*(1-test_ratio))

    bounds = ((None, train_stop), (train_stop, val_stop), (val_stop, None))
    train_part, val_part, test_part = (df.iloc[lo:hi].copy() for lo, hi in bounds)
    return train_part, val_part, test_part
|
||||||
|
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════════════════════
|
||||||
|
# STAGE A: Fundamentals-Only Model (NO ODDS)
|
||||||
|
# ═══════════════════════════════════════════════════════════════════
|
||||||
|
def train_fundamentals_model(X_tr, y_tr, X_va, y_va, feat_cols, market="ms"):
    """Train ensemble WITHOUT odds features.

    Fits up to three gradient-boosting models (XGBoost, LightGBM, CatBoost),
    each with early stopping on the validation set, and prints the validation
    accuracy of every model that was trained. A backend whose package is not
    installed is skipped with a printed notice.

    Args:
        X_tr, y_tr: training features and labels.
        X_va, y_va: validation features and labels (used for early stopping).
        feat_cols: feature names, passed to XGBoost's DMatrix.
        market: ``"ms"`` trains a 3-class model; any other value trains a
            binary model.

    Returns:
        dict: trained models keyed by ``"xgb"``, ``"lgb"`` and/or ``"cb"``.
        Empty when none of the three libraries is installed.
    """
    n_class = 3 if market == "ms" else 2
    multiclass = n_class == 3
    tag = market.upper()
    models = {}

    def _report_accuracy(proba):
        # Binary xgb/lgb boosters return a 1-D P(class 1); expand to two
        # columns so argmax works the same way as for multi-class output.
        proba = np.asarray(proba)
        if proba.ndim == 1:
            proba = np.column_stack([1 - proba, proba])
        acc = accuracy_score(y_va, proba.argmax(1))
        print(f" acc={acc:.4f}")

    # XGBoost
    try:
        import xgboost as xgb
    except ImportError:
        print(" [XGB] xgboost not installed, skipping")
    else:
        print(f" [XGB] Training {tag}...")
        dtrain = xgb.DMatrix(X_tr, label=y_tr, feature_names=feat_cols)
        dval = xgb.DMatrix(X_va, label=y_va, feature_names=feat_cols)
        params = {
            "objective": "multi:softprob" if multiclass else "binary:logistic",
            "eval_metric": "mlogloss" if multiclass else "logloss",
            "max_depth": 6, "learning_rate": 0.02, "subsample": 0.75,
            "colsample_bytree": 0.75, "min_child_weight": 10,
            "reg_alpha": 0.5, "reg_lambda": 2.0,
            "verbosity": 0, "tree_method": "hist",
        }
        if multiclass:
            params["num_class"] = 3
        booster = xgb.train(params, dtrain, num_boost_round=2000,
                            evals=[(dval, "val")], early_stopping_rounds=80,
                            verbose_eval=False)
        _report_accuracy(booster.predict(dval))
        models["xgb"] = booster

    # LightGBM
    try:
        import lightgbm as lgb
    except ImportError:
        print(" [LGB] lightgbm not installed, skipping")
    else:
        print(f" [LGB] Training {tag}...")
        ds_tr = lgb.Dataset(X_tr, label=y_tr)
        ds_va = lgb.Dataset(X_va, label=y_va, reference=ds_tr)
        par = {
            "objective": "multiclass" if multiclass else "binary",
            "metric": "multi_logloss" if multiclass else "binary_logloss",
            "num_leaves": 48, "learning_rate": 0.02,
            "feature_fraction": 0.7, "bagging_fraction": 0.7,
            "bagging_freq": 1, "min_child_samples": 30,
            "lambda_l1": 0.5, "lambda_l2": 2.0, "verbose": -1,
        }
        if multiclass:
            par["num_class"] = 3
        booster = lgb.train(par, ds_tr, 2000, valid_sets=[ds_va],
                            callbacks=[lgb.early_stopping(80, verbose=False)])
        _report_accuracy(booster.predict(X_va))
        models["lgb"] = booster

    # CatBoost
    try:
        from catboost import CatBoostClassifier
    except ImportError:
        print(" [CB] catboost not installed, skipping")
    else:
        print(f" [CB] Training {tag}...")
        extra = {"classes_count": 3} if multiclass else {}
        clf = CatBoostClassifier(
            iterations=2000, learning_rate=0.02, depth=6,
            l2_leaf_reg=5, loss_function="MultiClass" if multiclass else "Logloss",
            early_stopping_rounds=80, verbose=0, task_type="CPU",
            **extra,
        )
        clf.fit(X_tr, y_tr, eval_set=(X_va, y_va))
        _report_accuracy(clf.predict_proba(X_va))
        models["cb"] = clf

    return models
|
||||||
|
|
||||||
|
|
||||||
|
def ensemble_predict(models, X, feat_cols, n_class=3):
    """Average the class-probability predictions of all trained models.

    Args:
        models: dict keyed by ``"xgb"``, ``"lgb"`` and/or ``"cb"`` as
            produced by ``train_fundamentals_model``.
        X: feature matrix to score.
        feat_cols: feature names; only used to build the XGBoost DMatrix.
        n_class: number of classes. For ``2``, 1-D positive-class outputs
            from xgb/lgb are expanded to ``[1 - p, p]`` columns.

    Returns:
        numpy.ndarray: element-wise mean of the per-model probability arrays.

    Raises:
        RuntimeError: if ``models`` is empty.
        ValueError: if ``models`` contains a key other than xgb/lgb/cb.
            Previously such a key silently re-used the preceding model's
            predictions (or hit a NameError when it came first).
    """
    if not models:
        raise RuntimeError("No models!")

    def _two_column(proba):
        proba = np.asarray(proba)
        if n_class == 2 and proba.ndim == 1:
            return np.column_stack([1 - proba, proba])
        return proba

    preds = []
    for name, model in models.items():
        if name == "xgb":
            # Imported lazily: xgboost is optional and only needed when an
            # xgb model is actually present.
            import xgboost as xgb
            proba = _two_column(model.predict(xgb.DMatrix(X, feature_names=feat_cols)))
        elif name == "lgb":
            proba = _two_column(model.predict(X))
        elif name == "cb":
            proba = np.asarray(model.predict_proba(X))
        else:
            raise ValueError(f"Unknown model key: {name!r}")
        preds.append(proba)

    return np.mean(preds, axis=0)
|
||||||
|
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════════════════════
|
||||||
|
# STAGE B: Walk-Forward Backtest with Kelly
|
||||||
|
# ═══════════════════════════════════════════════════════════════════
|
||||||
|
def kelly_fraction(model_prob, odds, fraction=0.25, max_fraction=0.10):
    """Fractional Kelly: f = fraction * (p*odds - 1) / (odds - 1)

    Args:
        model_prob: model probability of the outcome.
        odds: decimal odds offered for the outcome.
        fraction: multiplier applied to the full Kelly stake.
        max_fraction: upper bound on the returned share of bankroll
            (defaults to the previous hard-coded 10% cap).

    Returns:
        float: share of bankroll to stake, in ``[0, max_fraction]``.
        ``0.0`` when there is no positive edge, when ``odds <= 1``, or when
        the edge is NaN.
    """
    edge = model_prob * odds - 1
    # "not edge > 0" (rather than "edge <= 0") also rejects a NaN edge.
    if odds <= 1 or not edge > 0:
        return 0.0
    full_kelly = edge / (odds - 1)
    return float(max(0.0, min(fraction * full_kelly, max_fraction)))
|
||||||
|
|
||||||
|
|
||||||
|
def _ou_odds_column(df, candidates, fallback):
    """Return the first present odds column as a float array, NaN -> fallback."""
    for col in candidates:
        if col in df.columns:
            return pd.to_numeric(df[col], errors="coerce").fillna(fallback).values
    return np.full(len(df), fallback)


def backtest_value(models, df_test, feat_cols, market="ms",
                   min_edge=0.05, min_odds=1.40, max_odds=4.50,
                   use_kelly=True, kelly_frac=0.15, flat_stake=10.0,
                   initial_bankroll=1000.0):
    """Realistic backtest: flat or Kelly sizing, edge filtering.

    For every test row and every outcome class, a bet is placed when the
    model probability exceeds the bookmaker-implied probability by at least
    ``min_edge`` and the odds lie in ``[min_odds, max_odds]``. Outcomes with
    implied probability above 0.65 additionally need an edge of 0.08.
    Bets are settled in row order against a running bankroll.

    Args:
        models: trained models, as accepted by ``ensemble_predict``.
        df_test: frame holding ``feat_cols``, the label column and the odds
            (plus ``implied_*`` columns for the ``"ms"`` market).
        feat_cols: feature column names fed to the models.
        market: ``"ms"`` (1X2) or ``"ou25"`` (over/under 2.5).
        min_edge, min_odds, max_odds: bet filters described above.
        use_kelly: size with fractional Kelly when True, else ``flat_stake``.
        kelly_frac: Kelly multiplier passed to ``kelly_fraction``.
        flat_stake: stake per bet when ``use_kelly`` is False.
        initial_bankroll: starting bankroll.

    Returns:
        dict: keys ``bets``, ``total``, ``wins``, ``pnl`` and
        ``bankroll_curve``; an empty dict for an unsupported market or when
        the O/U label column is missing.
    """
    if market == "ms":
        n_class = 3
        y = df_test["label_ms"].values
        odds_arr = df_test[["odds_ms_h", "odds_ms_d", "odds_ms_a"]].values
        implied = df_test[["implied_h", "implied_d", "implied_a"]].values
        class_names = ["Home", "Draw", "Away"]
    elif market == "ou25":
        if "label_ou25" not in df_test.columns:
            return {}
        n_class = 2
        y = df_test["label_ou25"].values
        # Accept both column spellings: this function historically read
        # "_o/_u" while load_data() coerces "_over/_under".
        # NOTE(review): rows without O/U odds are priced at an assumed 1.85,
        # so their simulated returns are not based on real prices.
        o_over = _ou_odds_column(df_test, ("odds_ou25_o", "odds_ou25_over"), 1.85)
        o_under = _ou_odds_column(df_test, ("odds_ou25_u", "odds_ou25_under"), 1.85)
        odds_arr = np.column_stack([o_under, o_over])
        inverse = 1 / odds_arr
        implied = inverse / inverse.sum(axis=1, keepdims=True)
        class_names = ["Under", "Over"]
    else:
        return {}

    probs = ensemble_predict(models, df_test[feat_cols].values, feat_cols, n_class)

    bankroll = float(initial_bankroll)
    results = {"bets": [], "total": 0, "wins": 0, "pnl": 0.0,
               "bankroll_curve": [bankroll]}

    for i, outcome in enumerate(y):
        # A missing label means the result is unknown; settling it would
        # count every such bet as a loss.
        if pd.isna(outcome):
            continue

        for cls in range(n_class):
            edge = probs[i, cls] - implied[i, cls]
            odds_val = odds_arr[i, cls]

            # FILTERS ("not >=" also rejects a NaN edge)
            if not edge >= min_edge:
                continue
            if odds_val < min_odds or odds_val > max_odds:
                continue
            # Don't bet on heavy favorites with tiny edge
            if implied[i, cls] > 0.65 and edge < 0.08:
                continue

            # Sizing
            if use_kelly:
                stake = bankroll * kelly_fraction(probs[i, cls], odds_val,
                                                  fraction=kelly_frac)
            else:
                stake = flat_stake

            if stake < 1:
                continue

            won = (outcome == cls)
            pnl = stake * (odds_val - 1) if won else -stake
            bankroll += pnl

            results["bets"].append({
                "edge": float(edge), "odds": float(odds_val),
                "model_p": float(probs[i, cls]), "implied_p": float(implied[i, cls]),
                "won": bool(won), "pnl": float(pnl), "stake": float(stake),
                "class": class_names[cls],
            })
            results["bankroll_curve"].append(bankroll)
            results["total"] += 1
            if won:
                results["wins"] += 1

    results["pnl"] = bankroll - initial_bankroll
    return results
|
||||||
|
|
||||||
|
|
||||||
|
def _max_drawdown_pct(curve):
    """Return the largest peak-to-trough decline of ``curve`` in percent (<= 0)."""
    if not curve:
        return 0.0
    peak = curve[0]
    worst = 0.0
    for value in curve:
        if value > peak:
            peak = value
        if peak > 0:
            worst = min(worst, (value - peak) / peak * 100)
    return worst


def print_backtest(results, label=""):
    """Print a summary of a ``backtest_value`` result.

    Shows bet count, hit rate, ROI on total stake, PnL, final bankroll and
    maximum drawdown, followed by a per-class breakdown.

    Drawdown is measured against the running peak, so troughs that occur
    before the all-time high are no longer compared with that later high.
    The per-class figure is ROI (class PnL / class stake); the previous
    "avg_pnl" value was PnL per bet times 100 and was not a percentage.

    Args:
        results: dict returned by ``backtest_value`` (may be empty).
        label: heading printed above the summary.
    """
    total = results.get("total", 0)
    if total == 0:
        print(f" {label}: No bets placed")
        return

    bets = results["bets"]
    pnl = results["pnl"]
    curve = results["bankroll_curve"]

    staked = sum(b["stake"] for b in bets)
    hit = results["wins"] / total * 100
    roi = pnl / staked * 100 if staked else 0.0
    dd = _max_drawdown_pct(curve)

    # Per-class breakdown
    by_class = {}
    for b in bets:
        agg = by_class.setdefault(b["class"], {"n": 0, "w": 0, "pnl": 0.0, "stake": 0.0})
        agg["n"] += 1
        agg["w"] += 1 if b["won"] else 0
        agg["pnl"] += b["pnl"]
        agg["stake"] += b["stake"]

    print(f"\n {label}")
    print(f" Bets: {total} | Hit: {hit:.1f}% | ROI: {roi:+.1f}%")
    print(f" PnL: {pnl:+.0f} | Final: {curve[-1]:.0f} | MaxDD: {dd:.1f}%")
    for cls, d in sorted(by_class.items()):
        cls_roi = d["pnl"] / d["stake"] * 100 if d["stake"] else 0.0
        print(f" {cls:6s}: {d['n']:4d} bets, "
              f"hit={d['w']/d['n']*100:.1f}%, roi={cls_roi:+.1f}%")
|
||||||
|
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════════════════════
|
||||||
|
# MAIN
|
||||||
|
# ═══════════════════════════════════════════════════════════════════
|
||||||
|
def main():
    """Run the V27 pipeline end to end: load, train, backtest and persist.

    Steps:
      1. Load the CSV and select odds-free feature columns.
      2. Split positionally into train / validation / test.
      3. Impute missing feature values with TRAIN medians and drop features
         that are constant on the training slice. Previously both steps
         used the whole frame, leaking validation/test statistics into
         training.
      4. Train the 1X2 ensemble and, when ``label_ou25`` exists with more
         than 1000 labelled training rows, the O/U 2.5 ensemble.
      5. Backtest on the test slice over a grid of edge / min-odds settings.
      6. Write models (pickle), feature importances (CSV), metadata and the
         feature list (JSON) into ``MODELS_DIR``.
    """
    banner = "=" * 65
    rule = "─" * 65

    print(banner)
    print(" V27 VALUE SNIPER — PRO TRAINING (Odds-Free Fundamentals)")
    print(banner)
    t0 = time.time()

    df = load_data()
    clean_feats = get_clean_features(df)
    print(f" Clean features (no odds): {len(clean_feats)}")

    # Numerify
    for c in clean_feats:
        df[c] = pd.to_numeric(df[c], errors="coerce")

    # Split first, so that imputation statistics come from the training
    # slice only.
    tr, va, te = temporal_split(df)
    train_medians = tr[clean_feats].median()
    for part in (tr, va, te):
        part[clean_feats] = part[clean_feats].fillna(train_medians)

    # Remove constant columns (judged on the training slice; a column that
    # is entirely NaN in train has zero unique values and is dropped too)
    clean_feats = [c for c in clean_feats if tr[c].nunique() > 1]
    print(f" After removing constants: {len(clean_feats)}")

    print(f" Train: {len(tr)}, Val: {len(va)}, Test: {len(te)}")
    print(f" Target: H={tr.label_ms.eq(0).mean():.1%}, "
          f"D={tr.label_ms.eq(1).mean():.1%}, A={tr.label_ms.eq(2).mean():.1%}")

    X_tr = tr[clean_feats].values
    y_tr = tr["label_ms"].values
    X_va = va[clean_feats].values
    y_va = va["label_ms"].values

    # ── STAGE A: Train fundamentals model (1X2) ──
    print("\n" + rule)
    print(" STAGE A: Fundamentals-Only 1X2 Model")
    print(rule)
    ms_models = train_fundamentals_model(X_tr, y_tr, X_va, y_va, clean_feats, "ms")

    val_probs = ensemble_predict(ms_models, X_va, clean_feats, 3)
    val_acc = accuracy_score(y_va, val_probs.argmax(1))
    val_ll = log_loss(y_va, val_probs)
    print(f"\n Ensemble Val: acc={val_acc:.4f}, logloss={val_ll:.4f}")

    # Compare with odds baseline (pick the outcome the bookmaker favours)
    odds_pred = va[["implied_h", "implied_d", "implied_a"]].values.argmax(1)
    odds_acc = accuracy_score(y_va, odds_pred)
    print(f" Odds baseline: acc={odds_acc:.4f}")
    print(f" Model vs Odds: {val_acc - odds_acc:+.4f}")

    # ── STAGE A.2: O/U 2.5 model ──
    ou_models = None
    if "label_ou25" in tr.columns:
        print("\n" + rule)
        print(" STAGE A.2: Fundamentals-Only O/U 2.5 Model")
        print(rule)
        y_tr_ou = tr["label_ou25"].values
        y_va_ou = va["label_ou25"].values
        # pd.isna works for float, int and object label arrays alike.
        mask_tr = ~pd.isna(y_tr_ou)
        mask_va = ~pd.isna(y_va_ou)
        if mask_tr.sum() > 1000:
            ou_models = train_fundamentals_model(
                X_tr[mask_tr], y_tr_ou[mask_tr].astype(int),
                X_va[mask_va], y_va_ou[mask_va].astype(int),
                clean_feats, "ou25")

    # ── STAGE B: Backtest ──
    print("\n" + rule)
    print(" STAGE B: Walk-Forward Backtest (Test Set)")
    print(rule)

    # Try multiple edge thresholds.
    # NOTE(review): the best configuration is chosen on the same test slice
    # it is reported on, so the printed "best" ROI is optimistically biased.
    best_roi = -999
    best_cfg = {}
    for min_edge in [0.03, 0.05, 0.07, 0.10, 0.12, 0.15]:
        for min_odds in [1.35, 1.50, 1.70]:
            r = backtest_value(ms_models, te, clean_feats, "ms",
                               min_edge=min_edge, min_odds=min_odds,
                               max_odds=5.0, use_kelly=True)
            if r.get("total", 0) < 20:
                continue
            invested = sum(b["stake"] for b in r["bets"])
            roi = r["pnl"] / invested * 100 if invested > 0 else -100
            if roi > best_roi:
                best_roi = roi
                best_cfg = {"edge": min_edge, "min_odds": min_odds, "result": r}

    if best_cfg:
        cfg = best_cfg
        print(f"\n Best 1X2 Config: edge>{cfg['edge']}, odds>{cfg['min_odds']}")
        print_backtest(cfg["result"], "1X2 VALUE")

    # Flat bet comparison
    print("\n --- Flat Bet Comparison ---")
    for edge in [0.05, 0.07, 0.10]:
        r = backtest_value(ms_models, te, clean_feats, "ms",
                           min_edge=edge, min_odds=1.50, max_odds=4.5,
                           use_kelly=False)
        if r.get("total", 0) > 0:
            # Sum the recorded stakes instead of assuming the flat stake size.
            inv = sum(b["stake"] for b in r["bets"])
            roi = r["pnl"] / inv * 100
            print(f" Edge>{edge:.2f}: {r['total']} bets, "
                  f"hit={r['wins']/r['total']*100:.1f}%, ROI={roi:+.1f}%")

    # OU25 backtest
    if ou_models:
        print("\n --- O/U 2.5 Backtest ---")
        for edge in [0.05, 0.07, 0.10]:
            r = backtest_value(ou_models, te, clean_feats, "ou25",
                               min_edge=edge, min_odds=1.50, max_odds=3.0,
                               use_kelly=True)
            if r.get("total", 0) > 0:
                print_backtest(r, f"OU25 edge>{edge}")

    # Make sure the output directory exists before anything is written.
    MODELS_DIR.mkdir(parents=True, exist_ok=True)

    # ── Feature importance ──
    if "lgb" in ms_models:
        imp = ms_models["lgb"].feature_importance(importance_type="gain")
        imp_df = pd.DataFrame({"feature": clean_feats, "importance": imp}
                              ).sort_values("importance", ascending=False)
        print("\n TOP 15 FEATURES (no odds!):")
        for _, row in imp_df.head(15).iterrows():
            print(f" {row['feature']:40s} {row['importance']:.0f}")
        imp_df.to_csv(MODELS_DIR / "v27_feature_importance.csv", index=False)

    # ── Save ──
    print("\n" + rule)
    print(" SAVING MODELS")
    print(rule)
    for name, m in ms_models.items():
        p = MODELS_DIR / f"v27_ms_{name}.pkl"
        with open(p, "wb") as f:
            pickle.dump(m, f)
        print(f" ✓ {p.name}")

    if ou_models:
        for name, m in ou_models.items():
            p = MODELS_DIR / f"v27_ou25_{name}.pkl"
            with open(p, "wb") as f:
                pickle.dump(m, f)
            print(f" ✓ {p.name}")

    meta = {
        "version": "v27-pro", "trained_at": time.strftime("%Y-%m-%d %H:%M:%S"),
        "approach": "odds-free fundamentals + value edge detection",
        "feature_count": len(clean_feats),
        "total_samples": len(df),
        "val_acc": round(val_acc, 4), "val_ll": round(val_ll, 4),
        "best_config": {k: v for k, v in best_cfg.items() if k != "result"} if best_cfg else {},
        "markets": ["ms"] + (["ou25"] if ou_models else []),
    }
    with open(MODELS_DIR / "v27_metadata.json", "w") as f:
        json.dump(meta, f, indent=2, default=str)
    with open(MODELS_DIR / "v27_feature_cols.json", "w") as f:
        json.dump(clean_feats, f, indent=2)
    print(f" ✓ metadata + feature_cols")

    print(f"\n Total time: {(time.time()-t0)/60:.1f} min")
    print(" DONE!")
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: run the training pipeline only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    main()
|
||||||
@@ -0,0 +1,137 @@
|
|||||||
|
"""
|
||||||
|
VQWEN Model Training Script (Optimized)
|
||||||
|
========================================
|
||||||
|
Fast, efficient, uses all 180k+ matches with rich features.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
import pickle
|
||||||
|
import psycopg2
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
import lightgbm as lgb
|
||||||
|
|
||||||
|
AI_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
ROOT_DIR = os.path.dirname(AI_DIR)
|
||||||
|
sys.path.insert(0, ROOT_DIR)
|
||||||
|
|
||||||
|
def get_clean_dsn() -> str:
    """Return the PostgreSQL DSN used by this training script.

    The ``VQWEN_DB_DSN`` environment variable takes precedence when set to a
    non-empty value, so credentials can be supplied without editing source.

    Returns:
        str: a ``postgresql://`` connection string.
    """
    # SECURITY NOTE(review): the fallback embeds a username and password in
    # version control. It is kept only so existing invocations keep working;
    # rotate the credential and remove the literal once VQWEN_DB_DSN is
    # configured wherever this script runs.
    fallback = "postgresql://suggestbet:SuGGesT2026SecuRe@localhost:15432/boilerplate_db"
    return os.environ.get("VQWEN_DB_DSN") or fallback
|
||||||
|
|
||||||
|
def _recent_avg_sql(value_expr, team_col, team_ref, limit, default):
    """Build a scalar subquery averaging ``value_expr`` over a team's last ``limit`` finished matches before ``m``."""
    # ORDER BY + LIMIT must sit in an inner query: a LIMIT placed directly on
    # "SELECT AVG(...)" limits the single aggregate row, not the rows that
    # are averaged. All arguments are constants from this module, never user
    # input.
    return (
        "COALESCE((SELECT AVG(recent.v) FROM ("
        f"SELECT {value_expr} AS v FROM matches m2 "
        f"WHERE m2.{team_col} = {team_ref} AND m2.status = 'FT' "
        "AND m2.mst_utc < m.mst_utc "
        f"ORDER BY m2.mst_utc DESC LIMIT {limit}) recent), {default})"
    )


def train_vqwen():
    """Train the VQWEN 1X2, over/under 2.5 and BTTS LightGBM models.

    Pulls up to 200k finished football matches with odds from PostgreSQL,
    derives form / goal-average / power / implied-probability features and
    writes three pickled boosters to ``<ROOT_DIR>/models/vqwen``.

    Changes relative to the earlier version:
      * "last 5" form and "last 10" goal averages now really use the most
        recent N matches (the old ``LIMIT`` on an aggregate had no effect).
      * Goal averages only use matches played BEFORE the match being
        described; they previously included that match and later ones.
      * Rows with non-positive or <= 1.0 odds are dropped before computing
        ``1 / odds``.
      * The database connection is always closed, even when a step fails.
    """
    print("🧠 VQWEN MODEL EĞİTİMİ (OPTIMIZED)")
    print("="*60)

    home_pts = ("CASE WHEN m2.score_home > m2.score_away THEN 3 "
                "WHEN m2.score_home = m2.score_away THEN 1 ELSE 0 END")
    away_pts = ("CASE WHEN m2.score_away > m2.score_home THEN 3 "
                "WHEN m2.score_away = m2.score_home THEN 1 ELSE 0 END")

    home_form = _recent_avg_sql(home_pts, "home_team_id", "m.home_team_id", 5, 0)
    away_form = _recent_avg_sql(away_pts, "away_team_id", "m.away_team_id", 5, 0)
    h_avg_scored = _recent_avg_sql("m2.score_home", "home_team_id", "m.home_team_id", 10, 1.2)
    a_avg_scored = _recent_avg_sql("m2.score_away", "away_team_id", "m.away_team_id", 10, 1.2)
    # NOTE(review): the two "conceded" columns keep their original
    # definition, which averages goals SCORED by the team at the opposite
    # venue (e.g. the home side's goals in its away matches), not goals
    # conceded. Confirm the intended definition before changing it.
    h_avg_conceded = _recent_avg_sql("m2.score_away", "away_team_id", "m.home_team_id", 10, 1.2)
    a_avg_conceded = _recent_avg_sql("m2.score_home", "home_team_id", "m.away_team_id", 10, 1.2)

    # ─── 1. FAST DATA FETCH (optimized query) ───
    # NOTE(review): the team-stat columns (possession, shots on target,
    # corners) are joined on the match being predicted, i.e. they look like
    # in-match values unavailable before kick-off -- verify this is intended.
    query = f"""
    SELECT
        m.id, m.home_team_id, m.away_team_id, m.score_home, m.score_away,
        -- Odds
        (SELECT os.odd_value FROM odd_categories oc JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
         WHERE oc.match_id = m.id AND oc.name ILIKE 'Maç Sonucu' AND os.name = '1' LIMIT 1) as odds_h,
        (SELECT os.odd_value FROM odd_categories oc JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
         WHERE oc.match_id = m.id AND oc.name ILIKE 'Maç Sonucu' AND os.name = 'X' LIMIT 1) as odds_d,
        (SELECT os.odd_value FROM odd_categories oc JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
         WHERE oc.match_id = m.id AND oc.name ILIKE 'Maç Sonucu' AND os.name = '2' LIMIT 1) as odds_a,
        -- Form (Last 5)
        {home_form} as home_form,
        {away_form} as away_form,
        -- Goal Averages (Last 10)
        {h_avg_scored} as h_avg_scored,
        {h_avg_conceded} as h_avg_conceded,
        {a_avg_scored} as a_avg_scored,
        {a_avg_conceded} as a_avg_conceded,
        -- Team Stats
        COALESCE(ts_home.possession_percentage, 50) as h_poss,
        COALESCE(ts_home.shots_on_target, 4) as h_sot,
        COALESCE(ts_home.corners, 5) as h_corners,
        COALESCE(ts_away.possession_percentage, 50) as a_poss,
        COALESCE(ts_away.shots_on_target, 3) as a_sot,
        COALESCE(ts_away.corners, 4) as a_corners
    FROM matches m
    LEFT JOIN football_team_stats ts_home ON ts_home.match_id = m.id AND ts_home.team_id = m.home_team_id
    LEFT JOIN football_team_stats ts_away ON ts_away.match_id = m.id AND ts_away.team_id = m.away_team_id
    WHERE m.status = 'FT' AND m.score_home IS NOT NULL AND m.sport = 'football'
    AND EXISTS (SELECT 1 FROM odd_categories oc WHERE oc.match_id = m.id)
    ORDER BY m.mst_utc DESC
    LIMIT 200000
    """

    # The connection is only needed for the fetch; release it in all cases.
    conn = psycopg2.connect(get_clean_dsn())
    try:
        cur = conn.cursor()
        try:
            print("📊 Veritabanından özellikler çekiliyor (Limit 200k)...")
            start = time.time()
            cur.execute(query)
            rows = cur.fetchall()
            print(f"✅ {len(rows)} maç çekildi ({time.time()-start:.1f}s)")
        finally:
            cur.close()
    finally:
        conn.close()

    df = pd.DataFrame(rows, columns=[
        'id', 'h_id', 'a_id', 'sh', 'sa', 'oh', 'od', 'oa',
        'h_form', 'a_form', 'h_sc', 'h_co', 'a_sc', 'a_co',
        'h_poss', 'h_sot', 'h_corn', 'a_poss', 'a_sot', 'a_corn'
    ])

    for col in df.columns[5:]:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    df = df.fillna(df.median(numeric_only=True))
    # Odds at or below 1.0 would make the implied probabilities infinite or
    # meaningless.
    df = df[(df[['oh', 'od', 'oa']] > 1.0).all(axis=1)].copy()

    # ─── 2. FEATURE ENGINEERING ───
    df['h_xg'] = (df['h_sc'] + df['a_co']) / 2
    df['a_xg'] = (df['a_sc'] + df['h_co']) / 2
    df['total_xg'] = df['h_xg'] + df['a_xg']

    df['h_pow'] = (df['h_form']*10) + (df['h_sc']*5) - (df['h_co']*5) + (df['h_sot']*2)
    df['a_pow'] = (df['a_form']*10) + (df['a_sc']*5) - (df['a_co']*5) + (df['a_sot']*2)
    df['pow_diff'] = df['h_pow'] - df['a_pow']

    margin = (1/df['oh']) + (1/df['od']) + (1/df['oa'])
    df['imp_h'] = (1/df['oh']) / margin
    df['imp_d'] = (1/df['od']) / margin
    df['imp_a'] = (1/df['oa']) / margin

    # Targets: 0 = home win, 1 = draw, 2 = away win
    df['t_ms'] = np.where(df['sh'] > df['sa'], 0, np.where(df['sh'] < df['sa'], 2, 1))
    df['t_ou'] = ((df['sh'] + df['sa']) > 2.5).astype(int)
    df['t_btts'] = ((df['sh'] > 0) & (df['sa'] > 0)).astype(int)

    # ─── 3. MODELS ───
    feats_ms = ['h_form', 'a_form', 'h_xg', 'a_xg', 'pow_diff', 'imp_h', 'imp_d', 'imp_a', 'h_sot', 'a_sot']
    X_ms, y_ms = df[feats_ms], df['t_ms']

    # NOTE(review): a random split mixes past and future matches; a
    # chronological hold-out would give a more honest early-stopping signal.
    X_tr, X_te, y_tr, y_te = train_test_split(X_ms, y_ms, test_size=0.15, random_state=42)
    print("🤖 MS Modeli eğitiliyor...")
    model_ms = lgb.train({'objective': 'multiclass', 'num_class': 3, 'metric': 'multi_logloss', 'verbose': -1, 'num_leaves': 63},
                         lgb.Dataset(X_tr, y_tr), num_boost_round=1000,
                         valid_sets=[lgb.Dataset(X_te, y_te)],
                         callbacks=[lgb.early_stopping(50)])

    feats_ou = ['h_xg', 'a_xg', 'total_xg', 'h_sot', 'a_sot']
    print("🤖 OU2.5 Modeli...")
    model_ou = lgb.train({'objective': 'binary', 'metric': 'binary_logloss', 'verbose': -1},
                         lgb.Dataset(df[feats_ou], df['t_ou']), num_boost_round=500)

    feats_btts = ['h_xg', 'a_xg', 'h_sc', 'a_sc']
    print("🤖 BTTS Modeli...")
    model_btts = lgb.train({'objective': 'binary', 'metric': 'binary_logloss', 'verbose': -1},
                           lgb.Dataset(df[feats_btts], df['t_btts']), num_boost_round=500)

    # ─── 4. SAVE ───
    mdir = os.path.join(ROOT_DIR, 'models', 'vqwen')
    os.makedirs(mdir, exist_ok=True)
    for nm, md in [('ms', model_ms), ('ou25', model_ou), ('btts', model_btts)]:
        p = os.path.join(mdir, f'vqwen_{nm}.pkl')
        with open(p, 'wb') as f:
            pickle.dump(md, f)
        print(f"✅ {p} kaydedildi.")

    print("\n🎉 VQWEN EĞİTİMİ BİTTİ!")
|
||||||
|
|
||||||
|
# Script entry point: train the VQWEN models only when this file is run
# directly, not when it is imported.
if __name__ == "__main__":
    train_vqwen()
|
||||||
@@ -0,0 +1,165 @@
|
|||||||
|
"""
|
||||||
|
VQWEN Deep Model Training Script (Final Version)
|
||||||
|
================================================
|
||||||
|
Includes: ELO, Contextual Goals, Rest Days, Player Participation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
import pickle
|
||||||
|
import psycopg2
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
import lightgbm as lgb
|
||||||
|
|
||||||
|
AI_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
ROOT_DIR = os.path.dirname(AI_DIR)
|
||||||
|
sys.path.insert(0, ROOT_DIR)
|
||||||
|
|
||||||
|
def get_clean_dsn() -> str:
    """Return the PostgreSQL DSN used by this training script.

    The ``VQWEN_DB_DSN`` environment variable takes precedence when set to a
    non-empty value, so credentials can be supplied without editing source.

    Returns:
        str: a ``postgresql://`` connection string.
    """
    # SECURITY NOTE(review): the fallback embeds a username and password in
    # version control. It is kept only so existing invocations keep working;
    # rotate the credential and remove the literal once VQWEN_DB_DSN is
    # configured wherever this script runs.
    fallback = "postgresql://suggestbet:SuGGesT2026SecuRe@localhost:15432/boilerplate_db"
    return os.environ.get("VQWEN_DB_DSN") or fallback
|
||||||
|
|
||||||
|
def _fatigue_factor(rest_days):
    """Map rest days to a multiplier: 0.85 below 3 days, 0.95 below 5, else 1.0."""
    return np.where(rest_days < 3, 0.85, np.where(rest_days < 5, 0.95, 1.0))


def train_vqwen_deep():
    """Train the VQWEN "deep" 1X2, over/under 2.5 and BTTS LightGBM models.

    Pulls up to 150k finished football matches with odds from PostgreSQL,
    derives ELO / rest / fatigue / implied-probability features and writes
    three pickled boosters to ``<ROOT_DIR>/models/vqwen``.

    Changes relative to the earlier version:
      * Starting-XI counts fall back to 11 when no lineup rows exist
        (``COUNT(*)`` is never NULL, so the old COALESCE default never fired
        and such matches got 0).
      * The draw odds are validated together with home/away odds before
        ``1 / odds`` is computed.
      * The database connection is always closed, even when a step fails.
    """
    print("🧠 VQWEN DEEP MODEL EĞİTİMİ (ELO + REST + CONTEXT)")
    print("="*60)

    # ─── 1. EXTENDED DATA QUERY ───
    # ELO, rest days, home/away contextual performance.
    # NOTE(review): h_rest / a_rest only look at the team's previous match at
    # the SAME venue (home side's last home game, away side's last away
    # game), which overstates real rest -- confirm whether any previous match
    # of the team should count.
    # NOTE(review): "cards" counts card events of the match being predicted,
    # which looks like post-kick-off information used as a feature; its
    # COALESCE default of 4 can never apply because COUNT(*) is never NULL.
    query = """
    SELECT
        m.id, m.home_team_id, m.away_team_id, m.score_home, m.score_away, m.mst_utc,

        -- ELO Ratings
        COALESCE(maf.home_elo, 1500) as home_elo,
        COALESCE(maf.away_elo, 1500) as away_elo,

        -- Contextual Goals (Home Team at Home, Away Team Away)
        COALESCE((SELECT AVG(m2.score_home) FROM matches m2 WHERE m2.home_team_id = m.home_team_id AND m2.status = 'FT' AND m2.mst_utc < m.mst_utc), 1.2) as h_home_goals,
        COALESCE((SELECT AVG(m2.score_away) FROM matches m2 WHERE m2.away_team_id = m.away_team_id AND m2.status = 'FT' AND m2.mst_utc < m.mst_utc), 1.2) as a_away_goals,

        -- Rest Days (Yorgunluk)
        COALESCE(EXTRACT(EPOCH FROM (to_timestamp(m.mst_utc/1000) - (SELECT MAX(to_timestamp(m2.mst_utc/1000)) FROM matches m2 WHERE m2.home_team_id = m.home_team_id AND m2.status = 'FT' AND m2.mst_utc < m.mst_utc)) / 86400), 7) as h_rest,
        COALESCE(EXTRACT(EPOCH FROM (to_timestamp(m.mst_utc/1000) - (SELECT MAX(to_timestamp(m2.mst_utc/1000)) FROM matches m2 WHERE m2.away_team_id = m.away_team_id AND m2.status = 'FT' AND m2.mst_utc < m.mst_utc)) / 86400), 7) as a_rest,

        -- Squad Participation (NULLIF turns "no lineup rows" into NULL so the default of 11 applies)
        COALESCE((SELECT NULLIF(COUNT(*), 0) FROM match_player_participation mp WHERE mp.match_id = m.id AND mp.team_id = m.home_team_id AND mp.is_starting = true), 11) as h_xi,
        COALESCE((SELECT NULLIF(COUNT(*), 0) FROM match_player_participation mp WHERE mp.match_id = m.id AND mp.team_id = m.away_team_id AND mp.is_starting = true), 11) as a_xi,

        -- Cards
        COALESCE((SELECT COUNT(*) FROM match_player_events mpe WHERE mpe.match_id = m.id AND mpe.event_type = 'card'), 4) as cards,

        -- Odds
        (SELECT os.odd_value FROM odd_categories oc JOIN odd_selections os ON os.odd_category_db_id = oc.db_id WHERE oc.match_id = m.id AND oc.name ILIKE 'Maç Sonucu' AND os.name = '1' LIMIT 1) as oh,
        (SELECT os.odd_value FROM odd_categories oc JOIN odd_selections os ON os.odd_category_db_id = oc.db_id WHERE oc.match_id = m.id AND oc.name ILIKE 'Maç Sonucu' AND os.name = 'X' LIMIT 1) as od,
        (SELECT os.odd_value FROM odd_categories oc JOIN odd_selections os ON os.odd_category_db_id = oc.db_id WHERE oc.match_id = m.id AND oc.name ILIKE 'Maç Sonucu' AND os.name = '2' LIMIT 1) as oa

    FROM matches m
    LEFT JOIN football_ai_features maf ON maf.match_id = m.id
    WHERE m.status = 'FT' AND m.score_home IS NOT NULL AND m.sport = 'football'
    AND EXISTS (SELECT 1 FROM odd_categories oc WHERE oc.match_id = m.id)
    ORDER BY m.mst_utc DESC
    LIMIT 150000
    """

    # The connection is only needed for the fetch; release it in all cases.
    conn = psycopg2.connect(get_clean_dsn())
    try:
        cur = conn.cursor()
        try:
            print("📊 Veri çekiliyor...")
            start = time.time()
            cur.execute(query)
            rows = cur.fetchall()
            print(f"✅ {len(rows)} maç çekildi ({time.time()-start:.1f}s)")
        finally:
            cur.close()
    finally:
        conn.close()

    df = pd.DataFrame(rows, columns=[
        'id', 'h_id', 'a_id', 'sh', 'sa', 'utc',
        'h_elo', 'a_elo',
        'h_home_goals', 'a_away_goals',
        'h_rest', 'a_rest',
        'h_xi', 'a_xi', 'cards',
        'oh', 'od', 'oa'
    ])

    # Clean-up
    for col in df.columns[2:]:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    df = df.fillna(df.median(numeric_only=True))
    # All three prices must be valid, since each one is inverted below.
    df = df[(df['oh'] > 1.0) & (df['od'] > 1.0) & (df['oa'] > 1.0)].copy()

    # ─── 2. FEATURE ENGINEERING ───

    # 1. ELO difference
    df['elo_diff'] = df['h_elo'] - df['a_elo']

    # 2. Fatigue factor (performance is scaled down when rest is short);
    #    used in the xG computation below
    df['h_fatigue'] = _fatigue_factor(df['h_rest'])
    df['a_fatigue'] = _fatigue_factor(df['a_rest'])

    # 3. xG (contextual goals * fatigue)
    df['h_xg'] = df['h_home_goals'] * df['h_fatigue']
    df['a_xg'] = df['a_away_goals'] * df['a_fatigue']
    df['total_xg'] = df['h_xg'] + df['a_xg']
    df['rest_diff'] = df['h_rest'] - df['a_rest']

    # 4. Form (ELO-based power rating)
    df['h_pow'] = (df['h_elo'] / 100) * df['h_fatigue']
    df['a_pow'] = (df['a_elo'] / 100) * df['a_fatigue']
    df['pow_diff'] = df['h_pow'] - df['a_pow']

    # Odds -> margin-normalised implied probabilities
    margin = (1/df['oh']) + (1/df['od']) + (1/df['oa'])
    df['imp_h'] = (1/df['oh']) / margin
    df['imp_d'] = (1/df['od']) / margin
    df['imp_a'] = (1/df['oa']) / margin

    # Targets: 0 = home win, 1 = draw, 2 = away win
    df['t_ms'] = np.where(df['sh'] > df['sa'], 0, np.where(df['sh'] < df['sa'], 2, 1))
    df['t_ou'] = ((df['sh'] + df['sa']) > 2.5).astype(int)
    df['t_btts'] = ((df['sh'] > 0) & (df['sa'] > 0)).astype(int)

    # ─── 3. MODEL TRAINING ───
    # New feature set
    feats = ['elo_diff', 'h_xg', 'a_xg', 'total_xg', 'pow_diff', 'rest_diff', 'h_fatigue', 'a_fatigue',
             'imp_h', 'imp_d', 'imp_a', 'h_xi', 'a_xi', 'cards']

    # MS
    print("🤖 MS...")
    X_ms, y_ms = df[feats], df['t_ms']
    X_tr, X_te, y_tr, y_te = train_test_split(X_ms, y_ms, test_size=0.15, random_state=42)
    model_ms = lgb.train({'objective': 'multiclass', 'num_class': 3, 'verbose': -1, 'num_leaves': 63},
                         lgb.Dataset(X_tr, y_tr), num_boost_round=1000,
                         valid_sets=[lgb.Dataset(X_te, y_te)], callbacks=[lgb.early_stopping(50)])

    # OU2.5
    print("🤖 OU2.5...")
    model_ou = lgb.train({'objective': 'binary', 'verbose': -1},
                         lgb.Dataset(df[feats], df['t_ou']), num_boost_round=500)

    # BTTS
    print("🤖 BTTS...")
    model_btts = lgb.train({'objective': 'binary', 'verbose': -1},
                           lgb.Dataset(df[feats], df['t_btts']), num_boost_round=500)

    # ─── 4. SAVE ───
    mdir = os.path.join(ROOT_DIR, 'models', 'vqwen')
    os.makedirs(mdir, exist_ok=True)
    for nm, md in [('ms', model_ms), ('ou25', model_ou), ('btts', model_btts)]:
        p = os.path.join(mdir, f'vqwen_{nm}.pkl')
        with open(p, 'wb') as f:
            pickle.dump(md, f)
        print(f"✅ vqwen_{nm}.pkl")

    print("\n🎉 VQWEN DEEP EĞİTİMİ BİTTİ!")
|
||||||
|
|
||||||
|
# Script entry point: train the deep VQWEN models only when this file is run
# directly, not when it is imported.
if __name__ == "__main__":
    train_vqwen_deep()
|
||||||
@@ -0,0 +1,216 @@
|
|||||||
|
"""
|
||||||
|
VQWEN v3 Stress Test (Time Series Validation)
|
||||||
|
=============================================
|
||||||
|
Trains on OLDER data, Tests on NEWER data (Simulating Real Future).
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
import pickle
|
||||||
|
import psycopg2
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import lightgbm as lgb
|
||||||
|
|
||||||
|
AI_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
ROOT_DIR = os.path.dirname(AI_DIR)
|
||||||
|
sys.path.insert(0, ROOT_DIR)
|
||||||
|
|
||||||
|
def get_clean_dsn() -> str:
    """Return the PostgreSQL DSN used by the stress test.

    The ``DATABASE_URL`` environment variable takes precedence. Surrounding
    whitespace and quotes are removed and everything from the first ``?``
    onwards is dropped, matching the DSN helper of the training script.

    Returns:
        The connection string to hand to ``psycopg2.connect``.
    """
    raw = os.getenv("DATABASE_URL", "").strip().strip('"').strip("'")
    if raw:
        return raw.split("?", 1)[0]

    # SECURITY NOTE(review): legacy fallback kept only so existing local runs
    # keep working. A credential committed to source control should be rotated
    # and this literal removed once DATABASE_URL is set everywhere.
    return "postgresql://suggestbet:SuGGesT2026SecuRe@localhost:15432/boilerplate_db"
|
||||||
|
|
||||||
|
def run_stress_test():
    """Back-test the VQWEN models on a chronological hold-out and print the P&L.

    Steps:
      1. Fetch up to 150,000 finished football matches that have odds.
      2. Build the feature frame and the three targets (1X2, over 2.5, BTTS).
      3. Split by kickoff time: the newest 30% is the test set, the older 70%
         is the training set.
      4. Train three LightGBM models on the training set only.
      5. Simulate flat one-unit stakes on the test set and print the result
         per market.

    Returns early, after printing a message, when fewer than 1000 training
    rows remain. The database connection is always closed, including on that
    early return and on errors.
    """
    print("🧪 VQWEN v3 STRESS TEST (Time-Series Validation)")
    print("="*60)

    # ─── 1. DATA FETCH (newest to oldest) ───
    # The CTE is ordered newest-first only to make LIMIT keep the most recent
    # matches; the outer SELECT has no ORDER BY, so the frame is re-sorted in
    # pandas before the positional split below.
    query = """
        WITH match_data AS (
            SELECT
                m.id, m.home_team_id, m.away_team_id, m.score_home, m.score_away, m.mst_utc,
                COALESCE(maf.home_elo, 1500) as home_elo,
                COALESCE(maf.away_elo, 1500) as away_elo,
                -- Contextual Goals
                COALESCE((SELECT AVG(m2.score_home) FROM matches m2 WHERE m2.home_team_id = m.home_team_id AND m2.status = 'FT' AND m2.mst_utc < m.mst_utc), 1.2) as h_home_goals,
                COALESCE((SELECT AVG(m2.score_away) FROM matches m2 WHERE m2.away_team_id = m.away_team_id AND m2.status = 'FT' AND m2.mst_utc < m.mst_utc), 1.2) as a_away_goals,
                -- Rest Days
                COALESCE(EXTRACT(EPOCH FROM (to_timestamp(m.mst_utc/1000) - (SELECT MAX(to_timestamp(m2.mst_utc/1000)) FROM matches m2 WHERE m2.home_team_id = m.home_team_id AND m2.status = 'FT' AND m2.mst_utc < m.mst_utc)) / 86400), 7) as h_rest,
                COALESCE(EXTRACT(EPOCH FROM (to_timestamp(m.mst_utc/1000) - (SELECT MAX(to_timestamp(m2.mst_utc/1000)) FROM matches m2 WHERE m2.away_team_id = m.away_team_id AND m2.status = 'FT' AND m2.mst_utc < m.mst_utc)) / 86400), 7) as a_rest,
                -- Squad
                COALESCE((SELECT COUNT(*) FROM match_player_participation mp WHERE mp.match_id = m.id AND mp.team_id = m.home_team_id AND mp.is_starting = true), 11) as h_xi,
                COALESCE((SELECT COUNT(*) FROM match_player_participation mp WHERE mp.match_id = m.id AND mp.team_id = m.away_team_id AND mp.is_starting = true), 11) as a_xi,
                -- Odds
                (SELECT os.odd_value FROM odd_categories oc JOIN odd_selections os ON os.odd_category_db_id = oc.db_id WHERE oc.match_id = m.id AND oc.name ILIKE 'Maç Sonucu' AND os.name = '1' LIMIT 1) as oh,
                (SELECT os.odd_value FROM odd_categories oc JOIN odd_selections os ON os.odd_category_db_id = oc.db_id WHERE oc.match_id = m.id AND oc.name ILIKE 'Maç Sonucu' AND os.name = 'X' LIMIT 1) as od,
                (SELECT os.odd_value FROM odd_categories oc JOIN odd_selections os ON os.odd_category_db_id = oc.db_id WHERE oc.match_id = m.id AND oc.name ILIKE 'Maç Sonucu' AND os.name = '2' LIMIT 1) as oa
            FROM matches m
            LEFT JOIN football_ai_features maf ON maf.match_id = m.id
            WHERE m.status = 'FT' AND m.score_home IS NOT NULL AND m.sport = 'football'
            AND EXISTS (SELECT 1 FROM odd_categories oc WHERE oc.match_id = m.id)
            ORDER BY m.mst_utc DESC
            LIMIT 150000
        )
        SELECT
            md.*,
            -- H2H Win Rate for Home Team
            COALESCE((
                SELECT COUNT(*) FILTER (WHERE m2.score_home > m2.score_away)::float / NULLIF(COUNT(*), 0)
                FROM matches m2
                WHERE m2.home_team_id = md.home_team_id AND m2.away_team_id = md.away_team_id AND m2.status = 'FT' AND m2.mst_utc < md.mst_utc
            ), 0.5) as h2h_h_win_rate,

            -- Form Points (Last 5)
            COALESCE((SELECT SUM(pts) FROM (SELECT CASE WHEN m2.score_home > m2.score_away THEN 3 WHEN m2.score_home = m2.score_away THEN 1 ELSE 0 END as pts FROM matches m2 WHERE m2.home_team_id = md.home_team_id AND m2.status = 'FT' AND m2.mst_utc < md.mst_utc ORDER BY m2.mst_utc DESC LIMIT 5) sub), 0) as h_form_pts,
            COALESCE((SELECT SUM(pts) FROM (SELECT CASE WHEN m2.score_away > m2.score_home THEN 3 WHEN m2.score_away = m2.score_home THEN 1 ELSE 0 END as pts FROM matches m2 WHERE m2.away_team_id = md.away_team_id AND m2.status = 'FT' AND m2.mst_utc < md.mst_utc ORDER BY m2.mst_utc DESC LIMIT 5) sub), 0) as a_form_pts

        FROM match_data md
    """

    # The database is only needed for this one query, so the connection is
    # released before the (long) training phase, whatever happens.
    conn = psycopg2.connect(get_clean_dsn())
    try:
        cur = conn.cursor()
        try:
            print("📊 Veri çekiliyor (Time-Series)...")
            start = time.time()
            cur.execute(query)
            rows = cur.fetchall()
            print(f"✅ {len(rows)} maç çekildi ({time.time()-start:.1f}s)")
        finally:
            cur.close()
    finally:
        conn.close()

    df = pd.DataFrame(rows, columns=[
        'id', 'h_id', 'a_id', 'sh', 'sa', 'utc', 'h_elo', 'a_elo',
        'h_home_goals', 'a_away_goals', 'h_rest', 'a_rest', 'h_xi', 'a_xi',
        'oh', 'od', 'oa',
        'h2h_h_wr', 'h_form_pts', 'a_form_pts'
    ])

    # Cleaning
    for col in df.columns[2:]:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    # Scores and odds must be real observations: imputing them would invent
    # match results and prices, which makes the simulated profit meaningless.
    df = df.dropna(subset=['sh', 'sa', 'oh', 'od', 'oa'])
    df = df.fillna(df.median(numeric_only=True))
    # All three 1X2 prices feed the margin and can be staked on, so validate
    # the draw price as well.
    df = df[(df['oh'] > 1.0) & (df['od'] > 1.0) & (df['oa'] > 1.0)]
    # Make the newest-first order explicit; the split below is positional.
    df = df.sort_values('utc', ascending=False, kind='mergesort').reset_index(drop=True)

    # Features
    df['elo_diff'] = df['h_elo'] - df['a_elo']

    def fatigue(rest):
        # Multiplier: 0.85 under 3 rest days, 0.95 under 5, otherwise 1.0.
        return np.where(rest < 3, 0.85, np.where(rest < 5, 0.95, 1.0))

    df['h_fat'] = fatigue(df['h_rest'])
    df['a_fat'] = fatigue(df['a_rest'])

    df['h_xg'] = df['h_home_goals'] * df['h_fat']
    df['a_xg'] = df['a_away_goals'] * df['a_fat']
    df['total_xg'] = df['h_xg'] + df['a_xg']
    df['rest_diff'] = df['h_rest'] - df['a_rest']
    df['pow_diff'] = (df['h_elo']/100)*df['h_fat'] - (df['a_elo']/100)*df['a_fat']
    df['form_diff'] = df['h_form_pts'] - df['a_form_pts']

    # Implied probabilities with the bookmaker margin normalised away.
    margin = (1/df['oh']) + (1/df['od']) + (1/df['oa'])
    df['imp_h'] = (1/df['oh']) / margin
    df['imp_d'] = (1/df['od']) / margin
    df['imp_a'] = (1/df['oa']) / margin

    # Targets: 0 = home win, 1 = draw, 2 = away win.
    df['t_ms'] = np.where(df['sh'] > df['sa'], 0, np.where(df['sh'] < df['sa'], 2, 1))
    df['t_ou'] = ((df['sh'] + df['sa']) > 2.5).astype(int)
    df['t_btts'] = ((df['sh'] > 0) & (df['sa'] > 0)).astype(int)

    feats = ['elo_diff', 'h_xg', 'a_xg', 'total_xg', 'pow_diff', 'rest_diff',
             'h_fat', 'a_fat', 'imp_h', 'imp_d', 'imp_a',
             'h_xi', 'a_xi', 'h2h_h_wr', 'form_diff']

    # ─── 2. TIME-BASED SPLIT ───
    # The frame is sorted newest-first: the first 30% (newest) is the test set
    # and the remaining 70% (older) is the training set.
    split_point = int(len(df) * 0.30)

    # Test set: the newest matches (the model sees them as "the future").
    test_set = df.iloc[:split_point].copy()
    # Train set: the older matches (what the model learns from).
    train_set = df.iloc[split_point:].copy()

    print(f"\n📅 SPLIT INFO:")
    print(f" Train Set (Eski): {len(train_set)} maç")
    print(f" Test Set (YENİ/GELECEK): {len(test_set)} maç")

    if len(train_set) < 1000:
        print("❌ Yetersiz eğitim verisi.")
        return

    # ─── 3. TRAINING (past data only) ───
    print("\n🤖 Geçmiş verilerle model eğitiliyor...")
    model_ms = lgb.train({'objective': 'multiclass', 'num_class': 3, 'verbose': -1, 'num_leaves': 63},
                         lgb.Dataset(train_set[feats], train_set['t_ms']), num_boost_round=500)

    model_ou = lgb.train({'objective': 'binary', 'verbose': -1},
                         lgb.Dataset(train_set[feats], train_set['t_ou']), num_boost_round=500)

    model_btts = lgb.train({'objective': 'binary', 'verbose': -1},
                           lgb.Dataset(train_set[feats], train_set['t_btts']), num_boost_round=500)
    print("✅ Model eğitimi tamamlandı. Şimdi Gelecek (Test Set) tahmin ediliyor...")

    # ─── 4. TEST (predict the hold-out) ───
    # Value-betting strategy with flat one-unit stakes.
    results = {'ms': {'bet': 0, 'won': 0, 'profit': 0}, 'ou25': {'bet': 0, 'won': 0, 'profit': 0}, 'btts': {'bet': 0, 'won': 0, 'profit': 0}}

    # Score the whole hold-out in one call per model instead of building a
    # one-row DataFrame and calling predict three times per match.
    X_test = test_set[feats]
    ms_probs_all = model_ms.predict(X_test)
    p_over_all = model_ou.predict(X_test)
    p_btts_all = model_btts.predict(X_test)

    hold_out = zip(test_set['oh'], test_set['od'], test_set['oa'],
                   test_set['sh'], test_set['sa'],
                   ms_probs_all, p_over_all, p_btts_all)
    for oh, od, oa, h, a, ms_probs, p_over, p_btts in hold_out:
        # 1X2: stake on the first outcome (in 1, X, 2 order) whose model
        # probability beats the raw implied probability by more than 5 points
        # and is itself above 45%; at most one 1X2 bet per match.
        for pick, prob, odd in zip(['1', 'X', '2'], ms_probs, [oh, od, oa]):
            if odd <= 1.0:
                continue
            edge = prob - (1/odd)
            if edge > 0.05 and prob > 0.45:
                results['ms']['bet'] += 1
                w = (pick == '1' and h > a) or (pick == 'X' and h == a) or (pick == '2' and a > h)
                if w:
                    results['ms']['won'] += 1
                    results['ms']['profit'] += (odd - 1.0)
                else:
                    results['ms']['profit'] -= 1.0
                break

        # Over 2.5: no market price is fetched, so a win is booked at a fixed
        # +0.85 units.
        if float(p_over) > 0.55:  # Threshold
            results['ou25']['bet'] += 1
            if (h + a) > 2.5:
                results['ou25']['won'] += 1
                results['ou25']['profit'] += 0.85
            else:
                results['ou25']['profit'] -= 1.0

        # BTTS: same fixed +0.85 payout assumption.
        if float(p_btts) > 0.55:
            results['btts']['bet'] += 1
            if h > 0 and a > 0:
                results['btts']['won'] += 1
                results['btts']['profit'] += 0.85
            else:
                results['btts']['profit'] -= 1.0

    # ─── 5. RESULTS ───
    print("\n" + "="*60)
    print("📊 STRESS TEST SONUÇLARI (GELECEK TAHMİNİ)")
    print("="*60)
    for mkt in ['ms', 'ou25', 'btts']:
        r = results[mkt]
        wr = (r['won'] / r['bet'] * 100) if r['bet'] > 0 else 0
        print(f"{mkt.upper():<10} Oyn: {r['bet']:<5} Kaz: {r['won']:<5} WR: {wr:.1f}% Kâr: {r['profit']:+.2f}")

    total = sum(r['profit'] for r in results.values())
    print(f"\n💰 TOPLAM GELECEK KÂRI: {total:+.2f} Units")
    if total > 0:
        print("🟢 MODEL GÜVENİLİR! (Geleceği öngörebiliyor)")
    else:
        print("🔴 MODEL ZAYIF! (Sadece ezber yapmış olabilir)")
|
||||||
|
|
||||||
|
# Script entry point: run the stress test only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    run_stress_test()
|
||||||
@@ -0,0 +1,702 @@
|
|||||||
|
"""
|
||||||
|
VQWEN v3 Training Script
|
||||||
|
========================
|
||||||
|
Retrains the VQWEN market models using only the configured top leagues.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import pickle
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import lightgbm as lgb
|
||||||
|
import pandas as pd
|
||||||
|
import psycopg2
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
# Directory that contains this script.
AI_DIR = Path(__file__).resolve().parent
# Parent of the script directory.
ENGINE_DIR = AI_DIR.parent
# Parent of ENGINE_DIR (one of the two places a .env file is loaded from).
REPO_DIR = ENGINE_DIR.parent
# Directory the training metadata file is written to.
MODELS_DIR = ENGINE_DIR / "models" / "vqwen"
# JSON array of league ids the training data is restricted to.
TOP_LEAGUES_PATH = REPO_DIR / "top_leagues.json"

# Put ENGINE_DIR on the import path once, so the `features` package imported
# right after this resolves (assumes it lives under ENGINE_DIR -- confirm).
if str(ENGINE_DIR) not in sys.path:
    sys.path.insert(0, str(ENGINE_DIR))
|
||||||
|
|
||||||
|
from features.vqwen_contract import (
|
||||||
|
FEATURE_COLUMNS,
|
||||||
|
VqwenFeatureInput,
|
||||||
|
build_vqwen_feature_row,
|
||||||
|
)
|
||||||
|
|
||||||
|
def _load_env() -> None:
    """Load ``.env`` from the repo directory, then from the engine directory.

    Both loads use ``override=False``, so variables that are already set
    (including ones set by the first file) are left untouched.
    """
    for base_dir in (REPO_DIR, ENGINE_DIR):
        load_dotenv(base_dir / ".env", override=False)
|
||||||
|
|
||||||
|
|
||||||
|
def get_clean_dsn() -> str:
    """Return ``DATABASE_URL`` without surrounding quotes or a query string.

    The ``.env`` files are loaded first. Whitespace, double quotes and single
    quotes are stripped from the value, and everything from the first ``?``
    onwards is discarded.

    Raises:
        RuntimeError: if ``DATABASE_URL`` is unset or empty after stripping.
    """
    _load_env()
    dsn = os.getenv("DATABASE_URL", "")
    dsn = dsn.strip().strip('"').strip("'")
    if dsn == "":
        raise RuntimeError("DATABASE_URL is missing.")
    base, _sep, _query = dsn.partition("?")
    return base
|
||||||
|
|
||||||
|
|
||||||
|
def load_top_league_ids() -> list[str]:
    """Read the league id allow-list from ``top_leagues.json``.

    Each array item is converted with ``str`` and stripped; blank results are
    dropped and duplicates removed while keeping first-seen order.

    Raises:
        FileNotFoundError: if the file does not exist.
        ValueError: if the JSON root is not an array, or no id remains.
    """
    if not TOP_LEAGUES_PATH.exists():
        raise FileNotFoundError(f"top_leagues.json not found at {TOP_LEAGUES_PATH}")

    payload = json.loads(TOP_LEAGUES_PATH.read_text(encoding="utf-8"))
    if not isinstance(payload, list):
        raise ValueError("top_leagues.json must contain a JSON array.")

    cleaned: list[str] = []
    for entry in payload:
        text = str(entry).strip()
        if text:
            cleaned.append(text)

    # dict.fromkeys keeps insertion order, which gives an order-preserving dedupe.
    unique_ids = list(dict.fromkeys(cleaned))
    if not unique_ids:
        raise ValueError("top_leagues.json is empty.")
    return unique_ids
|
||||||
|
|
||||||
|
|
||||||
|
def _fetch_dataframe(cur: psycopg2.extensions.cursor, league_ids: list[str]) -> pd.DataFrame:
|
||||||
|
query = """
|
||||||
|
WITH match_data AS (
|
||||||
|
SELECT
|
||||||
|
m.id,
|
||||||
|
m.league_id,
|
||||||
|
m.home_team_id,
|
||||||
|
m.away_team_id,
|
||||||
|
m.score_home,
|
||||||
|
m.score_away,
|
||||||
|
m.mst_utc,
|
||||||
|
ref.name AS referee_name,
|
||||||
|
COALESCE(maf.home_elo, 1500) AS home_elo,
|
||||||
|
COALESCE(maf.away_elo, 1500) AS away_elo,
|
||||||
|
COALESCE(
|
||||||
|
(
|
||||||
|
SELECT AVG(m2.score_home)
|
||||||
|
FROM matches m2
|
||||||
|
WHERE m2.home_team_id = m.home_team_id
|
||||||
|
AND m2.status = 'FT'
|
||||||
|
AND m2.mst_utc < m.mst_utc
|
||||||
|
),
|
||||||
|
1.2
|
||||||
|
) AS h_home_goals,
|
||||||
|
COALESCE(
|
||||||
|
(
|
||||||
|
SELECT AVG(m2.score_away)
|
||||||
|
FROM matches m2
|
||||||
|
WHERE m2.away_team_id = m.away_team_id
|
||||||
|
AND m2.status = 'FT'
|
||||||
|
AND m2.mst_utc < m.mst_utc
|
||||||
|
),
|
||||||
|
1.2
|
||||||
|
) AS a_away_goals,
|
||||||
|
COALESCE(
|
||||||
|
(
|
||||||
|
SELECT EXTRACT(
|
||||||
|
EPOCH FROM (
|
||||||
|
to_timestamp(m.mst_utc / 1000.0)
|
||||||
|
- MAX(to_timestamp(m2.mst_utc / 1000.0))
|
||||||
|
)
|
||||||
|
) / 86400.0
|
||||||
|
FROM matches m2
|
||||||
|
WHERE m2.home_team_id = m.home_team_id
|
||||||
|
AND m2.status = 'FT'
|
||||||
|
AND m2.mst_utc < m.mst_utc
|
||||||
|
),
|
||||||
|
7
|
||||||
|
) AS h_rest,
|
||||||
|
COALESCE(
|
||||||
|
(
|
||||||
|
SELECT EXTRACT(
|
||||||
|
EPOCH FROM (
|
||||||
|
to_timestamp(m.mst_utc / 1000.0)
|
||||||
|
- MAX(to_timestamp(m2.mst_utc / 1000.0))
|
||||||
|
)
|
||||||
|
) / 86400.0
|
||||||
|
FROM matches m2
|
||||||
|
WHERE m2.away_team_id = m.away_team_id
|
||||||
|
AND m2.status = 'FT'
|
||||||
|
AND m2.mst_utc < m.mst_utc
|
||||||
|
),
|
||||||
|
7
|
||||||
|
) AS a_rest,
|
||||||
|
(
|
||||||
|
SELECT os.odd_value
|
||||||
|
FROM odd_categories oc
|
||||||
|
JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
|
||||||
|
WHERE oc.match_id = m.id
|
||||||
|
AND oc.name ILIKE 'Maç Sonucu'
|
||||||
|
AND os.name = '1'
|
||||||
|
LIMIT 1
|
||||||
|
) AS oh,
|
||||||
|
(
|
||||||
|
SELECT os.odd_value
|
||||||
|
FROM odd_categories oc
|
||||||
|
JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
|
||||||
|
WHERE oc.match_id = m.id
|
||||||
|
AND oc.name ILIKE 'Maç Sonucu'
|
||||||
|
AND os.name = 'X'
|
||||||
|
LIMIT 1
|
||||||
|
) AS od,
|
||||||
|
(
|
||||||
|
SELECT os.odd_value
|
||||||
|
FROM odd_categories oc
|
||||||
|
JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
|
||||||
|
WHERE oc.match_id = m.id
|
||||||
|
AND oc.name ILIKE 'Maç Sonucu'
|
||||||
|
AND os.name = '2'
|
||||||
|
LIMIT 1
|
||||||
|
) AS oa
|
||||||
|
FROM matches m
|
||||||
|
LEFT JOIN football_ai_features maf ON maf.match_id = m.id
|
||||||
|
LEFT JOIN match_officials ref ON ref.match_id = m.id AND ref.role_id = 1
|
||||||
|
WHERE m.status = 'FT'
|
||||||
|
AND m.score_home IS NOT NULL
|
||||||
|
AND m.score_away IS NOT NULL
|
||||||
|
AND m.sport = 'football'
|
||||||
|
AND m.league_id = ANY(%s)
|
||||||
|
AND EXISTS (SELECT 1 FROM odd_categories oc WHERE oc.match_id = m.id)
|
||||||
|
)
|
||||||
|
SELECT
|
||||||
|
md.*,
|
||||||
|
COALESCE(
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
(
|
||||||
|
COUNT(*) FILTER (
|
||||||
|
WHERE (
|
||||||
|
(m2.home_team_id = md.home_team_id AND m2.score_home > m2.score_away)
|
||||||
|
OR
|
||||||
|
(m2.away_team_id = md.home_team_id AND m2.score_away > m2.score_home)
|
||||||
|
)
|
||||||
|
)::float
|
||||||
|
+ COUNT(*) FILTER (WHERE m2.score_home = m2.score_away)::float * 0.5
|
||||||
|
) / NULLIF(COUNT(*), 0)
|
||||||
|
FROM matches m2
|
||||||
|
WHERE m2.status = 'FT'
|
||||||
|
AND m2.mst_utc < md.mst_utc
|
||||||
|
AND (
|
||||||
|
(m2.home_team_id = md.home_team_id AND m2.away_team_id = md.away_team_id)
|
||||||
|
OR
|
||||||
|
(m2.home_team_id = md.away_team_id AND m2.away_team_id = md.home_team_id)
|
||||||
|
)
|
||||||
|
),
|
||||||
|
0.5
|
||||||
|
) AS h2h_h_wr,
|
||||||
|
COALESCE(
|
||||||
|
(
|
||||||
|
SELECT SUM(points)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
CASE
|
||||||
|
WHEN m2.score_home > m2.score_away THEN 3
|
||||||
|
WHEN m2.score_home = m2.score_away THEN 1
|
||||||
|
ELSE 0
|
||||||
|
END AS points
|
||||||
|
FROM matches m2
|
||||||
|
WHERE m2.home_team_id = md.home_team_id
|
||||||
|
AND m2.status = 'FT'
|
||||||
|
AND m2.mst_utc < md.mst_utc
|
||||||
|
ORDER BY m2.mst_utc DESC
|
||||||
|
LIMIT 5
|
||||||
|
) home_form
|
||||||
|
),
|
||||||
|
0
|
||||||
|
) AS h_form_pts,
|
||||||
|
COALESCE(
|
||||||
|
(
|
||||||
|
SELECT SUM(points)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
CASE
|
||||||
|
WHEN m2.score_away > m2.score_home THEN 3
|
||||||
|
WHEN m2.score_away = m2.score_home THEN 1
|
||||||
|
ELSE 0
|
||||||
|
END AS points
|
||||||
|
FROM matches m2
|
||||||
|
WHERE m2.away_team_id = md.away_team_id
|
||||||
|
AND m2.status = 'FT'
|
||||||
|
AND m2.mst_utc < md.mst_utc
|
||||||
|
ORDER BY m2.mst_utc DESC
|
||||||
|
LIMIT 5
|
||||||
|
) away_form
|
||||||
|
),
|
||||||
|
0
|
||||||
|
) AS a_form_pts
|
||||||
|
FROM match_data md
|
||||||
|
ORDER BY md.mst_utc DESC
|
||||||
|
"""
|
||||||
|
|
||||||
|
print("Top league verisi cekiliyor...")
|
||||||
|
started_at = time.time()
|
||||||
|
cur.execute(query, (league_ids,))
|
||||||
|
rows = cur.fetchall()
|
||||||
|
elapsed = time.time() - started_at
|
||||||
|
print(f"{len(rows)} mac cekildi ({elapsed:.1f}s)")
|
||||||
|
|
||||||
|
dataframe = pd.DataFrame(
|
||||||
|
rows,
|
||||||
|
columns=[
|
||||||
|
"id",
|
||||||
|
"league_id",
|
||||||
|
"h_id",
|
||||||
|
"a_id",
|
||||||
|
"sh",
|
||||||
|
"sa",
|
||||||
|
"utc",
|
||||||
|
"referee_name",
|
||||||
|
"h_elo",
|
||||||
|
"a_elo",
|
||||||
|
"h_home_goals",
|
||||||
|
"a_away_goals",
|
||||||
|
"h_rest",
|
||||||
|
"a_rest",
|
||||||
|
"oh",
|
||||||
|
"od",
|
||||||
|
"oa",
|
||||||
|
"h2h_h_wr",
|
||||||
|
"h_form_pts",
|
||||||
|
"a_form_pts",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
return dataframe
|
||||||
|
|
||||||
|
|
||||||
|
def _compute_league_avg_goals(
|
||||||
|
cur: psycopg2.extensions.cursor,
|
||||||
|
league_id: str,
|
||||||
|
before_ts: int,
|
||||||
|
) -> float:
|
||||||
|
if not league_id:
|
||||||
|
return 2.6
|
||||||
|
|
||||||
|
cur.execute(
|
||||||
|
"""
|
||||||
|
SELECT COALESCE(AVG(src.score_home + src.score_away), 2.6)
|
||||||
|
FROM (
|
||||||
|
SELECT score_home, score_away
|
||||||
|
FROM matches
|
||||||
|
WHERE league_id = %s
|
||||||
|
AND sport = 'football'
|
||||||
|
AND status = 'FT'
|
||||||
|
AND score_home IS NOT NULL
|
||||||
|
AND score_away IS NOT NULL
|
||||||
|
AND mst_utc < %s
|
||||||
|
ORDER BY mst_utc DESC
|
||||||
|
LIMIT 100
|
||||||
|
) src
|
||||||
|
""",
|
||||||
|
(league_id, before_ts),
|
||||||
|
)
|
||||||
|
row = cur.fetchone()
|
||||||
|
return float(row[0] or 2.6)
|
||||||
|
|
||||||
|
|
||||||
|
def _compute_referee_profile(
|
||||||
|
cur: psycopg2.extensions.cursor,
|
||||||
|
referee_name: str | None,
|
||||||
|
before_ts: int,
|
||||||
|
) -> tuple[float, float]:
|
||||||
|
if not referee_name:
|
||||||
|
return 2.6, 0.0
|
||||||
|
|
||||||
|
cur.execute(
|
||||||
|
"""
|
||||||
|
SELECT
|
||||||
|
COALESCE(AVG(score_home + score_away), 2.6) AS avg_goals,
|
||||||
|
COALESCE(AVG(CASE WHEN score_home > score_away THEN 1.0 ELSE 0.0 END), 0.46) - 0.46 AS home_bias
|
||||||
|
FROM (
|
||||||
|
SELECT m.score_home, m.score_away
|
||||||
|
FROM match_officials mo
|
||||||
|
JOIN matches m ON m.id = mo.match_id
|
||||||
|
WHERE mo.name = %s
|
||||||
|
AND mo.role_id = 1
|
||||||
|
AND m.sport = 'football'
|
||||||
|
AND m.status = 'FT'
|
||||||
|
AND m.score_home IS NOT NULL
|
||||||
|
AND m.score_away IS NOT NULL
|
||||||
|
AND m.mst_utc < %s
|
||||||
|
ORDER BY m.mst_utc DESC
|
||||||
|
LIMIT 30
|
||||||
|
) src
|
||||||
|
""",
|
||||||
|
(referee_name, before_ts),
|
||||||
|
)
|
||||||
|
row = cur.fetchone()
|
||||||
|
if not row:
|
||||||
|
return 2.6, 0.0
|
||||||
|
return float(row[0] or 2.6), float(row[1] or 0.0)
|
||||||
|
|
||||||
|
|
||||||
|
def _compute_team_squad_profile(
|
||||||
|
cur: psycopg2.extensions.cursor,
|
||||||
|
team_id: str,
|
||||||
|
before_ts: int,
|
||||||
|
) -> tuple[float, float]:
|
||||||
|
if not team_id:
|
||||||
|
return 0.5, 0.0
|
||||||
|
|
||||||
|
cur.execute(
|
||||||
|
"""
|
||||||
|
WITH recent_matches AS (
|
||||||
|
SELECT m.id
|
||||||
|
FROM matches m
|
||||||
|
WHERE (m.home_team_id = %s OR m.away_team_id = %s)
|
||||||
|
AND m.sport = 'football'
|
||||||
|
AND m.status = 'FT'
|
||||||
|
AND m.mst_utc < %s
|
||||||
|
ORDER BY m.mst_utc DESC
|
||||||
|
LIMIT 8
|
||||||
|
),
|
||||||
|
player_base AS (
|
||||||
|
SELECT
|
||||||
|
mpp.player_id,
|
||||||
|
COUNT(*)::float AS appearances,
|
||||||
|
COUNT(*) FILTER (WHERE mpp.is_starting = true)::float AS starts
|
||||||
|
FROM match_player_participation mpp
|
||||||
|
JOIN recent_matches rm ON rm.id = mpp.match_id
|
||||||
|
WHERE mpp.team_id = %s
|
||||||
|
GROUP BY mpp.player_id
|
||||||
|
),
|
||||||
|
player_goals AS (
|
||||||
|
SELECT
|
||||||
|
mpe.player_id,
|
||||||
|
COUNT(*) FILTER (
|
||||||
|
WHERE mpe.event_type = 'goal'
|
||||||
|
AND COALESCE(mpe.event_subtype, '') NOT ILIKE '%%penaltı kaçırma%%'
|
||||||
|
)::float AS goals,
|
||||||
|
0.0::float AS assists
|
||||||
|
FROM match_player_events mpe
|
||||||
|
JOIN recent_matches rm ON rm.id = mpe.match_id
|
||||||
|
WHERE mpe.team_id = %s
|
||||||
|
GROUP BY mpe.player_id
|
||||||
|
UNION ALL
|
||||||
|
SELECT
|
||||||
|
mpe.assist_player_id AS player_id,
|
||||||
|
0.0::float AS goals,
|
||||||
|
COUNT(*) FILTER (
|
||||||
|
WHERE mpe.event_type = 'goal'
|
||||||
|
AND mpe.assist_player_id IS NOT NULL
|
||||||
|
)::float AS assists
|
||||||
|
FROM match_player_events mpe
|
||||||
|
JOIN recent_matches rm ON rm.id = mpe.match_id
|
||||||
|
WHERE mpe.team_id = %s
|
||||||
|
AND mpe.assist_player_id IS NOT NULL
|
||||||
|
GROUP BY mpe.assist_player_id
|
||||||
|
),
|
||||||
|
player_events AS (
|
||||||
|
SELECT
|
||||||
|
player_id,
|
||||||
|
SUM(goals) AS goals,
|
||||||
|
SUM(assists) AS assists
|
||||||
|
FROM player_goals
|
||||||
|
GROUP BY player_id
|
||||||
|
),
|
||||||
|
player_scores AS (
|
||||||
|
SELECT
|
||||||
|
pb.player_id,
|
||||||
|
(pb.starts * 1.5)
|
||||||
|
+ ((pb.appearances - pb.starts) * 0.5)
|
||||||
|
+ (COALESCE(pe.goals, 0.0) * 2.5)
|
||||||
|
+ (COALESCE(pe.assists, 0.0) * 1.5) AS score
|
||||||
|
FROM player_base pb
|
||||||
|
LEFT JOIN player_events pe ON pe.player_id = pb.player_id
|
||||||
|
)
|
||||||
|
SELECT
|
||||||
|
COALESCE(AVG(top_players.score), 0.0) AS avg_top_score,
|
||||||
|
COALESCE(COUNT(*) FILTER (WHERE top_players.score >= 6.0), 0) AS key_players
|
||||||
|
FROM (
|
||||||
|
SELECT score
|
||||||
|
FROM player_scores
|
||||||
|
ORDER BY score DESC
|
||||||
|
LIMIT 11
|
||||||
|
) top_players
|
||||||
|
""",
|
||||||
|
(team_id, team_id, before_ts, team_id, team_id, team_id),
|
||||||
|
)
|
||||||
|
row = cur.fetchone()
|
||||||
|
if not row:
|
||||||
|
return 0.5, 0.0
|
||||||
|
|
||||||
|
avg_top_score = float(row[0] or 0.0)
|
||||||
|
return min(max(avg_top_score / 10.0, 0.0), 1.0), float(row[1] or 0.0)
|
||||||
|
|
||||||
|
|
||||||
|
def _enrich_pre_match_context(
    cur: psycopg2.extensions.cursor,
    df: pd.DataFrame,
) -> pd.DataFrame:
    """Return a copy of ``df`` with seven pre-match context columns added.

    For every row the league goal average, the referee profile and both
    teams' squad profiles are looked up with the row's ``utc`` as cutoff.
    That is four database round-trips per match.

    Args:
        cur: open cursor passed through to the per-row lookups.
        df: frame with at least ``utc``, ``league_id``, ``h_id`` and ``a_id``;
            ``referee_name`` is read when present.

    Returns:
        A copy of ``df`` with ``league_avg_goals``, ``referee_avg_goals``,
        ``referee_home_bias``, ``home_squad_strength``,
        ``away_squad_strength``, ``home_key_players`` and
        ``away_key_players`` appended; the input frame is not modified.
    """
    # Insertion order of this dict is the order the columns are appended in.
    collected: dict[str, list[float]] = {
        "league_avg_goals": [],
        "referee_avg_goals": [],
        "referee_home_bias": [],
        "home_squad_strength": [],
        "away_squad_strength": [],
        "home_key_players": [],
        "away_key_players": [],
    }

    print("Pre-match context enrich ediliyor...")
    started_at = time.time()

    for match in df.itertuples(index=False):
        cutoff = int(match.utc or 0)
        league = str(match.league_id or "")
        raw_referee: Any = getattr(match, "referee_name", None)
        referee = str(raw_referee).strip() if raw_referee else None

        # Keep the lookup order: league, referee, home squad, away squad.
        league_goals = _compute_league_avg_goals(cur, league, cutoff)
        referee_goals, referee_bias = _compute_referee_profile(cur, referee, cutoff)
        home_strength, home_keys = _compute_team_squad_profile(cur, str(match.h_id), cutoff)
        away_strength, away_keys = _compute_team_squad_profile(cur, str(match.a_id), cutoff)

        collected["league_avg_goals"].append(league_goals)
        collected["referee_avg_goals"].append(referee_goals)
        collected["referee_home_bias"].append(referee_bias)
        collected["home_squad_strength"].append(home_strength)
        collected["away_squad_strength"].append(away_strength)
        collected["home_key_players"].append(home_keys)
        collected["away_key_players"].append(away_keys)

    enriched = df.copy()
    for column_name, values in collected.items():
        enriched[column_name] = values

    print(f"Pre-match context tamam ({time.time() - started_at:.1f}s)")
    return enriched
|
||||||
|
|
||||||
|
|
||||||
|
def _prepare_features(df: pd.DataFrame) -> pd.DataFrame:
    """Clean the enriched frame and add the model features and targets.

    Steps:
      1. Coerce the numeric columns (non-numeric values become NaN).
      2. Drop rows whose score or any 1X2 odd is missing, then fill the
         remaining gaps with the column medians.
      3. Keep only rows whose three odds are all above 1.0.
      4. Derive margin-free implied probabilities from the odds.
      5. Build every ``FEATURE_COLUMNS`` value through
         ``build_vqwen_feature_row``.
      6. Add the targets ``t_ms`` (0 home, 1 draw, 2 away), ``t_ou``
         (over 2.5 goals) and ``t_btts`` (both teams scored).

    Args:
        df: output of the pre-match enrichment step. It is not modified.

    Returns:
        A new DataFrame holding the cleaned rows plus feature and target
        columns.

    Raises:
        RuntimeError: if no row survives the odds filtering.
    """
    numeric_columns = [
        "sh",
        "sa",
        "utc",
        "league_avg_goals",
        "referee_avg_goals",
        "referee_home_bias",
        "home_squad_strength",
        "away_squad_strength",
        "home_key_players",
        "away_key_players",
        "h_elo",
        "a_elo",
        "h_home_goals",
        "a_away_goals",
        "h_rest",
        "a_rest",
        "oh",
        "od",
        "oa",
        "h2h_h_wr",
        "h_form_pts",
        "a_form_pts",
    ]
    # Work on a copy so the caller's frame is not mutated by the coercion.
    df = df.copy()
    for column in numeric_columns:
        df[column] = pd.to_numeric(df[column], errors="coerce")

    # Scores are the labels and odds are the market inputs: filling them with
    # a median would invent results and prices that then pass the odds filter
    # below. Rows lacking them are dropped instead of imputed.
    df = df.dropna(subset=["sh", "sa", "oh", "od", "oa"])
    df = df.fillna(df.median(numeric_only=True))
    df = df[(df["oh"] > 1.0) & (df["od"] > 1.0) & (df["oa"] > 1.0)].copy()
    if df.empty:
        raise RuntimeError("No valid rows remained after odds filtering.")

    # Normalise the inverse odds so the three probabilities sum to 1.
    margin = (1.0 / df["oh"]) + (1.0 / df["od"]) + (1.0 / df["oa"])
    df["imp_h"] = (1.0 / df["oh"]) / margin
    df["imp_d"] = (1.0 / df["od"]) / margin
    df["imp_a"] = (1.0 / df["oa"]) / margin

    feature_rows = df.apply(
        lambda row: build_vqwen_feature_row(
            VqwenFeatureInput(
                home_elo=float(row["h_elo"]),
                away_elo=float(row["a_elo"]),
                home_avg_goals_scored=float(row["h_home_goals"]),
                away_avg_goals_scored=float(row["a_away_goals"]),
                # No conceded-goal average is fetched; the opponent's scoring
                # average is passed in its place.
                home_avg_goals_conceded=float(row["a_away_goals"]),
                away_avg_goals_conceded=float(row["h_home_goals"]),
                # Shots and possession are not available here; fixed neutral
                # placeholders are used for every row.
                home_avg_shots_on_target=4.0,
                away_avg_shots_on_target=4.0,
                home_avg_possession=50.0,
                away_avg_possession=50.0,
                home_rest_days=float(row["h_rest"]),
                away_rest_days=float(row["a_rest"]),
                implied_prob_home=float(row["imp_h"]),
                implied_prob_draw=float(row["imp_d"]),
                implied_prob_away=float(row["imp_a"]),
                # Historical training must not leak actual match lineups.
                # Runtime also often defaults to 1.0 when pre-match lineup data
                # is unavailable, so training should mirror that behavior.
                home_lineup_availability=1.0,
                away_lineup_availability=1.0,
                h2h_home_win_rate=float(row["h2h_h_wr"]),
                home_form_score=float(row["h_form_pts"]),
                away_form_score=float(row["a_form_pts"]),
                league_avg_goals=float(row["league_avg_goals"]),
                referee_avg_goals=float(row["referee_avg_goals"]),
                referee_home_bias=float(row["referee_home_bias"]),
                home_squad_strength=float(row["home_squad_strength"]),
                away_squad_strength=float(row["away_squad_strength"]),
                home_key_players=float(row["home_key_players"]),
                away_key_players=float(row["away_key_players"]),
            ),
        ),
        axis=1,
        result_type="expand",
    )
    for column in FEATURE_COLUMNS:
        df[column] = feature_rows[column]

    df["t_ms"] = df.apply(
        lambda row: 0 if row["sh"] > row["sa"] else (2 if row["sh"] < row["sa"] else 1),
        axis=1,
    )
    df["t_ou"] = ((df["sh"] + df["sa"]) > 2.5).astype(int)
    df["t_btts"] = ((df["sh"] > 0) & (df["sa"] > 0)).astype(int)

    return df
|
||||||
|
|
||||||
|
|
||||||
|
def _temporal_split(df: pd.DataFrame, validation_ratio: float = 0.15) -> tuple[pd.DataFrame, pd.DataFrame]:
|
||||||
|
if df.empty:
|
||||||
|
raise RuntimeError("Cannot split an empty dataframe.")
|
||||||
|
|
||||||
|
ordered = df.sort_values("utc").reset_index(drop=True)
|
||||||
|
split_index = max(int(len(ordered) * (1.0 - validation_ratio)), 1)
|
||||||
|
split_index = min(split_index, len(ordered) - 1)
|
||||||
|
return ordered.iloc[:split_index].copy(), ordered.iloc[split_index:].copy()
|
||||||
|
|
||||||
|
|
||||||
|
def _save_metadata(df: pd.DataFrame, league_ids: list[str]) -> None:
    """Write a JSON summary of the training run next to the models.

    The summary records the local timestamp, the contract version, the
    league filter, the sample count, the feature column list and the class
    counts of the three targets. It is written to
    ``MODELS_DIR / "vqwen_training_meta.json"``; the directory is created
    when missing.

    Args:
        df: prepared frame holding ``t_ms``, ``t_ou`` and ``t_btts``.
        league_ids: league ids used for the run.
    """
    sample_count = int(len(df))
    over_count = df["t_ou"].sum()
    btts_count = df["t_btts"].sum()

    target_distribution = {
        "ms_home": int((df["t_ms"] == 0).sum()),
        "ms_draw": int((df["t_ms"] == 1).sum()),
        "ms_away": int((df["t_ms"] == 2).sum()),
        "ou25_over": int(over_count),
        "ou25_under": int(len(df) - over_count),
        "btts_yes": int(btts_count),
        "btts_no": int(len(df) - btts_count),
    }
    summary = {
        "trained_at": time.strftime("%Y-%m-%d %H:%M:%S"),
        "contract_version": "vqwen.shared.v1",
        "league_count": len(league_ids),
        "league_ids": league_ids,
        "sample_count": sample_count,
        "feature_columns": FEATURE_COLUMNS,
        "target_distribution": target_distribution,
    }

    MODELS_DIR.mkdir(parents=True, exist_ok=True)
    target_file = MODELS_DIR / "vqwen_training_meta.json"
    target_file.write_text(json.dumps(summary, indent=2), encoding="utf-8")
|
||||||
|
|
||||||
|
|
||||||
|
def train_vqwen_v3() -> None:
    """Train the VQWEN v3 LightGBM models and persist them with their metadata.

    Steps: load the league filter, pull and enrich the match data through a
    database cursor, build the feature frame, split it chronologically, train
    one multiclass model (match result) and two binary models (over/under 2.5
    and both-teams-to-score), then pickle the models into ``MODELS_DIR`` and
    write the training metadata file. The cursor and connection are closed
    even when a step fails.
    """
    print("VQWEN v3 MODEL EGITIMI (TOP LEAGUES)")
    print("=" * 60)

    league_ids = load_top_league_ids()
    print(f"League filter aktif: {len(league_ids)} lig")

    conn = psycopg2.connect(get_clean_dsn())
    cur = conn.cursor()

    try:
        raw_matches = _fetch_dataframe(cur, league_ids)
        enriched_matches = _enrich_pre_match_context(cur, raw_matches)
        df = _prepare_features(enriched_matches)
        print(f"Temiz egitim orneklemi: {len(df)} mac")

        train_df, valid_df = _temporal_split(df)
        train_features = train_df[FEATURE_COLUMNS]
        valid_features = valid_df[FEATURE_COLUMNS]

        split_summary = (
            f"Temporal split: train={len(train_df)} valid={len(valid_df)}"
            f" train_end_utc={int(train_df['utc'].max())}"
            f" valid_start_utc={int(valid_df['utc'].min())}"
        )
        print(split_summary)

        print("MS modeli egitiliyor...")
        ms_params = {
            "objective": "multiclass",
            "num_class": 3,
            "metric": "multi_logloss",
            "verbose": -1,
            "num_leaves": 63,
            "learning_rate": 0.03,
            "feature_fraction": 0.85,
            "bagging_fraction": 0.85,
            "bagging_freq": 1,
        }
        model_ms = lgb.train(
            ms_params,
            lgb.Dataset(train_features, train_df["t_ms"]),
            num_boost_round=1000,
            valid_sets=[lgb.Dataset(valid_features, valid_df["t_ms"])],
            callbacks=[lgb.early_stopping(50)],
        )

        # Both binary markets share the same hyper-parameters; each run gets
        # its own copy of the dict.
        binary_params = {
            "objective": "binary",
            "metric": "binary_logloss",
            "verbose": -1,
            "learning_rate": 0.03,
            "num_leaves": 31,
        }
        binary_models = {}
        binary_jobs = (
            ("OU2.5 modeli egitiliyor...", "t_ou"),
            ("BTTS modeli egitiliyor...", "t_btts"),
        )
        for banner, target in binary_jobs:
            print(banner)
            binary_models[target] = lgb.train(
                dict(binary_params),
                lgb.Dataset(train_features, train_df[target]),
                num_boost_round=1000,
                valid_sets=[lgb.Dataset(valid_features, valid_df[target])],
                callbacks=[lgb.early_stopping(50)],
            )

        MODELS_DIR.mkdir(parents=True, exist_ok=True)
        artifacts = (
            ("vqwen_ms.pkl", model_ms),
            ("vqwen_ou25.pkl", binary_models["t_ou"]),
            ("vqwen_btts.pkl", binary_models["t_btts"]),
        )
        for filename, booster in artifacts:
            with (MODELS_DIR / filename).open("wb") as handle:
                pickle.dump(booster, handle)
            print(f"Kaydedildi: {filename}")

        _save_metadata(df, league_ids)
        print("Kaydedildi: vqwen_training_meta.json")
        print("VQWEN v3 top league egitimi tamamlandi.")
    finally:
        cur.close()
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: run the VQWEN v3 training only when this file is executed directly.
if __name__ == "__main__":
|
||||||
|
train_vqwen_v3()
|
||||||
Executable
+246
@@ -0,0 +1,246 @@
|
|||||||
|
"""
|
||||||
|
XGBoost Market Model Trainer
|
||||||
|
============================
|
||||||
|
Trains specialized XGBoost models for each betting market.
|
||||||
|
Includes 'Surprise Hunter' logic for HT/FT reversals (1/2, 2/1).
|
||||||
|
|
||||||
|
Models:
|
||||||
|
1. MS (1X2) - Multi-class
|
||||||
|
2. Over/Under 2.5 - Binary
|
||||||
|
3. BTTS - Binary
|
||||||
|
4. HT/FT - Multi-class (Imbalanced learning for 1/2, 2/1)
|
||||||
|
5. Other line variants (1.5, 3.5, etc.)
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python3 scripts/train_xgboost_markets.py
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import pickle
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import xgboost as xgb
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.metrics import accuracy_score, log_loss, classification_report, roc_auc_score
|
||||||
|
from sklearn.preprocessing import LabelEncoder
|
||||||
|
|
||||||
|
# Config: all paths are derived from the directory two levels above this
# script file. The model output directory is created at import time.
|
||||||
|
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
DATA_PATH = os.path.join(AI_ENGINE_DIR, "data", "training_data.csv")
|
||||||
|
MODELS_DIR = os.path.join(AI_ENGINE_DIR, "models", "xgboost")
|
||||||
|
|
||||||
|
os.makedirs(MODELS_DIR, exist_ok=True)
|
||||||
|
|
||||||
|
# Feature Columns (Must match extraction + inference)
|
||||||
|
# Ordered model input columns. The order is part of the contract with feature
# extraction and inference, so entries must not be reordered.
FEATURES = [
    # ELO ratings
    "home_overall_elo",
    "away_overall_elo",
    "elo_diff",
    "home_home_elo",
    "away_away_elo",
    "form_elo_diff",
    # Recent form
    "home_goals_avg",
    "home_conceded_avg",
    "away_goals_avg",
    "away_conceded_avg",
    "home_clean_sheet_rate",
    "away_clean_sheet_rate",
    "home_scoring_rate",
    "away_scoring_rate",
    "home_winning_streak",
    "away_winning_streak",
    # Head-to-head
    "h2h_home_win_rate",
    "h2h_draw_rate",
    "h2h_avg_goals",
    "h2h_btts_rate",
    "h2h_over25_rate",
    # Match statistics
    "home_avg_possession",
    "away_avg_possession",
    "home_avg_shots_on_target",
    "away_avg_shots_on_target",
    "home_shot_conversion",
    "away_shot_conversion",
    # Odds: full-time 1X2 and implied probabilities
    "odds_ms_h",
    "odds_ms_d",
    "odds_ms_a",
    "implied_home",
    "implied_draw",
    "implied_away",
    # Odds: half-time 1X2
    "odds_ht_ms_h",
    "odds_ht_ms_d",
    "odds_ht_ms_a",
    # Odds: full-time over/under lines
    "odds_ou05_o",
    "odds_ou05_u",
    "odds_ou15_o",
    "odds_ou15_u",
    "odds_ou25_o",
    "odds_ou25_u",
    "odds_ou35_o",
    "odds_ou35_u",
    # Odds: half-time over/under lines
    "odds_ht_ou05_o",
    "odds_ht_ou05_u",
    "odds_ht_ou15_o",
    "odds_ht_ou15_u",
    # Odds: both teams to score
    "odds_btts_y",
    "odds_btts_n",
    # League / context
    "league_avg_goals",
    "league_zero_goal_rate",
    "home_xga",
    "away_xga",
    # Upset engine
    "upset_atmosphere",
    "upset_motivation",
    "upset_fatigue",
    "upset_potential",
    # Referee engine
    "referee_home_bias",
    "referee_avg_goals",
    "referee_cards_total",
    "referee_avg_yellow",
    "referee_experience",
    # Momentum engine
    "home_momentum_score",
    "away_momentum_score",
    "momentum_diff",
]
|
||||||
|
|
||||||
|
def load_data():
    """Load the training CSV from ``DATA_PATH`` and impute missing feature values.

    Missing values are replaced with 0 in every column except those whose name
    starts with ``label_``. Labels keep their NaN so that the per-market
    ``notna()`` filter applied before training can drop rows without a target;
    zero-filling them would turn every missing label into class 0.

    Returns:
        The loaded ``pandas.DataFrame``.

    Exits the process with status 1 when the data file does not exist.
    """
    if not os.path.exists(DATA_PATH):
        print(f"❌ Data file not found: {DATA_PATH}")
        sys.exit(1)

    print(f"📦 Loading data from {DATA_PATH}...")
    df = pd.read_csv(DATA_PATH)

    # Simple imputation for robustness, restricted to non-label columns.
    fill_columns = [col for col in df.columns if not str(col).startswith("label_")]
    df[fill_columns] = df[fill_columns].fillna(0)

    print(f" Shape: {df.shape}")
    return df
|
||||||
|
|
||||||
|
def train_model(df, target_col, model_name, objective, metric, num_class=None, class_weights=None):
    """
    Generic trainer for XGBoost models.

    Supports binary and multi-class objectives, and optional per-class sample
    weighting for imbalanced targets (like 1/2 reversals).

    Args:
        df: Training frame containing every column in ``FEATURES`` plus *target_col*.
        target_col: Name of the integer-coded label column.
        model_name: Base file name (without extension) for the saved artifacts.
        objective: XGBoost objective; ``"multi:softprob"`` selects the
            multi-class evaluation path, anything else the binary path.
        metric: XGBoost ``eval_metric`` used for early stopping.
        num_class: Number of classes for multi-class objectives.
        class_weights: Optional mapping ``class label -> sample weight``;
            labels missing from the mapping get weight 1.0.

    Returns:
        None. The fitted model is written to ``MODELS_DIR`` as a raw booster
        JSON and as a pickled scikit-learn wrapper. Returns early without
        saving anything when no row has a non-null target.
    """
    print(f"\n🚀 Training {model_name} (Target: {target_col})...")

    # Filter valid rows for this target
    valid_df = df[df[target_col].notna()].copy()
    if valid_df.empty:
        print(f" ⚠️ No valid data for {target_col}, skipping.")
        return

    X = valid_df[FEATURES]
    y = valid_df[target_col].astype(int)

    # Split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # Sample Weights (For HT/FT Surprise)
    sample_weights__train = None
    if class_weights:
        print(" ⚖️ Applying class weights for surprise detection...")
        sample_weights__train = y_train.map(class_weights).fillna(1.0)

    # Model Params
    params = {
        'objective': objective,
        'eval_metric': metric,
        'eta': 0.05,
        'max_depth': 6,
        'subsample': 0.8,
        'colsample_bytree': 0.8,
        'nthread': 4,
        'seed': 42
    }

    if num_class:
        params['num_class'] = num_class

    # Train using Scikit-Learn Wrapper so we can pickle it cleanly for v20_ensemble.
    # The same wrapper serves both the multi-class and the binary objective.
    model = xgb.XGBClassifier(**params, n_estimators=1000, early_stopping_rounds=50)

    # Fit with early stopping.
    # NOTE(review): the early-stopping set is the same split that is scored
    # below, so the printed metrics are likely optimistic — confirm whether a
    # separate validation split is wanted.
    model.fit(
        X_train, y_train,
        sample_weight=sample_weights__train,
        eval_set=[(X_test, y_test)],
        verbose=False
    )

    # Evaluation
    preds = model.predict_proba(X_test)

    if objective == "multi:softprob":
        y_pred_class = np.argmax(preds, axis=1)
        acc = accuracy_score(y_test, y_pred_class)
        # Pass the label set explicitly: log_loss raises when the test fold
        # happens to miss one of the classes the model predicts.
        loss = log_loss(y_test, preds, labels=list(range(preds.shape[1])))
        print(f" ✅ Accuracy: {acc:.4f} | LogLoss: {loss:.4f}")

        # Detailed report for important classes
        print(classification_report(y_test, y_pred_class))
    else:
        # Binary: extract the probability for class 1
        class_1_preds = preds[:, 1]
        y_pred_class = (class_1_preds > 0.5).astype(int)
        acc = accuracy_score(y_test, y_pred_class)
        auc = roc_auc_score(y_test, class_1_preds)
        print(f" ✅ Accuracy: {acc:.4f} | AUC: {auc:.4f}")

    # Save raw json booster
    model_json_path = os.path.join(MODELS_DIR, f"{model_name}.json")
    model.get_booster().save_model(model_json_path)

    # Save sklearn wrapped PKL (What v20_ensemble actually loads for Uncalibrated models like ht_ft!)
    model_pkl_path = os.path.join(MODELS_DIR, f"{model_name}.pkl")
    with open(model_pkl_path, "wb") as f:
        pickle.dump(model, f)

    print(f" 💾 Model saved to {model_json_path} and {model_pkl_path}")
|
||||||
|
|
||||||
|
def main():
    """Load the training data and train one XGBoost model per betting market."""
    df = load_data()

    # HT/FT classes: 0=1/1, 1=1/X, 2=1/2(HOME->AWAY), 3=X/1 ... 6=2/1(AWAY->HOME) ...
    # The two reversal classes (2 and 6) get a very large weight.
    htft_weights = {
        0: 1.0, 1: 3.0, 2: 15.0,
        3: 2.0, 4: 2.0, 5: 2.0,
        6: 15.0, 7: 3.0, 8: 1.0,
    }

    multiclass = {"objective": "multi:softprob", "metric": "mlogloss"}
    binary = {"objective": "binary:logistic", "metric": "logloss"}

    # (target column, artifact name, keyword options), in training order.
    jobs = [
        # 1. Match Result (1X2)
        ("label_ms", "xgb_ms", {**multiclass, "num_class": 3}),
        # 2. Over/Under 2.5
        ("label_ou25", "xgb_ou25", binary),
        # 3. BTTS
        ("label_btts", "xgb_btts", binary),
        # 4. HT/FT surprise hunter
        ("label_ht_ft", "xgb_ht_ft", {**multiclass, "num_class": 9, "class_weights": htft_weights}),
        # 5. Over/Under 1.5 & 3.5 (optional utility models)
        ("label_ou15", "xgb_ou15", binary),
        ("label_ou35", "xgb_ou35", binary),
        # 6. Half-Time 1X2
        ("label_ht_result", "xgb_ht_result", {**multiclass, "num_class": 3}),
        # 7. Half-Time Over/Under
        ("label_ht_ou05", "xgb_ht_ou05", binary),
        ("label_ht_ou15", "xgb_ht_ou15", binary),
        # 8. Handicap MS and Cards
        ("label_handicap_ms", "xgb_handicap_ms", {**multiclass, "num_class": 3}),
        ("label_cards_ou45", "xgb_cards_ou45", binary),
    ]

    for target_col, model_name, options in jobs:
        train_model(df, target_col, model_name, **options)

    print("\n✅ All models trained successfully!")
|
||||||
|
|
||||||
|
# Script entry point: train all market models only when this file is executed directly.
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
Executable
+222
@@ -0,0 +1,222 @@
|
|||||||
|
"""
|
||||||
|
V20 Pro Model Trainer
|
||||||
|
=====================
|
||||||
|
Advanced training pipeline for Suggest-Bet V20 Ensemble.
|
||||||
|
|
||||||
|
Features:
|
||||||
|
1. Optuna Hyperparameter Optimization
|
||||||
|
2. Stratified K-Fold Cross-Validation
|
||||||
|
3. Probability Calibration (Isotonic Regression)
|
||||||
|
4. Market-specific weight handling for reversals (1/2, 2/1)
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python3 scripts/train_xgboost_pro.py
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import pickle
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import xgboost as xgb
|
||||||
|
import optuna
|
||||||
|
from optuna.samplers import TPESampler
|
||||||
|
from sklearn.model_selection import StratifiedKFold, train_test_split
|
||||||
|
from sklearn.metrics import accuracy_score, log_loss, brier_score_loss, classification_report
|
||||||
|
from sklearn.calibration import CalibratedClassifierCV, calibration_curve
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
# Config: all paths are derived from the directory two levels above this
# script file. The model and report directories are created at import time.
|
||||||
|
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
DATA_PATH = os.path.join(AI_ENGINE_DIR, "data", "training_data.csv")
|
||||||
|
MODELS_DIR = os.path.join(AI_ENGINE_DIR, "models", "xgboost")
|
||||||
|
REPORTS_DIR = os.path.join(AI_ENGINE_DIR, "reports", "training_v20")
|
||||||
|
|
||||||
|
os.makedirs(MODELS_DIR, exist_ok=True)
|
||||||
|
os.makedirs(REPORTS_DIR, exist_ok=True)
|
||||||
|
|
||||||
|
# Feature Columns (Must match extraction + inference)
|
||||||
|
# Ordered model input columns. The order is part of the contract with feature
# extraction and inference, so entries must not be reordered.
FEATURES = [
    # ELO ratings
    "home_overall_elo",
    "away_overall_elo",
    "elo_diff",
    "home_home_elo",
    "away_away_elo",
    "form_elo_diff",
    # Recent form
    "home_goals_avg",
    "home_conceded_avg",
    "away_goals_avg",
    "away_conceded_avg",
    "home_clean_sheet_rate",
    "away_clean_sheet_rate",
    "home_scoring_rate",
    "away_scoring_rate",
    "home_winning_streak",
    "away_winning_streak",
    # Head-to-head
    "h2h_home_win_rate",
    "h2h_draw_rate",
    "h2h_avg_goals",
    "h2h_btts_rate",
    "h2h_over25_rate",
    # Match statistics
    "home_avg_possession",
    "away_avg_possession",
    "home_avg_shots_on_target",
    "away_avg_shots_on_target",
    "home_shot_conversion",
    "away_shot_conversion",
    # Odds (implicit market wisdom)
    "odds_ms_h",
    "odds_ms_d",
    "odds_ms_a",
    "implied_home",
    "implied_draw",
    "implied_away",
    # League / context
    "league_avg_goals",
    "league_zero_goal_rate",
    "home_xga",
    "away_xga",
]
|
||||||
|
|
||||||
|
def load_data():
    """Load the training CSV from ``DATA_PATH`` and impute missing feature values.

    Missing values are replaced with 0 in every column except those whose name
    starts with ``label_``. Labels keep their NaN so that the ``notna()``
    filter applied per market before training can drop rows without a target;
    zero-filling them would turn every missing label into class 0.

    Returns:
        The loaded ``pandas.DataFrame``.

    Exits the process with status 1 when the data file does not exist.
    """
    if not os.path.exists(DATA_PATH):
        print(f"❌ Data file not found: {DATA_PATH}")
        sys.exit(1)

    print(f"📦 Loading data from {DATA_PATH}...")
    df = pd.read_csv(DATA_PATH)

    # Impute non-label columns only; label NaNs must survive for filtering.
    fill_columns = [col for col in df.columns if not str(col).startswith("label_")]
    df[fill_columns] = df[fill_columns].fillna(0)

    print(f" Shape: {df.shape}")
    return df
|
||||||
|
|
||||||
|
class MarketTrainer:
    """Tune, calibrate and persist an XGBoost classifier for one betting market.

    On construction the rows with a non-null target are kept and split into a
    training part (85%) and a stratified hold-out part (15%). ``optimize``
    searches hyper-parameters with Optuna using 5-fold stratified
    cross-validation on the training part; ``train_final`` fits an
    isotonic-calibrated model with the chosen parameters, reports hold-out
    metrics and pickles the calibrated model into ``MODELS_DIR``.
    """

    def __init__(self, df, target_col, market_name, is_multi=False, num_class=None, weights=None):
        """
        Args:
            df: Training frame containing every column in ``FEATURES`` plus *target_col*.
            target_col: Name of the integer-coded label column.
            market_name: Market label used in log output and in the artifact file name.
            is_multi: True for multi-class markets, False for binary ones.
            num_class: Number of classes; used only when *is_multi* is True.
            weights: Optional mapping ``class label -> sample weight``.
        """
        self.df = df[df[target_col].notna()].copy()
        self.target_col = target_col
        self.market_name = market_name
        self.is_multi = is_multi
        self.num_class = num_class
        self.weights = weights

        self.X = self.df[FEATURES]
        self.y = self.df[target_col].astype(int)

        # Split for final evaluation hold-out
        self.X_train, self.X_holdout, self.y_train, self.y_holdout = train_test_split(
            self.X, self.y, test_size=0.15, random_state=42, stratify=self.y
        )

    def _sample_weights(self, labels):
        """Return per-row weights for *labels*, or None when no class weights are set."""
        if not self.weights:
            return None
        # Classes missing from the mapping fall back to a neutral weight.
        return labels.map(self.weights).fillna(1.0)

    def optimize(self, n_trials=50):
        """Run an Optuna search minimising CV log-loss and return the best parameters."""
        print(f"\n🔍 Tuning {self.market_name} with Optuna ({n_trials} trials)...")

        study = optuna.create_study(direction="minimize", sampler=TPESampler(seed=42))
        study.optimize(self.objective, n_trials=n_trials)

        print(f" Best params: {study.best_params}")
        print(f" Best Cross-Validation LogLoss: {study.best_value:.4f}")
        return study.best_params

    def objective(self, trial):
        """Optuna objective: mean log-loss over 5 stratified folds of the training part."""
        params = {
            "verbosity": 0,
            "objective": "multi:softprob" if self.is_multi else "binary:logistic",
            "eval_metric": "mlogloss" if self.is_multi else "logloss",
            "booster": "gbtree",
            "lambda": trial.suggest_float("lambda", 1e-8, 1.0, log=True),
            "alpha": trial.suggest_float("alpha", 1e-8, 1.0, log=True),
            "max_depth": trial.suggest_int("max_depth", 3, 9),
            "eta": trial.suggest_float("eta", 1e-3, 0.1, log=True),
            "gamma": trial.suggest_float("gamma", 1e-8, 1.0, log=True),
            "grow_policy": trial.suggest_categorical("grow_policy", ["depthwise", "lossguide"]),
            "subsample": trial.suggest_float("subsample", 0.5, 1.0),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
            "n_estimators": trial.suggest_int("n_estimators", 100, 1000),
            "early_stopping_rounds": 20,
            "n_jobs": 4,
            "random_state": 42
        }

        if self.is_multi:
            params["num_class"] = self.num_class

        skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        losses = []

        for train_idx, val_idx in skf.split(self.X_train, self.y_train):
            X_t, X_v = self.X_train.iloc[train_idx], self.X_train.iloc[val_idx]
            y_t, y_v = self.y_train.iloc[train_idx], self.y_train.iloc[val_idx]

            # Class weights apply to the fitting rows only; the fold loss is unweighted.
            w_t = self._sample_weights(y_t)

            model = xgb.XGBClassifier(**params)
            model.fit(X_t, y_t, sample_weight=w_t, eval_set=[(X_v, y_v)], verbose=False)

            preds = model.predict_proba(X_v)
            # Explicit labels keep log_loss from failing when a validation
            # fold happens to miss one of the classes the model predicts.
            losses.append(log_loss(y_v, preds, labels=model.classes_))

        return np.mean(losses)

    def train_final(self, best_params):
        """Fit the calibrated final model, report hold-out metrics and pickle it.

        Args:
            best_params: Hyper-parameters, typically the dict returned by
                ``optimize``. The dict is not modified.

        Returns:
            The fitted ``CalibratedClassifierCV`` instance.
        """
        print(f"🚀 Training final calibrated {self.market_name} model...")

        # Work on a copy so the caller's dict is left untouched.
        params = dict(best_params)

        # Add core params
        params["objective"] = "multi:softprob" if self.is_multi else "binary:logistic"
        params["eval_metric"] = "mlogloss" if self.is_multi else "logloss"
        if self.is_multi:
            params["num_class"] = self.num_class
        # Same seed as the tuning runs, so the final fit is reproducible.
        params.setdefault("random_state", 42)

        base_model = xgb.XGBClassifier(**params)

        # Sample weights for training
        w_train = self._sample_weights(self.y_train)

        # Calibration using Cross-Validation
        calibrated_model = CalibratedClassifierCV(base_model, method='isotonic', cv=5)
        calibrated_model.fit(self.X_train, self.y_train, sample_weight=w_train)

        # Evaluate on Hold-out
        holdout_preds_raw = calibrated_model.predict_proba(self.X_holdout)
        holdout_preds_class = calibrated_model.predict(self.X_holdout)

        acc = accuracy_score(self.y_holdout, holdout_preds_class)
        loss = log_loss(self.y_holdout, holdout_preds_raw, labels=calibrated_model.classes_)

        print(f"📊 Hold-out Results for {self.market_name}:")
        print(f" Accuracy: {acc:.4f} | LogLoss: {loss:.4f}")
        print(classification_report(self.y_holdout, holdout_preds_class))

        # Save model
        model_path = os.path.join(MODELS_DIR, f"xgb_{self.market_name.lower()}.pkl")
        with open(model_path, "wb") as f:
            pickle.dump(calibrated_model, f)

        print(f"💾 Calibrated model saved to {model_path}")
        return calibrated_model
|
||||||
|
|
||||||
|
def main():
    """Tune and train the calibrated V20 models for MS, OU 2.5, BTTS and HT/FT."""
    df = load_data()

    # HT/FT surprise hunter: classes 2 (1/2) and 6 (2/1) carry the maximum weight.
    htft_weights = {
        0: 1.0, 1: 3.0, 2: 20.0,
        3: 2.0, 4: 2.0, 5: 2.0,
        6: 20.0, 7: 3.0, 8: 1.0,
    }

    # (target column, market name, Optuna trials, extra constructor options)
    markets = [
        # 1. MS (1X2)
        ("label_ms", "MS", 50, {"is_multi": True, "num_class": 3}),
        # 2. OU 2.5
        ("label_ou25", "OU25", 30, {}),
        # 3. BTTS
        ("label_btts", "BTTS", 30, {}),
        # 4. HT/FT
        ("label_ht_ft", "HT_FT", 50, {"is_multi": True, "num_class": 9, "weights": htft_weights}),
    ]

    for target_col, market_name, trial_count, options in markets:
        trainer = MarketTrainer(df, target_col, market_name, **options)
        tuned_params = trainer.optimize(n_trials=trial_count)
        trainer.train_final(tuned_params)

    print("\n✅ Advanced V20 Model Training Complete!")
|
||||||
|
|
||||||
|
# Script entry point: run the V20 training pipeline only when this file is executed directly.
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
Executable
+3
@@ -0,0 +1,3 @@
|
|||||||
|
from .single_match_orchestrator import get_single_match_orchestrator
|
||||||
|
|
||||||
|
# Public API of this package: only the orchestrator factory is re-exported.
__all__ = ["get_single_match_orchestrator"]
|
||||||
@@ -0,0 +1,763 @@
|
|||||||
|
"""
|
||||||
|
Feature Enrichment Service
|
||||||
|
===========================
|
||||||
|
Computes real statistical features from DB for V25 model input.
|
||||||
|
|
||||||
|
Replaces hardcoded defaults in `_build_v25_features()` with rolling
|
||||||
|
averages from football_team_stats, matches, match_officials, and
|
||||||
|
match_player_events tables.
|
||||||
|
|
||||||
|
Each method receives a psycopg2 cursor + params and returns a dict.
|
||||||
|
All methods are fail-safe: they return sensible defaults when data
|
||||||
|
is missing or queries fail.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, Optional, Tuple
|
||||||
|
|
||||||
|
from psycopg2.extras import RealDictCursor
|
||||||
|
|
||||||
|
|
||||||
|
class FeatureEnrichmentService:
|
||||||
|
"""Stateless service — all state comes from DB via cursor."""
|
||||||
|
|
||||||
|
# ─── Default fallback values ─────────────────────────────────────
|
||||||
|
# Fallback for compute_team_stats when the team id is empty, the query fails
# or no rows are found.
_DEFAULT_TEAM_STATS = {
    "avg_possession": 50.0,
    "avg_shots_on_target": 4.0,
    "shot_conversion": 0.1,
    "avg_corners": 5.0,
}

# Fallback for compute_h2h when the two teams have no usable history.
_DEFAULT_H2H = {
    "total_matches": 0,
    "home_win_rate": 0.33,
    "draw_rate": 0.33,
    "avg_goals": 2.5,
    "btts_rate": 0.5,
    "over25_rate": 0.5,
    # V27 expanded
    "home_goals_avg": 1.3,
    "away_goals_avg": 1.1,
    "recent_trend": 0.0,
    "venue_advantage": 0.0,
}

# Fallback for compute_form_streaks.
_DEFAULT_FORM = {
    "clean_sheet_rate": 0.2,
    "scoring_rate": 0.8,
    "winning_streak": 0,
    "unbeaten_streak": 0,
}

# Fallback referee profile.
_DEFAULT_REFEREE = {
    "home_bias": 0.0,
    "avg_goals": 2.5,
    "cards_total": 4.0,
    "avg_yellow": 3.0,
    "experience": 0,
}

# Fallback league profile.
_DEFAULT_LEAGUE = {
    "avg_goals": 2.7,
    "zero_goal_rate": 0.07,
    # V27 expanded
    "home_win_rate": 0.46,
    "draw_rate": 0.26,
    "btts_rate": 0.50,
    "ou25_rate": 0.50,
    "reliability_score": 0.0,
}

# Fallback rolling-window averages.
_DEFAULT_ROLLING = {
    "rolling5_goals": 1.3,
    "rolling5_conceded": 1.2,
    "rolling10_goals": 1.3,
    "rolling10_conceded": 1.2,
    "rolling20_goals": 1.3,
    "rolling20_conceded": 1.2,
    "rolling5_cs": 0.2,
}

# Fallback venue-specific scoring averages.
_DEFAULT_VENUE = {
    "venue_goals": 1.4,
    "venue_conceded": 1.1,
}
|
||||||
|
|
||||||
|
# ─── 1. Team Stats ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
def compute_team_stats(
    self,
    cur: RealDictCursor,
    team_id: str,
    before_date_ms: int,
    limit: int = 10,
) -> Dict[str, float]:
    """
    Rolling averages from football_team_stats for a team's last N matches.

    Only finished ('FT') football matches that kicked off before
    ``before_date_ms`` and have a positive possession value are considered.

    Args:
        cur: Cursor returning dict-like rows.
        team_id: Team whose statistics are averaged.
        before_date_ms: Exclusive upper bound compared against ``matches.mst_utc``
            (presumably epoch milliseconds — confirm against the schema).
        limit: Maximum number of most recent matches to average over.

    Returns:
        Dict with avg_possession, avg_shots_on_target, shot_conversion and
        avg_corners. A copy of the defaults is returned when ``team_id`` is
        empty, the query fails, or no rows match.
    """
    if not team_id:
        return dict(self._DEFAULT_TEAM_STATS)
    try:
        cur.execute(
            """
            SELECT
                mts.possession_percentage,
                mts.shots_on_target,
                mts.total_shots,
                mts.corners
            FROM football_team_stats mts
            JOIN matches m ON m.id = mts.match_id
            WHERE mts.team_id = %s
              AND m.status = 'FT'
              AND m.mst_utc < %s
              AND m.sport = 'football'
              AND mts.possession_percentage IS NOT NULL
              AND mts.possession_percentage > 0
            ORDER BY m.mst_utc DESC
            LIMIT %s
            """,
            (team_id, before_date_ms, limit),
        )
        rows = cur.fetchall()
    except Exception:
        # Fail-safe by design, but leave a trace: a silent fallback would hide
        # database problems behind plausible-looking default features.
        import logging

        logging.getLogger(__name__).warning(
            "compute_team_stats query failed for team_id=%s; using defaults",
            team_id,
            exc_info=True,
        )
        return dict(self._DEFAULT_TEAM_STATS)

    if not rows:
        return dict(self._DEFAULT_TEAM_STATS)

    possession_vals = []
    sot_vals = []
    conversion_vals = []
    corner_vals = []

    for row in rows:
        poss = row.get('possession_percentage')
        if poss is not None:
            possession_vals.append(float(poss))

        sot = row.get('shots_on_target')
        if sot is not None:
            sot_vals.append(float(sot))

        total_shots = row.get('total_shots')
        # The ratio is shots on target / total shots.
        # NOTE(review): matches with zero shots on target are skipped here
        # rather than counted as 0.0 — confirm this matches how the training
        # features were extracted before changing it.
        if total_shots and sot and float(total_shots) > 0:
            conversion_vals.append(float(sot) / float(total_shots))

        corners = row.get('corners')
        if corners is not None:
            corner_vals.append(float(corners))

    return {
        'avg_possession': _safe_avg(possession_vals, 50.0),
        'avg_shots_on_target': _safe_avg(sot_vals, 4.0),
        'shot_conversion': _safe_avg(conversion_vals, 0.1),
        'avg_corners': _safe_avg(corner_vals, 5.0),
    }
|
||||||
|
|
||||||
|
# ─── 2. Head-to-Head ────────────────────────────────────────────
|
||||||
|
|
||||||
|
def compute_h2h(
    self,
    cur: RealDictCursor,
    home_team_id: str,
    away_team_id: str,
    before_date_ms: int,
    limit: int = 20,
) -> Dict[str, float]:
    """
    Historical head-to-head between two teams (both directions).

    All rates are expressed from the point of view of ``home_team_id``, the
    home side of the match being predicted, regardless of which team hosted
    each historical fixture.

    Args:
        cur: Cursor returning dict-like rows.
        home_team_id: Home team of the match being predicted.
        away_team_id: Away team of the match being predicted.
        before_date_ms: Exclusive upper bound compared against ``matches.mst_utc``.
        limit: Maximum number of most recent meetings to consider.

    Returns:
        Dict with total_matches, home_win_rate, draw_rate, avg_goals,
        btts_rate, over25_rate, home_goals_avg, away_goals_avg, recent_trend
        and venue_advantage. A copy of the defaults is returned when an id is
        empty, the query fails, or the teams have no finished meetings.
    """
    if not home_team_id or not away_team_id:
        return dict(self._DEFAULT_H2H)
    try:
        cur.execute(
            """
            SELECT
                m.home_team_id,
                m.away_team_id,
                m.score_home,
                m.score_away
            FROM matches m
            WHERE m.status = 'FT'
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
              AND m.mst_utc < %s
              AND (
                  (m.home_team_id = %s AND m.away_team_id = %s) OR
                  (m.home_team_id = %s AND m.away_team_id = %s)
              )
            ORDER BY m.mst_utc DESC
            LIMIT %s
            """,
            (
                before_date_ms,
                home_team_id, away_team_id,
                away_team_id, home_team_id,
                limit,
            ),
        )
        rows = cur.fetchall()
    except Exception:
        # Fail-safe by design, but leave a trace: a silent fallback would hide
        # database problems behind plausible-looking default features.
        import logging

        logging.getLogger(__name__).warning(
            "compute_h2h query failed for %s vs %s; using defaults",
            home_team_id,
            away_team_id,
            exc_info=True,
        )
        return dict(self._DEFAULT_H2H)

    if not rows:
        return dict(self._DEFAULT_H2H)

    # Row ids are compared as strings; normalise the argument the same way so
    # a non-string id cannot make every fixture look reversed.
    home_key = str(home_team_id)

    def _home_side_won(match_row) -> bool:
        """True when the predicted home team won the given historical match."""
        host_goals = int(match_row['score_home'])
        guest_goals = int(match_row['score_away'])
        if str(match_row['home_team_id']) == home_key:
            return host_goals > guest_goals
        return guest_goals > host_goals

    total = len(rows)
    home_wins = 0
    draws = 0
    total_goals = 0
    btts_count = 0
    over25_count = 0
    # V27 expanded trackers
    home_team_goals_list = []
    away_team_goals_list = []
    home_team_venue_wins = 0
    home_team_venue_total = 0
    away_team_venue_wins = 0
    away_team_venue_total = 0

    for row in rows:
        sh = int(row['score_home'])
        sa = int(row['score_away'])
        match_goals = sh + sa
        total_goals += match_goals

        # Normalise: who is "home team" in THIS prediction context
        if str(row['home_team_id']) == home_key:
            home_team_goals_list.append(sh)
            away_team_goals_list.append(sa)
            home_team_venue_total += 1
            if sh > sa:
                home_wins += 1
                home_team_venue_wins += 1
            elif sh == sa:
                draws += 1
        else:
            # Reversed fixture: away_team was at home
            home_team_goals_list.append(sa)
            away_team_goals_list.append(sh)
            away_team_venue_total += 1
            if sa > sh:
                home_wins += 1
                away_team_venue_wins += 1
            elif sh == sa:
                draws += 1

        if sh > 0 and sa > 0:
            btts_count += 1
        if match_goals > 2:
            over25_count += 1

    # V27: recent_trend = wins in the 5 newest meetings minus wins in the
    # 5 oldest fetched meetings (rows are newest-first). With fewer than 10
    # rows the two windows overlap.
    recent_trend = 0.0
    if total >= 6:
        recent_5_wins = sum(1 for r in rows[:5] if _home_side_won(r))
        older_5_wins = sum(1 for r in rows[-5:] if _home_side_won(r))
        recent_trend = (recent_5_wins - older_5_wins) / 5.0

    # V27: venue_advantage = home_win_rate_at_home - home_win_rate_away
    venue_advantage = 0.0
    if home_team_venue_total > 0 and away_team_venue_total > 0:
        venue_advantage = (
            home_team_venue_wins / home_team_venue_total
            - away_team_venue_wins / away_team_venue_total
        )

    return {
        'total_matches': total,
        'home_win_rate': home_wins / total,
        'draw_rate': draws / total,
        'avg_goals': total_goals / total,
        'btts_rate': btts_count / total,
        'over25_rate': over25_count / total,
        # V27 expanded
        'home_goals_avg': _safe_avg(home_team_goals_list, 1.3),
        'away_goals_avg': _safe_avg(away_team_goals_list, 1.1),
        'recent_trend': round(recent_trend, 4),
        'venue_advantage': round(venue_advantage, 4),
    }
|
||||||
|
|
||||||
|
# ─── 3. Form & Streaks ──────────────────────────────────────────
|
||||||
|
|
||||||
|
def compute_form_streaks(
    self,
    cur: RealDictCursor,
    team_id: str,
    before_date_ms: int,
    limit: int = 10,
) -> Dict[str, float]:
    """
    Summarise a team's recent form from its latest finished matches.

    Reads up to ``limit`` matches with status 'FT' and non-null scores whose
    ``mst_utc`` is below ``before_date_ms``, newest first, and returns:

    * ``clean_sheet_rate`` - share of those matches with no goal conceded
    * ``scoring_rate``     - share of those matches with at least one goal scored
    * ``winning_streak``   - consecutive wins counted from the newest match
    * ``unbeaten_streak``  - consecutive non-losses counted from the newest match

    A copy of ``self._DEFAULT_FORM`` is returned when ``team_id`` is empty,
    when the query raises, or when no rows come back.
    """
    if not team_id:
        return dict(self._DEFAULT_FORM)

    sql = """
        SELECT
            m.home_team_id,
            m.away_team_id,
            m.score_home,
            m.score_away
        FROM matches m
        WHERE (m.home_team_id = %s OR m.away_team_id = %s)
          AND m.status = 'FT'
          AND m.score_home IS NOT NULL
          AND m.score_away IS NOT NULL
          AND m.mst_utc < %s
        ORDER BY m.mst_utc DESC
        LIMIT %s
    """
    try:
        cur.execute(sql, (team_id, team_id, before_date_ms, limit))
        fetched = cur.fetchall()
    except Exception:
        return dict(self._DEFAULT_FORM)

    if not fetched:
        return dict(self._DEFAULT_FORM)

    # (scored, conceded) from the team's point of view, newest match first.
    results = []
    for match in fetched:
        if str(match['home_team_id']) == team_id:
            pair = (int(match['score_home']), int(match['score_away']))
        else:
            pair = (int(match['score_away']), int(match['score_home']))
        results.append(pair)

    # A winning streak ends at the first draw or defeat.
    winning_streak = 0
    for scored, conceded in results:
        if scored <= conceded:
            break
        winning_streak += 1

    # An unbeaten streak ends only at the first defeat.
    unbeaten_streak = 0
    for scored, conceded in results:
        if scored < conceded:
            break
        unbeaten_streak += 1

    played = len(results)
    shutouts = sum(1 for _, conceded in results if conceded == 0)
    found_net = sum(1 for scored, _ in results if scored > 0)

    return {
        'clean_sheet_rate': shutouts / played,
        'scoring_rate': found_net / played,
        'winning_streak': winning_streak,
        'unbeaten_streak': unbeaten_streak,
    }
|
||||||
|
|
||||||
|
# ─── 4. Referee Stats ───────────────────────────────────────────
|
||||||
|
|
||||||
|
def compute_referee_stats(
    self,
    cur: RealDictCursor,
    referee_name: Optional[str],
    before_date_ms: int,
    limit: int = 30,
) -> Dict[str, float]:
    """
    Referee tendencies: home-win bias, average goals and card rates.

    The referee is matched by name in ``match_officials`` with
    ``role_id = 1``; up to ``limit`` finished ('FT') matches with non-null
    scores and ``mst_utc`` below ``before_date_ms`` are used, newest first.

    Returns a dict with:

    * ``home_bias``   - home-win rate of those matches minus 0.46, rounded to 4 dp
    * ``avg_goals``   - goals per match
    * ``cards_total`` - card events per match
    * ``avg_yellow``  - yellow-card events per match
    * ``experience``  - number of matches the figures are based on

    A copy of ``self._DEFAULT_REFEREE`` is returned when ``referee_name`` is
    empty, when the match query raises, or when it yields no rows. If only
    the card lookup fails (or yields no row), ``cards_total`` and
    ``avg_yellow`` fall back to 4.0 and 3.0 instead of being reported as
    zero cards per match.
    """
    if not referee_name:
        return dict(self._DEFAULT_REFEREE)
    try:
        # Matches officiated by this referee
        cur.execute(
            """
            SELECT
                m.home_team_id,
                m.score_home,
                m.score_away,
                m.id AS match_id
            FROM match_officials mo
            JOIN matches m ON m.id = mo.match_id
            WHERE mo.name = %s
              AND mo.role_id = 1
              AND m.status = 'FT'
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
              AND m.mst_utc < %s
            ORDER BY m.mst_utc DESC
            LIMIT %s
            """,
            (referee_name, before_date_ms, limit),
        )
        rows = cur.fetchall()
    except Exception:
        return dict(self._DEFAULT_REFEREE)

    if not rows:
        return dict(self._DEFAULT_REFEREE)

    total = len(rows)  # > 0 from here on, so the divisions below are safe
    home_wins = 0
    total_goals = 0
    match_ids = []

    for row in rows:
        sh = int(row['score_home'])
        sa = int(row['score_away'])
        total_goals += sh + sa
        if sh > sa:
            home_wins += 1
        match_ids.append(row['match_id'])

    # Per-match fallbacks, kept unless the card lookup below succeeds.
    cards_total = 4.0
    avg_yellow = 3.0
    try:
        cur.execute(
            """
            SELECT
                COUNT(*) FILTER (WHERE event_subtype = 'yc') AS yellows,
                COUNT(*) AS total_cards
            FROM match_player_events
            WHERE match_id = ANY(%s)
              AND event_type = 'card'
            """,
            (match_ids,),
        )
        card_row = cur.fetchone()
        if card_row:
            yellows = float(card_row.get('yellows') or 0)
            cards = float(card_row.get('total_cards') or 0)
            avg_yellow = yellows / total
            cards_total = cards / total
    except Exception:
        # Best effort: card data is optional, so keep the fallbacks rather
        # than fail the whole computation or report a misleading 0.0.
        pass

    # home_bias: (actual home win rate) - 0.46 (league average ~46%)
    home_bias = (home_wins / total) - 0.46

    return {
        'home_bias': round(home_bias, 4),
        'avg_goals': total_goals / total,
        'cards_total': cards_total,
        'avg_yellow': avg_yellow,
        'experience': total,
    }
|
||||||
|
|
||||||
|
# ─── 5. League Averages ─────────────────────────────────────────
|
||||||
|
|
||||||
|
def compute_league_averages(
    self,
    cur: RealDictCursor,
    league_id: Optional[str],
    before_date_ms: int,
    limit: int = 100,
) -> Dict[str, float]:
    """
    League-wide result and scoring rates.

    Uses up to ``limit`` finished ('FT') matches of ``league_id`` with
    non-null scores and ``mst_utc`` below ``before_date_ms``, newest first.
    Every rate is a fraction of the matches read; ``reliability_score`` is
    the match count divided by 50, capped at 1.0.

    A copy of ``self._DEFAULT_LEAGUE`` is returned when ``league_id`` is
    empty, when the query raises, or when no rows come back.
    """
    if not league_id:
        return dict(self._DEFAULT_LEAGUE)

    sql = """
        SELECT
            m.score_home,
            m.score_away
        FROM matches m
        WHERE m.league_id = %s
          AND m.status = 'FT'
          AND m.score_home IS NOT NULL
          AND m.score_away IS NOT NULL
          AND m.mst_utc < %s
        ORDER BY m.mst_utc DESC
        LIMIT %s
    """
    try:
        cur.execute(sql, (league_id, before_date_ms, limit))
        fetched = cur.fetchall()
    except Exception:
        return dict(self._DEFAULT_LEAGUE)

    if not fetched:
        return dict(self._DEFAULT_LEAGUE)

    # (home goals, away goals) for every match read
    scores = [(int(m['score_home']), int(m['score_away'])) for m in fetched]
    played = len(scores)
    goals_per_match = [home + away for home, away in scores]

    goalless = sum(1 for g in goals_per_match if g == 0)
    over25 = sum(1 for g in goals_per_match if g > 2)
    home_victories = sum(1 for home, away in scores if home > away)
    stalemates = sum(1 for home, away in scores if home == away)
    both_scored = sum(1 for home, away in scores if home > 0 and away > 0)

    return {
        'avg_goals': sum(goals_per_match) / played,
        'zero_goal_rate': goalless / played,
        # V27 expanded
        'home_win_rate': home_victories / played,
        'draw_rate': stalemates / played,
        'btts_rate': both_scored / played,
        'ou25_rate': over25 / played,
        'reliability_score': min(played / 50.0, 1.0),
    }
|
||||||
|
|
||||||
|
# ─── 6. Momentum ───────────────────────────────────────────────
|
||||||
|
|
||||||
|
def compute_momentum(
    self,
    cur: RealDictCursor,
    team_id: str,
    before_date_ms: int,
    limit: int = 5,
) -> float:
    """
    Recency-weighted momentum score over the team's latest matches.

    Each of the up-to-``limit`` most recent finished matches is scored
    W=3, D=1, L=-1 and weighted linearly by recency (the newest match gets
    the largest weight). The weighted sum is divided by the weighted sum of
    an all-win run and rounded to 4 decimals, so an all-win run gives 1.0
    and an all-loss run gives -0.3333.

    Returns 0.0 when ``team_id`` is empty, when the query raises, or when
    no matches are found.
    """
    if not team_id:
        return 0.0

    sql = """
        SELECT
            m.home_team_id,
            m.score_home,
            m.score_away
        FROM matches m
        WHERE (m.home_team_id = %s OR m.away_team_id = %s)
          AND m.status = 'FT'
          AND m.score_home IS NOT NULL
          AND m.score_away IS NOT NULL
          AND m.mst_utc < %s
        ORDER BY m.mst_utc DESC
        LIMIT %s
    """
    try:
        cur.execute(sql, (team_id, team_id, before_date_ms, limit))
        fetched = cur.fetchall()
    except Exception:
        return 0.0

    if not fetched:
        return 0.0

    earned = 0.0
    ceiling = 0.0

    # Rows arrive newest first, so weights count down from len(fetched) to 1.
    for weight, match in zip(range(len(fetched), 0, -1), fetched):
        if str(match['home_team_id']) == team_id:
            scored = int(match['score_home'])
            conceded = int(match['score_away'])
        else:
            scored = int(match['score_away'])
            conceded = int(match['score_home'])

        if scored > conceded:
            points = 3.0
        elif scored < conceded:
            points = -1.0
        else:
            points = 1.0

        earned += points * weight
        ceiling += 3.0 * weight  # what an all-win run would have earned

    if ceiling <= 0:
        return 0.0

    return round(earned / ceiling, 4)
|
||||||
|
|
||||||
|
|
||||||
|
# ─── 7. Rolling Stats (V27) ─────────────────────────────────────
|
||||||
|
|
||||||
|
def compute_rolling_stats(
    self,
    cur: RealDictCursor,
    team_id: str,
    before_date_ms: int,
) -> Dict[str, float]:
    """
    Rolling goals scored / conceded over the last 5, 10 and 20 matches,
    plus the clean-sheet rate over the last 5.

    One query fetches the 20 most recent finished matches before
    ``before_date_ms``; the three windows are sliced from that list. When
    fewer matches exist, each window simply averages what is available.

    A copy of ``self._DEFAULT_ROLLING`` is returned when ``team_id`` is
    empty, when the query raises, or when no rows come back.
    """
    if not team_id:
        return dict(self._DEFAULT_ROLLING)

    sql = """
        SELECT
            m.home_team_id,
            m.score_home,
            m.score_away
        FROM matches m
        WHERE (m.home_team_id = %s OR m.away_team_id = %s)
          AND m.status = 'FT'
          AND m.score_home IS NOT NULL
          AND m.score_away IS NOT NULL
          AND m.mst_utc < %s
        ORDER BY m.mst_utc DESC
        LIMIT 20
    """
    try:
        cur.execute(sql, (team_id, team_id, before_date_ms))
        fetched = cur.fetchall()
    except Exception:
        return dict(self._DEFAULT_ROLLING)

    if not fetched:
        return dict(self._DEFAULT_ROLLING)

    # Goals from the team's point of view, newest match first.
    scored = []
    conceded = []
    for match in fetched:
        if str(match['home_team_id']) == team_id:
            gf, ga = int(match['score_home']), int(match['score_away'])
        else:
            gf, ga = int(match['score_away']), int(match['score_home'])
        scored.append(gf)
        conceded.append(ga)

    shutouts = [int(ga == 0) for ga in conceded]

    # Slicing past the end of a list is safe, so short histories need no
    # special handling; the query already caps the list at 20 entries.
    return {
        'rolling5_goals': _safe_avg(scored[:5], 1.3),
        'rolling5_conceded': _safe_avg(conceded[:5], 1.2),
        'rolling10_goals': _safe_avg(scored[:10], 1.3),
        'rolling10_conceded': _safe_avg(conceded[:10], 1.2),
        'rolling20_goals': _safe_avg(scored, 1.3),
        'rolling20_conceded': _safe_avg(conceded, 1.2),
        'rolling5_cs': _safe_avg(shutouts[:5], 0.2),
    }
|
||||||
|
|
||||||
|
# ─── 8. Venue Stats (V27) ──────────────────────────────────────
|
||||||
|
|
||||||
|
def compute_venue_stats(
    self,
    cur: RealDictCursor,
    team_id: str,
    before_date_ms: int,
    is_home: bool = True,
) -> Dict[str, float]:
    """
    Average goals scored and conceded by a team at one venue type.

    With ``is_home`` true only the team's home matches are considered,
    otherwise only its away matches; in both cases the 20 most recent
    finished matches before ``before_date_ms`` are used.

    A copy of ``self._DEFAULT_VENUE`` is returned when ``team_id`` is empty,
    when the query raises, or when no rows come back.
    """
    if not team_id:
        return dict(self._DEFAULT_VENUE)

    # The column name comes from this fixed two-value choice, never from
    # caller input, so interpolating it into the SQL text is safe.
    if is_home:
        venue_col = 'home_team_id'
    else:
        venue_col = 'away_team_id'

    try:
        cur.execute(
            f"""
            SELECT m.score_home, m.score_away
            FROM matches m
            WHERE m.{venue_col} = %s
              AND m.status = 'FT'
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
              AND m.mst_utc < %s
            ORDER BY m.mst_utc DESC
            LIMIT 20
            """,
            (team_id, before_date_ms),
        )
        fetched = cur.fetchall()
    except Exception:
        return dict(self._DEFAULT_VENUE)

    if not fetched:
        return dict(self._DEFAULT_VENUE)

    # (home goals, away goals) per match; which side is "ours" depends on
    # the venue type requested.
    pairs = [(int(m['score_home']), int(m['score_away'])) for m in fetched]
    own, opp = (0, 1) if is_home else (1, 0)

    return {
        'venue_goals': _safe_avg([p[own] for p in pairs], 1.4),
        'venue_conceded': _safe_avg([p[opp] for p in pairs], 1.1),
    }
|
||||||
|
|
||||||
|
# ─── 9. Days Rest (V27) ────────────────────────────────────────
|
||||||
|
|
||||||
|
def compute_days_rest(
    self,
    cur: RealDictCursor,
    team_id: str,
    before_date_ms: int,
) -> float:
    """
    Days elapsed between the team's latest finished match and
    ``before_date_ms``.

    The difference of the two millisecond timestamps is converted to days,
    clamped to the range 0.0-30.0 and rounded to one decimal.

    Returns 7.0 when ``team_id`` is empty, when the query raises, or when
    no earlier match (or no usable ``mst_utc``) is found.
    """
    if not team_id:
        return 7.0

    sql = """
        SELECT m.mst_utc
        FROM matches m
        WHERE (m.home_team_id = %s OR m.away_team_id = %s)
          AND m.status = 'FT'
          AND m.mst_utc < %s
        ORDER BY m.mst_utc DESC
        LIMIT 1
    """
    try:
        cur.execute(sql, (team_id, team_id, before_date_ms))
        latest = cur.fetchone()
    except Exception:
        return 7.0

    if not latest or not latest.get('mst_utc'):
        return 7.0

    ms_per_day = 1000 * 86400
    elapsed_days = (before_date_ms - int(latest['mst_utc'])) / ms_per_day

    # Cap long breaks at 30 days and never report a negative rest.
    capped = min(elapsed_days, 30.0)
    return round(max(0.0, capped), 1)
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Utility ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _safe_avg(values: list, default: float) -> float:
|
||||||
|
"""Average with fallback for empty lists."""
|
||||||
|
if not values:
|
||||||
|
return default
|
||||||
|
return sum(values) / len(values)
|
||||||
+4655
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user