279 lines
15 KiB
PL/PgSQL
279 lines
15 KiB
PL/PgSQL
-- Migration: Sport-specific table partitioning
|
|
-- Purpose: Separate football and basketball data for better organization, query performance, and sport-specific schemas
|
|
-- Date: 2026-04-03
|
|
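-- Impact: Zero data loss. match_team_stats and match_ai_features are kept alongside the new tables until verification passes and are dropped afterwards; match_player_stats is the exception: section 2 renames it in place, so the old name stops resolving as soon as this migration commits.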
|
|
|
|
BEGIN;
|
|
|
|
-- ============================================
|
|
-- 1. match_team_stats → football_team_stats + basketball_team_stats
|
|
-- ============================================
|
|
-- Rationale: Football uses possession/shots/corners, Basketball uses points/quarters/fg/3pt
|
|
|
|
-- 1a. Create football_team_stats
|
|
-- Football team statistics, at most one row per (match_id, team_id).
-- Both foreign keys cascade, so deleting the referenced match or team
-- removes the dependent stats rows.
CREATE TABLE IF NOT EXISTS football_team_stats (
    id                    SERIAL PRIMARY KEY,
    match_id              TEXT NOT NULL
        CONSTRAINT fk_football_team_stats_match
        REFERENCES matches (id) ON DELETE CASCADE,
    team_id               TEXT NOT NULL
        CONSTRAINT fk_football_team_stats_team
        REFERENCES teams (id) ON DELETE CASCADE,
    possession_percentage NUMERIC(5, 2),
    shots_on_target       INTEGER,
    shots_off_target      INTEGER,
    total_shots           INTEGER,
    total_passes          INTEGER,
    corners               INTEGER,
    fouls                 INTEGER,
    offsides              INTEGER,
    created_at            TIMESTAMP NOT NULL DEFAULT NOW(),
    CONSTRAINT uq_football_team_stats_match_team UNIQUE (match_id, team_id)
);
|
|
|
|
-- 1b. Create basketball_team_stats
|
|
-- Basketball team statistics, at most one row per (match_id, team_id).
-- Both foreign keys cascade, so deleting the referenced match or team
-- removes the dependent stats rows.
CREATE TABLE IF NOT EXISTS basketball_team_stats (
    id                 SERIAL PRIMARY KEY,
    match_id           TEXT NOT NULL
        CONSTRAINT fk_basketball_team_stats_match
        REFERENCES matches (id) ON DELETE CASCADE,
    team_id            TEXT NOT NULL
        CONSTRAINT fk_basketball_team_stats_team
        REFERENCES teams (id) ON DELETE CASCADE,
    points             INTEGER,
    rebounds           INTEGER,
    assists            INTEGER,
    -- Made / attempted counters.
    fg_made            INTEGER,
    fg_attempted       INTEGER,
    three_pt_made      INTEGER,
    three_pt_attempted INTEGER,
    ft_made            INTEGER,
    ft_attempted       INTEGER,
    steals             INTEGER,
    blocks             INTEGER,
    turnovers          INTEGER,
    -- Per-period scores.
    q1_score           INTEGER,
    q2_score           INTEGER,
    q3_score           INTEGER,
    q4_score           INTEGER,
    ot_score           INTEGER,
    created_at         TIMESTAMP NOT NULL DEFAULT NOW(),
    CONSTRAINT uq_basketball_team_stats_match_team UNIQUE (match_id, team_id)
);
|
|
|
|
-- 1c. Copy data from match_team_stats to sport-specific tables
|
|
-- Copy the football rows of match_team_stats into football_team_stats.
-- A row qualifies when its match exists in matches with sport = 'football'.
-- Rows already present in the target (same match_id, team_id) are skipped so
-- the migration can be re-run; duplicates inside the source itself still hit
-- uq_football_team_stats_match_team and abort the transaction rather than
-- being dropped silently.
-- NOTE(review): rows whose match has any other sport value (or no matching
-- match) are copied nowhere by this script -- confirm none exist.
INSERT INTO football_team_stats (
    match_id,
    team_id,
    possession_percentage,
    shots_on_target,
    shots_off_target,
    total_shots,
    total_passes,
    corners,
    fouls,
    offsides,
    created_at
)
SELECT
    src.match_id,
    src.team_id,
    src.possession_percentage,
    src.shots_on_target,
    src.shots_off_target,
    src.total_shots,
    src.total_passes,
    src.corners,
    src.fouls,
    src.offsides,
    src.created_at
FROM match_team_stats AS src
WHERE EXISTS (
        SELECT 1
        FROM matches AS m
        WHERE m.id = src.match_id
          AND m.sport = 'football'
    )
  AND NOT EXISTS (
        SELECT 1
        FROM football_team_stats AS dst
        WHERE dst.match_id = src.match_id
          AND dst.team_id = src.team_id
    );
|
|
|
|
-- Copy the basketball rows of match_team_stats into basketball_team_stats.
-- A row qualifies when its match exists in matches with sport = 'basketball'.
-- Rows already present in the target (same match_id, team_id) are skipped so
-- the migration can be re-run; duplicates inside the source itself still hit
-- uq_basketball_team_stats_match_team and abort the transaction rather than
-- being dropped silently.
INSERT INTO basketball_team_stats (
    match_id,
    team_id,
    points,
    rebounds,
    assists,
    fg_made,
    fg_attempted,
    three_pt_made,
    three_pt_attempted,
    ft_made,
    ft_attempted,
    steals,
    blocks,
    turnovers,
    q1_score,
    q2_score,
    q3_score,
    q4_score,
    ot_score,
    created_at
)
SELECT
    src.match_id,
    src.team_id,
    src.points,
    src.rebounds,
    src.assists,
    src.fg_made,
    src.fg_attempted,
    src.three_pt_made,
    src.three_pt_attempted,
    src.ft_made,
    src.ft_attempted,
    src.steals,
    src.blocks,
    src.turnovers,
    src.q1_score,
    src.q2_score,
    src.q3_score,
    src.q4_score,
    src.ot_score,
    src.created_at
FROM match_team_stats AS src
WHERE EXISTS (
        SELECT 1
        FROM matches AS m
        WHERE m.id = src.match_id
          AND m.sport = 'basketball'
    )
  AND NOT EXISTS (
        SELECT 1
        FROM basketball_team_stats AS dst
        WHERE dst.match_id = src.match_id
          AND dst.team_id = src.team_id
    );
|
|
|
|
-- 1d. Reset sequences
|
|
-- Position each id sequence so the next nextval() returns MAX(id) + 1
-- (or 1 for an empty table). The third argument, false, marks the value as
-- not yet consumed. An aggregate without GROUP BY always yields one row,
-- so setval() runs exactly once per statement.
SELECT setval('football_team_stats_id_seq', COALESCE(MAX(id), 0) + 1, false)
FROM football_team_stats;

SELECT setval('basketball_team_stats_id_seq', COALESCE(MAX(id), 0) + 1, false)
FROM basketball_team_stats;
|
|
|
|
-- 1e. Create indexes
|
|
-- Lookup indexes on the new team-stats tables. IF NOT EXISTS keeps these
-- statements re-runnable, matching the CREATE TABLE IF NOT EXISTS statements
-- above and the index statements in sections 4 and 5.
-- NOTE(review): the UNIQUE (match_id, team_id) constraints already provide an
-- index led by match_id, so the two *_match indexes may be redundant --
-- confirm against query plans before removing them.
CREATE INDEX IF NOT EXISTS idx_football_team_stats_match
    ON football_team_stats (match_id);
CREATE INDEX IF NOT EXISTS idx_football_team_stats_team
    ON football_team_stats (team_id);
CREATE INDEX IF NOT EXISTS idx_basketball_team_stats_match
    ON basketball_team_stats (match_id);
CREATE INDEX IF NOT EXISTS idx_basketball_team_stats_team
    ON basketball_team_stats (team_id);
|
|
|
|
-- ============================================
|
|
-- 2. match_player_stats → basketball_player_stats
|
|
-- ============================================
|
|
-- Rationale: match_player_stats is already 99% basketball data
|
|
|
|
-- Rename match_player_stats (table, indexes, FK constraints) to
-- basketball_player_stats.
-- The renames are skipped only when the migration has evidently been applied
-- already (old name gone, new name present), which makes the script
-- re-runnable. In every other state the statements run exactly as before and
-- fail loudly if something is off (e.g. both tables exist).
-- NOTE(review): the table is renamed as a whole, so any non-basketball rows
-- it holds end up in basketball_player_stats, and code still using the old
-- name breaks at COMMIT -- confirm both are acceptable.
DO $$
BEGIN
    IF to_regclass('match_player_stats') IS NULL
       AND to_regclass('basketball_player_stats') IS NOT NULL THEN
        RAISE NOTICE 'match_player_stats already renamed to basketball_player_stats; skipping';
    ELSE
        ALTER TABLE match_player_stats RENAME TO basketball_player_stats;

        ALTER INDEX match_player_stats_pkey
            RENAME TO basketball_player_stats_pkey;
        ALTER INDEX match_player_stats_match_id_player_id_team_id_key
            RENAME TO basketball_player_stats_match_id_player_id_team_id_key;
        ALTER INDEX match_player_stats_match_id_idx
            RENAME TO basketball_player_stats_match_id_idx;

        -- Update FK constraint names
        ALTER TABLE basketball_player_stats
            RENAME CONSTRAINT "match_player_stats_match_id_fkey"
            TO "basketball_player_stats_match_id_fkey";
        ALTER TABLE basketball_player_stats
            RENAME CONSTRAINT "match_player_stats_player_id_fkey"
            TO "basketball_player_stats_player_id_fkey";
        ALTER TABLE basketball_player_stats
            RENAME CONSTRAINT "match_player_stats_team_id_fkey"
            TO "basketball_player_stats_team_id_fkey";
    END IF;
END
$$;
|
|
|
|
-- ============================================
|
|
-- 3. match_ai_features → football_ai_features + basketball_ai_features
|
|
-- ============================================
|
|
-- Rationale: Different feature calculation pipelines per sport
|
|
|
|
-- 3a. Create football_ai_features (same structure as current)
|
|
-- Football feature table, one row per match (match_id is the primary key).
-- Every feature column is nullable and carries a default; the row is removed
-- when the referenced match is deleted.
CREATE TABLE IF NOT EXISTS football_ai_features (
    match_id                 TEXT PRIMARY KEY
        CONSTRAINT fk_football_ai_features_match
        REFERENCES matches (id) ON DELETE CASCADE,

    -- *_elo columns default to 1500.0
    home_elo                 DOUBLE PRECISION DEFAULT 1500.0,
    away_elo                 DOUBLE PRECISION DEFAULT 1500.0,
    home_home_elo            DOUBLE PRECISION DEFAULT 1500.0,
    away_away_elo            DOUBLE PRECISION DEFAULT 1500.0,
    home_form_elo            DOUBLE PRECISION DEFAULT 1500.0,
    away_form_elo            DOUBLE PRECISION DEFAULT 1500.0,
    elo_diff                 DOUBLE PRECISION DEFAULT 0.0,

    -- form / scoring columns
    home_form_score          DOUBLE PRECISION DEFAULT 50.0,
    away_form_score          DOUBLE PRECISION DEFAULT 50.0,
    home_goals_avg_5         DOUBLE PRECISION DEFAULT 0.0,
    away_goals_avg_5         DOUBLE PRECISION DEFAULT 0.0,
    home_conceded_avg_5      DOUBLE PRECISION DEFAULT 0.0,
    away_conceded_avg_5      DOUBLE PRECISION DEFAULT 0.0,
    home_clean_sheet_rate    DOUBLE PRECISION DEFAULT 0.0,
    away_clean_sheet_rate    DOUBLE PRECISION DEFAULT 0.0,
    home_scoring_rate        DOUBLE PRECISION DEFAULT 0.0,
    away_scoring_rate        DOUBLE PRECISION DEFAULT 0.0,
    home_win_streak          INTEGER DEFAULT 0,
    away_win_streak          INTEGER DEFAULT 0,

    -- implied_* / odds columns
    implied_home             DOUBLE PRECISION DEFAULT 0.33,
    implied_draw             DOUBLE PRECISION DEFAULT 0.33,
    implied_away             DOUBLE PRECISION DEFAULT 0.33,
    implied_over25           DOUBLE PRECISION DEFAULT 0.5,
    implied_btts_yes         DOUBLE PRECISION DEFAULT 0.5,
    odds_overround           DOUBLE PRECISION DEFAULT 0.0,

    -- team stat averages
    home_avg_possession      DOUBLE PRECISION DEFAULT 50.0,
    away_avg_possession      DOUBLE PRECISION DEFAULT 50.0,
    home_avg_shots_on_target DOUBLE PRECISION DEFAULT 0.0,
    away_avg_shots_on_target DOUBLE PRECISION DEFAULT 0.0,
    home_shot_conversion     DOUBLE PRECISION DEFAULT 0.0,
    away_shot_conversion     DOUBLE PRECISION DEFAULT 0.0,
    home_avg_corners         DOUBLE PRECISION DEFAULT 0.0,
    away_avg_corners         DOUBLE PRECISION DEFAULT 0.0,

    -- h2h_* columns
    h2h_total                INTEGER DEFAULT 0,
    h2h_home_win_rate        DOUBLE PRECISION DEFAULT 0.0,
    h2h_avg_goals            DOUBLE PRECISION DEFAULT 0.0,
    h2h_over25_rate          DOUBLE PRECISION DEFAULT 0.0,
    h2h_btts_rate            DOUBLE PRECISION DEFAULT 0.0,

    -- referee_* / league_* columns
    referee_avg_cards        DOUBLE PRECISION DEFAULT 0.0,
    referee_home_bias        DOUBLE PRECISION DEFAULT 0.0,
    referee_avg_goals        DOUBLE PRECISION DEFAULT 0.0,
    league_avg_goals         DOUBLE PRECISION DEFAULT 0.0,
    league_home_win_pct      DOUBLE PRECISION DEFAULT 0.0,
    league_over25_pct        DOUBLE PRECISION DEFAULT 0.0,

    missing_players_impact   DOUBLE PRECISION DEFAULT 0.0,
    calculator_ver           TEXT DEFAULT 'v2.0',
    updated_at               TIMESTAMP NOT NULL DEFAULT NOW()
);
|
|
|
|
-- 3b. Create basketball_ai_features (adapted for basketball)
|
|
-- Basketball feature table, one row per match (match_id is the primary key).
-- Every feature column is nullable and carries a default; the row is removed
-- when the referenced match is deleted.
CREATE TABLE IF NOT EXISTS basketball_ai_features (
    match_id               TEXT PRIMARY KEY
        CONSTRAINT fk_basketball_ai_features_match
        REFERENCES matches (id) ON DELETE CASCADE,

    -- *_elo columns default to 1500.0
    home_elo               DOUBLE PRECISION DEFAULT 1500.0,
    away_elo               DOUBLE PRECISION DEFAULT 1500.0,
    home_home_elo          DOUBLE PRECISION DEFAULT 1500.0,
    away_away_elo          DOUBLE PRECISION DEFAULT 1500.0,
    home_form_elo          DOUBLE PRECISION DEFAULT 1500.0,
    away_form_elo          DOUBLE PRECISION DEFAULT 1500.0,
    elo_diff               DOUBLE PRECISION DEFAULT 0.0,

    -- form / scoring columns
    home_form_score        DOUBLE PRECISION DEFAULT 50.0,
    away_form_score        DOUBLE PRECISION DEFAULT 50.0,
    home_pts_avg_5         DOUBLE PRECISION DEFAULT 0.0,
    away_pts_avg_5         DOUBLE PRECISION DEFAULT 0.0,
    home_conceded_avg_5    DOUBLE PRECISION DEFAULT 0.0,
    away_conceded_avg_5    DOUBLE PRECISION DEFAULT 0.0,
    home_win_streak        INTEGER DEFAULT 0,
    away_win_streak        INTEGER DEFAULT 0,

    -- implied_* / odds columns (no draw column in this table)
    implied_home           DOUBLE PRECISION DEFAULT 0.5,
    implied_away           DOUBLE PRECISION DEFAULT 0.5,
    implied_over_total     DOUBLE PRECISION DEFAULT 0.5,
    implied_spread_home    DOUBLE PRECISION DEFAULT 0.5,
    odds_overround         DOUBLE PRECISION DEFAULT 0.0,

    -- team stat averages
    home_avg_pts           DOUBLE PRECISION DEFAULT 0.0,
    away_avg_pts           DOUBLE PRECISION DEFAULT 0.0,
    home_avg_rebounds      DOUBLE PRECISION DEFAULT 0.0,
    away_avg_rebounds      DOUBLE PRECISION DEFAULT 0.0,
    home_fg_pct            DOUBLE PRECISION DEFAULT 0.0,
    away_fg_pct            DOUBLE PRECISION DEFAULT 0.0,
    home_avg_three_pt_made DOUBLE PRECISION DEFAULT 0.0,
    away_avg_three_pt_made DOUBLE PRECISION DEFAULT 0.0,
    home_avg_turnovers     DOUBLE PRECISION DEFAULT 0.0,
    away_avg_turnovers     DOUBLE PRECISION DEFAULT 0.0,

    -- h2h_* columns
    h2h_total              INTEGER DEFAULT 0,
    h2h_home_win_rate      DOUBLE PRECISION DEFAULT 0.0,
    h2h_avg_pts            DOUBLE PRECISION DEFAULT 0.0,
    h2h_avg_margin         DOUBLE PRECISION DEFAULT 0.0,

    missing_players_impact DOUBLE PRECISION DEFAULT 0.0,
    calculator_ver         TEXT DEFAULT 'v2.0',
    updated_at             TIMESTAMP NOT NULL DEFAULT NOW()
);
|
|
|
|
-- 3c. Copy data
|
|
-- Copy the football rows of match_ai_features into football_ai_features,
-- column for column. A row qualifies when its match exists in matches with
-- sport = 'football'. Rows whose match_id is already present in the target
-- are skipped so the migration can be re-run; duplicate match_ids inside the
-- source itself still violate the primary key and abort the transaction.
INSERT INTO football_ai_features (
    match_id,
    home_elo, away_elo,
    home_home_elo, away_away_elo,
    home_form_elo, away_form_elo,
    elo_diff,
    home_form_score, away_form_score,
    home_goals_avg_5, away_goals_avg_5,
    home_conceded_avg_5, away_conceded_avg_5,
    home_clean_sheet_rate, away_clean_sheet_rate,
    home_scoring_rate, away_scoring_rate,
    home_win_streak, away_win_streak,
    implied_home, implied_draw, implied_away,
    implied_over25, implied_btts_yes,
    odds_overround,
    home_avg_possession, away_avg_possession,
    home_avg_shots_on_target, away_avg_shots_on_target,
    home_shot_conversion, away_shot_conversion,
    home_avg_corners, away_avg_corners,
    h2h_total, h2h_home_win_rate, h2h_avg_goals,
    h2h_over25_rate, h2h_btts_rate,
    referee_avg_cards, referee_home_bias, referee_avg_goals,
    league_avg_goals, league_home_win_pct, league_over25_pct,
    missing_players_impact,
    calculator_ver,
    updated_at
)
SELECT
    src.match_id,
    src.home_elo, src.away_elo,
    src.home_home_elo, src.away_away_elo,
    src.home_form_elo, src.away_form_elo,
    src.elo_diff,
    src.home_form_score, src.away_form_score,
    src.home_goals_avg_5, src.away_goals_avg_5,
    src.home_conceded_avg_5, src.away_conceded_avg_5,
    src.home_clean_sheet_rate, src.away_clean_sheet_rate,
    src.home_scoring_rate, src.away_scoring_rate,
    src.home_win_streak, src.away_win_streak,
    src.implied_home, src.implied_draw, src.implied_away,
    src.implied_over25, src.implied_btts_yes,
    src.odds_overround,
    src.home_avg_possession, src.away_avg_possession,
    src.home_avg_shots_on_target, src.away_avg_shots_on_target,
    src.home_shot_conversion, src.away_shot_conversion,
    src.home_avg_corners, src.away_avg_corners,
    src.h2h_total, src.h2h_home_win_rate, src.h2h_avg_goals,
    src.h2h_over25_rate, src.h2h_btts_rate,
    src.referee_avg_cards, src.referee_home_bias, src.referee_avg_goals,
    src.league_avg_goals, src.league_home_win_pct, src.league_over25_pct,
    src.missing_players_impact,
    src.calculator_ver,
    src.updated_at
FROM match_ai_features AS src
WHERE EXISTS (
        SELECT 1
        FROM matches AS m
        WHERE m.id = src.match_id
          AND m.sport = 'football'
    )
  AND NOT EXISTS (
        SELECT 1
        FROM football_ai_features AS dst
        WHERE dst.match_id = src.match_id
    );
|
|
|
|
-- Copy the basketball rows of match_ai_features into basketball_ai_features.
-- A row qualifies when its match exists in matches with sport = 'basketball'.
-- Rows whose match_id is already present in the target are skipped so the
-- migration can be re-run; duplicate match_ids inside the source itself still
-- violate the primary key and abort the transaction.
--
-- The source table only has the football-named columns, so several target
-- columns are filled from differently named sources or with a constant 0.
-- Values are matched to target columns by position; the aliases are
-- documentation only.
INSERT INTO basketball_ai_features (
    match_id,
    home_elo, away_elo,
    home_home_elo, away_away_elo,
    home_form_elo, away_form_elo,
    elo_diff,
    home_form_score, away_form_score,
    home_pts_avg_5, away_pts_avg_5,
    home_conceded_avg_5, away_conceded_avg_5,
    home_win_streak, away_win_streak,
    implied_home, implied_away,
    implied_over_total, implied_spread_home,
    odds_overround,
    home_avg_pts, away_avg_pts,
    home_avg_rebounds, away_avg_rebounds,
    home_fg_pct, away_fg_pct,
    home_avg_three_pt_made, away_avg_three_pt_made,
    home_avg_turnovers, away_avg_turnovers,
    h2h_total, h2h_home_win_rate,
    h2h_avg_pts, h2h_avg_margin,
    missing_players_impact,
    calculator_ver,
    updated_at
)
SELECT
    src.match_id,
    src.home_elo, src.away_elo,
    src.home_home_elo, src.away_away_elo,
    src.home_form_elo, src.away_form_elo,
    src.elo_diff,
    src.home_form_score, src.away_form_score,
    src.home_goals_avg_5 AS home_pts_avg_5,
    src.away_goals_avg_5 AS away_pts_avg_5,
    src.home_conceded_avg_5, src.away_conceded_avg_5,
    src.home_win_streak, src.away_win_streak,
    src.implied_home, src.implied_away,
    src.implied_over25 AS implied_over_total,
    -- NOTE(review): implied_btts_yes is stored as implied_spread_home;
    -- confirm the old pipeline really kept the spread value in that column.
    src.implied_btts_yes AS implied_spread_home,
    src.odds_overround,
    -- NOTE(review): the *_avg_possession columns are stored as *_avg_pts;
    -- confirm this matches what the old pipeline wrote for basketball.
    src.home_avg_possession AS home_avg_pts,
    src.away_avg_possession AS away_avg_pts,
    -- No source columns exist for the following features; seeded with 0.
    0 AS home_avg_rebounds,
    0 AS away_avg_rebounds,
    0 AS home_fg_pct,
    0 AS away_fg_pct,
    0 AS home_avg_three_pt_made,
    0 AS away_avg_three_pt_made,
    0 AS home_avg_turnovers,
    0 AS away_avg_turnovers,
    src.h2h_total, src.h2h_home_win_rate,
    -- NOTE(review): the factor 6 is unexplained in this file; confirm its
    -- origin before relying on h2h_avg_pts.
    src.h2h_avg_goals * 6 AS h2h_avg_pts,
    0 AS h2h_avg_margin,
    src.missing_players_impact,
    src.calculator_ver,
    src.updated_at
FROM match_ai_features AS src
WHERE EXISTS (
        SELECT 1
        FROM matches AS m
        WHERE m.id = src.match_id
          AND m.sport = 'basketball'
    )
  AND NOT EXISTS (
        SELECT 1
        FROM basketball_ai_features AS dst
        WHERE dst.match_id = src.match_id
    );
|
|
|
|
-- ============================================
|
|
-- 4. Add sport column to odd_categories for partitioning
|
|
-- ============================================
|
|
-- Rationale: 509MB table, needs sport-based filtering
|
|
|
|
-- Add a nullable sport column to both odds tables (no-op when it exists).
ALTER TABLE odd_categories ADD COLUMN IF NOT EXISTS sport TEXT;
ALTER TABLE odd_selections ADD COLUMN IF NOT EXISTS sport TEXT;

-- Backfill sport from matches.
-- Only rows that are still NULL and whose match actually has a sport are
-- touched; without the IS NOT NULL filter every run would rewrite rows just
-- to store NULL again.
-- NOTE(review): these UPDATEs run inside the migration transaction, so their
-- row locks are held until COMMIT -- confirm that is acceptable for tables
-- of this size.
UPDATE odd_categories AS oc
SET sport = m.sport
FROM matches AS m
WHERE oc.match_id = m.id
  AND oc.sport IS NULL
  AND m.sport IS NOT NULL;

-- Selections inherit the sport of their parent category (filled in above).
UPDATE odd_selections AS os
SET sport = oc.sport
FROM odd_categories AS oc
WHERE os.odd_category_db_id = oc.db_id
  AND os.sport IS NULL
  AND oc.sport IS NOT NULL;

-- Create indexes for sport filtering (partial: rows without a sport are
-- left out of the index).
CREATE INDEX IF NOT EXISTS idx_odd_categories_sport
    ON odd_categories (sport)
    WHERE sport IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_odd_selections_sport
    ON odd_selections (sport)
    WHERE sport IS NOT NULL;
|
|
|
|
-- ============================================
|
|
-- 5. Add indexes for match_player_participation (via JOIN optimization)
|
|
-- ============================================
|
|
-- Rationale: 833MB table, needs efficient match_id lookups
|
|
-- Note: Can't use subquery in index predicate, so just optimize match_id lookup
|
|
-- Single-column lookup indexes on match_player_participation; both are
-- skipped when an index of the same name already exists.
CREATE INDEX IF NOT EXISTS idx_match_player_participation_match
    ON match_player_participation
    USING btree (match_id);

CREATE INDEX IF NOT EXISTS idx_match_player_participation_team
    ON match_player_participation
    USING btree (team_id);
|
|
|
|
-- ============================================
|
|
-- 6. Verification queries (run these before dropping old tables)
|
|
-- ============================================
|
|
-- DO $$
|
|
-- DECLARE
|
|
-- v_old_stats INT;
|
|
-- v_football_stats INT;
|
|
-- v_basketball_stats INT;
|
|
-- v_old_ai INT;
|
|
-- v_football_ai INT;
|
|
-- v_basketball_ai INT;
|
|
-- BEGIN
|
|
-- SELECT COUNT(*) INTO v_old_stats FROM match_team_stats;
|
|
-- SELECT COUNT(*) INTO v_football_stats FROM football_team_stats;
|
|
-- SELECT COUNT(*) INTO v_basketball_stats FROM basketball_team_stats;
|
|
--
|
|
-- SELECT COUNT(*) INTO v_old_ai FROM match_ai_features;
|
|
-- SELECT COUNT(*) INTO v_football_ai FROM football_ai_features;
|
|
-- SELECT COUNT(*) INTO v_basketball_ai FROM basketball_ai_features;
|
|
--
|
|
-- RAISE NOTICE '=== VERIFICATION ===';
|
|
-- RAISE NOTICE 'match_team_stats: % = football: % + basketball: %', v_old_stats, v_football_stats, v_basketball_stats;
|
|
-- RAISE NOTICE 'match_ai_features: % = football: % + basketball: %', v_old_ai, v_football_ai, v_basketball_ai;
|
|
--
|
|
-- IF v_old_stats != v_football_stats + v_basketball_stats THEN
|
|
-- RAISE EXCEPTION 'Data mismatch in team stats!';
|
|
-- END IF;
|
|
--
|
|
-- IF v_old_ai != v_football_ai + v_basketball_ai THEN
|
|
-- RAISE EXCEPTION 'Data mismatch in AI features!';
|
|
-- END IF;
|
|
--
|
|
-- RAISE NOTICE '✅ VERIFICATION PASSED - Safe to drop old tables';
|
|
-- END $$;
|
|
|
|
-- ============================================
|
|
-- 7. Drop old tables (UNCOMMENT AFTER VERIFICATION)
|
|
-- ============================================
|
|
-- DROP TABLE IF EXISTS match_team_stats CASCADE;
|
|
-- DROP TABLE IF EXISTS match_ai_features CASCADE;
|
|
|
|
COMMIT;
|