This commit is contained in:
Executable
+94
@@ -0,0 +1,94 @@
|
||||
|
||||
import os
|
||||
import sys
|
||||
import psycopg2
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
|
||||
# Database Connection
|
||||
# Database connection string: taken from the DATABASE_URL environment variable,
# falling back to a local default when the variable is not set.
# NOTE(review): the fallback embeds a plaintext credential in source — consider
# moving it out of the repository.
DSN = os.getenv(
    'DATABASE_URL',
    'postgresql://suggestbet:SuGGesT2026SecuRe@localhost:15432/boilerplate_db',
)
# Keep only the part before the first '?', discarding any URL query string.
DSN = DSN.partition('?')[0]
|
||||
|
||||
def _fetch_scalar(cursor, query):
    """Execute *query* on *cursor* and return the first column of its first row."""
    cursor.execute(query)
    return cursor.fetchone()[0]


def diagnose():
    """Print a data-coverage report for finished football matches.

    Connects to the database identified by the module-level ``DSN`` and prints:

    * the number of football matches with a non-NULL ``score_home``;
    * how many distinct match ids appear in ``match_team_stats``,
      ``match_player_participation`` and ``match_officials``, each as a
      count and as a percentage of the total above;
    * how many finished football matches appear in all three tables;
    * the same three coverage figures restricted to the most recent
      finished football matches (up to 1000, ordered by ``mst_utc``).

    Returns:
        None. All results are written to stdout.

    Any exception raised while connecting or querying is caught and printed
    as ``Error: ...``; the connection, if one was opened, is always closed.
    """
    # Bind before the try so the finally clause is safe even when
    # psycopg2.connect() itself raises.
    conn = None
    try:
        conn = psycopg2.connect(DSN)
        # The cursor context manager closes the cursor on exit.
        with conn.cursor() as cursor:
            print("🔍 DIAGNOSTIC REPORT: AI Data Coverage")
            print("=======================================")

            # 1. Total Football Matches (Finished)
            total_matches = _fetch_scalar(
                cursor,
                "SELECT COUNT(*) FROM matches WHERE sport='football' AND score_home IS NOT NULL",
            )
            print(f"Total Finished Football Matches: {total_matches:,}")

            # Every percentage below divides by total_matches, so stop here
            # when there is nothing to measure against.
            if total_matches == 0:
                print("❌ No matches found!")
                return

            # 2. Stats Coverage (match_team_stats)
            stats_count = _fetch_scalar(
                cursor, "SELECT COUNT(DISTINCT match_id) FROM match_team_stats"
            )
            print(f"Matches with Team Stats:         {stats_count:,} ({stats_count/total_matches*100:.1f}%)")

            # 3. Squad Coverage (match_player_participation)
            squad_count = _fetch_scalar(
                cursor, "SELECT COUNT(DISTINCT match_id) FROM match_player_participation"
            )
            print(f"Matches with Lineups (Squad):    {squad_count:,} ({squad_count/total_matches*100:.1f}%)")

            # 4. Officials Coverage
            officials_count = _fetch_scalar(
                cursor, "SELECT COUNT(DISTINCT match_id) FROM match_officials"
            )
            print(f"Matches with Officials:          {officials_count:,} ({officials_count/total_matches*100:.1f}%)")

            # 5. Overlap (Gold Standard Data): finished football matches whose
            # id is present in all three detail tables. INTERSECT yields
            # distinct ids, so each match is counted once.
            overlap_count = _fetch_scalar(
                cursor,
                """
                SELECT COUNT(*) FROM (
                    SELECT id FROM matches WHERE sport='football' AND score_home IS NOT NULL
                    INTERSECT
                    SELECT match_id FROM match_team_stats
                    INTERSECT
                    SELECT match_id FROM match_player_participation
                    INTERSECT
                    SELECT match_id FROM match_officials
                ) as overlap
                """,
            )
            print(f"Matches with ALL Data (Golden):  {overlap_count:,} ({overlap_count/total_matches*100:.1f}%)")

            print("\n🔍 RECENT DATA QUALITY (Last 1000 Matches)")
            print("==========================================")
            # Check the most recent matches specifically. The window holds at
            # most 1000 rows, so its real size is fetched and used as the
            # denominator instead of assuming it is exactly 1000.
            cursor.execute("""
                WITH recent AS (
                    SELECT id FROM matches
                    WHERE sport='football' AND score_home IS NOT NULL
                    ORDER BY mst_utc DESC LIMIT 1000
                )
                SELECT
                    (SELECT COUNT(*) FROM recent) as recent_total,
                    (SELECT COUNT(DISTINCT match_id) FROM match_team_stats WHERE match_id IN (SELECT id FROM recent)) as has_stats,
                    (SELECT COUNT(DISTINCT match_id) FROM match_player_participation WHERE match_id IN (SELECT id FROM recent)) as has_squad,
                    (SELECT COUNT(DISTINCT match_id) FROM match_officials WHERE match_id IN (SELECT id FROM recent)) as has_officials
            """)
            recent_total, has_stats, has_squad, has_officials = cursor.fetchone()
            # total_matches > 0 was checked above, so the window should not be
            # empty; the fallback only guards against a concurrent change.
            recent_total = recent_total or 1
            print(f"Has Stats:     {has_stats * 100 / recent_total:.1f}%")
            print(f"Has Lineups:   {has_squad * 100 / recent_total:.1f}%")
            print(f"Has Officials: {has_officials * 100 / recent_total:.1f}%")

    except Exception as e:
        # Top-level boundary of a diagnostic script: report and fall through
        # to cleanup rather than crash with a traceback.
        print(f"Error: {e}")
    finally:
        if conn is not None:
            conn.close()
|
||||
|
||||
# Run the coverage report only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    diagnose()
|
||||
Reference in New Issue
Block a user