import os

import psycopg2
from psycopg2.extras import RealDictCursor

# NOTE(review): this fallback URL embeds a username and password in source.
# It is kept unchanged so existing invocations keep working; prefer supplying
# DATABASE_URL from the environment and rotating this credential.
_DEFAULT_DATABASE_URL = 'postgresql://suggestbet:SuGGesT2026SecuRe@localhost:15432/boilerplate_db'

# 1. Total football matches in the finished ('postGame') state.
_TOTAL_MATCHES_QUERY = (
    "SELECT COUNT(*) as count FROM matches "
    "WHERE sport='football' AND state='postGame'"
)

# Each entry is (label printed before the count, query returning one row with
# a ``count`` column). The queries are fixed literals; nothing is interpolated
# into them at runtime.
_COVERAGE_CHECKS = (
    # 2. Matches that have at least one row in match_team_stats.
    (
        "Matches with Team Stats",
        """
        SELECT COUNT(DISTINCT m.id) as count
        FROM matches m
        JOIN match_team_stats s ON m.id = s.match_id
        WHERE m.sport='football' AND m.state='postGame'
        """,
    ),
    # 3. Matches that have at least one row in match_player_events.
    (
        "Matches with Player Events",
        """
        SELECT COUNT(DISTINCT m.id) as count
        FROM matches m
        JOIN match_player_events e ON m.id = e.match_id
        WHERE m.sport='football' AND m.state='postGame'
        """,
    ),
    # 4. Matches that have at least one row in odd_categories (the query does
    #    not filter by market type).
    (
        "Matches with Odds Data",
        """
        SELECT COUNT(DISTINCT m.id) as count
        FROM matches m
        JOIN odd_categories oc ON m.id = oc.match_id
        WHERE m.sport='football' AND m.state='postGame'
        """,
    ),
    # 5. Full data set (intersection): matches present in all three tables.
    #    The check mark is spelled as an escape so the source file's encoding
    #    cannot garble it.
    (
        "\n\u2705 GOLDEN DATASET (All 3 present)",
        """
        SELECT COUNT(DISTINCT m.id) as count
        FROM matches m
        JOIN match_team_stats s ON m.id = s.match_id
        JOIN match_player_events e ON m.id = e.match_id
        JOIN odd_categories oc ON m.id = oc.match_id
        WHERE m.sport='football' AND m.state='postGame'
        """,
    ),
)


def _fetch_count(cursor, query):
    """Run *query* and return the ``count`` column of its single result row."""
    cursor.execute(query)
    return cursor.fetchone()['count']


def check_health():
    """Print data-coverage statistics for finished football matches.

    Connects to the database named by the ``DATABASE_URL`` environment
    variable (falling back to the built-in default URL), prints the total
    number of finished football matches and then, for each coverage check,
    the number and percentage of those matches that have related rows.

    If there are no finished football matches, only the total is printed.

    Returns:
        None. All results are written to stdout.

    Any exception raised while connecting or querying is caught and reported
    as ``Error: <message>`` on stdout rather than propagated. The connection
    is always closed, including on the early return and on errors.
    """
    conn = None
    try:
        db_url = os.environ.get('DATABASE_URL', _DEFAULT_DATABASE_URL)
        conn = psycopg2.connect(db_url)

        # RealDictCursor lets the rows be read by column name ('count').
        with conn.cursor(cursor_factory=RealDictCursor) as cursor:
            # Bar-chart emoji written as an escape for encoding safety.
            print("\U0001F4CA Data Health Check Starting...\n")

            total_matches = _fetch_count(cursor, _TOTAL_MATCHES_QUERY)
            print(f"Total Finished Football Matches: {total_matches:,}")

            # Nothing to compare against; also avoids dividing by zero below.
            if total_matches == 0:
                return

            for label, query in _COVERAGE_CHECKS:
                count = _fetch_count(cursor, query)
                share = count / total_matches * 100
                print(f"{label}: {count:,} ({share:.1f}%)")
    except Exception as e:
        # Top-level boundary of a diagnostic script: report, do not raise.
        print(f"Error: {e}")
    finally:
        # Runs on success, on the early return, and on errors alike.
        if conn is not None:
            conn.close()


if __name__ == "__main__":
    check_health()