import os

import psycopg2
from psycopg2.extras import RealDictCursor


def _fetch_count(cursor, query):
    """Execute *query* and return the value of its ``count`` column.

    The query must yield a single row exposing a column aliased ``count``;
    the cursor must return mapping-style rows (e.g. ``RealDictCursor``).
    """
    cursor.execute(query)
    return cursor.fetchone()['count']


def inspect_recoverable_data():
    """Print how many scored football matches have supporting data.

    Connects to the database named by the ``DATABASE_URL`` environment
    variable (falling back to a local default) and prints:

    1. the number of football matches with both scores present,
    2. how many of those have at least one ``match_team_stats`` row,
    3. how many have rows in ``match_team_stats``, ``match_player_events``
       and ``odd_categories`` all at once (the "golden dataset").

    Returns ``None``. Any exception is caught and reported on stdout as
    ``Error: ...`` rather than propagated; the connection is always closed.
    """
    # NOTE(security): the fallback DSN embeds a plaintext password. It is
    # kept so the script behaves as before when DATABASE_URL is unset, but
    # the credential should be moved out of source control.
    db_url = os.environ.get(
        'DATABASE_URL',
        'postgresql://suggestbet:SuGGesT2026SecuRe@localhost:15432/boilerplate_db',
    )

    conn = None
    try:
        conn = psycopg2.connect(db_url)
        cursor = conn.cursor(cursor_factory=RealDictCursor)

        print("\n🔍 RECOVERABLE DATA INSPECTION (Ignoring 'state' column)\n")

        # 1. Total scored football matches.
        # Only the presence of both scores is checked here; the query does
        # not filter on the match date or on the 'state' column.
        total_scored = _fetch_count(cursor, """
            SELECT COUNT(*) as count
            FROM matches
            WHERE sport='football'
              AND score_home IS NOT NULL
              AND score_away IS NOT NULL
        """)
        print(f"Total Scored Football Matches: {total_scored:,}")

        # Nothing to compare against; also guards the percentage
        # divisions below against a zero denominator.
        if total_scored == 0:
            return

        # 2. Scored matches that have at least one team-stats row.
        stats_count = _fetch_count(cursor, """
            SELECT COUNT(DISTINCT m.id) as count
            FROM matches m
            JOIN match_team_stats s ON m.id = s.match_id
            WHERE m.sport='football'
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
        """)
        print(f"Matches with Team Stats: {stats_count:,} ({stats_count/total_scored*100:.1f}%)")

        # 3. 'Golden dataset': scored matches with stats + events + odds.
        # DISTINCT is required because the joins multiply rows per match.
        golden_count = _fetch_count(cursor, """
            SELECT COUNT(DISTINCT m.id) as count
            FROM matches m
            JOIN match_team_stats s ON m.id = s.match_id
            JOIN match_player_events e ON m.id = e.match_id
            JOIN odd_categories oc ON m.id = oc.match_id
            WHERE m.sport='football'
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
        """)
        print(f"\n✨ TRUE GOLDEN DATASET (Scored + All 3): {golden_count:,} ({golden_count/total_scored*100:.1f}%)")
    except Exception as e:
        # Top-level boundary of a diagnostic script: report and carry on.
        print(f"Error: {e}")
    finally:
        # Close on every path: success, early return, and failure.
        # (psycopg2's ``with conn:`` only ends the transaction; it does
        # not close the connection, hence the explicit close.)
        if conn is not None:
            conn.close()


if __name__ == "__main__":
    inspect_recoverable_data()