61 lines
2.2 KiB
Python
Executable File
61 lines
2.2 KiB
Python
Executable File
|
|
import os
|
|
import psycopg2
|
|
from psycopg2.extras import RealDictCursor
|
|
|
|
def _fetch_count(cursor, sql):
    """Execute *sql* and return the ``count`` column of its single row.

    The cursor must yield dict-like rows (e.g. ``RealDictCursor``) and the
    query must alias its aggregate as ``count``.
    """
    cursor.execute(sql)
    return cursor.fetchone()['count']


def inspect_recoverable_data():
    """Print coverage figures for scored football matches.

    Connects to the database given by the ``DATABASE_URL`` environment
    variable (falling back to a local default) and prints:

    1. the number of football matches with both scores present,
    2. how many of those have at least one ``match_team_stats`` row,
    3. how many have rows in ``match_team_stats``, ``match_player_events``
       and ``odd_categories`` (the "golden dataset").

    Figures 2 and 3 are also shown as a percentage of figure 1. If figure 1
    is zero, nothing further is printed.

    Returns:
        None. Any exception raised while connecting or querying is reported
        on stdout as ``Error: ...`` instead of being propagated. The
        connection is closed on every exit path.
    """
    # NOTE(security): the fallback URL embeds a database password in source.
    # It is kept so the script's default behavior is unchanged, but the
    # credential should be rotated and supplied via DATABASE_URL only.
    db_url = os.environ.get('DATABASE_URL', 'postgresql://suggestbet:SuGGesT2026SecuRe@localhost:15432/boilerplate_db')

    conn = None
    try:
        conn = psycopg2.connect(db_url)
        cursor = conn.cursor(cursor_factory=RealDictCursor)

        print("\n🔍 RECOVERABLE DATA INSPECTION (Ignoring 'state' column)\n")

        # 1. Total scored football matches.
        # Only the two score columns are checked; no date filter is applied.
        total_scored = _fetch_count(cursor, """
            SELECT COUNT(*) as count
            FROM matches
            WHERE sport='football'
              AND score_home IS NOT NULL
              AND score_away IS NOT NULL
        """)
        print(f"Total Scored Football Matches: {total_scored:,}")

        if total_scored == 0:
            # Nothing to compare against; returning here also avoids a
            # division by zero in the percentage calculations below.
            return

        # 2. Scored matches that have team stats.
        stats_count = _fetch_count(cursor, """
            SELECT COUNT(DISTINCT m.id) as count
            FROM matches m
            JOIN match_team_stats s ON m.id = s.match_id
            WHERE m.sport='football'
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
        """)
        print(f"Matches with Team Stats: {stats_count:,} ({stats_count/total_scored*100:.1f}%)")

        # 3. 'Golden dataset': scored matches with stats + events + odds.
        # EXISTS is used instead of joining all three child tables at once:
        # the triple join multiplies rows per match before DISTINCT collapses
        # them, while EXISTS only tests for presence and counts the same set.
        golden_count = _fetch_count(cursor, """
            SELECT COUNT(DISTINCT m.id) as count
            FROM matches m
            WHERE m.sport='football'
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
              AND EXISTS (SELECT 1 FROM match_team_stats s WHERE s.match_id = m.id)
              AND EXISTS (SELECT 1 FROM match_player_events e WHERE e.match_id = m.id)
              AND EXISTS (SELECT 1 FROM odd_categories oc WHERE oc.match_id = m.id)
        """)
        print(f"\n✨ TRUE GOLDEN DATASET (Scored + All 3): {golden_count:,} ({golden_count/total_scored*100:.1f}%)")

    except Exception as e:
        # Top-level boundary of a diagnostic script: report and carry on.
        print(f"Error: {e}")
    finally:
        # Release the connection on every path, including the early return
        # above and any failure mid-way through the queries.
        if conn is not None:
            conn.close()
|
|
|
|
if __name__ == "__main__":
    # Run the inspection only when executed as a script, not on import.
    inspect_recoverable_data()
|