footypredict-pro / src /data /historical_data.py
NetBoss
feat: Complete 100% real data integration (6 phases)
c230fe3
"""
Historical Data Storage (SQLite)
Stores historical match data for:
- ML model training
- H2H analysis
- ELO rating calculation
- Form tracking
"""
import os
import sqlite3
import json
from datetime import datetime
from typing import Dict, List, Optional, Tuple
from dataclasses import dataclass
from contextlib import contextmanager
# Database path
DB_PATH = os.path.join(os.path.dirname(__file__), 'football_history.db')
@dataclass
class HistoricalMatch:
"""Historical match record"""
id: str
date: str
home_team: str
away_team: str
home_score: int
away_score: int
league: str
season: str
class HistoricalDatabase:
"""
SQLite database for storing historical match data.
Used for ML training, H2H lookup, and ELO calculation.
"""
def __init__(self, db_path: str = DB_PATH):
self.db_path = db_path
self._init_db()
def _init_db(self):
"""Initialize database schema"""
with self._get_connection() as conn:
cursor = conn.cursor()
# Matches table
cursor.execute('''
CREATE TABLE IF NOT EXISTS matches (
id TEXT PRIMARY KEY,
date TEXT NOT NULL,
home_team TEXT NOT NULL,
away_team TEXT NOT NULL,
home_score INTEGER,
away_score INTEGER,
league TEXT,
season TEXT,
created_at TEXT DEFAULT CURRENT_TIMESTAMP
)
''')
# Team ELO ratings table
cursor.execute('''
CREATE TABLE IF NOT EXISTS team_elo (
team TEXT PRIMARY KEY,
elo REAL DEFAULT 1500,
matches_played INTEGER DEFAULT 0,
last_updated TEXT
)
''')
# Team statistics table
cursor.execute('''
CREATE TABLE IF NOT EXISTS team_stats (
team TEXT PRIMARY KEY,
wins INTEGER DEFAULT 0,
draws INTEGER DEFAULT 0,
losses INTEGER DEFAULT 0,
goals_for INTEGER DEFAULT 0,
goals_against INTEGER DEFAULT 0,
last_5_results TEXT DEFAULT '[]',
last_updated TEXT
)
''')
# Create indexes
cursor.execute('CREATE INDEX IF NOT EXISTS idx_matches_teams ON matches(home_team, away_team)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_matches_date ON matches(date)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_matches_league ON matches(league)')
conn.commit()
@contextmanager
def _get_connection(self):
"""Context manager for database connection"""
conn = sqlite3.connect(self.db_path)
conn.row_factory = sqlite3.Row
try:
yield conn
finally:
conn.close()
def store_match(self, match: Dict) -> bool:
"""Store a match result"""
try:
with self._get_connection() as conn:
cursor = conn.cursor()
cursor.execute('''
INSERT OR REPLACE INTO matches
(id, date, home_team, away_team, home_score, away_score, league, season)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
''', (
match.get('id', f"{match['home_team']}_{match['away_team']}_{match['date']}"),
match.get('date'),
match.get('home_team'),
match.get('away_team'),
match.get('home_score'),
match.get('away_score'),
match.get('league', 'unknown'),
match.get('season', '2024-25')
))
conn.commit()
return True
except Exception as e:
print(f"Error storing match: {e}")
return False
def store_matches_batch(self, matches: List[Dict]) -> int:
"""Store multiple matches at once"""
stored = 0
for match in matches:
if self.store_match(match):
stored += 1
return stored
def get_h2h(self, team1: str, team2: str, limit: int = 10) -> List[Dict]:
"""Get head-to-head matches between two teams"""
with self._get_connection() as conn:
cursor = conn.cursor()
cursor.execute('''
SELECT * FROM matches
WHERE (home_team LIKE ? AND away_team LIKE ?)
OR (home_team LIKE ? AND away_team LIKE ?)
ORDER BY date DESC
LIMIT ?
''', (f'%{team1}%', f'%{team2}%', f'%{team2}%', f'%{team1}%', limit))
return [dict(row) for row in cursor.fetchall()]
def get_h2h_stats(self, team1: str, team2: str) -> Dict:
"""Get H2H statistics between two teams"""
matches = self.get_h2h(team1, team2, limit=50)
if not matches:
return {'found': False, 'total_matches': 0}
team1_wins = 0
draws = 0
team2_wins = 0
team1_goals = 0
team2_goals = 0
last_5 = []
for match in matches:
home = match['home_team']
h_score = match['home_score'] or 0
a_score = match['away_score'] or 0
is_team1_home = team1.lower() in home.lower()
if is_team1_home:
team1_goals += h_score
team2_goals += a_score
if h_score > a_score:
team1_wins += 1
elif h_score == a_score:
draws += 1
else:
team2_wins += 1
else:
team1_goals += a_score
team2_goals += h_score
if a_score > h_score:
team1_wins += 1
elif a_score == h_score:
draws += 1
else:
team2_wins += 1
if len(last_5) < 5:
last_5.append({
'date': match['date'],
'home_score': h_score,
'away_score': a_score,
'result': 'H' if h_score > a_score else ('D' if h_score == a_score else 'A')
})
total = len(matches)
return {
'found': True,
'total_matches': total,
'team1': team1,
'team2': team2,
'team1_wins': team1_wins,
'draws': draws,
'team2_wins': team2_wins,
'team1_goals': team1_goals,
'team2_goals': team2_goals,
'avg_goals': round((team1_goals + team2_goals) / total, 2) if total > 0 else 0,
'last_5': last_5
}
def get_team_form(self, team: str, limit: int = 5) -> List[str]:
"""Get team's recent form (W/D/L)"""
with self._get_connection() as conn:
cursor = conn.cursor()
cursor.execute('''
SELECT * FROM matches
WHERE home_team LIKE ? OR away_team LIKE ?
ORDER BY date DESC
LIMIT ?
''', (f'%{team}%', f'%{team}%', limit))
results = []
for row in cursor.fetchall():
match = dict(row)
h_score = match['home_score'] or 0
a_score = match['away_score'] or 0
is_home = team.lower() in match['home_team'].lower()
if is_home:
if h_score > a_score:
results.append('W')
elif h_score == a_score:
results.append('D')
else:
results.append('L')
else:
if a_score > h_score:
results.append('W')
elif a_score == h_score:
results.append('D')
else:
results.append('L')
return results
def update_team_elo(self, team: str, new_elo: float):
"""Update team's ELO rating"""
with self._get_connection() as conn:
cursor = conn.cursor()
cursor.execute('''
INSERT OR REPLACE INTO team_elo (team, elo, matches_played, last_updated)
VALUES (?, ?, COALESCE((SELECT matches_played FROM team_elo WHERE team = ?) + 1, 1), ?)
''', (team, new_elo, team, datetime.now().isoformat()))
conn.commit()
def get_team_elo(self, team: str) -> float:
"""Get team's ELO rating"""
with self._get_connection() as conn:
cursor = conn.cursor()
cursor.execute('SELECT elo FROM team_elo WHERE team LIKE ?', (f'%{team}%',))
row = cursor.fetchone()
return row['elo'] if row else 1500.0
def get_match_count(self) -> int:
"""Get total number of stored matches"""
with self._get_connection() as conn:
cursor = conn.cursor()
cursor.execute('SELECT COUNT(*) as count FROM matches')
return cursor.fetchone()['count']
def get_all_teams(self) -> List[str]:
"""Get all unique team names"""
with self._get_connection() as conn:
cursor = conn.cursor()
cursor.execute('''
SELECT DISTINCT home_team FROM matches
UNION
SELECT DISTINCT away_team FROM matches
''')
return [row['home_team'] for row in cursor.fetchall()]
# Global instance
history_db = HistoricalDatabase()
def sync_from_api():
"""Sync historical data from Football-Data.org API"""
from src.data.api_clients import FootballDataOrgClient
client = FootballDataOrgClient()
leagues = ['premier_league', 'la_liga', 'bundesliga', 'serie_a', 'ligue_1']
total_stored = 0
for league in leagues:
matches = client.get_finished_matches(league, limit=100)
for match in matches:
db_match = {
'id': str(match.get('id')),
'date': match.get('utcDate', '')[:10],
'home_team': match.get('homeTeam', {}).get('name', ''),
'away_team': match.get('awayTeam', {}).get('name', ''),
'home_score': match.get('score', {}).get('fullTime', {}).get('home'),
'away_score': match.get('score', {}).get('fullTime', {}).get('away'),
'league': league,
'season': match.get('season', {}).get('id', '2024-25')
}
if history_db.store_match(db_match):
total_stored += 1
return total_stored