File size: 11,131 Bytes
c230fe3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
"""
Historical Data Storage (SQLite)

Stores historical match data for:
- ML model training
- H2H analysis
- ELO rating calculation
- Form tracking
"""

import os
import sqlite3
import json
from datetime import datetime
from typing import Dict, List, Optional, Tuple
from dataclasses import dataclass
from contextlib import contextmanager

# Database path
DB_PATH = os.path.join(os.path.dirname(__file__), 'football_history.db')


@dataclass
class HistoricalMatch:
    """Historical match record"""
    id: str
    date: str
    home_team: str
    away_team: str
    home_score: int
    away_score: int
    league: str
    season: str


class HistoricalDatabase:
    """
    SQLite database for storing historical match data.
    Used for ML training, H2H lookup, and ELO calculation.
    """
    
    def __init__(self, db_path: str = DB_PATH):
        self.db_path = db_path
        self._init_db()
    
    def _init_db(self):
        """Initialize database schema"""
        with self._get_connection() as conn:
            cursor = conn.cursor()
            
            # Matches table
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS matches (
                    id TEXT PRIMARY KEY,
                    date TEXT NOT NULL,
                    home_team TEXT NOT NULL,
                    away_team TEXT NOT NULL,
                    home_score INTEGER,
                    away_score INTEGER,
                    league TEXT,
                    season TEXT,
                    created_at TEXT DEFAULT CURRENT_TIMESTAMP
                )
            ''')
            
            # Team ELO ratings table
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS team_elo (
                    team TEXT PRIMARY KEY,
                    elo REAL DEFAULT 1500,
                    matches_played INTEGER DEFAULT 0,
                    last_updated TEXT
                )
            ''')
            
            # Team statistics table
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS team_stats (
                    team TEXT PRIMARY KEY,
                    wins INTEGER DEFAULT 0,
                    draws INTEGER DEFAULT 0,
                    losses INTEGER DEFAULT 0,
                    goals_for INTEGER DEFAULT 0,
                    goals_against INTEGER DEFAULT 0,
                    last_5_results TEXT DEFAULT '[]',
                    last_updated TEXT
                )
            ''')
            
            # Create indexes
            cursor.execute('CREATE INDEX IF NOT EXISTS idx_matches_teams ON matches(home_team, away_team)')
            cursor.execute('CREATE INDEX IF NOT EXISTS idx_matches_date ON matches(date)')
            cursor.execute('CREATE INDEX IF NOT EXISTS idx_matches_league ON matches(league)')
            
            conn.commit()
    
    @contextmanager
    def _get_connection(self):
        """Context manager for database connection"""
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        try:
            yield conn
        finally:
            conn.close()
    
    def store_match(self, match: Dict) -> bool:
        """Store a match result"""
        try:
            with self._get_connection() as conn:
                cursor = conn.cursor()
                cursor.execute('''
                    INSERT OR REPLACE INTO matches 
                    (id, date, home_team, away_team, home_score, away_score, league, season)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    match.get('id', f"{match['home_team']}_{match['away_team']}_{match['date']}"),
                    match.get('date'),
                    match.get('home_team'),
                    match.get('away_team'),
                    match.get('home_score'),
                    match.get('away_score'),
                    match.get('league', 'unknown'),
                    match.get('season', '2024-25')
                ))
                conn.commit()
                return True
        except Exception as e:
            print(f"Error storing match: {e}")
            return False
    
    def store_matches_batch(self, matches: List[Dict]) -> int:
        """Store multiple matches at once"""
        stored = 0
        for match in matches:
            if self.store_match(match):
                stored += 1
        return stored
    
    def get_h2h(self, team1: str, team2: str, limit: int = 10) -> List[Dict]:
        """Get head-to-head matches between two teams"""
        with self._get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute('''
                SELECT * FROM matches 
                WHERE (home_team LIKE ? AND away_team LIKE ?)
                   OR (home_team LIKE ? AND away_team LIKE ?)
                ORDER BY date DESC
                LIMIT ?
            ''', (f'%{team1}%', f'%{team2}%', f'%{team2}%', f'%{team1}%', limit))
            
            return [dict(row) for row in cursor.fetchall()]
    
    def get_h2h_stats(self, team1: str, team2: str) -> Dict:
        """Get H2H statistics between two teams"""
        matches = self.get_h2h(team1, team2, limit=50)
        
        if not matches:
            return {'found': False, 'total_matches': 0}
        
        team1_wins = 0
        draws = 0
        team2_wins = 0
        team1_goals = 0
        team2_goals = 0
        last_5 = []
        
        for match in matches:
            home = match['home_team']
            h_score = match['home_score'] or 0
            a_score = match['away_score'] or 0
            
            is_team1_home = team1.lower() in home.lower()
            
            if is_team1_home:
                team1_goals += h_score
                team2_goals += a_score
                if h_score > a_score:
                    team1_wins += 1
                elif h_score == a_score:
                    draws += 1
                else:
                    team2_wins += 1
            else:
                team1_goals += a_score
                team2_goals += h_score
                if a_score > h_score:
                    team1_wins += 1
                elif a_score == h_score:
                    draws += 1
                else:
                    team2_wins += 1
            
            if len(last_5) < 5:
                last_5.append({
                    'date': match['date'],
                    'home_score': h_score,
                    'away_score': a_score,
                    'result': 'H' if h_score > a_score else ('D' if h_score == a_score else 'A')
                })
        
        total = len(matches)
        return {
            'found': True,
            'total_matches': total,
            'team1': team1,
            'team2': team2,
            'team1_wins': team1_wins,
            'draws': draws,
            'team2_wins': team2_wins,
            'team1_goals': team1_goals,
            'team2_goals': team2_goals,
            'avg_goals': round((team1_goals + team2_goals) / total, 2) if total > 0 else 0,
            'last_5': last_5
        }
    
    def get_team_form(self, team: str, limit: int = 5) -> List[str]:
        """Get team's recent form (W/D/L)"""
        with self._get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute('''
                SELECT * FROM matches 
                WHERE home_team LIKE ? OR away_team LIKE ?
                ORDER BY date DESC
                LIMIT ?
            ''', (f'%{team}%', f'%{team}%', limit))
            
            results = []
            for row in cursor.fetchall():
                match = dict(row)
                h_score = match['home_score'] or 0
                a_score = match['away_score'] or 0
                is_home = team.lower() in match['home_team'].lower()
                
                if is_home:
                    if h_score > a_score:
                        results.append('W')
                    elif h_score == a_score:
                        results.append('D')
                    else:
                        results.append('L')
                else:
                    if a_score > h_score:
                        results.append('W')
                    elif a_score == h_score:
                        results.append('D')
                    else:
                        results.append('L')
            
            return results
    
    def update_team_elo(self, team: str, new_elo: float):
        """Update team's ELO rating"""
        with self._get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute('''
                INSERT OR REPLACE INTO team_elo (team, elo, matches_played, last_updated)
                VALUES (?, ?, COALESCE((SELECT matches_played FROM team_elo WHERE team = ?) + 1, 1), ?)
            ''', (team, new_elo, team, datetime.now().isoformat()))
            conn.commit()
    
    def get_team_elo(self, team: str) -> float:
        """Get team's ELO rating"""
        with self._get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute('SELECT elo FROM team_elo WHERE team LIKE ?', (f'%{team}%',))
            row = cursor.fetchone()
            return row['elo'] if row else 1500.0
    
    def get_match_count(self) -> int:
        """Get total number of stored matches"""
        with self._get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute('SELECT COUNT(*) as count FROM matches')
            return cursor.fetchone()['count']
    
    def get_all_teams(self) -> List[str]:
        """Get all unique team names"""
        with self._get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute('''
                SELECT DISTINCT home_team FROM matches
                UNION
                SELECT DISTINCT away_team FROM matches
            ''')
            return [row['home_team'] for row in cursor.fetchall()]


# Global instance
history_db = HistoricalDatabase()


def sync_from_api():
    """Sync historical data from Football-Data.org API"""
    from src.data.api_clients import FootballDataOrgClient
    
    client = FootballDataOrgClient()
    leagues = ['premier_league', 'la_liga', 'bundesliga', 'serie_a', 'ligue_1']
    
    total_stored = 0
    for league in leagues:
        matches = client.get_finished_matches(league, limit=100)
        for match in matches:
            db_match = {
                'id': str(match.get('id')),
                'date': match.get('utcDate', '')[:10],
                'home_team': match.get('homeTeam', {}).get('name', ''),
                'away_team': match.get('awayTeam', {}).get('name', ''),
                'home_score': match.get('score', {}).get('fullTime', {}).get('home'),
                'away_score': match.get('score', {}).get('fullTime', {}).get('away'),
                'league': league,
                'season': match.get('season', {}).get('id', '2024-25')
            }
            if history_db.store_match(db_match):
                total_stored += 1
    
    return total_stored