nananie143 committed on
Commit
ee79f7b
·
verified ·
1 Parent(s): 2e6fc13

feat: Add src/data/data_sources.py

Browse files
Files changed (1) hide show
  1. src/data/data_sources.py +407 -0
src/data/data_sources.py ADDED
@@ -0,0 +1,407 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Multi-Source Data Aggregator
3
+
4
+ Fetches football data from multiple free APIs to expand league coverage.
5
+ Sources: Football-Data.org, OpenLigaDB, TheSportsDB, Sports Open Data
6
+ """
7
+
8
+ import os
9
+ import json
10
+ import logging
11
+ import requests
12
+ from datetime import datetime, timedelta
13
+ from typing import Dict, List, Optional, Any
14
+ from pathlib import Path
15
+ from functools import lru_cache
16
+
17
# Module-level logger; the classes in this file report source failures
# through it at debug level.
logger = logging.getLogger(__name__)

# Cache directory: "data/cache" under the directory three levels above this
# file. NOTE: the directory is created eagerly at import time, so importing
# this module touches the filesystem.
CACHE_DIR = Path(__file__).parent.parent.parent / "data" / "cache"
CACHE_DIR.mkdir(parents=True, exist_ok=True)
22
+
23
+
24
class FootballDataAggregator:
    """Aggregate football data from multiple free sources.

    Every registered source is queried on a best-effort basis: an exception
    raised by one source is logged at debug level and that source is skipped,
    so a single failing API does not prevent the others from answering.
    """

    def __init__(self):
        # Insertion order matters: get_team_stats() and get_league_standings()
        # return the first non-empty answer while walking this mapping.
        self.sources = {
            'football_data': FootballDataOrg(),
            'openligadb': OpenLigaDB(),
            'thesportsdb': TheSportsDB(),
            'sports_open_data': SportsOpenData()
        }

    @staticmethod
    def _fixture_key(fixture: Dict) -> tuple:
        """Return the (home, away, date) identity used to deduplicate fixtures.

        Values are stripped and case-folded so that the same match reported
        by two sources with different casing or stray whitespace collapses
        into one entry. Missing or ``None`` fields count as empty strings.
        """
        return tuple(
            str(fixture.get(field) or '').strip().casefold()
            for field in ('home_team', 'away_team', 'date')
        )

    def get_fixtures(self, league: str, days_ahead: int = 7) -> List[Dict]:
        """Get fixtures from all available sources.

        Args:
            league: League key understood by the sources (e.g. 'bundesliga').
            days_ahead: Size of the look-ahead window, forwarded to each source.

        Returns:
            Fixture dicts, each tagged with a 'source' key naming the source
            that produced it. When several sources report the same
            home team / away team / date, only the first one seen is kept.
        """
        all_fixtures = []

        for name, source in self.sources.items():
            try:
                # Treat a source that answers None like one with no fixtures.
                fixtures = source.get_fixtures(league, days_ahead) or []
                for f in fixtures:
                    f['source'] = name
                all_fixtures.extend(fixtures)
            except Exception as e:
                logger.debug(f"{name} fixtures error: {e}")

        # Deduplicate by normalized home team, away team and date.
        seen = set()
        unique = []
        for f in all_fixtures:
            key = self._fixture_key(f)
            if key not in seen:
                seen.add(key)
                unique.append(f)

        return unique

    def get_team_stats(self, team: str, league: Optional[str] = None) -> Dict:
        """Get team statistics from available sources.

        Returns the first non-empty result, or an empty dict when no source
        has data for the team (or every source failed).
        """
        for name, source in self.sources.items():
            try:
                stats = source.get_team_stats(team, league)
                if stats:
                    return stats
            except Exception as e:
                logger.debug(f"{name} stats error for {team}: {e}")

        return {}

    def get_league_standings(self, league: str) -> List[Dict]:
        """Get league standings.

        Returns the first non-empty table produced by a source, or an empty
        list when no source has standings for the league.
        """
        for name, source in self.sources.items():
            try:
                standings = source.get_standings(league)
                if standings:
                    return standings
            except Exception as e:
                logger.debug(f"{name} standings error: {e}")

        return []
82
+
83
+
84
class FootballDataOrg:
    """Football-Data.org API wrapper.

    Requires an API token in the FOOTBALL_DATA_API_KEY environment variable;
    without it every request short-circuits to an empty result.
    """

    BASE_URL = "https://api.football-data.org/v4"

    # Internal league key -> Football-Data.org competition code.
    LEAGUES = {
        'premier_league': 'PL',
        'bundesliga': 'BL1',
        'la_liga': 'PD',
        'serie_a': 'SA',
        'ligue_1': 'FL1',
        'eredivisie': 'DED',
        'primeira_liga': 'PPL',
        'championship': 'ELC'
    }

    def __init__(self):
        self.api_key = os.getenv('FOOTBALL_DATA_API_KEY')

    def _request(self, endpoint: str) -> Dict:
        """GET ``BASE_URL + endpoint`` and return the decoded JSON object.

        Returns an empty dict when no API key is configured, when the request
        fails, when the response status is not 200, or when the payload is
        not a JSON object.
        """
        if not self.api_key:
            return {}

        headers = {'X-Auth-Token': self.api_key}
        try:
            resp = requests.get(f"{self.BASE_URL}{endpoint}", headers=headers, timeout=10)
            if resp.status_code == 200:
                payload = resp.json()
                # Callers use .get() on the result, so only hand back dicts.
                return payload if isinstance(payload, dict) else {}
            # Leave a trace for rate limiting / auth failures instead of
            # silently returning nothing.
            logger.debug(f"Football-Data.org returned HTTP {resp.status_code} for {endpoint}")
        except Exception as e:
            logger.debug(f"Football-Data.org error: {e}")

        return {}

    @staticmethod
    def _team_name(team: Optional[Dict]) -> str:
        """Return the team's name, or '' when the team or its name is null."""
        return (team or {}).get('name') or ''

    def get_fixtures(self, league: str, days_ahead: int = 7) -> List[Dict]:
        """Return matches of ``league`` from today through ``days_ahead`` days.

        Returns an empty list for leagues missing from LEAGUES. Null team or
        kick-off fields in the payload become empty strings rather than
        raising.
        """
        code = self.LEAGUES.get(league)
        if not code:
            return []

        # Sample the clock once so both ends of the window share one "now".
        now = datetime.now()
        today = now.strftime('%Y-%m-%d')
        end = (now + timedelta(days=days_ahead)).strftime('%Y-%m-%d')

        data = self._request(f"/competitions/{code}/matches?dateFrom={today}&dateTo={end}")

        fixtures = []
        for match in data.get('matches') or []:
            # utcDate is sliced as "YYYY-MM-DDTHH:MM:SSZ": date, then HH:MM.
            kickoff = match.get('utcDate') or ''
            fixtures.append({
                'home_team': self._team_name(match.get('homeTeam')),
                'away_team': self._team_name(match.get('awayTeam')),
                'date': kickoff[:10],
                'time': kickoff[11:16],
                'league': league,
                'status': match.get('status') or 'SCHEDULED'
            })

        return fixtures

    def get_team_stats(self, team: str, league: Optional[str] = None) -> Dict:
        """Not implemented for this source; always returns an empty dict."""
        # Would need team ID lookup
        return {}

    def get_standings(self, league: str) -> List[Dict]:
        """Return the overall ('TOTAL') table of ``league``.

        Returns an empty list for unknown leagues or when the request yields
        no data.
        """
        code = self.LEAGUES.get(league)
        if not code:
            return []

        data = self._request(f"/competitions/{code}/standings")

        standings = []
        for standing in data.get('standings') or []:
            # Only the entries typed 'TOTAL' are used; other types are skipped.
            if standing.get('type') != 'TOTAL':
                continue
            for entry in standing.get('table') or []:
                standings.append({
                    'position': entry.get('position'),
                    'team': (entry.get('team') or {}).get('name'),
                    'points': entry.get('points'),
                    'played': entry.get('playedGames'),
                    'won': entry.get('won'),
                    'draw': entry.get('draw'),
                    'lost': entry.get('lost'),
                    'gf': entry.get('goalsFor'),
                    'ga': entry.get('goalsAgainst'),
                    'gd': entry.get('goalDifference')
                })

        return standings
169
+
170
+
171
class OpenLigaDB:
    """OpenLigaDB API - Free, no auth required. Mainly German leagues."""

    BASE_URL = "https://api.openligadb.de"

    # Internal league key -> OpenLigaDB league shortcut.
    LEAGUES = {
        'bundesliga': 'bl1',
        'bundesliga_2': 'bl2',
        'dfb_pokal': 'dfb'
    }

    # Month from which a date is attributed to the season starting that year.
    # NOTE(review): July is an assumption for the leagues listed above; pass
    # `season=` explicitly if a competition follows a different calendar.
    SEASON_START_MONTH = 7

    @classmethod
    def _current_season(cls, today=None) -> int:
        """Return the start year of the season that ``today`` falls into.

        The endpoints take the season as a year in the URL; this derives it
        from the date instead of relying on a constant that goes stale.
        """
        today = today or datetime.now().date()
        if today.month >= cls.SEASON_START_MONTH:
            return today.year
        return today.year - 1

    def _get_json(self, path: str) -> Any:
        """GET ``BASE_URL + path``; return decoded JSON, or None on non-200."""
        resp = requests.get(f"{self.BASE_URL}{path}", timeout=10)
        if resp.status_code != 200:
            return None
        return resp.json()

    @staticmethod
    def _parse_kickoff(raw: Any) -> Optional[datetime]:
        """Parse an ISO kick-off timestamp; return None if missing or invalid."""
        if not raw:
            return None
        try:
            return datetime.fromisoformat(str(raw).replace('Z', ''))
        except ValueError:
            return None

    @staticmethod
    def _team_name(team: Optional[Dict]) -> str:
        """Return the team's name, or '' when the team or its name is null."""
        return (team or {}).get('teamName') or ''

    def get_fixtures(self, league: str, days_ahead: int = 7,
                     season: Optional[int] = None) -> List[Dict]:
        """Return matches of ``league`` dated today through ``days_ahead`` days.

        Args:
            league: Key of LEAGUES; unknown leagues yield an empty list.
            days_ahead: Inclusive size of the look-ahead window in days.
            season: Season start year; derived from today's date when omitted.

        Returns:
            Fixture dicts. Matches without a parseable kick-off time are
            skipped individually instead of discarding the whole response.
        """
        code = self.LEAGUES.get(league)
        if not code:
            return []
        if season is None:
            season = self._current_season()

        try:
            data = self._get_json(f"/getmatchdata/{code}/{season}")
        except Exception as e:
            logger.debug(f"OpenLigaDB error: {e}")
            return []
        if not isinstance(data, list):
            return []

        today = datetime.now().date()
        end_date = today + timedelta(days=days_ahead)

        fixtures = []
        for match in data:
            if not isinstance(match, dict):
                continue
            raw_kickoff = match.get('matchDateTime')
            kickoff = self._parse_kickoff(raw_kickoff)
            if kickoff is None:
                # One malformed row must not cost us the valid ones.
                continue
            match_date = kickoff.date()
            if today <= match_date <= end_date:
                fixtures.append({
                    'home_team': self._team_name(match.get('team1')),
                    'away_team': self._team_name(match.get('team2')),
                    'date': str(match_date),
                    'time': str(raw_kickoff)[11:16],
                    'league': league
                })

        return fixtures

    def get_team_stats(self, team: str, league: Optional[str] = None) -> Dict:
        """Not implemented for this source; always returns an empty dict."""
        return {}

    def get_standings(self, league: str, season: Optional[int] = None) -> List[Dict]:
        """Return the league table of ``league``.

        Args:
            league: Key of LEAGUES; unknown leagues yield an empty list.
            season: Season start year; derived from today's date when omitted.

        Returns:
            One dict per table row. 'position' is the 1-based index of the row
            in the response, which is assumed to arrive already ranked.
        """
        code = self.LEAGUES.get(league)
        if not code:
            return []
        if season is None:
            season = self._current_season()

        try:
            data = self._get_json(f"/getbltable/{code}/{season}")
        except Exception as e:
            logger.debug(f"OpenLigaDB standings error: {e}")
            return []
        if not isinstance(data, list):
            return []

        return [
            {
                'position': i,
                'team': entry.get('teamName'),
                'points': entry.get('points'),
                'played': entry.get('matches'),
                'won': entry.get('won'),
                'draw': entry.get('draw'),
                'lost': entry.get('lost'),
                'gf': entry.get('goals'),
                'ga': entry.get('opponentGoals'),
                'gd': entry.get('goalDiff')
            }
            for i, entry in enumerate(data, 1)
            if isinstance(entry, dict)
        ]
249
+
250
+
251
class TheSportsDB:
    """TheSportsDB API - Free tier with 100 req/min limit."""

    BASE_URL = "https://www.thesportsdb.com/api/v1/json/3"

    # Internal league key -> TheSportsDB league id.
    LEAGUES = {
        'premier_league': '4328',
        'la_liga': '4335',
        'serie_a': '4332',
        'bundesliga': '4331',
        'ligue_1': '4334',
        'eredivisie': '4337',
        'primeira_liga': '4344',
        'mls': '4346',
        'championship': '4329'
    }

    # Month from which a date is attributed to the season starting that year.
    # NOTE(review): assumes split-year seasons ("2025-2026"); 'mls' presumably
    # follows a calendar-year season - confirm and pass `season=` if so.
    SEASON_START_MONTH = 7

    @classmethod
    def _current_season(cls, today=None) -> str:
        """Return the "<start>-<end>" season label that ``today`` falls into."""
        today = today or datetime.now().date()
        start = today.year if today.month >= cls.SEASON_START_MONTH else today.year - 1
        return f"{start}-{start + 1}"

    def _get_json(self, endpoint: str, params: Dict) -> Dict:
        """GET ``BASE_URL/endpoint`` and return the decoded JSON object.

        ``params`` is passed to requests so values are URL-encoded properly.
        Returns an empty dict on a non-200 status or a non-object payload.
        """
        resp = requests.get(f"{self.BASE_URL}/{endpoint}", params=params, timeout=10)
        if resp.status_code != 200:
            return {}
        payload = resp.json()
        return payload if isinstance(payload, dict) else {}

    @staticmethod
    def _to_int(value: Any, default: int = 0) -> int:
        """Convert ``value`` to int, falling back to ``default`` if impossible."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return default

    def get_fixtures(self, league: str, days_ahead: int = 7) -> List[Dict]:
        """Return upcoming events of ``league`` within the next ``days_ahead`` days.

        Unknown leagues yield an empty list. Events whose 'dateEvent' cannot be
        parsed are skipped; a null 'strTime' falls back to '00:00' instead of
        dropping the event.
        """
        league_id = self.LEAGUES.get(league)
        if not league_id:
            return []

        try:
            data = self._get_json('eventsnextleague.php', {'id': league_id})
        except Exception as e:
            logger.debug(f"TheSportsDB error: {e}")
            return []

        today = datetime.now().date()
        end_date = today + timedelta(days=days_ahead)

        fixtures = []
        # The original code guarded this key with `or []`, i.e. it can be null.
        for event in data.get('events') or []:
            raw_date = event.get('dateEvent') or ''
            try:
                match_date = datetime.strptime(str(raw_date), '%Y-%m-%d').date()
            except ValueError:
                continue  # no usable date -> cannot place it in the window
            if today <= match_date <= end_date:
                fixtures.append({
                    'home_team': event.get('strHomeTeam') or '',
                    'away_team': event.get('strAwayTeam') or '',
                    'date': raw_date,
                    'time': (event.get('strTime') or '00:00')[:5],
                    'league': league,
                    'venue': event.get('strVenue') or ''
                })

        return fixtures

    def get_team_stats(self, team: str, league: Optional[str] = None) -> Dict:
        """Look up ``team`` by name and return basic club facts.

        Returns name, stadium, capacity, founding year and league of the first
        search hit, or an empty dict when nothing matches or the request fails.
        ``league`` is accepted for interface parity and not used.
        """
        try:
            data = self._get_json('searchteams.php', {'t': team})
        except Exception as e:
            logger.debug(f"TheSportsDB team error: {e}")
            return {}

        teams = data.get('teams') or []
        if not teams:
            return {}

        t = teams[0]
        return {
            'name': t.get('strTeam'),
            'stadium': t.get('strStadium'),
            'capacity': t.get('intStadiumCapacity'),
            'formed': t.get('intFormedYear'),
            'league': t.get('strLeague')
        }

    def get_standings(self, league: str, season: Optional[str] = None) -> List[Dict]:
        """Return the league table of ``league``.

        Args:
            league: Key of LEAGUES; unknown leagues yield an empty list.
            season: Season label such as "2025-2026"; derived from today's
                date when omitted.

        Returns:
            One dict per table row with integer columns. A null or
            non-numeric cell becomes 0 rather than discarding the table.
        """
        league_id = self.LEAGUES.get(league)
        if not league_id:
            return []
        if season is None:
            season = self._current_season()

        try:
            data = self._get_json('lookuptable.php', {'l': league_id, 's': season})
        except Exception as e:
            logger.debug(f"TheSportsDB standings error: {e}")
            return []

        to_int = self._to_int
        return [
            {
                'position': to_int(entry.get('intRank')),
                'team': entry.get('strTeam'),
                'points': to_int(entry.get('intPoints')),
                'played': to_int(entry.get('intPlayed')),
                'won': to_int(entry.get('intWin')),
                'draw': to_int(entry.get('intDraw')),
                'lost': to_int(entry.get('intLoss')),
                'gf': to_int(entry.get('intGoalsFor')),
                'ga': to_int(entry.get('intGoalsAgainst')),
                'gd': to_int(entry.get('intGoalDifference'))
            }
            for entry in data.get('table') or []
        ]
364
+
365
+
366
class SportsOpenData:
    """Placeholder adapter for the "Sports Open Data" source.

    None of the lookups are implemented yet: every method ignores its
    arguments and reports "no data", so callers that walk several sources
    simply get nothing from this one.

    NOTE(review): the original description called this source
    community-driven with no auth required, while BASE_URL points at a
    Sportradar trial host - confirm the access requirements before
    implementing the calls.
    """

    BASE_URL = "https://sports-open-data.api.sportradar.com/soccer/trial/v4/en"

    # Note: This API may have limited free access
    # Internal league key -> competition identifier used by this source.
    LEAGUES = {
        'serie_a': 'sr:competition:23',
        'la_liga': 'sr:competition:8'
    }

    def get_fixtures(self, league: str, days_ahead: int = 7) -> List[Dict]:
        """Return upcoming fixtures; currently always an empty list."""
        # Sports Open Data requires specific implementation
        no_fixtures: List[Dict] = []
        return no_fixtures

    def get_team_stats(self, team: str, league: str = None) -> Dict:
        """Return team statistics; currently always an empty dict."""
        no_stats: Dict = {}
        return no_stats

    def get_standings(self, league: str) -> List[Dict]:
        """Return the league table; currently always an empty list."""
        no_table: List[Dict] = []
        return no_table
386
+
387
+
388
# Lazily created, process-wide aggregator; stays None until first requested.
_aggregator: Optional[FootballDataAggregator] = None


def get_data_aggregator() -> FootballDataAggregator:
    """Return the shared FootballDataAggregator, building it on first use."""
    global _aggregator
    if _aggregator is not None:
        return _aggregator
    _aggregator = FootballDataAggregator()
    return _aggregator
398
+
399
+
400
def get_all_fixtures(league: str, days: int = 7) -> List[Dict]:
    """Return fixtures for ``league`` from every source via the shared aggregator.

    ``days`` is the look-ahead window handed to the aggregator.
    """
    aggregator = get_data_aggregator()
    return aggregator.get_fixtures(league, days)
403
+
404
+
405
def get_standings(league: str) -> List[Dict]:
    """Return the standings for ``league`` via the shared aggregator."""
    aggregator = get_data_aggregator()
    return aggregator.get_league_standings(league)