File size: 19,082 Bytes
c095e08
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
"""
NBA ML Prediction System - Prediction Tracker
==============================================
Tracks predictions and measures accuracy using ChromaDB Cloud with local fallback.
"""

import logging
from datetime import datetime
from typing import Dict, List, Optional
import json
import hashlib
from pathlib import Path

from src.config import CHROMADB_CONFIG, PROCESSED_DATA_DIR

logger = logging.getLogger(__name__)


class PredictionTracker:
    """
    Tracks predictions and measures accuracy over time.
    
    Uses ChromaDB Cloud if available, otherwise falls back to local JSON storage.
    Stores predictions before games and updates with results after completion.
    Provides accuracy statistics by team, confidence level, and over time.
    """
    
    def __init__(self):
        """Initialize storage - try ChromaDB Cloud, fallback to local JSON."""
        self.collection = None
        self.client = None
        self._use_local = False
        self._local_file = PROCESSED_DATA_DIR / "predictions_local.json"
        self._local_data: List[Dict] = []
        
        # Try ChromaDB Cloud first
        try:
            import chromadb
            
            # Try CloudClient (the official way for Chroma Cloud)
            self.client = chromadb.CloudClient(
                tenant=CHROMADB_CONFIG.tenant,
                database=CHROMADB_CONFIG.database,
                api_key=CHROMADB_CONFIG.api_key,
            )
            
            # Get or create collection for predictions
            self.collection = self.client.get_or_create_collection(
                name=CHROMADB_CONFIG.collection_name,
                metadata={"description": "NBA game predictions with results"}
            )
            
            logger.info("Connected to ChromaDB Cloud successfully")
            
        except Exception as e:
            logger.warning(f"ChromaDB Cloud unavailable ({e}), using local JSON storage")
            self._use_local = True
            self._load_local_data()
    
    def _generate_id(self, game_id: str, prediction_date: str) -> str:
        """Generate unique ID for a prediction."""
        return hashlib.md5(f"{game_id}_{prediction_date}".encode()).hexdigest()
    
    def _load_local_data(self):
        """Load predictions from local JSON file."""
        if self._local_file.exists():
            try:
                with open(self._local_file, 'r') as f:
                    self._local_data = json.load(f)
            except Exception as e:
                logger.error(f"Failed to load local data: {e}")
                self._local_data = []
        else:
            self._local_data = []
    
    def _save_local_data(self):
        """Save predictions to local JSON file."""
        try:
            with open(self._local_file, 'w') as f:
                json.dump(self._local_data, f, indent=2)
        except Exception as e:
            logger.error(f"Failed to save local data: {e}")
    
    def _find_local_prediction(self, game_id: str) -> Optional[int]:
        """Find index of prediction by game_id in local data."""
        for i, pred in enumerate(self._local_data):
            if pred.get("game_id") == game_id:
                return i
        return None
    
    def save_prediction(self, game_id: str, prediction: Dict) -> bool:
        """
        Store a prediction before game starts.
        
        Args:
            game_id: NBA game ID
            prediction: Dict with home_team, away_team, predicted_winner,
                       home_win_prob, confidence, etc.
        
        Returns:
            True if saved successfully
        """
        now = datetime.now().isoformat()
        doc_id = self._generate_id(game_id, now[:10])
        
        # Prepare metadata
        metadata = {
            "id": doc_id,
            "game_id": game_id,
            "game_date": prediction.get("game_date", now[:10]),
            "home_team": prediction.get("home_team", ""),
            "away_team": prediction.get("away_team", ""),
            "predicted_winner": prediction.get("predicted_winner", ""),
            "home_win_prob": float(prediction.get("home_win_probability", 0.5)),
            "away_win_prob": float(prediction.get("away_win_probability", 0.5)),
            "confidence": prediction.get("confidence", "medium"),
            "home_elo": float(prediction.get("home_elo", 1500)),
            "away_elo": float(prediction.get("away_elo", 1500)),
            "actual_winner": "",  # Empty until game completes
            "is_correct": -1,  # -1 = pending, 0 = wrong, 1 = correct
            "created_at": now,
            "updated_at": now,
        }
        
        # Use local storage if ChromaDB not available
        if self._use_local:
            try:
                # Check if exists and update, otherwise append
                idx = self._find_local_prediction(game_id)
                if idx is not None:
                    self._local_data[idx] = metadata
                else:
                    self._local_data.append(metadata)
                self._save_local_data()
                logger.info(f"Saved prediction for game {game_id} (local)")
                return True
            except Exception as e:
                logger.error(f"Failed to save prediction locally: {e}")
                return False
        
        # Use ChromaDB Cloud
        if not self.collection:
            logger.warning("ChromaDB not available, prediction not saved")
            return False
        
        try:
            # Document text for semantic search
            doc_text = (
                f"NBA Game: {prediction.get('away_team')} @ {prediction.get('home_team')} "
                f"on {metadata['game_date']}. "
                f"Predicted winner: {metadata['predicted_winner']} "
                f"with {metadata['confidence']} confidence "
                f"({metadata['home_win_prob']:.1%} home win probability)"
            )
            
            # Upsert (update if exists, insert if not)
            self.collection.upsert(
                ids=[doc_id],
                documents=[doc_text],
                metadatas=[metadata]
            )
            
            logger.info(f"Saved prediction for game {game_id}")
            return True
            
        except Exception as e:
            logger.error(f"Failed to save prediction: {e}")
            return False
    
    def update_result(self, game_id: str, actual_winner: str, 
                     home_score: int = 0, away_score: int = 0) -> bool:
        """
        Update prediction with actual game result.
        
        Args:
            game_id: NBA game ID
            actual_winner: Team tricode of actual winner
            home_score: Final home score
            away_score: Final away score
        
        Returns:
            True if updated successfully
        """
        # Handle local storage
        if self._use_local:
            try:
                idx = self._find_local_prediction(game_id)
                if idx is None:
                    logger.warning(f"No prediction found for game {game_id}")
                    return False
                
                pred = self._local_data[idx]
                predicted_winner = pred.get("predicted_winner", "")
                is_correct = 1 if predicted_winner == actual_winner else 0
                
                pred["actual_winner"] = actual_winner
                pred["is_correct"] = is_correct
                pred["home_score"] = home_score
                pred["away_score"] = away_score
                pred["updated_at"] = datetime.now().isoformat()
                
                self._local_data[idx] = pred
                self._save_local_data()
                
                result_text = "CORRECT βœ“" if is_correct else "WRONG βœ—"
                logger.info(f"Updated result for game {game_id}: {result_text} (local)")
                return True
                
            except Exception as e:
                logger.error(f"Failed to update result locally: {e}")
                return False
        
        # Handle ChromaDB Cloud
        if not self.collection:
            return False
        
        try:
            # Find the prediction for this game
            results = self.collection.get(
                where={"game_id": game_id},
                include=["metadatas", "documents"]
            )
            
            if not results["ids"]:
                logger.warning(f"No prediction found for game {game_id}")
                return False
            
            doc_id = results["ids"][0]
            metadata = results["metadatas"][0]
            
            # Check if prediction was correct
            predicted_winner = metadata.get("predicted_winner", "")
            is_correct = 1 if predicted_winner == actual_winner else 0
            
            # Update metadata
            metadata["actual_winner"] = actual_winner
            metadata["is_correct"] = is_correct
            metadata["home_score"] = home_score
            metadata["away_score"] = away_score
            metadata["updated_at"] = datetime.now().isoformat()
            
            # Update document text
            result_text = "CORRECT βœ“" if is_correct else "WRONG βœ—"
            doc_text = (
                f"NBA Game: {metadata['away_team']} @ {metadata['home_team']}. "
                f"Predicted: {predicted_winner}, Actual: {actual_winner}. "
                f"Result: {result_text}"
            )
            
            self.collection.update(
                ids=[doc_id],
                documents=[doc_text],
                metadatas=[metadata]
            )
            
            logger.info(f"Updated result for game {game_id}: {result_text}")
            return True
            
        except Exception as e:
            logger.error(f"Failed to update result: {e}")
            return False
    
    def get_prediction(self, game_id: str) -> Optional[Dict]:
        """Get prediction for a specific game."""
        # Handle local storage
        if self._use_local:
            idx = self._find_local_prediction(game_id)
            if idx is not None:
                return self._local_data[idx]
            return None
        
        # Handle ChromaDB
        if not self.collection:
            return None
        
        try:
            results = self.collection.get(
                where={"game_id": game_id},
                include=["metadatas"]
            )
            
            if results["ids"]:
                return results["metadatas"][0]
            return None
            
        except Exception as e:
            logger.error(f"Failed to get prediction: {e}")
            return None
    
    def get_recent_predictions(self, n: int = 20) -> List[Dict]:
        """Get N most recent predictions with results."""
        # Handle local storage
        if self._use_local:
            predictions = sorted(
                self._local_data,
                key=lambda x: x.get("created_at", ""),
                reverse=True
            )
            return predictions[:n]
        
        # Handle ChromaDB
        if not self.collection:
            return []
        
        try:
            # Get all predictions and sort by date
            results = self.collection.get(
                include=["metadatas"]
            )
            
            if not results["ids"]:
                return []
            
            predictions = results["metadatas"]
            
            # Sort by created_at descending
            predictions.sort(key=lambda x: x.get("created_at", ""), reverse=True)
            
            return predictions[:n]
            
        except Exception as e:
            logger.error(f"Failed to get recent predictions: {e}")
            return []
    
    def _calculate_accuracy_from_predictions(self, predictions: List[Dict]) -> Dict:
        """Calculate accuracy stats from a list of predictions."""
        if not predictions:
            return {
                "total_predictions": 0,
                "completed_games": 0,
                "correct_predictions": 0,
                "overall_accuracy": 0.0,
                "by_confidence": {},
                "by_team": {},
            }
        
        # Filter to completed games only
        completed = [p for p in predictions if p.get("is_correct", -1) >= 0]
        correct = [p for p in completed if p.get("is_correct") == 1]
        
        # By confidence level
        confidence_stats = {}
        for conf in ["high", "medium", "low"]:
            conf_preds = [p for p in completed if p.get("confidence") == conf]
            conf_correct = [p for p in conf_preds if p.get("is_correct") == 1]
            if conf_preds:
                confidence_stats[conf] = {
                    "total": len(conf_preds),
                    "correct": len(conf_correct),
                    "accuracy": len(conf_correct) / len(conf_preds)
                }
        
        # By team predicted
        team_stats = {}
        for pred in completed:
            team = pred.get("predicted_winner", "")
            if team not in team_stats:
                team_stats[team] = {"total": 0, "correct": 0}
            team_stats[team]["total"] += 1
            if pred.get("is_correct") == 1:
                team_stats[team]["correct"] += 1
        
        for team in team_stats:
            total = team_stats[team]["total"]
            team_stats[team]["accuracy"] = team_stats[team]["correct"] / total if total > 0 else 0
        
        return {
            "total_predictions": len(predictions),
            "completed_games": len(completed),
            "correct_predictions": len(correct),
            "overall_accuracy": len(correct) / len(completed) if completed else 0.0,
            "by_confidence": confidence_stats,
            "by_team": team_stats,
        }
    
    def get_accuracy_stats(self) -> Dict:
        """
        Calculate comprehensive accuracy statistics.
        
        Returns:
            Dict with overall accuracy, by confidence, by team
        """
        # Handle local storage
        if self._use_local:
            return self._calculate_accuracy_from_predictions(self._local_data)
        
        # Handle ChromaDB
        if not self.collection:
            return {
                "total_predictions": 0,
                "completed_games": 0,
                "correct_predictions": 0,
                "overall_accuracy": 0.0,
                "by_confidence": {},
                "by_team": {},
            }
        
        try:
            results = self.collection.get(include=["metadatas"])
            
            if not results["ids"]:
                return {
                    "total_predictions": 0,
                    "completed_games": 0,
                    "correct_predictions": 0,
                    "overall_accuracy": 0.0,
                    "by_confidence": {},
                    "by_team": {},
                }
            
            predictions = results["metadatas"]
            
            # Filter to completed games only
            completed = [p for p in predictions if p.get("is_correct", -1) >= 0]
            correct = [p for p in completed if p.get("is_correct") == 1]
            
            # By confidence level
            confidence_stats = {}
            for conf in ["high", "medium", "low"]:
                conf_preds = [p for p in completed if p.get("confidence") == conf]
                conf_correct = [p for p in conf_preds if p.get("is_correct") == 1]
                if conf_preds:
                    confidence_stats[conf] = {
                        "total": len(conf_preds),
                        "correct": len(conf_correct),
                        "accuracy": len(conf_correct) / len(conf_preds)
                    }
            
            # By team predicted
            team_stats = {}
            for pred in completed:
                team = pred.get("predicted_winner", "")
                if team not in team_stats:
                    team_stats[team] = {"total": 0, "correct": 0}
                team_stats[team]["total"] += 1
                if pred.get("is_correct") == 1:
                    team_stats[team]["correct"] += 1
            
            for team in team_stats:
                total = team_stats[team]["total"]
                team_stats[team]["accuracy"] = team_stats[team]["correct"] / total if total > 0 else 0
            
            return {
                "total_predictions": len(predictions),
                "completed_games": len(completed),
                "correct_predictions": len(correct),
                "overall_accuracy": len(correct) / len(completed) if completed else 0.0,
                "by_confidence": confidence_stats,
                "by_team": team_stats,
            }
            
        except Exception as e:
            logger.error(f"Failed to get accuracy stats: {e}")
            return {
                "total_predictions": 0,
                "completed_games": 0,
                "correct_predictions": 0,
                "overall_accuracy": 0.0,
                "by_confidence": {},
                "by_team": {},
                "error": str(e)
            }
    
    def get_pending_predictions(self) -> List[Dict]:
        """Get predictions for games not yet completed."""
        if not self.collection:
            return []
        
        try:
            results = self.collection.get(
                where={"is_correct": -1},
                include=["metadatas"]
            )
            
            return results.get("metadatas", [])
            
        except Exception as e:
            logger.error(f"Failed to get pending predictions: {e}")
            return []


# =============================================================================
# CLI INTERFACE
# =============================================================================
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    
    tracker = PredictionTracker()
    
    print("\n=== Prediction Tracker Stats ===\n")
    
    stats = tracker.get_accuracy_stats()
    print(f"Total Predictions: {stats['total_predictions']}")
    print(f"Completed Games: {stats['completed_games']}")
    print(f"Correct Predictions: {stats['correct_predictions']}")
    print(f"Overall Accuracy: {stats['overall_accuracy']:.1%}")
    
    if stats['by_confidence']:
        print("\nBy Confidence Level:")
        for conf, data in stats['by_confidence'].items():
            print(f"  {conf.upper()}: {data['correct']}/{data['total']} ({data['accuracy']:.1%})")
    
    print("\n=== Recent Predictions ===\n")
    recent = tracker.get_recent_predictions(5)
    for pred in recent:
        status = "βœ“" if pred.get("is_correct") == 1 else "βœ—" if pred.get("is_correct") == 0 else "⏳"
        print(f"{status} {pred.get('away_team')} @ {pred.get('home_team')} - Predicted: {pred.get('predicted_winner')}")