File size: 13,061 Bytes
c095e08
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
da36223
 
 
 
 
 
dfac64b
da36223
 
 
 
 
 
 
 
 
 
dfac64b
da36223
 
 
dfac64b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
da36223
dfac64b
 
 
 
 
 
 
 
 
da36223
 
dfac64b
da36223
c095e08
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
"""
NBA ML Prediction System - Auto Training Scheduler
===================================================
Background scheduler that automatically trains the model on new game data.
Runs within the Streamlit app or as a standalone service.
"""

import logging
import threading
import time
from datetime import datetime, timedelta
from typing import Optional
import atexit

# Module-level logger named after this module. No handlers or levels are
# configured here; the CLI entry point at the bottom of this file calls
# logging.basicConfig, otherwise configuration is left to the importing app.
logger = logging.getLogger(__name__)


class AutoTrainer:
    """
    Automatic model training scheduler.
    
    Runs background tasks to:
    1. Ingest completed games every hour
    2. Retrain the model daily
    3. Update prediction results after games
    """
    
    _instance: Optional['AutoTrainer'] = None
    _lock = threading.Lock()
    
    def __new__(cls):
        """Singleton pattern - only one auto trainer instance."""
        with cls._lock:
            if cls._instance is None:
                cls._instance = super().__new__(cls)
                cls._instance._initialized = False
            return cls._instance
    
    def __init__(self):
        if self._initialized:
            return
        
        self._initialized = True
        self._running = False
        self._thread: Optional[threading.Thread] = None
        self._stop_event = threading.Event()
        
        # Track last run times
        self._last_ingest = None
        self._last_retrain = None
        self._last_results_check = None
        
        # Intervals (in seconds)
        self.INGEST_INTERVAL = 3600  # 1 hour
        self.RETRAIN_INTERVAL = 86400  # 24 hours (daily)
        self.RESULTS_CHECK_INTERVAL = 1800  # 30 minutes
        
        logger.info("AutoTrainer initialized")
    
    def start(self):
        """Start the background training scheduler."""
        if self._running:
            logger.info("AutoTrainer already running")
            return
        
        self._running = True
        self._stop_event.clear()
        self._thread = threading.Thread(target=self._run_loop, daemon=True)
        self._thread.start()
        
        # Register cleanup on exit
        atexit.register(self.stop)
        
        logger.info("AutoTrainer started - background training enabled")
    
    def stop(self):
        """Stop the background scheduler."""
        if not self._running:
            return
        
        self._running = False
        self._stop_event.set()
        
        if self._thread and self._thread.is_alive():
            self._thread.join(timeout=5)
        
        logger.info("AutoTrainer stopped")
    
    def _run_loop(self):
        """Main background loop - checks for tasks to run."""
        logger.info("AutoTrainer loop started")
        
        while not self._stop_event.is_set():
            try:
                now = datetime.now()
                
                # Check and update prediction results (every 30 min)
                if self._should_run(self._last_results_check, self.RESULTS_CHECK_INTERVAL):
                    self._check_results()
                    self._last_results_check = now
                
                # Ingest completed games (every hour)
                if self._should_run(self._last_ingest, self.INGEST_INTERVAL):
                    self._ingest_games()
                    self._last_ingest = now
                
                # Retrain model only after all daily games are complete
                # NBA games typically end by 1 AM ET, so we retrain at 4 AM ET (safe window)
                # 4 AM ET = 1:30 PM IST
                if self._should_run(self._last_retrain, self.RETRAIN_INTERVAL):
                    if self._all_daily_games_complete():
                        self._retrain_model()
                        self._last_retrain = now
                    else:
                        logger.info("AutoTrainer: Waiting for all games to complete before retrain")
                
            except Exception as e:
                logger.error(f"AutoTrainer error: {e}")
            
            # Sleep for 5 minutes between checks
            self._stop_event.wait(300)
    
    def _all_daily_games_complete(self) -> bool:
        """Check if all of today's games have completed."""
        try:
            from src.live_data_collector import LiveDataCollector
            collector = LiveDataCollector()
            
            # Get live games - if any are still in progress, don't retrain
            live_games = collector.get_live_games()
            if live_games:
                logger.info(f"AutoTrainer: {len(live_games)} games still in progress")
                return False
            
            # Get upcoming games - if any haven't started, don't retrain yet
            upcoming = collector.get_upcoming_games()
            if upcoming:
                logger.info(f"AutoTrainer: {len(upcoming)} games haven't started yet")
                return False
            
            # All games completed (or no games today)
            return True
            
        except Exception as e:
            logger.warning(f"Could not check game status: {e}")
            # Default to checking time - after 4 AM ET (safe window)
            hour = datetime.now().hour
            # 4 AM ET ≈ 1:30 PM IST, 9 AM UTC
            return hour >= 4 or hour < 12  # Between 4 AM and noon
    
    def _should_run(self, last_run: Optional[datetime], interval: int) -> bool:
        """Check if enough time has passed since last run."""
        if last_run is None:
            return True
        return (datetime.now() - last_run).total_seconds() >= interval
    
    def _check_results(self):
        """Check completed games and update prediction results."""
        logger.info("AutoTrainer: Checking prediction results...")
        try:
            from src.prediction_pipeline import PredictionPipeline
            pipeline = PredictionPipeline()
            updated = pipeline.check_prediction_results()
            logger.info(f"AutoTrainer: Updated {len(updated)} prediction results")
        except Exception as e:
            logger.error(f"AutoTrainer: Failed to check results: {e}")
    
    def _ingest_games(self):
        """Ingest completed games into training data."""
        logger.info("AutoTrainer: Ingesting completed games...")
        try:
            from src.continuous_learner import ContinuousLearner
            learner = ContinuousLearner()
            count = learner.ingest_completed_games()
            logger.info(f"AutoTrainer: Ingested {count} new games")
        except Exception as e:
            logger.error(f"AutoTrainer: Failed to ingest games: {e}")
    
    def _retrain_model(self):
        """Full model retrain cycle."""
        logger.info("AutoTrainer: Starting daily model retrain...")
        try:
            from src.continuous_learner import ContinuousLearner
            learner = ContinuousLearner()
            results = learner.run_update_cycle(retrain=True)
            
            if results.get("model_retrained"):
                accuracy = results.get("metrics", {}).get("test_accuracy", 0)
                logger.info(f"AutoTrainer: Model retrained! Accuracy: {accuracy:.2%}")
            else:
                logger.info("AutoTrainer: No new data to retrain on")
                
        except Exception as e:
            logger.error(f"AutoTrainer: Failed to retrain model: {e}")
    
    def get_status(self) -> dict:
        """Get current auto-trainer status."""
        return {
            "running": self._running,
            "last_ingest": self._last_ingest.isoformat() if self._last_ingest else None,
            "last_retrain": self._last_retrain.isoformat() if self._last_retrain else None,
            "last_results_check": self._last_results_check.isoformat() if self._last_results_check else None,
            "next_ingest_in": self._time_until_next(self._last_ingest, self.INGEST_INTERVAL),
            "next_retrain_in": self._time_until_next(self._last_retrain, self.RETRAIN_INTERVAL),
        }
    
    def _time_until_next(self, last_run: Optional[datetime], interval: int) -> str:
        """Human-readable time until next run."""
        if last_run is None:
            return "Soon"
        
        elapsed = (datetime.now() - last_run).total_seconds()
        remaining = max(0, interval - elapsed)
        
        if remaining < 60:
            return f"{int(remaining)}s"
        elif remaining < 3600:
            return f"{int(remaining / 60)}m"
        else:
            return f"{int(remaining / 3600)}h {int((remaining % 3600) / 60)}m"
    
    def force_ingest(self):
        """Force an immediate game ingestion."""
        threading.Thread(target=self._ingest_games, daemon=True).start()
    
    def run_training_cycle(self) -> dict:
        """
        Public method for external training requests (e.g., from server.py).
        Uses ContinuousLearner for the actual training.
        
        Returns:
            Dict with 'success', 'accuracy', 'skipped', and optionally 'error' keys
        """
        try:
            from src.continuous_learner import ContinuousLearner
            
            logger.info("AutoTrainer: Running training cycle via ContinuousLearner...")
            learner = ContinuousLearner()
            result = learner.run_update_cycle(retrain=True)
            
            accuracy = result.get("metrics", {}).get("test_accuracy", 0)
            success = result.get("model_retrained", False)
            games_ingested = result.get("games_ingested", 0)
            
            if success:
                logger.info(f"AutoTrainer: Training cycle complete. Accuracy: {accuracy:.2%}")
                return {
                    "success": True,
                    "skipped": False,
                    "accuracy": accuracy,
                    "games_ingested": games_ingested
                }
            elif games_ingested == 0:
                # No new data - this is NOT an error, just nothing to train on
                logger.info(f"AutoTrainer: No new games available for training")
                return {
                    "success": True,  # Not a failure
                    "skipped": True,  # Just skipped
                    "reason": "No new games to train on",
                    "accuracy": accuracy,
                    "games_ingested": 0
                }
            else:
                # Games ingested but training failed
                logger.warning(f"AutoTrainer: Training failed after ingesting {games_ingested} games")
                return {
                    "success": False,
                    "skipped": False,
                    "error": "Training failed - will retry next cycle",
                    "accuracy": accuracy,
                    "games_ingested": games_ingested
                }
        except Exception as e:
            logger.error(f"AutoTrainer: Training cycle failed: {e}")
            return {"success": False, "skipped": False, "error": str(e), "accuracy": 0}
    
    def force_retrain(self):
        """Force an immediate model retrain."""
        threading.Thread(target=self._retrain_model, daemon=True).start()


# Global instance, created when this module is imported. AutoTrainer.__new__
# enforces a singleton, so any later AutoTrainer() call returns this same
# object. Constructing it does not start the background thread; call
# auto_trainer.start() for that.
auto_trainer = AutoTrainer()


# =============================================================================
# CLI INTERFACE
# =============================================================================
if __name__ == "__main__":
    import argparse

    logging.basicConfig(level=logging.INFO)

    # Both options are plain boolean switches; register them from a table so
    # the flag/help pairs sit side by side.
    cli = argparse.ArgumentParser(description="NBA Auto Training Scheduler")
    for flag, help_text in (
        ("--start", "Start the scheduler"),
        ("--status", "Show scheduler status"),
    ):
        cli.add_argument(flag, action="store_true", help=help_text)
    options = cli.parse_args()

    if not (options.status or options.start):
        # Neither switch given: just print a usage hint.
        print("Use --start to begin auto training or --status to check status")

    elif options.status:
        # --status takes precedence when both switches are supplied.
        status = auto_trainer.get_status()
        print("\n=== Auto Trainer Status ===\n")
        print(f"Running: {'Yes ✓' if status['running'] else 'No'}")
        print(f"Last Ingest: {status['last_ingest'] or 'Never'}")
        print(f"Last Retrain: {status['last_retrain'] or 'Never'}")
        print(f"Next Ingest In: {status['next_ingest_in']}")
        print(f"Next Retrain In: {status['next_retrain_in']}")

    else:
        # --start: launch the scheduler, then keep the main thread alive and
        # print a heartbeat once a minute until the user presses Ctrl+C.
        print("\n=== Starting Auto Trainer ===\n")
        print("Background training enabled!")
        print("- Checks prediction results every 30 minutes")
        print("- Ingests completed games every 1 hour")
        print("- Retrains model every 24 hours")
        print("\nPress Ctrl+C to stop...\n")

        auto_trainer.start()

        try:
            while True:
                time.sleep(60)
                status = auto_trainer.get_status()
                print(f"[{datetime.now().strftime('%H:%M:%S')}] Running... Next ingest: {status['next_ingest_in']}, Next retrain: {status['next_retrain_in']}")
        except KeyboardInterrupt:
            print("\nStopping...")
            auto_trainer.stop()