Spaces:
Paused
Paused
| #!/usr/bin/env python3 | |
| """ | |
| Simple training monitor for Dressify. | |
| Shows real-time training progress and status. | |
| """ | |
| import os | |
| import time | |
| import json | |
| from datetime import datetime | |
| from typing import Dict, Any, Optional | |
class TrainingMonitor:
    """Track a training run by persisting its status to a JSON file.

    The status is stored in ``<export_dir>/training_status.json`` and is
    rewritten in full on every state change.
    """

    def __init__(self, export_dir: str = "models/exports"):
        """Record where the status file lives; nothing is written yet.

        Args:
            export_dir: Directory that holds ``training_status.json``.
        """
        self.export_dir = export_dir
        self.status_file = os.path.join(export_dir, "training_status.json")
        # Set by start_training(); update_progress() is a no-op until then.
        self.start_time: Optional[datetime] = None

    def start_training(self, model_name: str, config: Dict[str, Any]) -> None:
        """Start monitoring a training session and write the initial status.

        Args:
            model_name: Name stored under the ``model`` key.
            config: Training configuration; must be JSON-serializable.
        """
        self.start_time = datetime.now()
        status = {
            "model": model_name,
            "status": "training",
            "start_time": self.start_time.isoformat(),
            "config": config,
            "epochs_completed": 0,
            "current_loss": None,
            # NOTE: json.dump writes this as the non-standard token
            # ``Infinity``; Python's json module reads it back as float inf.
            "best_loss": float("inf"),
            "last_update": datetime.now().isoformat(),
        }
        self._save_status(status)
        print(f"π Started monitoring {model_name} training")

    def update_progress(self, epoch: int, loss: float, is_best: bool = False) -> None:
        """Record the latest epoch and loss.

        Does nothing when start_training() has not been called on this
        instance or when the status file cannot be read.

        Args:
            epoch: Number of epochs completed so far.
            loss: Loss value for this epoch.
            is_best: When true, ``best_loss`` is lowered to ``loss`` if smaller.
        """
        if not self.start_time:
            return
        status = self._load_status()
        if status is None:
            return

        status["epochs_completed"] = epoch
        status["current_loss"] = loss
        status["last_update"] = datetime.now().isoformat()
        if is_best:
            status["best_loss"] = min(status.get("best_loss", float("inf")), loss)
        self._save_status(status)

    def complete_training(self, final_loss: float, total_epochs: int) -> None:
        """Mark training as completed and record the final metrics.

        Does nothing when the status file cannot be read.

        Args:
            final_loss: Loss at the end of training.
            total_epochs: Total number of epochs that were run.
        """
        status = self._load_status()
        if status is None:
            return

        finished_at = datetime.now()
        # Duration is only known when this instance also started the run.
        duration = str(finished_at - self.start_time) if self.start_time else None

        status["status"] = "completed"
        status["epochs_completed"] = total_epochs
        status["current_loss"] = final_loss
        status["best_loss"] = min(status.get("best_loss", float("inf")), final_loss)
        status["completion_time"] = finished_at.isoformat()
        status["duration"] = duration
        self._save_status(status)
        print(f"β Training completed in {duration}")

    def fail_training(self, error: str) -> None:
        """Mark training as failed.

        Does nothing when the status file cannot be read.

        Args:
            error: Human-readable description of the failure.
        """
        status = self._load_status()
        if status is None:
            return

        status["status"] = "failed"
        status["error"] = error
        status["failure_time"] = datetime.now().isoformat()
        self._save_status(status)
        print(f"β Training failed: {error}")

    def get_status(self) -> Optional[Dict[str, Any]]:
        """Return the current status dict, or None when it is unavailable."""
        return self._load_status()

    def _load_status(self) -> Optional[Dict[str, Any]]:
        """Read and parse the status file.

        Returns:
            The parsed status, or None when the file is missing, unreadable,
            not valid JSON, or does not contain a JSON object.
        """
        try:
            with open(self.status_file, "r", encoding="utf-8") as f:
                status = json.load(f)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            # Unlike a bare ``except``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            return None
        return status if isinstance(status, dict) else None

    def _save_status(self, status: Dict[str, Any]) -> None:
        """Write ``status`` to the status file, creating the directory if needed."""
        os.makedirs(self.export_dir, exist_ok=True)
        with open(self.status_file, "w", encoding="utf-8") as f:
            json.dump(status, f, indent=2)

    def print_status(self) -> None:
        """Print a human-readable summary of the current status."""
        status = self.get_status()
        if not status:
            print("π No training status available")
            return

        print(f"\nπ Training Status: {status['model']}")
        print(f"Status: {status['status']}")
        print(f"Started: {status['start_time']}")
        print(f"Epochs: {status['epochs_completed']}")

        # Compare against None rather than truthiness: a loss of exactly 0.0
        # is a real value and must not be reported as "N/A".
        current_loss = status["current_loss"]
        if current_loss is not None:
            print(f"Current Loss: {current_loss:.4f}")
        else:
            print("Current Loss: N/A")

        # float("inf") is the "no best loss yet" sentinel from start_training().
        best_loss = status.get("best_loss")
        if best_loss is None or best_loss == float("inf"):
            print("Best Loss: N/A")
        else:
            print(f"Best Loss: {best_loss:.4f}")

        print(f"Last Update: {status['last_update']}")
        if status["status"] == "completed":
            print(f"Duration: {status.get('duration')}")
        elif status["status"] == "failed":
            print(f"Error: {status.get('error')}")
def create_monitor() -> TrainingMonitor:
    """Build a TrainingMonitor for the directory named by ``EXPORT_DIR``.

    Falls back to ``models/exports`` when the environment variable is unset.
    """
    return TrainingMonitor(os.environ.get("EXPORT_DIR", "models/exports"))
if __name__ == "__main__":
    # Manual smoke test: print whatever status is currently stored.
    create_monitor().print_status()