Spaces:
Runtime error
Runtime error
| import os | |
| import json | |
| import argparse | |
| from backend.animetix.services import AnimetixService | |
| from core.domain.services.long_context_service import LongContextDiscoveryService | |
def run_evaluation(max_size=32000):
    """
    Run the long-context memory evaluation (Needle In A Haystack).

    Benchmarks the inference engine at every predefined context size that
    does not exceed ``max_size``, prints the accuracy and average latency,
    and writes the raw results to ``data/mlops/long_context_benchmark.json``.

    Args:
        max_size: Largest context size to benchmark; predefined sizes above
            this value are skipped.

    Returns:
        None. The summary is printed and the report is written to disk.
    """
    print("🚀 Starting Long-Context Memory Evaluation (Animetix RULER-lite)...")

    # Drop the sizes that exceed the limit of the current model.
    sizes = [s for s in (2000, 8000, 16000, 32000) if s <= max_size]
    if not sizes:
        # Nothing to benchmark: bail out before building the services and
        # before the averages below would divide by zero.
        print(f"⚠️ No context size fits within max_size={max_size}; nothing to evaluate.")
        return

    animetix = AnimetixService()
    long_ctx_service = LongContextDiscoveryService(inference_engine=animetix.inference_adapter)
    results = long_ctx_service.benchmark_model_limits(sizes=sizes)

    # Compute the statistics; each result is read via its 'success' and
    # 'latency_sec' keys.
    total = len(results)
    if total == 0:
        # Guard the divisions below against an empty benchmark run.
        print("⚠️ Benchmark returned no results; skipping statistics and report.")
        return
    successes = sum(1 for r in results if r['success'])
    avg_latency = sum(r['latency_sec'] for r in results) / total

    print("\n" + "="*40)
    print(f"📊 FINAL RESULTS (Max Context: {max_size})")
    print(f"✅ Accuracy: {(successes/total)*100:.2f}% ({successes}/{total})")
    print(f"⏱️ Avg Latency: {avg_latency:.2f}s")
    print("="*40)

    # Save to data/mlops, creating the directory on a fresh checkout so the
    # write does not fail with FileNotFoundError.
    output_path = "data/mlops/long_context_benchmark.json"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)
    print(f"💾 Report saved to {output_path}")
if __name__ == "__main__":
    # Command-line entry point: --max-size caps the context sizes that get
    # benchmarked and is forwarded unchanged to run_evaluation.
    cli = argparse.ArgumentParser()
    cli.add_argument("--max-size", type=int, default=32000)
    options = cli.parse_args()
    run_evaluation(max_size=options.max_size)