Spaces:
Runtime error
Runtime error
| import os | |
| import json | |
| import argparse | |
| from backend.animetix.services import AnimetixService | |
| from core.domain.services.ragas_eval_service import RagasEvalService | |
def run_mlops_eval():
    """Run an automated RAG quality evaluation over a small sample of queries.

    For each sample query, an answer is generated with
    ``animetix.agentic_rag.plan_and_solve`` and then scored with
    ``RagasEvalService.evaluate_response`` against the reference context.
    A short summary is printed to stdout.

    Returns:
        dict: Stats for the MLOps pipeline with the keys
        ``avg_faithfulness`` and ``avg_answer_relevancy`` (arithmetic means
        of the per-query scores) and ``timestamp`` (``str(datetime.now())``).
    """
    # Imported locally so the function also works when this module is
    # imported by another module, not only when it is executed as a script.
    from datetime import datetime

    print("🧪 Starting Automated RAG Evaluation (RAGAS)...")
    animetix = AnimetixService()
    judge = animetix.inference_adapter
    eval_service = RagasEvalService(judge_engine=judge)

    # Test sample (simplified golden dataset).
    test_queries = [
        {"q": "Qui est le créateur de One Piece ?", "type": "Anime", "ctx": "Eiichiro Oda"},
        {"q": "Quelle est l'intrigue de Akira ?", "type": "Movie", "ctx": "Katsuhiro Otomo, néo-tokyo"},
    ]

    all_scores = []
    for item in test_queries:
        print(f"🧐 Evaluating query: '{item['q']}'")
        # 1. Generate the answer.
        response = animetix.agentic_rag.plan_and_solve(item['q'], item['type'])
        # 2. Score the answer against the reference context.
        scores = eval_service.evaluate_response(item['q'], item['ctx'], response)
        all_scores.append({"query": item['q'], "scores": scores})

    # Final stats: each `scores` entry is indexed by the 'faithfulness' and
    # 'answer_relevancy' keys; the values are presumably numeric (they are
    # summed and formatted with :.2f below).
    sample_count = len(all_scores)
    avg_faith = sum(s['scores']['faithfulness'] for s in all_scores) / sample_count
    avg_relevancy = sum(s['scores']['answer_relevancy'] for s in all_scores) / sample_count

    report = {
        "avg_faithfulness": avg_faith,
        "avg_answer_relevancy": avg_relevancy,
        "timestamp": str(datetime.now()),
    }

    print("\n" + "═"*30)
    print("📊 MLOPS REPORT")
    print(f"✅ Avg Faithfulness: {avg_faith:.2f}")
    print(f"🎯 Avg Relevancy: {avg_relevancy:.2f}")
    print("═"*30)
    return report
# Script entry point: run the evaluation when this file is executed directly.
if __name__ == "__main__":
    # This import only executes when the file is run as a script; it binds
    # `datetime` as a module-level name in that case (not on plain import).
    from datetime import datetime
    run_mlops_eval()