Spaces:
Running
Running
| # ruff: noqa: E402 | |
| import os | |
| import json | |
| import logging | |
| from datasets import Dataset | |
| from dotenv import load_dotenv | |
# Load environment variables first; the imports further down are deliberately
# placed after this call (see the E402 suppression at the top of the file),
# presumably so project modules see the loaded environment - TODO confirm.
load_dotenv()

# Logging configuration: INFO level, "<logger name> - <level> - <message>".
logging.basicConfig(
    format="%(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
| # Pobranie naszych domyślnych modułów | |
| from core.llm_router import get_llm | |
| from rag_pipeline.hybrid_retriever import get_hybrid_retriever | |
| from rag_pipeline.retriever import generate_answer | |
| from langchain_google_genai import GoogleGenerativeAIEmbeddings | |
# Optional evaluation dependencies. They are imported inside a guard so that a
# missing test environment produces a readable message instead of a traceback.
try:
    from ragas import evaluate
    from ragas.metrics import (
        context_precision,
        faithfulness,
        answer_relevancy,
        context_recall,
    )

    # Import the wrappers from the public packages rather than from internal
    # submodules, whose layout is not a stable part of the ragas API.
    from ragas.llms import LangchainLLMWrapper
    from ragas.embeddings import LangchainEmbeddingsWrapper
except ImportError as import_error:
    logger.error(
        "Brak zainstalowanych bibliotek 'ragas' / 'datasets'. Zainstaluj środowisko testowe."
    )
    # Surface the underlying cause too: an incompatible ragas version raises
    # ImportError just like a missing package does.
    logger.error("ImportError: %s", import_error)
    # `exit()` is injected by the `site` module and is not guaranteed to exist
    # (e.g. `python -S`, frozen apps); raising SystemExit always works.
    raise SystemExit(1)
def _json_default(value):
    """Fallback serializer for ``json.dump``: turn array-like objects into lists.

    Any object exposing a callable ``tolist()`` (e.g. numpy arrays/scalars) is
    converted through it; everything else raises ``TypeError`` exactly as the
    default JSON encoder would.
    """
    to_list = getattr(value, "tolist", None)
    if callable(to_list):
        return to_list()
    raise TypeError(
        f"Object of type {type(value).__name__} is not JSON serializable"
    )


def run_evaluation():
    """Evaluate the current RAG pipeline against the golden dataset with RAGAS.

    Steps:
      1. Load ``../tests/golden_dataset.json`` (relative to this file).
      2. For every item, retrieve documents with the hybrid retriever and
         generate an answer with ``generate_answer``.
      3. Score the collected samples with RAGAS (context precision,
         faithfulness, answer relevancy, context recall) using the LLM from
         ``get_llm(task_type="critical")`` and Google embeddings.
      4. Print the result and write per-sample scores to timestamped JSON and
         CSV files in the current working directory.

    Returns:
        None. Problems (missing/empty dataset, missing retriever, evaluation
        failure) are logged and the function returns early instead of raising.

    Raises:
        KeyError: if a dataset item lacks ``"question"`` or
            ``"ground_truth_answer"`` (not caught here).
    """
    import datetime  # function-scope: the module does not import it at top level

    dataset_path = os.path.join(
        os.path.dirname(__file__), "..", "tests", "golden_dataset.json"
    )
    if not os.path.exists(dataset_path):
        logger.error("Nie znaleziono pliku %s", dataset_path)
        return

    with open(dataset_path, "r", encoding="utf-8") as f:
        golden_data = json.load(f)
    logger.info("Loaded %s pytań z Golden Dataset.", len(golden_data))

    # Nothing to evaluate: stop before spending any retriever / LLM calls.
    if not golden_data:
        logger.error("Golden Dataset jest pusty - brak pytań do ewaluacji.")
        return

    # Initialise the retriever used to produce the current system's answers.
    retriever = get_hybrid_retriever()
    if not retriever:
        logger.error(
            "Nie znaleziono funkcjonującego wektora w systemie. Uruchom najpierw ingest."
        )
        return

    questions = []
    answers = []
    contexts = []
    ground_truths = []

    logger.info("🛠️ Trwa generowanie odpowiedzi RAG celem poddania ewaluacji...")
    for item in golden_data:
        question = item["question"]
        ground_truth = item["ground_truth_answer"]

        docs = retriever.invoke(question)

        questions.append(question)
        # Generate the answer with the project's own agent.
        answers.append(generate_answer(question, docs))
        contexts.append([doc.page_content for doc in docs])
        # Per the original author's note: Ragas < 0.2 used a `ground_truths`
        # list, Ragas >= 0.2 expects `ground_truth` as a plain string; the
        # newer format is used here.
        ground_truths.append(ground_truth)

    dataset = Dataset.from_dict(
        {
            "question": questions,
            "answer": answers,
            "contexts": contexts,
            "ground_truth": ground_truths,
        }
    )

    # RAGAS is configured to judge with Google Gemini from the project's router
    # (avoids OpenAI costs).
    logger.info(
        "📊 Rozpoczynam ocenę metryk (RAGAS). Używam LLM od Google via LangChainWrapper..."
    )
    eval_llm = get_llm(task_type="critical")
    eval_embeddings = GoogleGenerativeAIEmbeddings(
        model="text-embedding-004", google_api_key=os.environ.get("GOOGLE_API_KEY")
    )

    # Top-level boundary: evaluation and result export are best-effort, so any
    # failure is logged (with traceback) rather than propagated.
    try:
        ragas_eval_llm = LangchainLLMWrapper(eval_llm)
        ragas_eval_emb = LangchainEmbeddingsWrapper(eval_embeddings)

        result = evaluate(
            dataset=dataset,
            metrics=[context_precision, faithfulness, answer_relevancy, context_recall],
            llm=ragas_eval_llm,
            embeddings=ragas_eval_emb,
        )

        print("\n\n=== 🏆 WYNIKI EWALUACJI RAGAS ===")
        print(result)

        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

        # Build the DataFrame once and reuse it for both export formats.
        df = result.to_pandas()

        out_json = f"rag_evaluation_results_{timestamp}.json"
        with open(out_json, "w", encoding="utf-8") as f:
            # NOTE(review): depending on the ragas/datasets version, list
            # columns such as `contexts` may come back as numpy arrays, which
            # the stock encoder rejects - `_json_default` converts them.
            json.dump(
                df.to_dict(orient="records"),
                f,
                ensure_ascii=False,
                indent=2,
                default=_json_default,
            )

        out_csv = f"rag_evaluation_results_{timestamp}.csv"
        df.to_csv(out_csv, index=False)

        print(
            f"💡 Analiza szczegółowa została zapisana w lokalizacji: {out_json} oraz {out_csv}"
        )
    except Exception as exc:
        logger.exception("Nie udało się zakończyć ewaluacji: %s", exc)
# Script entry point: run the evaluation only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    run_evaluation()