File size: 8,400 Bytes
bd93e48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
import os
import asyncio
import uuid
from dotenv import load_dotenv
from datasets import Dataset
import pandas as pd
from typing import Sequence, Any, List

# Ragas and LangChain components
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_recall,
    context_precision,
)
from ragas.testset import TestsetGenerator
# NOTE: The 'evolutions' import has been completely removed.

# Your specific RAG components from app.py
from langchain_groq import ChatGroq
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.storage import InMemoryStore
from langchain_community.retrievers import BM25Retriever
from langchain.retrievers import EnsembleRetriever, ContextualCompressionRetriever
from langchain.retrievers.document_compressors.base import BaseDocumentCompressor
from langchain_core.documents import Document
from sentence_transformers.cross_encoder import CrossEncoder
from rag_processor import create_rag_chain
from langchain_community.chat_message_histories import ChatMessageHistory
import fitz

# Load environment variables
load_dotenv()

# --- Re-implementing LocalReranker from app.py ---
class LocalReranker(BaseDocumentCompressor):
    """Cross-encoder reranking compressor (mirrors the one in app.py).

    Scores every (query, document) pair with the wrapped cross-encoder
    and keeps only the `top_n` best-scoring documents, stamping each
    kept document's metadata with its `rerank_score`.
    """
    model: Any  # expected to expose .predict(pairs) -> scores; NOTE(review): a sentence-transformers CrossEncoder per the caller — confirm
    top_n: int = 3

    class Config:
        arbitrary_types_allowed = True

    def compress_documents(self, documents: Sequence[Document], query: str, callbacks=None) -> Sequence[Document]:
        """Return the `top_n` documents most relevant to `query`, highest score first."""
        if not documents:
            return []
        scores = self.model.predict(
            [[query, doc.page_content] for doc in documents],
            show_progress_bar=False,
        )
        ranked = sorted(zip(documents, scores), key=lambda pair: pair[1], reverse=True)
        selected = []
        for doc, score in ranked[: self.top_n]:
            doc.metadata['rerank_score'] = float(score)
            selected.append(doc)
        return selected

# --- Helper Functions ---
def load_pdf_with_fallback(filepath):
    """Load a PDF into LangChain Documents using PyMuPDF (fitz).

    Produces one Document per page that has non-whitespace text, with
    `source` (basename) and 1-based `page` metadata. Prints a status
    line either way; re-raises on failure or when no text is found.
    """
    try:
        pages = []
        with fitz.open(filepath) as pdf_doc:
            for page_index, page in enumerate(pdf_doc):
                content = page.get_text()
                if not content.strip():
                    continue  # skip image-only / blank pages
                metadata = {"source": os.path.basename(filepath), "page": page_index + 1}
                pages.append(Document(page_content=content, metadata=metadata))
        if not pages:
            # Raised inside the try so the failure message below is printed too.
            raise ValueError("No text content found in PDF.")
        print(f"βœ“ Successfully loaded PDF: {filepath}")
        return pages
    except Exception as e:
        print(f"βœ— PyMuPDF failed for {filepath}: {e}")
        raise

async def main():
    """Run the RAGAS evaluation pipeline end to end.

    Steps: initialize the Groq LLMs, embeddings, and cross-encoder;
    build a hybrid (BM25 + FAISS) retrieve-then-rerank parent-document
    RAG chain over a fixed PDF; generate a small Ragas testset; answer
    each generated question with the chain; score the answers with the
    Ragas metrics; and write the per-question results to
    evaluation_results.csv. Any exception is printed with a traceback
    and swallowed so the script exits cleanly.
    """
    print("\n" + "="*60 + "\nSTARTING RAGAS EVALUATION\n" + "="*60)

    pdf_path = "uploads/Unit_-_1_Introduction.pdf"
    if not os.path.exists(pdf_path):
        print(f"βœ— Error: PDF not found at {pdf_path}")
        return

    try:
        # --- 1. Setup Models ---
        print("\n--- 1. Initializing Models ---")
        groq_api_key = os.getenv("GROQ_API_KEY")
        if not groq_api_key or groq_api_key == "your_groq_api_key_here":
            raise ValueError("GROQ_API_KEY not found or is a placeholder.")

        # Smaller model generates questions; larger model acts as the critic/judge.
        generator_llm = ChatGroq(model="llama-3.1-8b-instant", api_key=groq_api_key)
        critic_llm = ChatGroq(model="llama-3.1-70b-versatile", api_key=groq_api_key)
        embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        reranker_model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2", device='cpu')
        print("βœ“ Models initialized.")

        # --- 2. Setup RAG Pipeline ---
        print("\n--- 2. Setting up RAG Pipeline ---")
        documents = load_pdf_with_fallback(pdf_path)

        # Parent/child splitting: small child chunks for retrieval precision,
        # larger parent chunks handed to the LLM for fuller context.
        parent_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=400)
        child_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=50)
        parent_docs = parent_splitter.split_documents(documents)
        doc_ids = [str(uuid.uuid4()) for _ in parent_docs]

        child_docs = []
        for i, doc in enumerate(parent_docs):
            _id = doc_ids[i]
            sub_docs = child_splitter.split_documents([doc])
            for child in sub_docs:
                child.metadata["doc_id"] = _id  # link each child back to its parent
            child_docs.extend(sub_docs)

        store = InMemoryStore()
        store.mset(list(zip(doc_ids, parent_docs)))
        vectorstore = FAISS.from_documents(child_docs, embedding_model)

        # Hybrid retrieval: lexical (BM25) + dense (FAISS), fused, then reranked.
        bm25_retriever = BM25Retriever.from_documents(child_docs, k=10)
        faiss_retriever = vectorstore.as_retriever(search_kwargs={"k": 10})
        ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, faiss_retriever], weights=[0.4, 0.6])

        reranker = LocalReranker(model=reranker_model, top_n=5)
        compression_retriever = ContextualCompressionRetriever(base_compressor=reranker, base_retriever=ensemble_retriever)

        def get_parents(docs: List[Document]) -> List[Document]:
            """Map reranked child chunks to their deduplicated parent documents."""
            parent_ids = {d.metadata["doc_id"] for d in docs}
            # InMemoryStore.mget returns None for missing keys; drop them so
            # downstream consumers never see a None "document".
            return [p for p in store.mget(list(parent_ids)) if p is not None]

        final_retriever = compression_retriever | get_parents

        message_histories = {}
        def get_session_history(session_id: str):
            """Return (creating on first use) the chat history for a session."""
            if session_id not in message_histories:
                message_histories[session_id] = ChatMessageHistory()
            return message_histories[session_id]

        rag_chain = create_rag_chain(final_retriever, get_session_history)
        print("βœ“ RAG chain created successfully.")

        # --- 3. Generate Testset ---
        print("\n--- 3. Generating Test Questions ---")
        generator = TestsetGenerator.from_langchain(generator_llm, critic_llm, embedding_model)

        # Generate a simple test set without complex distributions
        testset = generator.generate_with_langchain_docs(documents, testset_size=5)
        print("βœ“ Testset generated.")

        # --- 4. Run RAG Chain on Testset ---
        print("\n--- 4. Running RAG Chain to Generate Answers ---")
        # Convert to pandas once (the previous version converted twice).
        testset_df = testset.to_pandas()
        test_questions = testset_df['question'].tolist()
        ground_truths = testset_df['ground_truth'].tolist()

        answers = []
        contexts = []

        for i, question in enumerate(test_questions):
            print(f"  Processing question {i+1}/{len(test_questions)}...")
            # Retrieve contexts
            retrieved_docs = final_retriever.invoke(question)
            contexts.append([doc.page_content for doc in retrieved_docs])
            # Get answer from chain; a fresh session id isolates each question
            # from the others' chat history.
            config = {"configurable": {"session_id": str(uuid.uuid4())}}
            answer = await rag_chain.ainvoke({"question": question}, config=config)
            answers.append(answer)

        # --- 5. Evaluate with Ragas ---
        print("\n--- 5. Evaluating Results with Ragas ---")
        eval_data = {
            'question': test_questions,
            'answer': answers,
            'contexts': contexts,
            'ground_truth': ground_truths
        }
        eval_dataset = Dataset.from_dict(eval_data)

        result = evaluate(
            eval_dataset,
            metrics=[faithfulness, answer_relevancy, context_precision, context_recall],
            llm=critic_llm,
            embeddings=embedding_model
        )

        print("\n" + "="*60 + "\nEVALUATION RESULTS\n" + "="*60)
        print(result)

        # --- 6. Save Results ---
        print("\n--- 6. Saving Results ---")
        results_df = result.to_pandas()
        results_df.to_csv("evaluation_results.csv", index=False)
        print("βœ“ Evaluation results saved to evaluation_results.csv")

        print("\n" + "="*60 + "\nEVALUATION COMPLETE!\n" + "="*60)

    except Exception as e:
        print(f"\nβœ— An error occurred during the process: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    # Script entry point: drive the async evaluation pipeline to completion.
    asyncio.run(main())