b24122 commited on
Commit
5d0a351
·
1 Parent(s): 9ae222c

Improve legal case evaluation with Gemini AI and enhanced RAG system

Browse files

Refactors GeminiService and RAGService to improve case evaluation using a dual retrieval RAG with LegalBERT predictions and Gemini AI evaluation.

Replit-Commit-Author: Agent
Replit-Commit-Session-Id: 63975d62-3d3b-48af-8685-b7e915f31f2b
Replit-Commit-Screenshot-Url: https://storage.googleapis.com/screenshot-production-us-central1/a5a12774-3181-414d-89e4-a4da8e3fb1ca/63975d62-3d3b-48af-8685-b7e915f31f2b/i8A93Md

app/api/routes.py CHANGED
@@ -47,28 +47,21 @@ async def analyze_case(request: CaseAnalysisRequest):
47
 
48
  logger.info(f"Initial verdict: {initial_verdict}, confidence: {confidence}")
49
 
50
- # Step 2: Retrieve supporting legal documents using RAG
51
- if request.useQueryGeneration:
52
- support_chunks, search_query = rag_service.retrieveDualSupportChunks(
53
- request.caseText, gemini_service
54
- )
55
- else:
56
- support_chunks, logs = rag_service.retrieveSupportChunksParallel(request.caseText)
57
- search_query = request.caseText
58
-
59
- logger.info(f"Retrieved support chunks from {len(support_chunks)} sources")
60
-
61
- # Step 3: Evaluate with Gemini AI
62
  evaluation_result = gemini_service.evaluateCaseWithGemini(
63
  inputText=request.caseText,
64
  modelVerdict=initial_verdict,
65
  confidence=confidence,
66
- support=support_chunks,
67
- searchQuery=search_query
68
  )
69
 
 
 
 
70
  logger.info(f"Gemini evaluation completed. Final verdict: {evaluation_result.get('finalVerdictByGemini')}")
71
 
 
72
  return CaseAnalysisResponse(
73
  initialVerdict=initial_verdict,
74
  initialConfidence=confidence,
 
47
 
48
  logger.info(f"Initial verdict: {initial_verdict}, confidence: {confidence}")
49
 
50
+ # Step 2: Evaluate with Gemini AI using RAG
 
 
 
 
 
 
 
 
 
 
 
51
  evaluation_result = gemini_service.evaluateCaseWithGemini(
52
  inputText=request.caseText,
53
  modelVerdict=initial_verdict,
54
  confidence=confidence,
55
+ retrieveFn=rag_service,
56
+ geminiQueryModel=gemini_service if request.useQueryGeneration else None
57
  )
58
 
59
+ logger.info(f"Retrieved support chunks from RAG system")
60
+ search_query = evaluation_result.get("ragSearchQuery", request.caseText)
61
+
62
  logger.info(f"Gemini evaluation completed. Final verdict: {evaluation_result.get('finalVerdictByGemini')}")
63
 
64
+ support_chunks = evaluation_result.get("support", {})
65
  return CaseAnalysisResponse(
66
  initialVerdict=initial_verdict,
67
  initialConfidence=confidence,
app/services/gemini_service.py CHANGED
@@ -22,7 +22,7 @@ class GeminiService:
22
  except Exception as e:
23
  logger.error(f"Failed to initialize Gemini client: {str(e)}")
24
 
25
- def generateSearchQueryFromCase(self, caseFacts: str, verbose: bool = False) -> str:
26
  if not self.client:
27
  raise ValueError("Gemini client not initialized")
28
 
@@ -58,7 +58,7 @@ Return only the search query, no explanation or prefix:
58
  if response.text:
59
  query = response.text.replace("Search Query:", "").strip().strip('"').replace("\n", "")
60
  else:
61
- query = caseFacts[:50] # Fallback to first 50 chars
62
 
63
  if verbose:
64
  logger.info(f"Generated RAG Query: {query}")
@@ -68,18 +68,18 @@ Return only the search query, no explanation or prefix:
68
  logger.error(f"Error generating search query: {str(e)}")
69
  raise ValueError(f"Search query generation failed: {str(e)}")
70
 
71
- def _build_gemini_prompt(self, input_text: str, model_verdict: str, confidence: float,
72
- support: Dict[str, List], query: Optional[str] = None) -> str:
73
- verdict_outcome = "a loss for the person" if model_verdict.lower() == "guilty" else "in favor of the person"
74
 
75
  prompt = f"""You are a judge evaluating a legal dispute under Indian law.
76
 
77
  ### Case Facts:
78
- {input_text}
79
 
80
  ### Initial Model Verdict:
81
- {model_verdict.upper()} (Confidence: {confidence * 100:.2f}%)
82
- This verdict is interpreted as {verdict_outcome}.
83
  """
84
 
85
  if query:
@@ -122,8 +122,8 @@ This verdict is interpreted as {verdict_outcome}.
122
  2. If relevant past cases appear in the retrieved materials, summarize them and analyze whether they support or contradict the model's verdict.
123
 
124
  3. Using the above, assess the model's prediction:
125
- - If confidence is below {settings.confidence_threshold * 100}%, you may revise or retain it.
126
- - If confidence is {settings.confidence_threshold * 100}% or higher, retain unless clear legal grounds exist to challenge it.
127
 
128
  4. Provide a thorough and formal legal explanation that:
129
  - Justifies the final decision using legal logic
@@ -139,31 +139,33 @@ Respond in the tone of a formal Indian judge. Your explanation should reflect re
139
  """
140
  return prompt
141
 
142
- def _extract_final_verdict(self, gemini_output: str) -> tuple[Optional[str], str]:
143
- verdict_match = re.search(r"final verdict\s*[:\-]\s*(guilty|not guilty)", gemini_output, re.IGNORECASE)
144
- changed_match = re.search(r"verdict changed\s*[:\-]\s*(yes|no)", gemini_output, re.IGNORECASE)
145
 
146
- final_verdict = verdict_match.group(1).lower() if verdict_match else None
147
- verdict_changed = "changed" if changed_match and changed_match.group(1).lower() == "yes" else "not changed"
148
 
149
- return final_verdict, verdict_changed
150
 
151
- def evaluateCaseWithGemini(self, inputText: str, modelVerdict: str, confidence: float,
152
- support: Dict[str, List], searchQuery: str) -> Dict[str, Any]:
153
- if not self.client:
154
- raise ValueError("Gemini client not initialized")
155
-
156
  try:
157
- prompt = self._build_gemini_prompt(inputText, modelVerdict, confidence, support, searchQuery)
158
-
 
 
 
 
 
159
  response = self.client.models.generate_content(
160
  model=settings.gemini_model,
161
  contents=prompt
162
  )
163
-
164
  geminiOutput = response.text if response.text else "No response from Gemini"
165
- finalVerdict, verdictChanged = self._extract_final_verdict(geminiOutput)
166
-
 
167
  logs = {
168
  "inputText": inputText,
169
  "modelVerdict": modelVerdict,
@@ -175,16 +177,16 @@ Respond in the tone of a formal Indian judge. Your explanation should reflect re
175
  "verdictChanged": verdictChanged,
176
  "ragSearchQuery": searchQuery
177
  }
178
-
179
  return logs
 
180
  except Exception as e:
181
- logger.error(f"Error in Gemini evaluation: {str(e)}")
182
  return {
183
  "error": str(e),
184
  "inputText": inputText,
185
  "modelVerdict": modelVerdict,
186
  "confidence": confidence,
187
- "ragSearchQuery": searchQuery,
188
  "support": None,
189
  "promptToGemini": None,
190
  "geminiOutput": None,
 
22
  except Exception as e:
23
  logger.error(f"Failed to initialize Gemini client: {str(e)}")
24
 
25
+ def generateSearchQueryFromCase(self, caseFacts: str, geminiModel=None, verbose: bool = False) -> str:
26
  if not self.client:
27
  raise ValueError("Gemini client not initialized")
28
 
 
58
  if response.text:
59
  query = response.text.replace("Search Query:", "").strip().strip('"').replace("\n", "")
60
  else:
61
+ query = caseFacts[:50] # Fallback
62
 
63
  if verbose:
64
  logger.info(f"Generated RAG Query: {query}")
 
68
  logger.error(f"Error generating search query: {str(e)}")
69
  raise ValueError(f"Search query generation failed: {str(e)}")
70
 
71
+ def buildGeminiPrompt(self, inputText: str, modelVerdict: str, confidence: float,
72
+ support: Dict[str, List], query: Optional[str] = None) -> str:
73
+ verdictOutcome = "a loss for the person" if modelVerdict.lower() == "guilty" else "in favor of the person"
74
 
75
  prompt = f"""You are a judge evaluating a legal dispute under Indian law.
76
 
77
  ### Case Facts:
78
+ {inputText}
79
 
80
  ### Initial Model Verdict:
81
+ {modelVerdict.upper()} (Confidence: {confidence * 100:.2f}%)
82
+ This verdict is interpreted as {verdictOutcome}.
83
  """
84
 
85
  if query:
 
122
  2. If relevant past cases appear in the retrieved materials, summarize them and analyze whether they support or contradict the model's verdict.
123
 
124
  3. Using the above, assess the model's prediction:
125
+ - If confidence is below 60%, you may revise or retain it.
126
+ - If confidence is 60% or higher, retain unless clear legal grounds exist to challenge it.
127
 
128
  4. Provide a thorough and formal legal explanation that:
129
  - Justifies the final decision using legal logic
 
139
  """
140
  return prompt
141
 
142
+ def extractFinalVerdict(self, geminiOutput: str) -> tuple[Optional[str], str]:
143
+ verdictMatch = re.search(r"final verdict\s*[:\-]\s*(guilty|not guilty)", geminiOutput, re.IGNORECASE)
144
+ changedMatch = re.search(r"verdict changed\s*[:\-]\s*(yes|no)", geminiOutput, re.IGNORECASE)
145
 
146
+ finalVerdict = verdictMatch.group(1).lower() if verdictMatch else None
147
+ verdictChanged = "changed" if changedMatch and changedMatch.group(1).lower() == "yes" else "not changed"
148
 
149
+ return finalVerdict, verdictChanged
150
 
151
+ def evaluateCaseWithGemini(self, inputText: str, modelVerdict: str, confidence: float,
152
+ retrieveFn, geminiQueryModel=None):
 
 
 
153
  try:
154
+ if geminiQueryModel:
155
+ support, searchQuery = retrieveFn.retrieveDualSupportChunks(inputText, self)
156
+ else:
157
+ support, _ = retrieveFn.retrieveSupportChunksParallel(inputText)
158
+ searchQuery = inputText
159
+
160
+ prompt = self.buildGeminiPrompt(inputText, modelVerdict, confidence, support, searchQuery)
161
  response = self.client.models.generate_content(
162
  model=settings.gemini_model,
163
  contents=prompt
164
  )
 
165
  geminiOutput = response.text if response.text else "No response from Gemini"
166
+
167
+ finalVerdict, verdictChanged = self.extractFinalVerdict(geminiOutput)
168
+
169
  logs = {
170
  "inputText": inputText,
171
  "modelVerdict": modelVerdict,
 
177
  "verdictChanged": verdictChanged,
178
  "ragSearchQuery": searchQuery
179
  }
180
+
181
  return logs
182
+
183
  except Exception as e:
 
184
  return {
185
  "error": str(e),
186
  "inputText": inputText,
187
  "modelVerdict": modelVerdict,
188
  "confidence": confidence,
189
+ "ragSearchQuery": None,
190
  "support": None,
191
  "promptToGemini": None,
192
  "geminiOutput": None,
app/services/rag_service.py CHANGED
@@ -1,5 +1,6 @@
1
  import json
2
  import os
 
3
  from concurrent.futures import ThreadPoolExecutor
4
  from typing import Dict, List, Any, Tuple
5
  from app.core.config import settings
@@ -16,87 +17,143 @@ class RAGService:
16
 
17
  def _initialize_encoder(self):
18
  try:
19
- logger.info(f"Sentence transformer placeholder initialized")
20
- # TODO: Initialize actual sentence transformer when dependencies are available
 
 
 
 
21
  self.encoder = "placeholder"
22
  except Exception as e:
23
- logger.error(f"Failed to initialize encoder: {str(e)}")
 
24
 
25
- def _load_faiss_index_and_chunks(self, indexPath: str, chunkPath: str) -> Tuple[Any, List]:
26
  try:
27
  if not os.path.exists(indexPath) or not os.path.exists(chunkPath):
28
  logger.warning(f"Missing files: {indexPath} or {chunkPath}")
29
  return None, []
30
 
31
- # TODO: Load actual FAISS index when faiss-cpu is available
 
 
 
 
 
32
 
33
  if chunkPath.endswith('.pkl'):
34
- logger.info(f"Placeholder for pickle file: {chunkPath}")
35
- chunks = []
36
  else:
37
- try:
38
- with open(chunkPath, 'r', encoding='utf-8') as f:
39
- chunks = json.load(f)
40
- except:
41
- chunks = []
42
 
43
- logger.info(f"Loaded index placeholder from {indexPath} with {len(chunks)} chunks")
44
- return "placeholder_index", chunks
45
  except Exception as e:
46
  logger.error(f"Failed to load index {indexPath}: {str(e)}")
47
  return None, []
48
 
49
  def _load_indexes(self):
50
- indexConfigs = {
51
- "constitution": (settings.constitution_index_path, settings.constitution_chunks_path),
52
- "ipcSections": (settings.ipc_index_path, settings.ipc_chunks_path),
53
- "ipcCase": (settings.ipc_case_index_path, settings.ipc_case_chunks_path),
54
- "statutes": (settings.statute_index_path, settings.statute_chunks_path),
55
- "qaTexts": (settings.qa_index_path, settings.qa_chunks_path),
56
- "caseLaw": (settings.case_law_index_path, settings.case_law_chunks_path)
 
57
  }
58
 
59
- for name, (indexPath, chunkPath) in indexConfigs.items():
60
- indexData = self._load_faiss_index_and_chunks(indexPath, chunkPath)
61
- if indexData[0] is not None:
62
- self.preloadedIndexes[name] = indexData
63
- logger.info(f"Successfully loaded {name} index placeholder")
64
- else:
65
- logger.warning(f"Failed to load {name} index")
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  def retrieveSupportChunksParallel(self, inputText: str) -> Tuple[Dict[str, List], Dict]:
68
- logger.info("Using placeholder RAG retrieval")
69
-
70
- logs = {"query": inputText}
 
 
 
 
 
 
 
 
 
71
 
72
- # Return placeholder support chunks
73
- support = {}
74
- for name in ["constitution", "ipcSections", "ipcCase", "statutes", "qaTexts", "caseLaw"]:
75
- if name in self.preloadedIndexes:
76
- _, chunks = self.preloadedIndexes[name]
77
- support[name] = chunks[:5] if chunks else []
78
- else:
79
- support[name] = []
80
-
81
- logs["supportChunksUsed"] = str(support)
82
- return support, logs
83
-
84
- def retrieveDualSupportChunks(self, inputText: str, geminiService) -> Tuple[Dict[str, List], str]:
85
  try:
86
- # Generate search query using Gemini
87
- geminiQuery = None
88
- try:
89
- geminiQuery = geminiService.generateSearchQueryFromCase(inputText)
90
- except Exception as e:
91
- logger.warning(f"Failed to generate Gemini query: {str(e)}")
92
 
93
- # Use placeholder retrieval
94
- support, _ = self.retrieveSupportChunksParallel(inputText)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
- return support, geminiQuery or inputText
97
  except Exception as e:
98
- logger.error(f"Error in dual support retrieval: {str(e)}")
99
- raise ValueError(f"Dual support retrieval failed: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
  def areIndexesLoaded(self) -> bool:
102
  return len(self.preloadedIndexes) > 0
 
1
  import json
2
  import os
3
+ import pickle
4
  from concurrent.futures import ThreadPoolExecutor
5
  from typing import Dict, List, Any, Tuple
6
  from app.core.config import settings
 
17
 
18
  def _initialize_encoder(self):
19
  try:
20
+ from sentence_transformers import SentenceTransformer
21
+ logger.info(f"Loading sentence transformer: {settings.sentence_transformer_model}")
22
+ self.encoder = SentenceTransformer(settings.sentence_transformer_model)
23
+ logger.info("Sentence transformer loaded successfully")
24
+ except ImportError:
25
+ logger.warning("sentence-transformers not installed - using placeholder mode")
26
  self.encoder = "placeholder"
27
  except Exception as e:
28
+ logger.error(f"Failed to load sentence transformer: {str(e)}")
29
+ self.encoder = "placeholder"
30
 
31
+ def loadFaissIndexAndChunks(self, indexPath: str, chunkPath: str) -> Tuple[Any, List]:
32
  try:
33
  if not os.path.exists(indexPath) or not os.path.exists(chunkPath):
34
  logger.warning(f"Missing files: {indexPath} or {chunkPath}")
35
  return None, []
36
 
37
+ try:
38
+ import faiss
39
+ index = faiss.read_index(indexPath)
40
+ except ImportError:
41
+ logger.warning("faiss-cpu not installed - returning placeholder")
42
+ return "placeholder_index", []
43
 
44
  if chunkPath.endswith('.pkl'):
45
+ with open(chunkPath, 'rb') as f:
46
+ chunks = pickle.load(f)
47
  else:
48
+ with open(chunkPath, 'r', encoding='utf-8') as f:
49
+ chunks = json.load(f)
 
 
 
50
 
51
+ logger.info(f"Loaded index from {indexPath} with {len(chunks)} chunks")
52
+ return index, chunks
53
  except Exception as e:
54
  logger.error(f"Failed to load index {indexPath}: {str(e)}")
55
  return None, []
56
 
57
  def _load_indexes(self):
58
+ basePath = settings.faiss_indexes_base_path
59
+ self.preloadedIndexes = {
60
+ "constitution": self.loadFaissIndexAndChunks(f"{basePath}/constitution_bgeLarge.index", f"{basePath}/constitution_chunks.json"),
61
+ "ipcSections": self.loadFaissIndexAndChunks(f"{basePath}/ipc_bgeLarge.index", f"{basePath}/ipc_chunks.json"),
62
+ "ipcCase": self.loadFaissIndexAndChunks(f"{basePath}/ipc_case_flat.index", f"{basePath}/ipc_case_chunks.json"),
63
+ "statutes": self.loadFaissIndexAndChunks(f"{basePath}/statute_index.faiss", f"{basePath}/statute_chunks.pkl"),
64
+ "qaTexts": self.loadFaissIndexAndChunks(f"{basePath}/qa_faiss_index.idx", f"{basePath}/qa_text_chunks.json"),
65
+ "caseLaw": self.loadFaissIndexAndChunks(f"{basePath}/case_faiss.index", f"{basePath}/case_chunks.pkl")
66
  }
67
 
68
+ # Remove failed loads
69
+ self.preloadedIndexes = {k: v for k, v in self.preloadedIndexes.items() if v[0] is not None}
70
+ logger.info(f"Successfully loaded {len(self.preloadedIndexes)} indexes")
71
+
72
+ def search(self, index: Any, chunks: List, queryEmbedding, topK: int) -> List[Tuple[float, Any]]:
73
+ try:
74
+ if index == "placeholder_index":
75
+ return [(0.5, chunk) for chunk in chunks[:topK]]
76
+
77
+ import faiss
78
+ D, I = index.search(queryEmbedding, topK)
79
+ results = []
80
+ for score, idx in zip(D[0], I[0]):
81
+ if idx < len(chunks):
82
+ results.append((score, chunks[idx]))
83
+ return results
84
+ except Exception as e:
85
+ logger.error(f"Search failed: {str(e)}")
86
+ return []
87
 
88
  def retrieveSupportChunksParallel(self, inputText: str) -> Tuple[Dict[str, List], Dict]:
89
+ if self.encoder == "placeholder":
90
+ logger.info("Using placeholder RAG retrieval")
91
+ logs = {"query": inputText}
92
+ support = {}
93
+ for name in ["constitution", "ipcSections", "ipcCase", "statutes", "qaTexts", "caseLaw"]:
94
+ if name in self.preloadedIndexes:
95
+ _, chunks = self.preloadedIndexes[name]
96
+ support[name] = chunks[:5] if chunks else []
97
+ else:
98
+ support[name] = []
99
+ logs["supportChunksUsed"] = support
100
+ return support, logs
101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  try:
103
+ import faiss
104
+ queryEmbedding = self.encoder.encode([inputText], normalize_embeddings=True).astype('float32')
105
+ faiss.normalize_L2(queryEmbedding)
106
+
107
+ logs = {"query": inputText}
 
108
 
109
+ def retrieve(name):
110
+ if name not in self.preloadedIndexes:
111
+ return name, []
112
+ idx, chunks = self.preloadedIndexes[name]
113
+ results = self.search(idx, chunks, queryEmbedding, 5)
114
+ return name, [c[1] for c in results]
115
+
116
+ support = {}
117
+ with ThreadPoolExecutor(max_workers=6) as executor:
118
+ futures = [executor.submit(retrieve, name) for name in self.preloadedIndexes.keys()]
119
+ for f in futures:
120
+ name, topChunks = f.result()
121
+ support[name] = topChunks
122
+
123
+ logs["supportChunksUsed"] = support
124
+ return support, logs
125
 
 
126
  except Exception as e:
127
+ logger.error(f"Error retrieving support chunks: {str(e)}")
128
+ raise ValueError(f"Support chunk retrieval failed: {str(e)}")
129
+
130
+ def retrieveDualSupportChunks(self, inputText: str, geminiQueryModel):
131
+ try:
132
+ geminiQuery = geminiQueryModel.generateSearchQueryFromCase(inputText, geminiQueryModel)
133
+ except:
134
+ geminiQuery = None
135
+
136
+ supportFromCase, _ = self.retrieveSupportChunksParallel(inputText)
137
+ supportFromQuery, _ = self.retrieveSupportChunksParallel(geminiQuery or inputText)
138
+
139
+ combinedSupport = {}
140
+ for key in supportFromCase:
141
+ combined = supportFromCase[key] + supportFromQuery[key]
142
+ seen = set()
143
+ unique = []
144
+ for chunk in combined:
145
+ if isinstance(chunk, str):
146
+ rep = chunk
147
+ else:
148
+ rep = chunk.get("text") or chunk.get("description") or chunk.get("section_desc") or str(chunk)
149
+ if rep not in seen:
150
+ seen.add(rep)
151
+ unique.append(chunk)
152
+ if len(unique) == 10:
153
+ break
154
+ combinedSupport[key] = unique
155
+
156
+ return combinedSupport, geminiQuery
157
 
158
  def areIndexesLoaded(self) -> bool:
159
  return len(self.preloadedIndexes) > 0
attached_assets/raggy (3)_1753479511222.py ADDED
@@ -0,0 +1,400 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """raggy.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1qpREkLNBZPP521tI9IvkNaB3FaLnlH9d
8
+ """
9
+
10
+ from google.colab import drive
11
+ drive.mount('/content/drive')
12
+
13
+
14
+ !pip install faiss-cpu --quiet
15
+
16
+
17
+ !pip install faiss-cpu -q
18
+
19
+
20
+ import zipfile
21
+ import os
22
+
23
+ zipPath = "/content/drive/MyDrive/legalbert_epoch4.zip"
24
+ extractPath = "/content/legalbert_model"
25
+
26
+ with zipfile.ZipFile(zipPath, 'r') as zipRef:
27
+ zipRef.extractall(extractPath)
28
+
29
+ print("Model unzipped at:", extractPath)
30
+
31
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
32
+ import torch
33
+
34
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
35
+
36
+ tokenizer = AutoTokenizer.from_pretrained("/content/legalbert_model")
37
+ legalBertModel = AutoModelForSequenceClassification.from_pretrained("/content/legalbert_model").to(device)
38
+
39
+ print("Model and tokenizer loaded on", device)
40
+
41
+ import torch.nn.functional as F
42
+
43
def predictVerdict(inputText):
    """Classify *inputText* as "guilty" / "not guilty" with the fine-tuned LegalBERT model.

    Uses the module-level `tokenizer`, `legalBertModel`, and `device` globals.
    """
    # Truncate so long case texts fit the model's maximum sequence length.
    encoded = tokenizer(inputText, return_tensors="pt", truncation=True, padding=True).to(device)
    with torch.no_grad():
        logits = legalBertModel(**encoded).logits
    # argmax over the softmax picks the most probable class; label 1 is "guilty".
    label = torch.argmax(F.softmax(logits, dim=1), dim=1).item()
    return "guilty" if label == 1 else "not guilty"
50
+
51
def getConfidence(inputText):
    """Return the model's top-class probability for *inputText* as a Python float.

    Mirrors predictVerdict's forward pass; reads the same module-level globals.
    """
    encoded = tokenizer(inputText, return_tensors="pt", truncation=True, padding=True).to(device)
    with torch.no_grad():
        probs = F.softmax(legalBertModel(**encoded).logits, dim=1)
    return float(probs.max().item())
57
+
58
+ inputText = "The accused was found in possession of stolen property and failed to provide a valid explanation."
59
+
60
+ verdict = predictVerdict(inputText)
61
+ confidence = getConfidence(inputText)
62
+
63
+ print("Verdict:", verdict)
64
+ print("Confidence:", confidence)
65
+
66
+ !pip install -q google-generativeai
67
+
68
import google.generativeai as genai
import os

# SECURITY: the original committed a live Gemini API key in source. Never
# hard-code secrets — read the key from the environment instead and rotate
# the leaked key immediately.
apiKey = os.environ.get("GEMINI_API_KEY", "")
genai.configure(api_key=apiKey)

model = genai.GenerativeModel("gemini-2.5-flash")
75
+
76
def retrieveDualSupportChunks(inputText, geminiQueryModel):
    """Retrieve support chunks twice — once with the raw case facts and once with
    a Gemini-generated keyword query — then merge and de-duplicate per source.

    Args:
        inputText: full case-facts text.
        geminiQueryModel: Gemini model used to generate the keyword query.

    Returns:
        (combinedSupport, geminiQuery) — geminiQuery is None when query
        generation failed, in which case both retrievals used the raw text.
    """
    try:
        geminiQuery = generateSearchQueryFromCase(inputText, geminiQueryModel)
    except Exception as e:
        # Best-effort fallback to the raw case text, but do not hide the failure
        # (the original bare `except:` silently swallowed everything).
        print(f"Gemini query generation failed, falling back to case text: {e}")
        geminiQuery = None

    supportFromCase, _ = retrieveSupportChunksParallel(inputText)
    supportFromQuery, _ = retrieveSupportChunksParallel(geminiQuery or inputText)

    combinedSupport = {}
    for key in supportFromCase:
        # .get guards against a source missing from the second retrieval.
        combined = supportFromCase[key] + supportFromQuery.get(key, [])
        seen = set()
        unique = []
        for chunk in combined:
            # Chunks may be plain strings or dicts; derive a stable text key for dedup.
            if isinstance(chunk, str):
                rep = chunk
            else:
                rep = chunk.get("text") or chunk.get("description") or chunk.get("section_desc") or str(chunk)
            if rep not in seen:
                seen.add(rep)
                unique.append(chunk)
            if len(unique) == 10:  # cap merged evidence at 10 chunks per source
                break
        combinedSupport[key] = unique

    return combinedSupport, geminiQuery
103
+
104
+ import json
105
+
106
+ path = "/content/drive/MyDrive/faiss_indexes/constitution_bge_chunks.json"
107
+
108
+ with open(path, "r", encoding="utf-8") as f:
109
+ data = json.load(f)
110
+
111
+
112
+ for i, item in enumerate(data[:5]):
113
+ print(f"🔹 Chunk {i+1}:\n{item}\n")
114
+
115
+ import json
116
+
117
+ path="/content/drive/MyDrive/faiss_indexes/constitution_chunks.json"
118
+ with open(path, "r", encoding="utf-8") as f:
119
+ data = json.load(f)
120
+
121
+
122
+ for i, item in enumerate(data[:5]):
123
+ print(f"🔹 Chunk {i+1}:\n{item}\n")
124
+
125
+
126
+ import faiss
127
+ import numpy as np
128
+ import json
129
+ import pickle
130
+ from sentence_transformers import SentenceTransformer
131
+
132
+ encoder = SentenceTransformer('BAAI/bge-large-en-v1.5')
133
+ basePath = "/content/drive/MyDrive/faiss_indexes"
134
+
135
def loadFaissIndexAndChunks(indexPath, chunkPath):
    """Load a FAISS index plus its chunk store and return (index, chunks).

    The chunk store is pickle (`.pkl`) or JSON depending on the file extension.
    """
    index = faiss.read_index(indexPath)
    if chunkPath.endswith('.pkl'):
        # NOTE(review): pickle is only safe for trusted, locally produced files.
        with open(chunkPath, 'rb') as f:
            chunks = pickle.load(f)
    else:
        # Explicit encoding so JSON chunk files load identically across platforms
        # (the original relied on the locale default encoding).
        with open(chunkPath, 'r', encoding='utf-8') as f:
            chunks = json.load(f)
    return index, chunks
140
+
141
def search(index, chunks, queryEmbedding, topK):
    """Run a FAISS top-K search and pair each score with its chunk.

    FAISS pads missing results with id -1 when fewer than topK neighbours
    exist; the original `idx < len(chunks)` check let -1 through and silently
    returned chunks[-1], so we additionally require idx >= 0.
    """
    D, I = index.search(queryEmbedding, topK)
    results = []
    for score, idx in zip(D[0], I[0]):
        if 0 <= idx < len(chunks):  # skip FAISS -1 padding and out-of-range ids
            results.append((score, chunks[idx]))
    return results
148
+
149
+ from concurrent.futures import ThreadPoolExecutor
150
def retrieveSupportChunksParallel(inputText):
    """Embed *inputText* once, then query every preloaded FAISS index concurrently.

    Returns (support, logs): support maps index name -> top-5 chunks; logs
    records the query text and the chunks that were used.
    """
    embedding = encoder.encode([inputText], normalize_embeddings=True).astype('float32')
    faiss.normalize_L2(embedding)

    def _lookup(name):
        # Each worker searches a single index; `search` yields (score, chunk) pairs.
        index, chunks = preloadedIndexes[name]
        hits = search(index, chunks, embedding, 5)
        return name, [chunk for _, chunk in hits]

    with ThreadPoolExecutor(max_workers=6) as pool:
        support = dict(pool.map(_lookup, preloadedIndexes))

    return support, {"query": inputText, "supportChunksUsed": support}
170
+
171
+ preloadedIndexes = {
172
+ "constitution": loadFaissIndexAndChunks(f"{basePath}/constitution_bgeLarge.index", f"{basePath}/constitution_chunks.json"),
173
+ "ipcSections": loadFaissIndexAndChunks(f"{basePath}/ipc_bgeLarge.index", f"{basePath}/ipc_chunks.json"),
174
+ "ipcCase": loadFaissIndexAndChunks(f"{basePath}/ipc_case_flat.index", f"{basePath}/ipc_case_chunks.json"),
175
+ "statutes": loadFaissIndexAndChunks(f"{basePath}/statute_index.faiss", f"{basePath}/statute_chunks.pkl"),
176
+ "qaTexts": loadFaissIndexAndChunks(f"{basePath}/qa_faiss_index.idx", f"{basePath}/qa_text_chunks.json"),
177
+ "caseLaw": loadFaissIndexAndChunks(f"{basePath}/case_faiss.index", f"{basePath}/case_chunks.pkl")
178
+ }
179
+
180
def generateSearchQueryFromCase(caseFacts, geminiModel, verbose=False):
    """Ask Gemini to distill *caseFacts* into a short keyword query for RAG retrieval."""
    prompt = f"""
You are a legal assistant for a retrieval system based on Indian criminal law.

Given the case facts below, generate a **concise and focused search query** with **only the most relevant legal keywords**. These should include:

- Specific **IPC sections**
- Core **legal concepts** (e.g., "right of private defence", "criminal breach of trust")
- **Crime type** (e.g., "assault", "corruption")
- Any relevant **procedural issue** (e.g., "absence of intent", "lack of evidence")

Do **not** include:
- Full sentences
- Personal names
- Generic or vague words (e.g., "man", "incident", "case", "situation")

Keep the query under **20 words**. Separate terms by commas if needed. Optimize for legal document search.

Case Facts:
\"\"\"{caseFacts}\"\"\"

Return only the search query, no explanation or prefix:
"""
    raw = geminiModel.generate_content(prompt).text
    # Strip the optional "Search Query:" prefix, surrounding quotes, and newlines.
    query = raw.replace("Search Query:", "").strip().strip('"').replace("\n", "")

    if verbose:
        print("RAG Query:", query)

    return query
210
+
211
def buildGeminiPrompt(inputText, modelVerdict, confidence, support, query=None):
    """Assemble the judge-persona evaluation prompt from the case facts, the
    LegalBERT verdict/confidence, and the retrieved legal reference chunks.

    `support` maps source name -> list of chunks (strings or dicts); missing
    sources simply render an empty section.
    """
    verdictOutcome = "a loss for the person" if modelVerdict.lower() == "guilty" else "in favor of the person"

    parts = [f"""You are a judge evaluating a legal dispute under Indian law.

### Case Facts:
{inputText}

### Initial Model Verdict:
{modelVerdict.upper()} (Confidence: {confidence * 100:.2f}%)
This verdict is interpreted as {verdictOutcome}.
"""]

    if query:
        parts.append(f"\n### Legal Query Used:\n{query}\n")

    # (section header, support key) pairs — order mirrors the prompt layout.
    sections = [
        ("\n---\n\n### Legal References Retrieved:\n\n#### Constitution Articles (Top 5):\n", "constitution"),
        ("\n#### IPC Sections (Top 5):\n", "ipcSections"),
        ("\n#### IPC Case Law (Top 5):\n", "ipcCase"),
        ("\n#### Statutes (Top 5):\n", "statutes"),
        ("\n#### QA Texts (Top 5):\n", "qaTexts"),
        ("\n#### General Case Law (Top 5):\n", "caseLaw"),
    ]
    for header, key in sections:
        parts.append(header)
        for i, item in enumerate(support.get(key, [])):
            parts.append(f"- {i+1}. {str(item)}\n")

    parts.append(f"""

---

### Instructions to the Judge (You):

1. Review the legal materials provided:
- Identify which Constitution articles, IPC sections, statutes, and case laws are relevant to the facts.
- Also note and explain which retrieved references are **not applicable** or irrelevant.

2. If relevant past cases appear in the retrieved materials, summarize them and analyze whether they support or contradict the model’s verdict.

3. Using the above, assess the model's prediction:
- If confidence is below 60%, you may revise or retain it.
- If confidence is 60% or higher, retain unless clear legal grounds exist to challenge it.

4. Provide a thorough and formal legal explanation that:
- Justifies the final decision using legal logic
- Cites relevant IPCs, constitutional provisions, statutes, and precedents
- Explains any reasoning for overriding the model's prediction, if applicable

5. Conclude with the following lines, formatted as shown:

Final Verdict: Guilty or Not Guilty
Verdict Changed: Yes or No

Respond in the tone of a formal Indian judge. Your explanation should reflect reasoning, neutrality, and respect for legal procedure.
""")
    return "".join(parts)
280
+
281
+ import re
282
+
283
def extractFinalVerdict(geminiOutput):
    """Parse Gemini's free-text ruling for the "Final Verdict:" and "Verdict Changed:" lines.

    Returns (finalVerdict, verdictChanged): finalVerdict is "guilty",
    "not guilty", or None when absent; verdictChanged is "changed" or
    "not changed" (defaults to "not changed" when the line is missing).
    """
    verdict = re.search(r"final verdict\s*[:\-]\s*(guilty|not guilty)", geminiOutput, re.IGNORECASE)
    changed = re.search(r"verdict changed\s*[:\-]\s*(yes|no)", geminiOutput, re.IGNORECASE)

    finalVerdict = None
    if verdict:
        finalVerdict = verdict.group(1).lower()

    verdictChanged = "not changed"
    if changed and changed.group(1).lower() == "yes":
        verdictChanged = "changed"

    return finalVerdict, verdictChanged
291
+
292
def evaluateCaseWithGemini(inputText, modelVerdict, confidence, retrieveFn, geminiQueryModel=None):
    """Run the full RAG + Gemini evaluation pipeline for one case.

    Args:
        inputText: case facts.
        modelVerdict / confidence: LegalBERT prediction and its probability.
        retrieveFn: retrieval callable used when no query model is supplied.
        geminiQueryModel: optional Gemini model; when set, dual retrieval
            (raw facts + generated keyword query) is used instead of retrieveFn.

    Returns a log dict; on failure the dict carries an "error" key and the
    pipeline fields are None so downstream printing still works.
    """
    try:
        if geminiQueryModel:
            support, searchQuery = retrieveDualSupportChunks(inputText, geminiQueryModel)
        else:
            support, _ = retrieveFn(inputText)
            searchQuery = inputText

        prompt = buildGeminiPrompt(inputText, modelVerdict, confidence, support, searchQuery)
        geminiOutput = model.generate_content(prompt).text
        finalVerdict, verdictChanged = extractFinalVerdict(geminiOutput)

        return {
            "inputText": inputText,
            "modelVerdict": modelVerdict,
            "confidence": confidence,
            "support": support,
            "promptToGemini": prompt,
            "geminiOutput": geminiOutput,
            "finalVerdictByGemini": finalVerdict,
            "verdictChanged": verdictChanged,
            "ragSearchQuery": searchQuery,
        }
    except Exception as e:
        # Fail soft: return a log-shaped dict instead of raising.
        return {
            "error": str(e),
            "inputText": inputText,
            "modelVerdict": modelVerdict,
            "confidence": confidence,
            "ragSearchQuery": None,
            "support": None,
            "promptToGemini": None,
            "geminiOutput": None,
            "finalVerdictByGemini": None,
            "verdictChanged": None,
        }
333
+
334
+ import pandas as pd
335
+
336
+ df=pd.read_csv('/content/drive/MyDrive/Extracted/LegalRAGSystem/ILDC/test.csv')
337
+
338
+ df['Label'][1971]
339
+
340
+ inputText = df['Input'][1971]
341
+
342
+ verdict = predictVerdict(inputText)
343
+ confidence = getConfidence(inputText)
344
+
345
+ logs = evaluateCaseWithGemini(
346
+ inputText=inputText,
347
+ modelVerdict=verdict,
348
+ confidence=confidence,
349
+ retrieveFn=retrieveSupportChunksParallel,
350
+ geminiQueryModel=model
351
+ )
352
+
353
+ print("🔍 Query sent to RAG:", logs["ragSearchQuery"])
354
+ print(logs['modelVerdict'])
355
+ print(logs['confidence'])
356
+ # print("\n📜 Prompt to Gemini:\n", logs["promptToGemini"])
357
+ print("\n🧑‍⚖️ Gemini Verdict Output:\n", logs["geminiOutput"])
358
+ print("\n✅ Final Verdict:", logs["finalVerdictByGemini"])
359
+ print("🔁 Verdict Changed:", logs["verdictChanged"])
360
+
361
+ # import random
362
+
363
+ # sampleIndices = random.sample(range(len(df)), 5)
364
+ # correctCount = 0
365
+ # total = 0
366
+
367
+ # for idx in sampleIndices:
368
+ # inputText = df['Input'][idx]
369
+ # trueLabel = int(df['Label'][idx])
370
+
371
+ # verdict = predictVerdict(inputText)
372
+ # confidence = getConfidence(inputText)
373
+
374
+ # result = evaluateCaseWithGemini(
375
+ # inputText=inputText,
376
+ # modelVerdict=verdict,
377
+ # confidence=confidence,
378
+ # retrieveFn=retrieveSupportChunksParallel,
379
+ # geminiQueryModel=model
380
+ # )
381
+
382
+ # predicted = result.get("finalVerdictByGemini")
383
+ # if predicted is None:
384
+ # continue
385
+
386
+ # predictedLabel = 1 if predicted.lower() == "guilty" else 0
387
+
388
+ # print("Index:", idx)
389
+ # print("True Label:", trueLabel)
390
+ # print("Predicted Verdict:", predicted)
391
+ # print("Verdict Changed:", result.get("verdictChanged"))
392
+ # print("Match:", predictedLabel == trueLabel)
393
+ # print("----")
394
+
395
+ # correctCount += int(predictedLabel == trueLabel)
396
+ # total += 1
397
+
398
+ # print("Samples Evaluated:", total)
399
+ # print("Gemini Final Verdict Accuracy:", correctCount / total if total else 0)
400
+