b24122 committed on
Commit
00d8d42
·
0 Parent(s):

Initial commit

Browse files
attached_assets/raggy (3)_1753453411048.py ADDED
@@ -0,0 +1,400 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# -*- coding: utf-8 -*-
"""raggy.ipynb

Automatically generated by Colab.

Original file is located at
https://colab.research.google.com/drive/1qpREkLNBZPP521tI9IvkNaB3FaLnlH9d
"""

# Mount Google Drive so the fine-tuned model zip and FAISS index files are reachable.
from google.colab import drive
drive.mount('/content/drive')

# Colab cell magic: install the CPU build of FAISS.
# NOTE(review): the second install is a duplicate of the first (a no-op once installed).
!pip install faiss-cpu --quiet

!pip install faiss-cpu -q
18
+
19
+
20
import zipfile
import os

# Source: zipped LegalBERT checkpoint on Drive; destination: local extraction dir.
zipPath = "/content/drive/MyDrive/legalbert_epoch4.zip"
extractPath = "/content/legalbert_model"

# Extract the whole archive into the local model directory.
with zipfile.ZipFile(zipPath, 'r') as zipRef:
    zipRef.extractall(extractPath)

print("Model unzipped at:", extractPath)
30
+
31
# Load the fine-tuned LegalBERT sequence classifier (binary: guilty / not guilty).
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Prefer GPU when the Colab runtime provides one.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained("/content/legalbert_model")
legalBertModel = AutoModelForSequenceClassification.from_pretrained("/content/legalbert_model").to(device)

print("Model and tokenizer loaded on", device)
40
+
41
import torch.nn.functional as F

def predictVerdict(inputText):
    """Classify case facts with LegalBERT; label 1 maps to "guilty", otherwise "not guilty"."""
    encoded = tokenizer(inputText, return_tensors="pt", truncation=True, padding=True).to(device)
    with torch.no_grad():
        outputLogits = legalBertModel(**encoded).logits
    classProbs = F.softmax(outputLogits, dim=1)
    labelIdx = torch.argmax(classProbs, dim=1).item()
    return "guilty" if labelIdx == 1 else "not guilty"
50
+
51
def getConfidence(inputText):
    """Return the winning-class probability (model confidence, in [0, 1]) for the input."""
    encoded = tokenizer(inputText, return_tensors="pt", truncation=True, padding=True).to(device)
    with torch.no_grad():
        outputLogits = legalBertModel(**encoded).logits
    classProbs = F.softmax(outputLogits, dim=1)
    return float(classProbs.max().item())
57
+
58
# Quick smoke test of the classifier on a hand-written fact pattern.
inputText = "The accused was found in possession of stolen property and failed to provide a valid explanation."

verdict = predictVerdict(inputText)
confidence = getConfidence(inputText)

print("Verdict:", verdict)
print("Confidence:", confidence)
65
+
66
+ !pip install -q google-generativeai
67
+
68
+ import google.generativeai as genai
69
+ import os
70
+
71
+ apiKey = "AIzaSyB2MlvYuABxIQjs42lZsASp78q7F95NOgc"
72
+ genai.configure(api_key=apiKey)
73
+
74
+ model = genai.GenerativeModel("gemini-2.5-flash")
75
+
76
def retrieveDualSupportChunks(inputText, geminiQueryModel, maxChunksPerSource=10):
    """Retrieve support chunks twice — with the raw case facts and with a
    Gemini-generated search query — and merge the results per source.

    Args:
        inputText: raw case facts.
        geminiQueryModel: Gemini model used to rewrite the facts into a query.
        maxChunksPerSource: cap on merged unique chunks kept per index
            (default 10, preserving the original behaviour).

    Returns:
        (combinedSupport, geminiQuery): combinedSupport maps index name to a
        de-duplicated chunk list; geminiQuery is the generated query, or None
        if generation failed (retrieval then falls back to the raw facts).
    """
    try:
        geminiQuery = generateSearchQueryFromCase(inputText, geminiQueryModel)
    except Exception:
        # FIX: was a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit. Query generation stays best-effort: fall back to raw facts.
        geminiQuery = None

    supportFromCase, _ = retrieveSupportChunksParallel(inputText)
    supportFromQuery, _ = retrieveSupportChunksParallel(geminiQuery or inputText)

    combinedSupport = {}
    for key in supportFromCase:
        combined = supportFromCase[key] + supportFromQuery[key]
        seen = set()
        unique = []
        for chunk in combined:
            # Chunks may be plain strings or dicts; derive a stable text
            # representative for de-duplication.
            if isinstance(chunk, str):
                rep = chunk
            else:
                rep = chunk.get("text") or chunk.get("description") or chunk.get("section_desc") or str(chunk)
            if rep not in seen:
                seen.add(rep)
                unique.append(chunk)
            if len(unique) == maxChunksPerSource:
                break
        combinedSupport[key] = unique

    return combinedSupport, geminiQuery
103
+
104
import json

# Sanity-check both constitution chunk files by printing their first five entries.
# (The two original cells were identical apart from the path; a loop preserves
# the exact same prints in the same order.)
for path in (
    "/content/drive/MyDrive/faiss_indexes/constitution_bge_chunks.json",
    "/content/drive/MyDrive/faiss_indexes/constitution_chunks.json",
):
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    for i, item in enumerate(data[:5]):
        print(f"🔹 Chunk {i+1}:\n{item}\n")
124
+
125
+
126
import faiss
import numpy as np
import json
import pickle
from sentence_transformers import SentenceTransformer

# BGE-large embedder used for every retrieval query.
encoder = SentenceTransformer('BAAI/bge-large-en-v1.5')
# Root folder on Drive holding each FAISS index and its chunk metadata file.
basePath = "/content/drive/MyDrive/faiss_indexes"
134
+
135
def loadFaissIndexAndChunks(indexPath, chunkPath):
    """Load a FAISS index plus its parallel chunk store.

    Args:
        indexPath: path to the serialized FAISS index.
        chunkPath: path to the chunk metadata — a pickle (".pkl") or JSON file.

    Returns:
        (index, chunks): the FAISS index and the list of chunk records whose
        positions correspond to the index's vector ids.
    """
    index = faiss.read_index(indexPath)
    # FIX: branch once on the extension instead of evaluating endswith() twice,
    # and open JSON with an explicit UTF-8 encoding (platform-independent).
    if chunkPath.endswith('.pkl'):
        with open(chunkPath, 'rb') as f:
            chunks = pickle.load(f)
    else:
        with open(chunkPath, 'r', encoding='utf-8') as f:
            chunks = json.load(f)
    return index, chunks
140
+
141
def search(index, chunks, queryEmbedding, topK):
    """Run a top-K FAISS search and pair each hit with its chunk.

    Args:
        index: FAISS index (or any object with a compatible .search method).
        chunks: list whose positions correspond to the index's vector ids.
        queryEmbedding: 2-D query embedding batch (single query in row 0).
        topK: number of neighbours to request.

    Returns:
        List of (score, chunk) tuples for valid hits only.
    """
    D, I = index.search(queryEmbedding, topK)
    results = []
    for score, idx in zip(D[0], I[0]):
        # FIX: FAISS pads missing results with id -1; the original `idx < len(chunks)`
        # guard let -1 through, silently returning the *last* chunk via negative
        # indexing. Require a non-negative id as well.
        if 0 <= idx < len(chunks):
            results.append((score, chunks[idx]))
    return results
148
+
149
from concurrent.futures import ThreadPoolExecutor

def retrieveSupportChunksParallel(inputText):
    """Query every preloaded FAISS index concurrently for its top-5 chunks.

    Returns:
        (support, logs): support maps index name -> list of top chunks; logs
        records the query text and the chunks used.
    """
    queryEmbedding = encoder.encode([inputText], normalize_embeddings=True).astype('float32')
    # NOTE(review): the embedding is already unit-normalised by encode() above,
    # so this second normalisation is a harmless no-op kept for parity.
    faiss.normalize_L2(queryEmbedding)

    logs = {"query": inputText}

    def fetchTop(indexName):
        # One worker per index: look up the preloaded (index, chunks) pair and
        # keep only the chunk payloads from the scored hits.
        faissIndex, chunkList = preloadedIndexes[indexName]
        hits = search(faissIndex, chunkList, queryEmbedding, 5)
        return indexName, [chunk for _, chunk in hits]

    support = {}
    with ThreadPoolExecutor(max_workers=6) as pool:
        pending = [pool.submit(fetchTop, name) for name in preloadedIndexes]
        for future in pending:
            indexName, topChunks = future.result()
            support[indexName] = topChunks

    logs["supportChunksUsed"] = support
    return support, logs
170
+
171
# Pre-load every FAISS index and its chunk store once at import time so each
# retrieval call only pays the search cost. Keys are the retrieval-source names
# used by retrieveSupportChunksParallel and buildGeminiPrompt.
preloadedIndexes = {
    "constitution": loadFaissIndexAndChunks(f"{basePath}/constitution_bgeLarge.index", f"{basePath}/constitution_chunks.json"),
    "ipcSections": loadFaissIndexAndChunks(f"{basePath}/ipc_bgeLarge.index", f"{basePath}/ipc_chunks.json"),
    "ipcCase": loadFaissIndexAndChunks(f"{basePath}/ipc_case_flat.index", f"{basePath}/ipc_case_chunks.json"),
    "statutes": loadFaissIndexAndChunks(f"{basePath}/statute_index.faiss", f"{basePath}/statute_chunks.pkl"),
    "qaTexts": loadFaissIndexAndChunks(f"{basePath}/qa_faiss_index.idx", f"{basePath}/qa_text_chunks.json"),
    "caseLaw": loadFaissIndexAndChunks(f"{basePath}/case_faiss.index", f"{basePath}/case_chunks.pkl")
}
179
+
180
def generateSearchQueryFromCase(caseFacts, geminiModel, verbose=False):
    """Ask Gemini to compress case facts into a short legal search query.

    Args:
        caseFacts: raw case-fact text.
        geminiModel: model exposing generate_content(prompt) -> response.text.
        verbose: when True, print the cleaned query.

    Returns:
        The cleaned query string (label prefix, surrounding quotes, and
        newlines stripped from the model response).
    """
    prompt = f"""
You are a legal assistant for a retrieval system based on Indian criminal law.

Given the case facts below, generate a **concise and focused search query** with **only the most relevant legal keywords**. These should include:

- Specific **IPC sections**
- Core **legal concepts** (e.g., "right of private defence", "criminal breach of trust")
- **Crime type** (e.g., "assault", "corruption")
- Any relevant **procedural issue** (e.g., "absence of intent", "lack of evidence")

Do **not** include:
- Full sentences
- Personal names
- Generic or vague words (e.g., "man", "incident", "case", "situation")

Keep the query under **20 words**. Separate terms by commas if needed. Optimize for legal document search.

Case Facts:
\"\"\"{caseFacts}\"\"\"

Return only the search query, no explanation or prefix:
"""
    response = geminiModel.generate_content(prompt)
    rawText = response.text
    # Strip the label Gemini sometimes echoes, then surrounding quotes/newlines.
    withoutLabel = rawText.replace("Search Query:", "")
    query = withoutLabel.strip().strip('"').replace("\n", "")

    if verbose:
        print("RAG Query:", query)

    return query
210
+
211
def buildGeminiPrompt(inputText, modelVerdict, confidence, support, query=None):
    """Assemble the judge-persona prompt for Gemini.

    Args:
        inputText: case facts.
        modelVerdict: LegalBERT verdict string ("guilty" / "not guilty").
        confidence: LegalBERT confidence in [0, 1] (rendered as a percentage).
        support: mapping of retrieval-source name -> list of chunks.
        query: optional RAG search query to surface in the prompt.

    Returns:
        The full prompt string, ending with the fixed "Final Verdict" /
        "Verdict Changed" answer-format instructions parsed downstream by
        extractFinalVerdict.
    """
    verdictOutcome = "a loss for the person" if modelVerdict.lower() == "guilty" else "in favor of the person"

    prompt = f"""You are a judge evaluating a legal dispute under Indian law.

### Case Facts:
{inputText}

### Initial Model Verdict:
{modelVerdict.upper()} (Confidence: {confidence * 100:.2f}%)
This verdict is interpreted as {verdictOutcome}.
"""

    if query:
        prompt += f"\n### Legal Query Used:\n{query}\n"

    prompt += "\n---\n\n### Legal References Retrieved:\n"

    # One (support key, section heading) pair per retrieval source; rendering
    # them in a loop replaces the six copy-pasted blocks of the original while
    # producing byte-identical output.
    referenceSections = [
        ("constitution", "Constitution Articles"),
        ("ipcSections", "IPC Sections"),
        ("ipcCase", "IPC Case Law"),
        ("statutes", "Statutes"),
        ("qaTexts", "QA Texts"),
        ("caseLaw", "General Case Law"),
    ]
    for key, heading in referenceSections:
        prompt += f"\n#### {heading} (Top 5):\n"
        for i, item in enumerate(support.get(key, [])):
            prompt += f"- {i+1}. {str(item)}\n"

    prompt += """

---

### Instructions to the Judge (You):

1. Review the legal materials provided:
- Identify which Constitution articles, IPC sections, statutes, and case laws are relevant to the facts.
- Also note and explain which retrieved references are **not applicable** or irrelevant.

2. If relevant past cases appear in the retrieved materials, summarize them and analyze whether they support or contradict the model’s verdict.

3. Using the above, assess the model's prediction:
- If confidence is below 60%, you may revise or retain it.
- If confidence is 60% or higher, retain unless clear legal grounds exist to challenge it.

4. Provide a thorough and formal legal explanation that:
- Justifies the final decision using legal logic
- Cites relevant IPCs, constitutional provisions, statutes, and precedents
- Explains any reasoning for overriding the model's prediction, if applicable

5. Conclude with the following lines, formatted as shown:

Final Verdict: Guilty or Not Guilty
Verdict Changed: Yes or No

Respond in the tone of a formal Indian judge. Your explanation should reflect reasoning, neutrality, and respect for legal procedure.
"""
    return prompt
280
+
281
import re

def extractFinalVerdict(geminiOutput):
    """Parse Gemini's free-text ruling for the two trailing summary lines.

    Args:
        geminiOutput: full text returned by the judge model.

    Returns:
        (finalVerdict, verdictChanged):
          finalVerdict   - "guilty" / "not guilty", or None when no verdict
                           line is found.
          verdictChanged - "changed" when the model's verdict was overturned,
                           else "not changed".

    FIX: the separator now also accepts '*' so the markdown-bold form Gemini
    frequently emits ("**Final Verdict:** Guilty") is recognised; the original
    pattern only matched the plain "Final Verdict: Guilty" form.
    """
    verdictMatch = re.search(r"final verdict[\s*:\-]+(guilty|not guilty)", geminiOutput, re.IGNORECASE)
    changedMatch = re.search(r"verdict changed[\s*:\-]+(yes|no)", geminiOutput, re.IGNORECASE)

    finalVerdict = verdictMatch.group(1).lower() if verdictMatch else None
    verdictChanged = "changed" if changedMatch and changedMatch.group(1).lower() == "yes" else "not changed"

    return finalVerdict, verdictChanged
291
+
292
def evaluateCaseWithGemini(inputText, modelVerdict, confidence, retrieveFn, geminiQueryModel=None):
    """Full RAG pipeline step: retrieve legal support, prompt Gemini as a judge,
    and parse its final ruling.

    Args:
        inputText: case facts.
        modelVerdict: LegalBERT verdict string ("guilty" / "not guilty").
        confidence: LegalBERT confidence in [0, 1].
        retrieveFn: retrieval function used when no query model is given.
        geminiQueryModel: optional Gemini model for query rewriting; when set,
            dual retrieval (raw facts + generated query) is used instead.

    Returns:
        A log dict with the prompt, Gemini output, parsed verdict, and all
        inputs. On any failure, a dict carrying an "error" message with every
        result field set to None (best-effort: callers never see an exception).
    """
    try:
        if geminiQueryModel:
            support, searchQuery = retrieveDualSupportChunks(inputText, geminiQueryModel)
        else:
            support, _ = retrieveFn(inputText)
            searchQuery = inputText

        prompt = buildGeminiPrompt(inputText, modelVerdict, confidence, support, searchQuery)
        # NOTE(review): the judgment is generated with the module-level `model`,
        # not geminiQueryModel — confirm that is intentional.
        response = model.generate_content(prompt)
        geminiOutput = response.text

        finalVerdict, verdictChanged = extractFinalVerdict(geminiOutput)

        logs = {
            "inputText": inputText,
            "modelVerdict": modelVerdict,
            "confidence": confidence,
            "support": support,
            "promptToGemini": prompt,
            "geminiOutput": geminiOutput,
            "finalVerdictByGemini": finalVerdict,
            "verdictChanged": verdictChanged,
            "ragSearchQuery": searchQuery
        }

        return logs

    except Exception as e:
        # Deliberate broad catch: surface the error in the log dict so batch
        # evaluation loops keep running.
        return dict(
            error=str(e),
            inputText=inputText,
            modelVerdict=modelVerdict,
            confidence=confidence,
            ragSearchQuery=None,
            support=None,
            promptToGemini=None,
            geminiOutput=None,
            finalVerdictByGemini=None,
            verdictChanged=None
        )
333
+
334
import pandas as pd

# Load the ILDC test split and run the full pipeline on a single sample case.
df=pd.read_csv('/content/drive/MyDrive/Extracted/LegalRAGSystem/ILDC/test.csv')

# Notebook display of the ground-truth label for row 1971 (for manual comparison).
df['Label'][1971]

inputText = df['Input'][1971]

verdict = predictVerdict(inputText)
confidence = getConfidence(inputText)

logs = evaluateCaseWithGemini(
    inputText=inputText,
    modelVerdict=verdict,
    confidence=confidence,
    retrieveFn=retrieveSupportChunksParallel,
    geminiQueryModel=model
)

print("🔍 Query sent to RAG:", logs["ragSearchQuery"])
print(logs['modelVerdict'])
print(logs['confidence'])
# print("\n📜 Prompt to Gemini:\n", logs["promptToGemini"])
print("\n🧑‍⚖️ Gemini Verdict Output:\n", logs["geminiOutput"])
print("\n✅ Final Verdict:", logs["finalVerdictByGemini"])
print("🔁 Verdict Changed:", logs["verdictChanged"])
360
+
361
# NOTE: disabled batch-evaluation loop — samples 5 random ILDC rows and measures
# Gemini's final-verdict accuracy against the ground-truth labels. Kept for
# reference; re-enable by uncommenting.
# import random

# sampleIndices = random.sample(range(len(df)), 5)
# correctCount = 0
# total = 0

# for idx in sampleIndices:
# inputText = df['Input'][idx]
# trueLabel = int(df['Label'][idx])

# verdict = predictVerdict(inputText)
# confidence = getConfidence(inputText)

# result = evaluateCaseWithGemini(
# inputText=inputText,
# modelVerdict=verdict,
# confidence=confidence,
# retrieveFn=retrieveSupportChunksParallel,
# geminiQueryModel=model
# )

# predicted = result.get("finalVerdictByGemini")
# if predicted is None:
# continue

# predictedLabel = 1 if predicted.lower() == "guilty" else 0

# print("Index:", idx)
# print("True Label:", trueLabel)
# print("Predicted Verdict:", predicted)
# print("Verdict Changed:", result.get("verdictChanged"))
# print("Match:", predictedLabel == trueLabel)
# print("----")

# correctCount += int(predictedLabel == trueLabel)
# total += 1

# print("Samples Evaluated:", total)
# print("Gemini Final Verdict Accuracy:", correctCount / total if total else 0)
400
+