import gradio as gr import json import pickle import numpy as np import faiss import string import re import urllib.parse from sentence_transformers import SentenceTransformer from transformers import pipeline from rank_bm25 import BM25Okapi print("Loading data...") with open("passages.json", "r") as f: passages = json.load(f) with open("passage_meta.json", "r") as f: passage_meta = json.load(f) with open("bm25.pkl", "rb") as f: bm25 = pickle.load(f) faiss_index = faiss.read_index("faiss.index") print("Data loaded!") print("Loading models...") embedder = SentenceTransformer( "sentence-transformers/msmarco-distilbert-base-v3", device="cpu" ) qa_model = pipeline( task="question-answering", model="deepset/roberta-base-squad2", device=-1 ) print("All models loaded!") def tokenize(text): text = text.lower() text = text.translate(str.maketrans("", "", string.punctuation)) return [t for t in text.split() if len(t) > 2] def get_indiankanoon_link(filename, meta): try: parts = filename.replace(".pdf", "").split("__") docid = parts[-1] if docid.isdigit(): return f"https://indiankanoon.org/doc/{docid}/" pet = meta.get("pet", "") res = meta.get("res", "") if pet and pet != "Unknown" and res and res != "Unknown": query = urllib.parse.quote(f"{pet} vs {res}") else: query = urllib.parse.quote(filename[:40]) return f"https://indiankanoon.org/search/?formInput={query}" except: return "https://indiankanoon.org" def hybrid_retrieve(query, top_k=5): bm25_scores = bm25.get_scores(tokenize(query)) bm25_max = bm25_scores.max() if bm25_max > 0: bm25_scores = bm25_scores / bm25_max query_vec = embedder.encode([query]).astype("float32") faiss.normalize_L2(query_vec) dense_scores_raw, dense_indices = faiss_index.search( query_vec, len(passages) ) dense_scores = np.zeros(len(passages)) for rank, idx in enumerate(dense_indices[0]): if idx != -1: dense_scores[idx] = dense_scores_raw[0][rank] dense_max = dense_scores.max() if dense_max > 0: dense_scores = dense_scores / dense_max combined = (0.4 * bm25_scores) + (0.6 * dense_scores) top_indices = combined.argsort()[::-1][:top_k] return [{ "passage" : passages[idx], "score" : float(combined[idx]), "metadata": passage_meta[idx]["metadata"], "filename": passage_meta[idx]["filename"], } for idx in top_indices] def extract_answer(question, passages_list): all_answers = [] for p in passages_list: try: results = qa_model( question=question, context=p["passage"], max_answer_len=100, top_k=5 ) if isinstance(results, dict): results = [results] for r in results: if r["score"] > 0.01: all_answers.append({ "answer" : r["answer"], "score" : r["score"], "passage" : p["passage"], "metadata": p["metadata"], "filename": p["filename"], }) except: continue if not all_answers: return { "answer" : "I could not find an answer in the available judgments.", "score" : 0.0, "passage" : "", "metadata": {}, "filename": "" } return max(all_answers, key=lambda x: x["score"]) def format_response(answer_dict): ans = answer_dict["answer"] conf = round(answer_dict["score"] * 100, 1) meta = answer_dict["metadata"] filename = answer_dict["filename"] passage = answer_dict["passage"] link = get_indiankanoon_link(filename, meta) lines = [] lines.append(ans) lines.append("") if meta and any(v not in ["Unknown","nan",""] for v in meta.values()): lines.append("---") lines.append("**Source case**") for key, label in [ ("case_no","Case"),("pet","Petitioner"), ("res","Respondent"),("judgment_date","Date"), ("judgment_by","Judge") ]: v = meta.get(key,"") if v and v not in ["Unknown","nan"]: lines.append(f"**{label}:** {v}") else: lines.append(f"**Source:** {filename}") lines.append("") lines.append(f"[View full judgment on IndianKanoon]({link})") if passage: lines.append("") lines.append("---") lines.append("**Relevant excerpt**") short = passage[:800]+"..." if len(passage)>800 else passage lines.append(f"*{short}*") lines.append("") lines.append(f"*Confidence: {conf}%*") return "\n".join(lines) def chat(message, history): if not message.strip(): return history, "" history = history or [] history.append((message, "Searching judgments...")) yield history, "" retrieved = hybrid_retrieve(message, top_k=5) answer = extract_answer(message, retrieved) response = format_response(answer) history[-1] = (message, response) yield history, "" CSS = """ @import url(https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap); * { font-family: Inter, sans-serif !important; box-sizing: border-box; } body { background: #0f1117 !important; } .gradio-container { max-width: 900px !important; margin: 0 auto !important; background: #0f1117 !important; padding: 0 16px !important; } .header-wrap { background: linear-gradient(135deg, #1a1f2e 0%, #161b27 100%); border: 1px solid #2d3348; border-radius: 16px; padding: 24px; margin: 16px 0; text-align: center; } .app-title { font-size: 28px !important; font-weight: 700 !important; background: linear-gradient(135deg, #c9a84c, #f0d080, #c9a84c); -webkit-background-clip: text !important; -webkit-text-fill-color: transparent !important; background-clip: text !important; margin: 0 0 6px !important; letter-spacing: -0.5px; } .app-subtitle { color: #8892a4 !important; font-size: 14px !important; margin: 0 0 16px !important; } .stats-row { display: flex; gap: 8px; justify-content: center; flex-wrap: wrap; } .stat-pill { background: #1e2436; border: 1px solid #2d3348; border-radius: 20px; padding: 5px 14px; font-size: 12px; color: #c9a84c; font-weight: 500; } #chatbot { background: #161b27 !important; border: 1px solid #2d3348 !important; border-radius: 16px !important; } #chatbot .message.user { background: #1e2d4a !important; border: 1px solid #2d4a7a !important; border-radius: 16px 16px 4px 16px !important; color: #e8eaf0 !important; padding: 12px 16px !important; max-width: 75% !important; margin-left: auto !important; } #chatbot .message.bot { background: #1a1f2e !important; border: 1px solid #2d3348 !important; border-radius: 16px 16px 16px 4px !important; color: #e8eaf0 !important; padding: 12px 16px !important; max-width: 88% !important; } #chatbot .message.bot a { color: #c9a84c !important; text-decoration: underline !important; } .input-wrap { background: #161b27; border: 1px solid #2d3348; border-radius: 14px; padding: 8px 8px 8px 16px; display: flex; align-items: center; gap: 8px; margin-top: 10px; } #msg-box textarea { background: transparent !important; border: none !important; color: #e8eaf0 !important; font-size: 15px !important; outline: none !important; box-shadow: none !important; } #msg-box textarea::placeholder { color: #4a5568 !important; } #send-btn { background: linear-gradient(135deg, #c9a84c, #e8c96a) !important; color: #0f1117 !important; border: none !important; border-radius: 10px !important; font-weight: 600 !important; font-size: 14px !important; padding: 10px 20px !important; min-width: 80px !important; } #send-btn:hover { background: linear-gradient(135deg, #e8c96a, #f0d080) !important; } #clear-btn { background: #1a1f2e !important; border: 1px solid #2d3348 !important; border-radius: 10px !important; color: #8892a4 !important; font-size: 13px !important; padding: 8px 16px !important; } #clear-btn:hover { background: #1e2436 !important; color: #c9a84c !important; } .example-label { color: #8892a4; font-size: 12px; margin: 12px 0 6px; text-transform: uppercase; letter-spacing: 0.08em; font-weight: 500; } .example-btn button { background: #1a1f2e !important; border: 1px solid #2d3348 !important; border-radius: 20px !important; font-size: 12px !important; color: #c9a84c !important; padding: 6px 14px !important; font-weight: 500 !important; transition: all 0.2s !important; } .example-btn button:hover { background: #1e2d4a !important; border-color: #c9a84c !important; } .disclaimer { text-align: center; font-size: 11px; color: #4a5568; margin-top: 12px; padding-bottom: 16px; } footer { display: none !important; } """ EXAMPLES = [ "What is the punishment for murder?", "What are the grounds for bail?", "What is habeas corpus?", "What is the burden of proof?", "What is anticipatory bail?", "What is contempt of court?", "What is res judicata?", "What is the right to legal aid?", ] INITIAL = [( None, "Namaste! I am **LexBot** — your Indian Supreme Court legal research assistant.\n\n" "I am trained on **1000 Indian Supreme Court judgments** and can answer questions " "about Indian law, legal principles, and court procedures.\n\n" "Each answer includes a **direct link to the full judgment** on IndianKanoon.org.\n\n" "Ask me anything about Indian law!" )] with gr.Blocks(css=CSS, title="LexBot - Indian Legal Assistant") as demo: gr.HTML("""