|
|
| import gradio as gr |
| import json |
| import pickle |
| import numpy as np |
| import faiss |
| import string |
| import re |
| import urllib.parse |
| from sentence_transformers import SentenceTransformer |
| from transformers import pipeline |
| from rank_bm25 import BM25Okapi |
|
|
# Load the retrieval corpus and its prebuilt indexes once, at import time.
# `passages` and `passage_meta` are indexed by the same position elsewhere in
# this file, so both files must describe the same passages in the same order.
print("Loading data...")

# JSON text is UTF-8; pin the codec so loading never depends on the
# platform's locale default encoding.
with open("passages.json", "r", encoding="utf-8") as f:
    passages = json.load(f)
with open("passage_meta.json", "r", encoding="utf-8") as f:
    passage_meta = json.load(f)

# SECURITY: unpickling can execute arbitrary code. Only ship a bm25.pkl that
# was produced by a trusted build of this project.
with open("bm25.pkl", "rb") as f:
    bm25 = pickle.load(f)

faiss_index = faiss.read_index("faiss.index")
print("Data loaded!")
|
|
# Instantiate both neural models once at import time so that every request
# reuses them.
print("Loading models...")

# Sentence encoder used to embed the query for the FAISS search.
embedder = SentenceTransformer(
    "sentence-transformers/msmarco-distilbert-base-v3",
    device="cpu",
)

# Extractive question-answering reader applied to the retrieved passages.
# NOTE(review): device=-1 is the transformers convention for "run on CPU".
qa_model = pipeline(
    task="question-answering",
    model="deepset/roberta-base-squad2",
    device=-1,
)

print("All models loaded!")
|
|
def tokenize(text):
    """Split *text* into lower-case keyword tokens.

    ASCII punctuation (``string.punctuation``) is deleted outright rather
    than replaced by a space, so ``"res-judicata"`` becomes ``"resjudicata"``.
    Words of one or two characters are dropped.

    Args:
        text: The raw string to tokenize.

    Returns:
        A list of tokens, each at least three characters long.
    """
    punctuation_remover = str.maketrans("", "", string.punctuation)
    cleaned = text.lower().translate(punctuation_remover)
    return [word for word in cleaned.split() if len(word) >= 3]
|
|
def get_indiankanoon_link(filename, meta):
    """Build the best available IndianKanoon URL for a judgment.

    Resolution order:

    1. If the last ``__``-separated part of *filename* (with ``.pdf``
       removed) is numeric, link straight to that document id.
    2. Otherwise search for ``"<petitioner> vs <respondent>"`` when both
       are present in *meta* and are not placeholder values.
    3. Otherwise search for the first 40 characters of *filename*.
    4. If nothing searchable is left, or the inputs are malformed, fall
       back to the site home page.

    Args:
        filename: Source file name of the judgment.
        meta: Mapping that may carry ``"pet"`` and ``"res"`` entries;
            ``None`` is treated as an empty mapping.

    Returns:
        A URL string. This function never raises for malformed input.
    """
    base_url = "https://indiankanoon.org"
    # Values that mean "no data"; mirrors the placeholders that
    # format_response filters out when it renders case metadata.
    placeholders = ("", "Unknown", "nan")

    try:
        doc_id = filename.replace(".pdf", "").split("__")[-1]
        # isascii() keeps exotic Unicode digits out of the URL path.
        if doc_id.isascii() and doc_id.isdigit():
            return f"{base_url}/doc/{doc_id}/"

        meta = meta or {}
        pet = str(meta.get("pet") or "").strip()
        res = str(meta.get("res") or "").strip()
        if pet not in placeholders and res not in placeholders:
            terms = f"{pet} vs {res}"
        else:
            terms = filename[:40]

        # An empty search box is useless; send the reader to the home page.
        if not terms.strip():
            return base_url
        return f"{base_url}/search/?formInput={urllib.parse.quote(terms)}"
    except (AttributeError, TypeError, ValueError):
        # Non-string filename, non-mapping meta or unencodable text:
        # link building is best-effort, so degrade instead of failing.
        return base_url
|
|
def hybrid_retrieve(query, top_k=5):
    """Rank every passage by a blend of BM25 and dense-vector scores.

    Both score vectors are divided by their own maximum (when that maximum
    is positive) and then combined as ``0.4 * bm25 + 0.6 * dense``.

    Args:
        query: The user's question.
        top_k: Number of passages to return. Zero or a negative value
            yields an empty list.

    Returns:
        Up to *top_k* dicts, best first, each with the keys ``"passage"``,
        ``"score"`` (combined score as a float), ``"metadata"`` and
        ``"filename"``.
    """
    # A negative top_k would otherwise slice "everything but the tail".
    if top_k <= 0:
        return []
    n_passages = len(passages)
    if n_passages == 0:
        return []

    # Sparse side: lexical BM25 scores, one per passage.
    bm25_scores = bm25.get_scores(tokenize(query))
    bm25_max = bm25_scores.max()
    if bm25_max > 0:
        bm25_scores = bm25_scores / bm25_max

    # Dense side: embed the query, L2-normalise it in place and score it
    # against the whole index so every passage gets a value.
    query_vec = embedder.encode([query]).astype("float32")
    faiss.normalize_L2(query_vec)
    dense_scores_raw, dense_indices = faiss_index.search(query_vec, n_passages)

    # Scatter the hits back into passage order in one vectorised step.
    # Slots the index reports as -1 carry no result and keep a score of 0.
    dense_scores = np.zeros(n_passages)
    found = dense_indices[0] != -1
    dense_scores[dense_indices[0][found]] = dense_scores_raw[0][found]
    dense_max = dense_scores.max()
    if dense_max > 0:
        dense_scores = dense_scores / dense_max

    combined = (0.4 * bm25_scores) + (0.6 * dense_scores)
    top_indices = combined.argsort()[::-1][:top_k]
    return [
        {
            "passage": passages[idx],
            "score": float(combined[idx]),
            "metadata": passage_meta[idx]["metadata"],
            "filename": passage_meta[idx]["filename"],
        }
        # Plain ints keep list indexing independent of NumPy scalar types.
        for idx in map(int, top_indices)
    ]
|
|
def extract_answer(question, passages_list, min_score=0.01):
    """Run the QA reader over each passage and return the best answer span.

    Every passage is queried for up to five candidate spans of at most 100
    characters. Candidates scoring above *min_score* are pooled and the
    highest-scoring one wins.

    Args:
        question: The user's question.
        passages_list: Iterable of dicts with ``"passage"``, ``"metadata"``
            and ``"filename"`` keys.
        min_score: Candidates must score strictly above this to be kept.

    Returns:
        A dict with ``"answer"``, ``"score"``, ``"passage"``,
        ``"metadata"`` and ``"filename"``. When no candidate qualifies, a
        fallback dict with an apology message, score ``0.0`` and empty
        source fields is returned.
    """
    candidates = []
    for p in passages_list:
        try:
            results = qa_model(
                question=question,
                context=p["passage"],
                max_answer_len=100,
                top_k=5,
            )
            # The reader may hand back a single dict instead of a list.
            if isinstance(results, dict):
                results = [results]
            candidates.extend(
                {
                    "answer": r["answer"],
                    "score": r["score"],
                    "passage": p["passage"],
                    "metadata": p["metadata"],
                    "filename": p["filename"],
                }
                for r in results
                if r["score"] > min_score
            )
        except Exception as exc:
            # One bad passage must not sink the whole request, but the
            # failure should be visible. `Exception` (unlike a bare except)
            # lets KeyboardInterrupt and SystemExit propagate.
            print(f"Skipping passage after QA failure: {exc!r}")
            continue

    if not candidates:
        return {
            "answer": "I could not find an answer in the available judgments.",
            "score": 0.0,
            "passage": "",
            "metadata": {},
            "filename": "",
        }
    return max(candidates, key=lambda c: c["score"])
|
|
def format_response(answer_dict):
    """Render an answer dict as the Markdown shown in the chat window.

    Layout, each section separated by a blank line:

    * the answer text;
    * the source: a "Source case" block listing the known case fields, or
      the bare file name when none of those fields is usable;
    * a link to the judgment on IndianKanoon;
    * the supporting passage, cut to 800 characters;
    * the confidence as a percentage with one decimal.

    The source and link sections are omitted when there is neither usable
    metadata nor a file name (the "no answer found" case), so the reply
    never points at a judgment that does not exist.

    Args:
        answer_dict: Dict with ``"answer"``, ``"score"``, ``"passage"``,
            ``"metadata"`` and ``"filename"`` keys.

    Returns:
        The Markdown string.
    """
    answer = answer_dict["answer"]
    confidence = round(answer_dict["score"] * 100, 1)
    meta = answer_dict["metadata"] or {}
    filename = answer_dict["filename"]
    passage = answer_dict["passage"]

    # Collect the displayable fields first, so the header is only printed
    # when there is something to put underneath it.
    field_lines = []
    for key, label in (
        ("case_no", "Case"),
        ("pet", "Petitioner"),
        ("res", "Respondent"),
        ("judgment_date", "Date"),
        ("judgment_by", "Judge"),
    ):
        value = meta.get(key, "")
        text = "" if value is None else str(value).strip()
        if text and text not in ("Unknown", "nan"):
            field_lines.append(f"**{label}:** {text}")

    lines = [answer]

    if field_lines:
        lines += ["", "---", "**Source case**", *field_lines]
    elif filename:
        lines += ["", f"**Source:** {filename}"]

    if field_lines or filename:
        link = get_indiankanoon_link(filename, meta)
        lines += ["", f"[View full judgment on IndianKanoon]({link})"]

    if passage:
        excerpt = passage[:800] + "..." if len(passage) > 800 else passage
        lines += ["", "---", "**Relevant excerpt**", f"*{excerpt}*"]

    lines += ["", f"*Confidence: {confidence}%*"]
    return "\n".join(lines)
|
|
def chat(message, history):
    """Answer *message* and stream chat-history updates to the UI.

    This is a generator. Each yielded value is a ``(history, "")`` pair:
    the updated list of ``(user_message, bot_reply)`` tuples and an empty
    string that clears the input box.

    A blank message yields the history unchanged, exactly once. Otherwise
    the function first yields a "Searching judgments..." placeholder and
    then yields again with the placeholder replaced by the formatted
    answer.

    Args:
        message: Text typed by the user; may be empty or ``None``.
        history: Existing list of ``(user, bot)`` tuples, or ``None``.

    Yields:
        ``(history, "")`` tuples as described above.
    """
    # Work on a copy so the caller's list is never mutated.
    history = list(history or [])

    if not message or not message.strip():
        # A `return value` inside a generator is never delivered to the
        # consumer, so the unchanged state has to be yielded explicitly.
        yield history, ""
        return

    history.append((message, "Searching judgments..."))
    yield history, ""

    retrieved = hybrid_retrieve(message, top_k=5)
    best = extract_answer(message, retrieved)
    history[-1] = (message, format_response(best))
    yield history, ""
|
|
# Stylesheet handed to gr.Blocks(css=...). It themes the page background,
# the header banner, the chat bubbles, the input row, the buttons, the
# example prompts and the disclaimer, and hides the default footer.
CSS = """
@import url(https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap);

* { font-family: Inter, sans-serif !important; box-sizing: border-box; }

body { background: #0f1117 !important; }

.gradio-container {
max-width: 900px !important;
margin: 0 auto !important;
background: #0f1117 !important;
padding: 0 16px !important;
}

.header-wrap {
background: linear-gradient(135deg, #1a1f2e 0%, #161b27 100%);
border: 1px solid #2d3348;
border-radius: 16px;
padding: 24px;
margin: 16px 0;
text-align: center;
}

.app-title {
font-size: 28px !important;
font-weight: 700 !important;
background: linear-gradient(135deg, #c9a84c, #f0d080, #c9a84c);
-webkit-background-clip: text !important;
-webkit-text-fill-color: transparent !important;
background-clip: text !important;
margin: 0 0 6px !important;
letter-spacing: -0.5px;
}

.app-subtitle {
color: #8892a4 !important;
font-size: 14px !important;
margin: 0 0 16px !important;
}

.stats-row {
display: flex;
gap: 8px;
justify-content: center;
flex-wrap: wrap;
}

.stat-pill {
background: #1e2436;
border: 1px solid #2d3348;
border-radius: 20px;
padding: 5px 14px;
font-size: 12px;
color: #c9a84c;
font-weight: 500;
}

#chatbot {
background: #161b27 !important;
border: 1px solid #2d3348 !important;
border-radius: 16px !important;
}

#chatbot .message.user {
background: #1e2d4a !important;
border: 1px solid #2d4a7a !important;
border-radius: 16px 16px 4px 16px !important;
color: #e8eaf0 !important;
padding: 12px 16px !important;
max-width: 75% !important;
margin-left: auto !important;
}

#chatbot .message.bot {
background: #1a1f2e !important;
border: 1px solid #2d3348 !important;
border-radius: 16px 16px 16px 4px !important;
color: #e8eaf0 !important;
padding: 12px 16px !important;
max-width: 88% !important;
}

#chatbot .message.bot a {
color: #c9a84c !important;
text-decoration: underline !important;
}

.input-wrap {
background: #161b27;
border: 1px solid #2d3348;
border-radius: 14px;
padding: 8px 8px 8px 16px;
display: flex;
align-items: center;
gap: 8px;
margin-top: 10px;
}

#msg-box textarea {
background: transparent !important;
border: none !important;
color: #e8eaf0 !important;
font-size: 15px !important;
outline: none !important;
box-shadow: none !important;
}

#msg-box textarea::placeholder { color: #4a5568 !important; }

#send-btn {
background: linear-gradient(135deg, #c9a84c, #e8c96a) !important;
color: #0f1117 !important;
border: none !important;
border-radius: 10px !important;
font-weight: 600 !important;
font-size: 14px !important;
padding: 10px 20px !important;
min-width: 80px !important;
}

#send-btn:hover {
background: linear-gradient(135deg, #e8c96a, #f0d080) !important;
}

#clear-btn {
background: #1a1f2e !important;
border: 1px solid #2d3348 !important;
border-radius: 10px !important;
color: #8892a4 !important;
font-size: 13px !important;
padding: 8px 16px !important;
}

#clear-btn:hover {
background: #1e2436 !important;
color: #c9a84c !important;
}

.example-label {
color: #8892a4;
font-size: 12px;
margin: 12px 0 6px;
text-transform: uppercase;
letter-spacing: 0.08em;
font-weight: 500;
}

.example-btn button {
background: #1a1f2e !important;
border: 1px solid #2d3348 !important;
border-radius: 20px !important;
font-size: 12px !important;
color: #c9a84c !important;
padding: 6px 14px !important;
font-weight: 500 !important;
transition: all 0.2s !important;
}

.example-btn button:hover {
background: #1e2d4a !important;
border-color: #c9a84c !important;
}

.disclaimer {
text-align: center;
font-size: 11px;
color: #4a5568;
margin-top: 12px;
padding-bottom: 16px;
}

footer { display: none !important; }
"""
|
|
# Sample questions offered as one-click buttons under the input box.
# The UI renders them as two rows: the first four, then the remainder.
EXAMPLES = [
    "What is the punishment for murder?",
    "What are the grounds for bail?",
    "What is habeas corpus?",
    "What is the burden of proof?",
    "What is anticipatory bail?",
    "What is contempt of court?",
    "What is res judicata?",
    "What is the right to legal aid?",
]
|
|
# Opening chat history: one (user, bot) pair with no user turn (None), so
# only the bot's greeting is shown. It is also the state restored by the
# "Clear conversation" button.
INITIAL = [
    (
        None,
        "Namaste! I am **LexBot** — your Indian Supreme Court legal research assistant.\n\n"
        "I am trained on **1000 Indian Supreme Court judgments** and can answer questions "
        "about Indian law, legal principles, and court procedures.\n\n"
        "Each answer includes a **direct link to the full judgment** on IndianKanoon.org.\n\n"
        "Ask me anything about Indian law!",
    )
]
|
|
# Assemble the page top to bottom: header banner, chat window, input row,
# clear button, example prompts, disclaimer, and finally the event wiring.
with gr.Blocks(css=CSS, title="LexBot - Indian Legal Assistant") as demo:

    # Static header with the app name and the headline metric pills.
    gr.HTML("""
<div class="header-wrap">
<div style="font-size:36px;margin-bottom:8px">⚖️</div>
<div class="app-title">LexBot</div>
<div class="app-subtitle">
Indian Supreme Court Legal Research Assistant
</div>
<div class="stats-row">
<span class="stat-pill">MRR@5: 86.61%</span>
<span class="stat-pill">F1: 8.73%</span>
<span class="stat-pill">Response: 96.67%</span>
<span class="stat-pill">1000 judgments</span>
<span class="stat-pill">RoBERTa + FAISS</span>
</div>
</div>
""")

    # Conversation pane, pre-filled with the greeting.
    chatbot = gr.Chatbot(
        value=INITIAL,
        elem_id="chatbot",
        height=500,
        show_label=False,
        bubble_full_width=False,
        show_copy_button=True,
    )

    # Question box with the send button next to it.
    with gr.Row(elem_classes="input-wrap"):
        msg_input = gr.Textbox(
            placeholder="Ask a legal question e.g. What is habeas corpus?",
            show_label=False,
            scale=9,
            container=False,
            lines=1,
            elem_id="msg-box",
        )
        send_btn = gr.Button("Send", elem_id="send-btn", scale=1, min_width=80)

    with gr.Row():
        clear_btn = gr.Button("Clear conversation", elem_id="clear-btn", scale=1)

    gr.HTML("<div class=\'example-label\'>Try these questions</div>")

    # Two rows of example prompts; clicking one copies its text into the
    # input box. `x=q` freezes the current question inside each lambda.
    for example_group in (EXAMPLES[:4], EXAMPLES[4:]):
        with gr.Row():
            for q in example_group:
                example_btn = gr.Button(q, elem_classes="example-btn", size="sm")
                example_btn.click(fn=lambda x=q: x, outputs=msg_input)

    gr.HTML("""
<div class="disclaimer">
Answers extracted directly from Supreme Court judgment texts.
Click IndianKanoon links to read full judgments.
Always verify legal information with a qualified advocate.
</div>
""")

    # Pressing Enter in the box and clicking Send both run `chat`.
    for register in (msg_input.submit, send_btn.click):
        register(fn=chat, inputs=[msg_input, chatbot], outputs=[chatbot, msg_input])

    # Reset the conversation to the greeting and empty the input box.
    clear_btn.click(fn=lambda: (INITIAL, ""), outputs=[chatbot, msg_input])
|
|
# Start the web server only when this file is executed as a script, so that
# importing the module (tests, tooling) does not block on a running server.
if __name__ == "__main__":
    # "0.0.0.0" listens on every network interface, on port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)
|
|