# Source: Hugging Face Space irthayag/lexbot
# Space status at capture time: Runtime error
# File size: 12,566 bytes

import gradio as gr
import json
import pickle
import numpy as np
import faiss
import string
import re
import urllib.parse
from sentence_transformers import SentenceTransformer
from transformers import pipeline
from rank_bm25 import BM25Okapi

# Retrieval artefacts, read once at import time from the working directory.
# hybrid_retrieve() indexes `passages` and `passage_meta` by the same
# position, so the two files are expected to be aligned entry-for-entry.
print("Loading data...")
# Pass the encoding explicitly: without it open() falls back to the
# platform's locale encoding, which can mis-decode non-ASCII judgment text.
with open("passages.json", "r", encoding="utf-8") as f:
    passages = json.load(f)
with open("passage_meta.json", "r", encoding="utf-8") as f:
    passage_meta = json.load(f)
# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only ship a bm25.pkl produced by a trusted build step.
with open("bm25.pkl", "rb") as f:
    bm25 = pickle.load(f)
faiss_index = faiss.read_index("faiss.index")
print("Data loaded!")

print("Loading models...")
# Sentence encoder that turns a query into the vector searched in FAISS;
# explicitly pinned to the CPU.
embedder = SentenceTransformer(
    "sentence-transformers/msmarco-distilbert-base-v3",
    device="cpu",
)
# Extractive question-answering pipeline used by extract_answer().
# NOTE(review): device=-1 is presumably the pipeline's "run on CPU" value;
# confirm against the installed transformers version.
qa_model = pipeline(
    "question-answering",
    model="deepset/roberta-base-squad2",
    device=-1,
)
print("All models loaded!")

def tokenize(text):
    """Split *text* into lowercase word tokens for BM25 scoring.

    ASCII punctuation is deleted (not replaced by a space), the result is
    split on whitespace, and tokens of one or two characters are dropped.
    """
    strip_punctuation = str.maketrans("", "", string.punctuation)
    cleaned = text.lower().translate(strip_punctuation)
    kept = []
    for word in cleaned.split():
        # Very short tokens are discarded.
        if len(word) > 2:
            kept.append(word)
    return kept

def get_indiankanoon_link(filename, meta):
    """Return the best available IndianKanoon URL for a judgment.

    Resolution order:

    1. If the last ``__``-separated part of *filename* (with ``.pdf``
       removed) is all digits, it is used as a document id and a direct
       ``/doc/<id>/`` link is returned.
    2. Otherwise a search link is built from ``"<pet> vs <res>"`` when both
       party names in *meta* are usable, or else from the first 40
       characters of *filename*.
    3. If there is nothing to search for, or the inputs are malformed
       (e.g. ``None``), the site's home page is returned.

    Args:
        filename: Source file name of the judgment.
        meta: Mapping of judgment metadata; only ``"pet"`` and ``"res"``
            are read.

    Returns:
        A URL string; this function never raises for ordinary bad input.
    """
    base_url = "https://indiankanoon.org"
    # Same placeholder values that format_response() treats as "no data".
    placeholders = ("", "Unknown", "nan")

    def is_known(value):
        return bool(value) and str(value).strip() not in placeholders

    try:
        docid = filename.replace(".pdf", "").split("__")[-1]
        if docid.isdigit():
            return f"{base_url}/doc/{docid}/"

        pet = meta.get("pet", "")
        res = meta.get("res", "")
        if is_known(pet) and is_known(res):
            search_text = f"{pet} vs {res}"
        else:
            search_text = filename[:40]

        # An empty query would link to a blank search page; the home page
        # is a more honest fallback.
        if not search_text:
            return base_url
        query = urllib.parse.quote(search_text)
        return f"{base_url}/search/?formInput={query}"
    except Exception:
        # Best effort: a bad filename/meta must never break the response,
        # but KeyboardInterrupt/SystemExit are allowed to propagate.
        return base_url

def hybrid_retrieve(query, top_k=5, bm25_weight=0.4, dense_weight=0.6):
    """Rank every passage by a weighted mix of BM25 and dense similarity.

    Both score vectors are divided by their own maximum (only when that
    maximum is positive) before being combined, so the weights act on
    comparable scales.

    Args:
        query: The user's question.
        top_k: Number of passages to return.
        bm25_weight: Weight of the normalised BM25 score.
        dense_weight: Weight of the normalised dense (FAISS) score.

    Returns:
        A list of at most *top_k* dicts, best first, each with the keys
        ``"passage"``, ``"score"``, ``"metadata"`` and ``"filename"``.
    """
    # Sparse side: one BM25 score per passage.
    bm25_scores = bm25.get_scores(tokenize(query))
    bm25_max = bm25_scores.max()
    if bm25_max > 0:
        bm25_scores = bm25_scores / bm25_max

    # Dense side: embed the query, L2-normalise it in place and search the
    # whole index so that every passage can receive a dense score.
    query_vec = embedder.encode([query]).astype("float32")
    faiss.normalize_L2(query_vec)
    dense_scores_raw, dense_indices = faiss_index.search(
        query_vec, len(passages)
    )

    # Scatter the ranked results back into passage order in one vectorised
    # step. Ids equal to -1 are skipped (presumably FAISS's marker for an
    # unfilled result slot); those passages keep a dense score of 0.
    dense_scores = np.zeros(len(passages))
    hit_ids = dense_indices[0]
    found = hit_ids != -1
    dense_scores[hit_ids[found]] = dense_scores_raw[0][found]
    dense_max = dense_scores.max()
    if dense_max > 0:
        dense_scores = dense_scores / dense_max

    combined = (bm25_weight * bm25_scores) + (dense_weight * dense_scores)
    top_indices = combined.argsort()[::-1][:top_k]
    return [{
        "passage": passages[idx],
        "score": float(combined[idx]),
        "metadata": passage_meta[idx]["metadata"],
        "filename": passage_meta[idx]["filename"],
    } for idx in top_indices]

def extract_answer(question, passages_list):
    """Run the QA model over each passage and return the best answer span.

    Args:
        question: The user's question.
        passages_list: Retrieved passages; each item is a dict with the
            keys ``"passage"``, ``"metadata"`` and ``"filename"``.

    Returns:
        A dict with the keys ``"answer"``, ``"score"``, ``"passage"``,
        ``"metadata"`` and ``"filename"`` for the highest-scoring candidate
        whose score exceeds 0.01. If there is no such candidate, a fallback
        dict with a "could not find an answer" message, score 0.0 and empty
        source fields is returned.
    """
    all_answers = []
    for p in passages_list:
        try:
            results = qa_model(
                question=question, context=p["passage"],
                max_answer_len=100, top_k=5
            )
            # A single candidate comes back as a bare dict; normalise it.
            if isinstance(results, dict):
                results = [results]
            for r in results:
                if r["score"] > 0.01:
                    all_answers.append({
                        "answer": r["answer"],
                        "score": r["score"],
                        "passage": p["passage"],
                        "metadata": p["metadata"],
                        "filename": p["filename"],
                    })
        except Exception as exc:
            # Best effort: one bad passage must not sink the whole query,
            # but the failure is reported instead of silently dropped and
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print(f"QA model failed on a passage: {exc!r}")
            continue
    if not all_answers:
        return {
            "answer": "I could not find an answer in the available judgments.",
            "score": 0.0,
            "passage": "",
            "metadata": {},
            "filename": ""
        }
    return max(all_answers, key=lambda x: x["score"])

def format_response(answer_dict):
    """Render an answer dict as the Markdown shown in the chat window.

    The output is a sequence of sections separated by blank lines:

    1. the answer text;
    2. the source, either a "Source case" block listing the usable
       metadata fields or, failing that, the source file name;
    3. a link to the judgment on IndianKanoon;
    4. an excerpt of the passage (truncated to 800 characters);
    5. the model confidence as a percentage.

    Sections 2 and 3 are omitted when there is neither a file name nor any
    usable metadata (the "no answer found" case), and section 4 is omitted
    when the passage is empty.

    Args:
        answer_dict: Dict with the keys ``"answer"``, ``"score"``,
            ``"metadata"``, ``"filename"`` and ``"passage"``.

    Returns:
        The Markdown string.
    """
    ans = answer_dict["answer"]
    conf = round(answer_dict["score"] * 100, 1)
    meta = answer_dict["metadata"] or {}
    filename = answer_dict["filename"]
    passage = answer_dict["passage"]

    # Values that mean "no data" in the metadata.
    placeholders = ("Unknown", "nan", "")
    field_lines = []
    for key, label in (
        ("case_no", "Case"),
        ("pet", "Petitioner"),
        ("res", "Respondent"),
        ("judgment_date", "Date"),
        ("judgment_by", "Judge"),
    ):
        value = meta.get(key, "")
        if value and value not in placeholders:
            field_lines.append(f"**{label}:** {value}")

    sections = [ans]

    # Decide on the displayed fields themselves, so a metadata dict whose
    # only usable values are not displayed cannot produce an empty
    # "Source case" block.
    if field_lines:
        sections.append("\n".join(["---", "**Source case**", *field_lines]))
    elif filename:
        sections.append(f"**Source:** {filename}")

    # Without any source there is no judgment to link to.
    if field_lines or filename:
        link = get_indiankanoon_link(filename, meta)
        sections.append(f"[View full judgment on IndianKanoon]({link})")

    if passage:
        short = passage[:800] + "..." if len(passage) > 800 else passage
        sections.append(
            "\n".join(["---", "**Relevant excerpt**", f"*{short}*"])
        )

    sections.append(f"*Confidence: {conf}%*")
    return "\n\n".join(sections)

def chat(message, history):
    """Answer one chat message, streaming a placeholder and then the result.

    This is a generator used as a Gradio event handler. Every yielded
    value is a ``(history, textbox_value)`` pair: the updated conversation
    and an empty string that clears the input box.

    Args:
        message: Text typed by the user (may be empty or ``None``).
        history: Conversation so far as a list of ``(user, bot)`` pairs,
            or ``None``.

    Yields:
        First the history with a "Searching judgments..." placeholder,
        then the history with the final formatted response. For a blank
        message the unchanged history is yielded once.
    """
    history = history or []
    if not message or not message.strip():
        # In a generator a `return value` is never delivered to the caller,
        # so the unchanged state has to be yielded explicitly.
        yield history, ""
        return

    history.append((message, "Searching judgments..."))
    yield history, ""

    try:
        retrieved = hybrid_retrieve(message, top_k=5)
        answer = extract_answer(message, retrieved)
        response = format_response(answer)
    except Exception as exc:
        # Top-level boundary: report the failure and replace the
        # placeholder so the chat is not stuck on "Searching judgments...".
        print(f"chat failed: {exc!r}")
        response = (
            "Sorry, something went wrong while searching the judgments. "
            "Please try again."
        )
    history[-1] = (message, response)
    yield history, ""

# Custom stylesheet handed to gr.Blocks(css=CSS).
# The rules target the elem_id / elem_classes values assigned in the UI
# section (#chatbot, #msg-box, #send-btn, #clear-btn, .input-wrap,
# .example-btn) and the class names used in the raw gr.HTML snippets
# (.header-wrap, .app-title, .app-subtitle, .stats-row, .stat-pill,
# .example-label, .disclaimer). The last rule hides the page footer.
CSS = """
@import url(https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap);

* { font-family: Inter, sans-serif !important; box-sizing: border-box; }

body { background: #0f1117 !important; }

.gradio-container {
    max-width: 900px !important;
    margin: 0 auto !important;
    background: #0f1117 !important;
    padding: 0 16px !important;
}

.header-wrap {
    background: linear-gradient(135deg, #1a1f2e 0%, #161b27 100%);
    border: 1px solid #2d3348;
    border-radius: 16px;
    padding: 24px;
    margin: 16px 0;
    text-align: center;
}

.app-title {
    font-size: 28px !important;
    font-weight: 700 !important;
    background: linear-gradient(135deg, #c9a84c, #f0d080, #c9a84c);
    -webkit-background-clip: text !important;
    -webkit-text-fill-color: transparent !important;
    background-clip: text !important;
    margin: 0 0 6px !important;
    letter-spacing: -0.5px;
}

.app-subtitle {
    color: #8892a4 !important;
    font-size: 14px !important;
    margin: 0 0 16px !important;
}

.stats-row {
    display: flex;
    gap: 8px;
    justify-content: center;
    flex-wrap: wrap;
}

.stat-pill {
    background: #1e2436;
    border: 1px solid #2d3348;
    border-radius: 20px;
    padding: 5px 14px;
    font-size: 12px;
    color: #c9a84c;
    font-weight: 500;
}

#chatbot {
    background: #161b27 !important;
    border: 1px solid #2d3348 !important;
    border-radius: 16px !important;
}

#chatbot .message.user {
    background: #1e2d4a !important;
    border: 1px solid #2d4a7a !important;
    border-radius: 16px 16px 4px 16px !important;
    color: #e8eaf0 !important;
    padding: 12px 16px !important;
    max-width: 75% !important;
    margin-left: auto !important;
}

#chatbot .message.bot {
    background: #1a1f2e !important;
    border: 1px solid #2d3348 !important;
    border-radius: 16px 16px 16px 4px !important;
    color: #e8eaf0 !important;
    padding: 12px 16px !important;
    max-width: 88% !important;
}

#chatbot .message.bot a {
    color: #c9a84c !important;
    text-decoration: underline !important;
}

.input-wrap {
    background: #161b27;
    border: 1px solid #2d3348;
    border-radius: 14px;
    padding: 8px 8px 8px 16px;
    display: flex;
    align-items: center;
    gap: 8px;
    margin-top: 10px;
}

#msg-box textarea {
    background: transparent !important;
    border: none !important;
    color: #e8eaf0 !important;
    font-size: 15px !important;
    outline: none !important;
    box-shadow: none !important;
}

#msg-box textarea::placeholder { color: #4a5568 !important; }

#send-btn {
    background: linear-gradient(135deg, #c9a84c, #e8c96a) !important;
    color: #0f1117 !important;
    border: none !important;
    border-radius: 10px !important;
    font-weight: 600 !important;
    font-size: 14px !important;
    padding: 10px 20px !important;
    min-width: 80px !important;
}

#send-btn:hover {
    background: linear-gradient(135deg, #e8c96a, #f0d080) !important;
}

#clear-btn {
    background: #1a1f2e !important;
    border: 1px solid #2d3348 !important;
    border-radius: 10px !important;
    color: #8892a4 !important;
    font-size: 13px !important;
    padding: 8px 16px !important;
}

#clear-btn:hover {
    background: #1e2436 !important;
    color: #c9a84c !important;
}

.example-label {
    color: #8892a4;
    font-size: 12px;
    margin: 12px 0 6px;
    text-transform: uppercase;
    letter-spacing: 0.08em;
    font-weight: 500;
}

.example-btn button {
    background: #1a1f2e !important;
    border: 1px solid #2d3348 !important;
    border-radius: 20px !important;
    font-size: 12px !important;
    color: #c9a84c !important;
    padding: 6px 14px !important;
    font-weight: 500 !important;
    transition: all 0.2s !important;
}

.example-btn button:hover {
    background: #1e2d4a !important;
    border-color: #c9a84c !important;
}

.disclaimer {
    text-align: center;
    font-size: 11px;
    color: #4a5568;
    margin-top: 12px;
    padding-bottom: 16px;
}

footer { display: none !important; }
"""

# Sample questions rendered as one-click buttons under the input box.
# The UI shows the first four on one row and the remainder on a second row;
# clicking a button copies its text into the message box.
EXAMPLES = [
    "What is the punishment for murder?",
    "What are the grounds for bail?",
    "What is habeas corpus?",
    "What is the burden of proof?",
    "What is anticipatory bail?",
    "What is contempt of court?",
    "What is res judicata?",
    "What is the right to legal aid?",
]

# Conversation shown when the app starts and restored by the
# "Clear conversation" button. It holds a single (user, bot) pair whose
# user slot is None (presumably so that only the bot greeting is rendered;
# confirm against the installed Gradio Chatbot behaviour).
INITIAL = [(
    None,
    "Namaste! I am **LexBot** — your Indian Supreme Court legal research assistant.\n\n"
    "I am trained on **1000 Indian Supreme Court judgments** and can answer questions "
    "about Indian law, legal principles, and court procedures.\n\n"
    "Each answer includes a **direct link to the full judgment** on IndianKanoon.org.\n\n"
    "Ask me anything about Indian law!"
)]

# Build the single-page chat UI. Components are created top to bottom in
# the order they appear on the page.
with gr.Blocks(title="LexBot - Indian Legal Assistant", css=CSS) as demo:

    # Static banner with the app title and headline metrics.
    gr.HTML("""
    <div class="header-wrap">
        <div style="font-size:36px;margin-bottom:8px">⚖️</div>
        <div class="app-title">LexBot</div>
        <div class="app-subtitle">
            Indian Supreme Court Legal Research Assistant
        </div>
        <div class="stats-row">
            <span class="stat-pill">MRR@5: 86.61%</span>
            <span class="stat-pill">F1: 8.73%</span>
            <span class="stat-pill">Response: 96.67%</span>
            <span class="stat-pill">1000 judgments</span>
            <span class="stat-pill">RoBERTa + FAISS</span>
        </div>
    </div>
    """)

    # Conversation window, pre-filled with the greeting.
    chatbot = gr.Chatbot(
        elem_id="chatbot",
        value=INITIAL,
        height=500,
        show_label=False,
        show_copy_button=True,
        bubble_full_width=False,
    )

    # Question box and Send button share one styled row.
    with gr.Row(elem_classes="input-wrap"):
        msg_input = gr.Textbox(
            elem_id="msg-box",
            placeholder="Ask a legal question e.g. What is habeas corpus?",
            lines=1,
            scale=9,
            container=False,
            show_label=False,
        )
        send_btn = gr.Button("Send", elem_id="send-btn", scale=1, min_width=80)

    with gr.Row():
        clear_btn = gr.Button("Clear conversation", elem_id="clear-btn", scale=1)

    gr.HTML("<div class=\'example-label\'>Try these questions</div>")

    # Two rows of example buttons: the first four questions, then the rest.
    # The default argument binds each button to its own question text.
    for row_examples in (EXAMPLES[:4], EXAMPLES[4:]):
        with gr.Row():
            for q in row_examples:
                example_btn = gr.Button(q, elem_classes="example-btn", size="sm")
                example_btn.click(fn=lambda text=q: text, outputs=msg_input)

    gr.HTML("""
    <div class="disclaimer">
        Answers extracted directly from Supreme Court judgment texts.
        Click IndianKanoon links to read full judgments.
        Always verify legal information with a qualified advocate.
    </div>
    """)

    # Pressing Enter in the textbox and clicking Send run the same handler.
    for trigger in (msg_input.submit, send_btn.click):
        trigger(
            fn=chat,
            inputs=[msg_input, chatbot],
            outputs=[chatbot, msg_input],
        )

    # Reset the conversation to the greeting and empty the textbox.
    clear_btn.click(fn=lambda: (INITIAL, ""), outputs=[chatbot, msg_input])

# Listen on all interfaces on port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)