# Source: Hugging Face Space (status: Sleeping) - File size: 4,977 Bytes
import gradio as gr
from huggingface_hub import InferenceClient
import os
from src.data_processor import LegalDocProcessor
from src.hybrid_retriever import HybridRetriever
# --- Configuration & Initialization ---
# Directory checked for an existing index and passed to HybridRetriever as index_dir.
INDEX_DIR = "index_storage"
# Data file paths handed to LegalDocProcessor when a new index has to be built.
PARENT_DATA = "data/parent_docs.json"
CHILD_DATA = "data/child_docs.json"
def initialize_retriever():
    """Load the saved hybrid index, or build and save a new one.

    If INDEX_DIR exists, a HybridRetriever is constructed from it. Otherwise
    the documents are loaded through LegalDocProcessor, indexed, and the
    index is saved to INDEX_DIR.

    Returns:
        The HybridRetriever instance, or None when no documents were loaded
        or any step raised (the error is printed instead of propagated so
        the app can still start without retrieval).
    """
    try:
        if os.path.exists(INDEX_DIR):
            print("[*] Loading existing index...")
            return HybridRetriever(index_dir=INDEX_DIR)

        print("[*] Building new index...")
        processor = LegalDocProcessor(PARENT_DATA, CHILD_DATA)
        docs = processor.load_and_clean()
        if not docs:
            # Say why retrieval is disabled instead of failing silently.
            print("[!] No documents loaded; retriever disabled.")
            return None

        ret = HybridRetriever(documents=docs, index_dir=INDEX_DIR)
        ret.save_index()
        return ret
    except Exception as e:
        # Top-level boundary: report the failure and run without a retriever.
        print(f"Error initializing retriever: {e}")
        return None
# Global retriever instance, created once when this module is imported.
# It is None when initialize_retriever() could not produce a retriever.
retriever = initialize_retriever()
def _build_legal_context(query):
    """Return the retrieved-law section that is appended to the system prompt.

    Returns an empty string when no retriever is loaded. Any failure during
    search or formatting is printed and replaced by a short error note, so
    the chat can still answer without context.
    """
    if not retriever:
        return ""
    try:
        # `or []` keeps a None result from breaking the loop below.
        search_results = retriever.hybrid_search(query, top_k=3) or []
        parts = ["\n\nRELEVANT NEPALESE LAW CONTEXT:\n"]
        if not search_results:
            parts.append("No specific legal clauses found for this query.")
        for res in search_results:
            src = res.get('legal_document_source', 'Unknown')
            cid = res.get('parent_clause_id', 'N/A')
            txt = res.get('parent_clause_text', 'Text not found')
            parts.append(f"--- Source: {src} ---\nClause: {cid}\nText: {txt}\n\n")
        return "".join(parts)
    except Exception as e:
        print(f"Retrieval Error: {e}")
        return "\n(Error retrieving specific law context.)"


def _history_to_messages(history, system_prompt, message):
    """Convert chat history plus the new user message into a messages list.

    Accepts both history shapes: dict entries with "role"/"content" keys and
    [user_msg, bot_msg] pairs. Empty halves of a pair are skipped.
    """
    messages = [{"role": "system", "content": system_prompt}]
    for item in history or []:
        if isinstance(item, dict):
            # Messages format: {"role": "user", "content": "..."}
            messages.append({"role": item["role"], "content": item["content"]})
        elif isinstance(item, (list, tuple)):
            # Pair format: [user_msg, bot_msg]
            user_msg, bot_msg = item
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if bot_msg:
                messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    return messages


def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken | None = None,
):
    """Stream a retrieval-augmented answer to `message`.

    Args:
        message: The new user message.
        history: Earlier turns, as role/content dicts or [user, bot] pairs.
        system_message: Base system prompt taken from the UI.
        max_tokens: Forwarded to chat_completion as max_tokens.
        temperature: Forwarded to chat_completion as temperature.
        top_p: Forwarded to chat_completion as top_p.
        hf_token: OAuth token object; its `.token` is used when present,
            otherwise the HF_TOKEN environment variable is read.

    Yields:
        The accumulated response text after each streamed piece, or a single
        error message when no token is available or generation fails.
    """
    # 1. TOKEN SETUP (checked first so no retrieval work is done without a token)
    raw_token = hf_token.token if (hf_token and hasattr(hf_token, 'token')) else os.getenv("HF_TOKEN", "")
    token = raw_token.strip() if raw_token else None
    if not token:
        yield "⚠️ Error: Please sign in with Hugging Face (see sidebar) or set HF_TOKEN secret."
        return

    # 2. RETRIEVAL + PROMPT ENGINEERING
    context = _build_legal_context(message)
    augmented_system_message = (
        f"{system_message}\n\n"
        "You are a legal assistant specializing in Nepalese Law. "
        "Use the legal context provided below to answer. Cite the Source and Clause ID.\n"
        f"{context}"
    )

    client = InferenceClient(token=token, model="meta-llama/Llama-3.1-70B-Instruct")

    # 3. UNIVERSAL HISTORY PARSER (handles both list-of-lists and list-of-dicts)
    messages = _history_to_messages(history, augmented_system_message, message)

    # 4. GENERATION
    response = ""
    try:
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # A chunk with no choices would raise IndexError on choices[0]
            # and replace the partial answer with an error; skip it instead.
            if not chunk.choices:
                continue
            token_text = chunk.choices[0].delta.content
            if token_text:
                response += token_text
                yield response
    except Exception as e:
        yield f"AI Error: {str(e)}"
# --- Gradio UI Setup ---
# Chat interface around respond(). The additional inputs are passed to
# respond() after (message, history), in the order listed here.
chatbot = gr.ChatInterface(
    fn=respond,
    # type="messages" is intentionally not passed: it was removed to fix a TypeError.
    additional_inputs=[
        gr.Textbox(
            value="You are a helpful Nepalese Legal Advisor.",
            label="System message",
        ),
        gr.Slider(minimum=1, maximum=2048, value=1024, step=1, label="Max tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    ],
    title="Nepal Law Search AI",
    description="Ask questions about Nepalese Constitution and Acts.",
    examples=[
        ["What are the punishments for cybercrime?"],
        ["What does the constitution say about the right to equality?"],
        ["Is witchcraft accusation a crime in Nepal?"],
    ],
    cache_examples=False,
)
# Page layout: a sidebar with the Hugging Face login and index status,
# followed by the chat interface.
with gr.Blocks() as demo:
    with gr.Sidebar():
        gr.Markdown("### Authentication")
        gr.LoginButton()
        gr.Markdown("---")
        # Report the real retriever state instead of always claiming success.
        if retriever:
            gr.Markdown("**Status:** Database Ready ✅")
        else:
            gr.Markdown("**Status:** Database unavailable ⚠️")
    chatbot.render()
# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    # Disable SSR to avoid Python 3.13 asyncio noise
    demo.launch(ssr_mode=False, show_error=True)