Spaces:

Aranwer
/

LegalAssistantChatbot

Sleeping

App Files Files Community

LegalAssistantChatbot / app.py

Aranwer

Update app.py

0e1688f verified 10 months ago

raw

history blame contribute delete

2.64 kB

	import zipfile
	import os
	import pandas as pd
	import numpy as np
	import ast
	import gradio as gr
	import faiss

	from sentence_transformers import SentenceTransformer
	from transformers import pipeline

	"""
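	Legal Assistant Chatbot built on the LexGLUE CaseHOLD data, MiniLM sentence
	embeddings searched with a FAISS index, and GPT-2 text generation.
	Keeps the last 5 Q&A pairs in memory (one list shared by the whole process,
	not per user session) and shows sample questions for user guidance.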
	"""

	# Bundled LexGLUE archive and the directory its contents are unpacked into.
	zip_path = "lexglue-legal-nlp-benchmark-dataset.zip"
	extract_dir = "lexglue_data"

	# Guard on the CSV the app actually reads rather than on the directory:
	# a directory left behind by an interrupted or partial extraction would
	# otherwise skip extraction forever and make the later read_csv() fail.
	if not os.path.isfile(os.path.join(extract_dir, "case_hold_test.csv")):
	    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
	        zip_ref.extractall(extract_dir)

	# Load the CaseHOLD test split and keep only the columns used below.
	df = pd.read_csv(os.path.join(extract_dir, "case_hold_test.csv"))
	df = df[['context', 'endings', 'label']]
	# Each `endings` cell holds the text of a Python literal (a sequence of
	# strings); literal_eval parses it without executing arbitrary code.
	df['endings'] = df['endings'].apply(ast.literal_eval)

	# One retrieval document per (context, candidate ending) pair, in row order.
	# Zipping the two columns avoids the per-row Series that iterrows() builds
	# and yields exactly the same strings in the same order.
	corpus = [
	    f"{context.strip()} {ending.strip()}"
	    for context, endings in zip(df['context'], df['endings'])
	    for ending in endings
	]

	# Sentence encoder used for the corpus here and for incoming queries later.
	embedder = SentenceTransformer('all-MiniLM-L6-v2')
	# FAISS expects a C-contiguous float32 matrix. Convert once here instead of
	# copying with np.array() at add() time; this is a no-op when the encoder
	# already returns that layout.
	corpus_embeddings = np.ascontiguousarray(
	    embedder.encode(corpus, show_progress_bar=True),
	    dtype=np.float32,
	)

	# Exact L2 index holding one vector per corpus entry, so a FAISS result id
	# is directly an index into `corpus`.
	dimension = corpus_embeddings.shape[1]
	index = faiss.IndexFlatL2(dimension)
	index.add(corpus_embeddings)

	# GPT-2 generator that drafts the answer from the retrieved passages.
	generator = pipeline("text-generation", model="gpt2")

	# Most recent (question, answer) pairs, oldest first.
	# NOTE: this is module-level state, so it is shared by every caller of
	# legal_assistant_query in this process rather than kept per user.
	history = []

	_TOP_K = 3  # retrieved passages that go into the prompt
	_MAX_CONTEXT_CHARS = 1024  # character cap on the retrieved text in the prompt
	_MAX_NEW_TOKENS = 200  # generation budget for the answer
	_MAX_HISTORY = 5  # Q&A pairs kept in `history`


	def legal_assistant_query(query):
	    """Answer a legal question from retrieved passages and recent history.

	    The query is embedded with ``embedder``, its nearest passages are looked
	    up in ``index`` and joined into a prompt, and ``generator`` produces the
	    answer. The (query, answer) pair is appended to the module-level
	    ``history``, which is trimmed to the newest ``_MAX_HISTORY`` entries.

	    Args:
	        query: The user's question as plain text.

	    Returns:
	        The generated answer followed by a "Recent Q&A" section listing the
	        stored history, or a short prompt to enter a question when ``query``
	        is empty or whitespace-only (nothing is generated or stored then).
	    """
	    # Nothing meaningful can be retrieved or generated for a blank question.
	    if not query or not query.strip():
	        return "Please enter a legal question."

	    query_embedding = np.asarray(embedder.encode([query]), dtype=np.float32)
	    # Only the passages that are actually used are requested.
	    _, neighbour_ids = index.search(query_embedding, k=_TOP_K)

	    # FAISS pads the result with -1 when fewer than k vectors are available;
	    # without the filter that would silently select corpus[-1].
	    retrieved_docs = [corpus[i] for i in neighbour_ids[0] if i >= 0]
	    context_combined = "\n\n".join(retrieved_docs)[:_MAX_CONTEXT_CHARS]

	    prompt = f"Given the following legal references, answer the question:\n\n{context_combined}\n\nQuestion: {query}\nAnswer:"
	    # return_full_text=False yields only the continuation after the prompt.
	    # Splitting the full text on "Answer:" picked the wrong segment whenever
	    # the model wrote another "Answer:" or the question itself contained it.
	    generated = generator(
	        prompt,
	        max_new_tokens=_MAX_NEW_TOKENS,
	        do_sample=True,
	        return_full_text=False,
	    )[0]['generated_text']
	    answer = generated.strip()

	    history.append((query, answer))
	    # Drop everything except the newest _MAX_HISTORY entries.
	    del history[:-_MAX_HISTORY]

	    formatted_history = "\n\n".join(f"Q: {q}\nA: {a}" for q, a in history)
	    return f"{answer}\n\n---\nRecent Q&A:\n{formatted_history}"

	# Example prompts listed in the interface description to guide users.
	sample_questions = [
	    "What rights does a person have under the Fourth Amendment?",
	    "Explain due process in simple terms.",
	    "What is double jeopardy?",
	    "Can the police search your car without a warrant?",
	    "What is considered a fair trial?",
	]

	# Build the widgets and the description first so the Interface call below
	# reads as plain wiring.
	_question_box = gr.Textbox(
	    lines=2,
	    placeholder="Ask a legal question...",
	    label="Your Question",
	)
	_response_box = gr.Textbox(label="Legal Response with History")
	_sample_bullets = "\n- ".join(sample_questions)
	_description = "Ask any legal question and get context-based case.\n\n💡 Sample Questions:\n- " + _sample_bullets

	iface = gr.Interface(
	    fn=legal_assistant_query,
	    inputs=_question_box,
	    outputs=_response_box,
	    title="🧑‍⚖️ Legal Assistant Chatbot",
	    description=_description,
	)

	# Start the web app as soon as the module is executed.
	iface.launch()