Spaces:
Sleeping
Sleeping
| import zipfile | |
| import os | |
| import pandas as pd | |
| import numpy as np | |
| import ast | |
| import gradio as gr | |
| import faiss | |
| from sentence_transformers import SentenceTransformer | |
| from transformers import pipeline | |
| """ | |
| Legal Assistant Chatbot using LexGLUE dataset and GPT-2 | |
| Includes session memory for last 5 Q&A and sample questions for user guidance. | |
| """ | |
# Dataset archive shipped next to this script, and the folder it unpacks into.
zip_path = "lexglue-legal-nlp-benchmark-dataset.zip"
extract_dir = "lexglue_data"

# Unpack only when the target folder is missing, so restarts skip the work.
if not os.path.exists(extract_dir):
    with zipfile.ZipFile(zip_path, mode="r") as archive:
        archive.extractall(path=extract_dir)
# Read the CaseHOLD test CSV from the extracted folder and keep only the
# three columns selected here.
case_hold_csv = os.path.join(extract_dir, "case_hold_test.csv")
wanted_columns = ['context', 'endings', 'label']
df = pd.read_csv(case_hold_csv)[wanted_columns]

# 'endings' is read as text; evaluate each cell as a Python literal.
# ast.literal_eval only accepts literals, so no arbitrary code is executed.
df['endings'] = df['endings'].apply(ast.literal_eval)
# Retrieval corpus: one passage per (context, ending) pair, made of the
# stripped context, a single space, and the stripped ending, in row order.
#
# The two columns are walked directly with zip() instead of
# DataFrame.iterrows(): no per-row Series is built, there is no unused index
# variable, and each context is stripped once rather than once per ending.
corpus = []
for context, endings in zip(df['context'], df['endings']):
    stem = context.strip()
    corpus.extend(f"{stem} {ending.strip()}" for ending in endings)
# Rolling list of (question, answer) tuples, filled in by legal_assistant_query.
history = list()

# Sentence-embedding model; the same instance encodes the corpus here and
# each incoming query later.
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# GPT-2 text-generation pipeline used to draft the answers.
generator = pipeline("text-generation", model="gpt2")

# Encode every corpus passage once at start-up.
corpus_embeddings = embedder.encode(corpus, show_progress_bar=True)

# Flat L2 index sized to the embedding width, loaded with all corpus vectors.
dimension = corpus_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.array(corpus_embeddings))
# Tunables for legal_assistant_query (values unchanged from the original code).
_SEARCH_K = 5             # neighbours requested from the index
_PROMPT_PASSAGES = 3      # nearest passages actually placed in the prompt
_MAX_CONTEXT_CHARS = 1024 # character cap on the combined passages
_MAX_HISTORY = 5          # Q&A pairs kept in `history`


def legal_assistant_query(query):
    """Answer a legal question from retrieved corpus passages and GPT-2.

    The query is embedded, the nearest corpus passages are looked up in the
    index, the closest ones are joined into a prompt, and the generator's
    continuation of that prompt is returned as the answer.

    Args:
        query: The user's question as a string.

    Returns:
        The answer, followed by a ``---`` separator and the most recent
        question/answer pairs (including this one). For an empty or
        whitespace-only query, a short hint is returned instead and nothing
        is generated or recorded.

    Note:
        ``history`` is a module-level list, so it is shared by every caller
        of this function in the process rather than kept per user.
    """
    # Nothing to retrieve or generate for a blank question.
    if not query or not query.strip():
        return "Please enter a legal question."

    query_embedding = embedder.encode([query])
    _, neighbour_ids = index.search(np.array(query_embedding), k=_SEARCH_K)

    # The index pads missing neighbours with -1; skip those so they are not
    # read as "last element of corpus" by negative indexing.
    retrieved_docs = [corpus[i] for i in neighbour_ids[0] if i >= 0]
    context_combined = "\n\n".join(retrieved_docs[:_PROMPT_PASSAGES])
    context_combined = context_combined[:_MAX_CONTEXT_CHARS]

    prompt = f"Given the following legal references, answer the question:\n\n{context_combined}\n\nQuestion: {query}\nAnswer:"

    # Ask the pipeline for the continuation only. Splitting the full text on
    # "Answer:" would return the wrong span whenever the query, the retrieved
    # passages, or the generated text itself contains another "Answer:".
    generated = generator(
        prompt,
        max_new_tokens=200,
        do_sample=True,
        return_full_text=False,
    )[0]['generated_text']
    answer = generated.strip()

    # Record this exchange and keep only the newest _MAX_HISTORY entries
    # (in-place, so the shared list object is preserved).
    history.append((query, answer))
    del history[:-_MAX_HISTORY]

    formatted_history = "\n\n".join(f"Q: {q}\nA: {a}" for q, a in history)
    return f"{answer}\n\n---\nRecent Q&A:\n{formatted_history}"
# Example prompts listed in the interface description to guide users.
sample_questions = [
    "What rights does a person have under the Fourth Amendment?",
    "Explain due process in simple terms.",
    "What is double jeopardy?",
    "Can the police search your car without a warrant?",
    "What is considered a fair trial?",
]

# Input and output widgets for the single-function interface.
question_box = gr.Textbox(lines=2, placeholder="Ask a legal question...", label="Your Question")
response_box = gr.Textbox(label="Legal Response with History")

# Intro sentence followed by one "- " line per sample question.
interface_description = (
    "Ask any legal question and get context-based case.\n\n💡 Sample Questions:\n- "
    + "\n- ".join(sample_questions)
)

iface = gr.Interface(
    fn=legal_assistant_query,
    inputs=question_box,
    outputs=response_box,
    title="🧑⚖️ Legal Assistant Chatbot",
    description=interface_description,
)

# Start the web app.
iface.launch()