Spaces:

Aranwer
/

LegalAssistantChatbot

Sleeping

App Files Files Community

Aranwer committed on Apr 13, 2025

Commit

38b37ec

verified ·

1 Parent(s): b6df14b

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -24

app.py CHANGED Viewed

@@ -5,11 +5,9 @@ import numpy as np
 import ast
 import gradio as gr
 import faiss
 from sentence_transformers import SentenceTransformer
 from transformers import pipeline
-# Unzip the dataset if not already done
 zip_path = "lexglue-legal-nlp-benchmark-dataset.zip"
 extract_dir = "lexglue_data"
@@ -17,61 +15,81 @@ if not os.path.exists(extract_dir):
     with zipfile.ZipFile(zip_path, 'r') as zip_ref:
         zip_ref.extractall(extract_dir)
-# Load CSV from extracted folder
 df = pd.read_csv(os.path.join(extract_dir, "case_hold_test.csv"))
 df = df[['context', 'endings', 'label']]
 df['endings'] = df['endings'].apply(ast.literal_eval)
-# Prepare corpus: concatenate context with each ending
 corpus = []
 for idx, row in df.iterrows():
     context = row['context']
     for ending in row['endings']:
         corpus.append(f"{context.strip()} {ending.strip()}")
-# Load Sentence Transformer and encode the corpus
 embedder = SentenceTransformer('all-MiniLM-L6-v2')
 corpus_embeddings = embedder.encode(corpus, show_progress_bar=True)
-# Create FAISS index
 dimension = corpus_embeddings.shape[1]
 index = faiss.IndexFlatL2(dimension)
 index.add(np.array(corpus_embeddings))
-# Load text generation pipeline
 generator = pipeline("text-generation", model="gpt2")
-# Query Function
 def legal_assistant_query(query):
     query_embedding = embedder.encode([query])
     D, I = index.search(np.array(query_embedding), k=5)
-    # Limit the number of retrieved documents or trim context
     retrieved_docs = [corpus[i] for i in I[0]]
-    # Combine retrieved documents into a single context and ensure it doesn't exceed token limit
-    context_combined = "\n\n".join(retrieved_docs[:3])  # Limit to 3 docs to avoid overflow
-    max_length = 1024  # Set appropriate limit based on GPT-2's token length (around 1024 tokens)
-    # Ensure the context combined does not exceed max length
     context_combined = context_combined[:max_length]
-    # Prepare the prompt for GPT-2
     prompt = f"Given the following legal references, answer the question:\n\n{context_combined}\n\nQuestion: {query}\nAnswer:"
-    # Generate the response
     result = generator(prompt, max_new_tokens=200, do_sample=True)[0]['generated_text']
-    # Extract the answer from the generated text
-    return result.split("Answer:")[-1].strip()
-# Gradio Interface
 iface = gr.Interface(
     fn=legal_assistant_query,
-    inputs=gr.Textbox(lines=2, placeholder="Ask a legal question..."),
-    outputs=gr.Textbox(label="Legal Response"),
     title="🧑‍⚖️ Legal Assistant Chatbot",
-    description="Ask any legal question and get context-based case references using the LexGLUE dataset."
 )
 iface.launch()

 import ast
 import gradio as gr
 import faiss
 from sentence_transformers import SentenceTransformer
 from transformers import pipeline
 zip_path = "lexglue-legal-nlp-benchmark-dataset.zip"
 extract_dir = "lexglue_data"
     with zipfile.ZipFile(zip_path, 'r') as zip_ref:
         zip_ref.extractall(extract_dir)
 # Read the CaseHOLD test split and keep only the three columns used here.
 csv_path = os.path.join(extract_dir, "case_hold_test.csv")
 df = pd.read_csv(csv_path)[['context', 'endings', 'label']]
 # literal_eval turns each `endings` cell from its text form back into a
 # Python object that can be iterated when the corpus is built.
 df['endings'] = df['endings'].apply(ast.literal_eval)

 # One corpus entry per (context, candidate ending) pair, in row order.
 corpus = [
     f"{row['context'].strip()} {ending.strip()}"
     for _, row in df.iterrows()
     for ending in row['endings']
 ]

 # Embed every corpus entry once at start-up.
 embedder = SentenceTransformer('all-MiniLM-L6-v2')
 corpus_embeddings = embedder.encode(corpus, show_progress_bar=True)

 # Flat L2 index sized to the embedding width (second axis of the matrix).
 dimension = corpus_embeddings.shape[1]
 index = faiss.IndexFlatL2(dimension)
 index.add(np.array(corpus_embeddings))

 # GPT-2 text-generation pipeline shared by the functions defined below.
 generator = pipeline("text-generation", model="gpt2")
# Rolling log of recent question/answer pairs, filled by legal_assistant_query.
# NOTE: this is one module-level list, so everything served by this process
# reads and writes the same history.
history = []


def simplify_legal_text(text, generate=None):
    """Ask the text-generation model to restate *text* in plain English.

    Args:
        text: The legal text to simplify.
        generate: Optional callable used instead of the module-level
            ``generator``. It is called as
            ``generate(prompt, max_new_tokens=100, do_sample=False)`` and must
            return a list whose first item has a ``'generated_text'`` key.

    Returns:
        The model's continuation with surrounding whitespace removed, or an
        empty string when *text* is empty or only whitespace.
    """
    # Nothing to simplify: skip the model call entirely.
    if not text or not text.strip():
        return ""

    if generate is None:
        generate = generator

    prompt = f"Simplify the following legal text into plain English:\n\n{text}"
    generated = generate(prompt, max_new_tokens=100, do_sample=False)[0]['generated_text']

    # By default the pipeline returns the prompt followed by the continuation.
    # Without this step the "simplified" text would start with the instruction
    # and the original legal wording.
    if generated.startswith(prompt):
        generated = generated[len(prompt):]
    return generated.strip()
 def legal_assistant_query(query):
     """Answer a legal question from retrieved corpus passages.

     The query is embedded, the five nearest corpus entries are looked up in
     the FAISS index, the first three are placed in a GPT-2 prompt, and the
     generated answer is passed through ``simplify_legal_text``. The
     question and simplified answer are appended to ``history``, which is
     trimmed to its five most recent entries.

     Args:
         query: The user's question.

     Returns:
         The simplified answer, or a short request for input when *query*
         is empty or only whitespace (nothing is generated or recorded then).
     """
     if not query or not query.strip():
         return "Please enter a legal question."

     query_embedding = embedder.encode([query])
     _, neighbour_ids = index.search(np.array(query_embedding), k=5)
     # FAISS fills missing neighbours with -1; as a Python index that would
     # silently select the last corpus entry, so drop those slots.
     retrieved_docs = [corpus[i] for i in neighbour_ids[0] if i >= 0]

     context_combined = "\n\n".join(retrieved_docs[:3])
     # This is a character cap, not a token cap: it only bounds how much
     # retrieved text goes into the prompt.
     max_length = 1024
     context_combined = context_combined[:max_length]

     prompt = f"Given the following legal references, answer the question:\n\n{context_combined}\n\nQuestion: {query}\nAnswer:"
     result = generator(prompt, max_new_tokens=200, do_sample=True)[0]['generated_text']

     # The generated text normally starts with the prompt. Removing that
     # prefix keeps the whole continuation, whereas splitting on the last
     # "Answer:" loses text whenever the model writes "Answer:" again.
     if result.startswith(prompt):
         answer = result[len(prompt):].strip()
     else:
         answer = result.split("Answer:")[-1].strip()

     simplified_answer = simplify_legal_text(answer)

     # Keep only the five most recent exchanges.
     history.append({"question": query, "answer": simplified_answer})
     del history[:-5]
     return simplified_answer
def show_history():
    """Render the recorded question/answer pairs as one text block.

    Returns:
        Each entry of ``history`` formatted as a "Q:" line followed by an
        "A:" line, with a blank line between entries, or the string
        "No history yet." when ``history`` is empty.
    """
    if not history:
        return "No history yet."

    rendered = []
    for entry in history:
        rendered.append(f"Q: {entry['question']}\nA: {entry['answer']}")
    return "\n\n".join(rendered)
# Example questions offered in the UI as clickable examples.
sample_questions = [
    "Can you explain the constitutional rights of a citizen in simple terms?",
    "What does a breach of contract mean?",
    "How do courts determine if someone is guilty of a crime?",
    "What is the difference between civil and criminal law?",
    "Can you explain what 'reasonable doubt' is in a criminal trial?"
]


def _answer_with_history(query):
    """Return the answer for *query* together with the refreshed history text."""
    answer = legal_assistant_query(query)
    return answer, show_history()


# gr.Interface passes one argument to `fn` per input component and expects one
# return value per output component. The earlier layout declared two inputs
# (one of them a Button) and three outputs around a one-argument,
# one-result function, so every submission failed. Here there is one input,
# and two outputs that the wrapper above fills.
iface = gr.Interface(
    fn=_answer_with_history,
    inputs=gr.Textbox(lines=2, placeholder="Ask a legal question..."),
    outputs=[
        gr.Textbox(label="Legal Response"),
        gr.Textbox(label="Session History", lines=10),
    ],
    examples=sample_questions,
    # Caching would run the models on every example at start-up and record
    # those runs in the history.
    cache_examples=False,
    title="🧑‍⚖️ Legal Assistant Chatbot",
    description="Ask any legal question and get context-based case references using the LexGLUE dataset. The assistant will also simplify legal language into plain English and maintain a session history."
)
iface.launch()