Spaces:

Namantiwariix
/

Legal

Runtime error

App Files Files Community

Namantiwariix committed on Apr 4, 2025

Commit

91ff18e

verified ·

1 Parent(s): 33edda9

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -70

app.py CHANGED Viewed

@@ -1,75 +1,28 @@
-import faiss
-import numpy as np
-import gradio as gr
 import torch
-import pymupdf
-from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
-from sentence_transformers import SentenceTransformer
-# Step 1: Load the Sentence Transformer model to embed legal documents
-embedder = SentenceTransformer('paraphrase-MiniLM-L6-v2')  # Lightweight for embedding
-# Step 2: Load the InLegalBERT for QA
-qa_model = AutoModelForSequenceClassification.from_pretrained("law-ai/InLegalBERT")
-qa_tokenizer = AutoTokenizer.from_pretrained("law-ai/InLegalBERT")
-qa_pipeline = pipeline("question-answering", model=qa_model, tokenizer=qa_tokenizer)
-# Step 3: Load and process the PDF documents
-def extract_text_from_pdf(pdf_path):
-    doc = pymupdf.open(pdf_path)
-    text = ""
-    for page_num in range(len(doc)):
-        page = doc.load_page(page_num)
-        text += page.get_text("text")
-    return text
-# Step 4: Build FAISS index
-def build_faiss_index(documents):
-    # Create embeddings for documents
-    embeddings = embedder.encode(documents, convert_to_numpy=True)
-    index = faiss.IndexFlatL2(embeddings.shape[1])  # L2 distance index
-    index.add(embeddings)
-    return index
-# Step 5: Function to retrieve the most relevant document based on the query
-def retrieve_relevant_document(query, documents, faiss_index):
-    query_embedding = embedder.encode([query], convert_to_numpy=True)
-    distances, indices = faiss_index.search(query_embedding, k=1)  # Search for the most similar document
-    relevant_doc = documents[indices[0][0]]
-    return relevant_doc
-# Step 6: QA function using retrieved context
-def legal_chat(query, context):
-    result = qa_pipeline(question=query, context=context)
-    return result['answer']
-# Step 7: Gradio interface setup
-def run_legal_chat(query, pdf_path):
-    # Extract text from PDF
-    document_text = extract_text_from_pdf(pdf_path)
-    documents = [document_text]  # You can split this into smaller chunks for better search performance
-    # Build the FAISS index for document search
-    faiss_index = build_faiss_index(documents)
-    # Retrieve the most relevant document
-    relevant_doc = retrieve_relevant_document(query, documents, faiss_index)
-    # Get answer using QA pipeline
-    answer = legal_chat(query, relevant_doc)
-    return answer
-# Gradio UI
-interface = gr.Interface(
-    fn=run_legal_chat,
-    inputs=[
-        gr.Textbox(label="Ask your legal question"),
-        gr.File(label="Upload PDF with Legal Text")
-    ],
-    outputs="text",
-    title="Legal Advice Chatbot",
-    description="Ask questions related to Indian law and get answers based on the provided legal document."
 )
-if __name__ == "__main__":
-    interface.launch()

 import torch
+import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer
+# Load Gemma 27B Model
+# NOTE(review): the repo id below appears to be a placeholder (its own trailing
+# comment says to replace it) - confirm a valid Hugging Face Hub id before deploying.
+model_name = "gemma-ai/gemma-27b"  # Replace with correct model name from Hugging Face
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+# The model is requested with torch_dtype=torch.float16 and device_map="auto";
+# presumably placement is then decided by the library rather than fixed here - TODO confirm.
+model = AutoModelForCausalLM.from_pretrained(
+    model_name, torch_dtype=torch.float16, device_map="auto"
+)
+# Function to generate response
+def generate_response(prompt):
+    """Generate a text completion for *prompt* with the loaded causal LM.
+
+    Args:
+        prompt: Text entered by the user in the Gradio textbox.
+
+    Returns:
+        The first generated sequence decoded to a string with special
+        tokens stripped.
+    """
+    # Send the inputs to the device the model actually lives on. The model is
+    # loaded with device_map="auto", so hard-coding "cuda" fails on hosts
+    # without a CUDA device.
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    # max_new_tokens limits only the generated continuation; max_length also
+    # counted the prompt tokens, leaving long prompts little or no room.
+    output = model.generate(**inputs, max_new_tokens=200)
+    return tokenizer.decode(output[0], skip_special_tokens=True)
+# Gradio Interface
+# One input textbox and one output textbox, wired to generate_response via fn=.
+iface = gr.Interface(
+    fn=generate_response,
+    inputs=gr.Textbox(label="Enter your prompt"),
+    outputs=gr.Textbox(label="Gemma 27B Response"),
+    title="Gemma 27B Chatbot",
+    description="Ask Gemma anything!"
 )
+# Launch the app
+# NOTE(review): launch() is called unconditionally at module top level (no
+# __main__ guard), so it also runs when this module is imported.
+iface.launch()