Namantiwariix committed on
Commit
91ff18e
·
verified ·
1 Parent(s): 33edda9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -70
app.py CHANGED
@@ -1,75 +1,28 @@
1
- import faiss
2
- import numpy as np
3
- import gradio as gr
4
  import torch
5
- import pymupdf
6
- from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
7
- from sentence_transformers import SentenceTransformer
8
-
9
- # Step 1: Load the Sentence Transformer model to embed legal documents
10
- embedder = SentenceTransformer('paraphrase-MiniLM-L6-v2') # Lightweight for embedding
11
-
12
- # Step 2: Load the InLegalBERT for QA
13
- qa_model = AutoModelForSequenceClassification.from_pretrained("law-ai/InLegalBERT")
14
- qa_tokenizer = AutoTokenizer.from_pretrained("law-ai/InLegalBERT")
15
- qa_pipeline = pipeline("question-answering", model=qa_model, tokenizer=qa_tokenizer)
16
-
17
- # Step 3: Load and process the PDF documents
18
- def extract_text_from_pdf(pdf_path):
19
- doc = pymupdf.open(pdf_path)
20
- text = ""
21
- for page_num in range(len(doc)):
22
- page = doc.load_page(page_num)
23
- text += page.get_text("text")
24
- return text
25
-
26
- # Step 4: Build FAISS index
27
- def build_faiss_index(documents):
28
- # Create embeddings for documents
29
- embeddings = embedder.encode(documents, convert_to_numpy=True)
30
- index = faiss.IndexFlatL2(embeddings.shape[1]) # L2 distance index
31
- index.add(embeddings)
32
- return index
33
-
34
- # Step 5: Function to retrieve the most relevant document based on the query
35
- def retrieve_relevant_document(query, documents, faiss_index):
36
- query_embedding = embedder.encode([query], convert_to_numpy=True)
37
- distances, indices = faiss_index.search(query_embedding, k=1) # Search for the most similar document
38
- relevant_doc = documents[indices[0][0]]
39
- return relevant_doc
40
-
41
- # Step 6: QA function using retrieved context
42
- def legal_chat(query, context):
43
- result = qa_pipeline(question=query, context=context)
44
- return result['answer']
45
-
46
- # Step 7: Gradio interface setup
47
- def run_legal_chat(query, pdf_path):
48
- # Extract text from PDF
49
- document_text = extract_text_from_pdf(pdf_path)
50
- documents = [document_text] # You can split this into smaller chunks for better search performance
51
-
52
- # Build the FAISS index for document search
53
- faiss_index = build_faiss_index(documents)
54
 
55
- # Retrieve the most relevant document
56
- relevant_doc = retrieve_relevant_document(query, documents, faiss_index)
57
-
58
- # Get answer using QA pipeline
59
- answer = legal_chat(query, relevant_doc)
60
- return answer
61
 
62
- # Gradio UI
63
- interface = gr.Interface(
64
- fn=run_legal_chat,
65
- inputs=[
66
- gr.Textbox(label="Ask your legal question"),
67
- gr.File(label="Upload PDF with Legal Text")
68
- ],
69
- outputs="text",
70
- title="Legal Advice Chatbot",
71
- description="Ask questions related to Indian law and get answers based on the provided legal document."
 
 
 
72
  )
73
 
74
- if __name__ == "__main__":
75
- interface.launch()
 
 
 
 
1
  import torch
2
+ import gradio as gr
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
# Load the Gemma 27B model and its tokenizer once, at import time.
#
# "gemma-ai/gemma-27b" was a self-declared placeholder and is not a published
# Hub repository; the 27B Gemma checkpoints are published under the "google"
# organisation. NOTE(review): the repository is gated, so the runtime needs a
# Hugging Face token for an account that has accepted the Gemma licence.
model_name = "google/gemma-2-27b-it"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    # bfloat16 is the precision the checkpoint was exported in; loading as
    # float16 forces a down-cast with a much narrower exponent range.
    torch_dtype=torch.bfloat16,
    # Let accelerate decide where (and across which devices) the weights go.
    device_map="auto",
)
11
 
12
+ # Function to generate response
13
def generate_response(prompt):
    """Generate a completion for *prompt* with the module-level Gemma model.

    Args:
        prompt: Text entered by the user in the Gradio textbox.

    Returns:
        The decoded output sequence with special tokens stripped. As in the
        previous implementation, this is the prompt followed by the model's
        continuation.
    """
    # The model is loaded with device_map="auto", so it is not guaranteed to
    # live on "cuda"; send the inputs to wherever the model actually is.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # max_length counts the prompt tokens too, so a long prompt would leave
    # little or no room for an answer. Budget the *new* tokens instead.
    output = model.generate(**inputs, max_new_tokens=200)
    return tokenizer.decode(output[0], skip_special_tokens=True)
17
+
18
# Build the Gradio UI: one prompt textbox in, one response textbox out.
_prompt_box = gr.Textbox(label="Enter your prompt")
_response_box = gr.Textbox(label="Gemma 27B Response")

iface = gr.Interface(
    generate_response,
    _prompt_box,
    _response_box,
    title="Gemma 27B Chatbot",
    description="Ask Gemma anything!",
)

# Start serving the interface.
iface.launch()