Bofandra committed on
Commit
a3f8edb
·
verified ·
1 Parent(s): 0d05eb3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -0
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import faiss
import gradio as gr
import numpy as np
import torch
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# --- Models ---------------------------------------------------------------
# Loaded once at import time; both are shared by the handlers below.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# The generation model is overridable via the HF_MODEL environment variable;
# the default preserves the original hard-coded choice.
model_name = os.environ.get("HF_MODEL", "mistralai/Mistral-7B-Instruct-v0.2")
generator = pipeline(
    "text-generation",
    model=model_name,
    device=0 if torch.cuda.is_available() else -1,
)

# --- Shared state ---------------------------------------------------------
texts = []    # list[str]: 500-char chunks of the most recently processed PDF
index = None  # faiss.IndexFlatL2 over the chunk embeddings; None until upload
def process_pdf(file):
    """Extract text from an uploaded PDF, chunk it, and build a FAISS index.

    Args:
        file: Gradio file object; ``file.name`` is the local path to the PDF.

    Returns:
        A status string shown in the UI.
    """
    global texts, index

    reader = PdfReader(file.name)
    # extract_text() may return None for image-only pages; guard against it
    # so the join never sees a non-string.
    full_text = "\n".join((page.extract_text() or "") for page in reader.pages)

    # Fixed-size 500-character chunks, no overlap.
    chunks = [full_text[i:i + 500] for i in range(0, len(full_text), 500)]
    if not chunks:
        # Nothing to index (e.g. a scanned/image-only PDF): reset state
        # instead of crashing on an empty embedding matrix.
        texts, index = [], None
        return "No extractable text found in this PDF."
    texts = chunks

    # FAISS requires a contiguous float32 matrix.
    embeddings = np.asarray(embedder.encode(chunks), dtype="float32")
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)

    return "PDF processed. Ask me anything about it!"
def chat_fn(message, history):
    """Answer *message* using the top FAISS matches from the processed PDF.

    Args:
        message: The user's question.
        history: Chat history supplied by gr.ChatInterface (unused).

    Returns:
        The generated answer string, or a prompt to upload a PDF first.
    """
    if index is None or not texts:
        return "Please upload and process a PDF first."

    q_embedding = np.asarray(embedder.encode([message]), dtype="float32")
    # Never request more neighbours than there are chunks; with k > ntotal
    # FAISS pads the result with -1 indices, which would otherwise make
    # texts[-1] silently pick the wrong chunk.
    k = min(3, len(texts))
    D, I = index.search(q_embedding, k=k)
    context = "\n".join(texts[i] for i in I[0] if i >= 0)

    prompt = f"""You are a helpful assistant. Use the context to answer the question.

Context:
{context}

Question:
{message}

Answer:"""

    output = generator(prompt, max_new_tokens=300, do_sample=True)[0]["generated_text"]
    # The pipeline echoes the prompt; keep only the text after the last marker.
    answer = output.split("Answer:")[-1].strip()
    return answer
# --- Gradio UI -------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 PDF ChatBot - Ask Anything from Your Document")

    with gr.Row():
        file = gr.File(file_types=[".pdf"], label="Upload PDF")
        status = gr.Textbox(label="Status", interactive=False)
        upload_btn = gr.Button("Process PDF")

    # Wire the upload button to the indexing step; its return value
    # populates the status textbox.
    upload_btn.click(fn=process_pdf, inputs=file, outputs=status)

    chatbot = gr.ChatInterface(chat_fn)

if __name__ == "__main__":
    # Guarded launch so importing this module (e.g. for tests) does not
    # start a web server as a side effect.
    demo.launch()