SyedWaqad committed on
Commit
03b2846
·
verified ·
1 Parent(s): dfce74d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -0
app.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from pypdf import PdfReader
3
+ from sentence_transformers import SentenceTransformer
4
+ import faiss
5
+ import numpy as np
6
+ from transformers import pipeline
7
+
8
# Model identifiers, kept in one place so they are easy to swap.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
GENERATION_MODEL_NAME = "gpt2"

# Sentence encoder used to embed both the document chunks and the user query.
embed_model = SentenceTransformer(EMBEDDING_MODEL_NAME)

# Text-generation pipeline that writes the answer from the prompt.
qa_model = pipeline("text-generation", model=GENERATION_MODEL_NAME)
13
+
14
# Temporary in-memory storage (module-level; replaced on every successful upload)
documents = []  # full extracted text, one entry per uploaded PDF
chunks = []     # text chunks; chunks[i] is the text behind row i of `vectors`
vectors = None  # float32 embedding matrix of `chunks`, or None before upload
index = None    # FAISS L2 index built over `vectors`, or None before upload

# Number of nearest chunks used as context for a question.
TOP_K = 3
# Maximum characters taken from each retrieved chunk, to keep the prompt short.
MAX_CHUNK_CHARS = 500
# Upper bound on newly generated tokens for an answer.
MAX_NEW_TOKENS = 120


def read_pdfs(pdf_files):
    """Extract text from the uploaded PDFs and build the search index.

    The text of every PDF is split into non-empty lines; each line becomes
    one chunk, is embedded with ``embed_model`` and added to a FAISS L2
    index. The chunks are stored alongside the index so that search hits
    can be mapped back to their text.

    Args:
        pdf_files: Iterable of uploaded files as delivered by the Gradio
            file component. Each item is either an object with a ``name``
            attribute holding the file path or the path itself.
            NOTE(review): which of the two Gradio passes depends on its
            version/configuration -- confirm against the deployed version.

    Returns:
        A status message for the UI.
    """
    global documents, chunks, vectors, index

    # Gradio passes None when the button is clicked without any upload.
    if not pdf_files:
        return "No PDF files were provided."

    new_documents = []
    for pdf in pdf_files:
        path = getattr(pdf, "name", pdf)
        reader = PdfReader(path)
        # Guard against pages that yield no text (e.g. scanned images).
        page_texts = [(page.extract_text() or "") for page in reader.pages]
        new_documents.append("\n".join(page_texts))

    # Split text into chunks: one chunk per non-blank line.
    new_chunks = [
        line.strip()
        for text in new_documents
        for line in text.split("\n")
        if line.strip()
    ]
    if not new_chunks:
        return "No extractable text was found in the uploaded PDFs."

    # Embed chunks
    embeddings = embed_model.encode(new_chunks)
    new_vectors = np.asarray(embeddings, dtype="float32")

    # Create FAISS index
    new_index = faiss.IndexFlatL2(new_vectors.shape[1])
    new_index.add(new_vectors)

    # Publish the new state only after everything succeeded, so a failed
    # upload never leaves the index and the chunk list out of sync.
    documents = new_documents
    chunks = new_chunks
    vectors = new_vectors
    index = new_index

    return "Documents uploaded and processed. You may now ask questions."


def ask_question(query):
    """Answer ``query`` using the most similar chunks of the uploaded PDFs.

    Args:
        query: The user's question as plain text.

    Returns:
        The generated answer text, or a hint message when no documents
        have been processed yet or the question is empty.
    """
    if index is None:
        return "Please upload PDF documents first."

    if not query or not query.strip():
        return "Please enter a question."

    # Embed query
    q_embed = np.asarray(embed_model.encode([query]), dtype="float32")

    # Search similar chunks; never ask for more neighbours than we stored.
    k = min(TOP_K, len(chunks))
    _distances, neighbours = index.search(q_embed, k)

    # Map hit positions back to chunk text. FAISS uses -1 for "no result".
    context = "\n".join(
        chunks[i][:MAX_CHUNK_CHARS]
        for i in neighbours[0]
        if 0 <= i < len(chunks)
    )

    # Generate answer. `max_new_tokens` bounds only the generated part (the
    # prompt alone can exceed a total-length limit), and
    # `return_full_text=False` keeps the prompt out of the returned text.
    prompt = f"Context: {context}\nQuestion: {query}\nAnswer:"
    outputs = qa_model(
        prompt,
        max_new_tokens=MAX_NEW_TOKENS,
        return_full_text=False,
    )
    answer = outputs[0]["generated_text"].strip()

    return answer
68
+
69
# Gradio UI: an upload/processing section followed by a question/answer section.
with gr.Blocks() as demo:
    gr.Markdown("## PDF Chatbot")

    # Document upload and indexing.
    uploaded_pdfs = gr.File(file_count="multiple", label="Upload multiple PDFs")
    process_button = gr.Button("Process Documents")
    status_box = gr.Textbox(label="Status")

    # Question input and generated answer.
    question_box = gr.Textbox(label="Ask a question")
    answer_box = gr.Textbox(label="Answer")

    # Clicking the button indexes the PDFs; pressing Enter in the question
    # box runs the retrieval + generation step.
    process_button.click(fn=read_pdfs, inputs=uploaded_pdfs, outputs=status_box)
    question_box.submit(fn=ask_question, inputs=question_box, outputs=answer_box)

demo.launch()