charantejapolavarapu commited on
Commit
a5578e4
·
verified ·
1 Parent(s): 3e0b822

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -0
app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# app.py — PDF question-answering demo: Gradio UI over a LangChain
# RetrievalQA chain (HuggingFace embeddings + FAISS + flan-t5-base).
import gradio as gr
# NOTE(review): `langchain.document_loaders` / `.embeddings` / `.vectorstores`
# / `.llms` are legacy import paths; on langchain >= 0.1 these moved to the
# `langchain_community` package — confirm the pinned langchain version.
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline

from transformers import pipeline
import os  # NOTE(review): unused in this file — candidate for removal

# Global variables — populated by process_pdf(); None until a PDF is indexed.
vectorstore = None  # FAISS index over the current PDF's chunks
qa_chain = None     # RetrievalQA chain bound to `vectorstore`

# Load embedding model once (at import time) so each upload only pays
# for indexing, not model download/initialization.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# Load LLM once: a local text2text pipeline (no API key needed).
# max_length bounds the generated answer, not the input prompt.
pipe = pipeline(
    "text2text-generation",
    model="google/flan-t5-base",
    max_length=512
)

# Wrap the raw transformers pipeline so LangChain chains can call it.
llm = HuggingFacePipeline(pipeline=pipe)
29
+
30
def process_pdf(pdf_file):
    """Index an uploaded PDF and build the retrieval-QA chain.

    Loads the PDF, splits it into overlapping chunks, embeds the chunks
    into an in-memory FAISS index, and stores the index and a RetrievalQA
    chain in the module globals ``vectorstore`` and ``qa_chain``.

    Args:
        pdf_file: Value from the Gradio ``File`` component — either a
            tempfile-like object exposing the path via ``.name`` (older
            Gradio) or a plain filepath string (newer Gradio).

    Returns:
        A human-readable status string for display in the UI.
    """
    global vectorstore, qa_chain

    if pdf_file is None:
        return "Please upload a PDF first."

    # Fix: newer Gradio passes the upload as a filepath string, which has
    # no `.name` attribute — accept both forms instead of crashing.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

    # Load each PDF page as a LangChain document.
    loader = PyPDFLoader(pdf_path)
    documents = loader.load()

    # Overlapping chunks keep local context across chunk boundaries.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
    )
    texts = splitter.split_documents(documents)

    # Guard: a scanned/image-only PDF yields no text chunks, and
    # FAISS.from_documents would fail on an empty list.
    if not texts:
        return "No extractable text found in the PDF."

    # Build the vector index over the chunks.
    vectorstore = FAISS.from_documents(texts, embeddings)

    # Wire the QA chain to the fresh index; keep source documents so the
    # UI can show where each answer came from.
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        return_source_documents=True,
    )

    return "PDF processed successfully! You can now ask questions."
58
+
59
def ask_question(question):
    """Answer a free-text question against the processed PDF.

    Args:
        question: The user's question string from the UI.

    Returns:
        A formatted string containing the answer followed by up-to-300
        character excerpts of the retrieved source chunks, or a prompt to
        upload a PDF / enter a question first.
    """
    global qa_chain

    if qa_chain is None:
        return "Upload and process a PDF first."

    # Guard: don't run retrieval + generation on an empty question.
    if not question or not question.strip():
        return "Please enter a question."

    # Pass the input explicitly under the chain's "query" key rather than
    # relying on the single-input-key string shortcut.
    result = qa_chain({"query": question})

    answer = result["result"]

    # Short excerpts of each retrieved chunk let users verify the answer;
    # .get guards against a chain configured without source documents.
    sources = "\n\n".join(
        doc.page_content[:300] for doc in result.get("source_documents", [])
    )

    return f"Answer:\n{answer}\n\nSources:\n{sources}"
73
+
74
# Gradio UI: PDF upload/indexing controls on top, Q&A controls below.
with gr.Blocks() as demo:
    gr.Markdown("# 📄 PDF Question Answering System")
    gr.Markdown("Upload a PDF and ask questions about it.")

    # Upload + indexing controls; `status` reports process_pdf's result.
    pdf_input = gr.File(file_types=[".pdf"])
    process_btn = gr.Button("Process PDF")
    status = gr.Textbox(label="Status")

    # Question/answer controls.
    question = gr.Textbox(label="Ask a question")
    ask_btn = gr.Button("Get Answer")

    # Combined answer + source excerpts, formatted by ask_question.
    output = gr.Textbox(label="Response", lines=15)

    # Wire the buttons to the handlers defined above.
    process_btn.click(process_pdf, inputs=pdf_input, outputs=status)
    ask_btn.click(ask_question, inputs=question, outputs=output)

# Start the app (blocking) once the UI graph is fully built.
demo.launch()