Alpha108 committed on
Commit
c94a99e
·
verified ·
1 Parent(s): 749445b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
3
+ from langchain.embeddings import HuggingFaceEmbeddings
4
+ from langchain.vectorstores import FAISS
5
+ from langchain.llms import HuggingFacePipeline
6
+ from langchain.chains import RetrievalQA
7
+ from langchain.document_loaders import PyPDFLoader
8
+ from transformers import pipeline
9
+
10
+ # ----------------------------
11
+ # APP CONFIG
12
+ # ----------------------------
13
# Browser-tab title and wide layout, followed by the page heading and a
# one-line usage hint.
st.set_page_config(
    page_title="📘 PDF Question Answering",
    layout="wide",
)
st.title("📘 PDF Question Answering App")
st.markdown("Upload a PDF and ask questions about its content.")

# ----------------------------
# GLOBAL VARIABLES
# ----------------------------
# Placeholder for the question-answering chain; stays None until a PDF has
# been uploaded and processed further down in the script.
qa_chain = None
21
+
22
+ # ----------------------------
23
+ # FUNCTIONS
24
+ # ----------------------------
25
def load_pdf(pdf_file):
    """Load an uploaded PDF and split it into overlapping text chunks.

    ``PyPDFLoader`` is handed a filesystem path, but an in-memory upload only
    carries its bytes and its original filename. The bytes are therefore
    written to a temporary ``.pdf`` file, parsed from there, and the temporary
    file is removed afterwards.

    Args:
        pdf_file: Upload object exposing ``getvalue()`` and ``name`` (for
            example the value returned by ``st.file_uploader``). An object
            without ``getvalue()`` is handled as before: ``pdf_file.name`` is
            passed to the loader as the path.

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter`` with
        ``chunk_size=800`` and ``chunk_overlap=100``.
    """
    import os
    import tempfile

    if not hasattr(pdf_file, "getvalue"):
        # Not an in-memory upload: keep the original path-based behaviour.
        documents = PyPDFLoader(pdf_file.name).load()
    else:
        # delete=False so the file can be closed before the loader reopens it
        # by path; cleanup is done explicitly in the ``finally`` below.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
            tmp.write(pdf_file.getvalue())
            tmp_path = tmp.name
        try:
            documents = PyPDFLoader(tmp_path).load()
        finally:
            os.remove(tmp_path)
        # Report the user's filename rather than the throwaway temp path.
        for document in documents:
            document.metadata["source"] = pdf_file.name

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=100,
    )
    return text_splitter.split_documents(documents)
31
+
32
def build_vectorstore(docs):
    """Embed *docs* and index them in a FAISS vector store.

    Args:
        docs: Document chunks to index (as returned by ``load_pdf``).

    Returns:
        The store created by ``FAISS.from_documents`` using the
        ``sentence-transformers/all-MiniLM-L6-v2`` embedding model.
    """
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_documents(docs, embedder)
36
+
37
def build_qa_chain(vectorstore):
    """Assemble a RetrievalQA chain over *vectorstore* using FLAN-T5.

    Args:
        vectorstore: Store whose ``as_retriever`` supplies the context
            documents; the retriever is configured with ``k=3``.

    Returns:
        A ``RetrievalQA`` chain of type ``"stuff"`` whose LLM is a
        ``text2text-generation`` pipeline running ``google/flan-t5-base``.
    """
    # NOTE(review): temperature=0 is passed without do_sample; confirm the
    # installed transformers version accepts this combination as intended.
    generator = pipeline(
        "text2text-generation",
        model="google/flan-t5-base",
        max_length=512,
        temperature=0,
    )
    language_model = HuggingFacePipeline(pipeline=generator)
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
    return RetrievalQA.from_chain_type(
        llm=language_model,
        retriever=retriever,
        chain_type="stuff",
    )
52
+
53
+ # ----------------------------
54
+ # STREAMLIT UI
55
+ # ----------------------------
56
uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])

if uploaded_file:
    # Streamlit re-executes this script on every widget interaction, which
    # includes every new question. The chain is kept in session state and
    # rebuilt only when a different file is uploaded, so the PDF is not
    # re-parsed and re-embedded for each query.
    file_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("qa_file_key") != file_key:
        with st.spinner("Processing PDF..."):
            docs = load_pdf(uploaded_file)
            vectorstore = build_vectorstore(docs)
            built_chain = build_qa_chain(vectorstore)
        # Store the key only after a successful build so a failed attempt is
        # retried on the next rerun.
        st.session_state["qa_chain"] = built_chain
        st.session_state["qa_file_key"] = file_key
    qa_chain = st.session_state["qa_chain"]
    st.success("✅ PDF uploaded & processed. You can now ask questions!")

if qa_chain:
    query = st.text_input("Ask a question about the PDF:")
    if query:
        with st.spinner("Searching..."):
            answer = qa_chain.run(query)
        st.markdown("### Answer:")
        st.write(answer)