Itzadityapandey commited on
Commit
18b9631
·
verified ·
1 Parent(s): c19cf24

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +117 -0
app.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from PyPDF2 import PdfReader
4
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
5
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
6
+ from langchain_community.vectorstores import FAISS
7
+ from langchain_core.prompts import ChatPromptTemplate
8
+ from langchain.chains.combine_documents import create_stuff_documents_chain
9
+ from langchain.chains import create_retrieval_chain
10
+ from dotenv import load_dotenv
11
+
12
# Load environment variables from a local .env file (if present).
load_dotenv()

# Google Generative AI credentials, read from the environment.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

# Directory where the FAISS index is persisted between sessions.
INDEX_PATH = "faiss_index"
18
+
19
def get_pdf_text(pdf_files):
    """Extract plain text from a list of uploaded PDF files.

    Args:
        pdf_files: Gradio file objects; each exposes a ``.name`` path to a
            temp file on disk.

    Returns:
        The concatenated text of every readable page across all files.
        If nothing could be extracted and at least one file failed, returns
        an error string beginning with "Error reading PDF" (this prefix is
        the failure signal checked by process_pdfs).
    """
    text = ""
    errors = []
    for pdf in pdf_files:
        try:
            pdf_reader = PdfReader(pdf.name)  # pdf is a tempfile.NamedTemporaryFile in Gradio
            for page in pdf_reader.pages:
                extracted_text = page.extract_text()
                if extracted_text:
                    text += extracted_text + "\n"
        except Exception as e:
            # Keep going: one corrupt/unreadable PDF must not abort the
            # whole batch (the original returned here, discarding all
            # text already collected from earlier files).
            errors.append(f"Error reading PDF: {str(e)}")
    if not text and errors:
        return "; ".join(errors)
    return text
31
+
32
def get_text_chunks(text):
    """Split raw document text into large overlapping chunks for embedding."""
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=10000,
        chunk_overlap=1000,
    )
    chunks = splitter.split_text(text)
    return chunks
35
+
36
def create_vector_store(text_chunks):
    """Embed the given chunks and persist a FAISS index under INDEX_PATH.

    Returns:
        A user-facing status string: success message, or an error message
        prefixed with "Error creating vector store".
    """
    try:
        embedder = GoogleGenerativeAIEmbeddings(
            model="models/embedding-001",
            google_api_key=GOOGLE_API_KEY,
        )
        store = FAISS.from_texts(text_chunks, embedding=embedder)
        store.save_local(INDEX_PATH)
    except Exception as e:
        return f"Error creating vector store: {str(e)}"
    return "PDFs processed successfully! Vector store saved. Now you can ask questions."
44
+
45
def load_vector_store():
    """Load the persisted FAISS index from INDEX_PATH, or None if unavailable.

    NOTE(review): every load failure is collapsed to None, so the caller
    cannot distinguish "no index yet" from a genuine load error (e.g. a
    bad API key) — consider surfacing the exception message.
    """
    try:
        embedder = GoogleGenerativeAIEmbeddings(
            model="models/embedding-001",
            google_api_key=GOOGLE_API_KEY,
        )
        if not os.path.exists(INDEX_PATH):
            return None
        # allow_dangerous_deserialization is required by FAISS.load_local
        # for pickle-backed indexes; safe here because we only load files
        # this app itself wrote.
        return FAISS.load_local(INDEX_PATH, embedder, allow_dangerous_deserialization=True)
    except Exception:
        return None
53
+
54
def get_qa_chain():
    """Build a modern "stuff documents" QA chain over a Gemini chat model.

    Returns:
        A runnable chain that answers strictly from the retrieved context.
    """
    model = ChatGoogleGenerativeAI(
        model="gemini-2.0-flash",
        temperature=0.3,
        google_api_key=GOOGLE_API_KEY,
    )

    prompt = ChatPromptTemplate.from_messages([
        ("system", """
Answer the question as detailed as possible from the provided context only.
If the answer is not in the provided context, respond with "answer is not available in the context".
Do not make up information.

Context: {context}
"""),
        ("human", "{input}"),
    ])

    return create_stuff_documents_chain(model, prompt)
71
+
72
def query_pdf(user_question):
    """Answer a question against the persisted PDF vector index.

    Args:
        user_question: the question typed into the UI.

    Returns:
        The model's answer string, or a user-facing status/error message.
    """
    store = load_vector_store()
    if store is None:
        return "Please process a PDF first by uploading and submitting it."

    try:
        doc_retriever = store.as_retriever(search_kwargs={"k": 4})  # top-4 chunks
        retrieval_chain = create_retrieval_chain(doc_retriever, get_qa_chain())
        result = retrieval_chain.invoke({"input": user_question})
        return result["answer"]
    except Exception as e:
        return f"Error querying the PDF: {str(e)}"
88
+
89
def process_pdfs(pdf_files):
    """Read uploaded PDFs, chunk their text, and build the FAISS index.

    Args:
        pdf_files: list of Gradio file objects (may be None or empty).

    Returns:
        A user-facing status string describing the outcome.
    """
    if not pdf_files:
        return "Please upload at least one PDF."

    raw_text = get_pdf_text(pdf_files)
    # get_pdf_text signals failure with a string that STARTS with "Error".
    # Check the prefix, not a substring — the original `"Error" in raw_text`
    # wrongly rejected any PDF whose own text merely contained "Error".
    if raw_text.startswith("Error"):
        return raw_text
    if not raw_text.strip():
        return "No extractable text found in the uploaded PDFs."

    text_chunks = get_text_chunks(raw_text)
    return create_vector_store(text_chunks)
102
+
103
# ---- Gradio UI ----
with gr.Blocks(title="Chat with PDF") as demo:
    gr.Markdown("## Chat with PDF 💁")

    # Upload & indexing controls.
    pdf_input = gr.File(file_types=[".pdf"], label="Upload PDF(s)", file_count="multiple")
    process_button = gr.Button("Submit & Process")
    status_output = gr.Textbox(label="Status", placeholder="Status updates will appear here...")

    # Question & answer controls.
    question_input = gr.Textbox(label="Ask a Question from the PDF")
    answer_output = gr.Textbox(label="Reply", placeholder="Answers will appear here...")
    ask_button = gr.Button("Get Answer")

    # Wire buttons to the processing / querying callbacks.
    process_button.click(process_pdfs, inputs=[pdf_input], outputs=[status_output])
    ask_button.click(query_pdf, inputs=[question_input], outputs=[answer_output])

if __name__ == "__main__":
    demo.launch()