Spaces:

Itzadityapandey
/

ChatWithPDf

Runtime error

App Files Files Community

Itzadityapandey committed on Jan 3

Commit

18b9631

verified ·

1 Parent(s): c19cf24

Create app.py

Browse files

Files changed (1) hide show

app.py +117 -0

app.py ADDED Viewed

	@@ -0,0 +1,117 @@

+import os
+import gradio as gr
+from PyPDF2 import PdfReader
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
+from langchain_community.vectorstores import FAISS
+from langchain_core.prompts import ChatPromptTemplate
+from langchain.chains.combine_documents import create_stuff_documents_chain
+from langchain.chains import create_retrieval_chain
+from dotenv import load_dotenv
+# Load environment variables
+load_dotenv()
+GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
+# Directory to save FAISS index
+INDEX_PATH = "faiss_index"
+def get_pdf_text(pdf_files):
+    text = ""
+    for pdf in pdf_files:
+        try:
+            pdf_reader = PdfReader(pdf.name)  # pdf is a tempfile.NamedTemporaryFile in Gradio
+            for page in pdf_reader.pages:
+                extracted_text = page.extract_text()
+                if extracted_text:
+                    text += extracted_text + "\n"
+        except Exception as e:
+            return f"Error reading PDF: {str(e)}"
+    return text
+def get_text_chunks(text):
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
+    return text_splitter.split_text(text)
+def create_vector_store(text_chunks):
+    try:
+        embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY)
+        vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
+        vector_store.save_local(INDEX_PATH)
+        return "PDFs processed successfully! Vector store saved. Now you can ask questions."
+    except Exception as e:
+        return f"Error creating vector store: {str(e)}"
+def load_vector_store():
+    try:
+        embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY)
+        if os.path.exists(INDEX_PATH):
+            return FAISS.load_local(INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
+        return None
+    except Exception as e:
+        return None
+def get_qa_chain():
+    # Modern stuff QA chain
+    llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.3, google_api_key=GOOGLE_API_KEY)
+    qa_prompt = ChatPromptTemplate.from_messages([
+        ("system", """
+        Answer the question as detailed as possible from the provided context only.
+        If the answer is not in the provided context, respond with "answer is not available in the context".
+        Do not make up information.
+        Context: {context}
+        """),
+        ("human", "{input}"),
+    ])
+    stuff_chain = create_stuff_documents_chain(llm, qa_prompt)
+    return stuff_chain
+def query_pdf(user_question):
+    vector_store = load_vector_store()
+    if vector_store is None:
+        return "Please process a PDF first by uploading and submitting it."
+    try:
+        retriever = vector_store.as_retriever(search_kwargs={"k": 4})  # Retrieve top 4 docs
+        stuff_chain = get_qa_chain()
+        # Full retrieval QA chain
+        retrieval_chain = create_retrieval_chain(retriever, stuff_chain)
+        response = retrieval_chain.invoke({"input": user_question})
+        return response["answer"]
+    except Exception as e:
+        return f"Error querying the PDF: {str(e)}"
+def process_pdfs(pdf_files):
+    if not pdf_files:
+        return "Please upload at least one PDF."
+    raw_text = get_pdf_text(pdf_files)
+    if "Error" in raw_text:
+        return raw_text
+    if not raw_text.strip():
+        return "No extractable text found in the uploaded PDFs."
+    text_chunks = get_text_chunks(raw_text)
+    result = create_vector_store(text_chunks)
+    return result
+# Gradio UI: upload and index PDFs, then ask questions against the saved index
+with gr.Blocks(title="Chat with PDF") as demo:
+    gr.Markdown("## Chat with PDF 💁")
+    pdf_input = gr.File(file_types=[".pdf"], label="Upload PDF(s)", file_count="multiple")  # several PDFs may be uploaded at once
+    process_button = gr.Button("Submit & Process")
+    status_output = gr.Textbox(label="Status", placeholder="Status updates will appear here...")  # shows the message returned by process_pdfs
+    question_input = gr.Textbox(label="Ask a Question from the PDF")
+    answer_output = gr.Textbox(label="Reply", placeholder="Answers will appear here...")  # shows the string returned by query_pdf
+    ask_button = gr.Button("Get Answer")
+    process_button.click(process_pdfs, inputs=[pdf_input], outputs=[status_output])  # uploaded files -> status text
+    ask_button.click(query_pdf, inputs=[question_input], outputs=[answer_output])  # question -> answer text
+if __name__ == "__main__":  # start the app only when run as a script
+    demo.launch()