Spaces:

Itzadityapandey
/

ChatWithPDf

Running

App Files Files Community

Itzadityapandey committed on 23 days ago

Commit

8563a16

verified ·

1 Parent(s): 75df3d3

Update app.py

Browse files

Files changed (1) hide show

app.py +82 -7

app.py CHANGED Viewed

@@ -4,9 +4,8 @@ from PyPDF2 import PdfReader
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
 from langchain_community.vectorstores import FAISS
-from langchain_core.prompts import ChatPromptTemplate
-from langchain_classic.chains.combine_documents import create_stuff_documents_chain
-from langchain_classic.chains import create_retrieval_chain
 from dotenv import load_dotenv
 # Load environment variables
@@ -20,11 +19,11 @@ def get_pdf_text(pdf_files):
     text = ""
     for pdf in pdf_files:
         try:
-            pdf_reader = PdfReader(pdf.name)  # pdf is a tempfile.NamedTemporaryFile in Gradio
             for page in pdf_reader.pages:
-                extracted_text = page.extract_text()
-                if extracted_text:
-                    text += extracted_text + "\n"
         except Exception as e:
             return f"Error reading PDF: {str(e)}"
     return text
@@ -51,6 +50,82 @@ def load_vector_store():
     except Exception as e:
         return None
 def get_qa_chain():
     # Modern stuff QA chain
     llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.3, google_api_key=GOOGLE_API_KEY)

 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
 from langchain_community.vectorstores import FAISS
+from langchain_classic.chains.question_answering import load_qa_chain  # Fixed import
+from langchain_core.prompts import PromptTemplate
 from dotenv import load_dotenv
 # Load environment variables
     text = ""
     for pdf in pdf_files:
         try:
+            pdf_reader = PdfReader(pdf.name)  # Gradio gives tempfile
             for page in pdf_reader.pages:
+                extracted = page.extract_text()
+                if extracted:
+                    text += extracted + "\n"
         except Exception as e:
             return f"Error reading PDF: {str(e)}"
     return text
     except Exception as e:
         return None
+def get_conversational_chain():
+    prompt_template = """
+    Answer the question as detailed as possible from the provided context.
+    If the answer is not in the provided context, respond with "answer is not available in the context".
+    Do not provide incorrect information.
+    Context:
+    {context}
+    Question:
+    {question}
+    Answer:
+    """
+    model = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0.3, google_api_key=GOOGLE_API_KEY)  # Updated to a current fast model
+    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
+    return load_qa_chain(model, chain_type="stuff", prompt=prompt)
+def query_pdf(user_question):
+    vector_store = load_vector_store()
+    if vector_store is None:
+        return "Please process a PDF first by uploading and submitting it."
+    try:
+        docs = vector_store.similarity_search(user_question, k=4)
+        chain = get_conversational_chain()
+        response = chain({"input_documents": docs, "question": user_question}, return_only_outputs=True)
+        return response["output_text"]
+    except Exception as e:
+        return f"Error querying the PDF: {str(e)}"
+def process_pdfs(pdf_files):
+    if not pdf_files:
+        return "Please upload at least one PDF."
+    raw_text = get_pdf_text(pdf_files)
+    if "Error" in raw_text:
+        return raw_text
+    if not raw_text.strip():
+        return "No extractable text found in the uploaded PDFs."
+    text_chunks = get_text_chunks(raw_text)
+    result = create_vector_store(text_chunks)
+    return result
+# Gradio UI
+with gr.Blocks(title="Chat with PDF") as demo:
+    gr.Markdown("## Chat with PDF 💁")
+    pdf_input = gr.File(file_types=[".pdf"], label="Upload PDF(s)", file_count="multiple")
+    process_button = gr.Button("Submit & Process")
+    status_output = gr.Textbox(label="Status", placeholder="Status updates will appear here...")
+    question_input = gr.Textbox(label="Ask a Question from the PDF")
+    answer_output = gr.Textbox(label="Reply", placeholder="Answers will appear here...")
+    ask_button = gr.Button("Get Answer")
+    process_button.click(process_pdfs, inputs=[pdf_input], outputs=[status_output])
+    ask_button.click(query_pdf, inputs=[question_input], outputs=[answer_output])
+if __name__ == "__main__":
+    demo.launch()
+    try:
+        embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY)
+        vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
+        vector_store.save_local(INDEX_PATH)
+        return "PDFs processed successfully! Vector store saved. Now you can ask questions."
+    except Exception as e:
+        return f"Error creating vector store: {str(e)}"
+def load_vector_store():
+    try:
+        embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY)
+        if os.path.exists(INDEX_PATH):
+            return FAISS.load_local(INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
+        return None
+    except Exception as e:
+        return None
 def get_qa_chain():
     # Modern stuff QA chain
     llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.3, google_api_key=GOOGLE_API_KEY)