Spaces:

nikhmr1235
/

PDF_document_chatbot

Sleeping

App Files Files Community

nikhmr1235 committed on Aug 29, 2025

Commit

1f44a86

verified ·

1 Parent(s): cfdf352

Application file with Gradio UI to upload PDF and chatbot interface for Q&A on the PDF document

Browse files

-- User Interface (Gradio): The interface will be an "all-in-one" single-page layout.
- It will feature a clear file upload area for PDF documents at the top.
- Below the upload area, a chat interface will be present.
- The chat interface will be disabled by default. It will only become active and usable after a PDF has been successfully uploaded and processed.
- Similarly, the PDF upload interface will be disabled once the chat interface is enabled.

Files changed (1) hide show

app.py +177 -0

app.py ADDED Viewed

	@@ -0,0 +1,177 @@

+import os
+import shutil
+import tempfile
+import uuid
+import gradio as gr
+# The PyMuPDF distribution installs its module as "pymupdf" (legacy alias
+# "fitz"); there is no importable module named "PyMuPDF". The alias keeps the
+# name the rest of this file uses.
+import pymupdf as PyMuPDF
+from langchain.prompts import PromptTemplate
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.vectorstores import Chroma
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import RunnablePassthrough
+from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
+# Constants
+LLM_MODEL = "gemini-1.5-flash"
+EMBEDDING_MODEL = "BAAI/bge-large-en-v1.5"
+CHROMA_DB_PATH = tempfile.gettempdir()  + "/chroma_db"
+# Global state to hold session data
+class SessionState:
+    def __init__(self):
+        self.session_id = str(uuid.uuid4())
+        self.db = None
+        self.vector_store_path = os.path.join(CHROMA_DB_PATH, self.session_id)
+    def is_db_ready(self):
+        return self.db is not None
+# Gradio components to be enabled/disabled
+CHAT_COMPONENTS = None
+FILE_UPLOAD_COMPONENTS = None
+def initialize_components(file_upload_input, chat_input, chatbot):
+    global CHAT_COMPONENTS, FILE_UPLOAD_COMPONENTS
+    CHAT_COMPONENTS = [chat_input, chatbot]
+    FILE_UPLOAD_COMPONENTS = [file_upload_input]
+# Helper function to generate a new session state
+def new_session():
+    return SessionState()
+# Function to handle PDF upload and ingestion
+def process_pdf(pdf_file, state):
+    try:
+        if state.is_db_ready():
+            return (
+                f"A PDF has already been processed. Please refresh the page to upload a new one.",
+                [],
+                gr.ChatInterface(disabled=False),
+                gr.File(disabled=True)
+            )
+        # Create a new session and directory for the user
+        state = new_session()
+        if not os.path.exists(state.vector_store_path):
+            os.makedirs(state.vector_store_path)
+        # Extract text from the PDF
+        doc = PyMuPDF.open(pdf_file.name)
+        text = ""
+        for page in doc:
+            text += page.get_text()
+        doc.close()
+        # Split text into chunks
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+        docs = text_splitter.create_documents([text])
+        # Create a ChromaDB vector store from the documents
+        embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL)
+        state.db = Chroma.from_documents(
+            documents=docs,
+            embedding=embeddings,
+            persist_directory=state.vector_store_path
+        )
+        gr.Info("PDF processed successfully! You can now ask questions about the document.")
+        return (
+            "",
+            [["", "PDF processed successfully! You can now ask questions."]],
+            gr.ChatInterface(disabled=False),
+            gr.File(disabled=True)
+        )
+    except Exception as e:
+        # Clean up the directory in case of an error
+        if os.path.exists(state.vector_store_path):
+            shutil.rmtree(state.vector_store_path)
+        gr.Error(f"An error occurred: {str(e)}")
+        return (
+            "",
+            [["", f"An error occurred during processing: {str(e)}"]],
+            gr.ChatInterface(disabled=True),
+            gr.File(disabled=False)
+        )
+# Function to handle user queries
+def chat_with_pdf(message, history, state):
+    if not state.is_db_ready():
+        yield "Please upload a PDF first to begin the conversation."
+        return
+    # Use the ChromaDB instance from the session state
+    retriever = state.db.as_retriever()
+    # Set up the RAG chain
+    llm = ChatGoogleGenerativeAI(model=LLM_MODEL, temperature=0.7)
+    prompt_template = PromptTemplate(
+        template="""
+        You are a helpful assistant for a PDF document.
+        Answer the user's question based on the following context.
+        If you don't know the answer, just say that you don't know, don't try to make up an answer.
+        ----------------
+        Context: {context}
+        Question: {question}
+        """,
+        input_variables=["context", "question"],
+    )
+    rag_chain = (
+        {"context": retriever, "question": RunnablePassthrough()}
+        | prompt_template
+        | llm
+        | StrOutputParser()
+    )
+    response = rag_chain.invoke(message)
+    yield response
+# Gradio Interface
+with gr.Blocks(title="PDF Chatbot") as demo:
+    state = gr.State(new_session)
+    gr.Markdown(
+        """
+        # PDF Chatbot
+        Upload a PDF to start a conversation with your document.
+        The chat interface will become active after the file is processed.
+        """
+    )
+    with gr.Row():
+        file_upload_input = gr.File(
+            file_types=[".pdf"],
+            label="Upload your PDF document",
+            interactive=True
+        )
+    chatbot = gr.Chatbot(label="Chat History", placeholder="Upload a document to start a conversation...")
+    chat_input = gr.Textbox(
+        placeholder="Type your question here...",
+        scale=7
+    )
+    chat_interface = gr.ChatInterface(
+        fn=chat_with_pdf,
+        textbox=chat_input,
+        chatbot=chatbot,
+        examples=["What is the main topic of the document?", "Summarize the key findings.", "Who are the authors?"],
+        title="Chat Interface",
+        theme="soft",
+        # Chat is disabled until a file is processed
+        disabled=True
+    )
+    # Store components in global variables for easy access
+    initialize_components(file_upload_input, chat_input, chatbot)
+    # Event handlers
+    file_upload_input.upload(
+        fn=process_pdf,
+        inputs=[file_upload_input, state],
+        outputs=[file_upload_input, chatbot, chat_interface, file_upload_input]
+    )
+demo.launch()