Spaces:

nikhmr1235
/

PDF_document_chatbot

Sleeping

App Files Files Community

nikhmr1235 committed on Aug 31, 2025

Commit

9cc9353

verified ·

1 Parent(s): 2b5cccc

Add gr.Info messages during PDF processing and improve error handling so that error messages are displayed in the UI when something goes wrong.

Browse files

Files changed (1) hide show

app.py +66 -35

app.py CHANGED Viewed

@@ -73,23 +73,43 @@ class SessionState:
     def is_db_ready(self):
         return self.db is not None
-async def process_pdf(pdf_file, state: SessionState):
     try:
         file_size_mb = os.path.getsize(pdf_file.name) / (1024 * 1024)
         if file_size_mb >= 75:
             gr.Error("File size exceeds the 75 MB limit. Please upload a smaller PDF.")
-            return
         print("Opening PDF file...")
         try:
             doc = fitz.open(pdf_file.name)
             text = ""
             for page in doc:
                 text += page.get_text()
             doc.close()
         except Exception as e:
             print(f"Error processing PDF document: {str(e)}")
-            return
         print("PDF file opened successfully. Splitting text into chunks...")
         text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
@@ -97,17 +117,36 @@ async def process_pdf(pdf_file, state: SessionState):
         print("Text split into chunks successfully.")
         embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL, google_api_key=google_api_key)
-        state.db = await Chroma.afrom_documents(
             documents=docs,
             embedding=embeddings,
-            persist_directory=state.vector_store_path,
-            collection_name=state.session_id
         )
         print("PDF processed successfully! Database is ready.")
     except Exception as e:
-        if os.path.exists(state.vector_store_path):
             shutil.rmtree(state.vector_store_path)
         print(f"An error occurred: {str(e)}")
 async def chat_with_pdf(message, history, state: SessionState):
     print("Chat interface called. Checking if database is ready...")
@@ -143,7 +182,7 @@ async def chat_with_pdf(message, history, state: SessionState):
     yield response
 with gr.Blocks(title="PDF Chatbot") as demo:
-    state = gr.State()
     gr.Markdown(
         """
@@ -151,35 +190,27 @@ with gr.Blocks(title="PDF Chatbot") as demo:
         Upload a PDF to start a conversation with your document.
         """
     )
-    with gr.Row():
-        file_upload_input = gr.File(
-            file_types=[".pdf"],
-            label="Upload your PDF document",
-            interactive=True
-        )
-    with gr.Row(visible=False) as chat_row:
-        chat_interface = gr.ChatInterface(
-            fn=chat_with_pdf,
-            additional_inputs=[state],
-            chatbot=gr.Chatbot(type="messages"),
-            textbox=gr.Textbox(placeholder="Type your question here...", scale=7),
-            examples=[["What is the main topic of the document?"], ["Summarize the key findings."], ["Who are the authors?"]],
-            title="Chat Interface",
-            theme="soft",
-            type="messages"
-        )
-    async def process_and_show_chat(file):
-        new_state = SessionState()
-        await process_pdf(file, new_state)
-        return gr.update(visible=True), gr.update(interactive=False), new_state
     file_upload_input.upload(
-        fn=process_and_show_chat,
-        inputs=[file_upload_input],
-        outputs=[chat_row, file_upload_input, state]
     )
-demo.launch()

     def is_db_ready(self):
         return self.db is not None
+async def process_pdf(pdf_file, state: gr.State):
+    gr.Info("Processing PDF, please wait...")
     try:
+        # Check if a PDF has already been processed in this session
+        if state and state.is_db_ready():
+            return (
+                gr.update(interactive=False),
+                gr.update(interactive=True),
+                state
+            )
         file_size_mb = os.path.getsize(pdf_file.name) / (1024 * 1024)
         if file_size_mb >= 75:
             gr.Error("File size exceeds the 75 MB limit. Please upload a smaller PDF.")
+            # Reset components on error
+            return (
+                gr.update(interactive=True),
+                gr.update(interactive=False),
+                gr.State() # Reset state
+            )
         print("Opening PDF file...")
         try:
             doc = fitz.open(pdf_file.name)
             text = ""
+            # CRITICAL FIX: Iterate over pages and get text from each page
             for page in doc:
                 text += page.get_text()
             doc.close()
         except Exception as e:
             print(f"Error processing PDF document: {str(e)}")
+            gr.Error(f"Error processing PDF document: {str(e)}")
+            return (
+                gr.update(interactive=True),
+                gr.update(interactive=False),
+                gr.State()
+            )
         print("PDF file opened successfully. Splitting text into chunks...")
         text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
         print("Text split into chunks successfully.")
         embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL, google_api_key=google_api_key)
+        # Initialize a new session state object
+        new_state = SessionState()
+        new_state.db = await Chroma.afrom_documents(
             documents=docs,
             embedding=embeddings,
+            persist_directory=new_state.vector_store_path,
+            collection_name=new_state.session_id
         )
         print("PDF processed successfully! Database is ready.")
+        gr.Info("PDF processed! You can now ask questions about the document.")
+        return (
+            gr.update(interactive=False),
+            gr.update(interactive=True),
+            new_state
+        )
     except Exception as e:
+        if state and os.path.exists(state.vector_store_path):
             shutil.rmtree(state.vector_store_path)
         print(f"An error occurred: {str(e)}")
+        gr.Error(f"An error occurred: {str(e)}")
+        return (
+            gr.update(interactive=True),
+            gr.update(interactive=False),
+            gr.State()
+        )
 async def chat_with_pdf(message, history, state: SessionState):
     print("Chat interface called. Checking if database is ready...")
     yield response
 with gr.Blocks(title="PDF Chatbot") as demo:
+    state = gr.State(value=SessionState())
     gr.Markdown(
         """
         Upload a PDF to start a conversation with your document.
         """
     )
+    file_upload_input = gr.File(
+        file_types=[".pdf"],
+        label="Upload your PDF document",
+        interactive=True
+    )
+    chat_interface = gr.ChatInterface(
+        fn=chat_with_pdf,
+        additional_inputs=[state],
+        chatbot=gr.Chatbot(type="messages"),
+        textbox=gr.Textbox(placeholder="Type your question here...", scale=7, interactive=False),
+        examples=[["What is the main topic of the document?"], ["Summarize the key findings."], ["Who are the authors?"]],
+        title="Chat Interface",
+        theme="soft"
+    )
     file_upload_input.upload(
+        fn=process_pdf,
+        inputs=[file_upload_input, state],
+        outputs=[file_upload_input, chat_interface.textbox, state]
     )
+demo.launch()