nikhmr1235 committed on
Commit
f11f97c
·
verified ·
1 Parent(s): 22dd68c

fix state propagation issue between process_pdf() and chat_with_pdf()

Browse files
Files changed (1) hide show
  1. app.py +28 -53
app.py CHANGED
@@ -77,43 +77,22 @@ class SessionState:
77
  def is_db_ready(self):
78
  return self.db is not None
79
 
80
- async def process_pdf(pdf_file, state: gr.State):
81
- gr.Info("Processing PDF, please wait...")
82
  try:
83
- # Check if a PDF has already been processed in this session
84
- if state and state.is_db_ready():
85
- return (
86
- gr.update(interactive=False),
87
- gr.update(interactive=True),
88
- state
89
- )
90
-
91
  file_size_mb = os.path.getsize(pdf_file.name) / (1024 * 1024)
92
  if file_size_mb >= 75:
93
- gr.Error("File size exceeds the 75 MB limit. Please upload a smaller PDF.")
94
- # Reset components on error
95
- return (
96
- gr.update(interactive=True),
97
- gr.update(interactive=False),
98
- gr.State() # Reset state
99
- )
100
 
101
  print("Opening PDF file...")
102
  try:
103
  doc = fitz.open(pdf_file.name)
104
  text = ""
105
- # CRITICAL FIX: Iterate over pages and get text from each page
106
  for page in doc:
107
  text += page.get_text()
108
  doc.close()
109
  except Exception as e:
110
- print(f"Error processing PDF document: {str(e)}")
111
- gr.Error(f"Error processing PDF document: {str(e)}")
112
- return (
113
- gr.update(interactive=True),
114
- gr.update(interactive=False),
115
- gr.State()
116
- )
117
 
118
  print("PDF file opened successfully. Splitting text into chunks...")
119
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
@@ -122,35 +101,23 @@ async def process_pdf(pdf_file, state: gr.State):
122
 
123
  embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL, google_api_key=google_api_key)
124
 
125
- # Initialize a new session state object
126
- new_state = SessionState()
127
-
128
- new_state.db = await Chroma.afrom_documents(
129
  documents=docs,
130
  embedding=embeddings,
131
- persist_directory=new_state.vector_store_path,
132
- collection_name=new_state.session_id
133
  )
134
  print("PDF processed successfully! Database is ready.")
135
- gr.Info("PDF processed! You can now ask questions about the document.")
136
-
137
- return (
138
- gr.update(interactive=False),
139
- gr.update(interactive=True),
140
- new_state
141
- )
142
 
143
  except Exception as e:
144
- if state and os.path.exists(state.vector_store_path):
145
  shutil.rmtree(state.vector_store_path)
146
- print(f"An error occurred: {str(e)}")
147
- gr.Error(f"An error occurred: {str(e)}")
148
 
149
- return (
150
- gr.update(interactive=True),
151
- gr.update(interactive=False),
152
- gr.State()
153
- )
154
 
155
  async def chat_with_pdf(message, history, state: SessionState):
156
  print("Chat interface called. Checking if database is ready...")
@@ -226,15 +193,23 @@ with gr.Blocks(title="PDF Chatbot") as demo:
226
 
227
  async def process_and_show_chat(file, state):
228
  gr.Info("Processing your PDF, please wait...")
 
229
  try:
230
- new_state = SessionState()
231
  await process_pdf(file, new_state)
232
  gr.Info("PDF processed successfully! You can now chat with it.")
233
- return gr.update(visible=True), gr.update(interactive=False), new_state
234
- except Exception:
235
- # The exception is already a gr.Error, so it will be displayed in the UI.
236
- # We just need to return the correct UI updates.
237
- return gr.update(visible=False), gr.update(interactive=True), state
 
 
 
 
 
 
 
 
238
 
239
  file_upload_input.upload(
240
  fn=process_and_show_chat,
@@ -242,4 +217,4 @@ with gr.Blocks(title="PDF Chatbot") as demo:
242
  outputs=[chat_row, file_upload_input, state]
243
  )
244
 
245
- demo.launch()
 
77
  def is_db_ready(self):
78
  return self.db is not None
79
 
80
+ async def process_pdf(pdf_file, state: SessionState):
81
+ """Processes the PDF and updates the state object."""
82
  try:
 
 
 
 
 
 
 
 
83
  file_size_mb = os.path.getsize(pdf_file.name) / (1024 * 1024)
84
  if file_size_mb >= 75:
85
+ raise gr.Error("File size exceeds the 75 MB limit. Please upload a smaller PDF.")
 
 
 
 
 
 
86
 
87
  print("Opening PDF file...")
88
  try:
89
  doc = fitz.open(pdf_file.name)
90
  text = ""
 
91
  for page in doc:
92
  text += page.get_text()
93
  doc.close()
94
  except Exception as e:
95
+ raise gr.Error(f"Error processing PDF document: {str(e)}")
 
 
 
 
 
 
96
 
97
  print("PDF file opened successfully. Splitting text into chunks...")
98
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
 
101
 
102
  embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL, google_api_key=google_api_key)
103
 
104
+ state.db = await Chroma.afrom_documents(
 
 
 
105
  documents=docs,
106
  embedding=embeddings,
107
+ persist_directory=state.vector_store_path,
108
+ collection_name=state.session_id
109
  )
110
  print("PDF processed successfully! Database is ready.")
 
 
 
 
 
 
 
111
 
112
  except Exception as e:
113
+ if os.path.exists(state.vector_store_path):
114
  shutil.rmtree(state.vector_store_path)
 
 
115
 
116
+ if isinstance(e, gr.Error):
117
+ raise # Re-raise Gradio errors directly
118
+ else:
119
+ raise gr.Error(f"An unexpected error occurred: {str(e)}")
120
+
121
 
122
  async def chat_with_pdf(message, history, state: SessionState):
123
  print("Chat interface called. Checking if database is ready...")
 
193
 
194
  async def process_and_show_chat(file, state):
195
  gr.Info("Processing your PDF, please wait...")
196
+ new_state = SessionState()
197
  try:
 
198
  await process_pdf(file, new_state)
199
  gr.Info("PDF processed successfully! You can now chat with it.")
200
+ return {
201
+ chat_row: gr.update(visible=True),
202
+ file_upload_input: gr.update(interactive=False),
203
+ state: new_state,
204
+ }
205
+ except gr.Error as e:
206
+ # Display the Gradio error message to the user
207
+ gr.Error(str(e))
208
+ return {
209
+ chat_row: gr.update(visible=False),
210
+ file_upload_input: gr.update(interactive=True),
211
+ state: state, # Return original state on failure
212
+ }
213
 
214
  file_upload_input.upload(
215
  fn=process_and_show_chat,
 
217
  outputs=[chat_row, file_upload_input, state]
218
  )
219
 
220
+ demo.launch()