nikhmr1235 committed on
Commit
9cc9353
·
verified ·
1 Parent(s): 2b5cccc

Added gr.Info messages during PDF processing and improved error handling so that messages are displayed in the UI if something goes wrong.

Browse files
Files changed (1) hide show
  1. app.py +66 -35
app.py CHANGED
@@ -73,23 +73,43 @@ class SessionState:
73
def is_db_ready(self):
    """Report whether the vector database for this session has been created."""
    return not (self.db is None)
75
 
76
- async def process_pdf(pdf_file, state: SessionState):
 
77
  try:
 
 
 
 
 
 
 
 
78
  file_size_mb = os.path.getsize(pdf_file.name) / (1024 * 1024)
79
  if file_size_mb >= 75:
80
  gr.Error("File size exceeds the 75 MB limit. Please upload a smaller PDF.")
81
- return
 
 
 
 
 
82
 
83
  print("Opening PDF file...")
84
  try:
85
  doc = fitz.open(pdf_file.name)
86
  text = ""
 
87
  for page in doc:
88
  text += page.get_text()
89
  doc.close()
90
  except Exception as e:
91
  print(f"Error processing PDF document: {str(e)}")
92
- return
 
 
 
 
 
93
 
94
  print("PDF file opened successfully. Splitting text into chunks...")
95
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
@@ -97,17 +117,36 @@ async def process_pdf(pdf_file, state: SessionState):
97
  print("Text split into chunks successfully.")
98
 
99
  embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL, google_api_key=google_api_key)
100
- state.db = await Chroma.afrom_documents(
 
 
 
 
101
  documents=docs,
102
  embedding=embeddings,
103
- persist_directory=state.vector_store_path,
104
- collection_name=state.session_id
105
  )
106
  print("PDF processed successfully! Database is ready.")
 
 
 
 
 
 
 
 
107
  except Exception as e:
108
- if os.path.exists(state.vector_store_path):
109
  shutil.rmtree(state.vector_store_path)
110
  print(f"An error occurred: {str(e)}")
 
 
 
 
 
 
 
111
 
112
  async def chat_with_pdf(message, history, state: SessionState):
113
  print("Chat interface called. Checking if database is ready...")
@@ -143,7 +182,7 @@ async def chat_with_pdf(message, history, state: SessionState):
143
  yield response
144
 
145
  with gr.Blocks(title="PDF Chatbot") as demo:
146
- state = gr.State()
147
 
148
  gr.Markdown(
149
  """
@@ -151,35 +190,27 @@ with gr.Blocks(title="PDF Chatbot") as demo:
151
  Upload a PDF to start a conversation with your document.
152
  """
153
  )
 
 
 
 
 
 
154
 
155
- with gr.Row():
156
- file_upload_input = gr.File(
157
- file_types=[".pdf"],
158
- label="Upload your PDF document",
159
- interactive=True
160
- )
161
-
162
- with gr.Row(visible=False) as chat_row:
163
- chat_interface = gr.ChatInterface(
164
- fn=chat_with_pdf,
165
- additional_inputs=[state],
166
- chatbot=gr.Chatbot(type="messages"),
167
- textbox=gr.Textbox(placeholder="Type your question here...", scale=7),
168
- examples=[["What is the main topic of the document?"], ["Summarize the key findings."], ["Who are the authors?"]],
169
- title="Chat Interface",
170
- theme="soft",
171
- type="messages"
172
- )
173
-
174
async def process_and_show_chat(file):
    """Process the uploaded PDF into a fresh session, then reveal the chat row
    and lock the upload widget."""
    session = SessionState()
    await process_pdf(file, session)
    # Order matches the .upload() outputs: chat row, upload widget, state.
    return (
        gr.update(visible=True),
        gr.update(interactive=False),
        session,
    )
178
 
179
  file_upload_input.upload(
180
- fn=process_and_show_chat,
181
- inputs=[file_upload_input],
182
- outputs=[chat_row, file_upload_input, state]
183
  )
184
 
185
- demo.launch()
 
73
def is_db_ready(self):
    """Return True once a vector store has been attached to this session."""
    ready = self.db is not None
    return ready
75
 
76
+ async def process_pdf(pdf_file, state: gr.State):
77
+ gr.Info("Processing PDF, please wait...")
78
  try:
79
+ # Check if a PDF has already been processed in this session
80
+ if state and state.is_db_ready():
81
+ return (
82
+ gr.update(interactive=False),
83
+ gr.update(interactive=True),
84
+ state
85
+ )
86
+
87
  file_size_mb = os.path.getsize(pdf_file.name) / (1024 * 1024)
88
  if file_size_mb >= 75:
89
  gr.Error("File size exceeds the 75 MB limit. Please upload a smaller PDF.")
90
+ # Reset components on error
91
+ return (
92
+ gr.update(interactive=True),
93
+ gr.update(interactive=False),
94
+ gr.State() # Reset state
95
+ )
96
 
97
  print("Opening PDF file...")
98
  try:
99
  doc = fitz.open(pdf_file.name)
100
  text = ""
101
+ # CRITICAL FIX: Iterate over pages and get text from each page
102
  for page in doc:
103
  text += page.get_text()
104
  doc.close()
105
  except Exception as e:
106
  print(f"Error processing PDF document: {str(e)}")
107
+ gr.Error(f"Error processing PDF document: {str(e)}")
108
+ return (
109
+ gr.update(interactive=True),
110
+ gr.update(interactive=False),
111
+ gr.State()
112
+ )
113
 
114
  print("PDF file opened successfully. Splitting text into chunks...")
115
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
 
117
  print("Text split into chunks successfully.")
118
 
119
  embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL, google_api_key=google_api_key)
120
+
121
+ # Initialize a new session state object
122
+ new_state = SessionState()
123
+
124
+ new_state.db = await Chroma.afrom_documents(
125
  documents=docs,
126
  embedding=embeddings,
127
+ persist_directory=new_state.vector_store_path,
128
+ collection_name=new_state.session_id
129
  )
130
  print("PDF processed successfully! Database is ready.")
131
+ gr.Info("PDF processed! You can now ask questions about the document.")
132
+
133
+ return (
134
+ gr.update(interactive=False),
135
+ gr.update(interactive=True),
136
+ new_state
137
+ )
138
+
139
  except Exception as e:
140
+ if state and os.path.exists(state.vector_store_path):
141
  shutil.rmtree(state.vector_store_path)
142
  print(f"An error occurred: {str(e)}")
143
+ gr.Error(f"An error occurred: {str(e)}")
144
+
145
+ return (
146
+ gr.update(interactive=True),
147
+ gr.update(interactive=False),
148
+ gr.State()
149
+ )
150
 
151
  async def chat_with_pdf(message, history, state: SessionState):
152
  print("Chat interface called. Checking if database is ready...")
 
182
  yield response
183
 
184
  with gr.Blocks(title="PDF Chatbot") as demo:
185
+ state = gr.State(value=SessionState())
186
 
187
  gr.Markdown(
188
  """
 
190
  Upload a PDF to start a conversation with your document.
191
  """
192
  )
193
+
194
+ file_upload_input = gr.File(
195
+ file_types=[".pdf"],
196
+ label="Upload your PDF document",
197
+ interactive=True
198
+ )
199
 
200
+ chat_interface = gr.ChatInterface(
201
+ fn=chat_with_pdf,
202
+ additional_inputs=[state],
203
+ chatbot=gr.Chatbot(type="messages"),
204
+ textbox=gr.Textbox(placeholder="Type your question here...", scale=7, interactive=False),
205
+ examples=[["What is the main topic of the document?"], ["Summarize the key findings."], ["Who are the authors?"]],
206
+ title="Chat Interface",
207
+ theme="soft"
208
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
  file_upload_input.upload(
211
+ fn=process_pdf,
212
+ inputs=[file_upload_input, state],
213
+ outputs=[file_upload_input, chat_interface.textbox, state]
214
  )
215
 
216
+ demo.launch()