pranavinani committed on
Commit
6cd5233
·
1 Parent(s): e2e039a

fixed pdf upload not working

Browse files
Files changed (1) hide show
  1. app.py +39 -19
app.py CHANGED
@@ -179,9 +179,11 @@ def extract_text_from_pdf(pdf_path):
179
  if not text_content.strip():
180
  return "Error: No selectable text found in PDF. Please ensure the PDF contains selectable text, not just images."
181
 
 
182
  return text_content
183
 
184
  except Exception as e:
 
185
  return f"Error extracting text: {str(e)}"
186
 
187
  def extract_metadata(text):
@@ -320,23 +322,36 @@ def process_document(pdf_file):
320
  return "कृपया एक PDF फ़ाइल अपलोड करें।", "", "", gr.update(visible=False)
321
 
322
  try:
 
 
323
  # Check file size
324
  file_size = os.path.getsize(pdf_file.name)
 
 
325
  if file_size > CONFIG['MAX_FILE_SIZE']:
326
  return f"फ़ाइल बहुत बड़ी है! अधिकतम आकार: {CONFIG['MAX_FILE_SIZE'] // (1024*1024)}MB", "", "", gr.update(visible=False)
327
 
328
  # Extract text (no OCR - assumes selectable text)
 
329
  text_content = extract_text_from_pdf(pdf_file.name)
330
 
331
- if not text_content.strip() or "Error" in text_content:
 
 
 
 
332
  return text_content, "", "", gr.update(visible=False)
333
 
 
 
334
  # Extract metadata
 
335
  author_name, book_title = extract_metadata(text_content)
336
  SESSION_DATA['author_name'] = author_name
337
  SESSION_DATA['book_title'] = book_title
338
 
339
  # Create chunks
 
340
  chunks = chunk_text(text_content)
341
  SESSION_DATA['document_chunks'] = chunks
342
 
@@ -351,6 +366,8 @@ def process_document(pdf_file):
351
  word_count = len(text_content.split())
352
  char_count = len(text_content)
353
 
 
 
354
  success_msg = f"""✅ दस्तावेज़ सफलतापूर्वक प्रसंस्करित!
355
 
356
  📖 पुस्तक: {book_title}
@@ -364,7 +381,9 @@ def process_document(pdf_file):
364
  return success_msg, book_title, author_name, gr.update(visible=True)
365
 
366
  except Exception as e:
367
- return f"दस्तावेज़ प्रसंस्करण में त्रुटि: {str(e)}", "", "", gr.update(visible=False)
 
 
368
 
369
  # Query processing function
370
  def process_query(audio_input, text_input):
@@ -770,6 +789,7 @@ def create_interface():
770
  available_books = get_available_books()
771
  gallery_data, book_options = create_book_gallery()
772
 
 
773
  if available_books:
774
  book_gallery = gr.Gallery(
775
  value=gallery_data,
@@ -792,6 +812,7 @@ def create_interface():
792
  select_book_btn = gr.Button("📖 Load Selected Book / चुनी गई पुस्तक लोड करें", variant="primary")
793
  else:
794
  gr.Markdown("⚠️ No books available in library / पुस्तकालय में कोई पुस्तक उपलब्ध नहीं है")
 
795
  book_dropdown = gr.Dropdown(choices=["None"], value="None", visible=False)
796
  select_book_btn = gr.Button("No books available", interactive=False)
797
 
@@ -871,28 +892,27 @@ def create_interface():
871
  outputs=[auth_section, main_section, auth_status]
872
  )
873
 
874
- # Book selection event handler
875
- if 'select_book_btn' in locals():
 
 
 
 
 
 
 
876
  select_book_btn.click(
877
  process_selected_book,
878
  inputs=[book_dropdown],
879
  outputs=[doc_status, book_title_display, author_display, query_section]
880
  )
881
-
882
- # Gallery selection event handler
883
- if 'book_gallery' in locals():
884
- book_gallery.select(
885
- handle_gallery_selection,
886
- outputs=[book_dropdown]
887
- )
888
-
889
- # PDF upload event handler
890
- if 'process_pdf_btn' in locals():
891
- process_pdf_btn.click(
892
- process_document,
893
- inputs=[pdf_upload],
894
- outputs=[doc_status, book_title_display, author_display, query_section]
895
- )
896
 
897
  ask_button.click(
898
  process_query,
 
179
  if not text_content.strip():
180
  return "Error: No selectable text found in PDF. Please ensure the PDF contains selectable text, not just images."
181
 
182
+ print(f"Successfully extracted {len(text_content)} characters from PDF")
183
  return text_content
184
 
185
  except Exception as e:
186
+ print(f"PDF extraction error: {str(e)}")
187
  return f"Error extracting text: {str(e)}"
188
 
189
  def extract_metadata(text):
 
322
  return "कृपया एक PDF फ़ाइल अपलोड करें।", "", "", gr.update(visible=False)
323
 
324
  try:
325
+ print(f"Processing uploaded file: {pdf_file.name}")
326
+
327
  # Check file size
328
  file_size = os.path.getsize(pdf_file.name)
329
+ print(f"File size: {file_size} bytes")
330
+
331
  if file_size > CONFIG['MAX_FILE_SIZE']:
332
  return f"फ़ाइल बहुत बड़ी है! अधिकतम आकार: {CONFIG['MAX_FILE_SIZE'] // (1024*1024)}MB", "", "", gr.update(visible=False)
333
 
334
  # Extract text (no OCR - assumes selectable text)
335
+ print("Extracting text from PDF...")
336
  text_content = extract_text_from_pdf(pdf_file.name)
337
 
338
+ # Check if extraction failed
339
+ if not text_content.strip():
340
+ return "Error: फ़ाइल से टेक्स्ट निकालने में असफल।", "", "", gr.update(visible=False)
341
+
342
+ if text_content.startswith("Error"):
343
  return text_content, "", "", gr.update(visible=False)
344
 
345
+ print(f"Text extraction successful. Length: {len(text_content)} characters")
346
+
347
  # Extract metadata
348
+ print("Extracting metadata...")
349
  author_name, book_title = extract_metadata(text_content)
350
  SESSION_DATA['author_name'] = author_name
351
  SESSION_DATA['book_title'] = book_title
352
 
353
  # Create chunks
354
+ print("Creating text chunks...")
355
  chunks = chunk_text(text_content)
356
  SESSION_DATA['document_chunks'] = chunks
357
 
 
366
  word_count = len(text_content.split())
367
  char_count = len(text_content)
368
 
369
+ print(f"Processing complete. Chunks: {len(chunks)}, Words: {word_count}")
370
+
371
  success_msg = f"""✅ दस्तावेज़ सफलतापूर्वक प्रसंस्करित!
372
 
373
  📖 पुस्तक: {book_title}
 
381
  return success_msg, book_title, author_name, gr.update(visible=True)
382
 
383
  except Exception as e:
384
+ error_msg = f"दस्तावेज़ प्रसंस्करण में त्रुटि: {str(e)}"
385
+ print(f"Error in process_document: {str(e)}")
386
+ return error_msg, "", "", gr.update(visible=False)
387
 
388
  # Query processing function
389
  def process_query(audio_input, text_input):
 
789
  available_books = get_available_books()
790
  gallery_data, book_options = create_book_gallery()
791
 
792
+ # Always create these components, even if no books are available
793
  if available_books:
794
  book_gallery = gr.Gallery(
795
  value=gallery_data,
 
812
  select_book_btn = gr.Button("📖 Load Selected Book / चुनी गई पुस्तक लोड करें", variant="primary")
813
  else:
814
  gr.Markdown("⚠️ No books available in library / पुस्तकालय में कोई पुस्तक उपलब्ध नहीं है")
815
+ book_gallery = None
816
  book_dropdown = gr.Dropdown(choices=["None"], value="None", visible=False)
817
  select_book_btn = gr.Button("No books available", interactive=False)
818
 
 
892
  outputs=[auth_section, main_section, auth_status]
893
  )
894
 
895
+ # PDF upload event handler - Always available
896
+ process_pdf_btn.click(
897
+ process_document,
898
+ inputs=[pdf_upload],
899
+ outputs=[doc_status, book_title_display, author_display, query_section]
900
+ )
901
+
902
+ # Book selection event handler - Only if books are available
903
+ if available_books:
904
  select_book_btn.click(
905
  process_selected_book,
906
  inputs=[book_dropdown],
907
  outputs=[doc_status, book_title_display, author_display, query_section]
908
  )
909
+
910
+ # Gallery selection event handler - Only if gallery exists
911
+ if book_gallery is not None:
912
+ book_gallery.select(
913
+ handle_gallery_selection,
914
+ outputs=[book_dropdown]
915
+ )
 
 
 
 
 
 
 
 
916
 
917
  ask_button.click(
918
  process_query,