pranavinani committed on
Commit
36af225
·
1 Parent(s): 9d3a49a

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +242 -10
app.py CHANGED
@@ -31,6 +31,8 @@ CONFIG = {
31
  'MAX_AUDIO_DURATION': 120, # 2 minutes
32
  'GROQ_API_KEY': os.getenv('GAPI'),
33
  'AUDIO_CLIP_DURATION': 10, # First 10 seconds only
 
 
34
  }
35
 
36
  # Global session storage
@@ -429,6 +431,190 @@ def reset_session():
429
  })
430
  return "✅ नया सत्र शुरू किया गया!", "", "", gr.update(visible=False), "प्रश्न: 0/5"
431
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
432
  # Create Gradio interface
433
  def create_interface():
434
  """Create the Gradio interface"""
@@ -479,17 +665,53 @@ def create_interface():
479
  interactive=False
480
  )
481
 
482
- # Document upload section
483
- gr.Markdown("### 📁 Step 1: Upload Your Book / अपनी पुस्तक अपलोड करें")
484
- gr.Markdown("**Note:** Please ensure your PDF contains selectable text (not scanned images)")
485
 
486
- with gr.Row():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
487
  pdf_upload = gr.File(
488
  label="Upload PDF / PDF अपलोड करें",
489
  file_types=[".pdf"],
490
  type="filepath"
491
  )
492
- process_btn = gr.Button("📖 Process Document / दस्तावेज़ प्रसंस्करित करें", variant="primary")
493
 
494
  doc_status = gr.Textbox(label="Processing Status / प्रसंस्करण स्थिति", interactive=False)
495
 
@@ -555,11 +777,21 @@ def create_interface():
555
  outputs=[auth_section, main_section, auth_status]
556
  )
557
 
558
- process_btn.click(
559
- process_document,
560
- inputs=[pdf_upload],
561
- outputs=[doc_status, book_title_display, author_display, query_section]
562
- )
 
 
 
 
 
 
 
 
 
 
563
 
564
  ask_button.click(
565
  process_query,
 
31
  'MAX_AUDIO_DURATION': 120, # 2 minutes
32
  'GROQ_API_KEY': os.getenv('GAPI'),
33
  'AUDIO_CLIP_DURATION': 10, # First 10 seconds only
34
+ 'BOOK_THUMBNAILS_DIR': './book_thumbnails',
35
+ 'OCR_BOOKS_DIR': './ocr_books',
36
  }
37
 
38
  # Global session storage
 
431
  })
432
  return "✅ नया सत्र शुरू किया गया!", "", "", gr.update(visible=False), "प्रश्न: 0/5"
433
 
434
+ # Book management functions
435
def get_available_books():
    """Return the library's books, each paired with a cover image.

    Every ``.txt`` file in ``CONFIG['OCR_BOOKS_DIR']`` becomes one book. Its
    cover is the image in ``CONFIG['BOOK_THUMBNAILS_DIR']`` whose file stem
    matches the book's stem (case-insensitively); when there is none, a
    generated placeholder from ``create_text_placeholder`` is used instead.

    Returns:
        list[dict]: One entry per book, sorted by text-file name, with keys
        ``name`` (file stem), ``display_name`` (underscores replaced by
        spaces, title-cased), ``text_file`` (path to the ``.txt`` file) and
        ``thumbnail`` (image path, or ``None`` if no placeholder could be
        created). An empty list is returned if anything goes wrong.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')
    books = []

    try:
        thumbnail_dir = CONFIG['BOOK_THUMBNAILS_DIR']
        ocr_dir = CONFIG['OCR_BOOKS_DIR']

        # os.listdir() yields entries in arbitrary order; sorting keeps the
        # gallery order (and the thumbnail picked among same-stem images)
        # stable between runs. isdir() rather than exists() so a stray file
        # at the configured path counts as "no directory" instead of raising.
        if os.path.isdir(thumbnail_dir):
            thumbnail_files = sorted(
                f for f in os.listdir(thumbnail_dir)
                if f.lower().endswith(image_suffixes)
            )
        else:
            thumbnail_files = []

        if os.path.isdir(ocr_dir):
            text_files = sorted(
                f for f in os.listdir(ocr_dir)
                if f.lower().endswith('.txt')
            )
        else:
            text_files = []

        # Index thumbnails by lower-cased stem once instead of rescanning the
        # thumbnail list for every book; setdefault keeps the first match.
        thumbnails_by_stem = {}
        for thumb_file in thumbnail_files:
            stem = os.path.splitext(thumb_file)[0].lower()
            thumbnails_by_stem.setdefault(
                stem, os.path.join(thumbnail_dir, thumb_file)
            )

        for text_file in text_files:
            book_name = os.path.splitext(text_file)[0]

            thumbnail_path = thumbnails_by_stem.get(book_name.lower())
            if not thumbnail_path:
                # No cover image shipped for this book: fall back to a
                # generated text placeholder (may itself be None on failure).
                thumbnail_path = create_text_placeholder(book_name)

            books.append({
                'name': book_name,
                'display_name': book_name.replace('_', ' ').title(),
                'text_file': os.path.join(ocr_dir, text_file),
                'thumbnail': thumbnail_path,
            })

        return books

    except Exception as e:
        # Best-effort by design: the UI treats an empty list as "no books".
        print(f"Error getting available books: {str(e)}")
        return []
487
+
488
def create_text_placeholder(book_name):
    """Create (or reuse) a simple cover image for a book without a thumbnail.

    The image is a light-blue card showing ``book_name`` with each underscore
    turned into a line break. It is written to the system temp directory as
    ``<book_name>_placeholder.png``.

    Args:
        book_name: Book identifier; used both as the card text and in the
            output file name.

    Returns:
        The path of the PNG file, or ``None`` if it could not be created
        (the error is printed).
    """
    try:
        placeholder_path = os.path.join(
            tempfile.gettempdir(), f"{book_name}_placeholder.png"
        )

        # The image depends only on book_name, so a previously rendered file
        # can be reused instead of re-rendering it on every library scan.
        if os.path.isfile(placeholder_path) and os.path.getsize(placeholder_path) > 0:
            return placeholder_path

        import matplotlib.pyplot as plt
        import matplotlib.patches as patches

        fig, ax = plt.subplots(1, 1, figsize=(3, 4))
        try:
            ax.set_xlim(0, 1)
            ax.set_ylim(0, 1)
            ax.axis('off')

            # Card background
            rect = patches.Rectangle(
                (0, 0), 1, 1,
                linewidth=2, edgecolor='#2E86AB', facecolor='#E8F4FD',
            )
            ax.add_patch(rect)

            # Book name, one word group per line
            ax.text(
                0.5, 0.5, book_name.replace('_', '\n'),
                ha='center', va='center',
                fontsize=10, weight='bold', color='#2E86AB',
            )

            fig.savefig(placeholder_path, dpi=100, bbox_inches='tight')
        except Exception:
            # A truncated file would otherwise satisfy the reuse check above
            # on the next call, so remove it before reporting the failure.
            try:
                os.remove(placeholder_path)
            except OSError:
                pass
            raise
        finally:
            # Close this specific figure even when drawing/saving fails;
            # pyplot keeps every open figure alive until it is closed.
            plt.close(fig)

        return placeholder_path

    except Exception as e:
        print(f"Error creating placeholder: {str(e)}")
        return None
518
+
519
def load_book_text(book_info):
    """Read the UTF-8 text of a library book.

    Args:
        book_info: Mapping whose ``'text_file'`` entry is the path to read.

    Returns:
        The file's full text on success. Failures are reported in-band as a
        string: ``"Error: Empty text file"`` when the file holds only
        whitespace, or ``"Error loading book text: <reason>"`` when the path
        lookup or the read raises.
    """
    try:
        with open(book_info['text_file'], 'r', encoding='utf-8') as handle:
            text = handle.read()
    except Exception as e:
        return f"Error loading book text: {str(e)}"

    if text.strip():
        return text
    return "Error: Empty text file"
532
+
533
def process_selected_book(selected_book_name):
    """Load a library book into the session and make it ready for questions.

    Looks the book up by name, reads its text, extracts author/title,
    chunks the text, builds the search index, stores everything in
    ``SESSION_DATA`` and resets the query counter.

    Args:
        selected_book_name: The ``name`` of a book as returned by
            ``get_available_books``; ``None``, empty or ``"None"`` means
            nothing was selected.

    Returns:
        A 4-tuple ``(status_message, book_title, author_name, update)`` where
        ``update`` is a ``gr.update`` toggling visibility: visible on success,
        hidden otherwise. On any failure the title and author are empty
        strings and the status message describes the problem.
    """
    if not selected_book_name or selected_book_name == "None":
        return "कृपया एक पुस्तक चुनें।", "", "", gr.update(visible=False)

    try:
        selected_book = next(
            (book for book in get_available_books()
             if book['name'] == selected_book_name),
            None,
        )

        if not selected_book:
            return "चुनी गई पुस्तक नहीं मिली।", "", "", gr.update(visible=False)

        text_content = load_book_text(selected_book)

        # load_book_text reports failures in-band. Match only the exact
        # prefixes it produces: a plain substring test for "Error" would
        # reject any book that merely contains that word and dump the whole
        # text into the status box.
        load_error_prefixes = ("Error: Empty text file", "Error loading book text:")
        if not text_content.strip() or text_content.startswith(load_error_prefixes):
            return text_content, "", "", gr.update(visible=False)

        author_name, book_title = extract_metadata(text_content)

        # Compare against extract_metadata's placeholder values and replace
        # them with something more useful for a library book.
        if author_name == "अज्ञात लेखक":
            author_name = "संग्रहित पुस्तक"
        if book_title == "अनाम पुस्तक":
            book_title = selected_book['display_name']

        # NOTE(review): if chunking/embedding below raises, the session keeps
        # the new title/author with the previous index — confirm whether the
        # helpers depend on this write order before reordering.
        SESSION_DATA['author_name'] = author_name
        SESSION_DATA['book_title'] = book_title

        chunks = chunk_text(text_content)
        SESSION_DATA['document_chunks'] = chunks

        print("Creating embeddings and search index for selected book...")
        SESSION_DATA['faiss_index'] = create_embeddings(chunks)

        # A freshly loaded book starts with a fresh question allowance.
        SESSION_DATA['query_count'] = 0

        word_count = len(text_content.split())
        char_count = len(text_content)

        success_msg = (
            "✅ पुस्तक सफलतापूर्वक लोड की गई!\n"
            "\n"
            f"📖 पुस्तक: {book_title}\n"
            f"✍️ लेखक: {author_name}\n"
            f"📄 टेक्स्ट खंड: {len(chunks)}\n"
            f"📊 शब्द संख्या: {word_count:,}\n"
            f"📝 अक्षर संख्या: {char_count:,}\n"
            "\n"
            "अब आप प्रश्न पूछ सकते हैं।"
        )

        return success_msg, book_title, author_name, gr.update(visible=True)

    except Exception as e:
        return f"पुस्तक लोड करने में त्रुटि: {str(e)}", "", "", gr.update(visible=False)
599
+
600
def create_book_gallery():
    """Build the gallery items and dropdown choices for the book library.

    Returns:
        A pair ``(gallery_items, names)``. ``gallery_items`` is a list of
        ``(thumbnail_path, display_name)`` tuples for books whose thumbnail
        file exists; ``names`` is ``"None"`` followed by the name of every
        available book. When the library is empty the pair is
        ``([], "कोई पुस्तक उपलब्ध नहीं है।")`` — note the second element is
        then a message string, not a list.
    """
    library = get_available_books()

    if not library:
        return [], "कोई पुस्तक उपलब्ध नहीं है।"

    # Only books with an image actually on disk can be shown in the gallery.
    gallery_items = [
        (entry['thumbnail'], entry['display_name'])
        for entry in library
        if entry['thumbnail'] and os.path.exists(entry['thumbnail'])
    ]

    # Every book stays selectable; "None" is the "nothing chosen" option.
    names = ["None", *(entry['name'] for entry in library)]

    return gallery_items, names
617
+
618
  # Create Gradio interface
619
  def create_interface():
620
  """Create the Gradio interface"""
 
665
  interactive=False
666
  )
667
 
668
+ # Document selection/upload section
669
+ gr.Markdown("### 📁 Step 1: Choose Your Book / अपनी पुस्तक चुनें")
 
670
 
671
+ # Book selection section
672
+ with gr.Tab("📚 Select from Library / पुस्तकालय से चुनें"):
673
+ gr.Markdown("**Choose from available books / उपलब्ध पुस्तकों में से चुनें**")
674
+
675
+ # Initialize book gallery and dropdown
676
+ available_books = get_available_books()
677
+ gallery_data, book_options = create_book_gallery()
678
+
679
+ if available_books:
680
+ book_gallery = gr.Gallery(
681
+ value=gallery_data,
682
+ label="Available Books / उपलब्ध पुस्तकें",
683
+ show_label=True,
684
+ elem_id="book_gallery",
685
+ columns=3,
686
+ rows=2,
687
+ height="auto",
688
+ allow_preview=True
689
+ )
690
+
691
+ book_dropdown = gr.Dropdown(
692
+ choices=book_options,
693
+ label="Select Book / पुस्तक चुनें",
694
+ value="None",
695
+ interactive=True
696
+ )
697
+
698
+ select_book_btn = gr.Button("📖 Load Selected Book / चुनी गई पुस्तक लोड करें", variant="primary")
699
+ else:
700
+ gr.Markdown("⚠️ No books available in library / पुस्तकालय में कोई पुस्तक उपलब्ध नहीं है")
701
+ book_dropdown = gr.Dropdown(choices=["None"], value="None", visible=False)
702
+ select_book_btn = gr.Button("No books available", interactive=False)
703
+
704
+ # PDF upload section
705
+ with gr.Tab("📄 Upload PDF / PDF ��पलोड करें"):
706
+ gr.Markdown("**Upload your own PDF / अपनी PDF अपलोड करें**")
707
+ gr.Markdown("**Note:** Please ensure your PDF contains selectable text (not scanned images)")
708
+
709
  pdf_upload = gr.File(
710
  label="Upload PDF / PDF अपलोड करें",
711
  file_types=[".pdf"],
712
  type="filepath"
713
  )
714
+ process_pdf_btn = gr.Button("📖 Process PDF / PDF प्रसंस्करित करें", variant="primary")
715
 
716
  doc_status = gr.Textbox(label="Processing Status / प्रसंस्करण स्थिति", interactive=False)
717
 
 
777
  outputs=[auth_section, main_section, auth_status]
778
  )
779
 
780
+ # Book selection event handler
781
+ if 'select_book_btn' in locals():
782
+ select_book_btn.click(
783
+ process_selected_book,
784
+ inputs=[book_dropdown],
785
+ outputs=[doc_status, book_title_display, author_display, query_section]
786
+ )
787
+
788
+ # PDF upload event handler
789
+ if 'process_pdf_btn' in locals():
790
+ process_pdf_btn.click(
791
+ process_document,
792
+ inputs=[pdf_upload],
793
+ outputs=[doc_status, book_title_display, author_display, query_section]
794
+ )
795
 
796
  ask_button.click(
797
  process_query,