msmaje committed on
Commit
001ba5f
·
verified ·
1 Parent(s): 9ce27e5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +727 -303
app.py CHANGED
@@ -475,256 +475,619 @@ def answer_question(question):
475
  logger.error(f"Fallback error: {fallback_error}")
476
  return f"❌ Error answering question: {str(e)}", ""
477
 
478
- def get_device_info():
479
- """Simple function to detect if mobile (basic detection)"""
480
- return """
481
- <script>
482
- function isMobile() {
483
- return window.innerWidth <= 768;
484
- }
485
-
486
- function adjustLayout() {
487
- const isMob = isMobile();
488
- const root = document.documentElement;
489
- if (isMob) {
490
- root.style.setProperty('--mobile-mode', '1');
491
- } else {
492
- root.style.setProperty('--mobile-mode', '0');
493
- }
494
- }
495
-
496
- window.addEventListener('resize', adjustLayout);
497
- adjustLayout();
498
- </script>
499
- """
500
-
501
  def create_interface():
502
  """Create the fully responsive Gradio interface"""
503
 
504
- # Custom CSS for better responsiveness
505
  custom_css = """
506
- /* Base responsive styles */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
507
  .gradio-container {
508
  max-width: 100% !important;
509
- margin: 0 auto;
510
- padding: 10px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
511
  }
 
 
512
 
513
- /* Mobile-first responsive design */
514
- @media (max-width: 768px) {
515
  .gradio-container {
516
- padding: 5px;
517
  }
518
 
519
- /* Stack elements vertically on mobile */
520
  .gr-row {
521
  flex-direction: column !important;
522
- gap: 10px !important;
523
  }
524
 
525
- /* Full width on mobile */
526
  .gr-column {
527
  width: 100% !important;
528
- min-width: 100% !important;
529
  }
530
 
531
- /* Adjust component spacing */
532
- .gr-form > * {
533
- margin-bottom: 8px !important;
 
534
  }
535
 
536
- /* Better button sizing */
 
 
 
 
 
 
 
537
  .gr-button {
538
  width: 100% !important;
539
- min-height: 44px !important;
540
- font-size: 14px !important;
 
 
 
541
  }
542
 
543
- /* Text input improvements */
544
- .gr-textbox textarea {
545
- min-height: 60px !important;
546
  font-size: 16px !important; /* Prevents zoom on iOS */
 
 
 
547
  }
548
 
549
- /* File upload improvements */
550
  .gr-file {
551
- min-height: 100px !important;
 
 
 
 
 
 
 
 
 
 
552
  }
553
 
554
  /* Slider improvements */
555
  .gr-slider {
556
- margin: 10px 0 !important;
557
  }
558
 
559
- /* Tab improvements */
560
- .gr-tab-nav {
561
- flex-wrap: wrap !important;
562
  }
563
 
564
- .gr-tab-nav > button {
565
- flex: 1 1 auto !important;
566
- min-width: 80px !important;
567
- font-size: 12px !important;
568
  }
569
  }
570
-
571
- /* Tablet styles */
572
- @media (min-width: 769px) and (max-width: 1024px) {
573
  .gradio-container {
574
- padding: 15px;
 
 
 
 
575
  }
576
 
577
  .gr-button {
578
- min-height: 40px !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
579
  }
580
  }
581
-
582
- /* Desktop styles */
583
  @media (min-width: 1025px) {
584
  .gradio-container {
585
- max-width: 1400px;
586
- padding: 20px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
587
  }
588
  }
589
-
590
- /* Improve readability */
591
  .gr-markdown h1 {
592
  font-size: clamp(1.5rem, 4vw, 2.5rem) !important;
 
593
  line-height: 1.2 !important;
594
  margin-bottom: 1rem !important;
 
595
  }
596
-
 
 
 
 
 
 
 
 
597
  .gr-markdown h3 {
598
- font-size: clamp(1.1rem, 3vw, 1.4rem) !important;
599
- margin: 1rem 0 0.5rem 0 !important;
 
 
 
600
  }
601
-
602
- .gr-markdown p, .gr-markdown li {
603
- font-size: clamp(0.9rem, 2.5vw, 1rem) !important;
604
- line-height: 1.5 !important;
 
 
 
605
  }
606
-
607
- /* Status text improvements */
608
- .gr-textbox[data-testid="textbox"] {
609
- font-family: monospace !important;
610
- font-size: clamp(0.8rem, 2vw, 0.9rem) !important;
 
 
 
 
 
 
611
  }
612
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
613
  /* Accessibility improvements */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
614
  .gr-button:focus,
615
- .gr-textbox:focus,
 
616
  .gr-file:focus {
617
- outline: 2px solid #2563eb !important;
618
  outline-offset: 2px !important;
619
  }
620
-
621
- /* Dark mode considerations */
622
- @media (prefers-color-scheme: dark) {
623
- .gr-button {
624
- border: 1px solid #374151 !important;
625
- }
 
 
 
626
  }
627
  """
628
 
629
- with gr.Blocks(
630
- title="PDF RAG System",
631
- theme=gr.themes.Soft(),
632
- css=custom_css
633
- ) as demo:
634
-
635
- # Add device detection script
636
- gr.HTML(get_device_info())
637
 
 
638
  gr.Markdown("""
639
- # 📚 PDF Question Answering System
640
 
641
- Upload your PDF documents and ask questions about their content!
642
 
643
- **Quick Start:**
644
- 1. Upload PDFs or use pre-loaded ones
645
- 2. Click Process to prepare your documents
646
- 3. Ask questions about the content
 
647
  """)
648
 
649
- # Check for pre-loaded PDFs
650
- if PRELOADED_PDFS:
651
- gr.Markdown("""
652
- <div style="background: linear-gradient(90deg, #10b981, #059669);
653
- color: white; padding: 12px; border-radius: 8px; margin: 10px 0;">
654
- πŸŽ‰ <strong>Pre-loaded PDFs detected!</strong> Use the 'Load Pre-existing PDFs' button to get started quickly.
655
- </div>
656
- """)
657
-
658
- # Main layout - responsive columns
659
  with gr.Row():
660
- # Left column - Upload & Settings (collapses to full width on mobile)
661
- with gr.Column(scale=1, min_width=300):
662
- gr.Markdown("### πŸ“„ Document Management")
663
 
664
- with gr.Tabs():
665
- with gr.TabItem("πŸ“ Upload PDFs"):
666
- pdf_files = gr.File(
667
- label="Select PDF Files",
668
- file_count="multiple",
669
- file_types=[".pdf"],
670
- height=120
671
- )
672
- process_btn = gr.Button(
673
- "πŸ”„ Process PDFs",
674
- variant="primary",
675
- size="lg"
676
- )
677
 
678
- with gr.TabItem("πŸ—‚οΈ ZIP Upload"):
679
- zip_file = gr.File(
680
- label="Upload ZIP (with PDFs)",
681
- file_count="single",
682
- file_types=[".zip"],
683
- height=80
684
- )
685
- extract_btn = gr.Button(
686
- "πŸ“¦ Extract ZIP",
687
- variant="secondary",
688
- size="lg"
689
- )
690
- extract_output = gr.Textbox(
691
- label="Extraction Status",
692
- lines=2,
693
- max_lines=3
694
- )
695
 
696
- with gr.TabItem("πŸ’Ύ Pre-loaded"):
697
- if PRELOADED_PDFS:
698
- pdf_list = [f for f in os.listdir(PDF_FOLDER_PATH) if f.endswith('.pdf')]
699
- gr.Markdown(f"**Found {len(pdf_list)} PDF files**")
700
-
701
- # Show files in a more mobile-friendly way
702
- if len(pdf_list) <= 5:
703
- for pdf in pdf_list:
704
- gr.Markdown(f"πŸ“„ {pdf}")
705
- else:
706
- for pdf in pdf_list[:3]:
707
- gr.Markdown(f"πŸ“„ {pdf}")
708
- gr.Markdown(f"*... and {len(pdf_list) - 3} more files*")
709
- else:
710
- gr.Markdown("No pre-loaded PDFs found.")
711
-
712
- preload_btn = gr.Button(
713
- "πŸ“š Load Pre-existing PDFs",
714
- variant="primary",
715
- size="lg",
716
- interactive=PRELOADED_PDFS
717
- )
 
 
 
 
 
 
 
 
 
 
 
 
718
 
719
- # Settings section - collapsible on mobile
720
- with gr.Accordion("βš™οΈ Advanced Settings", open=False):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
721
  chunk_size = gr.Slider(
722
- minimum=200,
723
  maximum=2000,
724
  value=1000,
725
  step=100,
726
  label="Chunk Size",
727
- info="Larger chunks = more context, smaller = more precise"
728
  )
729
 
730
  chunk_overlap = gr.Slider(
@@ -733,176 +1096,237 @@ def create_interface():
733
  value=200,
734
  step=50,
735
  label="Chunk Overlap",
736
- info="Overlap between text chunks"
737
  )
738
 
739
- # Status display
740
- process_output = gr.Textbox(
741
- label="πŸ“Š Processing Status",
742
- lines=3,
743
- max_lines=5,
744
- placeholder="Status updates will appear here..."
745
- )
746
-
747
- # Right column - Q&A Section (collapses to full width on mobile)
748
- with gr.Column(scale=2, min_width=400):
749
- gr.Markdown("### ❓ Ask Questions")
750
-
751
- question_input = gr.Textbox(
752
- label="Your Question",
753
- placeholder="What would you like to know about your documents?",
754
- lines=2,
755
- max_lines=4
756
  )
757
 
758
- ask_btn = gr.Button(
759
- "πŸ€” Ask Question",
760
- variant="secondary",
761
- size="lg"
 
762
  )
 
 
 
763
 
764
- # Results section - stack vertically on mobile
765
- with gr.Row():
766
- answer_output = gr.Textbox(
767
- label="πŸ’‘ Answer",
768
- lines=6,
769
- max_lines=12,
770
- placeholder="Your answer will appear here..."
 
 
 
 
 
 
 
771
  )
772
 
773
- sources_output = gr.Textbox(
774
- label="πŸ“š Sources",
775
- lines=6,
776
- max_lines=12,
777
- placeholder="Source references will appear here..."
778
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
779
 
780
- # Event handlers (unchanged)
781
- process_btn.click(
782
- fn=process_pdfs,
783
- inputs=[pdf_files, chunk_size, chunk_overlap],
784
- outputs=[process_output]
785
- )
 
 
 
786
 
 
 
 
 
787
  preload_btn.click(
788
- fn=load_preloaded_pdfs,
789
- inputs=[chunk_size, chunk_overlap],
790
- outputs=[process_output]
791
  )
792
 
793
- extract_btn.click(
794
- fn=extract_zip_to_pdfs,
795
- inputs=[zip_file],
796
- outputs=[extract_output]
 
 
 
 
 
 
797
  )
798
 
799
  ask_btn.click(
800
- fn=answer_question,
801
- inputs=[question_input],
802
  outputs=[answer_output, sources_output]
803
  )
804
 
 
805
  question_input.submit(
806
- fn=answer_question,
807
- inputs=[question_input],
808
  outputs=[answer_output, sources_output]
809
  )
810
 
811
- # Example questions - more mobile-friendly
812
- with gr.Accordion("πŸ’‘ Example Questions", open=False):
813
- gr.Markdown("""
814
- **Try asking:**
815
- - What are the main topics in these documents?
816
- - Can you summarize the key findings?
817
- - What data is available for [specific topic]?
818
- - What are the differences between X and Y?
819
- """)
820
-
821
- # Footer with helpful info
822
  gr.Markdown("""
823
  ---
824
- <div style="text-align: center; color: #666; font-size: 0.9em;">
825
- 💡 <strong>Tips:</strong> Upload multiple PDFs • Use specific questions • Check sources for accuracy<br>
826
- 🔧 <strong>Powered by:</strong> LangChain • HuggingFace • FAISS • Gradio
827
- </div>
828
  """)
829
 
830
- return demo
831
-
832
- # Check if environment is properly configured
833
- def check_environment():
834
- """Check if the environment is properly configured"""
835
- issues = []
836
-
837
- if not LANGCHAIN_AVAILABLE:
838
- issues.append("❌ LangChain not available - please install: pip install langchain langchain-community")
839
-
840
- if not os.getenv("HUGGINGFACEHUB_API_TOKEN"):
841
- issues.append("❌ HuggingFace API token not found - set HUGGINGFACEHUB_API_TOKEN environment variable")
842
-
843
- return issues
844
 
845
  # Main execution
846
  if __name__ == "__main__":
847
- print("πŸš€ Starting PDF RAG System...")
 
848
 
849
- # Check environment
850
- env_issues = check_environment()
851
- if env_issues:
852
- print("\n⚠️ Environment Issues Detected:")
853
- for issue in env_issues:
854
- print(f" {issue}")
855
- print("\nπŸ“‹ Setup Instructions:")
856
- print(" 1. Install dependencies: pip install langchain langchain-community sentence-transformers faiss-cpu PyPDF2 gradio")
857
- print(" 2. Get HuggingFace token: https://huggingface.co/settings/tokens")
858
- print(" 3. Set environment variable: export HUGGINGFACEHUB_API_TOKEN=your_token_here")
859
- print(" 4. Restart the application")
860
- print("\nπŸ”„ Continuing with limited functionality...\n")
861
 
862
- # Initialize models on startup
863
- if LANGCHAIN_AVAILABLE:
864
- print("πŸ”§ Initializing models...")
865
- success, message = initialize_models()
866
- print(f" {message}")
867
-
868
- # Check for pre-loaded PDFs
869
- if PRELOADED_PDFS:
870
- pdf_count = len([f for f in os.listdir(PDF_FOLDER_PATH) if f.endswith('.pdf')])
871
- print(f"πŸ“š Found {pdf_count} pre-loaded PDF files in ./pdfs/")
872
 
 
873
  try:
874
- # Create and launch the interface
875
- demo = create_interface()
876
-
877
- print("🌐 Launching web interface...")
878
- print(" πŸ“± Mobile-optimized interface")
879
- print(" πŸ–₯️ Desktop and tablet supported")
880
- print(" πŸ”— Access the app in your browser")
881
-
882
- # Launch with configuration for different environments
883
- demo.launch(
884
- share=False, # Set to True if you want a public shareable link
885
- server_name="0.0.0.0", # Allow external access
886
- server_port=7860, # Default Gradio port
887
- inbrowser=True, # Auto-open browser
888
- show_error=True, # Show detailed errors
889
- quiet=False # Show startup logs
 
 
 
 
 
890
  )
891
-
892
  except Exception as e:
 
893
  print(f"❌ Failed to launch interface: {e}")
894
- print("πŸ”§ Try these troubleshooting steps:")
895
- print(" 1. Check if port 7860 is available")
896
- print(" 2. Install Gradio: pip install gradio")
897
- print(" 3. Check firewall settings")
898
- print(" 4. Try running with: python app.py")
899
-
900
- except KeyboardInterrupt:
901
- print("\nπŸ‘‹ Shutting down PDF RAG System...")
902
- print(" Thank you for using the application!")
903
-
904
- finally:
905
- # Cleanup
906
- if 'vectorstore' in globals() and vectorstore is not None:
907
- print("🧹 Cleaning up resources...")
908
- print("βœ… Shutdown complete.")
 
475
  logger.error(f"Fallback error: {fallback_error}")
476
  return f"❌ Error answering question: {str(e)}", ""
477
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
478
  def create_interface():
479
  """Create the fully responsive Gradio interface"""
480
 
481
+ # Enhanced CSS for comprehensive responsiveness
482
  custom_css = """
483
+ /* CSS Variables for consistent theming */
484
+ :root {
485
+ --primary-color: #2563eb;
486
+ --secondary-color: #10b981;
487
+ --accent-color: #f59e0b;
488
+ --text-primary: #1f2937;
489
+ --text-secondary: #6b7280;
490
+ --bg-primary: #ffffff;
491
+ --bg-secondary: #f9fafb;
492
+ --border-color: #e5e7eb;
493
+ --shadow-sm: 0 1px 2px 0 rgba(0, 0, 0, 0.05);
494
+ --shadow-md: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
495
+ --shadow-lg: 0 10px 15px -3px rgba(0, 0, 0, 0.1);
496
+ --radius-sm: 0.375rem;
497
+ --radius-md: 0.5rem;
498
+ --radius-lg: 0.75rem;
499
+ }
500
+
501
+ /* Dark mode support */
502
+ @media (prefers-color-scheme: dark) {
503
+ :root {
504
+ --text-primary: #f9fafb;
505
+ --text-secondary: #d1d5db;
506
+ --bg-primary: #1f2937;
507
+ --bg-secondary: #111827;
508
+ --border-color: #374151;
509
+ }
510
+ }
511
+
512
+ /* Base container improvements */
513
  .gradio-container {
514
  max-width: 100% !important;
515
+ margin: 0 auto !important;
516
+ padding: clamp(0.5rem, 2vw, 1.5rem) !important;
517
+ font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif !important;
518
+ }
519
+
520
+ /* Responsive grid system */
521
+ .gr-row {
522
+ display: flex !important;
523
+ flex-wrap: wrap !important;
524
+ gap: clamp(0.75rem, 2vw, 1.5rem) !important;
525
+ margin-bottom: clamp(0.75rem, 2vw, 1.5rem) !important;
526
+ }
527
+
528
+ .gr-column {
529
+ flex: 1 1 auto !important;
530
+ min-width: 0 !important;
531
  }
532
+
533
+ /* Mobile-first responsive breakpoints */
534
 
535
+ /* Small devices (phones, 320px and up) */
536
+ @media (max-width: 640px) {
537
  .gradio-container {
538
+ padding: 0.75rem !important;
539
  }
540
 
 
541
  .gr-row {
542
  flex-direction: column !important;
543
+ gap: 1rem !important;
544
  }
545
 
 
546
  .gr-column {
547
  width: 100% !important;
548
+ flex: none !important;
549
  }
550
 
551
+ /* Stack tabs vertically on very small screens */
552
+ .gr-tab-nav {
553
+ flex-direction: column !important;
554
+ gap: 0.25rem !important;
555
  }
556
 
557
+ .gr-tab-nav > button {
558
+ width: 100% !important;
559
+ text-align: left !important;
560
+ padding: 0.75rem 1rem !important;
561
+ font-size: 0.875rem !important;
562
+ }
563
+
564
+ /* Improve button sizes for touch */
565
  .gr-button {
566
  width: 100% !important;
567
+ min-height: 48px !important;
568
+ font-size: 0.875rem !important;
569
+ padding: 0.75rem 1rem !important;
570
+ border-radius: var(--radius-md) !important;
571
+ font-weight: 500 !important;
572
  }
573
 
574
+ /* Text inputs */
575
+ .gr-textbox textarea,
576
+ .gr-textbox input {
577
  font-size: 16px !important; /* Prevents zoom on iOS */
578
+ padding: 0.75rem !important;
579
+ border-radius: var(--radius-md) !important;
580
+ border: 1px solid var(--border-color) !important;
581
  }
582
 
583
+ /* File upload areas */
584
  .gr-file {
585
+ min-height: 120px !important;
586
+ padding: 1rem !important;
587
+ border: 2px dashed var(--border-color) !important;
588
+ border-radius: var(--radius-lg) !important;
589
+ text-align: center !important;
590
+ }
591
+
592
+ /* Accordion improvements */
593
+ .gr-accordion {
594
+ border-radius: var(--radius-md) !important;
595
+ border: 1px solid var(--border-color) !important;
596
  }
597
 
598
  /* Slider improvements */
599
  .gr-slider {
600
+ margin: 1rem 0 !important;
601
  }
602
 
603
+ .gr-slider input[type="range"] {
604
+ height: 32px !important;
 
605
  }
606
 
607
+ /* Form spacing */
608
+ .gr-form > * {
609
+ margin-bottom: 1rem !important;
 
610
  }
611
  }
612
+
613
+ /* Medium devices (tablets, 641px and up) */
614
+ @media (min-width: 641px) and (max-width: 1024px) {
615
  .gradio-container {
616
+ padding: 1.25rem !important;
617
+ }
618
+
619
+ .gr-row {
620
+ gap: 1.25rem !important;
621
  }
622
 
623
  .gr-button {
624
+ min-height: 44px !important;
625
+ padding: 0.625rem 1.25rem !important;
626
+ font-size: 0.875rem !important;
627
+ }
628
+
629
+ .gr-textbox textarea,
630
+ .gr-textbox input {
631
+ font-size: 15px !important;
632
+ padding: 0.625rem !important;
633
+ }
634
+
635
+ /* Two-column layout for medium screens */
636
+ .gr-column:first-child {
637
+ flex: 0 0 35% !important;
638
+ }
639
+
640
+ .gr-column:last-child {
641
+ flex: 0 0 60% !important;
642
  }
643
  }
644
+
645
+ /* Large devices (desktops, 1025px and up) */
646
  @media (min-width: 1025px) {
647
  .gradio-container {
648
+ max-width: 1400px !important;
649
+ padding: 2rem !important;
650
+ }
651
+
652
+ .gr-row {
653
+ gap: 2rem !important;
654
+ }
655
+
656
+ .gr-button {
657
+ min-height: 42px !important;
658
+ padding: 0.625rem 1.5rem !important;
659
+ font-size: 0.875rem !important;
660
+ }
661
+
662
+ .gr-textbox textarea,
663
+ .gr-textbox input {
664
+ font-size: 14px !important;
665
+ padding: 0.625rem !important;
666
+ }
667
+
668
+ /* Optimal desktop layout */
669
+ .gr-column:first-child {
670
+ flex: 0 0 400px !important;
671
+ }
672
+
673
+ .gr-column:last-child {
674
+ flex: 1 1 auto !important;
675
  }
676
  }
677
+
678
+ /* Typography improvements */
679
  .gr-markdown h1 {
680
  font-size: clamp(1.5rem, 4vw, 2.5rem) !important;
681
+ font-weight: 700 !important;
682
  line-height: 1.2 !important;
683
  margin-bottom: 1rem !important;
684
+ color: var(--text-primary) !important;
685
  }
686
+
687
+ .gr-markdown h2 {
688
+ font-size: clamp(1.25rem, 3vw, 1.875rem) !important;
689
+ font-weight: 600 !important;
690
+ line-height: 1.3 !important;
691
+ margin: 1.5rem 0 0.75rem 0 !important;
692
+ color: var(--text-primary) !important;
693
+ }
694
+
695
  .gr-markdown h3 {
696
+ font-size: clamp(1.125rem, 2.5vw, 1.5rem) !important;
697
+ font-weight: 600 !important;
698
+ line-height: 1.4 !important;
699
+ margin: 1.25rem 0 0.5rem 0 !important;
700
+ color: var(--text-primary) !important;
701
  }
702
+
703
+ .gr-markdown p,
704
+ .gr-markdown li {
705
+ font-size: clamp(0.875rem, 2vw, 1rem) !important;
706
+ line-height: 1.6 !important;
707
+ color: var(--text-secondary) !important;
708
+ margin-bottom: 0.75rem !important;
709
  }
710
+
711
+ /* Enhanced button styling */
712
+ .gr-button {
713
+ background: linear-gradient(135deg, var(--primary-color), #1d4ed8) !important;
714
+ color: white !important;
715
+ border: none !important;
716
+ border-radius: var(--radius-md) !important;
717
+ font-weight: 500 !important;
718
+ transition: all 0.2s ease !important;
719
+ cursor: pointer !important;
720
+ box-shadow: var(--shadow-sm) !important;
721
  }
722
+
723
+ .gr-button:hover {
724
+ background: linear-gradient(135deg, #1d4ed8, var(--primary-color)) !important;
725
+ transform: translateY(-1px) !important;
726
+ box-shadow: var(--shadow-md) !important;
727
+ }
728
+
729
+ .gr-button:active {
730
+ transform: translateY(0) !important;
731
+ box-shadow: var(--shadow-sm) !important;
732
+ }
733
+
734
+ /* Secondary button variant */
735
+ .gr-button[variant="secondary"] {
736
+ background: linear-gradient(135deg, var(--secondary-color), #059669) !important;
737
+ }
738
+
739
+ .gr-button[variant="secondary"]:hover {
740
+ background: linear-gradient(135deg, #059669, var(--secondary-color)) !important;
741
+ }
742
+
743
+ /* Tab styling improvements */
744
+ .gr-tab-nav {
745
+ background: var(--bg-secondary) !important;
746
+ border-radius: var(--radius-md) !important;
747
+ padding: 0.25rem !important;
748
+ margin-bottom: 1rem !important;
749
+ display: flex !important;
750
+ gap: 0.25rem !important;
751
+ }
752
+
753
+ .gr-tab-nav > button {
754
+ background: transparent !important;
755
+ border: none !important;
756
+ padding: 0.5rem 1rem !important;
757
+ border-radius: var(--radius-sm) !important;
758
+ font-weight: 500 !important;
759
+ color: var(--text-secondary) !important;
760
+ transition: all 0.2s ease !important;
761
+ flex: 1 1 auto !important;
762
+ }
763
+
764
+ .gr-tab-nav > button.selected {
765
+ background: var(--bg-primary) !important;
766
+ color: var(--text-primary) !important;
767
+ box-shadow: var(--shadow-sm) !important;
768
+ }
769
+
770
+ .gr-tab-nav > button:hover {
771
+ color: var(--text-primary) !important;
772
+ background: rgba(255, 255, 255, 0.5) !important;
773
+ }
774
+
775
+ /* Input and textarea improvements */
776
+ .gr-textbox textarea,
777
+ .gr-textbox input {
778
+ border: 1px solid var(--border-color) !important;
779
+ border-radius: var(--radius-md) !important;
780
+ background: var(--bg-primary) !important;
781
+ color: var(--text-primary) !important;
782
+ transition: border-color 0.2s ease !important;
783
+ resize: vertical !important;
784
+ }
785
+
786
+ .gr-textbox textarea:focus,
787
+ .gr-textbox input:focus {
788
+ border-color: var(--primary-color) !important;
789
+ outline: none !important;
790
+ box-shadow: 0 0 0 3px rgba(37, 99, 235, 0.1) !important;
791
+ }
792
+
793
+ /* File upload styling */
794
+ .gr-file {
795
+ border: 2px dashed var(--border-color) !important;
796
+ border-radius: var(--radius-lg) !important;
797
+ background: var(--bg-secondary) !important;
798
+ padding: 2rem !important;
799
+ text-align: center !important;
800
+ transition: all 0.2s ease !important;
801
+ }
802
+
803
+ .gr-file:hover {
804
+ border-color: var(--primary-color) !important;
805
+ background: rgba(37, 99, 235, 0.05) !important;
806
+ }
807
+
808
+ /* Accordion styling */
809
+ .gr-accordion {
810
+ border: 1px solid var(--border-color) !important;
811
+ border-radius: var(--radius-md) !important;
812
+ background: var(--bg-primary) !important;
813
+ margin-bottom: 1rem !important;
814
+ }
815
+
816
+ .gr-accordion-header {
817
+ background: var(--bg-secondary) !important;
818
+ padding: 1rem !important;
819
+ font-weight: 600 !important;
820
+ color: var(--text-primary) !important;
821
+ border-bottom: 1px solid var(--border-color) !important;
822
+ }
823
+
824
+ /* Slider styling */
825
+ .gr-slider {
826
+ margin: 1rem 0 !important;
827
+ }
828
+
829
+ .gr-slider input[type="range"] {
830
+ appearance: none !important;
831
+ background: var(--bg-secondary) !important;
832
+ border-radius: var(--radius-lg) !important;
833
+ height: 8px !important;
834
+ }
835
+
836
+ .gr-slider input[type="range"]::-webkit-slider-thumb {
837
+ appearance: none !important;
838
+ width: 20px !important;
839
+ height: 20px !important;
840
+ border-radius: 50% !important;
841
+ background: var(--primary-color) !important;
842
+ cursor: pointer !important;
843
+ box-shadow: var(--shadow-sm) !important;
844
+ }
845
+
846
+ .gr-slider input[type="range"]::-moz-range-thumb {
847
+ width: 20px !important;
848
+ height: 20px !important;
849
+ border-radius: 50% !important;
850
+ background: var(--primary-color) !important;
851
+ cursor: pointer !important;
852
+ border: none !important;
853
+ box-shadow: var(--shadow-sm) !important;
854
+ }
855
+
856
+ /* Loading and status indicators */
857
+ .gr-loading {
858
+ display: flex !important;
859
+ align-items: center !important;
860
+ justify-content: center !important;
861
+ padding: 2rem !important;
862
+ color: var(--text-secondary) !important;
863
+ }
864
+
865
+ /* Scrollbar styling */
866
+ ::-webkit-scrollbar {
867
+ width: 8px !important;
868
+ height: 8px !important;
869
+ }
870
+
871
+ ::-webkit-scrollbar-track {
872
+ background: var(--bg-secondary) !important;
873
+ border-radius: var(--radius-sm) !important;
874
+ }
875
+
876
+ ::-webkit-scrollbar-thumb {
877
+ background: var(--border-color) !important;
878
+ border-radius: var(--radius-sm) !important;
879
+ }
880
+
881
+ ::-webkit-scrollbar-thumb:hover {
882
+ background: var(--text-secondary) !important;
883
+ }
884
+
885
+ /* Animation classes */
886
+ .fade-in {
887
+ animation: fadeIn 0.3s ease-in-out !important;
888
+ }
889
+
890
+ @keyframes fadeIn {
891
+ from { opacity: 0; transform: translateY(10px); }
892
+ to { opacity: 1; transform: translateY(0); }
893
+ }
894
+
895
  /* Accessibility improvements */
896
+ .gr-button:focus-visible,
897
+ .gr-textbox input:focus-visible,
898
+ .gr-textbox textarea:focus-visible {
899
+ outline: 2px solid var(--primary-color) !important;
900
+ outline-offset: 2px !important;
901
+ }
902
+
903
+ /* Print styles */
904
+ @media print {
905
+ .gr-button,
906
+ .gr-file,
907
+ .gr-slider {
908
+ display: none !important;
909
+ }
910
+
911
+ .gr-textbox textarea,
912
+ .gr-textbox input {
913
+ border: 1px solid #000 !important;
914
+ background: white !important;
915
+ }
916
+ }
917
+
918
+ /* High contrast mode support */
919
+ @media (prefers-contrast: high) {
920
+ :root {
921
+ --border-color: #000000;
922
+ --text-primary: #000000;
923
+ --text-secondary: #333333;
924
+ --bg-primary: #ffffff;
925
+ --bg-secondary: #f0f0f0;
926
+ }
927
+ }
928
+
929
+ /* Reduced motion support */
930
+ @media (prefers-reduced-motion: reduce) {
931
+ * {
932
+ animation-duration: 0.01ms !important;
933
+ animation-iteration-count: 1 !important;
934
+ transition-duration: 0.01ms !important;
935
+ }
936
+ }
937
+
938
+ /* Error and success states */
939
+ .gr-textbox.error textarea,
940
+ .gr-textbox.error input {
941
+ border-color: #ef4444 !important;
942
+ background: rgba(239, 68, 68, 0.05) !important;
943
+ }
944
+
945
+ .gr-textbox.success textarea,
946
+ .gr-textbox.success input {
947
+ border-color: var(--secondary-color) !important;
948
+ background: rgba(16, 185, 129, 0.05) !important;
949
+ }
950
+
951
+ /* Custom status messages */
952
+ .status-message {
953
+ padding: 0.75rem 1rem !important;
954
+ border-radius: var(--radius-md) !important;
955
+ margin: 0.5rem 0 !important;
956
+ font-size: 0.875rem !important;
957
+ font-weight: 500 !important;
958
+ }
959
+
960
+ .status-success {
961
+ background: rgba(16, 185, 129, 0.1) !important;
962
+ color: #059669 !important;
963
+ border: 1px solid rgba(16, 185, 129, 0.2) !important;
964
+ }
965
+
966
+ .status-error {
967
+ background: rgba(239, 68, 68, 0.1) !important;
968
+ color: #dc2626 !important;
969
+ border: 1px solid rgba(239, 68, 68, 0.2) !important;
970
+ }
971
+
972
+ .status-warning {
973
+ background: rgba(245, 158, 11, 0.1) !important;
974
+ color: #d97706 !important;
975
+ border: 1px solid rgba(245, 158, 11, 0.2) !important;
976
+ }
977
+
978
+ /* Enhanced focus styles for accessibility */
979
  .gr-button:focus,
980
+ .gr-textbox input:focus,
981
+ .gr-textbox textarea:focus,
982
  .gr-file:focus {
983
+ outline: 2px solid var(--primary-color) !important;
984
  outline-offset: 2px !important;
985
  }
986
+
987
+ /* Custom scrollable areas */
988
+ .scrollable-content {
989
+ max-height: 400px !important;
990
+ overflow-y: auto !important;
991
+ padding: 1rem !important;
992
+ background: var(--bg-secondary) !important;
993
+ border-radius: var(--radius-md) !important;
994
+ border: 1px solid var(--border-color) !important;
995
  }
996
  """
997
 
998
+ # Create the interface
999
+ with gr.Blocks(css=custom_css, title="πŸ“š RAG PDF Chat Interface", theme=gr.themes.Soft()) as interface:
 
 
 
 
 
 
1000
 
1001
+ # Header
1002
  gr.Markdown("""
1003
+ # 📚 RAG PDF Chat Interface
1004
 
1005
+ **Upload PDF documents and ask questions about their content using advanced AI**
1006
 
1007
+ This interface allows you to:
1008
+ - Upload PDF files or ZIP archives containing PDFs
1009
+ - Process documents using state-of-the-art text chunking and embedding techniques
1010
+ - Ask questions about your documents using natural language
1011
+ - Get accurate answers with source citations
1012
  """)
1013
 
1014
+ # Main interface layout
 
 
 
 
 
 
 
 
 
1015
  with gr.Row():
1016
+ # Left column - Controls
1017
+ with gr.Column(scale=1):
 
1018
 
1019
+ # Pre-loaded PDFs section
1020
+ with gr.Accordion("πŸ“ Pre-loaded PDFs", open=PRELOADED_PDFS):
1021
+ gr.Markdown("""
1022
+ **Option 1: Use pre-existing PDFs**
 
 
 
 
 
 
 
 
 
1023
 
1024
+ If you have PDFs in the `./pdfs` folder, click the button below to process them.
1025
+ """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1026
 
1027
+ preload_btn = gr.Button(
1028
+ "πŸ”„ Load Pre-existing PDFs",
1029
+ variant="secondary",
1030
+ size="sm"
1031
+ )
1032
+ preload_status = gr.Textbox(
1033
+ label="Pre-load Status",
1034
+ interactive=False,
1035
+ lines=2
1036
+ )
1037
+
1038
+ # ZIP upload section
1039
+ with gr.Accordion("πŸ“¦ Upload ZIP Archive", open=False):
1040
+ gr.Markdown("""
1041
+ **Option 2: Upload ZIP containing PDFs**
1042
+
1043
+ Upload a ZIP file containing PDF documents. They will be extracted to the PDFs folder.
1044
+ """)
1045
+
1046
+ zip_file = gr.File(
1047
+ label="Upload ZIP Archive",
1048
+ file_types=[".zip"],
1049
+ file_count="single"
1050
+ )
1051
+ zip_btn = gr.Button(
1052
+ "πŸ“¦ Extract ZIP to PDFs",
1053
+ variant="secondary",
1054
+ size="sm"
1055
+ )
1056
+ zip_status = gr.Textbox(
1057
+ label="ZIP Status",
1058
+ interactive=False,
1059
+ lines=2
1060
+ )
1061
 
1062
+ # Direct PDF upload section
1063
+ with gr.Accordion("πŸ“„ Upload PDF Files", open=True):
1064
+ gr.Markdown("""
1065
+ **Option 3: Direct PDF upload**
1066
+
1067
+ Upload PDF files directly for processing.
1068
+ """)
1069
+
1070
+ pdf_files = gr.File(
1071
+ label="Upload PDF Files",
1072
+ file_types=[".pdf"],
1073
+ file_count="multiple"
1074
+ )
1075
+
1076
+ # Processing parameters
1077
+ with gr.Accordion("βš™οΈ Processing Parameters", open=False):
1078
+ gr.Markdown("""
1079
+ **Advanced Settings**
1080
+
1081
+ Adjust these parameters to optimize document processing for your specific needs.
1082
+ """)
1083
+
1084
  chunk_size = gr.Slider(
1085
+ minimum=500,
1086
  maximum=2000,
1087
  value=1000,
1088
  step=100,
1089
  label="Chunk Size",
1090
+ info="Size of text chunks for processing (larger = more context, smaller = more precise)"
1091
  )
1092
 
1093
  chunk_overlap = gr.Slider(
 
1096
  value=200,
1097
  step=50,
1098
  label="Chunk Overlap",
1099
+ info="Overlap between chunks (helps maintain context across boundaries)"
1100
  )
1101
 
1102
+ # Process button
1103
+ process_btn = gr.Button(
1104
+ "πŸš€ Process Documents",
1105
+ variant="primary",
1106
+ size="lg"
 
 
 
 
 
 
 
 
 
 
 
 
1107
  )
1108
 
1109
+ # Status display
1110
+ status_output = gr.Textbox(
1111
+ label="Processing Status",
1112
+ interactive=False,
1113
+ lines=4
1114
  )
1115
+
1116
+ # Right column - Chat interface
1117
+ with gr.Column(scale=2):
1118
 
1119
+ # Chat interface
1120
+ with gr.Tab("πŸ’¬ Chat with Documents"):
1121
+ gr.Markdown("""
1122
+ **Ask questions about your documents**
1123
+
1124
+ Once you've processed your PDFs, you can ask questions about their content.
1125
+ The AI will provide answers based on the information in your documents.
1126
+ """)
1127
+
1128
+ # Question input
1129
+ question_input = gr.Textbox(
1130
+ label="Ask a question about your documents",
1131
+ placeholder="e.g., What is the main topic discussed in the document?",
1132
+ lines=2
1133
  )
1134
 
1135
+ # Ask button
1136
+ ask_btn = gr.Button(
1137
+ "πŸ” Ask Question",
1138
+ variant="primary",
1139
+ size="lg"
1140
  )
1141
+
1142
+ # Answer display
1143
+ with gr.Row():
1144
+ with gr.Column():
1145
+ answer_output = gr.Textbox(
1146
+ label="Answer",
1147
+ interactive=False,
1148
+ lines=8
1149
+ )
1150
+
1151
+ with gr.Column():
1152
+ sources_output = gr.Textbox(
1153
+ label="Sources & References",
1154
+ interactive=False,
1155
+ lines=8
1156
+ )
1157
+
1158
+ # Help tab
1159
+ with gr.Tab("❓ Help & Tips"):
1160
+ gr.Markdown("""
1161
+ ## 🔧 How to Use This Interface
1162
+
1163
+ ### Step 1: Upload Documents
1164
+ Choose one of three options:
1165
+ - **Pre-loaded PDFs**: Use documents already in the `./pdfs` folder
1166
+ - **ZIP Archive**: Upload a ZIP file containing multiple PDFs
1167
+ - **Direct Upload**: Upload PDF files directly
1168
+
1169
+ ### Step 2: Process Documents
1170
+ Click "Process Documents" to:
1171
+ - Extract text from PDFs
1172
+ - Split text into manageable chunks
1173
+ - Create embeddings for semantic search
1174
+ - Set up the question-answering system
1175
+
1176
+ ### Step 3: Ask Questions
1177
+ Once processing is complete, you can:
1178
+ - Ask specific questions about document content
1179
+ - Get answers with source citations
1180
+ - Explore different aspects of your documents
1181
+
1182
+ ## 💡 Tips for Better Results
1183
+
1184
+ ### Question Formatting
1185
+ - **Good**: "What are the main findings about climate change?"
1186
+ - **Better**: "What specific evidence does the document provide about climate change impacts?"
1187
+ - **Best**: "According to the research, what are the three most significant climate change impacts on agriculture?"
1188
+
1189
+ ### Document Preparation
1190
+ - Use high-quality, text-based PDFs (not scanned images)
1191
+ - Ensure documents are well-structured with clear headings
1192
+ - Remove unnecessary pages to improve processing speed
1193
+
1194
+ ### Processing Parameters
1195
+ - **Chunk Size**:
1196
+ - Larger (1500-2000): Better for broad context questions
1197
+ - Smaller (500-1000): Better for specific detail questions
1198
+ - **Chunk Overlap**:
1199
+ - More overlap (200-300): Better context continuity
1200
+ - Less overlap (0-100): Faster processing
1201
+
1202
+ ## 🚨 Troubleshooting
1203
+
1204
+ ### Common Issues
1205
+ - **"No documents loaded"**: Check PDF file format and quality
1206
+ - **"Model initialization failed"**: Verify HuggingFace token is set
1207
+ - **"Processing timeout"**: Try smaller chunk sizes or fewer documents
1208
+ - **"Empty answers"**: Rephrase questions or check document content
1209
+
1210
+ ### System Requirements
1211
+ - **HuggingFace Token**: Required for AI model access
1212
+ - **Memory**: At least 4GB RAM recommended for large documents
1213
+ - **Storage**: Sufficient space for temporary file processing
1214
+
1215
+ ## 🔒 Privacy & Security
1216
+
1217
+ - Documents are processed locally when possible
1218
+ - No document content is permanently stored
1219
+ - AI model queries may be sent to HuggingFace servers
1220
+ - Remove sensitive information before processing
1221
+
1222
+ ## 📚 Supported Features
1223
+
1224
+ - **File Types**: PDF documents only
1225
+ - **Languages**: Primarily English, limited support for other languages
1226
+ - **Document Size**: Up to 50MB per PDF recommended
1227
+ - **Concurrent Processing**: Multiple documents simultaneously
1228
+
1229
+ ---
1230
+
1231
+ *Need more help? Check the console output for detailed error messages and logs.*
1232
+ """)
1233
 
1234
+ # Event handlers
1235
+ def handle_preload():
1236
+ return load_preloaded_pdfs()
1237
+
1238
+ def handle_zip_extract(zip_file):
1239
+ return extract_zip_to_pdfs(zip_file)
1240
+
1241
+ def handle_process(pdf_files, chunk_size, chunk_overlap):
1242
+ return process_pdfs(pdf_files, chunk_size, chunk_overlap)
1243
 
1244
+ def handle_question(question):
1245
+ return answer_question(question)
1246
+
1247
+ # Connect event handlers
1248
  preload_btn.click(
1249
+ fn=handle_preload,
1250
+ outputs=preload_status
 
1251
  )
1252
 
1253
+ zip_btn.click(
1254
+ fn=handle_zip_extract,
1255
+ inputs=zip_file,
1256
+ outputs=zip_status
1257
+ )
1258
+
1259
+ process_btn.click(
1260
+ fn=handle_process,
1261
+ inputs=[pdf_files, chunk_size, chunk_overlap],
1262
+ outputs=status_output
1263
  )
1264
 
1265
  ask_btn.click(
1266
+ fn=handle_question,
1267
+ inputs=question_input,
1268
  outputs=[answer_output, sources_output]
1269
  )
1270
 
1271
+ # Enable Enter key for question input
1272
  question_input.submit(
1273
+ fn=handle_question,
1274
+ inputs=question_input,
1275
  outputs=[answer_output, sources_output]
1276
  )
1277
 
1278
+ # Add keyboard shortcuts info
 
 
 
 
 
 
 
 
 
 
1279
  gr.Markdown("""
1280
  ---
1281
+ **💡 Keyboard Shortcuts**: Press Shift+Enter in the question box to ask your question quickly (the box is multi-line, so Enter alone starts a new line).
 
 
 
1282
  """)
1283
 
1284
+ return interface
 
 
 
 
 
 
 
 
 
 
 
 
 
1285
 
1286
  # Main execution
1287
if __name__ == "__main__":
    # Script entry point: build the UI, print a short status report to the
    # console, then start the Gradio server.
    interface = create_interface()

    # Report the module-level availability flags and configuration values.
    print("🔍 System Status Check:")
    for label, available in (
        ("LangChain Available", LANGCHAIN_AVAILABLE),
        ("HuggingFace Hub Available", HUGGINGFACE_HUB_AVAILABLE),
    ):
        # Show a cross instead of a check mark when the flag is falsy, so the
        # report does not read "✅ ...: False".
        mark = "✅" if available else "❌"
        print(f"{mark} {label}: {available}")
    print(f"✅ Pre-loaded PDFs: {PRELOADED_PDFS}")
    print(f"✅ PDF Folder: {PDF_FOLDER_PATH}")

    # The token is only checked for presence here; it is not validated.
    hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
    if hf_token:
        print("✅ HuggingFace API Token: Found")
    else:
        print("❌ HuggingFace API Token: Not found - Please set HUGGINGFACEHUB_API_TOKEN environment variable")

    try:
        # Enable request queueing on the app object itself. The former
        # `enable_queue` and `show_tips` keyword arguments of `launch()` were
        # removed in Gradio 4 and raise TypeError there, so they are not passed.
        interface.queue()
        interface.launch(
            server_name="0.0.0.0",  # listen on all network interfaces
            server_port=7860,
            share=False,
            debug=False,
            show_error=True,
            max_threads=10,
            height=800,
            width="100%",
            # Allow Gradio to serve files from the PDF folder.
            allowed_paths=[PDF_FOLDER_PATH],
            # Pass `auth=(username, password)` here if authentication is needed.
        )
    except Exception as e:
        # Top-level boundary: record the full traceback, then tell the user.
        logger.exception("Failed to launch interface: %s", e)
        print(f"❌ Failed to launch interface: {e}")
        print("🔧 Try running with: python app.py")