msmaje committed on
Commit 8442587 · verified · 1 Parent(s): 684c9d1

Update app.py

Files changed (1)
  1. app.py +157 -447
app.py CHANGED
@@ -10,6 +10,7 @@ import zipfile
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
+# Try importing LangChain components
 try:
     from langchain_community.document_loaders import PyPDFDirectoryLoader, PyPDFLoader
     from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -31,7 +32,7 @@ try:
 except ImportError:
     try:
         from langchain_huggingface import HuggingFaceEndpoint
-        HUGGINGFACE_HUB_AVAILABLE = False
+        HUGGINGFACE_HUB_AVAILABLE = False  # HuggingFaceEndpoint doesn't have the same interface as HuggingFaceHub
         logger.info("Using HuggingFaceEndpoint as fallback")
     except ImportError:
         logger.error("No suitable HuggingFace LLM implementation found")
@@ -69,7 +70,7 @@ def initialize_models():
     # Get HuggingFace token from environment
     hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
     if not hf_token:
-        return False, "❌ HuggingFace API token not found in environment variables"
+        return False, "❌ HuggingFace API token not found in environment variables. Please set HUGGINGFACEHUB_API_TOKEN."
 
     return True, "✅ Models initialized successfully"
@@ -82,7 +83,7 @@ def create_llm():
     hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
 
     if not hf_token:
-        logger.error("HuggingFace API token not found")
+        logger.error("HuggingFace API token not found for LLM creation.")
         return create_fallback_llm()
 
     try:
@@ -116,7 +117,7 @@ def create_llm():
                 logger.warning(f"Failed to initialize {model_id} with HuggingFaceHub: {model_error}")
                 continue
 
-        # Fallback to HuggingFaceEndpoint if HuggingFaceHub is not available
+        # Fallback to HuggingFaceEndpoint if HuggingFaceHub is not available or failed
         try:
             from langchain_huggingface import HuggingFaceEndpoint
 
@@ -147,10 +148,10 @@ def create_llm():
                 logger.warning(f"Failed to initialize {model_id} with HuggingFaceEndpoint: {model_error}")
                 continue
         except ImportError:
-            pass
+            pass  # HuggingFaceEndpoint not available
 
         # If all else fails, return fallback
-        raise Exception("All model initialization attempts failed")
+        raise Exception("All HuggingFace model initialization attempts failed")
 
     except Exception as e:
         logger.error(f"LLM creation error: {e}")
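This hunk sits at the end of a try-each-model loop: every candidate model ID is attempted in order, failures are logged and skipped, and the exception above only fires once all candidates are exhausted. A minimal sketch of that pattern; `init_model` and the `MODEL_IDS` list are hypothetical stand-ins for the real initialization calls in `create_llm`:

```python
import logging

logger = logging.getLogger(__name__)

MODEL_IDS = ["model-a", "model-b"]  # illustrative candidate list

def create_llm_with_fallback(init_model):
    """Try each candidate model in order; raise only if every one fails."""
    for model_id in MODEL_IDS:
        try:
            llm = init_model(model_id)
            logger.info(f"Initialized {model_id}")
            return llm
        except Exception as model_error:
            logger.warning(f"Failed to initialize {model_id}: {model_error}")
            continue
    raise Exception("All HuggingFace model initialization attempts failed")
```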
@@ -195,7 +196,7 @@ def create_fallback_llm():
         def invoke(self, prompt):
             return "System temporarily unavailable. Please try again later."
 
-        def __call__(self, prompt):
+        def __call__(self, prompt):  # For compatibility with older LangChain chains
             return self.invoke(prompt)
 
     return SimpleFallback()
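The stub works because it satisfies both call styles LangChain uses: the newer `.invoke(prompt)` and the legacy `llm(prompt)`. A self-contained sketch of the whole fallback, assuming only what the hunk shows:

```python
def create_fallback_llm():
    """Return a stub LLM so the app degrades gracefully when no real model loads."""

    class SimpleFallback:
        def invoke(self, prompt):
            # Same canned reply regardless of input.
            return "System temporarily unavailable. Please try again later."

        def __call__(self, prompt):  # legacy call style used by older chains
            return self.invoke(prompt)

    return SimpleFallback()

# Either call style works:
llm = create_fallback_llm()
print(llm.invoke("hello"))
print(llm("hello"))
```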
@@ -222,7 +223,7 @@ def load_preloaded_pdfs(chunk_size=1000, chunk_overlap=200):
     documents = loader.load()
 
     if not documents:
-        return "❌ No documents were loaded from the PDFs folder."
+        return "❌ No documents were loaded from the PDFs folder. Ensure the folder contains valid PDFs."
 
     # Split documents into chunks
     text_splitter = RecursiveCharacterTextSplitter(
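For reference, the load-then-split step this hunk's error message guards: a sketch of loading a PDF folder and chunking it, using the same langchain_community loaders imported at the top of the file (the `./pdfs` path matches the folder referenced elsewhere in the diff):

```python
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

PDF_FOLDER_PATH = "./pdfs"

loader = PyPDFDirectoryLoader(PDF_FOLDER_PATH)
documents = loader.load()  # one Document per PDF page

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,    # default used by load_preloaded_pdfs
    chunk_overlap=200,  # keeps context across chunk boundaries
)
chunks = text_splitter.split_documents(documents)
print(f"{len(documents)} pages -> {len(chunks)} chunks")
```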
@@ -269,12 +270,12 @@ Helpful Answer:
             test_result = retrieval_qa({"query": "test"})
             logger.info("QA chain test successful")
         except Exception as test_error:
-            logger.warning(f"QA chain test failed: {test_error}")
+            logger.warning(f"QA chain test failed during initial run: {test_error}")
             # Chain created but might have issues - continue anyway
 
     except Exception as chain_error:
         logger.error(f"Chain creation error: {chain_error}")
-        return f"❌ Error creating QA chain: {str(chain_error)}"
+        return f"❌ Error creating QA chain: {str(chain_error)}. Check LLM availability."
 
     pdf_files = [f for f in os.listdir(PDF_FOLDER_PATH) if f.endswith('.pdf')]
     return f"✅ Successfully processed {len(documents)} documents from {len(pdf_files)} PDF files into {len(chunks)} chunks. Ready for questions!"
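The "QA chain test" here is a smoke test: fire one throwaway query right after building the chain so failures surface at processing time rather than on the user's first real question. A sketch of chain construction plus that smoke test, assuming a RetrievalQA setup like this app's (`llm`, `vectorstore`, and `logger` come from earlier steps):

```python
from langchain.chains import RetrievalQA

retrieval_qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
    return_source_documents=True,
)

# Smoke-test the chain with a throwaway query; log but don't abort on failure.
try:
    retrieval_qa({"query": "test"})
except Exception as test_error:
    logger.warning(f"QA chain test failed during initial run: {test_error}")
```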
@@ -302,19 +303,18 @@ def extract_zip_to_pdfs(zip_file):
 
         for pdf_file in pdf_files:
             # Extract to PDFs folder
-            zip_ref.extract(pdf_file, PDF_FOLDER_PATH)
-
-            # If file is in a subfolder, move it to the root of PDFs folder
-            extracted_path = os.path.join(PDF_FOLDER_PATH, pdf_file)
-            if os.path.dirname(pdf_file):  # File is in a subfolder
-                new_path = os.path.join(PDF_FOLDER_PATH, os.path.basename(pdf_file))
-                shutil.move(extracted_path, new_path)
-                # Clean up empty directories
-                try:
-                    os.rmdir(os.path.dirname(extracted_path))
-                except:
-                    pass
-
+            # Ensure the path is safe and doesn't lead to directory traversal
+            extracted_path = os.path.join(PDF_FOLDER_PATH, os.path.basename(pdf_file))
+
+            # Check if the extracted path is within the intended PDF_FOLDER_PATH
+            if not os.path.abspath(extracted_path).startswith(os.path.abspath(PDF_FOLDER_PATH)):
+                logger.warning(f"Attempted path traversal detected: {pdf_file}")
+                continue  # Skip this file
+
+            # Extract the file
+            with open(extracted_path, "wb") as f:
+                f.write(zip_ref.read(pdf_file))
 
     global PRELOADED_PDFS
     PRELOADED_PDFS = True
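This is the substantive fix in the commit: instead of `zip_ref.extract()` plus a move-and-cleanup dance, each entry is flattened to its basename and written inside `PDF_FOLDER_PATH`, refusing anything that would escape the folder (the classic "Zip Slip" traversal). A self-contained sketch of the same guard using only the standard library:

```python
import os
import zipfile

PDF_FOLDER_PATH = "./pdfs"

def safe_extract_pdfs(zip_path):
    """Extract only .pdf entries, flattened into PDF_FOLDER_PATH."""
    os.makedirs(PDF_FOLDER_PATH, exist_ok=True)
    extracted = []
    with zipfile.ZipFile(zip_path) as zip_ref:
        for name in zip_ref.namelist():
            if not name.lower().endswith(".pdf"):
                continue
            # Flatten subfolders and reject anything escaping the target folder.
            target = os.path.join(PDF_FOLDER_PATH, os.path.basename(name))
            if not os.path.abspath(target).startswith(os.path.abspath(PDF_FOLDER_PATH)):
                continue  # path traversal attempt
            with open(target, "wb") as f:
                f.write(zip_ref.read(name))
            extracted.append(target)
    return extracted
```

Because the basename is taken first, the `startswith` check is belt-and-suspenders here, which is the same layering the committed code uses.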
 
@@ -401,11 +401,11 @@ Helpful Answer:
             test_result = retrieval_qa({"query": "test"})
             logger.info("QA chain test successful")
         except Exception as test_error:
-            logger.warning(f"QA chain test failed: {test_error}")
+            logger.warning(f"QA chain test failed during initial run: {test_error}")
 
     except Exception as chain_error:
         logger.error(f"Chain creation error: {chain_error}")
-        return f"❌ Error creating QA chain: {str(chain_error)}"
+        return f"❌ Error creating QA chain: {str(chain_error)}. Check LLM availability."
 
     # Clean up temp directory
     shutil.rmtree(temp_dir)
@@ -427,7 +427,7 @@ def answer_question(question):
         return "❌ Please upload and process PDF files first.", ""
 
     try:
-        # Get answer from RAG system with timeout and error handling
+        # Get answer from RAG system
        result = retrieval_qa({"query": question})
 
        answer = result.get("result", "No answer generated")
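`retrieval_qa({"query": ...})` returns a dict; with `return_source_documents=True` it carries both the generated answer and the retrieved chunks. A sketch of unpacking it defensively, the way `answer_question` does (`retrieval_qa` and `question` come from the surrounding code):

```python
result = retrieval_qa({"query": question})

answer = result.get("result", "No answer generated")
sources = result.get("source_documents", [])

# Summarize where each supporting chunk came from.
sources_text = "\n".join(
    f"- {doc.metadata.get('source', 'unknown')} (page {doc.metadata.get('page', '?')})"
    for doc in sources
)
```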
@@ -448,7 +448,7 @@ def answer_question(question):
     except Exception as e:
         logger.error(f"Question answering error: {e}")
 
-        # Provide a fallback response using just the retriever
+        # Provide a fallback response using just the retriever if LLM fails
         try:
             if vectorstore is not None:
                 # Get relevant documents directly from vectorstore
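When the LLM call fails, the app degrades to a retrieval-only answer: query the vector store directly and return the raw passages with a disclaimer, as the next hunk shows. A sketch of that fallback, assuming `vectorstore` supports `similarity_search` (FAISS and Chroma both do):

```python
def retrieval_only_answer(vectorstore, question, k=3):
    """Return raw retrieved passages when the LLM is unavailable."""
    docs = vectorstore.similarity_search(question, k=k)
    if not docs:
        return "No relevant passages found.", ""
    fallback_answer = "\n\n".join(doc.page_content for doc in docs)
    sources_text = "\n".join(
        f"- {doc.metadata.get('source', 'unknown')}" for doc in docs
    )
    note = "\n*Note: This is a direct search result due to a technical issue with the AI model.*"
    return fallback_answer + note, sources_text
```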
@@ -469,11 +469,11 @@
 
                 return fallback_answer + "\n*Note: This is a direct search result due to a technical issue with the AI model.*", sources_text
             else:
-                return f"❌ Error answering question: {str(e)}", ""
+                return f"❌ Error answering question: {str(e)}. Vector store not initialized.", ""
 
         except Exception as fallback_error:
-            logger.error(f"Fallback error: {fallback_error}")
-            return f"❌ Error answering question: {str(e)}", ""
+            logger.error(f"Fallback error during question answering: {fallback_error}")
+            return f"❌ Critical error answering question: {str(e)}", ""
 
 def create_interface():
     """Create the fully responsive Gradio interface"""
@@ -530,6 +530,28 @@ def create_interface():
         min-width: 0 !important;
     }
 
+    /* Remove any pre-existing or default Gradio styling that might conflict */
+    .gradio-container,
+    .gr-panel,
+    .gr-block,
+    .gr-group {
+        box-sizing: border-box !important;
+        min-width: 0 !important; /* Ensure elements can shrink */
+    }
+
+    /* Ensure images and media scale within their containers */
+    img, video {
+        max-width: 100% !important;
+        height: auto !important;
+        display: block !important;
+    }
+
+    /* Specific adjustments for file upload area text */
+    .gr-file .file-upload-text {
+        font-size: clamp(0.75rem, 3vw, 1rem) !important; /* Make text smaller on mobile */
+        line-height: 1.4 !important;
+    }
+
     /* Mobile-first responsive breakpoints */
 
     /* Small devices (phones, 320px and up) */
@@ -593,6 +615,24 @@ def create_interface():
     .gr-accordion {
         border-radius: var(--radius-md) !important;
         border: 1px solid var(--border-color) !important;
+        width: 100% !important; /* Force full width */
+        flex: none !important; /* Prevent flex issues */
+    }
+    /* Adjust spacing for accordions within columns */
+    .gr-column .gr-accordion {
+        margin-bottom: 1rem !important;
+    }
+
+    /* Ensure direct children of gradio-container also respond well */
+    .gradio-container > *:not(.gr-footer) { /* Exclude footer if it exists */
+        width: 100% !important;
+        margin-left: auto !important;
+        margin-right: auto !important;
+    }
+
+    /* Make sure all gradio components inside rows take full width */
+    .gr-row > .gr-block {
+        width: 100% !important;
     }
 
     /* Slider improvements */
@@ -634,11 +674,16 @@ def create_interface():
 
     /* Two-column layout for medium screens */
     .gr-column:first-child {
-        flex: 0 0 35% !important;
+        flex: 0 0 40% !important;
+        max-width: 40% !important;
     }
 
     .gr-column:last-child {
-        flex: 0 0 60% !important;
+        flex: 1 1 55% !important;
+        max-width: 55% !important;
+    }
+    .gr-row {
+        justify-content: space-between !important; /* Distribute space */
     }
     }
@@ -667,7 +712,8 @@ def create_interface():
 
     /* Optimal desktop layout */
     .gr-column:first-child {
-        flex: 0 0 400px !important;
+        flex: 0 0 350px !important;
+        max-width: 350px !important;
     }
 
     .gr-column:last-child {
@@ -785,7 +831,7 @@ def create_interface():
 
     .gr-textbox textarea:focus,
     .gr-textbox input:focus {
-        border-color: var(--primary-color) !important;
+        border-color: var(--primary-color) !important;
         outline: none !important;
         box-shadow: 0 0 0 3px rgba(37, 99, 235, 0.1) !important;
     }
@@ -882,443 +928,107 @@ def create_interface():
         background: var(--text-secondary) !important;
     }
 
-    /* Animation classes */
-    .fade-in {
-        animation: fadeIn 0.3s ease-in-out !important;
-    }
-
-    @keyframes fadeIn {
-        from { opacity: 0; transform: translateY(10px); }
-        to { opacity: 1; transform: translateY(0); }
-    }
-
-    /* Accessibility improvements */
-    .gr-button:focus-visible,
-    .gr-textbox input:focus-visible,
-    .gr-textbox textarea:focus-visible {
-        outline: 2px solid var(--primary-color) !important;
-        outline-offset: 2px !important;
-    }
-
-    /* Print styles */
-    @media print {
-        .gr-button,
-        .gr-file,
-        .gr-slider {
-            display: none !important;
-        }
-
-        .gr-textbox textarea,
-        .gr-textbox input {
-            border: 1px solid #000 !important;
-            background: white !important;
-        }
-    }
-
-    /* High contrast mode support */
-    @media (prefers-contrast: high) {
-        :root {
-            --border-color: #000000;
-            --text-primary: #000000;
-            --text-secondary: #333333;
-            --bg-primary: #ffffff;
-            --bg-secondary: #f0f0f0;
-        }
-    }
-
-    /* Reduced motion support */
-    @media (prefers-reduced-motion: reduce) {
-        * {
-            animation-duration: 0.01ms !important;
-            animation-iteration-count: 1 !important;
-            transition-duration: 0.01ms !important;
-        }
-    }
-
-    /* Error and success states */
-    .gr-textbox.error textarea,
-    .gr-textbox.error input {
-        border-color: #ef4444 !important;
-        background: rgba(239, 68, 68, 0.05) !important;
-    }
-
-    .gr-textbox.success textarea,
-    .gr-textbox.success input {
-        border-color: var(--secondary-color) !important;
-        background: rgba(16, 185, 129, 0.05) !important;
-    }
-
-    /* Custom status messages */
-    .status-message {
-        padding: 0.75rem 1rem !important;
-        border-radius: var(--radius-md) !important;
-        margin: 0.5rem 0 !important;
-        font-size: 0.875rem !important;
-        font-weight: 500 !important;
+    /* Ensure good spacing for text outputs */
+    .gr-markdown {
+        padding: 1rem 0 !important;
     }
 
-    .status-success {
-        background: rgba(16, 185, 129, 0.1) !important;
-        color: #059669 !important;
-        border: 1px solid rgba(16, 185, 129, 0.2) !important;
-    }
-
-    .status-error {
-        background: rgba(239, 68, 68, 0.1) !important;
-        color: #dc2626 !important;
-        border: 1px solid rgba(239, 68, 68, 0.2) !important;
-    }
-
-    .status-warning {
-        background: rgba(245, 158, 11, 0.1) !important;
-        color: #d97706 !important;
-        border: 1px solid rgba(245, 158, 11, 0.2) !important;
-    }
-
-    /* Enhanced focus styles for accessibility */
-    .gr-button:focus,
-    .gr-textbox input:focus,
-    .gr-textbox textarea:focus,
-    .gr-file:focus {
-        outline: 2px solid var(--primary-color) !important;
-        outline-offset: 2px !important;
-    }
-
-    /* Custom scrollable areas */
-    .scrollable-content {
-        max-height: 400px !important;
-        overflow-y: auto !important;
-        padding: 1rem !important;
-        background: var(--bg-secondary) !important;
-        border-radius: var(--radius-md) !important;
-        border: 1px solid var(--border-color) !important;
-    }
     """
 
-    # Create the interface
-    with gr.Blocks(css=custom_css, title="📚 RAG PDF Chat Interface", theme=gr.themes.Soft()) as interface:
-
-        # Header
-        gr.Markdown("""
-        # 📚 RAG PDF Chat Interface
-
-        **Upload PDF documents and ask questions about their content using advanced AI**
-
-        This interface allows you to:
-        - Upload PDF files or ZIP archives containing PDFs
-        - Process documents using state-of-the-art text chunking and embedding techniques
-        - Ask questions about your documents using natural language
-        - Get accurate answers with source citations
-        """)
-
-        # Main interface layout
-        with gr.Row():
-            # Left column - Controls
-            with gr.Column(scale=1):
-
-                # Pre-loaded PDFs section
-                with gr.Accordion("📁 Pre-loaded PDFs", open=PRELOADED_PDFS):
-                    gr.Markdown("""
-                    **Option 1: Use pre-existing PDFs**
-
-                    If you have PDFs in the `./pdfs` folder, click the button below to process them.
-                    """)
-
-                    preload_btn = gr.Button(
-                        "🔄 Load Pre-existing PDFs",
-                        variant="secondary",
-                        size="sm"
-                    )
-                    preload_status = gr.Textbox(
-                        label="Pre-load Status",
-                        interactive=False,
-                        lines=2
-                    )
-
-                # ZIP upload section
-                with gr.Accordion("📦 Upload ZIP Archive", open=False):
-                    gr.Markdown("""
-                    **Option 2: Upload ZIP containing PDFs**
-
-                    Upload a ZIP file containing PDF documents. They will be extracted to the PDFs folder.
-                    """)
-
-                    zip_file = gr.File(
-                        label="Upload ZIP Archive",
-                        file_types=[".zip"],
-                        file_count="single"
-                    )
-                    zip_btn = gr.Button(
-                        "📦 Extract ZIP to PDFs",
-                        variant="secondary",
-                        size="sm"
-                    )
-                    zip_status = gr.Textbox(
-                        label="ZIP Status",
-                        interactive=False,
-                        lines=2
-                    )
-
-                # Direct PDF upload section
-                with gr.Accordion("📄 Upload PDF Files", open=True):
-                    gr.Markdown("""
-                    **Option 3: Direct PDF upload**
-
-                    Upload PDF files directly for processing.
-                    """)
-
-                    pdf_files = gr.File(
-                        label="Upload PDF Files",
-                        file_types=[".pdf"],
-                        file_count="multiple"
-                    )
-
-                # Processing parameters
-                with gr.Accordion("⚙️ Processing Parameters", open=False):
-                    gr.Markdown("""
-                    **Advanced Settings**
-
-                    Adjust these parameters to optimize document processing for your specific needs.
-                    """)
-
-                    chunk_size = gr.Slider(
-                        minimum=500,
-                        maximum=2000,
-                        value=1000,
-                        step=100,
-                        label="Chunk Size",
-                        info="Size of text chunks for processing (larger = more context, smaller = more precise)"
-                    )
-
-                    chunk_overlap = gr.Slider(
-                        minimum=0,
-                        maximum=500,
-                        value=200,
-                        step=50,
-                        label="Chunk Overlap",
-                        info="Overlap between chunks (helps maintain context across boundaries)"
-                    )
-
-                    # Process button
-                    process_btn = gr.Button(
-                        "🚀 Process Documents",
-                        variant="primary",
-                        size="lg"
-                    )
-
-                    # Status display
-                    status_output = gr.Textbox(
-                        label="Processing Status",
-                        interactive=False,
-                        lines=4
-                    )
-
-            # Right column - Chat interface
-            with gr.Column(scale=2):
-
-                # Chat interface
-                with gr.Tab("💬 Chat with Documents"):
-                    gr.Markdown("""
-                    **Ask questions about your documents**
-
-                    Once you've processed your PDFs, you can ask questions about their content.
-                    The AI will provide answers based on the information in your documents.
-                    """)
-
-                    # Question input
-                    question_input = gr.Textbox(
-                        label="Ask a question about your documents",
-                        placeholder="e.g., What is the main topic discussed in the document?",
-                        lines=2
-                    )
-
-                    # Ask button
-                    ask_btn = gr.Button(
-                        "🔍 Ask Question",
-                        variant="primary",
-                        size="lg"
-                    )
-
-                    # Answer display
-                    with gr.Row():
-                        with gr.Column():
-                            answer_output = gr.Textbox(
-                                label="Answer",
-                                interactive=False,
-                                lines=8
-                            )
-
-                        with gr.Column():
-                            sources_output = gr.Textbox(
-                                label="Sources & References",
-                                interactive=False,
-                                lines=8
-                            )
-
-                # Help tab
-                with gr.Tab("❓ Help & Tips"):
-                    gr.Markdown("""
-                    ## 🔧 How to Use This Interface
-
-                    ### Step 1: Upload Documents
-                    Choose one of three options:
-                    - **Pre-loaded PDFs**: Use documents already in the `./pdfs` folder
-                    - **ZIP Archive**: Upload a ZIP file containing multiple PDFs
-                    - **Direct Upload**: Upload PDF files directly
-
-                    ### Step 2: Process Documents
-                    Click "Process Documents" to:
-                    - Extract text from PDFs
-                    - Split text into manageable chunks
-                    - Create embeddings for semantic search
-                    - Set up the question-answering system
-
-                    ### Step 3: Ask Questions
-                    Once processing is complete, you can:
-                    - Ask specific questions about document content
-                    - Get answers with source citations
-                    - Explore different aspects of your documents
-
-                    ## 💡 Tips for Better Results
-
-                    ### Question Formatting
-                    - **Good**: "What are the main findings about climate change?"
-                    - **Better**: "What specific evidence does the document provide about climate change impacts?"
-                    - **Best**: "According to the research, what are the three most significant climate change impacts on agriculture?"
-
-                    ### Document Preparation
-                    - Use high-quality, text-based PDFs (not scanned images)
-                    - Ensure documents are well-structured with clear headings
-                    - Remove unnecessary pages to improve processing speed
-
-                    ### Processing Parameters
-                    - **Chunk Size**:
-                      - Larger (1500-2000): Better for broad context questions
-                      - Smaller (500-1000): Better for specific detail questions
-                    - **Chunk Overlap**:
-                      - More overlap (200-300): Better context continuity
-                      - Less overlap (0-100): Faster processing
-
-                    ## 🚨 Troubleshooting
-
-                    ### Common Issues
-                    - **"No documents loaded"**: Check PDF file format and quality
-                    - **"Model initialization failed"**: Verify HuggingFace token is set
-                    - **"Processing timeout"**: Try smaller chunk sizes or fewer documents
-                    - **"Empty answers"**: Rephrase questions or check document content
-
-                    ### System Requirements
-                    - **HuggingFace Token**: Required for AI model access
-                    - **Memory**: At least 4GB RAM recommended for large documents
-                    - **Storage**: Sufficient space for temporary file processing
-
-                    ## 🔒 Privacy & Security
-
-                    - Documents are processed locally when possible
-                    - No document content is permanently stored
-                    - AI model queries may be sent to HuggingFace servers
-                    - Remove sensitive information before processing
-
-                    ## 📚 Supported Features
-
-                    - **File Types**: PDF documents only
-                    - **Languages**: Primarily English, limited support for other languages
-                    - **Document Size**: Up to 50MB per PDF recommended
-                    - **Concurrent Processing**: Multiple documents simultaneously
-
-                    ---
-
-                    *Need more help? Check the console output for detailed error messages and logs.*
-                    """)
-
-        # Event handlers
-        def handle_preload():
-            return load_preloaded_pdfs()
-
-        def handle_zip_extract(zip_file):
-            return extract_zip_to_pdfs(zip_file)
-
-        def handle_process(pdf_files, chunk_size, chunk_overlap):
-            return process_pdfs(pdf_files, chunk_size, chunk_overlap)
-
-        def handle_question(question):
-            return answer_question(question)
-
-        # Connect event handlers
-        preload_btn.click(
-            fn=handle_preload,
-            outputs=preload_status
-        )
-
-        zip_btn.click(
-            fn=handle_zip_extract,
-            inputs=zip_file,
-            outputs=zip_status
-        )
-
-        process_btn.click(
-            fn=handle_process,
-            inputs=[pdf_files, chunk_size, chunk_overlap],
-            outputs=status_output
-        )
-
-        ask_btn.click(
-            fn=handle_question,
-            inputs=question_input,
-            outputs=[answer_output, sources_output]
-        )
-
-        # Enable Enter key for question input
-        question_input.submit(
-            fn=handle_question,
-            inputs=question_input,
-            outputs=[answer_output, sources_output]
-        )
-
-        # Add keyboard shortcuts info
-        gr.Markdown("""
-        ---
-        **💡 Keyboard Shortcuts**: Press Enter in the question box to ask your question quickly!
-        """)
-
-        return interface
+    with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
+        gr.Markdown(
+            """
+            # RAG PDF Chat Interface
+            Upload PDF documents and ask questions about their content using advanced AI.
+
+            This interface allows you to:
+            - Upload PDF files or ZIP archives containing PDFs
+            - Process documents using state-of-the-art text chunking and embedding techniques
+            - Ask questions about your documents using natural language
+            - Get accurate answers with source citations
+            """
+        )
+
+        # Main content area
+        with gr.Row():
+            with gr.Column(scale=1):  # This column will contain processing options
+                with gr.Accordion("📁 Pre-loaded PDFs", open=True):
+                    gr.Markdown("### Option 1: Use pre-existing PDFs")
+                    gr.Markdown("If you have PDFs in the `./pdfs` folder, click the button below to process them.")
+                    load_preloaded_btn = gr.Button("🔄 Load Pre-existing PDFs", variant="secondary")
+                    pre_load_status = gr.Textbox(label="Pre-load Status", interactive=False, value="No pre-loaded PDFs processed yet.")
+
+                with gr.Accordion("📦 Upload ZIP Archive", open=False):
+                    gr.Markdown("### Option 2: Upload ZIP Archive")
+                    zip_file_input = gr.File(label="Upload ZIP File", type="file", file_count="single", file_types=[".zip"])
+                    extract_zip_btn = gr.Button("📤 Extract ZIP Archive", variant="primary")
+                    zip_status_output = gr.Textbox(label="ZIP Extraction Status", interactive=False)
+
+                with gr.Accordion("📄 Upload PDF Files", open=False):
+                    gr.Markdown("### Option 3: Direct PDF upload")
+                    gr.Markdown("Upload PDF files directly for processing.")
+                    pdf_file_input = gr.File(label="Upload PDF Files", type="file", file_count="multiple", file_types=[".pdf"])
+
+                with gr.Accordion("⚙️ Processing Parameters", open=False):
+                    chunk_size_slider = gr.Slider(
+                        minimum=100,
+                        maximum=2000,
+                        value=1000,
+                        step=50,
+                        label="Chunk Size",
+                        info="Size of text chunks for processing."
+                    )
+                    chunk_overlap_slider = gr.Slider(
+                        minimum=0,
+                        maximum=500,
+                        value=200,
+                        step=10,
+                        label="Chunk Overlap",
+                        info="Overlap between text chunks to maintain context."
+                    )
+                    process_btn = gr.Button("🚀 Process Documents", variant="primary")
+                    processing_status = gr.Textbox(label="Processing Status", interactive=False)
+
+            with gr.Column(scale=2):  # This column will contain the chat interface
+                with gr.Accordion("💬 Chat with Documents", open=True):
+                    gr.Markdown("### Ask questions about your documents")
+                    gr.Markdown("Once you've processed your PDFs, you can ask questions about their content. The AI will provide answers based on the information in your documents.")
+                    question_input = gr.Textbox(label="Ask a question about your documents", placeholder="e.g., What is the main topic of the documents?")
+                    answer_output = gr.Textbox(label="Answer", interactive=False)
+                    sources_output = gr.Textbox(label="Sources & References", interactive=False)
+                    ask_btn = gr.Button("🔍 Ask Question", variant="primary")
+
+        gr.Markdown("Help & Tips: Ensure you have your HuggingFace API token set as an environment variable (HUGGINGFACEHUB_API_TOKEN) for the LLM to function properly.")
+
+        # Event listeners
+        load_preloaded_btn.click(
+            load_preloaded_pdfs,
+            inputs=[chunk_size_slider, chunk_overlap_slider],  # Pass sliders to function
+            outputs=pre_load_status
+        )
+        extract_zip_btn.click(
+            extract_zip_to_pdfs,
+            inputs=zip_file_input,
+            outputs=zip_status_output
+        )
+        process_btn.click(
+            process_pdfs,
+            inputs=[pdf_file_input, chunk_size_slider, chunk_overlap_slider],
+            outputs=processing_status
+        )
+        ask_btn.click(
+            answer_question,
+            inputs=question_input,
+            outputs=[answer_output, sources_output]
+        )
+
+        # Initial model check
+        demo.load(initialize_models, outputs=pre_load_status)  # Use pre_load_status to show init message
+
+    return demo
 
-# Main execution
 if __name__ == "__main__":
-    # Initialize the interface
-    interface = create_interface()
-
-    # Check system status
-    print("🔍 System Status Check:")
-    print(f"✅ LangChain Available: {LANGCHAIN_AVAILABLE}")
-    print(f"✅ HuggingFace Hub Available: {HUGGINGFACE_HUB_AVAILABLE}")
-    print(f"✅ Pre-loaded PDFs: {PRELOADED_PDFS}")
-    print(f"✅ PDF Folder: {PDF_FOLDER_PATH}")
-
-    # Check for HuggingFace token
-    hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
-    if hf_token:
-        print("✅ HuggingFace API Token: Found")
-    else:
-        print("❌ HuggingFace API Token: Not found - Please set HUGGINGFACEHUB_API_TOKEN environment variable")
-
-    # Launch the interface
-    try:
-        interface.launch(
-            server_name="0.0.0.0",
-            server_port=7860,
-            share=False,
-            debug=False,
-            show_error=True,
-            auth=None,
-            favicon_path=None,
-            ssl_keyfile=None,
-            ssl_certfile=None,
-            ssl_keyfile_password=None,
-            height=800,
-            prevent_thread_lock=False
-        )
-    except Exception as e:
-        logger.error(f"Failed to launch interface: {e}")
-        print(f"❌ Failed to launch interface: {e}")
-        print("🔧 Try running with: python your_script.py")
+    demo = create_interface()
+    # It's better to explicitly set share=False for local development
+    # and only set it to True if you intend to share publicly (which creates a public link)
+    demo.launch(show_api=False, inline=False)
 
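The rewritten `create_interface` drops the old `handle_*` wrapper functions and wires the worker functions to the buttons directly; it also uses `demo.load()` to run `initialize_models` once when the page loads. A minimal sketch of that wiring pattern, with placeholder callbacks standing in for the app's real functions:

```python
import gradio as gr

def initialize_models():
    return "✅ Models initialized"  # placeholder for the app's real startup check

def answer_question(question):
    return f"Echo: {question}", "(sources would go here)"  # placeholder

with gr.Blocks() as demo:
    question_input = gr.Textbox(label="Question")
    answer_output = gr.Textbox(label="Answer", interactive=False)
    sources_output = gr.Textbox(label="Sources", interactive=False)
    status = gr.Textbox(label="Status", interactive=False)
    ask_btn = gr.Button("Ask")

    # Wire the callback directly instead of going through a wrapper function.
    ask_btn.click(answer_question, inputs=question_input,
                  outputs=[answer_output, sources_output])

    # Run a startup check once when the page loads.
    demo.load(initialize_models, outputs=status)

if __name__ == "__main__":
    demo.launch()
```

Binding worker functions directly keeps the event graph flat; the wrappers in the removed code added a layer of indirection without changing behavior.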