Spaces:

msmaje
/

ragmodel

Sleeping

App Files Files Community

msmaje committed on Jul 2, 2025

Commit

7cd9b93

verified ·

1 Parent(s): 6a0c640

Update app.py

Browse files

Files changed (1) hide show

app.py +614 -90

app.py CHANGED Viewed

@@ -11,187 +11,711 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 try:
-    from langchain_community.document_loaders import PyPDFDirectoryLoader
     from langchain.text_splitter import RecursiveCharacterTextSplitter
     from langchain_community.embeddings import HuggingFaceEmbeddings
     from langchain_community.vectorstores import FAISS
     from langchain.prompts import PromptTemplate
     from langchain.chains import RetrievalQA
-    from langchain.llms import HuggingFaceHub
     LANGCHAIN_AVAILABLE = True
 except ImportError as e:
     logger.error(f"LangChain import error: {e}")
     LANGCHAIN_AVAILABLE = False
 PDF_FOLDER_PATH = "./pdfs"
 os.makedirs(PDF_FOLDER_PATH, exist_ok=True)
 vectorstore = None
 retrieval_qa = None
 embedding_model = None
 PRELOADED_PDFS = os.path.exists(PDF_FOLDER_PATH) and len(os.listdir(PDF_FOLDER_PATH)) > 0
 def initialize_models():
     global embedding_model
     try:
         embedding_model = HuggingFaceEmbeddings(
             model_name="sentence-transformers/all-MiniLM-L6-v2",
             model_kwargs={'device': 'cpu'}
         )
         hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
         if not hf_token:
-            return False, "❌ HuggingFace API token not found"
-        return True, "✅ Models initialized"
     except Exception as e:
-        logger.error(f"Init error: {e}")
-        return False, str(e)
 def create_llm():
     hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
-    if not hf_token:
         return create_fallback_llm()
-    models_to_try = [
-        "mistralai/Mistral-7B-Instruct-v0.2",
-        "google/flan-t5-base"
-    ]
-    for model_id in models_to_try:
-        try:
-            llm = HuggingFaceHub(
-                repo_id=model_id,
-                huggingfacehub_api_token=hf_token,
-                model_kwargs={
-                    "temperature": 0.7,
-                    "max_length": 512,
-                    "top_p": 0.9,
-                    "top_k": 50
-                }
-            )
-            return llm
-        except Exception as e:
-            logger.warning(f"Model {model_id} failed: {e}")
-    return create_fallback_llm()
 def create_fallback_llm():
     class FallbackLLM:
         def __call__(self, prompt):
-            return "Model is unavailable. Try again later."
         def invoke(self, prompt):
             return self.__call__(prompt)
     return FallbackLLM()
 def load_preloaded_pdfs(chunk_size=1000, chunk_overlap=200):
     global vectorstore, retrieval_qa, embedding_model
     if not LANGCHAIN_AVAILABLE:
-        return "❌ LangChain not available"
     if not PRELOADED_PDFS:
-        return "❌ No PDFs found"
     try:
         if embedding_model is None:
-            success, msg = initialize_models()
             if not success:
-                return msg
         loader = PyPDFDirectoryLoader(PDF_FOLDER_PATH)
         documents = loader.load()
         if not documents:
-            return "❌ No documents loaded"
-        splitter = RecursiveCharacterTextSplitter(
-            chunk_size=chunk_size, chunk_overlap=chunk_overlap
         )
-        chunks = splitter.split_documents(documents)
         vectorstore = FAISS.from_documents(chunks, embedding_model)
         retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
         prompt_template = """
-Use the following context to answer the question. If you cannot find the answer, say so.
 Context:
 {context}
 Question: {question}
-Answer:
 """
         prompt = PromptTemplate(
-            input_variables=["context", "question"],
             template=prompt_template
         )
         llm = create_llm()
-        retrieval_qa = RetrievalQA.from_chain_type(
-            llm=llm,
-            chain_type="stuff",
-            retriever=retriever,
-            return_source_documents=True,
-            chain_type_kwargs={"prompt": prompt}
         )
-        return f"✅ {len(documents)} docs loaded, {len(chunks)} chunks"
     except Exception as e:
-        return f"❌ Error: {str(e)}"
 def answer_question(question):
     global retrieval_qa
     if not question.strip():
-        return "❌ Enter a question", ""
     if retrieval_qa is None:
-        return "❌ Process documents first", ""
     try:
         result = retrieval_qa({"query": question})
-        answer = result.get("result", "No answer")
         sources = []
         for i, doc in enumerate(result.get("source_documents", []), 1):
             source = doc.metadata.get("source", "Unknown")
             page = doc.metadata.get("page", "Unknown")
-            preview = doc.page_content[:200] + "..." if len(doc.page_content) > 200 else doc.page_content
-            sources.append(f"**Source {i}:** {Path(source).name} (Page {page})\n{preview}")
-        return answer, "\n\n".join(sources)
     except Exception as e:
-        return f"❌ Error: {str(e)}", ""
 def create_interface():
-    with gr.Blocks(title="RAG PDF QA") as demo:
-        gr.Markdown("## PDF QA with LangChain + HuggingFaceHub")
         with gr.Row():
-            with gr.Column():
-                pdf_files = gr.File(file_types=[".pdf"], file_count="multiple", label="Upload PDFs")
-                chunk_size = gr.Slider(200, 2000, value=1000, label="Chunk Size")
-                chunk_overlap = gr.Slider(0, 500, value=200, label="Chunk Overlap")
-                process_btn = gr.Button("🔄 Process PDFs")
-                process_output = gr.Textbox(label="Processing Result")
-            with gr.Column():
-                question = gr.Textbox(label="Ask a Question")
-                ask_btn = gr.Button("🤔 Ask")
-                answer = gr.Textbox(label="Answer")
-                sources = gr.Textbox(label="Sources")
         process_btn.click(
             fn=load_preloaded_pdfs,
             inputs=[chunk_size, chunk_overlap],
             outputs=[process_output]
         )
         ask_btn.click(
             fn=answer_question,
-            inputs=[question],
-            outputs=[answer, sources]
         )
     return demo
 if __name__ == "__main__":
-    demo = create_interface()
-    demo.launch(share=True)

 logger = logging.getLogger(__name__)
 try:
+    from langchain_community.document_loaders import PyPDFDirectoryLoader, PyPDFLoader
     from langchain.text_splitter import RecursiveCharacterTextSplitter
     from langchain_community.embeddings import HuggingFaceEmbeddings
     from langchain_community.vectorstores import FAISS
     from langchain.prompts import PromptTemplate
     from langchain.chains import RetrievalQA
+    # This is the key change: Import HuggingFaceHub instead of HuggingFaceEndpoint
+    from langchain_community.llms import HuggingFaceHub
     LANGCHAIN_AVAILABLE = True
 except ImportError as e:
     logger.error(f"LangChain import error: {e}")
     LANGCHAIN_AVAILABLE = False
+# Create PDFs folder if it doesn't exist
 PDF_FOLDER_PATH = "./pdfs"
 os.makedirs(PDF_FOLDER_PATH, exist_ok=True)
+# Global variables for the RAG system
 vectorstore = None
 retrieval_qa = None
 embedding_model = None
+# True only when the folder already holds at least one PDF. Counting every
+# directory entry would let a stray non-PDF file (for example a placeholder
+# kept in the repo) enable the pre-loaded workflow with nothing to load.
+# The existence check is unnecessary: makedirs above guarantees the folder.
+PRELOADED_PDFS = any(
+    entry.lower().endswith(".pdf") for entry in os.listdir(PDF_FOLDER_PATH)
+)
 def initialize_models():
+    """Create the shared embedding model after checking for the HF API token.
+
+    Only the embedding model is built here; the LLM itself is created
+    separately by ``create_llm``.
+
+    Returns:
+        tuple[bool, str]: ``(True, message)`` when the embedding model is ready,
+        otherwise ``(False, message)`` describing why initialization failed.
+    """
     global embedding_model
+    # Check the token *before* assigning the global. Callers only invoke this
+    # function while ``embedding_model is None``; if the model were stored
+    # first and the token check failed afterwards, the next call would skip
+    # this function entirely and the missing token would go unreported.
+    hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
+    if not hf_token:
+        return False, "❌ HuggingFace API token not found in environment variables"
     try:
         embedding_model = HuggingFaceEmbeddings(
             model_name="sentence-transformers/all-MiniLM-L6-v2",
             model_kwargs={'device': 'cpu'}
         )
+        return True, "✅ Models initialized successfully"
     except Exception as e:
+        logger.error(f"Model initialization error: {e}")
+        return False, f"❌ Error initializing models: {str(e)}"
 def create_llm():
+    """Build the hosted LLM client; return the canned-response stub on failure."""
+    # repo_id must point at a text-generation model.
+    model_id = "mistralai/Mistral-7B-Instruct-v0.2"
+    generation_kwargs = {
+        "temperature": 0.7,
+        # NOTE: max_new_tokens is typically preferred for generation length.
+        "max_length": 512,
+        "do_sample": True,
+        "top_p": 0.9,
+        "top_k": 50,
+    }
     hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
+    try:
+        hub_llm = HuggingFaceHub(
+            repo_id=model_id,
+            huggingfacehub_api_token=hf_token,
+            model_kwargs=generation_kwargs,
+        )
+        logger.info(f"Successfully initialized LLM with model: {model_id}")
+        return hub_llm
+    except Exception as e:
+        # Any construction failure degrades to the stub, which never calls
+        # the HuggingFace API.
+        logger.error(f"LLM creation error: {e}")
         return create_fallback_llm()
 def create_fallback_llm():
+    """Return a stub "LLM" that answers every prompt with a fixed apology."""
+    apology = "I apologize, but I'm experiencing technical difficulties with the language model. Please try again later or contact support."
+    # NOTE(review): this is a plain callable object, not a LangChain LLM
+    # subclass - confirm the QA chain accepts it where it is used.
     class FallbackLLM:
         def __call__(self, prompt):
+            # The prompt is ignored on purpose; the reply is always the same.
+            return apology
         def invoke(self, prompt):
+            return self(prompt)
     return FallbackLLM()
 def load_preloaded_pdfs(chunk_size=1000, chunk_overlap=200):
+    """Index the PDFs stored in ``PDF_FOLDER_PATH`` and build the QA chain.
+
+    Args:
+        chunk_size: Chunk length handed to the text splitter (cast to int).
+        chunk_overlap: Overlap between consecutive chunks (cast to int).
+
+    Returns:
+        str: A status message for the UI describing success or the failure.
+    """
     global vectorstore, retrieval_qa, embedding_model
+    # Cheap precondition checks come first.
     if not LANGCHAIN_AVAILABLE:
+        return "❌ LangChain is not available. Please check the installation."
     if not PRELOADED_PDFS:
+        return "❌ No pre-loaded PDFs found in ./pdfs folder."
     try:
+        # Create the embedding model lazily, on first use.
+        if embedding_model is None:
+            ok, status = initialize_models()
+            if not ok:
+                return status
+        documents = PyPDFDirectoryLoader(PDF_FOLDER_PATH).load()
+        if not documents:
+            return "❌ No documents were loaded from the PDFs folder."
+        splitter = RecursiveCharacterTextSplitter(
+            chunk_size=int(chunk_size),
+            chunk_overlap=int(chunk_overlap)
+        )
+        chunks = splitter.split_documents(documents)
+        # Embed the chunks and expose the five nearest ones per query.
         vectorstore = FAISS.from_documents(chunks, embedding_model)
         retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
+        qa_template = """
+Use the following context to answer the question. If you cannot find the answer in the context, say "I don't have enough information to answer this question."
 Context:
 {context}
 Question: {question}
+Helpful Answer:
 """
+        qa_prompt = PromptTemplate(
+            input_variables=["context", "question"],
+            template=qa_template
+        )
+        llm = create_llm()
+        # Chain construction gets its own handler so the UI message can say
+        # which step failed.
+        try:
+            retrieval_qa = RetrievalQA.from_chain_type(
+                llm=llm,
+                chain_type="stuff",
+                retriever=retriever,
+                return_source_documents=True,
+                chain_type_kwargs={"prompt": qa_prompt}
+            )
+        except Exception as chain_error:
+            logger.error(f"Chain creation error: {chain_error}")
+            return f"❌ Error creating QA chain: {str(chain_error)}"
+        pdf_count = sum(1 for name in os.listdir(PDF_FOLDER_PATH) if name.endswith('.pdf'))
+        return f"✅ Successfully processed {len(documents)} documents from {pdf_count} PDF files into {len(chunks)} chunks. Ready for questions!"
+    except Exception as e:
+        logger.error(f"Pre-loaded PDF processing error: {e}")
+        return f"❌ Error processing pre-loaded PDFs: {str(e)}"
+def extract_zip_to_pdfs(zip_file):
+    """Copy every PDF inside an uploaded ZIP archive into ``PDF_FOLDER_PATH``.
+
+    Archive sub-folders are flattened: each PDF is written directly into the
+    PDFs folder under its base name. When two members share a base name the
+    later one overwrites the earlier one, and the reported count reflects the
+    files actually present afterwards.
+
+    Args:
+        zip_file: Uploaded file object whose ``.name`` is the archive path;
+            falsy when nothing was uploaded.
+
+    Returns:
+        str: A status message for the UI.
+    """
+    global PRELOADED_PDFS
+    if not zip_file:
+        return "❌ Please upload a ZIP file."
+    try:
+        # Create PDFs directory if it doesn't exist
+        os.makedirs(PDF_FOLDER_PATH, exist_ok=True)
+        written = set()
+        with zipfile.ZipFile(zip_file.name, 'r') as zip_ref:
+            # Only PDF members are of interest
+            pdf_members = [m for m in zip_ref.namelist() if m.lower().endswith('.pdf')]
+            if not pdf_members:
+                return "❌ No PDF files found in the ZIP archive."
+            for member in pdf_members:
+                # Stream the member straight to <PDF folder>/<base name>.
+                # Using only the base name flattens sub-folders without a
+                # separate move/cleanup pass, and a crafted member path
+                # ("../x.pdf", "/abs/x.pdf") cannot land outside the folder.
+                file_name = os.path.basename(member)
+                target_path = os.path.join(PDF_FOLDER_PATH, file_name)
+                with zip_ref.open(member) as src, open(target_path, "wb") as dst:
+                    shutil.copyfileobj(src, dst)
+                written.add(file_name)
+        PRELOADED_PDFS = True
+        return f"✅ Successfully extracted {len(written)} PDF files. Now click 'Load Pre-existing PDFs' to process them."
+    except Exception as e:
+        return f"❌ Error extracting ZIP file: {str(e)}"
+def process_pdfs(pdf_files, chunk_size, chunk_overlap):
+    """Index uploaded PDF files and build the QA chain.
+
+    Args:
+        pdf_files: Uploaded file objects (each exposing its path as ``.name``);
+            ``None`` entries are skipped and an empty/falsy value is rejected.
+        chunk_size: Chunk length handed to the text splitter (cast to int).
+        chunk_overlap: Overlap between consecutive chunks (cast to int).
+
+    Returns:
+        str: A status message for the UI describing success or the failure.
+    """
+    global vectorstore, retrieval_qa, embedding_model
+    if not LANGCHAIN_AVAILABLE:
+        return "❌ LangChain is not available. Please check the installation."
+    if not pdf_files:
+        return "❌ Please upload at least one PDF file or use pre-loaded PDFs."
+    try:
+        # Initialize models if not already done
+        if embedding_model is None:
+            success, message = initialize_models()
+            if not success:
+                return message
+        # Stage the uploads in a temporary directory. The context manager
+        # removes it on every exit path (early return or exception), so a
+        # failed run no longer leaves the staged copies behind.
+        with tempfile.TemporaryDirectory() as temp_dir:
+            for pdf_file in pdf_files:
+                if pdf_file is not None:
+                    temp_path = os.path.join(temp_dir, os.path.basename(pdf_file.name))
+                    shutil.copy2(pdf_file.name, temp_path)
+            # Load while the staged files still exist; the returned documents
+            # are used from memory afterwards.
+            loader = PyPDFDirectoryLoader(temp_dir)
+            documents = loader.load()
+        if not documents:
+            return "❌ No documents were loaded. Please check your PDF files."
+        # Split documents into chunks
+        text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=int(chunk_size),
+            chunk_overlap=int(chunk_overlap)
+        )
+        chunks = text_splitter.split_documents(documents)
+        # Create vector store
+        vectorstore = FAISS.from_documents(chunks, embedding_model)
+        retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
+        # Setup prompt template
+        prompt_template = """
+Use the following context to answer the question. If you cannot find the answer in the context, say "I don't have enough information to answer this question."
+Context:
+{context}
+Question: {question}
+Helpful Answer:
+"""
+        prompt = PromptTemplate(
+            input_variables=["context", "question"],
+            template=prompt_template
+        )
+        llm = create_llm()
+        # Chain construction gets its own handler so the UI message can say
+        # which step failed.
+        try:
+            retrieval_qa = RetrievalQA.from_chain_type(
+                llm=llm,
+                chain_type="stuff",
+                retriever=retriever,
+                return_source_documents=True,
+                chain_type_kwargs={"prompt": prompt}
+            )
+        except Exception as chain_error:
+            logger.error(f"Chain creation error: {chain_error}")
+            return f"❌ Error creating QA chain: {str(chain_error)}"
+        return f"✅ Successfully processed {len(documents)} documents into {len(chunks)} chunks. Ready for questions!"
     except Exception as e:
+        logger.error(f"PDF processing error: {e}")
+        return f"❌ Error processing PDFs: {str(e)}"
 def answer_question(question):
+    """Answer a question with the RAG chain, degrading to raw retrieval on error.
+
+    Args:
+        question: Question text from the UI; ``None`` and blank strings are
+            rejected with a prompt to enter a question.
+
+    Returns:
+        tuple[str, str]: ``(answer, sources)``. On validation or unrecoverable
+        errors the first item is an error message and the second is ``""``.
+    """
+    # ``not question`` also covers None, which has no .strip().
+    if not question or not question.strip():
+        return "❌ Please enter a question.", ""
     if retrieval_qa is None:
+        return "❌ Please upload and process PDF files first.", ""
     try:
+        # No timeout is applied to this call; any failure is handled by the
+        # except branch below.
         result = retrieval_qa({"query": question})
+        answer = result.get("result", "No answer generated")
+        # Format source documents
         sources = []
         for i, doc in enumerate(result.get("source_documents", []), 1):
             source = doc.metadata.get("source", "Unknown")
             page = doc.metadata.get("page", "Unknown")
+            content_preview = doc.page_content[:200] + "..." if len(doc.page_content) > 200 else doc.page_content
+            sources.append(f"**Source {i}:**\n- File: {Path(source).name}\n- Page: {page}\n- Preview: {content_preview}\n")
+        sources_text = "\n".join(sources) if sources else "No sources found."
+        return answer, sources_text
     except Exception as e:
+        logger.error(f"Question answering error: {e}")
+        # The chain failed: fall back to showing the closest excerpts from
+        # the vector store, if one exists.
+        try:
+            if vectorstore is not None:
+                docs = vectorstore.similarity_search(question, k=3)
+                fallback_answer = "I found some relevant content in your documents:\n\n"
+                sources = []
+                for i, doc in enumerate(docs, 1):
+                    source = doc.metadata.get("source", "Unknown")
+                    page = doc.metadata.get("page", "Unknown")
+                    content_preview = doc.page_content[:300] + "..." if len(doc.page_content) > 300 else doc.page_content
+                    fallback_answer += f"**Excerpt {i}:** {content_preview}\n\n"
+                    sources.append(f"**Source {i}:**\n- File: {Path(source).name}\n- Page: {page}\n")
+                sources_text = "\n".join(sources)
+                return fallback_answer + "\n*Note: This is a direct search result due to a technical issue with the AI model.*", sources_text
+            else:
+                return f"❌ Error answering question: {str(e)}", ""
+        except Exception as fallback_error:
+            logger.error(f"Fallback error: {fallback_error}")
+            # Report the original chain error, not the fallback's.
+            return f"❌ Error answering question: {str(e)}", ""
+def get_device_info():
+    """Return an inline ``<script>`` snippet for viewport-width detection.
+
+    Nothing is detected on the Python side. The returned markup defines
+    ``isMobile()`` (viewport width <= 768px) and sets the ``--mobile-mode``
+    CSS custom property on the document root to '1' or '0', re-evaluating on
+    every window resize.
+
+    Returns:
+        str: The HTML/JavaScript snippet.
+    """
+    # NOTE(review): confirm that the component this is passed to actually
+    # executes embedded <script> tags.
+    return """
+    <script>
+    function isMobile() {
+        return window.innerWidth <= 768;
+    }
+    function adjustLayout() {
+        const isMob = isMobile();
+        const root = document.documentElement;
+        if (isMob) {
+            root.style.setProperty('--mobile-mode', '1');
+        } else {
+            root.style.setProperty('--mobile-mode', '0');
+        }
+    }
+    window.addEventListener('resize', adjustLayout);
+    adjustLayout();
+    </script>
+    """
 def create_interface():
+    """Build the responsive Gradio Blocks UI and wire up its event handlers.
+
+    The layout has a document-management column (PDF upload, ZIP upload,
+    pre-loaded PDFs, chunking settings, status box) and a Q&A column
+    (question box, answer and sources).
+
+    Returns:
+        The ``gr.Blocks`` app, ready to be launched.
+    """
+    # Custom CSS for better responsiveness
+    custom_css = """
+    /* Base responsive styles */
+    .gradio-container {
+        max-width: 100% !important;
+        margin: 0 auto;
+        padding: 10px;
+    }
+    /* Mobile-first responsive design */
+    @media (max-width: 768px) {
+        .gradio-container {
+            padding: 5px;
+        }
+        /* Stack elements vertically on mobile */
+        .gr-row {
+            flex-direction: column !important;
+            gap: 10px !important;
+        }
+        /* Full width on mobile */
+        .gr-column {
+            width: 100% !important;
+            min-width: 100% !important;
+        }
+        /* Adjust component spacing */
+        .gr-form > * {
+            margin-bottom: 8px !important;
+        }
+        /* Better button sizing */
+        .gr-button {
+            width: 100% !important;
+            min-height: 44px !important;
+            font-size: 14px !important;
+        }
+        /* Text input improvements */
+        .gr-textbox textarea {
+            min-height: 60px !important;
+            font-size: 16px !important; /* Prevents zoom on iOS */
+        }
+        /* File upload improvements */
+        .gr-file {
+            min-height: 100px !important;
+        }
+        /* Slider improvements */
+        .gr-slider {
+            margin: 10px 0 !important;
+        }
+        /* Tab improvements */
+        .gr-tab-nav {
+            flex-wrap: wrap !important;
+        }
+        .gr-tab-nav > button {
+            flex: 1 1 auto !important;
+            min-width: 80px !important;
+            font-size: 12px !important;
+        }
+    }
+    /* Tablet styles */
+    @media (min-width: 769px) and (max-width: 1024px) {
+        .gradio-container {
+            padding: 15px;
+        }
+        .gr-button {
+            min-height: 40px !important;
+        }
+    }
+    /* Desktop styles */
+    @media (min-width: 1025px) {
+        .gradio-container {
+            max-width: 1400px;
+            padding: 20px;
+        }
+    }
+    /* Improve readability */
+    .gr-markdown h1 {
+        font-size: clamp(1.5rem, 4vw, 2.5rem) !important;
+        line-height: 1.2 !important;
+        margin-bottom: 1rem !important;
+    }
+    .gr-markdown h3 {
+        font-size: clamp(1.1rem, 3vw, 1.4rem) !important;
+        margin: 1rem 0 0.5rem 0 !important;
+    }
+    .gr-markdown p, .gr-markdown li {
+        font-size: clamp(0.9rem, 2.5vw, 1rem) !important;
+        line-height: 1.5 !important;
+    }
+    /* Status text improvements */
+    .gr-textbox[data-testid="textbox"] {
+        font-family: monospace !important;
+        font-size: clamp(0.8rem, 2vw, 0.9rem) !important;
+    }
+    /* Accessibility improvements */
+    .gr-button:focus,
+    .gr-textbox:focus,
+    .gr-file:focus {
+        outline: 2px solid #2563eb !important;
+        outline-offset: 2px !important;
+    }
+    /* Dark mode considerations */
+    @media (prefers-color-scheme: dark) {
+        .gr-button {
+            border: 1px solid #374151 !important;
+        }
+    }
+    """
+    with gr.Blocks(
+        title="PDF RAG System",
+        theme=gr.themes.Soft(),
+        css=custom_css
+    ) as demo:
+        # Add device detection script
+        gr.HTML(get_device_info())
+        gr.Markdown("""
+        # 📚 PDF Question Answering System
+        Upload your PDF documents and ask questions about their content!
+        **Quick Start:**
+        1. Upload PDFs or use pre-loaded ones
+        2. Click Process to prepare your documents
+        3. Ask questions about the content
+        """)
+        # Banner shown only when PDFs were already present at build time
+        if PRELOADED_PDFS:
+            gr.Markdown("""
+            <div style="background: linear-gradient(90deg, #10b981, #059669);
+                                color: white; padding: 12px; border-radius: 8px; margin: 10px 0;">
+            🎉 <strong>Pre-loaded PDFs detected!</strong> Use the 'Load Pre-existing PDFs' button to get started quickly.
+            </div>
+            """)
+        # Main layout - responsive columns
         with gr.Row():
+            # Left column - Upload & Settings (collapses to full width on mobile)
+            with gr.Column(scale=1, min_width=300):
+                gr.Markdown("### 📄 Document Management")
+                with gr.Tabs():
+                    with gr.TabItem("📁 Upload PDFs"):
+                        pdf_files = gr.File(
+                            label="Select PDF Files",
+                            file_count="multiple",
+                            file_types=[".pdf"],
+                            height=120
+                        )
+                        process_btn = gr.Button(
+                            "🔄 Process PDFs",
+                            variant="primary",
+                            size="lg"
+                        )
+                    with gr.TabItem("🗂️ ZIP Upload"):
+                        zip_file = gr.File(
+                            label="Upload ZIP (with PDFs)",
+                            file_count="single",
+                            file_types=[".zip"],
+                            height=80
+                        )
+                        extract_btn = gr.Button(
+                            "📦 Extract ZIP",
+                            variant="secondary",
+                            size="lg"
+                        )
+                        extract_output = gr.Textbox(
+                            label="Extraction Status",
+                            lines=2,
+                            max_lines=3
+                        )
+                    with gr.TabItem("💾 Pre-loaded"):
+                        # This listing is a snapshot taken when the UI is built.
+                        if PRELOADED_PDFS:
+                            pdf_list = [f for f in os.listdir(PDF_FOLDER_PATH) if f.endswith('.pdf')]
+                            gr.Markdown(f"**Found {len(pdf_list)} PDF files**")
+                            # Show files in a more mobile-friendly way
+                            if len(pdf_list) <= 5:
+                                for pdf in pdf_list:
+                                    gr.Markdown(f"📄 {pdf}")
+                            else:
+                                for pdf in pdf_list[:3]:
+                                    gr.Markdown(f"📄 {pdf}")
+                                gr.Markdown(f"*... and {len(pdf_list) - 3} more files*")
+                        else:
+                            gr.Markdown("No pre-loaded PDFs found.")
+                        preload_btn = gr.Button(
+                            "📚 Load Pre-existing PDFs",
+                            variant="primary",
+                            size="lg",
+                            interactive=PRELOADED_PDFS
+                        )
+                # Settings section - collapsible on mobile
+                with gr.Accordion("⚙️ Advanced Settings", open=False):
+                    chunk_size = gr.Slider(
+                        minimum=200,
+                        maximum=2000,
+                        value=1000,
+                        step=100,
+                        label="Chunk Size",
+                        info="Larger chunks = more context, smaller = more precise"
+                    )
+                    chunk_overlap = gr.Slider(
+                        minimum=0,
+                        maximum=500,
+                        value=200,
+                        step=50,
+                        label="Chunk Overlap",
+                        info="Overlap between text chunks"
+                    )
+                # Status display
+                process_output = gr.Textbox(
+                    label="📊 Processing Status",
+                    lines=3,
+                    max_lines=5,
+                    placeholder="Status updates will appear here..."
+                )
+            # Right column - Q&A Section (collapses to full width on mobile)
+            with gr.Column(scale=2, min_width=400):
+                gr.Markdown("### ❓ Ask Questions")
+                question_input = gr.Textbox(
+                    label="Your Question",
+                    placeholder="What would you like to know about your documents?",
+                    lines=2,
+                    max_lines=4
+                )
+                ask_btn = gr.Button(
+                    "🤔 Ask Question",
+                    variant="secondary",
+                    size="lg"
+                )
+                # Results section - stack vertically on mobile
+                with gr.Row():
+                    answer_output = gr.Textbox(
+                        label="💡 Answer",
+                        lines=6,
+                        max_lines=12,
+                        placeholder="Your answer will appear here..."
+                    )
+                    sources_output = gr.Textbox(
+                        label="📚 Sources",
+                        lines=6,
+                        max_lines=12,
+                        placeholder="Source references will appear here..."
+                    )
+        # Event handlers
         process_btn.click(
+            fn=process_pdfs,
+            inputs=[pdf_files, chunk_size, chunk_overlap],
+            outputs=[process_output]
+        )
+        preload_btn.click(
             fn=load_preloaded_pdfs,
             inputs=[chunk_size, chunk_overlap],
             outputs=[process_output]
         )
+        extract_btn.click(
+            fn=extract_zip_to_pdfs,
+            inputs=[zip_file],
+            outputs=[extract_output]
+        ).then(
+            # The pre-load button is created with interactive=PRELOADED_PDFS,
+            # i.e. disabled when the folder was empty at build time. The
+            # extraction status tells the user to click that button next, so
+            # re-read the flag once extraction has finished and enable it.
+            fn=lambda: gr.update(interactive=bool(PRELOADED_PDFS)),
+            inputs=None,
+            outputs=[preload_btn]
+        )
         ask_btn.click(
             fn=answer_question,
+            inputs=[question_input],
+            outputs=[answer_output, sources_output]
         )
+        # Pressing Enter in the question box behaves like the Ask button
+        question_input.submit(
+            fn=answer_question,
+            inputs=[question_input],
+            outputs=[answer_output, sources_output]
+        )
+        # Example questions - more mobile-friendly
+        with gr.Accordion("💡 Example Questions", open=False):
+            gr.Markdown("""
+            **Try asking:**
+            - What are the main topics in these documents?
+            - Can you summarize the key findings?
+            - What data is available for [specific topic]?
+            - What are the differences between X and Y?
+            """)
+        # Footer with helpful info
+        gr.Markdown("""
+        ---
+        <div style="text-align: center; color: #666; font-size: 0.9em;">
+        💡 <strong>Tip:</strong> For best results, ask specific questions about your documents
+        </div>
+        """)
     return demo
 if __name__ == "__main__":
+    # The UI is identical in both environments; only launch options differ.
+    demo = create_interface()
+    if os.getenv("SPACE_ID"):
+        # SPACE_ID is set: treat this as a HuggingFace Spaces deployment and
+        # bind explicitly to all interfaces on port 7860, without a share link.
+        launch_options = {
+            "server_name": "0.0.0.0",
+            "server_port": 7860,
+            "share": False,
+        }
+    else:
+        # Local development: request a public share link.
+        launch_options = {"share": True}
+    demo.launch(**launch_options)