Spaces:

msmaje
/

ragmodel

Sleeping

App Files Files Community

msmaje committed on Jul 2, 2025

Commit

6c0a884

verified ·

1 Parent(s): 30050c4

Update app.py

Browse files

Files changed (1) hide show

app.py +284 -77

app.py CHANGED Viewed

@@ -17,19 +17,22 @@ try:
     from langchain_community.vectorstores import FAISS
     from langchain.prompts import PromptTemplate
     from langchain.chains import RetrievalQA
-    from langchain_community.llms import HuggingFaceHub
     LANGCHAIN_AVAILABLE = True
 except ImportError as e:
     logger.error(f"LangChain import error: {e}")
     LANGCHAIN_AVAILABLE = False
 # Global variables for the RAG system
 vectorstore = None
 retrieval_qa = None
 embedding_model = None
 # Check for pre-existing PDF folder
-PDF_FOLDER_PATH = "./pdfs"  # Default folder for PDFs in the space
 PRELOADED_PDFS = os.path.exists(PDF_FOLDER_PATH) and len(os.listdir(PDF_FOLDER_PATH)) > 0
 def initialize_models():
@@ -48,19 +51,25 @@ def initialize_models():
         if not hf_token:
             return False, "❌ HuggingFace API token not found in environment variables"
-        # Initialize LLM
-        llm = HuggingFaceHub(
-            repo_id="microsoft/DialoGPT-medium",
-            model_kwargs={"temperature": 0.7, "max_new_tokens": 512},
-            huggingfacehub_api_token=hf_token
-        )
         return True, "✅ Models initialized successfully"
     except Exception as e:
         logger.error(f"Model initialization error: {e}")
         return False, f"❌ Error initializing models: {str(e)}"
 def load_preloaded_pdfs(chunk_size=1000, chunk_overlap=200):
     """Load PDFs from the pre-existing folder"""
     global vectorstore, retrieval_qa, embedding_model
@@ -112,13 +121,8 @@ Helpful Answer:
             template=prompt_template
         )
-        # Initialize LLM
-        hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
-        llm = HuggingFaceHub(
-            repo_id="google/flan-t5-base",
-            model_kwargs={"temperature": 0.7, "max_new_tokens": 512},
-            huggingfacehub_api_token=hf_token
-        )
         # Create RetrievalQA chain
         retrieval_qa = RetrievalQA.from_chain_type(
@@ -175,6 +179,7 @@ def extract_zip_to_pdfs(zip_file):
     except Exception as e:
         return f"❌ Error extracting ZIP file: {str(e)}"
 def process_pdfs(pdf_files, chunk_size, chunk_overlap):
     """Process uploaded PDF files and create vector store"""
     global vectorstore, retrieval_qa, embedding_model
@@ -235,13 +240,8 @@ Helpful Answer:
             template=prompt_template
         )
-        # Initialize LLM
-        hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
-        llm = HuggingFaceHub(
-            repo_id="google/flan-t5-base",
-            model_kwargs={"temperature": 0.7, "max_new_tokens": 512},
-            huggingfacehub_api_token=hf_token
-        )
         # Create RetrievalQA chain
         retrieval_qa = RetrievalQA.from_chain_type(
@@ -294,71 +294,256 @@ def answer_question(question):
         logger.error(f"Question answering error: {e}")
         return f"❌ Error answering question: {str(e)}", ""
 def create_interface():
-    """Create the Gradio interface"""
-    with gr.Blocks(title="PDF RAG System", theme=gr.themes.Soft()) as demo:
         gr.Markdown("""
         # 📚 PDF Question Answering System
         Upload your PDF documents and ask questions about their content!
-        **Instructions:**
-        1. **Option A**: Upload individual PDF files and click "Process PDFs"
-        2. **Option B**: Upload a ZIP file containing PDFs and extract them
-        3. **Option C**: Use pre-loaded PDFs (if available in ./pdfs folder)
-        4. Ask questions about your documents
         """)
         # Check for pre-loaded PDFs
         if PRELOADED_PDFS:
-            gr.Markdown("🎉 **Pre-loaded PDFs detected!** You can use the 'Load Pre-existing PDFs' button.")
         with gr.Row():
-            with gr.Column(scale=1):
-                gr.Markdown("### 📄 Upload & Settings")
                 with gr.Tabs():
-                    with gr.TabItem("📁 Individual PDFs"):
                         pdf_files = gr.File(
-                            label="Upload PDF Files",
                             file_count="multiple",
                             file_types=[".pdf"],
-                            height=150
                         )
-                        process_btn = gr.Button("🔄 Process PDFs", variant="primary")
                     with gr.TabItem("🗂️ ZIP Upload"):
                         zip_file = gr.File(
-                            label="Upload ZIP File (containing PDFs)",
                             file_count="single",
                             file_types=[".zip"],
-                            height=100
                         )
-                        extract_btn = gr.Button("📦 Extract ZIP to PDFs Folder", variant="secondary")
-                        extract_output = gr.Textbox(label="Extraction Status", lines=2)
                     with gr.TabItem("💾 Pre-loaded"):
                         if PRELOADED_PDFS:
                             pdf_list = [f for f in os.listdir(PDF_FOLDER_PATH) if f.endswith('.pdf')]
-                            gr.Markdown(f"**Found {len(pdf_list)} PDF files:**")
-                            for pdf in pdf_list[:10]:  # Show first 10
-                                gr.Markdown(f"- {pdf}")
-                            if len(pdf_list) > 10:
-                                gr.Markdown(f"... and {len(pdf_list) - 10} more files")
                         else:
-                            gr.Markdown("No pre-loaded PDFs found. Place PDF files in `./pdfs/` folder.")
-                        preload_btn = gr.Button("📚 Load Pre-existing PDFs", variant="primary",
-                                              interactive=PRELOADED_PDFS)
-                with gr.Row():
                     chunk_size = gr.Slider(
                         minimum=200,
                         maximum=2000,
                         value=1000,
                         step=100,
-                        label="Chunk Size"
                     )
                     chunk_overlap = gr.Slider(
@@ -366,38 +551,52 @@ def create_interface():
                         maximum=500,
                         value=200,
                         step=50,
-                        label="Chunk Overlap"
                     )
-                process_output = gr.Textbox(label="Processing Status", lines=4)
-            with gr.Column(scale=2):
                 gr.Markdown("### ❓ Ask Questions")
                 question_input = gr.Textbox(
                     label="Your Question",
                     placeholder="What would you like to know about your documents?",
-                    lines=2
                 )
-                ask_btn = gr.Button("🤔 Ask Question", variant="secondary")
                 with gr.Row():
-                    with gr.Column():
-                        answer_output = gr.Textbox(
-                            label="Answer",
-                            lines=8,
-                            max_lines=15
-                        )
-                    with gr.Column():
-                        sources_output = gr.Textbox(
-                            label="Sources",
-                            lines=8,
-                            max_lines=15
-                        )
-        # Event handlers
         process_btn.click(
             fn=process_pdfs,
             inputs=[pdf_files, chunk_size, chunk_overlap],
@@ -428,14 +627,22 @@ def create_interface():
             outputs=[answer_output, sources_output]
         )
-        # Example questions
         gr.Markdown("""
-        ### 💡 Example Questions:
-        - What are the main topics covered in these documents?
-        - Can you summarize the key findings?
-        - What data is available for [specific topic]?
-        - What are the differences between [X] and [Y]?
-        - What are the differences in the uninsured rate by state in 2022?
         """)
     return demo

     from langchain_community.vectorstores import FAISS
     from langchain.prompts import PromptTemplate
     from langchain.chains import RetrievalQA
+    from langchain_community.llms import HuggingFaceEndpoint
     LANGCHAIN_AVAILABLE = True
 except ImportError as e:
     logger.error(f"LangChain import error: {e}")
     LANGCHAIN_AVAILABLE = False
+# Create PDFs folder if it doesn't exist
+PDF_FOLDER_PATH = "./pdfs"
+os.makedirs(PDF_FOLDER_PATH, exist_ok=True)
 # Global variables for the RAG system
 vectorstore = None
 retrieval_qa = None
 embedding_model = None
 # Check for pre-existing PDF folder
 PRELOADED_PDFS = os.path.exists(PDF_FOLDER_PATH) and len(os.listdir(PDF_FOLDER_PATH)) > 0
 def initialize_models():
         if not hf_token:
             return False, "❌ HuggingFace API token not found in environment variables"
         return True, "✅ Models initialized successfully"
     except Exception as e:
         logger.error(f"Model initialization error: {e}")
         return False, f"❌ Error initializing models: {str(e)}"
+def create_llm():  # shared factory: the added lines in load_preloaded_pdfs and process_pdfs both call this
+    """Create and return the LLM instance"""
+    hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")  # os.getenv returns None when the variable is unset; not validated here
+    llm = HuggingFaceEndpoint(
+        repo_id="google/flan-t5-base",
+        temperature=0.7,  # generation settings are passed as direct keyword arguments, not via model_kwargs
+        max_new_tokens=512,
+        huggingfacehub_api_token=hf_token
+    )
+    return llm  # a new HuggingFaceEndpoint is constructed on every call; nothing is cached
 def load_preloaded_pdfs(chunk_size=1000, chunk_overlap=200):
     """Load PDFs from the pre-existing folder"""
     global vectorstore, retrieval_qa, embedding_model
             template=prompt_template
         )
+        # Initialize LLM using the new function
+        llm = create_llm()
         # Create RetrievalQA chain
         retrieval_qa = RetrievalQA.from_chain_type(
     except Exception as e:
         return f"❌ Error extracting ZIP file: {str(e)}"
 def process_pdfs(pdf_files, chunk_size, chunk_overlap):
     """Process uploaded PDF files and create vector store"""
     global vectorstore, retrieval_qa, embedding_model
             template=prompt_template
         )
+        # Initialize LLM using the new function
+        llm = create_llm()
         # Create RetrievalQA chain
         retrieval_qa = RetrievalQA.from_chain_type(
         logger.error(f"Question answering error: {e}")
         return f"❌ Error answering question: {str(e)}", ""
+def get_device_info():  # returns a static HTML <script> snippet as a string; no detection happens in Python
+    """Simple function to detect if mobile (basic detection)"""
+    return """
+    <script>
+    function isMobile() {
+        return window.innerWidth <= 768;
+    }
+    function adjustLayout() {
+        const isMob = isMobile();
+        const root = document.documentElement;
+        if (isMob) {
+            root.style.setProperty('--mobile-mode', '1');
+        } else {
+            root.style.setProperty('--mobile-mode', '0');
+        }
+    }
+    window.addEventListener('resize', adjustLayout);
+    adjustLayout();
+    </script>
+    """  # NOTE(review): the custom_css shown in create_interface never reads --mobile-mode, and a <script> injected through gr.HTML may not execute — confirm
 def create_interface():
+    """Create the fully responsive Gradio interface"""
+    # Custom CSS for better responsiveness
+    custom_css = """
+    /* Base responsive styles */
+    .gradio-container {
+        max-width: 100% !important;
+        margin: 0 auto;
+        padding: 10px;
+    }
+    /* Mobile-first responsive design */
+    @media (max-width: 768px) {
+        .gradio-container {
+            padding: 5px;
+        }
+        /* Stack elements vertically on mobile */
+        .gr-row {
+            flex-direction: column !important;
+            gap: 10px !important;
+        }
+        /* Full width on mobile */
+        .gr-column {
+            width: 100% !important;
+            min-width: 100% !important;
+        }
+        /* Adjust component spacing */
+        .gr-form > * {
+            margin-bottom: 8px !important;
+        }
+        /* Better button sizing */
+        .gr-button {
+            width: 100% !important;
+            min-height: 44px !important;
+            font-size: 14px !important;
+        }
+        /* Text input improvements */
+        .gr-textbox textarea {
+            min-height: 60px !important;
+            font-size: 16px !important; /* Prevents zoom on iOS */
+        }
+        /* File upload improvements */
+        .gr-file {
+            min-height: 100px !important;
+        }
+        /* Slider improvements */
+        .gr-slider {
+            margin: 10px 0 !important;
+        }
+        /* Tab improvements */
+        .gr-tab-nav {
+            flex-wrap: wrap !important;
+        }
+        .gr-tab-nav > button {
+            flex: 1 1 auto !important;
+            min-width: 80px !important;
+            font-size: 12px !important;
+        }
+    }
+    /* Tablet styles */
+    @media (min-width: 769px) and (max-width: 1024px) {
+        .gradio-container {
+            padding: 15px;
+        }
+        .gr-button {
+            min-height: 40px !important;
+        }
+    }
+    /* Desktop styles */
+    @media (min-width: 1025px) {
+        .gradio-container {
+            max-width: 1400px;
+            padding: 20px;
+        }
+    }
+    /* Improve readability */
+    .gr-markdown h1 {
+        font-size: clamp(1.5rem, 4vw, 2.5rem) !important;
+        line-height: 1.2 !important;
+        margin-bottom: 1rem !important;
+    }
+    .gr-markdown h3 {
+        font-size: clamp(1.1rem, 3vw, 1.4rem) !important;
+        margin: 1rem 0 0.5rem 0 !important;
+    }
+    .gr-markdown p, .gr-markdown li {
+        font-size: clamp(0.9rem, 2.5vw, 1rem) !important;
+        line-height: 1.5 !important;
+    }
+    /* Status text improvements */
+    .gr-textbox[data-testid="textbox"] {
+        font-family: monospace !important;
+        font-size: clamp(0.8rem, 2vw, 0.9rem) !important;
+    }
+    /* Accessibility improvements */
+    .gr-button:focus,
+    .gr-textbox:focus,
+    .gr-file:focus {
+        outline: 2px solid #2563eb !important;
+        outline-offset: 2px !important;
+    }
+    /* Dark mode considerations */
+    @media (prefers-color-scheme: dark) {
+        .gr-button {
+            border: 1px solid #374151 !important;
+        }
+    }
+    """
+    with gr.Blocks(
+        title="PDF RAG System",
+        theme=gr.themes.Soft(),
+        css=custom_css
+    ) as demo:
+        # Add device detection script
+        gr.HTML(get_device_info())
         gr.Markdown("""
         # 📚 PDF Question Answering System
         Upload your PDF documents and ask questions about their content!
+        **Quick Start:**
+        1. Upload PDFs or use pre-loaded ones
+        2. Click Process to prepare your documents
+        3. Ask questions about the content
         """)
         # Check for pre-loaded PDFs
         if PRELOADED_PDFS:
+            gr.Markdown("""
+            <div style="background: linear-gradient(90deg, #10b981, #059669);
+                        color: white; padding: 12px; border-radius: 8px; margin: 10px 0;">
+            🎉 <strong>Pre-loaded PDFs detected!</strong> Use the 'Load Pre-existing PDFs' button to get started quickly.
+            </div>
+            """)
+        # Main layout - responsive columns
         with gr.Row():
+            # Left column - Upload & Settings (collapses to full width on mobile)
+            with gr.Column(scale=1, min_width=300):
+                gr.Markdown("### 📄 Document Management")
                 with gr.Tabs():
+                    with gr.TabItem("📁 Upload PDFs"):
                         pdf_files = gr.File(
+                            label="Select PDF Files",
                             file_count="multiple",
                             file_types=[".pdf"],
+                            height=120
+                        )
+                        process_btn = gr.Button(
+                            "🔄 Process PDFs",
+                            variant="primary",
+                            size="lg"
                         )
                     with gr.TabItem("🗂️ ZIP Upload"):
                         zip_file = gr.File(
+                            label="Upload ZIP (with PDFs)",
                             file_count="single",
                             file_types=[".zip"],
+                            height=80
+                        )
+                        extract_btn = gr.Button(
+                            "📦 Extract ZIP",
+                            variant="secondary",
+                            size="lg"
+                        )
+                        extract_output = gr.Textbox(
+                            label="Extraction Status",
+                            lines=2,
+                            max_lines=3
                         )
                     with gr.TabItem("💾 Pre-loaded"):
                         if PRELOADED_PDFS:
                             pdf_list = [f for f in os.listdir(PDF_FOLDER_PATH) if f.endswith('.pdf')]
+                            gr.Markdown(f"**Found {len(pdf_list)} PDF files**")
+                            # Show files in a more mobile-friendly way
+                            if len(pdf_list) <= 5:
+                                for pdf in pdf_list:
+                                    gr.Markdown(f"📄 {pdf}")
+                            else:
+                                for pdf in pdf_list[:3]:
+                                    gr.Markdown(f"📄 {pdf}")
+                                gr.Markdown(f"*... and {len(pdf_list) - 3} more files*")
                         else:
+                            gr.Markdown("No pre-loaded PDFs found.")
+                        preload_btn = gr.Button(
+                            "📚 Load Pre-existing PDFs",
+                            variant="primary",
+                            size="lg",
+                            interactive=PRELOADED_PDFS
+                        )
+                # Settings section - collapsible on mobile
+                with gr.Accordion("⚙️ Advanced Settings", open=False):
                     chunk_size = gr.Slider(
                         minimum=200,
                         maximum=2000,
                         value=1000,
                         step=100,
+                        label="Chunk Size",
+                        info="Larger chunks = more context, smaller = more precise"
                     )
                     chunk_overlap = gr.Slider(
                         maximum=500,
                         value=200,
                         step=50,
+                        label="Chunk Overlap",
+                        info="Overlap between text chunks"
                     )
+                # Status display
+                process_output = gr.Textbox(
+                    label="📊 Processing Status",
+                    lines=3,
+                    max_lines=5,
+                    placeholder="Status updates will appear here..."
+                )
+            # Right column - Q&A Section (collapses to full width on mobile)
+            with gr.Column(scale=2, min_width=400):
                 gr.Markdown("### ❓ Ask Questions")
                 question_input = gr.Textbox(
                     label="Your Question",
                     placeholder="What would you like to know about your documents?",
+                    lines=2,
+                    max_lines=4
                 )
+                ask_btn = gr.Button(
+                    "🤔 Ask Question",
+                    variant="secondary",
+                    size="lg"
+                )
+                # Results section - stack vertically on mobile
                 with gr.Row():
+                    answer_output = gr.Textbox(
+                        label="💡 Answer",
+                        lines=6,
+                        max_lines=12,
+                        placeholder="Your answer will appear here..."
+                    )
+                    sources_output = gr.Textbox(
+                        label="📚 Sources",
+                        lines=6,
+                        max_lines=12,
+                        placeholder="Source references will appear here..."
+                    )
+        # Event handlers (unchanged)
         process_btn.click(
             fn=process_pdfs,
             inputs=[pdf_files, chunk_size, chunk_overlap],
             outputs=[answer_output, sources_output]
         )
+        # Example questions - more mobile-friendly
+        with gr.Accordion("💡 Example Questions", open=False):
+            gr.Markdown("""
+            **Try asking:**
+            - What are the main topics in these documents?
+            - Can you summarize the key findings?
+            - What data is available for [specific topic]?
+            - What are the differences between X and Y?
+            """)
+        # Footer with helpful info
         gr.Markdown("""
+        ---
+        <div style="text-align: center; color: #666; font-size: 0.9em;">
+        💡 <strong>Tip:</strong> For best results, ask specific questions about your documents
+        </div>
         """)
     return demo