Spaces:

pradeepsengarr
/

RAG

Runtime error

App Files Files Community

pradeepsengarr commited on Jun 12, 2025

Commit

a2bdf76

verified ·

1 Parent(s): 345418e

Update app.py

Browse files

Files changed (1) hide show

app.py +729 -230

app.py CHANGED Viewed

@@ -2,313 +2,812 @@ import gradio as gr
 import requests
 import os
 import tempfile
 from PyPDF2 import PdfReader
 from sentence_transformers import SentenceTransformer
 import numpy as np
 from sklearn.metrics.pairwise import cosine_similarity
-TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY")
-SERPER_API_KEY = os.environ.get("SERPER_API_KEY")
-model = SentenceTransformer("all-MiniLM-L6-v2")
-doc_chunks = []
-doc_embeddings = []
-# --- Extract text from PDF ---
-def extract_pdf_text(file_obj):
-    reader = PdfReader(file_obj)
-    return "\n".join([page.extract_text() for page in reader.pages if page.extract_text()])
-# --- Break into small chunks ---
-def split_into_chunks(text, chunk_size=300):
-    words = text.split()
-    return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]
-# --- Embed all chunks and cache ---
 def process_uploaded_file(file):
-    global doc_chunks, doc_embeddings
     if file is None:
-        return "⚠️ No file selected", gr.update(visible=False)
     try:
-        text = extract_pdf_text(file)
-        doc_chunks = split_into_chunks(text)
-        doc_embeddings = model.encode(doc_chunks)
-        status = f"✅ Successfully processed {len(doc_chunks)} chunks from your document!"
-        return status, gr.update(visible=True, value=f"📄 Document loaded: {len(doc_chunks)} chunks ready")
     except Exception as e:
-        return f"❌ Error processing file: {str(e)}", gr.update(visible=False)
-# --- RAG from file ---
-def retrieve_relevant_chunks(query):
-    query_emb = model.encode([query])
-    sims = cosine_similarity(query_emb, doc_embeddings)[0]
-    top_indices = np.argsort(sims)[::-1][:3]
-    return "\n\n".join([doc_chunks[i] for i in top_indices])
-# --- Together LLM call ---
-def call_together_llm(context, question):
-    url = "https://api.together.xyz/v1/chat/completions"
-    headers = {
-        "Authorization": f"Bearer {TOGETHER_API_KEY}",
-        "Content-Type": "application/json"
-    }
-    messages = [
-        {"role": "system", "content": "You are a helpful assistant answering from the given context. Provide detailed, accurate responses based on the context provided."},
-        {"role": "user", "content": f"Context: {context}\n\nQuestion: {question}"}
-    ]
-    data = {
-        "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
-        "messages": messages,
-        "temperature": 0.7,
-        "max_tokens": 512
-    }
-    response = requests.post(url, headers=headers, json=data)
-    return response.json()["choices"][0]["message"]["content"]
-# --- Web search via Serper ---
-def web_search(query):
-    url = "https://google.serper.dev/search"
-    headers = {"X-API-KEY": SERPER_API_KEY}
-    payload = {"q": query}
-    response = requests.post(url, json=payload, headers=headers)
-    data = response.json()
-    results = data.get("organic", [])
-    return "\n".join([f"{r['title']} - {r['link']}\n{r['snippet']}" for r in results[:3]])
-# --- Main Chat Logic ---
-def answer_question(question, source, history):
     if not question.strip():
         return history, ""
     try:
-        # Add user question to history
-        history = history + [[question, None]]
         if source == "🌐 Web Search":
-            context = web_search(question)
-            source_info = "🌐 **Source:** Web Search"
-        elif source == "📄 Uploaded File":
-            if not doc_chunks:
                 answer = "❌ Please upload a PDF document first to use this feature."
                 history[-1][1] = answer
                 return history, ""
-            context = retrieve_relevant_chunks(question)
-            source_info = "📄 **Source:** Uploaded Document"
         else:
             answer = "❌ Please select a valid knowledge source."
             history[-1][1] = answer
             return history, ""
-        # Get answer from LLM
-        answer = call_together_llm(context, question)
-        formatted_answer = f"{source_info}\n\n{answer}"
-        # Update history with answer
-        history[-1][1] = formatted_answer
         return history, ""
     except Exception as e:
-        error_msg = f"❌ **Error:** {str(e)}\n\nPlease check your API keys and try again."
         history[-1][1] = error_msg
         return history, ""
-# --- Clear chat history ---
 def clear_chat():
     return []
-# --- Custom CSS ---
-custom_css = """
 .gradio-container {
-    max-width: 1200px !important;
     margin: auto !important;
 }
-.header-text {
     text-align: center;
-    background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
-    -webkit-background-clip: text;
-    -webkit-text-fill-color: transparent;
-    font-size: 2.5em;
-    font-weight: bold;
-    margin-bottom: 10px;
 }
-.subtitle-text {
     text-align: center;
-    color: #666;
-    font-size: 1.2em;
-    margin-bottom: 30px;
 }
-.source-radio .wrap {
-    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
     border-radius: 15px;
-    padding: 15px;
-    margin: 10px 0;
 }
-.source-radio label {
-    color: white !important;
-    font-weight: 600;
 }
-.upload-area {
-    border: 2px dashed #667eea;
     border-radius: 15px;
-    padding: 20px;
     text-align: center;
-    background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
     transition: all 0.3s ease;
 }
-.upload-area:hover {
-    border-color: #764ba2;
-    transform: translateY(-2px);
 }
-.chat-container {
-    border-radius: 15px;
-    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1);
-    background: white;
-    padding: 20px;
-    margin: 20px 0;
 }
-.status-box {
-    background: linear-gradient(135deg, #84fab0 0%, #8fd3f4 100%);
-    border-radius: 10px;
-    padding: 15px;
-    margin: 10px 0;
     border: none;
     color: #2d3748;
     font-weight: 500;
 }
-.footer-text {
     text-align: center;
-    color: #888;
-    font-size: 0.9em;
-    margin-top: 30px;
-    padding: 20px;
-    border-top: 1px solid #eee;
 }
-"""
-# --- Enhanced Gradio UI ---
-with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="🤖 RAG Chatbot") as demo:
-    # Header
-    gr.HTML("""
-        <div class="header-text">🤖 Intelligent RAG Chatbot</div>
-        <div class="subtitle-text">Ask questions from web or upload your documents for AI-powered answers</div>
-    """)
-    with gr.Row():
-        with gr.Column(scale=1):
-            # Knowledge Source Selection
-            gr.Markdown("### 🎯 **Choose Your Knowledge Source**")
-            source_choice = gr.Radio(
-                ["🌐 Web Search", "📄 Uploaded File"],
-                label="Select Knowledge Source",
-                value="🌐 Web Search",
-                elem_classes=["source-radio"]
-            )
-            # File Upload Section
-            gr.Markdown("### 📁 **Document Upload**")
-            file_input = gr.File(
-                label="Upload PDF Document",
-                file_types=[".pdf"],
-                elem_classes=["upload-area"]
-            )
-            file_status = gr.Textbox(
-                label="📊 Processing Status",
-                interactive=False,
-                elem_classes=["status-box"]
-            )
-            document_info = gr.Textbox(
-                label="📄 Document Info",
-                visible=False,
-                interactive=False,
-                elem_classes=["status-box"]
-            )
-        with gr.Column(scale=2):
-            # Chat Interface
-            gr.Markdown("### 💬 **Chat Interface**")
-            chatbot = gr.Chatbot(
-                label="Conversation",
-                height=500,
-                elem_classes=["chat-container"],
-                bubble_full_width=False,
-                show_label=False
-            )
-            with gr.Row():
-                question_input = gr.Textbox(
-                    label="Ask your question",
-                    placeholder="Type your question here... (Press Enter to send)",
-                    lines=2,
-                    scale=4
                 )
-                with gr.Column(scale=1, min_width=100):
-                    send_btn = gr.Button("🚀 Send", variant="primary", size="lg")
-                    clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="lg")
-    # Advanced Settings (Collapsible)
-    with gr.Accordion("⚙️ Advanced Settings", open=False):
-        gr.Markdown("""
-        - 🌐 **Web Search**: Get real-time information from the internet
-        - 📄 **Document Upload**: Upload PDF files and ask questions about their content
-        - 🤖 **AI-Powered**: Uses Mixtral-8x7B model for intelligent responses
-        - 🔍 **Semantic Search**: Advanced embedding-based document retrieval
         """)
-    # Footer
-    gr.HTML("""
-        <div class="footer-text">
-            🚀 Powered by Together AI & Serper API |
-            📚 Built with Sentence Transformers & Gradio |
-            💡 Enhanced RAG System
-        </div>
-    """)
-    # Event Handlers
-    file_input.change(
-        fn=process_uploaded_file,
-        inputs=file_input,
-        outputs=[file_status, document_info]
-    )
-    # Send message on button click or Enter key
-    question_input.submit(
-        fn=answer_question,
-        inputs=[question_input, source_choice, chatbot],
-        outputs=[chatbot, question_input]
-    )
-    send_btn.click(
-        fn=answer_question,
-        inputs=[question_input, source_choice, chatbot],
-        outputs=[chatbot, question_input]
-    )
-    clear_btn.click(
-        fn=clear_chat,
-        inputs=[],
-        outputs=[chatbot]
-    )
-# Launch the app
 if __name__ == "__main__":
     demo.launch(
         share=True,
-        server_name="0.0.0.0",
-        server_port=7860,
-        show_error=True
-    )

 import requests
 import os
 import tempfile
+import asyncio
+import aiohttp
 from PyPDF2 import PdfReader
 from sentence_transformers import SentenceTransformer
 import numpy as np
 from sklearn.metrics.pairwise import cosine_similarity
+import logging
+from typing import List, Dict, Tuple, Optional
+import json
+from datetime import datetime
+import hashlib
+import pickle
+from pathlib import Path
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Configuration
+class Config:
+    TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY")
+    SERPER_API_KEY = os.environ.get("SERPER_API_KEY")
+    MODEL_NAME = "all-MiniLM-L6-v2"
+    CHUNK_SIZE = 400
+    CHUNK_OVERLAP = 50
+    MAX_TOKENS = 1024
+    TEMPERATURE = 0.7
+    TOP_K_CHUNKS = 5
+    CACHE_DIR = Path("./cache")
+    def __init__(self):
+        self.CACHE_DIR.mkdir(exist_ok=True)
+config = Config()
+class DocumentProcessor:
+    """Advanced document processing with caching and optimization"""
+    def __init__(self):
+        self.model = SentenceTransformer(config.MODEL_NAME)
+        self.doc_chunks = []
+        self.doc_embeddings = []
+        self.document_metadata = {}
+    def extract_text_from_pdf(self, file_obj) -> str:
+        """Extract text from PDF with error handling"""
+        try:
+            reader = PdfReader(file_obj)
+            text_parts = []
+            for page_num, page in enumerate(reader.pages):
+                page_text = page.extract_text()
+                if page_text.strip():
+                    text_parts.append(f"[Page {page_num + 1}] {page_text}")
+            full_text = "\n".join(text_parts)
+            logger.info(f"Extracted {len(full_text)} characters from PDF")
+            return full_text
+        except Exception as e:
+            logger.error(f"PDF extraction error: {str(e)}")
+            raise ValueError(f"Failed to process PDF: {str(e)}")
+    def create_intelligent_chunks(self, text: str) -> List[str]:
+        """Create overlapping chunks with sentence boundary awareness"""
+        sentences = text.split('. ')
+        chunks = []
+        current_chunk = ""
+        for sentence in sentences:
+            test_chunk = current_chunk + sentence + ". "
+            if len(test_chunk.split()) <= config.CHUNK_SIZE:
+                current_chunk = test_chunk
+            else:
+                if current_chunk:
+                    chunks.append(current_chunk.strip())
+                current_chunk = sentence + ". "
+        if current_chunk:
+            chunks.append(current_chunk.strip())
+        # Add overlap between chunks
+        overlapped_chunks = []
+        for i, chunk in enumerate(chunks):
+            overlapped_chunks.append(chunk)
+            # Add overlapping chunk if not the last one
+            if i < len(chunks) - 1:
+                overlap_words = chunk.split()[-config.CHUNK_OVERLAP:]
+                next_words = chunks[i + 1].split()[:config.CHUNK_OVERLAP]
+                overlap_chunk = " ".join(overlap_words + next_words)
+                overlapped_chunks.append(overlap_chunk)
+        return overlapped_chunks
+    def generate_document_hash(self, file_obj) -> str:
+        """Generate hash for document caching"""
+        file_obj.seek(0)
+        content = file_obj.read()
+        file_obj.seek(0)
+        return hashlib.md5(content).hexdigest()
+    def load_cached_embeddings(self, doc_hash: str) -> Optional[Tuple[List[str], np.ndarray]]:
+        """Load cached embeddings if available"""
+        cache_file = config.CACHE_DIR / f"{doc_hash}.pkl"
+        if cache_file.exists():
+            try:
+                with open(cache_file, 'rb') as f:
+                    return pickle.load(f)
+            except Exception as e:
+                logger.warning(f"Failed to load cache: {e}")
+        return None
+    def save_embeddings_to_cache(self, doc_hash: str, chunks: List[str], embeddings: np.ndarray):
+        """Save embeddings to cache"""
+        cache_file = config.CACHE_DIR / f"{doc_hash}.pkl"
+        try:
+            with open(cache_file, 'wb') as f:
+                pickle.dump((chunks, embeddings), f)
+        except Exception as e:
+            logger.warning(f"Failed to save cache: {e}")
+    def process_document(self, file_obj) -> Tuple[str, bool]:
+        """Process uploaded document with caching"""
+        try:
+            doc_hash = self.generate_document_hash(file_obj)
+            # Try to load from cache first
+            cached_data = self.load_cached_embeddings(doc_hash)
+            if cached_data:
+                self.doc_chunks, self.doc_embeddings = cached_data
+                logger.info(f"Loaded {len(self.doc_chunks)} chunks from cache")
+                return f"✅ Successfully loaded {len(self.doc_chunks)} chunks from cache!", True
+            # Process document
+            text = self.extract_text_from_pdf(file_obj)
+            self.doc_chunks = self.create_intelligent_chunks(text)
+            # Generate embeddings
+            logger.info("Generating embeddings...")
+            self.doc_embeddings = self.model.encode(
+                self.doc_chunks,
+                batch_size=32,
+                show_progress_bar=True,
+                convert_to_numpy=True
+            )
+            # Save to cache
+            self.save_embeddings_to_cache(doc_hash, self.doc_chunks, self.doc_embeddings)
+            # Store metadata
+            self.document_metadata = {
+                'hash': doc_hash,
+                'chunks_count': len(self.doc_chunks),
+                'processed_at': datetime.now().isoformat(),
+                'total_characters': len(text)
+            }
+            return f"✅ Successfully processed {len(self.doc_chunks)} chunks from your document!", True
+        except Exception as e:
+            logger.error(f"Document processing error: {str(e)}")
+            return f"❌ Error processing document: {str(e)}", False
+    def retrieve_relevant_chunks(self, query: str, top_k: int = None) -> Tuple[str, List[float]]:
+        """Retrieve most relevant chunks with similarity scores"""
+        if not self.doc_chunks:
+            return "", []
+        top_k = top_k or config.TOP_K_CHUNKS
+        query_embedding = self.model.encode([query])
+        similarities = cosine_similarity(query_embedding, self.doc_embeddings)[0]
+        top_indices = np.argsort(similarities)[::-1][:top_k]
+        relevant_chunks = []
+        scores = []
+        for idx in top_indices:
+            if similarities[idx] > 0.1:  # Minimum similarity threshold
+                relevant_chunks.append(self.doc_chunks[idx])
+                scores.append(similarities[idx])
+        context = "\n\n---\n\n".join(relevant_chunks)
+        return context, scores
+class LLMService:
+    """Enhanced LLM service with multiple providers and error handling"""
+    @staticmethod
+    async def call_together_ai_async(context: str, question: str, system_prompt: str = None) -> str:
+        """Async call to Together AI API"""
+        url = "https://api.together.xyz/v1/chat/completions"
+        headers = {
+            "Authorization": f"Bearer {config.TOGETHER_API_KEY}",
+            "Content-Type": "application/json"
+        }
+        system_msg = system_prompt or """You are an intelligent AI assistant specializing in document analysis and web research.
+        Provide comprehensive, accurate, and well-structured responses based on the given context.
+        Use bullet points, numbered lists, and clear formatting when appropriate.
+        If the context doesn't contain enough information, clearly state what's missing."""
+        messages = [
+            {"role": "system", "content": system_msg},
+            {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}\n\nPlease provide a detailed and helpful response."}
+        ]
+        data = {
+            "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+            "messages": messages,
+            "temperature": config.TEMPERATURE,
+            "max_tokens": config.MAX_TOKENS,
+            "top_p": 0.9,
+            "repetition_penalty": 1.1
+        }
+        async with aiohttp.ClientSession() as session:
+            async with session.post(url, headers=headers, json=data) as response:
+                if response.status == 200:
+                    result = await response.json()
+                    return result["choices"][0]["message"]["content"]
+                else:
+                    raise Exception(f"API call failed with status {response.status}")
+    @staticmethod
+    def call_together_ai_sync(context: str, question: str, system_prompt: str = None) -> str:
+        """Synchronous wrapper for Together AI API"""
+        try:
+            loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(loop)
+            return loop.run_until_complete(
+                LLMService.call_together_ai_async(context, question, system_prompt)
+            )
+        except Exception as e:
+            logger.error(f"LLM API error: {str(e)}")
+            return f"❌ Sorry, I encountered an error while generating the response: {str(e)}"
+class WebSearchService:
+    """Enhanced web search with multiple sources and caching"""
+    @staticmethod
+    def search_web(query: str, num_results: int = 5) -> str:
+        """Enhanced web search with better formatting"""
+        try:
+            url = "https://google.serper.dev/search"
+            headers = {"X-API-KEY": config.SERPER_API_KEY}
+            payload = {
+                "q": query,
+                "num": num_results,
+                "type": "search"
+            }
+            response = requests.post(url, json=payload, headers=headers, timeout=10)
+            response.raise_for_status()
+            data = response.json()
+            results = data.get("organic", [])
+            if not results:
+                return "No search results found for your query."
+            formatted_results = []
+            for i, result in enumerate(results[:num_results], 1):
+                title = result.get('title', 'No title')
+                link = result.get('link', '')
+                snippet = result.get('snippet', 'No description available')
+                formatted_results.append(f"""
+**Result {i}: {title}**
+URL: {link}
+Summary: {snippet}
+""")
+            return "\n".join(formatted_results)
+        except Exception as e:
+            logger.error(f"Web search error: {str(e)}")
+            return f"❌ Search failed: {str(e)}"
+# Global instances
+doc_processor = DocumentProcessor()
+llm_service = LLMService()
+search_service = WebSearchService()
+# Enhanced UI Functions
 def process_uploaded_file(file):
+    """Process uploaded file with enhanced feedback"""
     if file is None:
+        return "⚠️ No file selected", gr.update(visible=False), gr.update(visible=False)
     try:
+        status, success = doc_processor.process_document(file)
+        if success:
+            metadata = doc_processor.document_metadata
+            info_text = f"""📄 **Document Successfully Loaded**
+📊 Chunks: {metadata.get('chunks_count', 'N/A')}
+📝 Characters: {metadata.get('total_characters', 'N/A'):,}
+⏰ Processed: {metadata.get('processed_at', 'N/A')[:19]}
+🔍 Ready for questions!"""
+            return status, gr.update(visible=True, value=info_text), gr.update(visible=True)
+        else:
+            return status, gr.update(visible=False), gr.update(visible=False)
     except Exception as e:
+        error_msg = f"❌ Processing Error: {str(e)}"
+        return error_msg, gr.update(visible=False), gr.update(visible=False)
+def answer_question(question: str, source: str, history: List[List[str]], use_advanced: bool = False):
+    """Enhanced question answering with better context and formatting"""
     if not question.strip():
         return history, ""
+    # Add user question to history
+    history = history + [[question, None]]
     try:
         if source == "🌐 Web Search":
+            context = search_service.search_web(question, num_results=5)
+            source_info = "🌐 **Source:** Real-time Web Search"
+            system_prompt = """You are a web research assistant. Analyze the search results and provide a comprehensive answer.
+            Cite specific sources when possible and organize information clearly."""
+        elif source == "📄 Uploaded Document":
+            if not doc_processor.doc_chunks:
                 answer = "❌ Please upload a PDF document first to use this feature."
                 history[-1][1] = answer
                 return history, ""
+            context, similarity_scores = doc_processor.retrieve_relevant_chunks(question)
+            source_info = f"📄 **Source:** Uploaded Document ({len(similarity_scores)} relevant sections found)"
+            system_prompt = """You are a document analysis assistant. Based on the provided document excerpts,
+            give a detailed and accurate answer. If information is incomplete, clearly state what's missing."""
         else:
             answer = "❌ Please select a valid knowledge source."
             history[-1][1] = answer
             return history, ""
+        if not context.strip():
+            answer = "❌ No relevant information found for your question."
+            history[-1][1] = answer
+            return history, ""
+        # Generate response using LLM
+        llm_response = llm_service.call_together_ai_sync(context, question, system_prompt)
+        # Format final answer
+        timestamp = datetime.now().strftime("%H:%M:%S")
+        formatted_answer = f"""{source_info}
+⏰ **Generated at:** {timestamp}
+{llm_response}
+---
+💡 *Tip: Try asking follow-up questions for more details!*"""
+        history[-1][1] = formatted_answer
         return history, ""
     except Exception as e:
+        error_msg = f"""❌ **Error Occurred**
+🔍 **Details:** {str(e)}
+💡 **Suggestion:** Please check your API keys and try again.
+If the problem persists, try:
+- Rephrasing your question
+- Checking your internet connection
+- Ensuring API keys are properly configured"""
         history[-1][1] = error_msg
         return history, ""
 def clear_chat():
+    """Clear chat history"""
     return []
+def get_sample_questions(source):
+    """Provide sample questions based on source"""
+    if source == "🌐 Web Search":
+        return [
+            "What are the latest developments in AI technology?",
+            "Current weather in major cities",
+            "Recent news about renewable energy",
+            "What's trending in technology today?"
+        ]
+    else:
+        return [
+            "What is the main topic of this document?",
+            "Summarize the key points",
+            "What are the conclusions?",
+            "Explain the methodology used"
+        ]
+# Enhanced CSS with modern design
+enhanced_css = """
+/* Global Styles */
 .gradio-container {
+    max-width: 1400px !important;
     margin: auto !important;
+    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
 }
+/* Header Styles */
+.main-header {
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+    padding: 2rem;
+    border-radius: 20px;
+    margin-bottom: 2rem;
+    box-shadow: 0 10px 30px rgba(102, 126, 234, 0.3);
+}
+.header-title {
+    color: white;
+    font-size: 3rem;
+    font-weight: 800;
     text-align: center;
+    margin-bottom: 0.5rem;
+    text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
 }
+.header-subtitle {
+    color: rgba(255,255,255,0.9);
+    font-size: 1.3rem;
     text-align: center;
+    font-weight: 300;
 }
+/* Card Styles */
+.control-card {
+    background: white;
     border-radius: 15px;
+    padding: 1.5rem;
+    box-shadow: 0 5px 20px rgba(0,0,0,0.1);
+    border: 1px solid #e2e8f0;
+    margin-bottom: 1rem;
 }
+.chat-card {
+    background: white;
+    border-radius: 15px;
+    padding: 1.5rem;
+    box-shadow: 0 5px 20px rgba(0,0,0,0.1);
+    border: 1px solid #e2e8f0;
+    min-height: 600px;
+}
+/* Source Selection */
+.source-selector {
+    background: linear-gradient(135deg, #84fab0 0%, #8fd3f4 100%);
+    border-radius: 12px;
+    padding: 1rem;
+    margin: 1rem 0;
 }
+.source-selector label {
+    color: #2d3748 !important;
+    font-weight: 600 !important;
+    font-size: 1.1rem !important;
+}
+/* File Upload */
+.upload-zone {
+    background: linear-gradient(135deg, #ffecd2 0%, #fcb69f 100%);
+    border: 3px dashed #ff8a65;
     border-radius: 15px;
+    padding: 2rem;
     text-align: center;
     transition: all 0.3s ease;
+    cursor: pointer;
 }
+.upload-zone:hover {
+    transform: translateY(-3px);
+    box-shadow: 0 8px 25px rgba(255, 138, 101, 0.3);
+    border-color: #ff7043;
 }
+/* Status Boxes */
+.status-success {
+    background: linear-gradient(135deg, #84fab0 0%, #8fd3f4 100%);
+    border: none;
+    border-radius: 12px;
+    padding: 1rem;
+    color: #2d3748;
+    font-weight: 500;
 }
+.status-info {
+    background: linear-gradient(135deg, #a8edea 0%, #fed6e3 100%);
     border: none;
+    border-radius: 12px;
+    padding: 1rem;
     color: #2d3748;
     font-weight: 500;
 }
+/* Chat Interface */
+.chat-container {
+    background: #f8fafc;
+    border-radius: 12px;
+    border: 1px solid #e2e8f0;
+    min-height: 500px;
+}
+/* Input Styles */
+.question-input {
+    border-radius: 12px;
+    border: 2px solid #cbd5e0;
+    padding: 1rem;
+    font-size: 1rem;
+    transition: all 0.3s ease;
+}
+.question-input:focus {
+    border-color: #667eea;
+    box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
+}
+/* Button Styles */
+.btn-primary {
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+    border: none;
+    border-radius: 12px;
+    padding: 0.75rem 1.5rem;
+    font-weight: 600;
+    color: white;
+    transition: all 0.3s ease;
+}
+.btn-primary:hover {
+    transform: translateY(-2px);
+    box-shadow: 0 8px 25px rgba(102, 126, 234, 0.4);
+}
+.btn-secondary {
+    background: linear-gradient(135deg, #ffecd2 0%, #fcb69f 100%);
+    border: none;
+    border-radius: 12px;
+    padding: 0.75rem 1.5rem;
+    font-weight: 600;
+    color: #2d3748;
+    transition: all 0.3s ease;
+}
+.btn-secondary:hover {
+    transform: translateY(-2px);
+    box-shadow: 0 8px 25px rgba(252, 182, 159, 0.4);
+}
+/* Advanced Settings */
+.advanced-panel {
+    background: linear-gradient(135deg, #e0c3fc 0%, #9bb5ff 100%);
+    border-radius: 12px;
+    padding: 1.5rem;
+    margin: 1rem 0;
+}
+/* Footer */
+.footer-info {
+    background: #2d3748;
+    color: white;
+    padding: 2rem;
+    border-radius: 15px;
     text-align: center;
+    margin-top: 2rem;
 }
+/* Animations */
+@keyframes fadeIn {
+    from { opacity: 0; transform: translateY(20px); }
+    to { opacity: 1; transform: translateY(0); }
+}
+.animate-in {
+    animation: fadeIn 0.6s ease-out;
+}
+/* Responsive Design */
+@media (max-width: 768px) {
+    .header-title {
+        font-size: 2rem;
+    }
+    .header-subtitle {
+        font-size: 1rem;
+    }
+    .control-card, .chat-card {
+        padding: 1rem;
+    }
+}
+"""
+# Build Enhanced Gradio Interface
+def create_enhanced_interface():
+    with gr.Blocks(
+        css=enhanced_css,
+        theme=gr.themes.Soft(
+            primary_hue="blue",
+            secondary_hue="purple",
+            neutral_hue="slate"
+        ),
+        title="🤖 Advanced RAG Chatbot"
+    ) as demo:
+        # Header Section
+        gr.HTML("""
+            <div class="main-header animate-in">
+                <div class="header-title">🤖 Advanced RAG Intelligence System</div>
+                <div class="header-subtitle">
+                    Next-generation AI assistant powered by advanced retrieval-augmented generation
+                </div>
+            </div>
+        """)
+        with gr.Row():
+            # Left Panel - Controls
+            with gr.Column(scale=1, elem_classes=["control-card"]):
+                # Knowledge Source Selection
+                gr.HTML("<h3 style='color: #4a5568; margin-bottom: 1rem;'>🎯 Knowledge Source</h3>")
+                source_choice = gr.Radio(
+                    ["🌐 Web Search", "📄 Uploaded Document"],
+                    label="Select Your Information Source",
+                    value="🌐 Web Search",
+                    elem_classes=["source-selector"]
+                )
+                # Document Upload Section
+                gr.HTML("<h3 style='color: #4a5568; margin: 2rem 0 1rem 0;'>📁 Document Processing</h3>")
+                file_input = gr.File(
+                    label="Upload PDF Document",
+                    file_types=[".pdf"],
+                    elem_classes=["upload-zone"]
+                )
+                file_status = gr.Textbox(
+                    label="Processing Status",
+                    interactive=False,
+                    elem_classes=["status-success"],
+                    visible=True
+                )
+                document_info = gr.Textbox(
+                    label="Document Information",
+                    interactive=False,
+                    elem_classes=["status-info"],
+                    visible=False,
+                    lines=6
+                )
+                # Quick Actions
+                gr.HTML("<h3 style='color: #4a5568; margin: 2rem 0 1rem 0;'>⚡ Quick Actions</h3>")
+                sample_questions_display = gr.HTML("""
+                    <div style='background: #f7fafc; padding: 1rem; border-radius: 8px; border-left: 4px solid #667eea;'>
+                        <strong>💡 Sample Questions for Web Search:</strong><br>
+                        • What are the latest AI breakthroughs?<br>
+                        • Current tech industry trends<br>
+                        • Recent scientific discoveries<br>
+                        • Today's market updates
+                    </div>
+                """)
+            # Right Panel - Chat Interface
+            with gr.Column(scale=2, elem_classes=["chat-card"]):
+                gr.HTML("<h3 style='color: #4a5568; margin-bottom: 1rem;'>💬 Intelligent Conversation</h3>")
+                chatbot = gr.Chatbot(
+                    label="AI Assistant",
+                    height=500,
+                    elem_classes=["chat-container"],
+                    bubble_full_width=False,
+                    show_label=False,
+                    avatar_images=("👤", "🤖")
                 )
+                with gr.Row():
+                    question_input = gr.Textbox(
+                        label="Your Question",
+                        placeholder="Ask me anything... (Press Enter or click Send)",
+                        lines=2,
+                        scale=4,
+                        elem_classes=["question-input"]
+                    )
+                    with gr.Column(scale=1, min_width=120):
+                        send_btn = gr.Button(
+                            "🚀 Send",
+                            variant="primary",
+                            size="lg",
+                            elem_classes=["btn-primary"]
+                        )
+                        clear_btn = gr.Button(
+                            "🗑️ Clear",
+                            variant="secondary",
+                            size="lg",
+                            elem_classes=["btn-secondary"]
+                        )
+        # Advanced Settings Panel
+        with gr.Accordion("⚙️ Advanced Configuration", open=False, elem_classes=["advanced-panel"]):
+            with gr.Row():
+                with gr.Column():
+                    gr.HTML("""
+                        <div style='background: white; padding: 1.5rem; border-radius: 12px; margin: 1rem 0;'>
+                            <h4>🔧 System Features</h4>
+                            <ul style='line-height: 1.8;'>
+                                <li><strong>🌐 Real-time Web Search:</strong> Live internet data retrieval</li>
+                                <li><strong>📄 Document Intelligence:</strong> Advanced PDF processing with semantic chunking</li>
+                                <li><strong>🧠 Neural Embeddings:</strong> Sentence-BERT powered similarity matching</li>
+                                <li><strong>⚡ Smart Caching:</strong> Optimized performance with intelligent storage</li>
+                            </ul>
+                        </div>
+                    """)
+                with gr.Column():
+                    gr.HTML("""
+                        <div style='background: white; padding: 1.5rem; border-radius: 12px; margin: 1rem 0;'>
+                            <h4>🤖 AI Capabilities</h4>
+                            <ul style='line-height: 1.8;'>
+                                <li><strong>Language Model:</strong> Mixtral-8x7B-Instruct</li>
+                                <li><strong>Context Understanding:</strong> Advanced semantic retrieval</li>
+                                <li><strong>Multi-source Fusion:</strong> Combined web + document insights</li>
+                                <li><strong>Error Recovery:</strong> Robust fallback mechanisms</li>
+                            </ul>
+                        </div>
+                    """)
+        # Footer with Credits
+        gr.HTML("""
+            <div class="footer-info">
+                <h4>🚀 Technical Architecture</h4>
+                <p>Built with cutting-edge AI technologies: Together AI • Serper API • Sentence Transformers • Advanced RAG Pipeline</p>
+                <p style='margin-top: 1rem; opacity: 0.8;'>
+                    💡 Engineered for optimal performance and user experience •
+                    🔒 Secure and scalable architecture •
+                    🎯 Production-ready implementation
+                </p>
+            </div>
         """)
+        # Event Handlers with Enhanced Logic
+        file_input.change(
+            fn=process_uploaded_file,
+            inputs=[file_input],
+            outputs=[file_status, document_info, gr.update()]
+        )
+        question_input.submit(
+            fn=answer_question,
+            inputs=[question_input, source_choice, chatbot],
+            outputs=[chatbot, question_input]
+        )
+        send_btn.click(
+            fn=answer_question,
+            inputs=[question_input, source_choice, chatbot],
+            outputs=[chatbot, question_input]
+        )
+        clear_btn.click(
+            fn=clear_chat,
+            inputs=[],
+            outputs=[chatbot]
+        )
+        # Dynamic sample questions update
+        def update_sample_questions(source):
+            if source == "🌐 Web Search":
+                return gr.HTML("""
+                    <div style='background: #f0fff4; padding: 1rem; border-radius: 8px; border-left: 4px solid #48bb78;'>
+                        <strong>💡 Sample Questions for Web Search:</strong><br>
+                        • What are the latest AI breakthroughs?<br>
+                        • Current cryptocurrency market trends<br>
+                        • Recent climate change developments<br>
+                        • Today's technology news
+                    </div>
+                """)
+            else:
+                return gr.HTML("""
+                    <div style='background: #fef5e7; padding: 1rem; border-radius: 8px; border-left: 4px solid #ed8936;'>
+                        <strong>💡 Sample Questions for Documents:</strong><br>
+                        • Summarize the main findings<br>
+                        • What methodology was used?<br>
+                        • List the key conclusions<br>
+                        • Explain the technical details
+                    </div>
+                """)
+        source_choice.change(
+            fn=update_sample_questions,
+            inputs=[source_choice],
+            outputs=[sample_questions_display]
+        )
+    return demo
+# Launch Application
 if __name__ == "__main__":
+    demo = create_enhanced_interface()
     demo.launch(
         share=True,
+        server_name="0