diff --git "a/app.py" "b/app.py"
--- "a/app.py"
+++ "b/app.py"
@@ -491,7 +491,7 @@ Respond with JSON only: [/INST]"""
r'[\$₹£€]\s*([0-9,]+\.?\d*)',
# Amounts at end of lines (common in invoices)
- r'([0-9,]+\.?\d*)\s*[\$₹£€]?\s*']}
+ r'([0-9,]+\.?\d*)\s*[\$₹£€]?\s*$']}
def parse_date(self, date_str: str) -> str:
"""Parse date to YYYY-MM-DD format"""
@@ -778,2506 +778,10 @@ class InvoiceProcessor:
text_length = len(text)
st.info(f"📝 Extracted {text_length} characters of text")
- # Show text preview and extraction debug info
+ # Show text preview
if text_length > 0:
- with st.expander("📄 Text Preview & Extraction Debug", expanded=True):
- st.text_area("Extracted Text (First 1000 chars):", value=text[:1000], height=150, disabled=True)
-
- # Debug amount detection
- st.markdown("**🔍 Amount Detection Debug:**")
- amount_patterns = [
- r'total\s*(?:amount)?\s*:?\s*[\$₹£€]?\s*([0-9,]+\.?\d*)',
- r'[\$₹£€]\s*([0-9,]+\.?\d*)',
- r'([0-9,]+\.?\d*)\s*[\$₹£€]?\s*$', # ✅ Added $ for end of line
- ]
-
-
- # Extract invoice data
- st.info("🤖 Extracting invoice data using AI/Regex...")
- invoice_data = self.ai_extractor.extract_with_ai(text)
- invoice_data.file_path = uploaded_file.name
-
- # Show extraction results
- st.info(f"📊 Extraction completed with {invoice_data.extraction_confidence:.1%} confidence")
-
- # Save to storage
- st.info("💾 Saving extracted data...")
- self.save_invoice_data(invoice_data, text, file_size)
-
- self.processing_stats['successful'] += 1
- st.success(f"✅ Successfully processed {uploaded_file.name}")
-
- return invoice_data
-
- finally:
- # Cleanup
- try:
- os.unlink(tmp_file_path)
- st.info("🧹 Cleaned up temporary file")
- except:
- pass
-
- except Exception as e:
- error_msg = f"Error processing {uploaded_file.name}: {str(e)}"
- st.error(error_msg)
- self.processing_stats['failed'] += 1
-
- # Show detailed error for debugging
- with st.expander("🔍 Error Details", expanded=False):
- st.code(str(e))
- import traceback
- st.code(traceback.format_exc())
-
- return InvoiceData()
-
- def save_invoice_data(self, invoice_data: InvoiceData, raw_text: str, file_size: int):
- """Save invoice data to JSON and vector store"""
- try:
- # Load existing data
- data = self.load_json_data()
-
- # Create invoice record
- invoice_record = {
- "id": len(data["invoices"]) + 1,
- "invoice_number": invoice_data.invoice_number,
- "supplier_name": invoice_data.supplier_name,
- "buyer_name": invoice_data.buyer_name,
- "date": invoice_data.date,
- "amount": invoice_data.amount,
- "quantity": invoice_data.quantity,
- "product_description": invoice_data.product_description,
- "file_info": {
- "file_name": invoice_data.file_path,
- "file_size": file_size
- },
- "extraction_info": {
- "confidence": invoice_data.extraction_confidence,
- "method": invoice_data.processing_method,
- "raw_text_preview": raw_text[:300]
- },
- "timestamps": {
- "created_at": datetime.now().isoformat()
- }
- }
-
- # Add to invoices
- data["invoices"].append(invoice_record)
-
- # Update summary
- self.update_summary(data)
-
- # Save JSON
- self.save_json_data(data)
-
- # Add to vector store
- if self.vector_store:
- self.vector_store.add_document(invoice_record, raw_text)
- self.vector_store.save_vector_store()
-
- except Exception as e:
- st.error(f"Error saving invoice data: {e}")
-
- def update_summary(self, data: dict):
- """Update summary statistics"""
- invoices = data["invoices"]
-
- total_amount = sum(inv.get("amount", 0) for inv in invoices)
- unique_suppliers = list(set(inv.get("supplier_name", "") for inv in invoices if inv.get("supplier_name")))
-
- data["summary"] = {
- "total_amount": total_amount,
- "unique_suppliers": unique_suppliers,
- "processing_stats": {
- "successful": self.processing_stats['successful'],
- "failed": self.processing_stats['failed'],
- "total_processed": self.processing_stats['total_processed']
- }
- }
-
- data["metadata"]["last_updated"] = datetime.now().isoformat()
- data["metadata"]["total_invoices"] = len(invoices)
-
-# ===============================================================================
-# CHATBOT CLASS
-# ===============================================================================
-
-class ChatBot:
- """Chatbot for invoice queries"""
-
- def __init__(self, processor: InvoiceProcessor):
- self.processor = processor
-
- def query_database(self, query: str) -> str:
- """Process user query and return response"""
- try:
- data = self.processor.load_json_data()
- invoices = data.get("invoices", [])
-
- if not invoices:
- return "No invoice data found. Please upload some invoices first."
-
- query_lower = query.lower()
-
- # Handle different query types
- if any(phrase in query_lower for phrase in ["summary", "overview", "total"]):
- return self.generate_summary(data)
-
- elif "count" in query_lower or "how many" in query_lower:
- return self.handle_count_query(data)
-
- elif any(phrase in query_lower for phrase in ["amount", "value", "money", "cost"]):
- return self.handle_amount_query(data)
-
- elif any(phrase in query_lower for phrase in ["supplier", "vendor", "company"]):
- return self.handle_supplier_query(data, query)
-
- elif self.processor.vector_store:
- return self.handle_semantic_search(query)
-
- else:
- return self.handle_general_query(data, query)
-
- except Exception as e:
- return f"Error processing query: {e}"
-
- def generate_summary(self, data: dict) -> str:
- """Generate comprehensive summary"""
- invoices = data.get("invoices", [])
- summary = data.get("summary", {})
-
- if not invoices:
- return "No invoices found in the system."
-
- total_amount = summary.get("total_amount", 0)
- avg_amount = total_amount / len(invoices) if invoices else 0
- unique_suppliers = len(summary.get("unique_suppliers", []))
-
- response = f"""
-**📊 Invoice System Summary**
-
-• **Total Invoices**: {len(invoices):,}
-• **Total Value**: ₹{total_amount:,.2f}
-• **Average Invoice**: ₹{avg_amount:,.2f}
-• **Unique Suppliers**: {unique_suppliers}
-
-**📈 Processing Stats**
-• **Successful**: {summary.get('processing_stats', {}).get('successful', 0)}
-• **Failed**: {summary.get('processing_stats', {}).get('failed', 0)}
-
-**🔍 Recent Invoices**
-"""
-
- # Show recent invoices
- recent = sorted(invoices, key=lambda x: x.get('timestamps', {}).get('created_at', ''), reverse=True)[:5]
- for i, inv in enumerate(recent, 1):
- response += f"\n{i}. **{inv.get('invoice_number', 'N/A')}** - {inv.get('supplier_name', 'Unknown')} (₹{inv.get('amount', 0):,.2f})"
-
- return response
-
- def handle_count_query(self, data: dict) -> str:
- """Handle count-related queries"""
- invoices = data.get("invoices", [])
- total = len(invoices)
- unique_numbers = len(set(inv.get('invoice_number', '') for inv in invoices if inv.get('invoice_number')))
-
- return f"""
-**📊 Invoice Count Summary**
-
-• **Total Records**: {total}
-• **Unique Invoice Numbers**: {unique_numbers}
-• **Duplicates**: {total - unique_numbers if total > unique_numbers else 0}
-
-**📅 Processing Timeline**
-• **First Invoice**: {invoices[0].get('timestamps', {}).get('created_at', 'N/A')[:10] if invoices else 'N/A'}
-• **Latest Invoice**: {invoices[-1].get('timestamps', {}).get('created_at', 'N/A')[:10] if invoices else 'N/A'}
-"""
-
- def handle_amount_query(self, data: dict) -> str:
- """Handle amount-related queries"""
- invoices = data.get("invoices", [])
- amounts = [inv.get('amount', 0) for inv in invoices if inv.get('amount', 0) > 0]
-
- if not amounts:
- return "No amount information found in invoices."
-
- total_amount = sum(amounts)
- avg_amount = total_amount / len(amounts)
- max_amount = max(amounts)
- min_amount = min(amounts)
-
- # Find high-value invoices
- high_value_threshold = sorted(amounts, reverse=True)[min(4, len(amounts)-1)] if len(amounts) > 5 else max_amount
- high_value_invoices = [inv for inv in invoices if inv.get('amount', 0) >= high_value_threshold]
-
- response = f"""
-**💰 Financial Analysis**
-
-• **Total Amount**: ₹{total_amount:,.2f}
-• **Average Amount**: ₹{avg_amount:,.2f}
-• **Highest Invoice**: ₹{max_amount:,.2f}
-• **Lowest Invoice**: ₹{min_amount:,.2f}
-
-**🎯 High-Value Invoices (₹{high_value_threshold:,.2f}+)**
-"""
-
- for i, inv in enumerate(high_value_invoices[:5], 1):
- response += f"\n{i}. **{inv.get('invoice_number', 'N/A')}** - {inv.get('supplier_name', 'Unknown')} (₹{inv.get('amount', 0):,.2f})"
-
- return response
-
- def handle_supplier_query(self, data: dict, query: str) -> str:
- """Handle supplier-related queries"""
- invoices = data.get("invoices", [])
-
- # Count invoices by supplier
- supplier_counts = {}
- supplier_amounts = {}
-
- for inv in invoices:
- supplier = inv.get('supplier_name', '').strip()
- if supplier:
- supplier_counts[supplier] = supplier_counts.get(supplier, 0) + 1
- supplier_amounts[supplier] = supplier_amounts.get(supplier, 0) + inv.get('amount', 0)
-
- if not supplier_counts:
- return "No supplier information found in invoices."
-
- # Sort suppliers by amount
- top_suppliers = sorted(supplier_amounts.items(), key=lambda x: x[1], reverse=True)[:10]
-
- response = f"""
-**🏢 Supplier Analysis**
-
-• **Total Unique Suppliers**: {len(supplier_counts)}
-• **Most Active**: {max(supplier_counts, key=supplier_counts.get)} ({supplier_counts[max(supplier_counts, key=supplier_counts.get)]} invoices)
-
-**💰 Top Suppliers by Amount**
-"""
-
- for i, (supplier, amount) in enumerate(top_suppliers, 1):
- count = supplier_counts[supplier]
- avg = amount / count if count > 0 else 0
- response += f"\n{i}. **{supplier}** - ₹{amount:,.2f} ({count} invoices, avg: ₹{avg:,.2f})"
-
- return response
-
- def handle_semantic_search(self, query: str) -> str:
- """Handle semantic search queries"""
- try:
- results = self.processor.vector_store.semantic_search(query, top_k=5)
-
- if not results:
- return f"No relevant results found for '{query}'. Try different keywords."
-
- response = f"🔍 **Semantic Search Results for '{query}'**\n\n"
-
- for i, result in enumerate(results, 1):
- response += f"{i}. **{result.invoice_number}** - {result.supplier_name}\n"
- response += f" • Similarity: {result.similarity_score:.3f}\n"
- response += f" • Amount: ₹{result.metadata.get('amount', 0):,.2f}\n"
- response += f" • Preview: {result.content_preview[:100]}...\n\n"
-
- return response
-
- except Exception as e:
- return f"Semantic search error: {e}"
-
- def handle_general_query(self, data: dict, query: str) -> str:
- """Handle general queries with keyword search"""
- invoices = data.get("invoices", [])
- query_words = query.lower().split()
-
- # Simple keyword matching
- matching_invoices = []
- for inv in invoices:
- text_to_search = (
- inv.get('supplier_name', '') + ' ' +
- inv.get('buyer_name', '') + ' ' +
- inv.get('product_description', '') + ' ' +
- inv.get('extraction_info', {}).get('raw_text_preview', '')
- ).lower()
-
- if any(word in text_to_search for word in query_words):
- matching_invoices.append(inv)
-
- if not matching_invoices:
- return f"No invoices found matching '{query}'. Try different keywords or check the summary."
-
- response = f"🔍 **Found {len(matching_invoices)} invoices matching '{query}'**\n\n"
-
- for i, inv in enumerate(matching_invoices[:5], 1):
- response += f"{i}. **{inv.get('invoice_number', 'N/A')}** - {inv.get('supplier_name', 'Unknown')}\n"
- response += f" • Amount: ₹{inv.get('amount', 0):,.2f}\n"
- response += f" • Date: {inv.get('date', 'N/A')}\n\n"
-
- if len(matching_invoices) > 5:
- response += f"... and {len(matching_invoices) - 5} more results."
-
- return response
-
-# ===============================================================================
-# STREAMLIT APPLICATION
-# ===============================================================================
-
-def create_app():
- """Main Streamlit application"""
-
- # Generate unique session ID for this run
- if 'session_id' not in st.session_state:
- st.session_state.session_id = str(uuid.uuid4())[:8]
-
- session_id = st.session_state.session_id
-
- # Custom CSS
- st.markdown("""
-
- """, unsafe_allow_html=True)
-
- # Header
- st.markdown('
-📄 AI Invoice Processing System
-', unsafe_allow_html=True)
- st.markdown("""
-
-
- AI-Powered Document Processing • Semantic Search • Smart Analytics • Hugging Face Spaces
-
-
- """, unsafe_allow_html=True)
-
- # Initialize processor
- if 'processor' not in st.session_state:
- with st.spinner("🔧 Initializing AI Invoice Processor..."):
- try:
- st.session_state.processor = InvoiceProcessor()
- st.session_state.chatbot = ChatBot(st.session_state.processor)
- st.session_state.chat_history = []
- st.success("✅ System initialized successfully!")
- except Exception as e:
- st.error(f"❌ Initialization failed: {e}")
- st.stop()
-
- # Sidebar
- with st.sidebar:
- st.header("🎛️ System Status")
-
- processor = st.session_state.processor
-
- # Component status
- if processor.document_processor.processors:
- st.markdown('✅ Document Processing', unsafe_allow_html=True)
- else:
- st.markdown('❌ Document Processing', unsafe_allow_html=True)
-
- if processor.ai_extractor.use_transformers:
- st.markdown('✅ AI Extraction', unsafe_allow_html=True)
- else:
- st.markdown('⚠️ Regex Extraction', unsafe_allow_html=True)
-
- if processor.vector_store and processor.vector_store.embedding_model:
- st.markdown('✅ Semantic Search', unsafe_allow_html=True)
- else:
- st.markdown('⚠️ Keyword Search Only', unsafe_allow_html=True)
-
- # Quick stats
- st.header("📊 Quick Stats")
- try:
- data = processor.load_json_data()
- total_invoices = len(data.get("invoices", []))
- total_amount = data.get("summary", {}).get("total_amount", 0)
-
- st.metric("Total Invoices", total_invoices)
- st.metric("Total Value", f"₹{total_amount:,.2f}")
- st.metric("Success Rate", f"{processor.processing_stats['successful']}/{processor.processing_stats['total_processed']}")
-
- except Exception as e:
- st.error(f"Stats error: {e}")
-
- # System info
- st.header("⚙️ System Info")
- st.info(f"""
- **Session ID:** {session_id}
-
- **Limits:**
- • Max file size: 10MB
- • Max concurrent files: 3
- • Timeout: 30s
- """)
-
- # Main navigation
- selected_tab = st.radio(
- "Choose a section:",
- ["📤 Upload & Process", "💬 AI Chat", "📊 Analytics", "📋 Data Explorer"],
- horizontal=True,
- key=f"main_navigation_{session_id}"
- )
-
- # -------------------------------------------------------------------------
- # UPLOAD & PROCESS SECTION
- # -------------------------------------------------------------------------
-
- if selected_tab == "📤 Upload & Process":
- st.header("📤 Upload Invoice Documents")
-
- # Feature highlights
- col1, col2, col3 = st.columns(3)
-
- with col1:
- st.markdown("""
-
-
-🤖 AI Extraction
-
-Advanced NLP models extract structured data automatically
-
- """, unsafe_allow_html=True)
-
- with col2:
- st.markdown("""
-
-
-🔍 Smart Search
-
-Semantic search finds invoices using natural language
-
- """, unsafe_allow_html=True)
-
- with col3:
- st.markdown("""
-
-
-📊 Analytics
-
-Comprehensive insights and visualizations
-
- """, unsafe_allow_html=True)
-
- # File upload
- st.markdown("### 📁 Upload Your Invoices")
-
- # Initialize session state for files if not exists
- if f'uploaded_files_{session_id}' not in st.session_state:
- st.session_state[f'uploaded_files_{session_id}'] = None
- if f'processing_complete_{session_id}' not in st.session_state:
- st.session_state[f'processing_complete_{session_id}'] = False
- if f'currently_processing_{session_id}' not in st.session_state:
- st.session_state[f'currently_processing_{session_id}'] = False
- if f'processed_file_hashes_{session_id}' not in st.session_state:
- st.session_state[f'processed_file_hashes_{session_id}'] = set()
-
- # File uploader with stable key
- uploaded_files = st.file_uploader(
- "Choose invoice files (PDF, TXT supported)",
- type=['pdf', 'txt'],
- accept_multiple_files=True,
- help="Maximum file size: 10MB per file",
- key=f"file_uploader_stable_{session_id}"
- )
-
- # Store uploaded files in session state only if they're new
- if uploaded_files:
- # Create file hashes to detect if files have changed
- current_file_hashes = set()
- for file in uploaded_files:
- file_hash = hash((file.name, file.size))
- current_file_hashes.add(file_hash)
-
- # Check if files have changed
- stored_hashes = st.session_state.get(f'uploaded_file_hashes_{session_id}', set())
- if current_file_hashes != stored_hashes:
- st.session_state[f'uploaded_files_{session_id}'] = uploaded_files
- st.session_state[f'uploaded_file_hashes_{session_id}'] = current_file_hashes
- st.session_state[f'processing_complete_{session_id}'] = False
- st.session_state[f'currently_processing_{session_id}'] = False
- st.info("📄 New files detected - ready for processing")
-
- # Get files from session state
- current_files = st.session_state[f'uploaded_files_{session_id}']
- is_processing = st.session_state[f'currently_processing_{session_id}']
- is_complete = st.session_state[f'processing_complete_{session_id}']
-
- if current_files:
- max_files = 3
- if len(current_files) > max_files:
- st.warning(f"⚠️ Too many files selected. Processing first {max_files} files.")
- current_files = current_files[:max_files]
-
- st.info(f"📊 {len(current_files)} files selected")
-
- # Show file names
- st.markdown("**Selected Files:**")
- for i, file in enumerate(current_files, 1):
- file_size_mb = len(file.getvalue()) / (1024 * 1024)
- file_hash = hash((file.name, file.size))
- processed_icon = "✅" if file_hash in st.session_state[f'processed_file_hashes_{session_id}'] else "📄"
- st.write(f"{processed_icon} {i}. {file.name} ({file_size_mb:.2f} MB)")
-
- # Process button - only show if not currently processing
- col1, col2 = st.columns([1, 1])
-
- with col1:
- if not is_processing and not is_complete:
- if st.button("🚀 Process Files", type="primary", key=f"process_btn_{session_id}"):
- st.session_state[f'currently_processing_{session_id}'] = True
- st.rerun()
- elif is_processing:
- st.info("🔄 Processing in progress...")
- # Actually process the files here
- process_files_once(current_files, session_id)
- elif is_complete:
- st.success("✅ Processing completed!")
- if st.button("🔄 Process Again", key=f"reprocess_btn_{session_id}"):
- st.session_state[f'processing_complete_{session_id}'] = False
- st.session_state[f'currently_processing_{session_id}'] = False
- st.session_state[f'processed_file_hashes_{session_id}'] = set()
- st.rerun()
-
- with col2:
- if st.button("🗑️ Clear Files", key=f"clear_files_{session_id}"):
- # Clear all session state related to files
- keys_to_clear = [
- f'uploaded_files_{session_id}',
- f'uploaded_file_hashes_{session_id}',
- f'processing_complete_{session_id}',
- f'currently_processing_{session_id}',
- f'processed_file_hashes_{session_id}'
- ]
-
- for key in keys_to_clear:
- if key in st.session_state:
- del st.session_state[key]
-
- st.success("🗑️ Files cleared successfully!")
- time.sleep(1) # Brief pause to show message
- st.rerun()
-
- else:
- st.info("👆 Please select invoice files to upload and process")
-
- # Show processing results if completed
- if is_complete:
- st.markdown("### 📋 Recent Processing Results")
- try:
- data = st.session_state.processor.load_json_data()
- recent_invoices = sorted(
- data.get("invoices", []),
- key=lambda x: x.get('timestamps', {}).get('created_at', ''),
- reverse=True
- )[:5]
-
- if recent_invoices:
- for i, inv in enumerate(recent_invoices, 1):
- with st.expander(f"📄 {inv.get('invoice_number', f'Invoice {i}')} - {inv.get('supplier_name', 'Unknown')}", expanded=False):
- col1, col2 = st.columns(2)
- with col1:
- st.write(f"**Invoice #:** {inv.get('invoice_number', 'N/A')}")
- st.write(f"**Supplier:** {inv.get('supplier_name', 'N/A')}")
- st.write(f"**Amount:** ₹{inv.get('amount', 0):.2f}")
- with col2:
- st.write(f"**Date:** {inv.get('date', 'N/A')}")
- st.write(f"**Method:** {inv.get('extraction_info', {}).get('method', 'N/A')}")
- st.write(f"**Confidence:** {inv.get('extraction_info', {}).get('confidence', 0):.1%}")
- else:
- st.info("No recent processing results found.")
- except Exception as e:
- st.error(f"Error loading recent results: {e}")
-
- # -------------------------------------------------------------------------
- # AI CHAT SECTION
- # -------------------------------------------------------------------------
-
- elif selected_tab == "💬 AI Chat":
- st.header("💬 AI Chat Interface")
-
- # Display chat history
- if st.session_state.chat_history:
- st.markdown("### 💬 Chat History")
- for i, message in enumerate(st.session_state.chat_history):
- with st.chat_message(message["role"]):
- st.markdown(message["content"])
-
- # Chat input
- st.markdown("### ✍️ Ask a Question")
-
- col1, col2 = st.columns([4, 1])
-
- with col1:
- user_input = st.text_input(
- "Type your question:",
- placeholder="e.g., 'show me total spending'",
- key=f"chat_input_{session_id}"
- )
-
- with col2:
- ask_btn = st.button("🚀 Ask", type="primary", key=f"ask_btn_{session_id}")
-
- if ask_btn and user_input:
- handle_chat_query(user_input)
-
- # Suggested queries
- if not st.session_state.chat_history:
- st.markdown("### 💡 Try These Queries")
-
- col1, col2 = st.columns(2)
-
- with col1:
- st.markdown("**📊 Basic Queries:**")
- basic_queries = [
- "Show me a summary of all invoices",
- "How much have we spent in total?",
- "Who are our top suppliers?",
- "Find invoices with high amounts"
- ]
- for i, query in enumerate(basic_queries):
- if st.button(query, key=f"basic_{session_id}_{i}"):
- handle_chat_query(query)
-
- with col2:
- st.markdown("**🔍 Advanced Queries:**")
- advanced_queries = [
- "Find technology purchases",
- "Show office supplies",
- "Search consulting services",
- "Recent high-value invoices"
- ]
- for i, query in enumerate(advanced_queries):
- if st.button(query, key=f"advanced_{session_id}_{i}"):
- handle_chat_query(query)
-
- # Clear chat
- if st.session_state.chat_history:
- if st.button("🗑️ Clear Chat", key=f"clear_chat_{session_id}"):
- st.session_state.chat_history = []
- st.rerun()
-
- # -------------------------------------------------------------------------
- # ANALYTICS SECTION
- # -------------------------------------------------------------------------
-
- elif selected_tab == "📊 Analytics":
- st.header("📊 Analytics Dashboard")
-
- try:
- data = st.session_state.processor.load_json_data()
- invoices = data.get("invoices", [])
-
- if not invoices:
- st.info("📊 No data available. Upload some invoices to see analytics.")
- return
-
- # Convert to DataFrame
- df_data = []
- for inv in invoices:
- df_data.append({
- 'invoice_number': inv.get('invoice_number', ''),
- 'supplier_name': inv.get('supplier_name', ''),
- 'amount': inv.get('amount', 0),
- 'date': inv.get('date', ''),
- 'confidence': inv.get('extraction_info', {}).get('confidence', 0)
- })
-
- df = pd.DataFrame(df_data)
-
- # Key metrics
- col1, col2, col3, col4 = st.columns(4)
-
- with col1:
- st.metric("Total Invoices", len(df))
- with col2:
- st.metric("Total Amount", f"₹{df['amount'].sum():,.2f}")
- with col3:
- st.metric("Avg Amount", f"₹{df['amount'].mean():,.2f}")
- with col4:
- st.metric("Unique Suppliers", df['supplier_name'].nunique())
-
- # Visualizations
- if len(df) > 0:
- # Amount distribution
- fig_hist = px.histogram(
- df,
- x='amount',
- title="Invoice Amount Distribution",
- labels={'amount': 'Amount (₹)', 'count': 'Number of Invoices'}
- )
- st.plotly_chart(fig_hist, use_container_width=True)
-
- # Top suppliers
- if df['supplier_name'].notna().any():
- supplier_amounts = df.groupby('supplier_name')['amount'].sum().sort_values(ascending=False).head(10)
-
- if len(supplier_amounts) > 0:
- fig_suppliers = px.bar(
- x=supplier_amounts.values,
- y=supplier_amounts.index,
- orientation='h',
- title="Top 10 Suppliers by Total Amount",
- labels={'x': 'Total Amount (₹)', 'y': 'Supplier'}
- )
- st.plotly_chart(fig_suppliers, use_container_width=True)
-
- except Exception as e:
- st.error(f"Analytics error: {e}")
-
- # -------------------------------------------------------------------------
- # DATA EXPLORER SECTION
- # -------------------------------------------------------------------------
-
- elif selected_tab == "📋 Data Explorer":
- st.header("📋 Data Explorer")
-
- try:
- data = st.session_state.processor.load_json_data()
- invoices = data.get("invoices", [])
-
- if not invoices:
- st.info("📊 No data available. Upload some invoices first.")
- return
-
- # Convert to DataFrame
- df_data = []
- for inv in invoices:
- df_data.append({
- 'Invoice Number': inv.get('invoice_number', ''),
- 'Supplier': inv.get('supplier_name', ''),
- 'Buyer': inv.get('buyer_name', ''),
- 'Amount': inv.get('amount', 0),
- 'Date': inv.get('date', ''),
- 'Confidence': inv.get('extraction_info', {}).get('confidence', 0),
- 'Method': inv.get('extraction_info', {}).get('method', ''),
- 'File': inv.get('file_info', {}).get('file_name', ''),
- 'Created': inv.get('timestamps', {}).get('created_at', '')[:19]
- })
-
- df = pd.DataFrame(df_data)
-
- # Filters
- col1, col2, col3 = st.columns(3)
-
- with col1:
- suppliers = ['All'] + sorted(df['Supplier'].dropna().unique().tolist())
- selected_supplier = st.selectbox("Filter by Supplier", suppliers, key=f"supplier_filter_{session_id}")
-
- with col2:
- methods = ['All'] + sorted(df['Method'].dropna().unique().tolist())
- selected_method = st.selectbox("Filter by Method", methods, key=f"method_filter_{session_id}")
-
- with col3:
- min_amount = st.number_input("Min Amount", min_value=0.0, value=0.0, key=f"amount_filter_{session_id}")
-
- # Apply filters
- filtered_df = df.copy()
- if selected_supplier != 'All':
- filtered_df = filtered_df[filtered_df['Supplier'] == selected_supplier]
- if selected_method != 'All':
- filtered_df = filtered_df[filtered_df['Method'] == selected_method]
- if min_amount > 0:
- filtered_df = filtered_df[filtered_df['Amount'] >= min_amount]
-
- # Display data
- st.dataframe(
- filtered_df,
- use_container_width=True,
- column_config={
- "Amount": st.column_config.NumberColumn("Amount", format="₹%.2f"),
- "Confidence": st.column_config.ProgressColumn("Confidence", min_value=0, max_value=1)
- }
- )
-
- # Export options
- col1, col2 = st.columns(2)
-
- with col1:
- if st.button("📥 Export CSV", key=f"export_csv_{session_id}"):
- csv_data = filtered_df.to_csv(index=False)
- st.download_button(
- "Download CSV",
- csv_data,
- f"invoices_{datetime.now().strftime('%Y%m%d_%H%M')}.csv",
- "text/csv",
- key=f"download_csv_{session_id}"
- )
-
- with col2:
- if st.button("📄 Export JSON", key=f"export_json_{session_id}"):
- filtered_invoices = [inv for inv in invoices
- if inv.get('invoice_number') in filtered_df['Invoice Number'].values]
-
- export_data = {
- "exported_at": datetime.now().isoformat(),
- "total_records": len(filtered_invoices),
- "invoices": filtered_invoices
- }
-
- st.download_button(
- "Download JSON",
- json.dumps(export_data, indent=2),
- f"invoices_{datetime.now().strftime('%Y%m%d_%H%M')}.json",
- "application/json",
- key=f"download_json_{session_id}"
- )
-
- except Exception as e:
- st.error(f"Data explorer error: {e}")
-
- # -------------------------------------------------------------------------
- # GLOBAL CHAT INPUT
- # -------------------------------------------------------------------------
-
- st.markdown("---")
- st.markdown("### 💬 Quick Chat (Works from any section)")
-
- global_query = st.chat_input("Ask about your invoices...", key=f"global_chat_{session_id}")
-
- if global_query:
- handle_chat_query(global_query, show_response=True)
-
- # Footer
- st.markdown("---")
- st.markdown("""
-
-
-🚀 AI Invoice Processing System - Optimized for Hugging Face Spaces
-
-Built with ❤️ using Streamlit, Transformers, and AI
-
- """, unsafe_allow_html=True)
-
-# ===============================================================================
-# HELPER FUNCTIONS
-# ===============================================================================
-
-def process_files_once(uploaded_files, session_id):
- """Process uploaded files only once with proper state management"""
- if not uploaded_files:
- st.error("No files to process!")
- st.session_state[f'currently_processing_{session_id}'] = False
- return
-
- st.markdown("### 🔄 Processing Files...")
-
- # Get already processed file hashes
- processed_hashes = st.session_state[f'processed_file_hashes_{session_id}']
-
- # Filter out already processed files
- files_to_process = []
- for file in uploaded_files:
- file_hash = hash((file.name, file.size))
- if file_hash not in processed_hashes:
- files_to_process.append((file, file_hash))
-
- if not files_to_process:
- st.info("✅ All files have already been processed!")
- st.session_state[f'currently_processing_{session_id}'] = False
- st.session_state[f'processing_complete_{session_id}'] = True
- return
-
- # Create containers for dynamic updates
- progress_container = st.container()
- status_container = st.container()
- results_container = st.container()
-
- successful = 0
- failed = 0
-
- # Show progress
- with progress_container:
- progress_bar = st.progress(0)
- progress_text = st.empty()
-
- with status_container:
- st.info(f"Starting to process {len(files_to_process)} new files...")
-
- # Process each file only once
- for i, (uploaded_file, file_hash) in enumerate(files_to_process):
- current_progress = (i + 1) / len(files_to_process)
-
- with progress_container:
- progress_bar.progress(current_progress)
- progress_text.text(f"Processing file {i+1}/{len(files_to_process)}: {uploaded_file.name}")
-
- with status_container:
- st.info(f"🔄 Processing: {uploaded_file.name} ({len(uploaded_file.getvalue())/1024:.1f} KB)")
-
- try:
- # Process the file
- result = st.session_state.processor.process_uploaded_file(uploaded_file)
-
- # Mark file as processed regardless of result
- processed_hashes.add(file_hash)
-
- # Show result immediately
- with results_container:
- if result and hasattr(result, 'invoice_number') and result.invoice_number:
- successful += 1
- st.success(f"✅ Successfully processed: {uploaded_file.name}")
-
- # Show extracted data
- col1, col2, col3 = st.columns(3)
- with col1:
- st.write(f"**Invoice #:** {result.invoice_number}")
- st.write(f"**Supplier:** {result.supplier_name or 'Not found'}")
- with col2:
- st.write(f"**Amount:** ₹{result.amount:.2f}")
- st.write(f"**Date:** {result.date or 'Not found'}")
- with col3:
- st.write(f"**Method:** {result.processing_method}")
- st.write(f"**Confidence:** {result.extraction_confidence:.1%}")
-
- st.markdown("---")
- else:
- failed += 1
- st.warning(f"⚠️ Could not extract complete data from: {uploaded_file.name}")
- if result:
- st.write(f"Partial data: {result.supplier_name}, ₹{result.amount}")
- st.markdown("---")
-
- except Exception as e:
- failed += 1
- # Still mark as processed to avoid reprocessing
- processed_hashes.add(file_hash)
-
- with results_container:
- st.error(f"❌ Error processing {uploaded_file.name}: {str(e)}")
- st.markdown("---")
-
- # Update session state
- st.session_state[f'processed_file_hashes_{session_id}'] = processed_hashes
-
- # Final summary
- with progress_container:
- progress_bar.progress(1.0)
- progress_text.text("✅ Processing completed!")
-
- with status_container:
- if successful > 0:
- st.success(f"🎉 Processing complete! {successful} successful, {failed} failed")
- if successful > 0:
- st.balloons()
- else:
- st.error(f"❌ Processing failed for all {failed} files. Please check file formats and content.")
-
- # Update processing state
- st.session_state[f'currently_processing_{session_id}'] = False
- st.session_state[f'processing_complete_{session_id}'] = True
-
- # Force rerun to update UI
- st.rerun()
-
-def process_files(uploaded_files, session_id):
- """Legacy function - redirect to process_files_once"""
- return process_files_once(uploaded_files, session_id)
-
-def handle_chat_query(query, show_response=False):
- """Handle chat query"""
- st.session_state.chat_history.append({
- "role": "user",
- "content": query,
- "timestamp": datetime.now()
- })
-
- try:
- with st.spinner("🤖 AI is analyzing..."):
- response = st.session_state.chatbot.query_database(query)
-
- st.session_state.chat_history.append({
- "role": "assistant",
- "content": response,
- "timestamp": datetime.now()
- })
-
- if show_response:
- with st.chat_message("assistant"):
- st.markdown(response)
- st.info("💡 Switch to the 'AI Chat' section to see full conversation history!")
-
- st.rerun()
-
- except Exception as e:
- st.error(f"Chat error: {e}")
-
-# ===============================================================================
-# MAIN ENTRY POINT
-# ===============================================================================
-
-def main():
- """Main entry point for Hugging Face Spaces"""
- try:
- if IS_HF_SPACE:
- st.sidebar.info("🤗 Running on Hugging Face Spaces")
-
- create_app()
-
- except Exception as e:
- st.error(f"""
- ## 🚨 Application Error
-
- {e}
-
- Please refresh the page or check the logs for more details.
- """)
-
-if __name__ == "__main__":
- main()
-
-
-
-
-
- # Extract invoice number with multiple attempts
- for pattern in patterns['invoice_number']:
- match = re.search(pattern, text_lower, re.IGNORECASE | re.MULTILINE)
- if match:
- invoice_data.invoice_number = match.group(1).upper().strip()
- break
-
- # Extract amount with enhanced logic
- amounts_found = []
- for pattern in patterns['amount']:
- matches = re.finditer(pattern, text_lower, re.IGNORECASE | re.MULTILINE)
- for match in matches:
- try:
- amount_str = match.group(1).replace(',', '').replace(' ', '')
- amount_val = float(amount_str)
- if 0.01 <= amount_val <= 1000000: # Reasonable range
- amounts_found.append(amount_val)
- except (ValueError, IndexError):
- continue
-
- # Choose the most likely amount (highest value or most repeated)
- if amounts_found:
- # Remove duplicates and sort
- unique_amounts = sorted(set(amounts_found), reverse=True)
- # Take the highest reasonable amount
- invoice_data.amount = unique_amounts[0]
-
- # Extract date
- for pattern in patterns['date']:
- match = re.search(pattern, text, re.IGNORECASE | re.MULTILINE)
- if match:
- invoice_data.date = self.parse_date(match.group(1))
- break
-
- # Extract quantity
- for pattern in patterns['quantity']:
- match = re.search(pattern, text_lower, re.IGNORECASE)
- if match:
- try:
- invoice_data.quantity = int(match.group(1))
- break
- except ValueError:
- continue
-
- # Enhanced company name extraction
- company_patterns = [
- r'(?:from|supplier|vendor)\s*:?\s*([A-Z][A-Za-z\s&,\.]{3,50})',
- r'(?:to|buyer|client)\s*:?\s*([A-Z][A-Za-z\s&,\.]{3,50})',
- r'([A-Z][A-Za-z\s&,\.]{3,50})\s*(?:ltd|inc|corp|llc|co\.|company|pvt|private|limited)',
- r'(?:^|\n)([A-Z][A-Za-z\s&,\.]{3,50})\s*(?:\n|$)',
- ]
-
- companies_found = []
- for pattern in company_patterns:
- matches = re.findall(pattern, text, re.MULTILINE)
- for match in matches:
- clean_company = match.strip().title()
- if len(clean_company) > 3 and not any(word in clean_company.lower() for word in ['total', 'amount', 'date', 'invoice']):
- companies_found.append(clean_company)
-
- # Assign companies (first as supplier, second as buyer)
- if companies_found:
- invoice_data.supplier_name = companies_found[0]
- if len(companies_found) > 1:
- invoice_data.buyer_name = companies_found[1]
-
- # Extract product description
- desc_patterns = [
- r'(?:description|item|product|service)\s*:?\s*([A-Za-z0-9\s,.-]{10,200})',
- r'(?:for|regarding)\s*:?\s*([A-Za-z0-9\s,.-]{10,200})',
- ]
-
- for pattern in desc_patterns:
- match = re.search(pattern, text, re.IGNORECASE)
- if match:
- desc = match.group(1).strip()
- if len(desc) > 5:
- invoice_data.product_description = desc[:200] # Limit length
- break
-
- # Set confidence based on how much we extracted
- confidence_factors = []
- if invoice_data.invoice_number:
- confidence_factors.append(0.3)
- if invoice_data.amount > 0:
- confidence_factors.append(0.3)
- if invoice_data.supplier_name:
- confidence_factors.append(0.2)
- if invoice_data.date:
- confidence_factors.append(0.1)
- if invoice_data.quantity > 0:
- confidence_factors.append(0.1)
-
- invoice_data.extraction_confidence = sum(confidence_factors)
-
- return invoice_data
-
- def parse_date(self, date_str: str) -> str:
- """Parse date to YYYY-MM-DD format"""
- if not date_str:
- return ""
-
- formats = ['%Y-%m-%d', '%m/%d/%Y', '%d/%m/%Y', '%m-%d-%Y', '%d-%m-%Y', '%Y/%m/%d']
-
- for fmt in formats:
- try:
- parsed_date = datetime.strptime(date_str, fmt)
- return parsed_date.strftime('%Y-%m-%d')
- except ValueError:
- continue
-
- return date_str
-
-# ===============================================================================
-# VECTOR STORE CLASS
-# ===============================================================================
-
-class VectorStore:
- """Simplified vector store for Hugging Face Spaces"""
-
- def __init__(self, embedding_model: str = "all-MiniLM-L6-v2"):
- self.embedding_model_name = embedding_model
- self.vector_store_path = os.path.join(HF_CONFIG["data_dir"], "vectors.pkl")
- self.metadata_path = os.path.join(HF_CONFIG["data_dir"], "metadata.pkl")
- self.embedding_model = None
- self.vectors = []
- self.document_metadata = []
- self.embedding_dimension = None
-
- self.setup_embedding_model()
- self.load_vector_store()
-
- def setup_embedding_model(self):
- """Initialize the sentence transformer model"""
- if not SENTENCE_TRANSFORMERS_AVAILABLE:
- st.warning("⚠️ Sentence Transformers not available. Vector search disabled.")
- return
-
- try:
- with st.spinner(f"Loading embedding model: {self.embedding_model_name}..."):
- self.embedding_model = SentenceTransformer(
- self.embedding_model_name,
- cache_folder=HF_CONFIG["cache_dir"]
- )
-
- # Get embedding dimension
- test_embedding = self.embedding_model.encode(["test"])
- self.embedding_dimension = test_embedding.shape[0]
-
- st.success(f"✅ Embedding model loaded: {self.embedding_model_name}")
-
- except Exception as e:
- st.error(f"❌ Failed to load embedding model: {e}")
- self.embedding_model = None
-
- def load_vector_store(self):
- """Load existing vector store"""
- try:
- if os.path.exists(self.vector_store_path) and os.path.exists(self.metadata_path):
- with open(self.vector_store_path, 'rb') as f:
- self.vectors = pickle.load(f)
-
- with open(self.metadata_path, 'rb') as f:
- self.document_metadata = pickle.load(f)
-
- st.success(f"✅ Vector store loaded: {len(self.document_metadata)} documents")
- else:
- self.vectors = []
- self.document_metadata = []
- st.info("📄 New vector store initialized")
-
- except Exception as e:
- st.error(f"❌ Error loading vector store: {e}")
- self.vectors = []
- self.document_metadata = []
-
- def save_vector_store(self):
- """Save vector store to disk"""
- try:
- with open(self.vector_store_path, 'wb') as f:
- pickle.dump(self.vectors, f)
-
- with open(self.metadata_path, 'wb') as f:
- pickle.dump(self.document_metadata, f)
-
- return True
- except Exception as e:
- st.error(f"Error saving vector store: {e}")
- return False
-
- def create_document_text(self, invoice_data: dict, raw_text: str = "") -> str:
- """Create searchable text from invoice data"""
- text_parts = []
-
- for field, value in invoice_data.items():
- if value and field != 'id':
- text_parts.append(f"{field}: {value}")
-
- if raw_text:
- text_parts.append(f"content: {raw_text[:300]}")
-
- return " | ".join(text_parts)
-
- def add_document(self, invoice_data: dict, raw_text: str = "") -> bool:
- """Add a document to the vector store"""
- if not self.embedding_model:
- return False
-
- try:
- document_text = self.create_document_text(invoice_data, raw_text)
-
- # Generate embedding
- embedding = self.embedding_model.encode(document_text, normalize_embeddings=True)
-
- # Create metadata
- metadata = {
- 'invoice_id': invoice_data.get('id', ''),
- 'invoice_number': invoice_data.get('invoice_number', ''),
- 'supplier_name': invoice_data.get('supplier_name', ''),
- 'buyer_name': invoice_data.get('buyer_name', ''),
- 'amount': invoice_data.get('amount', 0),
- 'date': invoice_data.get('date', ''),
- 'file_name': invoice_data.get('file_info', {}).get('file_name', ''),
- 'document_text': document_text[:200],
- 'timestamp': datetime.now().isoformat()
- }
-
- # Add to store
- self.vectors.append(embedding)
- self.document_metadata.append(metadata)
-
- return True
-
- except Exception as e:
- st.error(f"Error adding document to vector store: {e}")
- return False
-
- def semantic_search(self, query: str, top_k: int = 5) -> List[VectorSearchResult]:
- """Perform semantic search using cosine similarity"""
- if not self.embedding_model or not self.vectors:
- return []
-
- try:
- # Generate query embedding
- query_embedding = self.embedding_model.encode(query, normalize_embeddings=True)
-
- # Calculate similarities
- similarities = []
- for i, doc_embedding in enumerate(self.vectors):
- similarity = np.dot(query_embedding, doc_embedding)
- similarities.append((similarity, i))
-
- # Sort by similarity
- similarities.sort(reverse=True)
-
- # Return top results
- results = []
- for similarity, idx in similarities[:top_k]:
- if similarity > 0.1: # Relevance threshold
- metadata = self.document_metadata[idx]
- result = VectorSearchResult(
- invoice_id=metadata.get('invoice_id', ''),
- invoice_number=metadata.get('invoice_number', ''),
- supplier_name=metadata.get('supplier_name', ''),
- similarity_score=float(similarity),
- content_preview=metadata.get('document_text', ''),
- metadata=metadata
- )
- results.append(result)
-
- return results
-
- except Exception as e:
- st.error(f"Error in semantic search: {e}")
- return []
-
-# ===============================================================================
-# MAIN PROCESSOR CLASS
-# ===============================================================================
-
-class InvoiceProcessor:
- """Main invoice processor for Hugging Face Spaces"""
-
- def __init__(self):
- self.setup_storage()
- self.document_processor = DocumentProcessor()
- self.ai_extractor = AIExtractor()
- self.vector_store = VectorStore() if SENTENCE_TRANSFORMERS_AVAILABLE else None
-
- # Initialize stats
- self.processing_stats = {
- 'total_processed': 0,
- 'successful': 0,
- 'failed': 0,
- 'start_time': datetime.now()
- }
-
- def setup_storage(self):
- """Setup storage paths"""
- self.data_dir = HF_CONFIG["data_dir"]
- self.json_path = os.path.join(self.data_dir, "invoices.json")
-
- # Initialize JSON storage
- if not os.path.exists(self.json_path):
- initial_data = {
- "metadata": {
- "created_at": datetime.now().isoformat(),
- "version": "hf_v1.0",
- "total_invoices": 0
- },
- "invoices": [],
- "summary": {
- "total_amount": 0.0,
- "unique_suppliers": [],
- "processing_stats": {"successful": 0, "failed": 0}
- }
- }
- self.save_json_data(initial_data)
-
- def load_json_data(self) -> dict:
- """Load invoice data from JSON"""
- try:
- with open(self.json_path, 'r', encoding='utf-8') as f:
- return json.load(f)
- except (FileNotFoundError, json.JSONDecodeError):
- self.setup_storage()
- return self.load_json_data()
-
- def save_json_data(self, data: dict):
- """Save invoice data to JSON"""
- try:
- with open(self.json_path, 'w', encoding='utf-8') as f:
- json.dump(data, f, indent=2, ensure_ascii=False)
- except Exception as e:
- st.error(f"Error saving data: {e}")
-
- def process_uploaded_file(self, uploaded_file) -> InvoiceData:
- """Process a single uploaded file with enhanced debugging"""
- self.processing_stats['total_processed'] += 1
-
- try:
- # Debug file info
- file_size = len(uploaded_file.getvalue())
- file_extension = uploaded_file.name.split('.')[-1].lower() if '.' in uploaded_file.name else 'unknown'
-
- st.info(f"📄 Processing: {uploaded_file.name} ({file_size/1024:.1f} KB, .{file_extension})")
-
- # Check file size
- if file_size > HF_CONFIG["max_file_size_mb"] * 1024 * 1024:
- error_msg = f"File too large: {file_size / 1024 / 1024:.2f}MB > {HF_CONFIG['max_file_size_mb']}MB"
- st.error(error_msg)
- self.processing_stats['failed'] += 1
- return InvoiceData()
-
- # Check file type
- if file_extension not in ['pdf', 'txt']:
- error_msg = f"Unsupported file type: .{file_extension} (supported: PDF, TXT)"
- st.warning(error_msg)
- self.processing_stats['failed'] += 1
- return InvoiceData()
-
- # Save temporarily
- with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file_extension}") as tmp_file:
- file_content = uploaded_file.getvalue()
- tmp_file.write(file_content)
- tmp_file_path = tmp_file.name
-
- st.info(f"💾 Saved temporarily to: {tmp_file_path}")
-
- try:
- # Extract text
- st.info("🔍 Extracting text from document...")
- text = self.document_processor.extract_text_from_document(tmp_file_path)
-
- if not text or not text.strip():
- st.warning(f"❌ No text extracted from {uploaded_file.name}")
- self.processing_stats['failed'] += 1
- return InvoiceData()
-
- text_length = len(text)
- st.info(f"📝 Extracted {text_length} characters of text")
-
- # Show text preview
- if text_length > 0:
- with st.expander("📄 Text Preview (First 500 characters)", expanded=False):
- st.text(text[:500] + "..." if len(text) > 500 else text)
-
- # Extract invoice data
- st.info("🤖 Extracting invoice data using AI/Regex...")
- invoice_data = self.ai_extractor.extract_with_ai(text)
- invoice_data.file_path = uploaded_file.name
-
- # Show extraction results
- st.info(f"📊 Extraction completed with {invoice_data.extraction_confidence:.1%} confidence")
-
- # Save to storage
- st.info("💾 Saving extracted data...")
- self.save_invoice_data(invoice_data, text, file_size)
-
- self.processing_stats['successful'] += 1
- st.success(f"✅ Successfully processed {uploaded_file.name}")
-
- return invoice_data
-
- finally:
- # Cleanup
- try:
- os.unlink(tmp_file_path)
- st.info("🧹 Cleaned up temporary file")
- except:
- pass
-
- except Exception as e:
- error_msg = f"Error processing {uploaded_file.name}: {str(e)}"
- st.error(error_msg)
- self.processing_stats['failed'] += 1
-
- # Show detailed error for debugging
- with st.expander("🔍 Error Details", expanded=False):
- st.code(str(e))
- import traceback
- st.code(traceback.format_exc())
-
- return InvoiceData()
-
- def save_invoice_data(self, invoice_data: InvoiceData, raw_text: str, file_size: int):
- """Save invoice data to JSON and vector store"""
- try:
- # Load existing data
- data = self.load_json_data()
-
- # Create invoice record
- invoice_record = {
- "id": len(data["invoices"]) + 1,
- "invoice_number": invoice_data.invoice_number,
- "supplier_name": invoice_data.supplier_name,
- "buyer_name": invoice_data.buyer_name,
- "date": invoice_data.date,
- "amount": invoice_data.amount,
- "quantity": invoice_data.quantity,
- "product_description": invoice_data.product_description,
- "file_info": {
- "file_name": invoice_data.file_path,
- "file_size": file_size
- },
- "extraction_info": {
- "confidence": invoice_data.extraction_confidence,
- "method": invoice_data.processing_method,
- "raw_text_preview": raw_text[:300]
- },
- "timestamps": {
- "created_at": datetime.now().isoformat()
- }
- }
-
- # Add to invoices
- data["invoices"].append(invoice_record)
-
- # Update summary
- self.update_summary(data)
-
- # Save JSON
- self.save_json_data(data)
-
- # Add to vector store
- if self.vector_store:
- self.vector_store.add_document(invoice_record, raw_text)
- self.vector_store.save_vector_store()
-
- except Exception as e:
- st.error(f"Error saving invoice data: {e}")
-
- def update_summary(self, data: dict):
- """Update summary statistics"""
- invoices = data["invoices"]
-
- total_amount = sum(inv.get("amount", 0) for inv in invoices)
- unique_suppliers = list(set(inv.get("supplier_name", "") for inv in invoices if inv.get("supplier_name")))
-
- data["summary"] = {
- "total_amount": total_amount,
- "unique_suppliers": unique_suppliers,
- "processing_stats": {
- "successful": self.processing_stats['successful'],
- "failed": self.processing_stats['failed'],
- "total_processed": self.processing_stats['total_processed']
- }
- }
-
- data["metadata"]["last_updated"] = datetime.now().isoformat()
- data["metadata"]["total_invoices"] = len(invoices)
-
-# ===============================================================================
-# CHATBOT CLASS
-# ===============================================================================
-
-class ChatBot:
- """Chatbot for invoice queries"""
-
- def __init__(self, processor: InvoiceProcessor):
- self.processor = processor
-
- def query_database(self, query: str) -> str:
- """Process user query and return response"""
- try:
- data = self.processor.load_json_data()
- invoices = data.get("invoices", [])
-
- if not invoices:
- return "No invoice data found. Please upload some invoices first."
-
- query_lower = query.lower()
-
- # Handle different query types
- if any(phrase in query_lower for phrase in ["summary", "overview", "total"]):
- return self.generate_summary(data)
-
- elif "count" in query_lower or "how many" in query_lower:
- return self.handle_count_query(data)
-
- elif any(phrase in query_lower for phrase in ["amount", "value", "money", "cost"]):
- return self.handle_amount_query(data)
-
- elif any(phrase in query_lower for phrase in ["supplier", "vendor", "company"]):
- return self.handle_supplier_query(data, query)
-
- elif self.processor.vector_store:
- return self.handle_semantic_search(query)
-
- else:
- return self.handle_general_query(data, query)
-
- except Exception as e:
- return f"Error processing query: {e}"
-
- def generate_summary(self, data: dict) -> str:
- """Generate comprehensive summary"""
- invoices = data.get("invoices", [])
- summary = data.get("summary", {})
-
- if not invoices:
- return "No invoices found in the system."
-
- total_amount = summary.get("total_amount", 0)
- avg_amount = total_amount / len(invoices) if invoices else 0
- unique_suppliers = len(summary.get("unique_suppliers", []))
-
- response = f"""
-**📊 Invoice System Summary**
-
-• **Total Invoices**: {len(invoices):,}
-• **Total Value**: ₹{total_amount:,.2f}
-• **Average Invoice**: ₹{avg_amount:,.2f}
-• **Unique Suppliers**: {unique_suppliers}
-
-**📈 Processing Stats**
-• **Successful**: {summary.get('processing_stats', {}).get('successful', 0)}
-• **Failed**: {summary.get('processing_stats', {}).get('failed', 0)}
-
-**🔍 Recent Invoices**
-"""
-
- # Show recent invoices
- recent = sorted(invoices, key=lambda x: x.get('timestamps', {}).get('created_at', ''), reverse=True)[:5]
- for i, inv in enumerate(recent, 1):
- response += f"\n{i}. **{inv.get('invoice_number', 'N/A')}** - {inv.get('supplier_name', 'Unknown')} (₹{inv.get('amount', 0):,.2f})"
-
- return response
-
- def handle_count_query(self, data: dict) -> str:
- """Handle count-related queries"""
- invoices = data.get("invoices", [])
- total = len(invoices)
- unique_numbers = len(set(inv.get('invoice_number', '') for inv in invoices if inv.get('invoice_number')))
-
- return f"""
-**📊 Invoice Count Summary**
-
-• **Total Records**: {total}
-• **Unique Invoice Numbers**: {unique_numbers}
-• **Duplicates**: {total - unique_numbers if total > unique_numbers else 0}
-
-**📅 Processing Timeline**
-• **First Invoice**: {invoices[0].get('timestamps', {}).get('created_at', 'N/A')[:10] if invoices else 'N/A'}
-• **Latest Invoice**: {invoices[-1].get('timestamps', {}).get('created_at', 'N/A')[:10] if invoices else 'N/A'}
-"""
-
- def handle_amount_query(self, data: dict) -> str:
- """Handle amount-related queries"""
- invoices = data.get("invoices", [])
- amounts = [inv.get('amount', 0) for inv in invoices if inv.get('amount', 0) > 0]
-
- if not amounts:
- return "No amount information found in invoices."
-
- total_amount = sum(amounts)
- avg_amount = total_amount / len(amounts)
- max_amount = max(amounts)
- min_amount = min(amounts)
-
- # Find high-value invoices
- high_value_threshold = sorted(amounts, reverse=True)[min(4, len(amounts)-1)] if len(amounts) > 5 else max_amount
- high_value_invoices = [inv for inv in invoices if inv.get('amount', 0) >= high_value_threshold]
-
- response = f"""
-**💰 Financial Analysis**
-
-• **Total Amount**: ₹{total_amount:,.2f}
-• **Average Amount**: ₹{avg_amount:,.2f}
-• **Highest Invoice**: ₹{max_amount:,.2f}
-• **Lowest Invoice**: ₹{min_amount:,.2f}
-
-**🎯 High-Value Invoices (₹{high_value_threshold:,.2f}+)**
-"""
-
- for i, inv in enumerate(high_value_invoices[:5], 1):
- response += f"\n{i}. **{inv.get('invoice_number', 'N/A')}** - {inv.get('supplier_name', 'Unknown')} (₹{inv.get('amount', 0):,.2f})"
-
- return response
-
- def handle_supplier_query(self, data: dict, query: str) -> str:
- """Handle supplier-related queries"""
- invoices = data.get("invoices", [])
-
- # Count invoices by supplier
- supplier_counts = {}
- supplier_amounts = {}
-
- for inv in invoices:
- supplier = inv.get('supplier_name', '').strip()
- if supplier:
- supplier_counts[supplier] = supplier_counts.get(supplier, 0) + 1
- supplier_amounts[supplier] = supplier_amounts.get(supplier, 0) + inv.get('amount', 0)
-
- if not supplier_counts:
- return "No supplier information found in invoices."
-
- # Sort suppliers by amount
- top_suppliers = sorted(supplier_amounts.items(), key=lambda x: x[1], reverse=True)[:10]
-
- response = f"""
-**🏢 Supplier Analysis**
-
-• **Total Unique Suppliers**: {len(supplier_counts)}
-• **Most Active**: {max(supplier_counts, key=supplier_counts.get)} ({supplier_counts[max(supplier_counts, key=supplier_counts.get)]} invoices)
-
-**💰 Top Suppliers by Amount**
-"""
-
- for i, (supplier, amount) in enumerate(top_suppliers, 1):
- count = supplier_counts[supplier]
- avg = amount / count if count > 0 else 0
- response += f"\n{i}. **{supplier}** - ₹{amount:,.2f} ({count} invoices, avg: ₹{avg:,.2f})"
-
- return response
-
- def handle_semantic_search(self, query: str) -> str:
- """Handle semantic search queries"""
- try:
- results = self.processor.vector_store.semantic_search(query, top_k=5)
-
- if not results:
- return f"No relevant results found for '{query}'. Try different keywords."
-
- response = f"🔍 **Semantic Search Results for '{query}'**\n\n"
-
- for i, result in enumerate(results, 1):
- response += f"{i}. **{result.invoice_number}** - {result.supplier_name}\n"
- response += f" • Similarity: {result.similarity_score:.3f}\n"
- response += f" • Amount: ₹{result.metadata.get('amount', 0):,.2f}\n"
- response += f" • Preview: {result.content_preview[:100]}...\n\n"
-
- return response
-
- except Exception as e:
- return f"Semantic search error: {e}"
-
- def handle_general_query(self, data: dict, query: str) -> str:
- """Handle general queries with keyword search"""
- invoices = data.get("invoices", [])
- query_words = query.lower().split()
-
- # Simple keyword matching
- matching_invoices = []
- for inv in invoices:
- text_to_search = (
- inv.get('supplier_name', '') + ' ' +
- inv.get('buyer_name', '') + ' ' +
- inv.get('product_description', '') + ' ' +
- inv.get('extraction_info', {}).get('raw_text_preview', '')
- ).lower()
-
- if any(word in text_to_search for word in query_words):
- matching_invoices.append(inv)
-
- if not matching_invoices:
- return f"No invoices found matching '{query}'. Try different keywords or check the summary."
-
- response = f"🔍 **Found {len(matching_invoices)} invoices matching '{query}'**\n\n"
-
- for i, inv in enumerate(matching_invoices[:5], 1):
- response += f"{i}. **{inv.get('invoice_number', 'N/A')}** - {inv.get('supplier_name', 'Unknown')}\n"
- response += f" • Amount: ₹{inv.get('amount', 0):,.2f}\n"
- response += f" • Date: {inv.get('date', 'N/A')}\n\n"
-
- if len(matching_invoices) > 5:
- response += f"... and {len(matching_invoices) - 5} more results."
-
- return response
-
-# ===============================================================================
-# STREAMLIT APPLICATION
-# ===============================================================================
-
-def create_app():
- """Main Streamlit application"""
-
- # Generate unique session ID for this run
- if 'session_id' not in st.session_state:
- st.session_state.session_id = str(uuid.uuid4())[:8]
-
- session_id = st.session_state.session_id
-
- # Custom CSS
- st.markdown("""
-
- """, unsafe_allow_html=True)
-
- # Header
- st.markdown('📄 AI Invoice Processing System
', unsafe_allow_html=True)
- st.markdown("""
-
-
- AI-Powered Document Processing • Semantic Search • Smart Analytics • Hugging Face Spaces
-
-
- """, unsafe_allow_html=True)
-
- # Initialize processor
- if 'processor' not in st.session_state:
- with st.spinner("🔧 Initializing AI Invoice Processor..."):
- try:
- st.session_state.processor = InvoiceProcessor()
- st.session_state.chatbot = ChatBot(st.session_state.processor)
- st.session_state.chat_history = []
- st.success("✅ System initialized successfully!")
- except Exception as e:
- st.error(f"❌ Initialization failed: {e}")
- st.stop()
-
- # Sidebar
- with st.sidebar:
- st.header("🎛️ System Status")
-
- processor = st.session_state.processor
-
- # Component status
- if processor.document_processor.processors:
- st.markdown('✅ Document Processing', unsafe_allow_html=True)
- else:
- st.markdown('❌ Document Processing', unsafe_allow_html=True)
-
- if processor.ai_extractor.use_transformers:
- st.markdown('✅ AI Extraction', unsafe_allow_html=True)
- else:
- st.markdown('⚠️ Regex Extraction', unsafe_allow_html=True)
-
- if processor.vector_store and processor.vector_store.embedding_model:
- st.markdown('✅ Semantic Search', unsafe_allow_html=True)
- else:
- st.markdown('⚠️ Keyword Search Only', unsafe_allow_html=True)
-
- # Quick stats
- st.header("📊 Quick Stats")
- try:
- data = processor.load_json_data()
- total_invoices = len(data.get("invoices", []))
- total_amount = data.get("summary", {}).get("total_amount", 0)
-
- st.metric("Total Invoices", total_invoices)
- st.metric("Total Value", f"₹{total_amount:,.2f}")
- st.metric("Success Rate", f"{processor.processing_stats['successful']}/{processor.processing_stats['total_processed']}")
-
- except Exception as e:
- st.error(f"Stats error: {e}")
-
- # System info
- st.header("⚙️ System Info")
- st.info(f"""
- **Session ID:** {session_id}
-
- **Limits:**
- • Max file size: 10MB
- • Max concurrent files: 3
- • Timeout: 30s
- """)
-
- # Main navigation
- selected_tab = st.radio(
- "Choose a section:",
- ["📤 Upload & Process", "💬 AI Chat", "📊 Analytics", "📋 Data Explorer"],
- horizontal=True,
- key=f"main_navigation_{session_id}"
- )
-
- # -------------------------------------------------------------------------
- # UPLOAD & PROCESS SECTION
- # -------------------------------------------------------------------------
-
- if selected_tab == "📤 Upload & Process":
- st.header("📤 Upload Invoice Documents")
-
- # Feature highlights
- col1, col2, col3 = st.columns(3)
-
- with col1:
- st.markdown("""
-
-
🤖 AI Extraction
-
Advanced NLP models extract structured data automatically
-
- """, unsafe_allow_html=True)
-
- with col2:
- st.markdown("""
-
-
🔍 Smart Search
-
Semantic search finds invoices using natural language
-
- """, unsafe_allow_html=True)
-
- with col3:
- st.markdown("""
-
-
📊 Analytics
-
Comprehensive insights and visualizations
-
- """, unsafe_allow_html=True)
-
- # File upload
- st.markdown("### 📁 Upload Your Invoices")
-
- # Initialize session state for files if not exists
- if f'uploaded_files_{session_id}' not in st.session_state:
- st.session_state[f'uploaded_files_{session_id}'] = None
- if f'processing_complete_{session_id}' not in st.session_state:
- st.session_state[f'processing_complete_{session_id}'] = False
- if f'currently_processing_{session_id}' not in st.session_state:
- st.session_state[f'currently_processing_{session_id}'] = False
- if f'processed_file_hashes_{session_id}' not in st.session_state:
- st.session_state[f'processed_file_hashes_{session_id}'] = set()
-
- # File uploader with stable key
- uploaded_files = st.file_uploader(
- "Choose invoice files (PDF, TXT supported)",
- type=['pdf', 'txt'],
- accept_multiple_files=True,
- help="Maximum file size: 10MB per file",
- key=f"file_uploader_stable_{session_id}"
- )
-
- # Store uploaded files in session state only if they're new
- if uploaded_files:
- # Create file hashes to detect if files have changed
- current_file_hashes = set()
- for file in uploaded_files:
- file_hash = hash((file.name, file.size))
- current_file_hashes.add(file_hash)
-
- # Check if files have changed
- stored_hashes = st.session_state.get(f'uploaded_file_hashes_{session_id}', set())
- if current_file_hashes != stored_hashes:
- st.session_state[f'uploaded_files_{session_id}'] = uploaded_files
- st.session_state[f'uploaded_file_hashes_{session_id}'] = current_file_hashes
- st.session_state[f'processing_complete_{session_id}'] = False
- st.session_state[f'currently_processing_{session_id}'] = False
- st.info("📄 New files detected - ready for processing")
-
- # Get files from session state
- current_files = st.session_state[f'uploaded_files_{session_id}']
- is_processing = st.session_state[f'currently_processing_{session_id}']
- is_complete = st.session_state[f'processing_complete_{session_id}']
-
- if current_files:
- max_files = 3
- if len(current_files) > max_files:
- st.warning(f"⚠️ Too many files selected. Processing first {max_files} files.")
- current_files = current_files[:max_files]
-
- st.info(f"📊 {len(current_files)} files selected")
-
- # Show file names
- st.markdown("**Selected Files:**")
- for i, file in enumerate(current_files, 1):
- file_size_mb = len(file.getvalue()) / (1024 * 1024)
- file_hash = hash((file.name, file.size))
- processed_icon = "✅" if file_hash in st.session_state[f'processed_file_hashes_{session_id}'] else "📄"
- st.write(f"{processed_icon} {i}. {file.name} ({file_size_mb:.2f} MB)")
-
- # Process button - only show if not currently processing
- col1, col2 = st.columns([1, 1])
-
- with col1:
- if not is_processing and not is_complete:
- if st.button("🚀 Process Files", type="primary", key=f"process_btn_{session_id}"):
- st.session_state[f'currently_processing_{session_id}'] = True
- st.rerun()
- elif is_processing:
- st.info("🔄 Processing in progress...")
- # Actually process the files here
- process_files_once(current_files, session_id)
- elif is_complete:
- st.success("✅ Processing completed!")
- if st.button("🔄 Process Again", key=f"reprocess_btn_{session_id}"):
- st.session_state[f'processing_complete_{session_id}'] = False
- st.session_state[f'currently_processing_{session_id}'] = False
- st.session_state[f'processed_file_hashes_{session_id}'] = set()
- st.rerun()
-
- with col2:
- if st.button("🗑️ Clear Files", key=f"clear_files_{session_id}"):
- st.session_state[f'uploaded_files_{session_id}'] = None
- st.session_state[f'uploaded_file_hashes_{session_id}'] = set()
- st.session_state[f'processing_complete_{session_id}'] = False
- st.session_state[f'currently_processing_{session_id}'] = False
- st.session_state[f'processed_file_hashes_{session_id}'] = set()
- st.rerun()
-
- else:
- st.info("👆 Please select invoice files to upload and process")
-
- # Show processing results if completed
- if is_complete:
- st.markdown("### 📋 Recent Processing Results")
- try:
- data = st.session_state.processor.load_json_data()
- recent_invoices = sorted(
- data.get("invoices", []),
- key=lambda x: x.get('timestamps', {}).get('created_at', ''),
- reverse=True
- )[:5]
-
- if recent_invoices:
- for i, inv in enumerate(recent_invoices, 1):
- with st.expander(f"📄 {inv.get('invoice_number', f'Invoice {i}')} - {inv.get('supplier_name', 'Unknown')}", expanded=False):
- col1, col2 = st.columns(2)
- with col1:
- st.write(f"**Invoice #:** {inv.get('invoice_number', 'N/A')}")
- st.write(f"**Supplier:** {inv.get('supplier_name', 'N/A')}")
- st.write(f"**Amount:** ₹{inv.get('amount', 0):.2f}")
- with col2:
- st.write(f"**Date:** {inv.get('date', 'N/A')}")
- st.write(f"**Method:** {inv.get('extraction_info', {}).get('method', 'N/A')}")
- st.write(f"**Confidence:** {inv.get('extraction_info', {}).get('confidence', 0):.1%}")
- else:
- st.info("No recent processing results found.")
- except Exception as e:
- st.error(f"Error loading recent results: {e}")
-
- # -------------------------------------------------------------------------
- # AI CHAT SECTION
- # -------------------------------------------------------------------------
-
- elif selected_tab == "💬 AI Chat":
- st.header("💬 AI Chat Interface")
-
- # Display chat history
- if st.session_state.chat_history:
- st.markdown("### 💬 Chat History")
- for i, message in enumerate(st.session_state.chat_history):
- with st.chat_message(message["role"]):
- st.markdown(message["content"])
-
- # Chat input
- st.markdown("### ✍️ Ask a Question")
-
- col1, col2 = st.columns([4, 1])
-
- with col1:
- user_input = st.text_input(
- "Type your question:",
- placeholder="e.g., 'show me total spending'",
- key=f"chat_input_{session_id}"
- )
-
- with col2:
- ask_btn = st.button("🚀 Ask", type="primary", key=f"ask_btn_{session_id}")
-
- if ask_btn and user_input:
- handle_chat_query(user_input)
-
- # Suggested queries
- if not st.session_state.chat_history:
- st.markdown("### 💡 Try These Queries")
-
- col1, col2 = st.columns(2)
-
- with col1:
- st.markdown("**📊 Basic Queries:**")
- basic_queries = [
- "Show me a summary of all invoices",
- "How much have we spent in total?",
- "Who are our top suppliers?",
- "Find invoices with high amounts"
- ]
- for i, query in enumerate(basic_queries):
- if st.button(query, key=f"basic_{session_id}_{i}"):
- handle_chat_query(query)
-
- with col2:
- st.markdown("**🔍 Advanced Queries:**")
- advanced_queries = [
- "Find technology purchases",
- "Show office supplies",
- "Search consulting services",
- "Recent high-value invoices"
- ]
- for i, query in enumerate(advanced_queries):
- if st.button(query, key=f"advanced_{session_id}_{i}"):
- handle_chat_query(query)
-
- # Clear chat
- if st.session_state.chat_history:
- if st.button("🗑️ Clear Chat", key=f"clear_chat_{session_id}"):
- st.session_state.chat_history = []
- st.rerun()
-
- # -------------------------------------------------------------------------
- # ANALYTICS SECTION
- # -------------------------------------------------------------------------
-
- elif selected_tab == "📊 Analytics":
- st.header("📊 Analytics Dashboard")
-
- try:
- data = st.session_state.processor.load_json_data()
- invoices = data.get("invoices", [])
-
- if not invoices:
- st.info("📊 No data available. Upload some invoices to see analytics.")
- return
-
- # Convert to DataFrame
- df_data = []
- for inv in invoices:
- df_data.append({
- 'invoice_number': inv.get('invoice_number', ''),
- 'supplier_name': inv.get('supplier_name', ''),
- 'amount': inv.get('amount', 0),
- 'date': inv.get('date', ''),
- 'confidence': inv.get('extraction_info', {}).get('confidence', 0)
- })
-
- df = pd.DataFrame(df_data)
-
- # Key metrics
- col1, col2, col3, col4 = st.columns(4)
-
- with col1:
- st.metric("Total Invoices", len(df))
- with col2:
- st.metric("Total Amount", f"₹{df['amount'].sum():,.2f}")
- with col3:
- st.metric("Avg Amount", f"₹{df['amount'].mean():,.2f}")
- with col4:
- st.metric("Unique Suppliers", df['supplier_name'].nunique())
-
- # Visualizations
- if len(df) > 0:
- # Amount distribution
- fig_hist = px.histogram(
- df,
- x='amount',
- title="Invoice Amount Distribution",
- labels={'amount': 'Amount (₹)', 'count': 'Number of Invoices'}
- )
- st.plotly_chart(fig_hist, use_container_width=True)
-
- # Top suppliers
- if df['supplier_name'].notna().any():
- supplier_amounts = df.groupby('supplier_name')['amount'].sum().sort_values(ascending=False).head(10)
-
- if len(supplier_amounts) > 0:
- fig_suppliers = px.bar(
- x=supplier_amounts.values,
- y=supplier_amounts.index,
- orientation='h',
- title="Top 10 Suppliers by Total Amount",
- labels={'x': 'Total Amount (₹)', 'y': 'Supplier'}
- )
- st.plotly_chart(fig_suppliers, use_container_width=True)
-
- except Exception as e:
- st.error(f"Analytics error: {e}")
-
- # -------------------------------------------------------------------------
- # DATA EXPLORER SECTION
- # -------------------------------------------------------------------------
-
- elif selected_tab == "📋 Data Explorer":
- st.header("📋 Data Explorer")
-
- try:
- data = st.session_state.processor.load_json_data()
- invoices = data.get("invoices", [])
-
- if not invoices:
- st.info("📊 No data available. Upload some invoices first.")
- return
-
- # Convert to DataFrame
- df_data = []
- for inv in invoices:
- df_data.append({
- 'Invoice Number': inv.get('invoice_number', ''),
- 'Supplier': inv.get('supplier_name', ''),
- 'Buyer': inv.get('buyer_name', ''),
- 'Amount': inv.get('amount', 0),
- 'Date': inv.get('date', ''),
- 'Confidence': inv.get('extraction_info', {}).get('confidence', 0),
- 'Method': inv.get('extraction_info', {}).get('method', ''),
- 'File': inv.get('file_info', {}).get('file_name', ''),
- 'Created': inv.get('timestamps', {}).get('created_at', '')[:19]
- })
-
- df = pd.DataFrame(df_data)
-
- # Filters
- col1, col2, col3 = st.columns(3)
-
- with col1:
- suppliers = ['All'] + sorted(df['Supplier'].dropna().unique().tolist())
- selected_supplier = st.selectbox("Filter by Supplier", suppliers, key=f"supplier_filter_{session_id}")
-
- with col2:
- methods = ['All'] + sorted(df['Method'].dropna().unique().tolist())
- selected_method = st.selectbox("Filter by Method", methods, key=f"method_filter_{session_id}")
-
- with col3:
- min_amount = st.number_input("Min Amount", min_value=0.0, value=0.0, key=f"amount_filter_{session_id}")
-
- # Apply filters
- filtered_df = df.copy()
- if selected_supplier != 'All':
- filtered_df = filtered_df[filtered_df['Supplier'] == selected_supplier]
- if selected_method != 'All':
- filtered_df = filtered_df[filtered_df['Method'] == selected_method]
- if min_amount > 0:
- filtered_df = filtered_df[filtered_df['Amount'] >= min_amount]
-
- # Display data
- st.dataframe(
- filtered_df,
- use_container_width=True,
- column_config={
- "Amount": st.column_config.NumberColumn("Amount", format="₹%.2f"),
- "Confidence": st.column_config.ProgressColumn("Confidence", min_value=0, max_value=1)
- }
- )
-
- # Export options
- col1, col2 = st.columns(2)
-
- with col1:
- if st.button("📥 Export CSV", key=f"export_csv_{session_id}"):
- csv_data = filtered_df.to_csv(index=False)
- st.download_button(
- "Download CSV",
- csv_data,
- f"invoices_{datetime.now().strftime('%Y%m%d_%H%M')}.csv",
- "text/csv",
- key=f"download_csv_{session_id}"
- )
-
- with col2:
- if st.button("📄 Export JSON", key=f"export_json_{session_id}"):
- filtered_invoices = [inv for inv in invoices
- if inv.get('invoice_number') in filtered_df['Invoice Number'].values]
-
- export_data = {
- "exported_at": datetime.now().isoformat(),
- "total_records": len(filtered_invoices),
- "invoices": filtered_invoices
- }
-
- st.download_button(
- "Download JSON",
- json.dumps(export_data, indent=2),
- f"invoices_{datetime.now().strftime('%Y%m%d_%H%M')}.json",
- "application/json",
- key=f"download_json_{session_id}"
- )
-
- except Exception as e:
- st.error(f"Data explorer error: {e}")
-
- # -------------------------------------------------------------------------
- # GLOBAL CHAT INPUT
- # -------------------------------------------------------------------------
-
- st.markdown("---")
- st.markdown("### 💬 Quick Chat (Works from any section)")
-
- global_query = st.chat_input("Ask about your invoices...", key=f"global_chat_{session_id}")
-
- if global_query:
- handle_chat_query(global_query, show_response=True)
-
- # Footer
- st.markdown("---")
- st.markdown("""
-
-
🚀 AI Invoice Processing System - Optimized for Hugging Face Spaces
-
Built with ❤️ using Streamlit, Transformers, and AI
-
- """, unsafe_allow_html=True)
-
-# ===============================================================================
-# HELPER FUNCTIONS
-# ===============================================================================
-
-def process_files_once(uploaded_files, session_id):
- """Process uploaded files only once with proper state management"""
- if not uploaded_files:
- st.error("No files to process!")
- st.session_state[f'currently_processing_{session_id}'] = False
- return
-
- st.markdown("### 🔄 Processing Files...")
-
- # Get already processed file hashes
- processed_hashes = st.session_state[f'processed_file_hashes_{session_id}']
-
- # Filter out already processed files
- files_to_process = []
- for file in uploaded_files:
- file_hash = hash((file.name, file.size))
- if file_hash not in processed_hashes:
- files_to_process.append((file, file_hash))
-
- if not files_to_process:
- st.info("✅ All files have already been processed!")
- st.session_state[f'currently_processing_{session_id}'] = False
- st.session_state[f'processing_complete_{session_id}'] = True
- return
-
- # Create containers for dynamic updates
- progress_container = st.container()
- status_container = st.container()
- results_container = st.container()
-
- successful = 0
- failed = 0
-
- # Show progress
- with progress_container:
- progress_bar = st.progress(0)
- progress_text = st.empty()
-
- with status_container:
- st.info(f"Starting to process {len(files_to_process)} new files...")
-
- # Process each file only once
- for i, (uploaded_file, file_hash) in enumerate(files_to_process):
- current_progress = (i + 1) / len(files_to_process)
-
- with progress_container:
- progress_bar.progress(current_progress)
- progress_text.text(f"Processing file {i+1}/{len(files_to_process)}: {uploaded_file.name}")
-
- with status_container:
- st.info(f"🔄 Processing: {uploaded_file.name} ({len(uploaded_file.getvalue())/1024:.1f} KB)")
-
- try:
- # Process the file
- result = st.session_state.processor.process_uploaded_file(uploaded_file)
-
- # Mark file as processed regardless of result
- processed_hashes.add(file_hash)
-
- # Show result immediately
- with results_container:
- if result and hasattr(result, 'invoice_number') and result.invoice_number:
- successful += 1
- st.success(f"✅ Successfully processed: {uploaded_file.name}")
-
- # Show extracted data
- col1, col2, col3 = st.columns(3)
- with col1:
- st.write(f"**Invoice #:** {result.invoice_number}")
- st.write(f"**Supplier:** {result.supplier_name or 'Not found'}")
- with col2:
- st.write(f"**Amount:** ₹{result.amount:.2f}")
- st.write(f"**Date:** {result.date or 'Not found'}")
- with col3:
- st.write(f"**Method:** {result.processing_method}")
- st.write(f"**Confidence:** {result.extraction_confidence:.1%}")
-
- st.markdown("---")
- else:
- failed += 1
- st.warning(f"⚠️ Could not extract complete data from: {uploaded_file.name}")
- if result:
- st.write(f"Partial data: {result.supplier_name}, ₹{result.amount}")
- st.markdown("---")
-
- except Exception as e:
- failed += 1
- # Still mark as processed to avoid reprocessing
- processed_hashes.add(file_hash)
-
- with results_container:
- st.error(f"❌ Error processing {uploaded_file.name}: {str(e)}")
- st.markdown("---")
-
- # Update session state
- st.session_state[f'processed_file_hashes_{session_id}'] = processed_hashes
-
- # Final summary
- with progress_container:
- progress_bar.progress(1.0)
- progress_text.text("✅ Processing completed!")
-
- with status_container:
- if successful > 0:
- st.success(f"🎉 Processing complete! {successful} successful, {failed} failed")
- if successful > 0:
- st.balloons()
- else:
- st.error(f"❌ Processing failed for all {failed} files. Please check file formats and content.")
-
- # Update processing state
- st.session_state[f'currently_processing_{session_id}'] = False
- st.session_state[f'processing_complete_{session_id}'] = True
-
- # Force rerun to update UI
- st.rerun()
-
-def process_files(uploaded_files, session_id):
- """Legacy function - redirect to process_files_once"""
- return process_files_once(uploaded_files, session_id)
-
-def handle_chat_query(query, show_response=False):
- """Handle chat query"""
- st.session_state.chat_history.append({
- "role": "user",
- "content": query,
- "timestamp": datetime.now()
- })
-
- try:
- with st.spinner("🤖 AI is analyzing..."):
- response = st.session_state.chatbot.query_database(query)
-
- st.session_state.chat_history.append({
- "role": "assistant",
- "content": response,
- "timestamp": datetime.now()
- })
-
- if show_response:
- with st.chat_message("assistant"):
- st.markdown(response)
- st.info("💡 Switch to the 'AI Chat' section to see full conversation history!")
-
- st.rerun()
-
- except Exception as e:
- st.error(f"Chat error: {e}")
-
-# ===============================================================================
-# MAIN ENTRY POINT
-# ===============================================================================
-
-def main():
- """Main entry point for Hugging Face Spaces"""
- try:
- if IS_HF_SPACE:
- st.sidebar.info("🤗 Running on Hugging Face Spaces")
-
- create_app()
-
- except Exception as e:
- st.error(f"""
- ## 🚨 Application Error
-
- {e}
-
- Please refresh the page or check the logs for more details.
- """)
-
-if __name__ == "__main__":
- main(),
- ]
-
- amounts_found = []
- for i, pattern in enumerate(amount_patterns):
- matches = re.findall(pattern, text.lower(), re.IGNORECASE | re.MULTILINE)
- if matches:
- st.write(f"Pattern {i+1}: {matches}")
- for match in matches:
- try:
- amount_val = float(match.replace(',', ''))
- amounts_found.append(amount_val)
- except:
- pass
-
- if amounts_found:
- st.success(f"✅ Found amounts: {amounts_found}")
- else:
- st.warning("⚠️ No amounts detected in text")
-
- # Debug invoice number detection
- st.markdown("**🔍 Invoice Number Detection Debug:**")
- inv_patterns = [
- r'invoice\s*(?:no|number|#)?\s*:?\s*([A-Z0-9\-_/]+)',
- r'#\s*([A-Z0-9\-_/]{3,})',
- ]
-
- for i, pattern in enumerate(inv_patterns):
- matches = re.findall(pattern, text.lower(), re.IGNORECASE)
- if matches:
- st.write(f"Invoice Pattern {i+1}: {matches}")
-
- # Show full text for manual inspection
- if st.checkbox("Show Full Extracted Text", key=f"debug_full_text_{uploaded_file.name}"):
- st.text_area("Full Text:", value=text, height=300, disabled=True)
+ with st.expander("📄 Text Preview (First 500 characters)", expanded=False):
+ st.text(text[:500] + "..." if len(text) > 500 else text)
# Extract invoice data
st.info("🤖 Extracting invoice data using AI/Regex...")