diff --git "a/streamlit_app.py" "b/streamlit_app.py" --- "a/streamlit_app.py" +++ "b/streamlit_app.py" @@ -1,5392 +1,1506 @@ -#!/usr/bin/env python3 -""" -Enhanced Invoice Processing & Analysis System with Vector Storage -A comprehensive system with Docling, Mistral AI, JSON storage, and semantic search capabilities. - -Author: AI Assistant -Date: 2024 -""" - -# =============================================================================== -# IMPORTS -# =============================================================================== - -# Standard library imports -import os -import json -import re -import tempfile -import shutil -import pickle -import numpy as np -from datetime import datetime -from typing import Dict, List, Optional, Tuple -from dataclasses import dataclass -from pathlib import Path - -# Third-party imports -import streamlit as st -import sqlite3 -import pandas as pd -import plotly.express as px -import plotly.graph_objects as go -import requests -import ollama - -# Vector storage and embeddings -import faiss -from sentence_transformers import SentenceTransformer -import torch -import importlib - -# Docling imports -from docling.document_converter import DocumentConverter -from docling.datamodel.base_models import InputFormat -from docling.datamodel.pipeline_options import PdfPipelineOptions -from docling.document_converter import PdfFormatOption - -# =============================================================================== -# STREAMLIT CONFIGURATION -# =============================================================================== - -st.set_page_config( - page_title="Enhanced Invoice Processing & Analysis System", - page_icon="šŸ“„", - layout="wide", - initial_sidebar_state="expanded" -) - -# =============================================================================== -# DATA STRUCTURES -# =============================================================================== - -@dataclass -class InvoiceData: - """Data structure for extracted 
invoice information""" - supplier_name: str = "" - buyer_name: str = "" - invoice_number: str = "" - date: str = "" - amount: float = 0.0 - quantity: int = 0 - product_description: str = "" - file_path: str = "" - extraction_confidence: float = 0.0 - -@dataclass -class VectorSearchResult: - """Data structure for vector search results""" - invoice_id: str - invoice_number: str - supplier_name: str - similarity_score: float - content_preview: str - metadata: Dict - -# =============================================================================== -# VECTOR STORAGE CLASS -# =============================================================================== - -class InvoiceVectorStore: - """Handles vector embeddings and semantic search for invoices""" - - def __init__(self, - embedding_model: str = "all-MiniLM-L6-v2", - vector_store_path: str = "invoice_vectors.faiss", - metadata_path: str = "vector_metadata.pkl"): - - self.embedding_model_name = embedding_model - self.vector_store_path = vector_store_path - self.metadata_path = metadata_path - self.embedding_model = None - self.vector_store = None - self.document_metadata = [] - self.embedding_dimension = None - - self.setup_embedding_model() - self.load_vector_store() - - def setup_embedding_model(self): - """Initialize the sentence transformer model""" - try: - self.embedding_model = SentenceTransformer(self.embedding_model_name) - # Get embedding dimension - test_embedding = self.embedding_model.encode(["test"]) - self.embedding_dimension = test_embedding.shape[1] - st.success(f"āœ… Embedding model loaded: {self.embedding_model_name} (dim: {self.embedding_dimension})") - except Exception as e: - st.error(f"āŒ Failed to load embedding model: {e}") - self.embedding_model = None - - def load_vector_store(self): - """Load existing FAISS vector store and metadata""" - try: - if os.path.exists(self.vector_store_path) and os.path.exists(self.metadata_path): - # Load FAISS index - self.vector_store = 
faiss.read_index(self.vector_store_path) - - # Load metadata - with open(self.metadata_path, 'rb') as f: - self.document_metadata = pickle.load(f) - - st.success(f"āœ… Vector store loaded: {len(self.document_metadata)} documents") - else: - # Initialize new vector store - if self.embedding_dimension: - self.vector_store = faiss.IndexFlatIP(self.embedding_dimension) - self.document_metadata = [] - st.info("šŸ“„ New vector store initialized") - except Exception as e: - st.error(f"āŒ Error loading vector store: {e}") - if self.embedding_dimension: - self.vector_store = faiss.IndexFlatIP(self.embedding_dimension) - self.document_metadata = [] - - def save_vector_store(self): - """Save FAISS vector store and metadata to disk""" - try: - if self.vector_store: - faiss.write_index(self.vector_store, self.vector_store_path) - - with open(self.metadata_path, 'wb') as f: - pickle.dump(self.document_metadata, f) - - return True - except Exception as e: - st.error(f"Error saving vector store: {e}") - return False - - def create_document_text(self, invoice_data: dict, raw_text: str = "") -> str: - """Create searchable text from invoice data""" - text_parts = [] - - # Basic information - if invoice_data.get('invoice_number'): - text_parts.append(f"Invoice Number: {invoice_data['invoice_number']}") - - if invoice_data.get('supplier_name'): - text_parts.append(f"Supplier: {invoice_data['supplier_name']}") - - if invoice_data.get('buyer_name'): - text_parts.append(f"Buyer: {invoice_data['buyer_name']}") - - if invoice_data.get('product_description'): - text_parts.append(f"Description: {invoice_data['product_description']}") - - if invoice_data.get('amount'): - text_parts.append(f"Amount: {invoice_data['amount']}") - - if invoice_data.get('date'): - text_parts.append(f"Date: {invoice_data['date']}") - - # Add raw text preview for additional context - if raw_text: - text_parts.append(f"Content: {raw_text[:500]}") - - return " | ".join(text_parts) - - def add_document(self, 
invoice_data: dict, raw_text: str = "") -> bool: - """Add a document to the vector store""" - if not self.embedding_model or not self.vector_store: - return False - - try: - # Create searchable text - document_text = self.create_document_text(invoice_data, raw_text) - - # Generate embedding - embedding = self.embedding_model.encode([document_text], normalize_embeddings=True) - - # Create metadata - metadata = { - 'invoice_id': invoice_data.get('id', ''), - 'invoice_number': invoice_data.get('invoice_number', ''), - 'supplier_name': invoice_data.get('supplier_name', ''), - 'buyer_name': invoice_data.get('buyer_name', ''), - 'amount': invoice_data.get('amount', 0), - 'date': invoice_data.get('date', ''), - 'file_name': invoice_data.get('file_info', {}).get('file_name', ''), - 'extraction_confidence': invoice_data.get('extraction_info', {}).get('confidence', 0), - 'document_text': document_text[:200], # Preview - 'timestamp': datetime.now().isoformat() - } - - # Check if document already exists (by invoice_number) - existing_idx = None - for i, meta in enumerate(self.document_metadata): - if meta.get('invoice_number') == metadata['invoice_number'] and metadata['invoice_number']: - existing_idx = i - break - - if existing_idx is not None: - # Update existing document - self.vector_store.remove_ids(np.array([existing_idx])) - self.document_metadata[existing_idx] = metadata - self.vector_store.add(embedding.astype('float32')) - else: - # Add new document - self.vector_store.add(embedding.astype('float32')) - self.document_metadata.append(metadata) - - return True - - except Exception as e: - st.error(f"Error adding document to vector store: {e}") - return False - - def semantic_search(self, query: str, top_k: int = 5) -> List[VectorSearchResult]: - """Perform semantic search on the vector store""" - if not self.embedding_model or not self.vector_store or len(self.document_metadata) == 0: - return [] - - try: - # Generate query embedding - query_embedding = 
self.embedding_model.encode([query], normalize_embeddings=True) - - # Search in vector store - scores, indices = self.vector_store.search( - query_embedding.astype('float32'), - min(top_k, len(self.document_metadata)) - ) - - results = [] - for score, idx in zip(scores[0], indices[0]): - if idx < len(self.document_metadata) and score > 0.1: # Relevance threshold - metadata = self.document_metadata[idx] - result = VectorSearchResult( - invoice_id=metadata.get('invoice_id', ''), - invoice_number=metadata.get('invoice_number', ''), - supplier_name=metadata.get('supplier_name', ''), - similarity_score=float(score), - content_preview=metadata.get('document_text', ''), - metadata=metadata - ) - results.append(result) - - return results - - except Exception as e: - st.error(f"Error in semantic search: {e}") - return [] - - def rebuild_vector_store(self, json_data: dict) -> bool: - """Rebuild vector store from JSON data""" - if not self.embedding_model: - return False - - try: - # Clear existing store - if self.embedding_dimension: - self.vector_store = faiss.IndexFlatIP(self.embedding_dimension) - self.document_metadata = [] - - invoices = json_data.get('invoices', []) - if not invoices: - return True - - st.info(f"Rebuilding vector store with {len(invoices)} invoices...") - progress_bar = st.progress(0) - - for i, invoice in enumerate(invoices): - # Get raw text if available - raw_text = invoice.get('extraction_info', {}).get('raw_text_preview', '') - - # Add to vector store - self.add_document(invoice, raw_text) - - # Update progress - progress_bar.progress((i + 1) / len(invoices)) - - # Save to disk - self.save_vector_store() - st.success(f"āœ… Vector store rebuilt with {len(self.document_metadata)} documents") - return True - - except Exception as e: - st.error(f"Error rebuilding vector store: {e}") - return False - - def get_stats(self) -> Dict: - """Get vector store statistics""" - return { - 'total_documents': len(self.document_metadata), - 'embedding_dimension': 
self.embedding_dimension, - 'model_name': self.embedding_model_name, - 'vector_store_size': self.vector_store.ntotal if self.vector_store else 0 - } - -# =============================================================================== -# ENHANCED INVOICE PROCESSOR -# =============================================================================== - -class EnhancedInvoiceProcessor: - """Enhanced invoice processor with vector storage capabilities""" - - def __init__(self, - db_path: str = "invoices.db", - json_path: str = "invoices_data.json", - model_name: str = "mistral:7b", - embedding_model: str = "all-MiniLM-L6-v2"): - - self.db_path = db_path - self.json_path = json_path - self.model_name = model_name - - # Initialize components - self.setup_database() - self.setup_ollama() - self.setup_docling() - self.setup_json_storage() - - # Initialize vector store - self.vector_store = InvoiceVectorStore(embedding_model=embedding_model) - - # Copy all the existing setup methods from your original class - def setup_database(self): - """Initialize SQLite database with enhanced schema""" - conn = sqlite3.connect(self.db_path) - cursor = conn.cursor() - - # Main invoices table - cursor.execute(''' - CREATE TABLE IF NOT EXISTS invoices ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - supplier_name TEXT, - buyer_name TEXT, - invoice_number TEXT UNIQUE, - date TEXT, - amount REAL, - quantity INTEGER, - product_description TEXT, - file_path TEXT, - file_name TEXT, - file_size INTEGER, - file_type TEXT, - extraction_confidence REAL, - raw_text TEXT, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - ) - ''') - - # Processing summary table - cursor.execute(''' - CREATE TABLE IF NOT EXISTS processing_summary ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - session_date DATE, - total_files_processed INTEGER, - successful_extractions INTEGER, - failed_extractions INTEGER, - total_amount_processed REAL, - unique_suppliers INTEGER, - unique_buyers 
INTEGER, - processing_time_seconds REAL, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - ) - ''') - - # File processing log table - cursor.execute(''' - CREATE TABLE IF NOT EXISTS file_processing_log ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - file_name TEXT, - file_path TEXT, - file_size INTEGER, - processing_status TEXT, - error_message TEXT, - processing_time_seconds REAL, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - ) - ''') - - conn.commit() - conn.close() - - def setup_ollama(self): - """Setup Ollama for local LLM processing""" - try: - response = requests.get('http://localhost:11434/api/tags', timeout=5) - if response.status_code == 200: - models = response.json().get('models', []) - model_names = [model['name'] for model in models] - - if self.model_name not in model_names: - st.warning(f"Model {self.model_name} not found. Available: {model_names}") - st.info(f"Run: `ollama pull {self.model_name}`") - self.use_ai = False - else: - self.use_ai = True - st.success(f"Using {self.model_name} for processing") - else: - st.error("Ollama not responding") - self.use_ai = False - except Exception as e: - st.error(f"Ollama setup error: {e}") - st.info("Start Ollama with: `ollama serve`") - self.use_ai = False - - def setup_docling(self): - """Initialize Docling document converter""" - try: - pipeline_options = PdfPipelineOptions() - pipeline_options.do_ocr = True - pipeline_options.do_table_structure = True - pipeline_options.table_structure_options.do_cell_matching = True - - self.doc_converter = DocumentConverter( - format_options={ - InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options), - InputFormat.IMAGE: PdfFormatOption(pipeline_options=pipeline_options), - } - ) - - self.docling_available = True - st.success("Docling initialized successfully") - - except Exception as e: - st.error(f"Docling initialization failed: {e}") - self.docling_available = False - - def setup_json_storage(self): - """Initialize JSON storage file with proper 
structure""" - if not os.path.exists(self.json_path): - initial_data = { - "metadata": { - "created_at": datetime.now().isoformat(), - "last_updated": datetime.now().isoformat(), - "total_invoices": 0, - "version": "1.0", - "vector_store_enabled": True - }, - "invoices": [], - "summary": { - "total_amount": 0.0, - "total_quantity": 0, - "unique_suppliers": [], - "unique_buyers": [], - "processing_stats": { - "successful": 0, - "failed": 0, - "total_processed": 0 - } - } - } - self.save_json_data(initial_data) - - # Copy all existing JSON methods from your original class - def load_json_data(self) -> dict: - """Load invoice data from JSON file""" - try: - with open(self.json_path, 'r', encoding='utf-8') as f: - return json.load(f) - except (FileNotFoundError, json.JSONDecodeError): - self.setup_json_storage() - return self.load_json_data() - - def save_json_data(self, data: dict): - """Save invoice data to JSON file""" - try: - with open(self.json_path, 'w', encoding='utf-8') as f: - json.dump(data, f, indent=2, ensure_ascii=False) - except Exception as e: - st.error(f"Error saving JSON data: {e}") - - def update_json_summary(self, data: dict): - """Update summary statistics in JSON data""" - invoices = data.get("invoices", []) - - # Calculate totals - total_amount = sum(inv.get("amount", 0) for inv in invoices) - total_quantity = sum(inv.get("quantity", 0) for inv in invoices) - - # Get unique suppliers and buyers - unique_suppliers = list(set(inv.get("supplier_name", "") for inv in invoices if inv.get("supplier_name"))) - unique_buyers = list(set(inv.get("buyer_name", "") for inv in invoices if inv.get("buyer_name"))) - - # Update summary - data["summary"] = { - "total_amount": total_amount, - "total_quantity": total_quantity, - "unique_suppliers": unique_suppliers, - "unique_buyers": unique_buyers, - "processing_stats": data.get("summary", {}).get("processing_stats", { - "successful": len([inv for inv in invoices if inv.get("invoice_number")]), - "failed": 0, - 
"total_processed": len(invoices) - }) - } - - # Update metadata - data["metadata"]["last_updated"] = datetime.now().isoformat() - data["metadata"]["total_invoices"] = len(invoices) - - return data - - # Copy all existing document processing methods - def extract_text_from_document(self, file_path: str) -> str: - """Extract text using Docling""" - if not self.docling_available: - return "" - - try: - result = self.doc_converter.convert(file_path) - - if not result.document: - return "" - - markdown_text = result.document.export_to_markdown() - - # Extract tables - tables_text = "" - if hasattr(result.document, 'tables') and result.document.tables: - tables_text = "\n\nTABLES:\n" - for i, table in enumerate(result.document.tables): - tables_text += f"\nTable {i+1}:\n" - tables_text += str(table.export_to_markdown()) - - return markdown_text + tables_text - - except Exception as e: - st.error(f"Document extraction failed: {e}") - return "" - - def extract_invoice_info_with_ai(self, text: str, file_path: str) -> InvoiceData: - """Use Mistral to extract structured information""" - if not self.use_ai: - return self.extract_invoice_info_regex(text) - - try: - prompt = f""" - Extract invoice information from this document and return ONLY valid JSON: - - {{ - "supplier_name": "company providing goods/services", - "buyer_name": "company receiving goods/services", - "invoice_number": "invoice/bill number", - "date": "date in YYYY-MM-DD format", - "amount": "total amount as number", - "quantity": "total quantity as integer", - "product_description": "description of items/services" - }} - - Document: {text[:3000]} - - Return only JSON: - """ - - response = ollama.chat( - model=self.model_name, - messages=[{'role': 'user', 'content': prompt}], - options={'temperature': 0.1, 'top_p': 0.9, 'num_predict': 300} - ) - - response_text = response['message']['content'].strip() - json_start = response_text.find('{') - json_end = response_text.rfind('}') + 1 - - if json_start != -1 and 
json_end > json_start: - json_str = response_text[json_start:json_end] - data = json.loads(json_str) - - invoice_data = InvoiceData() - invoice_data.supplier_name = str(data.get('supplier_name', '')).strip() - invoice_data.buyer_name = str(data.get('buyer_name', '')).strip() - invoice_data.invoice_number = str(data.get('invoice_number', '')).strip() - invoice_data.date = self.parse_date(str(data.get('date', ''))) - - # Parse amount - try: - amount_val = data.get('amount', 0) - if isinstance(amount_val, str): - amount_clean = re.sub(r'[^\d.]', '', amount_val) - invoice_data.amount = float(amount_clean) if amount_clean else 0.0 - else: - invoice_data.amount = float(amount_val) - except: - invoice_data.amount = 0.0 - - # Parse quantity - try: - qty_val = data.get('quantity', 0) - invoice_data.quantity = int(float(str(qty_val).replace(',', ''))) - except: - invoice_data.quantity = 0 - - invoice_data.product_description = str(data.get('product_description', '')).strip() - invoice_data.extraction_confidence = 0.9 - invoice_data.file_path = file_path - - return invoice_data - else: - raise ValueError("No valid JSON in response") - - except Exception as e: - st.error(f"AI extraction failed: {e}") - return self.extract_invoice_info_regex(text) - - def extract_invoice_info_regex(self, text: str) -> InvoiceData: - """Fallback regex extraction""" - invoice_data = InvoiceData() - - patterns = { - 'invoice_number': [ - r'invoice\s*#?\s*:?\s*([A-Z0-9\-_]+)', - r'bill\s*#?\s*:?\s*([A-Z0-9\-_]+)', - r'inv\s*#?\s*:?\s*([A-Z0-9\-_]+)' - ], - 'date': [ - r'date\s*:?\s*(\d{1,2}[/\-]\d{1,2}[/\-]\d{2,4})', - r'(\d{1,2}[/\-]\d{1,2}[/\-]\d{2,4})', - r'(\d{4}[/\-]\d{1,2}[/\-]\d{1,2})' - ], - 'amount': [ - r'total\s*:?\s*\₹?([0-9,]+\.?\d*)', - r'amount\s*:?\s*\₹?([0-9,]+\.?\d*)', - r'\₹([0-9,]+\.?\d*)' - ], - 'quantity': [ - r'qty\s*:?\s*(\d+)', - r'quantity\s*:?\s*(\d+)', - r'(\d+)\s*units?' 
- ] - } - - text_lower = text.lower() - - # Extract using patterns - for pattern in patterns['invoice_number']: - match = re.search(pattern, text_lower) - if match: - invoice_data.invoice_number = match.group(1).upper() - break - - for pattern in patterns['date']: - match = re.search(pattern, text) - if match: - invoice_data.date = self.parse_date(match.group(1)) - break - - for pattern in patterns['amount']: - match = re.search(pattern, text_lower) - if match: - try: - amount_str = match.group(1).replace(',', '') - invoice_data.amount = float(amount_str) - break - except ValueError: - continue - - for pattern in patterns['quantity']: - match = re.search(pattern, text_lower) - if match: - try: - invoice_data.quantity = int(match.group(1)) - break - except ValueError: - continue - - invoice_data.extraction_confidence = 0.6 - return invoice_data - - def parse_date(self, date_str: str) -> str: - """Parse date to YYYY-MM-DD format""" - if not date_str: - return "" - - formats = ['%Y-%m-%d', '%m/%d/%Y', '%d/%m/%Y', '%m-%d-%Y', '%d-%m-%Y', '%Y/%m/%d'] - - for fmt in formats: - try: - parsed_date = datetime.strptime(date_str, fmt) - return parsed_date.strftime('%Y-%m-%d') - except ValueError: - continue - - return date_str - - # Enhanced data storage with vector integration - def save_invoice_data(self, invoice_data: InvoiceData, raw_text: str = "", file_size: int = 0, file_type: str = ""): - """Save to database, JSON, and vector store""" - # Save to database (existing functionality) - conn = sqlite3.connect(self.db_path) - cursor = conn.cursor() - - try: - cursor.execute(''' - INSERT OR REPLACE INTO invoices - (supplier_name, buyer_name, invoice_number, date, amount, - quantity, product_description, file_path, file_name, file_size, - file_type, extraction_confidence, raw_text, updated_at) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP) - ''', ( - invoice_data.supplier_name, - invoice_data.buyer_name, - invoice_data.invoice_number, - invoice_data.date, 
- invoice_data.amount, - invoice_data.quantity, - invoice_data.product_description, - invoice_data.file_path, - Path(invoice_data.file_path).name if invoice_data.file_path else "", - file_size, - file_type, - invoice_data.extraction_confidence, - raw_text[:5000] # Store first 5000 chars of raw text - )) - - conn.commit() - db_success = True - - except sqlite3.IntegrityError as e: - st.error(f"Database error: {e}") - db_success = False - finally: - conn.close() - - # Save to JSON file - json_success = self.save_invoice_to_json(invoice_data, raw_text, file_size, file_type) - - return db_success and json_success - - def save_invoice_to_json(self, invoice_data: InvoiceData, raw_text: str = "", file_size: int = 0, file_type: str = "") -> bool: - """Save invoice data to JSON file and vector store""" - try: - # Load existing data - data = self.load_json_data() - - # Create invoice record - invoice_record = { - "id": len(data["invoices"]) + 1, - "invoice_number": invoice_data.invoice_number, - "supplier_name": invoice_data.supplier_name, - "buyer_name": invoice_data.buyer_name, - "date": invoice_data.date, - "amount": invoice_data.amount, - "quantity": invoice_data.quantity, - "product_description": invoice_data.product_description, - "file_info": { - "file_name": Path(invoice_data.file_path).name if invoice_data.file_path else "", - "file_path": invoice_data.file_path, - "file_size": file_size, - "file_type": file_type - }, - "extraction_info": { - "confidence": invoice_data.extraction_confidence, - "extraction_method": "AI" if self.use_ai else "Regex", - "raw_text_preview": raw_text[:500] if raw_text else "" - }, - "timestamps": { - "created_at": datetime.now().isoformat(), - "updated_at": datetime.now().isoformat() - } - } - - # Check for duplicates and update if exists - existing_index = None - for i, inv in enumerate(data["invoices"]): - if inv.get("invoice_number") == invoice_data.invoice_number: - existing_index = i - break - - if existing_index is not None: - # 
Update existing record - invoice_record["id"] = data["invoices"][existing_index]["id"] - invoice_record["timestamps"]["created_at"] = data["invoices"][existing_index]["timestamps"]["created_at"] - data["invoices"][existing_index] = invoice_record - else: - # Add new record - data["invoices"].append(invoice_record) - - # Update summary statistics - data = self.update_json_summary(data) - - # Save updated data - self.save_json_data(data) - - # Add to vector store - vector_success = self.vector_store.add_document(invoice_record, raw_text) - if vector_success: - self.vector_store.save_vector_store() - return True - except Exception as e: - st.error(f"Error saving to JSON: {e}") - return False - - def process_file(self, file_path: str, file_size: int = 0) -> InvoiceData: - """Process single file with enhanced logging""" - start_time = datetime.now() - file_name = Path(file_path).name - file_type = Path(file_path).suffix.lower() - - try: - text = self.extract_text_from_document(file_path) - if not text.strip(): - return InvoiceData() - - invoice_data = self.extract_invoice_info_with_ai(text, file_path) - - # Save with additional metadata - self.save_invoice_data(invoice_data, text, file_size, file_type) - - return invoice_data - - except Exception as e: - return InvoiceData() - - -# =============================================================================== -# ENHANCED CHATBOT WITH SEMANTIC SEARCH -# =============================================================================== - -class EnhancedInvoiceChatBot: - """Enhanced chat interface with hybrid search (SQL + Vector)""" - - def __init__(self, - db_path: str = "invoices.db", - json_path: str = "invoices_data.json", - model_name: str = "mistral:7b", - vector_store: InvoiceVectorStore = None): - - self.db_path = db_path - self.json_path = json_path - self.model_name = model_name - self.vector_store = vector_store - self.setup_ollama() - - def setup_ollama(self): - """Setup Ollama""" - try: - response = 
requests.get('http://localhost:11434/api/tags', timeout=5) - self.use_ai = response.status_code == 200 - except: - self.use_ai = False - - def load_json_data(self) -> dict: - """Load invoice data from JSON file""" - try: - with open(self.json_path, 'r', encoding='utf-8') as f: - return json.load(f) - except (FileNotFoundError, json.JSONDecodeError): - return {"invoices": [], "summary": {}, "metadata": {}} - - def get_invoice_data(self) -> pd.DataFrame: - """Get all invoice data from database (for compatibility)""" - conn = sqlite3.connect(self.db_path) - df = pd.read_sql_query("SELECT * FROM invoices", conn) - conn.close() - return df - - def hybrid_search(self, query: str, top_k: int = 5) -> Tuple[List[Dict], List[VectorSearchResult]]: - """Perform hybrid search combining SQL and vector search""" - # SQL search - sql_results = self.sql_search(query) - - # Vector search - vector_results = [] - if self.vector_store: - vector_results = self.vector_store.semantic_search(query, top_k) - - return sql_results, vector_results - - def sql_search(self, query: str) -> List[Dict]: - """Traditional SQL search based on keywords""" - query_lower = query.lower() - - try: - conn = sqlite3.connect(self.db_path) - - # Determine query type and build SQL - if any(phrase in query_lower for phrase in ["amount", "value", "cost", "price"]): - # Amount-based queries - sql = """ - SELECT invoice_number, supplier_name, buyer_name, amount, date, 'amount_search' as search_type - FROM invoices - WHERE amount IS NOT NULL AND amount > 0 - ORDER BY amount DESC - LIMIT 10 - """ - elif any(phrase in query_lower for phrase in ["supplier", "vendor", "company"]): - # Supplier-based queries - sql = """ - SELECT invoice_number, supplier_name, buyer_name, amount, date, 'supplier_search' as search_type - FROM invoices - WHERE supplier_name IS NOT NULL AND supplier_name != '' - ORDER BY supplier_name - LIMIT 10 - """ - elif any(phrase in query_lower for phrase in ["recent", "latest", "new"]): - # Recent 
invoices - sql = """ - SELECT invoice_number, supplier_name, buyer_name, amount, date, 'recent_search' as search_type - FROM invoices - ORDER BY created_at DESC - LIMIT 10 - """ - else: - # General search - sql = """ - SELECT invoice_number, supplier_name, buyer_name, amount, date, 'general_search' as search_type - FROM invoices - ORDER BY created_at DESC - LIMIT 10 - """ - - df = pd.read_sql_query(sql, conn) - conn.close() - - return df.to_dict('records') - - except Exception as e: - st.error(f"SQL search error: {e}") - return [] - - def query_database(self, query: str) -> str: - """Enhanced query processing with hybrid search""" - json_data = self.load_json_data() - - if not json_data.get("invoices"): - return "No invoice data found. Please process some invoices first." - - query_lower = query.lower() - - try: - # Handle basic queries with JSON data (fast responses) - if any(phrase in query_lower for phrase in ["summary", "overview", "report", "all invoices"]): - return self.generate_json_summary(json_data) - - elif "how many" in query_lower and "invoice" in query_lower: - return self.handle_invoice_count_query(json_data) - - elif "total amount" in query_lower or "total value" in query_lower: - return self.handle_amount_query(json_data) - - # For complex queries, use hybrid search - else: - return self.hybrid_search_response(query, json_data) - - except Exception as e: - return f"Error processing query: {e}" - - def hybrid_search_response(self, query: str, json_data: dict) -> str: - """Generate response using hybrid search results""" - # Perform hybrid search - sql_results, vector_results = self.hybrid_search(query) - - response_parts = [] - - # Add vector search results if available - if vector_results: - response_parts.append("šŸ” **Semantic Search Results:**") - for i, result in enumerate(vector_results[:3], 1): - response_parts.append(f"{i}. 
**{result.invoice_number}** - {result.supplier_name}") - response_parts.append(f" Similarity: {result.similarity_score:.3f}") - response_parts.append(f" Preview: {result.content_preview[:100]}...") - response_parts.append("") - - # Add SQL search results - if sql_results: - response_parts.append("šŸ“Š **Database Search Results:**") - for i, result in enumerate(sql_results[:3], 1): - invoice_num = result.get('invoice_number', 'N/A') - supplier = result.get('supplier_name', 'N/A') - amount = result.get('amount', 0) - date = result.get('date', 'N/A') - - response_parts.append(f"{i}. **{invoice_num}** - {supplier}") - response_parts.append(f" Amount: ₹{amount:.2f} | Date: {date}") - response_parts.append("") - - # Use AI for intelligent synthesis if available - if self.use_ai and (vector_results or sql_results): - ai_summary = self.ai_synthesize_results(query, vector_results, sql_results, json_data) - if ai_summary: - response_parts.insert(0, "šŸ¤– **AI Analysis:**") - response_parts.insert(1, ai_summary) - response_parts.insert(2, "") - - if not response_parts: - return "No relevant results found for your query. Try rephrasing or being more specific." 
- - return "\n".join(response_parts) - - def ai_synthesize_results(self, query: str, vector_results: List[VectorSearchResult], - sql_results: List[Dict], json_data: dict) -> str: - """Use AI to synthesize search results into intelligent response""" - if not self.use_ai: - return "" - - try: - # Prepare context for AI - context_parts = [] - - # Add vector search context - if vector_results: - context_parts.append("SEMANTIC SEARCH RESULTS:") - for result in vector_results[:3]: - context_parts.append(f"- Invoice {result.invoice_number}: {result.supplier_name} (Similarity: {result.similarity_score:.3f})") - - # Add SQL search context - if sql_results: - context_parts.append("\nDATABASE SEARCH RESULTS:") - for result in sql_results[:3]: - context_parts.append(f"- Invoice {result.get('invoice_number', 'N/A')}: {result.get('supplier_name', 'N/A')} (₹{result.get('amount', 0):.2f})") - - # Add summary statistics - summary = json_data.get("summary", {}) - context_parts.append(f"\nDATABASE SUMMARY:") - context_parts.append(f"- Total invoices: {len(json_data.get('invoices', []))}") - context_parts.append(f"- Total amount: ₹{summary.get('total_amount', 0):,.2f}") - context_parts.append(f"- Unique suppliers: {len(summary.get('unique_suppliers', []))}") - - context = "\n".join(context_parts) - - prompt = f""" -You are an AI assistant analyzing invoice data. Based on the search results and database summary, provide a helpful and insightful answer to the user's question. - -USER QUESTION: {query} - -SEARCH RESULTS AND CONTEXT: -{context} - -Provide a concise, informative response that: -1. Directly answers the user's question -2. Highlights the most relevant findings -3. Provides useful insights from the data -4. 
Uses clear formatting with markdown - -Response: -""" - - response = ollama.chat( - model=self.model_name, - messages=[{'role': 'user', 'content': prompt}], - options={'temperature': 0.3, 'num_predict': 400} - ) - - return response['message']['content'] - - except Exception as e: - st.error(f"AI synthesis error: {e}") - return "" - - # Copy existing helper methods from original chatbot - def handle_invoice_count_query(self, json_data: dict) -> str: - """Handle invoice counting queries""" - total_invoices = len(json_data["invoices"]) - unique_invoices = len(set(inv.get("invoice_number", "") for inv in json_data["invoices"] if inv.get("invoice_number"))) - - result = f"**Invoice Numbers Summary:**\n" - result += f"• Total unique invoice numbers: **{unique_invoices}**\n" - result += f"• Total records in database: **{total_invoices}**\n\n" - - if unique_invoices < total_invoices: - result += f"āš ļø Note: {total_invoices - unique_invoices} duplicate records found\n\n" - - result += "**Recent Invoice Numbers:**\n" - recent_invoices = sorted(json_data["invoices"], key=lambda x: x.get("timestamps", {}).get("created_at", ""), reverse=True)[:10] - for i, inv in enumerate(recent_invoices, 1): - if inv.get("invoice_number"): - result += f"{i}. 
{inv['invoice_number']}\n" - - return result - - def handle_amount_query(self, json_data: dict) -> str: - """Handle amount-related queries""" - total_amount = json_data.get("summary", {}).get("total_amount", 0) - avg_amount = total_amount / len(json_data["invoices"]) if json_data["invoices"] else 0 - return f"**Financial Summary:**\n• Total amount: **₹{total_amount:,.2f}**\n• Average per invoice: **₹{avg_amount:,.2f}**" - - def generate_json_summary(self, json_data: dict) -> str: - """Generate comprehensive summary using JSON data""" - try: - metadata = json_data.get("metadata", {}) - summary = json_data.get("summary", {}) - invoices = json_data.get("invoices", []) - - # Build comprehensive summary - report = "# šŸ“Š **COMPREHENSIVE INVOICE SUMMARY REPORT**\n\n" - - report += "## šŸ”¢ **Overall Statistics**\n" - report += f"• **Total Records**: {len(invoices):,}\n" - report += f"• **Unique Invoice Numbers**: {len(set(inv.get('invoice_number', '') for inv in invoices if inv.get('invoice_number'))):,}\n" - report += f"• **Unique Suppliers**: {len(summary.get('unique_suppliers', [])):,}\n" - report += f"• **Total Invoice Value**: ₹{summary.get('total_amount', 0):,.2f}\n" - report += f"• **Average Invoice Amount**: ₹{(summary.get('total_amount', 0) / len(invoices) if invoices else 0):,.2f}\n" - report += f"• **Total Quantity**: {summary.get('total_quantity', 0):,}\n\n" - - # Vector store statistics if available - if self.vector_store: - vector_stats = self.vector_store.get_stats() - report += "## šŸ” **Semantic Search Capabilities**\n" - report += f"• **Vector Store**: {vector_stats['total_documents']} documents indexed\n" - report += f"• **Embedding Model**: {vector_stats['model_name']}\n" - report += f"• **Embedding Dimension**: {vector_stats['embedding_dimension']}\n\n" - - report += "## šŸ“… **Processing Timeline**\n" - if metadata.get("created_at"): - report += f"• **First Processing Session**: {metadata['created_at'][:19]}\n" - report += f"• **Last Updated**: 
{metadata['last_updated'][:19]}\n\n" - - # Top suppliers analysis - if summary.get("unique_suppliers"): - supplier_counts = {} - supplier_amounts = {} - for inv in invoices: - supplier = inv.get("supplier_name", "") - if supplier: - supplier_counts[supplier] = supplier_counts.get(supplier, 0) + 1 - supplier_amounts[supplier] = supplier_amounts.get(supplier, 0) + inv.get("amount", 0) - - top_suppliers = sorted(supplier_counts.items(), key=lambda x: x[1], reverse=True)[:5] - report += "## šŸ¢ **Top 5 Suppliers by Volume**\n" - for supplier, count in top_suppliers: - amount = supplier_amounts.get(supplier, 0) - report += f"• **{supplier}**: {count} invoices (₹{amount:,.2f})\n" - report += "\n" - - # Processing statistics - processing_stats = summary.get("processing_stats", {}) - if processing_stats: - report += "## āš™ļø **Processing Statistics**\n" - report += f"• āœ… **Successful**: {processing_stats.get('successful', 0)} files\n" - report += f"• āŒ **Failed**: {processing_stats.get('failed', 0)} files\n" - report += f"• šŸ“„ **Total Processed**: {processing_stats.get('total_processed', 0)} files\n" - - return report - - except Exception as e: - return f"Error generating summary: {e}" - - -# =============================================================================== -# ENHANCED VISUALIZATION FUNCTIONS -# =============================================================================== - -def create_enhanced_visualizations(df: pd.DataFrame, vector_store: InvoiceVectorStore = None): - """Create enhanced dashboard visualizations with vector store insights""" - if df.empty: - st.info("No data available for visualization") - return - - # Original visualizations - col1, col2 = st.columns(2) - - with col1: - # Suppliers chart - if not df['supplier_name'].isna().all(): - supplier_counts = df['supplier_name'].value_counts().head(10) - fig_suppliers = px.bar( - x=supplier_counts.values, - y=supplier_counts.index, - orientation='h', - title="Top Suppliers by Delivery 
Count", - labels={'x': 'Number of Deliveries', 'y': 'Supplier'} - ) - fig_suppliers.update_layout(height=400) - st.plotly_chart(fig_suppliers, use_container_width=True) - - with col2: - # Amount by supplier - if not df['supplier_name'].isna().all() and not df['amount'].isna().all(): - supplier_amounts = df.groupby('supplier_name')['amount'].sum().sort_values(ascending=False).head(10) - fig_amounts = px.bar( - x=supplier_amounts.values, - y=supplier_amounts.index, - orientation='h', - title="Top Suppliers by Total Amount", - labels={'x': 'Total Amount (₹)', 'y': 'Supplier'} - ) - fig_amounts.update_layout(height=400) - st.plotly_chart(fig_amounts, use_container_width=True) - - # Vector store insights - if vector_store: - st.subheader("šŸ” Semantic Search Analytics") - - col1, col2 = st.columns(2) - - with col1: - # Vector store statistics - vector_stats = vector_store.get_stats() - - st.metric("Documents in Vector Store", vector_stats['total_documents']) - st.metric("Embedding Dimension", vector_stats['embedding_dimension']) - st.write(f"**Model**: {vector_stats['model_name']}") - - with col2: - # Test semantic search - st.write("**Test Semantic Search:**") - test_query = st.text_input("Enter search query:", placeholder="high value invoices") - - if test_query and st.button("šŸ” Search"): - results = vector_store.semantic_search(test_query, top_k=3) - - if results: - for i, result in enumerate(results, 1): - with st.expander(f"Result {i}: {result.invoice_number} (Score: {result.similarity_score:.3f})"): - st.write(f"**Supplier**: {result.supplier_name}") - st.write(f"**Content**: {result.content_preview}") - else: - st.info("No results found") - - -# =============================================================================== -# ENHANCED STREAMLIT APPLICATION -# =============================================================================== - -def enhanced_main(): - """Enhanced main Streamlit application with vector capabilities""" - - # Header - st.title("šŸ“„ 
Enhanced Invoice Processing & Analysis System") - st.markdown("Upload invoices, extract data with AI, and perform semantic search on your data!") - - # Initialize session state - if 'enhanced_processor' not in st.session_state: - st.session_state.enhanced_processor = EnhancedInvoiceProcessor() - - if 'enhanced_chatbot' not in st.session_state: - st.session_state.enhanced_chatbot = EnhancedInvoiceChatBot( - vector_store=st.session_state.enhanced_processor.vector_store - ) - - if 'chat_history' not in st.session_state: - st.session_state.chat_history = [] - - # ------------------------------------------------------------------------- - # ENHANCED SIDEBAR - # ------------------------------------------------------------------------- - - with st.sidebar: - st.header("šŸ› ļø Enhanced System Status") - - # Original system checks - if hasattr(st.session_state.enhanced_processor, 'docling_available') and st.session_state.enhanced_processor.docling_available: - st.success("āœ… Docling Ready") - else: - st.error("āŒ Docling Not Available") - - if hasattr(st.session_state.enhanced_processor, 'use_ai') and st.session_state.enhanced_processor.use_ai: - st.success("āœ… AI Processing Ready") - else: - st.warning("āš ļø Using Regex Fallback") - - # Vector store status - vector_store = st.session_state.enhanced_processor.vector_store - if vector_store and vector_store.embedding_model: - st.success("āœ… Vector Search Ready") - vector_stats = vector_store.get_stats() - st.metric("Indexed Documents", vector_stats['total_documents']) - else: - st.error("āŒ Vector Search Not Available") - - # JSON file status - json_path = st.session_state.enhanced_processor.json_path - if os.path.exists(json_path): - file_size = os.path.getsize(json_path) / 1024 # KB - st.success(f"āœ… JSON Data Ready ({file_size:.1f} KB)") - else: - st.info("šŸ“„ JSON file will be created") - - st.header("šŸ“Š Enhanced Quick Stats") - - # Load stats from JSON - try: - json_data = 
st.session_state.enhanced_chatbot.load_json_data() - total_invoices = len(json_data.get("invoices", [])) - total_amount = json_data.get("summary", {}).get("total_amount", 0) - unique_suppliers = len(json_data.get("summary", {}).get("unique_suppliers", [])) - - if total_invoices > 0: - st.metric("Total Invoices", total_invoices) - st.metric("Total Amount", f"₹{total_amount:,.2f}") - st.metric("Unique Suppliers", unique_suppliers) - else: - st.info("No data yet - upload some invoices!") - except Exception as e: - st.error(f"Error loading stats: {e}") - - # Vector store management - st.header("šŸ” Vector Store Management") - - if st.button("šŸ”„ Rebuild Vector Store", use_container_width=True, key="rebuild_vector_sidebar"): - try: - json_data = st.session_state.enhanced_chatbot.load_json_data() - with st.spinner("Rebuilding vector store..."): - success = vector_store.rebuild_vector_store(json_data) - if success: - st.success("āœ… Vector store rebuilt!") - st.rerun() - except Exception as e: - st.error(f"Error rebuilding vector store: {e}") - - if st.button("šŸ’¾ Save Vector Store", use_container_width=True, key="save_vector_store_sidebar"): - if vector_store.save_vector_store(): - st.success("āœ… Vector store saved!") - else: - st.error("āŒ Failed to save vector store") - - # ------------------------------------------------------------------------- - # ENHANCED MAIN TABS - # ------------------------------------------------------------------------- - - tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs([ - "šŸ“¤ Upload & Process", - "šŸ’¬ Enhanced Chat", - "šŸ“Š Enhanced Dashboard", - "šŸ“‹ Data View", - "šŸ”§ Vector Manager", - "šŸ” Semantic Search" - ]) - - # ------------------------------------------------------------------------- - # TAB 1: UPLOAD & PROCESS (Enhanced) - # ------------------------------------------------------------------------- - - with tab1: - st.header("Upload Invoice Documents") - st.info("šŸš€ Now with automatic vector indexing for semantic 
search!") - - uploaded_files = st.file_uploader( - "Drop invoice files here", - type=['pdf', 'jpg', 'jpeg', 'png', 'docx', 'txt'], - accept_multiple_files=True, - help="Supported formats: PDF, Images (JPG, PNG), Word documents, Text files" - ) - - if uploaded_files: - if st.button("šŸš€ Process All Files with Vector Indexing", type="primary", key="process_all_files"): - progress_bar = st.progress(0) - status_text = st.empty() - results_container = st.container() - - processed_count = 0 - total_files = len(uploaded_files) - - for i, uploaded_file in enumerate(uploaded_files): - # Save uploaded file temporarily - with tempfile.NamedTemporaryFile(delete=False, suffix=f".{uploaded_file.name.split('.')[-1]}") as tmp_file: - file_content = uploaded_file.getvalue() - tmp_file.write(file_content) - tmp_file_path = tmp_file.name - file_size = len(file_content) - - try: - status_text.text(f"Processing: {uploaded_file.name}") - - # Process the file - invoice_data = st.session_state.enhanced_processor.process_file(tmp_file_path, file_size) - - if invoice_data.invoice_number: - processed_count += 1 - - with results_container: - with st.expander(f"āœ… {uploaded_file.name}", expanded=False): - col1, col2 = st.columns(2) - with col1: - st.write(f"**Invoice #:** {invoice_data.invoice_number}") - st.write(f"**Supplier:** {invoice_data.supplier_name}") - st.write(f"**Buyer:** {invoice_data.buyer_name}") - with col2: - st.write(f"**Date:** {invoice_data.date}") - st.write(f"**Amount:** ₹{invoice_data.amount:.2f}") - st.write(f"**Quantity:** {invoice_data.quantity}") - st.write(f"**Description:** {invoice_data.product_description}") - st.write(f"**Confidence:** {invoice_data.extraction_confidence:.1%}") - st.success("šŸ” Added to vector store for semantic search") - else: - with results_container: - st.warning(f"āš ļø Limited data extracted from {uploaded_file.name}") - - except Exception as e: - with results_container: - st.error(f"āŒ Error processing {uploaded_file.name}: {e}") - 
- finally: - # Clean up temp file - os.unlink(tmp_file_path) - - # Update progress - progress_bar.progress((i + 1) / total_files) - - status_text.text(f"āœ… Processing complete! Successfully processed {processed_count}/{total_files} files") - - # Show vector store update confirmation - if processed_count > 0: - st.success(f"šŸ“„ JSON data and vector store updated with {processed_count} new invoices!") - # Save vector store - vector_store.save_vector_store() - - # ------------------------------------------------------------------------- - # TAB 2: ENHANCED CHAT - # ------------------------------------------------------------------------- - - with tab2: - st.header("šŸ’¬ Enhanced Chat with Semantic Search") - st.info("šŸ’” Now powered by hybrid search: SQL database + Vector similarity!") - - # Chat input - user_query = st.chat_input("Ask about your invoices using natural language...") - - if user_query: - # Add user message to history - st.session_state.chat_history.append({"role": "user", "content": user_query}) - - # Get enhanced bot response - with st.spinner("Analyzing with AI and semantic search..."): - bot_response = st.session_state.enhanced_chatbot.query_database(user_query) - - # Add bot response to history - st.session_state.chat_history.append({"role": "assistant", "content": bot_response}) - - # Display chat history - for message in st.session_state.chat_history: - with st.chat_message(message["role"]): - st.markdown(message["content"]) - - # Enhanced suggested queries - if not st.session_state.chat_history: - st.subheader("šŸ’” Try these enhanced AI questions:") - - col1, col2, col3 = st.columns(3) - - with col1: - st.markdown("**šŸ“Š Basic Queries:**") - basic_queries = [ - "Get comprehensive summary", - "How many invoices do we have?", - "What's the total amount?", - "List all suppliers" - ] - for i, query in enumerate(basic_queries): - if st.button(query, key=f"basic_query_{i}"): - st.session_state.chat_history.append({"role": "user", "content": 
query}) - bot_response = st.session_state.enhanced_chatbot.query_database(query) - st.session_state.chat_history.append({"role": "assistant", "content": bot_response}) - st.rerun() - - with col2: - st.markdown("**šŸ” Semantic Queries:**") - semantic_queries = [ - "Find high value transactions", - "Show me technology related invoices", - "Find invoices with office supplies", - "Search for consulting services" - ] - for i, query in enumerate(semantic_queries): - if st.button(query, key=f"semantic_query_{i}"): - st.session_state.chat_history.append({"role": "user", "content": query}) - bot_response = st.session_state.enhanced_chatbot.query_database(query) - st.session_state.chat_history.append({"role": "assistant", "content": bot_response}) - st.rerun() - - with col3: - st.markdown("**šŸ¤– AI Analysis:**") - ai_queries = [ - "Analyze spending patterns", - "Identify potential cost savings", - "Compare supplier performance", - "Find unusual invoice patterns"] - for i, query in enumerate(ai_queries): - if st.button(query, key=f"ai_query_{i}"): - st.session_state.chat_history.append({"role": "user", "content": query}) - bot_response = st.session_state.enhanced_chatbot.query_database(query) - st.session_state.chat_history.append({"role": "assistant", "content": bot_response}) - st.rerun() - - # ------------------------------------------------------------------------- - # TAB 3: ENHANCED DASHBOARD - # ------------------------------------------------------------------------- - - with tab3: - st.header("šŸ“Š Enhanced Analytics Dashboard") - - # Load data from JSON for faster processing - json_data = st.session_state.enhanced_chatbot.load_json_data() - invoices = json_data.get("invoices", []) - - if invoices: - # Convert JSON data to DataFrame for visualizations - df_data = [] - for inv in invoices: - df_data.append({ - 'invoice_number': inv.get('invoice_number', ''), - 'supplier_name': inv.get('supplier_name', ''), - 'buyer_name': inv.get('buyer_name', ''), - 'amount': 
inv.get('amount', 0), - 'quantity': inv.get('quantity', 0), - 'date': inv.get('date', ''), - 'extraction_confidence': inv.get('extraction_info', {}).get('confidence', 0), - 'created_at': inv.get('timestamps', {}).get('created_at', '') - }) - - df = pd.DataFrame(df_data) - - # Enhanced key metrics row - col1, col2, col3, col4, col5 = st.columns(5) - - with col1: - total_invoices = len(df) - unique_invoices = df['invoice_number'].nunique() - st.metric("Total Records", total_invoices, help="Total number of processed invoices") - st.metric("Unique Invoice Numbers", unique_invoices) - - with col2: - total_amount = df['amount'].sum() - avg_amount = df['amount'].mean() - st.metric("Total Amount", f"₹{total_amount:,.2f}") - st.metric("Average Amount", f"₹{avg_amount:,.2f}") - - with col3: - unique_suppliers = df['supplier_name'].nunique() - unique_buyers = df['buyer_name'].nunique() - st.metric("Unique Suppliers", unique_suppliers) - st.metric("Unique Buyers", unique_buyers) - - with col4: - total_quantity = df['quantity'].sum() - avg_confidence = df['extraction_confidence'].mean() - st.metric("Total Quantity", f"{total_quantity:,}") - st.metric("Avg Confidence", f"{avg_confidence:.1%}") - - with col5: - # Vector store metrics - vector_store = st.session_state.enhanced_processor.vector_store - if vector_store: - vector_stats = vector_store.get_stats() - st.metric("Vector Documents", vector_stats['total_documents']) - st.metric("Embedding Dim", vector_stats['embedding_dimension']) - else: - st.metric("Vector Store", "Not Available") - st.metric("Semantic Search", "Disabled") - - # Create enhanced visualizations - create_enhanced_visualizations(df, vector_store) - - else: - st.info("No data available for visualization. 
Upload and process some invoices first!") - - # ------------------------------------------------------------------------- - # TAB 4: DATA VIEW (Enhanced) - # ------------------------------------------------------------------------- - - with tab4: - st.header("šŸ“‹ Enhanced Invoice Data View") - - # Load from JSON for enhanced data view - json_data = st.session_state.enhanced_chatbot.load_json_data() - invoices = json_data.get("invoices", []) - - if invoices: - # Convert to DataFrame - df_data = [] - for inv in invoices: - df_data.append({ - 'ID': inv.get('id', ''), - 'Invoice Number': inv.get('invoice_number', ''), - 'Supplier': inv.get('supplier_name', ''), - 'Buyer': inv.get('buyer_name', ''), - 'Date': inv.get('date', ''), - 'Amount': inv.get('amount', 0), - 'Quantity': inv.get('quantity', 0), - 'Description': inv.get('product_description', ''), - 'Confidence': inv.get('extraction_info', {}).get('confidence', 0), - 'Method': inv.get('extraction_info', {}).get('extraction_method', ''), - 'File Type': inv.get('file_info', {}).get('file_type', ''), - 'Vector Indexed': 'Yes' if vector_store and any(meta.get('invoice_number') == inv.get('invoice_number') for meta in vector_store.document_metadata) else 'No', - 'Created': inv.get('timestamps', {}).get('created_at', '')[:19] if inv.get('timestamps', {}).get('created_at') else '' - }) - - df = pd.DataFrame(df_data) - - # Enhanced filters - col1, col2, col3, col4, col5 = st.columns(5) - - with col1: - suppliers = ['All'] + list(df['Supplier'].dropna().unique()) - selected_supplier = st.selectbox("Filter by Supplier", suppliers) - - with col2: - buyers = ['All'] + list(df['Buyer'].dropna().unique()) - selected_buyer = st.selectbox("Filter by Buyer", buyers) - - with col3: - methods = ['All'] + list(df['Method'].dropna().unique()) - selected_method = st.selectbox("Filter by Extraction Method", methods) - - with col4: - confidence_filter = st.selectbox("Confidence Filter", - ["All", "High (>80%)", "Medium (50-80%)", "Low 
(<50%)"]) - - with col5: - vector_filter = st.selectbox("Vector Indexed", ["All", "Yes", "No"]) - - # Apply filters - filtered_df = df.copy() - if selected_supplier != 'All': - filtered_df = filtered_df[filtered_df['Supplier'] == selected_supplier] - if selected_buyer != 'All': - filtered_df = filtered_df[filtered_df['Buyer'] == selected_buyer] - if selected_method != 'All': - filtered_df = filtered_df[filtered_df['Method'] == selected_method] - if vector_filter != 'All': - filtered_df = filtered_df[filtered_df['Vector Indexed'] == vector_filter] - - if confidence_filter == "High (>80%)": - filtered_df = filtered_df[filtered_df['Confidence'] > 0.8] - elif confidence_filter == "Medium (50-80%)": - filtered_df = filtered_df[(filtered_df['Confidence'] >= 0.5) & (filtered_df['Confidence'] <= 0.8)] - elif confidence_filter == "Low (<50%)": - filtered_df = filtered_df[filtered_df['Confidence'] < 0.5] - - # Display summary - if len(filtered_df) != len(df): - st.info(f"Showing {len(filtered_df)} of {len(df)} records") - - # Display data with enhanced columns - st.dataframe( - filtered_df, - use_container_width=True, - column_config={ - "Amount": st.column_config.NumberColumn("Amount", format="₹%.2f"), - "Confidence": st.column_config.ProgressColumn("Confidence", min_value=0, max_value=1), - "Vector Indexed": st.column_config.SelectboxColumn("Vector Indexed", options=["Yes", "No"]), - } - ) - - # Enhanced export options - col1, col2, col3, col4 = st.columns(4) - - with col1: - csv = filtered_df.to_csv(index=False) - st.download_button( - label="šŸ“„ Download CSV", - data=csv, - file_name=f"filtered_invoices_{datetime.now().strftime('%Y%m%d_%H%M')}.csv", - mime="text/csv" - ) - - with col2: - # Export filtered JSON data - filtered_invoices = [inv for inv in invoices if inv.get('id') in filtered_df['ID'].values] - filtered_json = { - "metadata": json_data.get("metadata", {}), - "invoices": filtered_invoices, - "filter_applied": { - "supplier": selected_supplier if 
selected_supplier != 'All' else None, - "buyer": selected_buyer if selected_buyer != 'All' else None, - "method": selected_method if selected_method != 'All' else None, - "confidence": confidence_filter if confidence_filter != 'All' else None, - "vector_indexed": vector_filter if vector_filter != 'All' else None - } - } - - st.download_button( - label="šŸ“„ Download JSON", - data=json.dumps(filtered_json, indent=2, ensure_ascii=False), - file_name=f"filtered_invoices_{datetime.now().strftime('%Y%m%d_%H%M')}.json", - mime="application/json" - ) - - with col3: - # Export vector embeddings metadata - if vector_store and vector_store.document_metadata: - vector_metadata = [] - for meta in vector_store.document_metadata: - if any(inv.get('invoice_number') == meta.get('invoice_number') for inv in filtered_invoices): - vector_metadata.append(meta) - - st.download_button( - label="šŸ” Download Vector Metadata", - data=json.dumps(vector_metadata, indent=2, ensure_ascii=False), - file_name=f"vector_metadata_{datetime.now().strftime('%Y%m%d_%H%M')}.json", - mime="application/json" - ) - - with col4: - if st.button("šŸ—‘ļø Clear All Data", type="secondary"): - if st.button("āš ļø Confirm Delete", type="secondary"): - # Clear database - conn = sqlite3.connect(st.session_state.enhanced_processor.db_path) - cursor = conn.cursor() - cursor.execute("DELETE FROM invoices") - cursor.execute("DELETE FROM file_processing_log") - cursor.execute("DELETE FROM processing_summary") - conn.commit() - conn.close() - - # Reset JSON file - st.session_state.enhanced_processor.setup_json_storage() - - # Clear vector store - if vector_store: - vector_store.vector_store = faiss.IndexFlatIP(vector_store.embedding_dimension) - vector_store.document_metadata = [] - vector_store.save_vector_store() - - st.success("All data cleared!") - st.rerun() - else: - st.info("No invoice data available. 
Upload and process some invoices first!") - - # ------------------------------------------------------------------------- - # TAB 5: VECTOR MANAGER - # ------------------------------------------------------------------------- - - with tab5: - st.header("šŸ” Vector Store Manager") - st.info("Manage your semantic search capabilities and vector embeddings") - - vector_store = st.session_state.enhanced_processor.vector_store - - if vector_store: - # Vector store information - col1, col2 = st.columns(2) - - with col1: - st.subheader("šŸ“Š Vector Store Info") - vector_stats = vector_store.get_stats() - st.write(f"**Total Documents:** {vector_stats['total_documents']}") - st.write(f"**Embedding Model:** {vector_stats['model_name']}") - st.write(f"**Embedding Dimension:** {vector_stats['embedding_dimension']}") - st.write(f"**Vector Store Size:** {vector_stats['vector_store_size']}") - - # File status - if os.path.exists(vector_store.vector_store_path): - file_size = os.path.getsize(vector_store.vector_store_path) / 1024 # KB - st.write(f"**Vector File Size:** {file_size:.2f} KB") - else: - st.write("**Vector File:** Not saved yet") - - with col2: - st.subheader("šŸ› ļø Management Tools") - - if st.button("šŸ”„ Rebuild Vector Store", use_container_width=True, key="rebuild_vector_admin"): - json_data = st.session_state.enhanced_chatbot.load_json_data() - with st.spinner("Rebuilding vector store..."): - success = vector_store.rebuild_vector_store(json_data) - if success: - st.success("āœ… Vector store rebuilt successfully!") - st.rerun() - - if st.button("šŸ’¾ Save Vector Store", use_container_width=True, key="save_vector_sidebar"): - if vector_store.save_vector_store(): - st.success("āœ… Vector store saved to disk!") - else: - st.error("āŒ Failed to save vector store") - - if st.button("šŸ“Š Validate Vector Store", use_container_width=True): - try: - # Validation checks - issues = [] - - if not vector_store.embedding_model: - issues.append("āŒ Embedding model not 
loaded") - - if not vector_store.vector_store: - issues.append("āŒ FAISS index not initialized") - - if len(vector_store.document_metadata) == 0: - issues.append("āš ļø No documents in vector store") - - if vector_store.vector_store and vector_store.vector_store.ntotal != len(vector_store.document_metadata): - issues.append("āš ļø Mismatch between vectors and metadata") - - if not issues: - st.success("āœ… Vector store validation passed!") - else: - for issue in issues: - st.warning(issue) - - except Exception as e: - st.error(f"Validation error: {e}") - - # Document metadata viewer - st.subheader("šŸ“‹ Document Metadata") - - if vector_store.document_metadata: - # Create DataFrame from metadata - metadata_df = pd.DataFrame(vector_store.document_metadata) - - # Display metadata - st.dataframe( - metadata_df, - use_container_width=True, - column_config={ - "extraction_confidence": st.column_config.ProgressColumn("Confidence", min_value=0, max_value=1), - "amount": st.column_config.NumberColumn("Amount", format="₹%.2f"), - } - ) - - # Export metadata - if st.button("šŸ“„ Export Metadata CSV"): - csv_data = metadata_df.to_csv(index=False) - st.download_button( - label="šŸ’¾ Download Metadata", - data=csv_data, - file_name=f"vector_metadata_{datetime.now().strftime('%Y%m%d_%H%M')}.csv", - mime="text/csv" - ) - else: - st.info("No documents in vector store yet.") - - # Vector store configuration - st.subheader("āš™ļø Configuration") - - col1, col2 = st.columns(2) - - with col1: - st.write("**Current Settings:**") - st.code(f""" -Embedding Model: {vector_store.embedding_model_name} -Vector Store Path: {vector_store.vector_store_path} -Metadata Path: {vector_store.metadata_path} -Embedding Dimension: {vector_store.embedding_dimension} - """) - - with col2: - st.write("**Change Embedding Model:**") - new_model = st.selectbox( - "Select new model:", - [ - "all-MiniLM-L6-v2", - "all-mpnet-base-v2", - "multi-qa-mpnet-base-dot-v1", - "all-distilroberta-v1" - ], - index=0 - 
) - - if st.button("šŸ”„ Switch Model"): - if new_model != vector_store.embedding_model_name: - with st.spinner(f"Loading {new_model}..."): - try: - # Create new vector store with different model - new_vector_store = InvoiceVectorStore(embedding_model=new_model) - - # Rebuild with new embeddings - json_data = st.session_state.enhanced_chatbot.load_json_data() - if new_vector_store.rebuild_vector_store(json_data): - st.session_state.enhanced_processor.vector_store = new_vector_store - st.session_state.enhanced_chatbot.vector_store = new_vector_store - st.success(f"āœ… Switched to {new_model}!") - st.rerun() - else: - st.error("Failed to rebuild with new model") - except Exception as e: - st.error(f"Error switching model: {e}") - else: - st.error("Vector store not available. Please check the system status.") - - # ------------------------------------------------------------------------- - # TAB 6: SEMANTIC SEARCH - # ------------------------------------------------------------------------- - - with tab6: - st.header("šŸ” Semantic Search Interface") - st.info("Search your invoices using natural language and semantic similarity") - - vector_store = st.session_state.enhanced_processor.vector_store - - if vector_store and vector_store.document_metadata: - # Search interface - col1, col2 = st.columns([3, 1]) - - with col1: - search_query = st.text_input( - "Enter your search query:", - placeholder="e.g., high value technology purchases, office supplies, consulting services", - help="Use natural language to describe what you're looking for" - ) - - with col2: - top_k = st.number_input("Number of results:", min_value=1, max_value=20, value=5) - - if search_query: - with st.spinner("Performing semantic search..."): - results = vector_store.semantic_search(search_query, top_k) - - if results: - st.subheader(f"šŸŽÆ Found {len(results)} similar documents:") - - for i, result in enumerate(results, 1): - with st.expander( - f"{i}. 
Invoice {result.invoice_number} - {result.supplier_name} " - f"(Similarity: {result.similarity_score:.3f})", - expanded=i <= 3 - ): - col1, col2 = st.columns(2) - - with col1: - st.write(f"**Invoice Number:** {result.invoice_number}") - st.write(f"**Supplier:** {result.supplier_name}") - st.write(f"**Amount:** ₹{result.metadata.get('amount', 0):,.2f}") - st.write(f"**Date:** {result.metadata.get('date', 'N/A')}") - - with col2: - st.write(f"**Similarity Score:** {result.similarity_score:.4f}") - st.write(f"**File:** {result.metadata.get('file_name', 'N/A')}") - st.write(f"**Confidence:** {result.metadata.get('extraction_confidence', 0):.1%}") - st.write(f"**Timestamp:** {result.metadata.get('timestamp', 'N/A')[:19]}") - - st.write("**Content Preview:**") - st.text_area( - "Document content:", - value=result.content_preview, - height=100, - key=f"content_{i}", - disabled=True - ) - else: - st.warning("No similar documents found. Try rephrasing your query.") - - # Search suggestions and examples - st.subheader("šŸ’” Search Examples") - - col1, col2, col3 = st.columns(3) - - with col1: - st.markdown("**By Product/Service:**") - product_queries = [ - "office supplies and stationery", - "technology equipment purchases", - "consulting and professional services", - "software licenses and subscriptions" - ] - for i, query in enumerate(product_queries): - if st.button(query, key=f"product_query_{i}"): - st.rerun() - - with col2: - st.markdown("**By Amount/Value:**") - amount_queries = [ - "high value transactions above 50000", - "small purchases under 5000", - "medium range invoices", - "expensive equipment purchases" - ] - for i, query in enumerate(amount_queries): - if st.button(query, key=f"amount_query_{i}"): - st.rerun() - - with col3: - st.markdown("**By Pattern/Type:**") - pattern_queries = [ - "recurring monthly services", - "one-time large purchases", - "maintenance and support", - "travel and expenses" - ] - for i, query in enumerate(pattern_queries): - if 
st.button(query, key=f"pattern_query_{i}"): - st.rerun() - - # Advanced search options - with st.expander("šŸ”§ Advanced Search Options"): - col1, col2 = st.columns(2) - - with col1: - similarity_threshold = st.slider( - "Similarity Threshold:", - min_value=0.0, - max_value=1.0, - value=0.1, - step=0.05, - help="Filter results below this similarity score" - ) - - date_range = st.date_input( - "Date Range Filter:", - value=None, - help="Filter results by date range" - ) - - with col2: - amount_range = st.slider( - "Amount Range (₹):", - min_value=0, - max_value=100000, - value=(0, 100000), - help="Filter results by amount range" - ) - - supplier_filter = st.multiselect( - "Filter by Suppliers:", - options=[meta.get('supplier_name', '') for meta in vector_store.document_metadata if meta.get('supplier_name')], - help="Select specific suppliers to search within" - ) - - if st.button("šŸŽÆ Apply Advanced Search"): - if search_query: - # Apply advanced filters to results - filtered_results = [] - all_results = vector_store.semantic_search(search_query, 20) - - for result in all_results: - # Apply filters - if result.similarity_score < similarity_threshold: - continue - - if supplier_filter and result.supplier_name not in supplier_filter: - continue - - amount = result.metadata.get('amount', 0) - if not (amount_range[0] <= amount <= amount_range[1]): - continue - - filtered_results.append(result) - - st.success(f"Found {len(filtered_results)} results matching advanced criteria") - - # Display filtered results - for i, result in enumerate(filtered_results[:top_k], 1): - st.write(f"{i}. **{result.invoice_number}** - {result.supplier_name} (Score: {result.similarity_score:.3f})") - - else: - st.warning("No documents in vector store. 
Please upload and process some invoices first.") - - if st.button("šŸš€ Process Sample Data"): - st.info("Upload some invoices in the 'Upload & Process' tab to enable semantic search.") - - -# =============================================================================== -# UTILITY FUNCTIONS -# =============================================================================== - -def export_complete_system_data(): - """Export all system data for backup or migration""" - try: - # Get all data - json_data = st.session_state.enhanced_chatbot.load_json_data() - vector_stats = st.session_state.enhanced_processor.vector_store.get_stats() - - # Create comprehensive backup - backup_data = { - "export_timestamp": datetime.now().isoformat(), - "system_info": { - "version": "enhanced_v1.0", - "vector_model": vector_stats['model_name'], - "embedding_dimension": vector_stats['embedding_dimension'] - }, - "invoice_data": json_data, - "vector_metadata": st.session_state.enhanced_processor.vector_store.document_metadata, - "system_stats": vector_stats - } - - return json.dumps(backup_data, indent=2, ensure_ascii=False) - - except Exception as e: - st.error(f"Error creating backup: {e}") - return None - -def import_system_data(backup_data: str): - """Import system data from backup""" - try: - data = json.loads(backup_data) - - # Restore JSON data - invoice_data = data.get("invoice_data", {}) - st.session_state.enhanced_processor.save_json_data(invoice_data) - - # Rebuild vector store - vector_store = st.session_state.enhanced_processor.vector_store - if vector_store: - vector_store.rebuild_vector_store(invoice_data) - - st.success("āœ… System data imported successfully!") - return True - - except Exception as e: - st.error(f"Error importing data: {e}") - return False - - -# =============================================================================== -# MAIN APPLICATION ENTRY POINT -# =============================================================================== - -if __name__ == 
"__main__": - enhanced_main() - - -# =============================================================================== -# ADDITIONAL CONFIGURATION AND SETUP -# =============================================================================== - -# Configuration for different deployment environments -DEPLOYMENT_CONFIG = { - "local": { - "embedding_model": "all-MiniLM-L6-v2", - "ollama_url": "http://localhost:11434", - "vector_store_path": "invoice_vectors.faiss", - "batch_size": 10 - }, - "cloud": { - "embedding_model": "all-MiniLM-L6-v2", - "ollama_url": "http://ollama-service:11434", - "vector_store_path": "/data/invoice_vectors.faiss", - "batch_size": 20 - } -} - -# Performance optimization settings -PERFORMANCE_CONFIG = { - "max_text_length": 5000, - "max_embedding_batch_size": 32, - "vector_search_timeout": 30, - "similarity_threshold": 0.1, - "max_results_per_query": 20 -} - -# Security settings -SECURITY_CONFIG = { - "max_file_size_mb": 50, - "allowed_file_types": ['.pdf', '.jpg', '.jpeg', '.png', '.docx', '.txt'], - "sanitize_uploads": True, - "max_concurrent_uploads": 5 -} - -# Monitoring and logging configuration -MONITORING_CONFIG = { - "log_level": "INFO", - "enable_performance_metrics": True, - "track_search_queries": True, - "enable_error_reporting": True -} - - -# =============================================================================== -# HELPER FUNCTIONS FOR EXTENDED FUNCTIONALITY -# =============================================================================== - -def setup_advanced_logging(): - """Setup advanced logging for the enhanced system""" - import logging - - # Create formatters - detailed_formatter = logging.Formatter( - '%(asctime)s - %(name)s - %(levelname)s - %(funcName)s:%(lineno)d - %(message)s' - ) - - # File handler for detailed logs - file_handler = logging.FileHandler('enhanced_invoice_system.log') - file_handler.setFormatter(detailed_formatter) - file_handler.setLevel(logging.DEBUG) - - # Console handler for important messages 
- console_handler = logging.StreamHandler() - console_handler.setFormatter(logging.Formatter('%(levelname)s: %(message)s')) - console_handler.setLevel(logging.INFO) - - # Setup loggers - logger = logging.getLogger('enhanced_invoice_system') - logger.setLevel(logging.DEBUG) - logger.addHandler(file_handler) - logger.addHandler(console_handler) - - return logger - -def validate_system_requirements(): - """Enhanced system validation that works with your compatible packages""" - requirements_met = True - issues = [] - - # Define required packages with their actual import names - required_packages = [ - { - 'import_name': 'faiss', - 'package_name': 'faiss-cpu', - 'display_name': 'FAISS (Vector Search)', - 'required': True - }, - { - 'import_name': 'sentence_transformers', - 'package_name': 'sentence-transformers', - 'display_name': 'Sentence Transformers', - 'required': True - }, - { - 'import_name': 'torch', - 'package_name': 'torch', - 'display_name': 'PyTorch', - 'required': True - }, - { - 'import_name': 'streamlit', - 'package_name': 'streamlit', - 'display_name': 'Streamlit', - 'required': True - }, - { - 'import_name': 'pandas', - 'package_name': 'pandas', - 'display_name': 'Pandas', - 'required': True - }, - { - 'import_name': 'numpy', - 'package_name': 'numpy', - 'display_name': 'NumPy', - 'required': True - }, - { - 'import_name': 'sklearn', - 'package_name': 'scikit-learn', - 'display_name': 'Scikit-learn', - 'required': False - }, - { - 'import_name': 'requests', - 'package_name': 'requests', - 'display_name': 'Requests', - 'required': False - } - ] - - st.markdown("### šŸ” System Requirements Check") - - for pkg in required_packages: - try: - # Try to import the package - module = importlib.import_module(pkg['import_name']) - - # Get version if available - version = getattr(module, '__version__', 'Unknown') - - # Success - st.success(f"āœ… {pkg['display_name']}: {version}") - - except ImportError as e: - # Package missing - if pkg['required']: - 
st.error(f"āŒ {pkg['display_name']}: Missing") - issues.append(f"Missing required package: {pkg['package_name']}") - requirements_met = False - else: - st.warning(f"āš ļø {pkg['display_name']}: Missing (optional)") - - except Exception as e: - # Other error - st.error(f"āŒ {pkg['display_name']}: Error - {str(e)[:50]}...") - if pkg['required']: - issues.append(f"Error with package {pkg['package_name']}: {str(e)}") - requirements_met = False - - # Test critical functionality - st.markdown("### 🧪 Functionality Tests") - - try: - # Test FAISS basic functionality - import faiss - import numpy as np - - # Create a simple index - index = faiss.IndexFlatL2(64) - vectors = np.random.random((5, 64)).astype('float32') - index.add(vectors) - - # Test search - query = np.random.random((1, 64)).astype('float32') - distances, indices = index.search(query, 3) - - st.success("āœ… FAISS: Vector search working") - - except Exception as e: - st.error(f"āŒ FAISS: Functionality test failed - {str(e)[:100]}...") - issues.append(f"FAISS functionality error: {str(e)}") - requirements_met = False - - try: - # Test Sentence Transformers - from sentence_transformers import SentenceTransformer - - # Don't actually load a model (takes time), just test import - st.success("āœ… Sentence Transformers: Import successful") - - except Exception as e: - st.error(f"āŒ Sentence Transformers: Import failed - {str(e)[:100]}...") - issues.append(f"Sentence Transformers error: {str(e)}") - requirements_met = False - - # Check Ollama availability - try: - response = requests.get('http://localhost:11434/api/tags', timeout=5) - if response.status_code != 200: - issues.append("āš ļø Ollama service not responding") - except: - issues.append("āš ļø Ollama not available") - - # Check disk space (basic check) - import shutil - free_space_gb = shutil.disk_usage('.').free / (1024**3) - if free_space_gb < 1: - issues.append(f"āš ļø Low disk space: {free_space_gb:.2f} GB available") - - return requirements_met, 
issues - -def create_system_health_dashboard(): - """Create a system health monitoring dashboard""" - st.subheader("šŸ„ System Health Monitor") - - requirements_met, issues = validate_system_requirements() - - if requirements_met: - st.success("āœ… All system requirements met") - else: - st.error("āŒ System requirements issues detected") - for issue in issues: - st.write(issue) - # System metrics - col1, col2, col3, col4 = st.columns(4) - - with col1: - # Memory usage - import psutil - memory_percent = psutil.virtual_memory().percent - st.metric("Memory Usage", f"{memory_percent:.1f}%") - - with col2: - # Disk usage - disk_percent = psutil.disk_usage('.').used / psutil.disk_usage('.').total * 100 - st.metric("Disk Usage", f"{disk_percent:.1f}%") - - with col3: - # Vector store health - vector_store = st.session_state.enhanced_processor.vector_store - if vector_store and vector_store.document_metadata: - vector_health = "Healthy" - else: - vector_health = "Not Ready" - st.metric("Vector Store", vector_health) - - with col4: - # Database health - try: - conn = sqlite3.connect(st.session_state.enhanced_processor.db_path) - cursor = conn.cursor() - cursor.execute("SELECT COUNT(*) FROM invoices") - db_records = cursor.fetchone()[0] - conn.close() - st.metric("DB Records", db_records) - except: - st.metric("Database", "Error") - -def performance_benchmark(): - """Run performance benchmarks on the system""" - st.subheader("⚔ Performance Benchmark") - - if st.button("šŸš€ Run Benchmark"): - with st.spinner("Running performance tests..."): - results = {} - - # Test embedding generation speed - vector_store = st.session_state.enhanced_processor.vector_store - if vector_store and vector_store.embedding_model: - start_time = datetime.now() - test_texts = [ - "Invoice number INV-2024-001 from ABC Corp for office supplies", - "Technology equipment purchase from XYZ Ltd for $5000", - "Consulting services invoice for project management" - ] - embeddings = 
vector_store.embedding_model.encode(test_texts) - embedding_time = (datetime.now() - start_time).total_seconds() - results['embedding_speed'] = f"{len(test_texts)/embedding_time:.2f} docs/sec" - - # Test vector search speed - if vector_store and vector_store.document_metadata: - start_time = datetime.now() - search_results = vector_store.semantic_search("office supplies", top_k=5) - search_time = (datetime.now() - start_time).total_seconds() - results['search_speed'] = f"{search_time*1000:.2f} ms" - - # Test database query speed - try: - start_time = datetime.now() - conn = sqlite3.connect(st.session_state.enhanced_processor.db_path) - df = pd.read_sql_query("SELECT * FROM invoices LIMIT 100", conn) - conn.close() - db_time = (datetime.now() - start_time).total_seconds() - results['db_query_speed'] = f"{db_time*1000:.2f} ms" - except: - results['db_query_speed'] = "Error" - - # Display results - for metric, value in results.items(): - st.metric(metric.replace('_', ' ').title(), value) - -def create_backup_system(): - """Create comprehensive backup functionality""" - st.subheader("šŸ’¾ Backup & Restore System") - - col1, col2 = st.columns(2) - - with col1: - st.markdown("**Create Backup:**") - - backup_options = st.multiselect( - "Select data to backup:", - ["JSON Data", "Vector Store", "Database", "System Config"], - default=["JSON Data", "Vector Store", "Database"] - ) - - if st.button("šŸ“¦ Create Backup"): - with st.spinner("Creating backup..."): - backup_data = {} - - if "JSON Data" in backup_options: - json_data = st.session_state.enhanced_chatbot.load_json_data() - backup_data["json_data"] = json_data - - if "Vector Store" in backup_options: - vector_store = st.session_state.enhanced_processor.vector_store - if vector_store: - backup_data["vector_metadata"] = vector_store.document_metadata - backup_data["vector_config"] = { - "model_name": vector_store.embedding_model_name, - "dimension": vector_store.embedding_dimension - } - - if "Database" in 
backup_options: - try: - conn = sqlite3.connect(st.session_state.enhanced_processor.db_path) - df = pd.read_sql_query("SELECT * FROM invoices", conn) - conn.close() - backup_data["database_data"] = df.to_dict('records') - except: - st.error("Failed to backup database") - - if "System Config" in backup_options: - backup_data["system_config"] = { - "deployment_config": DEPLOYMENT_CONFIG, - "performance_config": PERFORMANCE_CONFIG, - "security_config": SECURITY_CONFIG - } - - backup_data["backup_timestamp"] = datetime.now().isoformat() - backup_data["backup_version"] = "enhanced_v1.0" - - # Create downloadable backup - backup_json = json.dumps(backup_data, indent=2, ensure_ascii=False) - - st.download_button( - label="šŸ’¾ Download Backup", - data=backup_json, - file_name=f"invoice_system_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json", - mime="application/json" - ) - - st.success("āœ… Backup created successfully!") - - with col2: - st.markdown("**Restore from Backup:**") - - uploaded_backup = st.file_uploader( - "Upload backup file:", - type=['json'], - help="Select a backup file created by this system" - ) - - if uploaded_backup: - try: - backup_content = json.loads(uploaded_backup.getvalue().decode('utf-8')) - - st.write("**Backup Information:**") - st.write(f"Created: {backup_content.get('backup_timestamp', 'Unknown')}") - st.write(f"Version: {backup_content.get('backup_version', 'Unknown')}") - - restore_options = [] - if "json_data" in backup_content: - restore_options.append("JSON Data") - if "vector_metadata" in backup_content: - restore_options.append("Vector Store") - if "database_data" in backup_content: - restore_options.append("Database") - if "system_config" in backup_content: - restore_options.append("System Config") - - selected_restore = st.multiselect( - "Select data to restore:", - restore_options, - default=restore_options - ) - - if st.button("šŸ”„ Restore Data"): - with st.spinner("Restoring data..."): - try: - if "JSON Data" in 
selected_restore and "json_data" in backup_content: - st.session_state.enhanced_processor.save_json_data(backup_content["json_data"]) - st.success("āœ… JSON data restored") - - if "Vector Store" in selected_restore and "vector_metadata" in backup_content: - vector_store = st.session_state.enhanced_processor.vector_store - if vector_store: - vector_store.document_metadata = backup_content["vector_metadata"] - vector_store.save_vector_store() - st.success("āœ… Vector store metadata restored") - - if "Database" in selected_restore and "database_data" in backup_content: - # Clear existing data and restore - conn = sqlite3.connect(st.session_state.enhanced_processor.db_path) - cursor = conn.cursor() - cursor.execute("DELETE FROM invoices") - - # Insert restored data - for record in backup_content["database_data"]: - cursor.execute(''' - INSERT INTO invoices - (supplier_name, buyer_name, invoice_number, date, amount, - quantity, product_description, file_path, file_name, - extraction_confidence, raw_text) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) - ''', ( - record.get('supplier_name', ''), - record.get('buyer_name', ''), - record.get('invoice_number', ''), - record.get('date', ''), - record.get('amount', 0), - record.get('quantity', 0), - record.get('product_description', ''), - record.get('file_path', ''), - record.get('file_name', ''), - record.get('extraction_confidence', 0), - record.get('raw_text', '') - )) - - conn.commit() - conn.close() - st.success("āœ… Database restored") - - st.success("šŸŽ‰ Restore completed! 
Please refresh the page.") - - except Exception as e: - st.error(f"Restore failed: {e}") - - except Exception as e: - st.error(f"Invalid backup file: {e}") - -def create_deployment_guide(): - """Create deployment configuration guide""" - st.subheader("šŸš€ Deployment Guide") - - deployment_type = st.selectbox( - "Select deployment environment:", - ["Local Development", "Docker Container", "Cloud Deployment", "Production Server"] - ) - - if deployment_type == "Local Development": - st.markdown(""" - ### šŸ“‹ Local Development Setup - - **Prerequisites:** - ```bash - # Install Python 3.8+ - python --version - - # Install Ollama - curl -fsSL https://ollama.com/install.sh | sh - ollama serve - ollama pull mistral:7b - ``` - - **Installation:** - ```bash - # Clone repository - git clone - cd enhanced-invoice-pipeline - - # Install dependencies - pip install -r requirements.txt - - # Run the application - streamlit run enhanced_main.py - ``` - """) - - elif deployment_type == "Docker Container": - st.markdown(""" - ### 🐳 Docker Deployment - - **Dockerfile:** - ```dockerfile - FROM python:3.9-slim - - WORKDIR /app - - # Install system dependencies - RUN apt-get update && apt-get install -y \\ - curl \\ - && rm -rf /var/lib/apt/lists/* - - # Install Ollama - RUN curl -fsSL https://ollama.com/install.sh | sh - - # Copy requirements and install Python dependencies - COPY requirements.txt . - RUN pip install -r requirements.txt - - # Copy application code - COPY . . - - # Expose ports - EXPOSE 8501 11434 - - # Start script - CMD ["sh", "-c", "ollama serve & streamlit run enhanced_main.py --server.port=8501 --server.address=0.0.0.0"] - ``` - - **Docker Compose:** - ```yaml - version: '3.8' - services: - invoice-app: - build: . 
- ports: - - "8501:8501" - - "11434:11434" - volumes: - - ./data:/app/data - - ./models:/app/models - environment: - - STREAMLIT_SERVER_ADDRESS=0.0.0.0 - ``` - """) - - elif deployment_type == "Cloud Deployment": - st.markdown(""" - ### ā˜ļø Cloud Deployment (AWS/GCP/Azure) - - **Environment Variables:** - ```bash - export OLLAMA_HOST=0.0.0.0:11434 - export STREAMLIT_SERVER_PORT=8501 - export VECTOR_STORE_PATH=/data/vectors - export JSON_DATA_PATH=/data/invoices.json - ``` - - **Cloud-specific considerations:** - - Use managed storage for persistence (S3, GCS, Azure Blob) - - Configure load balancing for high availability - - Set up monitoring and logging - - Implement proper security groups/firewalls - - Consider using managed vector databases (Pinecone, Weaviate) - """) - - elif deployment_type == "Production Server": - st.markdown(""" - ### šŸ­ Production Server Setup - - **System Requirements:** - - CPU: 4+ cores - - RAM: 8GB+ (16GB recommended) - - Storage: 50GB+ SSD - - GPU: Optional (for faster embeddings) - - **Production Configuration:** - ```python - PRODUCTION_CONFIG = { - "max_workers": 4, - "embedding_batch_size": 64, - "vector_store_backup_interval": 3600, # 1 hour - "log_level": "WARNING", - "enable_metrics": True, - "secure_mode": True - } - ``` - - **Security Checklist:** - - [ ] Enable HTTPS/TLS - - [ ] Configure authentication - - [ ] Set up firewall rules - - [ ] Regular security updates - - [ ] Data encryption at rest - - [ ] Backup automation - """) - -def create_api_documentation(): - """Create API documentation for programmatic access""" - st.subheader("šŸ“” API Documentation") - - st.markdown(""" - ### REST API Endpoints - - The enhanced invoice system can be extended with REST API endpoints: - """) - - api_sections = st.tabs(["Upload API", "Search API", "Analytics API", "Management API"]) - - with api_sections[0]: - st.markdown(""" - #### šŸ“¤ Upload & Processing API - - **POST /api/upload** - ```python - import requests - - # Upload 
single file - with open('invoice.pdf', 'rb') as f: - response = requests.post( - 'http://localhost:8501/api/upload', - files={'file': f}, - data={'process_immediately': True} - ) - - # Response - { - "success": true, - "invoice_id": "inv_123", - "extracted_data": {...}, - "vector_indexed": true - } - ``` - - **GET /api/status/{job_id}** - ```python - # Check processing status - response = requests.get('http://localhost:8501/api/status/job_123') - - # Response - { - "status": "completed", - "progress": 100, - "result": {...} - } - ``` - """) - - with api_sections[1]: - st.markdown(""" - #### šŸ” Search API - - **POST /api/search** - ```python - # Semantic search - response = requests.post( - 'http://localhost:8501/api/search', - json={ - "query": "high value technology purchases", - "type": "semantic", - "top_k": 5, - "filters": { - "amount_min": 1000, - "date_after": "2024-01-01" - } - } - ) - - # Response - { - "results": [ - { - "invoice_number": "INV-2024-001", - "similarity_score": 0.89, - "metadata": {...} - } - ], - "total_found": 15 - } - ``` - - **GET /api/search/suggestions** - ```python - # Get search suggestions - response = requests.get( - 'http://localhost:8501/api/search/suggestions', - params={"partial_query": "office"} - ) - - # Response - { - "suggestions": [ - "office supplies", - "office equipment", - "office rent" - ] - } - ``` - """) - - with api_sections[2]: - st.markdown(""" - #### šŸ“Š Analytics API - - **GET /api/analytics/summary** - ```python - # Get system summary - response = requests.get('http://localhost:8501/api/analytics/summary') - - # Response - { - "total_invoices": 1250, - "total_amount": 450000.50, - "unique_suppliers": 85, - "processing_stats": {...} - } - ``` - - **GET /api/analytics/trends** - ```python - # Get spending trends - response = requests.get( - 'http://localhost:8501/api/analytics/trends', - params={ - "period": "monthly", - "start_date": "2024-01-01", - "end_date": "2024-12-31" - } - ) - - # Response - { - 
"trends": [ - {"month": "2024-01", "amount": 12500.00, "count": 45}, - {"month": "2024-02", "amount": 15200.00, "count": 52} - ] - } - ``` - """) - - with api_sections[3]: - st.markdown(""" - #### āš™ļø Management API - - **POST /api/vector/rebuild** - ```python - # Rebuild vector store - response = requests.post( - 'http://localhost:8501/api/vector/rebuild', - json={"force": true} - ) - - # Response - { - "status": "rebuilding", - "job_id": "rebuild_456", - "estimated_time": 120 - } - ``` - - **GET /api/system/health** - ```python - # System health check - response = requests.get('http://localhost:8501/api/system/health') - - # Response - { - "status": "healthy", - "components": { - "database": "ok", - "vector_store": "ok", - "ollama": "ok", - "embedding_model": "ok" - }, - "metrics": { - "memory_usage": 45.2, - "disk_usage": 23.1, - "uptime": 86400 - } - } - ``` - """) - -def create_troubleshooting_guide(): - """Create comprehensive troubleshooting guide""" - st.subheader("šŸ”§ Troubleshooting Guide") - - issue_categories = st.tabs([ - "Installation Issues", - "Processing Errors", - "Vector Store Problems", - "Performance Issues", - "Integration Problems" - ]) - - with issue_categories[0]: - st.markdown(""" - ### šŸ› ļø Installation Issues - - **Problem: ModuleNotFoundError for required packages** - ```bash - # Solution: Install missing packages - pip install -r requirements.txt - - # For development environment - pip install -r requirements-dev.txt - - # For specific packages - pip install sentence-transformers faiss-cpu streamlit - ``` - - **Problem: Ollama not found or not responding** - ```bash - # Install Ollama - curl -fsSL https://ollama.com/install.sh | sh - - # Start Ollama service - ollama serve - - # Pull required model - ollama pull mistral:7b - - # Check if running - curl http://localhost:11434/api/tags - ``` - - **Problem: CUDA/GPU issues with embeddings** - ```bash - # For CPU-only deployment - pip install torch --index-url 
https://download.pytorch.org/whl/cpu - - # For GPU support - pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 - ``` - """) - - with issue_categories[1]: - st.markdown(""" - ### āš ļø Processing Errors - - **Problem: PDF extraction fails** - - Check file permissions and size - - Ensure PDF is not password protected - - Try converting PDF to images first - - Check Docling installation - - **Problem: AI extraction returns empty results** - - Verify Ollama is running - - Check model availability - - Increase timeout settings - - Fall back to regex extraction - - **Problem: JSON serialization errors** - ```python - # Debug extracted data - print(f"Extracted data type: {type(extracted_data)}") - print(f"Data content: {extracted_data}") - - # Handle non-serializable data - cleaned_data = {k: str(v) for k, v in extracted_data.items()} - ``` - """) - - with issue_categories[2]: - st.markdown(""" - ### šŸ” Vector Store Problems - - **Problem: Vector store fails to load** - ```python - # Check file permissions - import os - print(f"Vector file exists: {os.path.exists('invoice_vectors.faiss')}") - print(f"Metadata file exists: {os.path.exists('vector_metadata.pkl')}") - - # Rebuild if corrupted - vector_store.rebuild_vector_store(json_data) - ``` - - **Problem: Embedding model fails to load** - ```python - # Try different models - models_to_try = [ - "all-MiniLM-L6-v2", - "all-mpnet-base-v2", - "paraphrase-MiniLM-L6-v2" - ] - - for model in models_to_try: - try: - embedding_model = SentenceTransformer(model) - break - except Exception as e: - print(f"Failed to load {model}: {e}") - ``` - - **Problem: Search returns no results** - - Check if documents are indexed - - Verify similarity threshold - - Try different query formulations - - Check embedding model compatibility - """) - - with issue_categories[3]: - st.markdown(""" - ### ⚔ Performance Issues - - **Problem: Slow processing speed** - ```python - # Optimize batch processing - 
BATCH_SIZE = 10 # Reduce if memory issues - - # Use CPU-optimized models - embedding_model = "all-MiniLM-L6-v2" # Fastest - - # Limit text length - text = text[:5000] # Truncate long documents - ``` - - **Problem: High memory usage** - ```python - # Monitor memory - import psutil - process = psutil.Process() - print(f"Memory usage: {process.memory_info().rss / 1024 / 1024:.2f} MB") - - # Optimize settings - torch.set_num_threads(2) # Limit CPU threads - os.environ['OMP_NUM_THREADS'] = '2' - ``` - - **Problem: Slow vector search** - - Use FAISS optimization - - Implement result caching - - Reduce embedding dimensions - - Use approximate search methods - """) - - with issue_categories[4]: - st.markdown(""" - ### šŸ”— Integration Problems - - **Problem: Database connection issues** - ```python - # Check database file - import sqlite3 - try: - conn = sqlite3.connect('invoices.db') - cursor = conn.cursor() - cursor.execute("SELECT name FROM sqlite_master WHERE type='table';") - tables = cursor.fetchall() - print(f"Available tables: {tables}") - conn.close() - except Exception as e: - print(f"Database error: {e}") - ``` - - **Problem: Streamlit deployment issues** - ```bash - # Check Streamlit version - streamlit --version - - # Run with debug mode - streamlit run app.py --logger.level=debug - - # Check port availability - netstat -tulpn | grep :8501 - ``` - - **Problem: Cross-platform compatibility** - - Use pathlib instead of os.path - - Handle file encoding explicitly - - Test on target platform - - Use platform-specific configurations - """) - -# =============================================================================== -# FINAL SYSTEM INTEGRATION -# =============================================================================== - -def create_system_dashboard(): - """Create comprehensive system dashboard""" - st.title("šŸŽ›ļø Enhanced Invoice System Dashboard") - - # System overview tabs - main_tabs = st.tabs([ - "šŸ“Š Overview", - "šŸ„ Health", - "⚔ 
Performance", - "šŸ’¾ Backup", - "šŸš€ Deploy", - "šŸ”§ Troubleshoot" - ]) - - with main_tabs[0]: - # System overview - col1, col2, col3 = st.columns(3) - - with col1: - st.metric("System Status", "āœ… Operational") - st.metric("Uptime", "24h 35m") - - with col2: - st.metric("Total Processing", "1,234 files") - st.metric("Success Rate", "96.8%") - - with col3: - st.metric("Vector Store", "1,190 docs") - st.metric("Storage Used", "2.3 GB") - - with main_tabs[1]: - create_system_health_dashboard() - - with main_tabs[2]: - performance_benchmark() - - with main_tabs[3]: - create_backup_system() - - with main_tabs[4]: - create_deployment_guide() - - with main_tabs[5]: - create_troubleshooting_guide() - -# =============================================================================== -# REQUIREMENTS AND DEPENDENCIES -# =============================================================================== - -def generate_requirements_file(): - """Generate requirements.txt file content""" - requirements = """ -# Core dependencies -streamlit>=1.28.0 -pandas>=1.5.0 -numpy>=1.21.0 -sqlite3 - -# Document processing -pdfplumber>=0.7.0 -python-docx>=0.8.11 -Pillow>=9.0.0 - -# AI and ML -sentence-transformers>=2.2.0 -torch>=1.13.0 -transformers>=4.21.0 -ollama>=0.1.0 - -# Vector storage -faiss-cpu>=1.7.0 -# faiss-gpu>=1.7.0 # Uncomment for GPU support - -# Web and API -requests>=2.28.0 -streamlit-chat>=0.1.0 - -# Visualization -plotly>=5.0.0 -matplotlib>=3.5.0 -seaborn>=0.11.0 - -# Utilities -python-dateutil>=2.8.0 -pytz>=2022.1 -tqdm>=4.64.0 -psutil>=5.9.0 - -# Development and testing (optional) -pytest>=7.0.0 -black>=22.0.0 -flake8>=5.0.0 -mypy>=0.991 - -# Production deployment (optional) -gunicorn>=20.1.0 -nginx-python>=1.0.0 -supervisor>=4.2.0 -""" - return requirements.strip() - -def generate_docker_files(): - """Generate Docker configuration files""" - - dockerfile_content = """ -FROM python:3.9-slim - -# Set working directory -WORKDIR /app - -# Install system dependencies -RUN 
apt-get update && apt-get install -y \\ - curl \\ - gcc \\ - g++ \\ - && rm -rf /var/lib/apt/lists/* - -# Install Ollama -RUN curl -fsSL https://ollama.com/install.sh | sh - -# Copy requirements and install Python dependencies -COPY requirements.txt . -RUN pip install --no-cache-dir -r requirements.txt - -# Copy application code -COPY . . - -# Create data directory -RUN mkdir -p /app/data - -# Expose ports -EXPOSE 8501 11434 - -# Health check -HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \\ - CMD curl -f http://localhost:8501/_stcore/health || exit 1 - -# Start script -COPY start.sh . -RUN chmod +x start.sh -CMD ["./start.sh"] -""" - - docker_compose_content = """ -version: '3.8' - -services: - invoice-app: - build: . - ports: - - "8501:8501" - - "11434:11434" - volumes: - - ./data:/app/data - - ./models:/app/models - - ./backups:/app/backups - environment: - - STREAMLIT_SERVER_ADDRESS=0.0.0.0 - - STREAMLIT_SERVER_PORT=8501 - - OLLAMA_HOST=0.0.0.0:11434 - restart: unless-stopped - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8501/_stcore/health"] - interval: 30s - timeout: 10s - retries: 3 - start_period: 40s - - # Optional: Add a reverse proxy - nginx: - image: nginx:alpine - ports: - - "80:80" - - "443:443" - volumes: - - ./nginx.conf:/etc/nginx/nginx.conf - - ./ssl:/etc/ssl - depends_on: - - invoice-app - restart: unless-stopped -""" - - start_script_content = """#!/bin/bash -set -e - -# Start Ollama in background -ollama serve & -OLLAMA_PID=$! - -# Wait for Ollama to be ready -echo "Waiting for Ollama to start..." -sleep 10 - -# Pull required model -ollama pull mistral:7b - -# Start Streamlit -echo "Starting Streamlit application..." 
-streamlit run enhanced_main.py \\ - --server.port=8501 \\ - --server.address=0.0.0.0 \\ - --server.enableCORS=false \\ - --server.enableXsrfProtection=false - -# Keep the script running -wait $OLLAMA_PID -""" - - return { - "Dockerfile": dockerfile_content.strip(), - "docker-compose.yml": docker_compose_content.strip(), - "start.sh": start_script_content.strip() - } - -# =============================================================================== -# =============================================================================== -# FINAL MAIN FUNCTION WITH ALL FEATURES -# =============================================================================== - -def ultimate_enhanced_main(): - # Custom CSS for better UI - st.markdown(""" - - """, unsafe_allow_html=True) - - # Main header - st.markdown('

šŸš€ Ultimate Invoice Processing System

', unsafe_allow_html=True) - st.markdown(""" -
-

- AI-Powered Document Processing • Semantic Search • Advanced Analytics • Production Ready -

-
- """, unsafe_allow_html=True) - - # Initialize session state with error handling - try: - if 'ultimate_processor' not in st.session_state: - with st.spinner("šŸ”§ Initializing Ultimate Invoice Processor..."): - st.session_state.ultimate_processor = EnhancedInvoiceProcessor() - - if 'ultimate_chatbot' not in st.session_state: - with st.spinner("šŸ¤– Setting up Enhanced AI Chatbot..."): - st.session_state.ultimate_chatbot = EnhancedInvoiceChatBot( - vector_store=st.session_state.ultimate_processor.vector_store - ) - - if 'chat_history' not in st.session_state: - st.session_state.chat_history = [] - - if 'system_initialized' not in st.session_state: - st.session_state.system_initialized = True - st.success("āœ… System initialized successfully!") - - except Exception as e: - st.error(f"āŒ System initialization failed: {e}") - st.stop() - - # ------------------------------------------------------------------------- - # ULTIMATE SIDEBAR WITH COMPREHENSIVE STATUS - # ------------------------------------------------------------------------- - - with st.sidebar: - st.markdown("## šŸŽ›ļø System Control Center") - - # System status overview - with st.expander("šŸ„ System Health", expanded=True): - try: - # Check all components - components_status = { - "Docling": hasattr(st.session_state.ultimate_processor, 'docling_available') and st.session_state.ultimate_processor.docling_available, - "AI Processing": hasattr(st.session_state.ultimate_processor, 'use_ai') and st.session_state.ultimate_processor.use_ai, - "Vector Store": st.session_state.ultimate_processor.vector_store and st.session_state.ultimate_processor.vector_store.embedding_model, - "Database": os.path.exists(st.session_state.ultimate_processor.db_path), - "JSON Storage": os.path.exists(st.session_state.ultimate_processor.json_path) - } - - for component, status in components_status.items(): - status_icon = "āœ…" if status else "āŒ" - status_class = "status-ok" if status else "status-error" - 
st.markdown(f'{status_icon} {component}', unsafe_allow_html=True) - - except Exception as e: - st.error(f"Status check failed: {e}") - - # Quick stats - with st.expander("šŸ“Š Quick Statistics", expanded=True): - try: - json_data = st.session_state.ultimate_chatbot.load_json_data() - total_invoices = len(json_data.get("invoices", [])) - total_amount = json_data.get("summary", {}).get("total_amount", 0) - unique_suppliers = len(json_data.get("summary", {}).get("unique_suppliers", [])) - vector_docs = len(st.session_state.ultimate_processor.vector_store.document_metadata) if st.session_state.ultimate_processor.vector_store else 0 - - st.metric("šŸ“„ Total Invoices", f"{total_invoices:,}") - st.metric("šŸ’° Total Value", f"₹{total_amount:,.2f}") - st.metric("šŸ¢ Suppliers", f"{unique_suppliers:,}") - st.metric("šŸ” Vector Docs", f"{vector_docs:,}") - - except Exception as e: - st.error(f"Stats loading failed: {e}") - - # Quick actions - st.markdown("### ⚔ Quick Actions") - - col1, col2 = st.columns(2) - - with col1: - if st.button("šŸ”„ Refresh", use_container_width=True, key="sidebar_refresh"): - st.rerun() - - if st.button("šŸ’¾ Backup", use_container_width=True,key="sidebar_backup"): - try: - backup_data = export_complete_system_data() - if backup_data: - st.download_button( - "šŸ“„ Download", - backup_data, - f"system_backup_{datetime.now().strftime('%Y%m%d_%H%M')}.json", - "application/json" - ) - except Exception as e: - st.error(f"Backup failed: {e}") - - with col2: - if st.button("🧹 Clean", use_container_width=True, key="sidebar_clean"): - # Clean temporary files - try: - import tempfile - import glob - temp_files = glob.glob(os.path.join(tempfile.gettempdir(), "tmp*")) - for file in temp_files[:10]: # Limit to avoid issues - try: - os.unlink(file) - except: - pass - st.success("āœ… Cleaned temp files") - except: - st.warning("āš ļø Cleanup partially failed") - - if st.button("šŸ“Š Dashboard", use_container_width=True, key="sidebar_dashboard"): - 
st.session_state.show_dashboard = not st.session_state.get('show_dashboard', False) - st.rerun() - - # Advanced settings - with st.expander("āš™ļø Advanced Settings"): - # Performance settings - st.markdown("**Performance:**") - batch_size = st.slider("Batch Size", 1, 50, 10) - max_text_length = st.slider("Max Text Length", 1000, 10000, 5000) - - # Vector settings - st.markdown("**Vector Search:**") - similarity_threshold = st.slider("Similarity Threshold", 0.0, 1.0, 0.1, 0.05) - max_results = st.slider("Max Results", 1, 50, 10) - - # Save settings - if st.button("šŸ’¾ Save Settings", key="save_settings_admin"): - settings = { - "batch_size": batch_size, - "max_text_length": max_text_length, - "similarity_threshold": similarity_threshold, - "max_results": max_results - } - # Save to session state or file - st.session_state.user_settings = settings - st.success("āœ… Settings saved!") - - # ------------------------------------------------------------------------- - # MAIN NAVIGATION TABS - # ------------------------------------------------------------------------- - - tab_names = [ - "šŸ  Home", - "šŸ“¤ Upload", - "šŸ’¬ AI Chat", - "šŸ“Š Analytics", - "šŸ” Search", - "šŸ“‹ Data", - "šŸŽ›ļø Admin" - ] - - if st.session_state.get('show_dashboard', False): - tab_names.append("šŸ“ˆ Dashboard") - - tabs = st.tabs(tab_names) - - # ------------------------------------------------------------------------- - # TAB: HOME - SYSTEM OVERVIEW - # ------------------------------------------------------------------------- - - with tabs[0]: - st.markdown("## šŸ  Welcome to the Ultimate Invoice Processing System") - - # Feature highlights - col1, col2, col3 = st.columns(3) - - with col1: - st.markdown(""" -
-

šŸ¤– AI-Powered Extraction

-

Advanced AI models extract structured data from any invoice format with high accuracy.

-
- """, unsafe_allow_html=True) - - with col2: - st.markdown(""" -
-

šŸ” Semantic Search

-

Find invoices using natural language queries with vector similarity search.

-
- """, unsafe_allow_html=True) - - with col3: - st.markdown(""" -
-

šŸ“Š Advanced Analytics

-

Comprehensive insights, trends, and patterns in your invoice data.

-
- """, unsafe_allow_html=True) - - # Getting started guide - st.markdown("### šŸš€ Getting Started") - - steps = [ - ("1ļøāƒ£", "Upload Documents", "Go to Upload tab and drop your invoice files"), - ("2ļøāƒ£", "AI Processing", "Watch as AI extracts structured data automatically"), - ("3ļøāƒ£", "Search & Analyze", "Use natural language to search and analyze your data"), - ("4ļøāƒ£", "Export & Integrate", "Download results or integrate with your systems") - ] - - for icon, title, description in steps: - st.markdown(f""" -
-
{icon}
-
- {title}
- {description} -
-
- """, unsafe_allow_html=True) - - # Recent activity - st.markdown("### šŸ“ˆ Recent Activity") - - try: - json_data = st.session_state.ultimate_chatbot.load_json_data() - recent_invoices = sorted( - json_data.get("invoices", []), - key=lambda x: x.get("timestamps", {}).get("created_at", ""), - reverse=True - )[:5] - - if recent_invoices: - for i, invoice in enumerate(recent_invoices, 1): - with st.expander(f"šŸ“„ {invoice.get('invoice_number', f'Invoice {i}')} - {invoice.get('supplier_name', 'Unknown Supplier')}"): - col1, col2, col3 = st.columns(3) - with col1: - st.write(f"**Amount:** ₹{invoice.get('amount', 0):,.2f}") - st.write(f"**Date:** {invoice.get('date', 'N/A')}") - with col2: - st.write(f"**Buyer:** {invoice.get('buyer_name', 'N/A')}") - st.write(f"**Confidence:** {invoice.get('extraction_info', {}).get('confidence', 0):.1%}") - with col3: - st.write(f"**File:** {invoice.get('file_info', {}).get('file_name', 'N/A')}") - st.write(f"**Processed:** {invoice.get('timestamps', {}).get('created_at', 'N/A')[:19]}") - else: - st.info("No invoices processed yet. Upload some documents to get started!") - - except Exception as e: - st.error(f"Error loading recent activity: {e}") - - # ------------------------------------------------------------------------- - # TAB: UPLOAD - ENHANCED FILE PROCESSING - # ------------------------------------------------------------------------- - - with tabs[1]: - st.markdown("## šŸ“¤ Upload & Process Documents") - - # Upload interface with drag & drop - st.markdown(""" -
-

šŸ“ Drag & Drop Your Invoice Files Here

-

Supported formats: PDF, JPG, PNG, DOCX, TXT

-
- """, unsafe_allow_html=True) - - uploaded_files = st.file_uploader( - "Choose invoice files", - type=['pdf', 'jpg', 'jpeg', 'png', 'docx', 'txt'], - accept_multiple_files=True, - label_visibility="collapsed" - ) - - if uploaded_files: - # Processing options - col1, col2, col3 = st.columns(3) - - with col1: - auto_extract = st.checkbox("šŸ¤– AI Extraction", value=True) - vector_index = st.checkbox("šŸ” Vector Indexing", value=True) - - with col2: - batch_process = st.checkbox("⚔ Batch Processing", value=True) - save_originals = st.checkbox("šŸ’¾ Save Originals", value=False) - - with col3: - notify_completion = st.checkbox("šŸ”” Notify on Completion", value=True) - auto_backup = st.checkbox("šŸ’¾ Auto Backup", value=False) - - # File preview - st.markdown("### šŸ“‹ Files to Process") - - total_size = sum(len(f.getvalue()) for f in uploaded_files) - st.info(f"šŸ“Š {len(uploaded_files)} files selected • Total size: {total_size / 1024 / 1024:.2f} MB") - - # Process files - if st.button("šŸš€ Process All Files", type="primary", use_container_width=True, key="process_all_files_ultimate"): - # Create processing container - progress_container = st.container() - results_container = st.container() - - with progress_container: - progress_bar = st.progress(0) - status_text = st.empty() - - # Processing metrics - metrics_col1, metrics_col2, metrics_col3, metrics_col4 = st.columns(4) - processed_metric = metrics_col1.empty() - success_metric = metrics_col2.empty() - failed_metric = metrics_col3.empty() - time_metric = metrics_col4.empty() - - # Process files with enhanced error handling - start_time = datetime.now() - processed_count = 0 - success_count = 0 - failed_count = 0 - - for i, uploaded_file in enumerate(uploaded_files): - current_progress = (i + 1) / len(uploaded_files) - progress_bar.progress(current_progress) - status_text.text(f"Processing: {uploaded_file.name}") - - # Update metrics - processed_count = i + 1 - processed_metric.metric("Processed", 
f"{processed_count}/{len(uploaded_files)}") - success_metric.metric("Success", success_count) - failed_metric.metric("Failed", failed_count) - - elapsed_time = (datetime.now() - start_time).total_seconds() - time_metric.metric("Time", f"{elapsed_time:.1f}s") - - # Process file - try: - # Save temporarily - with tempfile.NamedTemporaryFile(delete=False, suffix=f".{uploaded_file.name.split('.')[-1]}") as tmp_file: - file_content = uploaded_file.getvalue() - tmp_file.write(file_content) - tmp_file_path = tmp_file.name - file_size = len(file_content) - - # Enhanced processing - invoice_data = st.session_state.ultimate_processor.process_file(tmp_file_path, file_size) - - if invoice_data.invoice_number: - success_count += 1 - - # Show success in results - with results_container: - with st.expander(f"āœ… {uploaded_file.name}", expanded=False): - col1, col2 = st.columns(2) - with col1: - st.write(f"**Invoice #:** {invoice_data.invoice_number}") - st.write(f"**Supplier:** {invoice_data.supplier_name}") - st.write(f"**Amount:** ₹{invoice_data.amount:.2f}") - with col2: - st.write(f"**Date:** {invoice_data.date}") - st.write(f"**Confidence:** {invoice_data.extraction_confidence:.1%}") - st.write(f"**Vector Indexed:** {'Yes' if vector_index else 'No'}") - else: - failed_count += 1 - with results_container: - st.warning(f"āš ļø Limited data extracted from {uploaded_file.name}") - - except Exception as e: - failed_count += 1 - with results_container: - st.error(f"āŒ Error processing {uploaded_file.name}: {str(e)[:100]}...") - - finally: - # Cleanup - try: - os.unlink(tmp_file_path) - except: - pass - - # Final summary - total_time = (datetime.now() - start_time).total_seconds() - status_text.success(f"āœ… Processing complete! 
{success_count}/{len(uploaded_files)} successful in {total_time:.1f}s") - - # Show completion notification - if notify_completion: - st.balloons() - - # Auto backup if enabled - if auto_backup and success_count > 0: - try: - backup_data = export_complete_system_data() - if backup_data: - st.download_button( - "šŸ“„ Download Auto-Backup", - backup_data, - f"auto_backup_{datetime.now().strftime('%Y%m%d_%H%M')}.json", - "application/json" - ) - except: - st.warning("Auto-backup failed") - - # ------------------------------------------------------------------------- - # TAB: AI CHAT - ENHANCED CONVERSATIONAL INTERFACE - # ------------------------------------------------------------------------- - - with tabs[2]: - st.markdown("## šŸ’¬ AI Chat Interface") - - # Chat configuration - col1, col2, col3 = st.columns([2, 1, 1]) - - with col1: - st.markdown("**Ask anything about your invoices using natural language**") - - with col2: - search_mode = st.selectbox("Search Mode", ["Hybrid", "Semantic Only", "SQL Only"]) - - with col3: - if st.button("šŸ—‘ļø Clear Chat", key="clear_chat_history"): - st.session_state.chat_history = [] - st.rerun() - - # Chat interface - chat_container = st.container() - - # Chat input - user_query = st.chat_input("Ask about your invoices... (e.g., 'Show me high-value technology purchases')") - - if user_query: - # Add user message - st.session_state.chat_history.append({"role": "user", "content": user_query, "timestamp": datetime.now()}) - - # Process query with enhanced AI - with st.spinner("šŸ¤– AI is analyzing your request..."): - try: - if search_mode == "Semantic Only": - # Pure vector search - vector_results = st.session_state.ultimate_processor.vector_store.semantic_search(user_query, 5) - bot_response = f"šŸ” **Semantic Search Results:**\n\n" - for i, result in enumerate(vector_results, 1): - bot_response += f"{i}. 
**{result.invoice_number}** - {result.supplier_name} (Score: {result.similarity_score:.3f})\n" - - elif search_mode == "SQL Only": - # Pure SQL search - sql_results = st.session_state.ultimate_chatbot.sql_search(user_query) - bot_response = f"šŸ“Š **Database Search Results:**\n\n" - for i, result in enumerate(sql_results, 1): - bot_response += f"{i}. **{result.get('invoice_number', 'N/A')}** - {result.get('supplier_name', 'N/A')} (₹{result.get('amount', 0):,.2f})\n" - - else: - # Hybrid search (default) - bot_response = st.session_state.ultimate_chatbot.query_database(user_query) - - # Add bot response - st.session_state.chat_history.append({ - "role": "assistant", - "content": bot_response, - "timestamp": datetime.now(), - "search_mode": search_mode - }) - - except Exception as e: - error_response = f"āŒ Sorry, I encountered an error: {str(e)[:100]}..." - st.session_state.chat_history.append({ - "role": "assistant", - "content": error_response, - "timestamp": datetime.now() - }) - - # Display chat history with enhanced formatting - with chat_container: - for i, message in enumerate(st.session_state.chat_history): - with st.chat_message(message["role"]): - st.markdown(message["content"]) - - # Show metadata for assistant messages - if message["role"] == "assistant" and message.get("search_mode"): - st.caption(f"šŸ”§ Mode: {message['search_mode']} • {message['timestamp'].strftime('%H:%M:%S')}") - - # Suggested queries with categories - if not st.session_state.chat_history: - st.markdown("### šŸ’” Try These Smart Queries") - - query_categories = st.tabs(["šŸ“Š Analytics", "šŸ” Search", "šŸ¤– AI Insights", "šŸ“ˆ Trends"]) - - with query_categories[0]: - analytics_queries = [ - "What's our total spending this year?", - "Which supplier do we pay the most?", - "Show me invoices over ₹50,000", - "How many invoices do we process monthly?" 
- ] - for i, query in enumerate(analytics_queries): - if st.button(query, key=f"analytics_query{i}"): - st.session_state.chat_history.append({"role": "user", "content": query, "timestamp": datetime.now()}) - st.rerun() - - with query_categories[1]: - search_queries = [ - "Find technology equipment purchases", - "Show me office supplies invoices", - "Search for consulting services", - "Find invoices from last quarter" - ] - for i, query in enumerate(search_queries): - if st.button(query, key=f"search_ai_query_{i}"): - st.session_state.chat_history.append({"role": "user", "content": query, "timestamp": datetime.now()}) - st.rerun() - - with query_categories[2]: - ai_queries = [ - "Analyze our spending patterns", - "Identify cost-saving opportunities", - "Compare supplier performance", - "Find unusual invoice patterns" - ] - for i, query in enumerate(ai_queries): - if st.button(query, key=f"chat_ai_query_{i}"): - - st.session_state.chat_history.append({"role": "user", "content": query, "timestamp": datetime.now()}) - st.rerun() - - with query_categories[3]: - trend_queries = [ - "Show spending trends over time", - "Which months have highest expenses?", - "How has our supplier diversity changed?", - "Predict next month's spending" - ] - for i, query in enumerate(trend_queries): - if st.button(query, key=f"trend_query_{i}"): - st.session_state.chat_history.append({"role": "user", "content": query, "timestamp": datetime.now()}) - st.rerun() - - # ------------------------------------------------------------------------- - # TAB: ANALYTICS - COMPREHENSIVE BUSINESS INTELLIGENCE - # ------------------------------------------------------------------------- - - with tabs[3]: - st.markdown("## šŸ“Š Advanced Analytics Dashboard") - - # Load data - try: - json_data = st.session_state.ultimate_chatbot.load_json_data() - invoices = json_data.get("invoices", []) - - if not invoices: - st.warning("šŸ“Š No invoice data available. 
Upload and process some invoices first!") - return - - # Convert to DataFrame - df_data = [] - for inv in invoices: - df_data.append({ - 'invoice_number': inv.get('invoice_number', ''), - 'supplier_name': inv.get('supplier_name', ''), - 'buyer_name': inv.get('buyer_name', ''), - 'amount': inv.get('amount', 0), - 'quantity': inv.get('quantity', 0), - 'date': inv.get('date', ''), - 'extraction_confidence': inv.get('extraction_info', {}).get('confidence', 0), - 'created_at': inv.get('timestamps', {}).get('created_at', ''), - 'product_description': inv.get('product_description', '') - }) - - df = pd.DataFrame(df_data) - - # KPI Dashboard - st.markdown("### šŸŽÆ Key Performance Indicators") - - kpi_col1, kpi_col2, kpi_col3, kpi_col4, kpi_col5 = st.columns(5) - - with kpi_col1: - total_invoices = len(df) - st.metric("šŸ“„ Total Invoices", f"{total_invoices:,}") - - with kpi_col2: - total_amount = df['amount'].sum() - avg_amount = df['amount'].mean() - st.metric("šŸ’° Total Value", f"₹{total_amount:,.0f}", f"Avg: ₹{avg_amount:,.0f}") - - with kpi_col3: - unique_suppliers = df['supplier_name'].nunique() - top_supplier_pct = (df['supplier_name'].value_counts().iloc[0] / len(df) * 100) if len(df) > 0 else 0 - st.metric("šŸ¢ Suppliers", f"{unique_suppliers:,}", f"Top: {top_supplier_pct:.1f}%") - - with kpi_col4: - avg_confidence = df['extraction_confidence'].mean() - high_confidence = (df['extraction_confidence'] > 0.8).sum() - st.metric("šŸŽÆ Avg Confidence", f"{avg_confidence:.1%}", f"High: {high_confidence}") - - with kpi_col5: - # Processing efficiency - success_rate = (df['invoice_number'].notna()).mean() - vector_indexed = len(st.session_state.ultimate_processor.vector_store.document_metadata) if st.session_state.ultimate_processor.vector_store else 0 - st.metric("⚔ Success Rate", f"{success_rate:.1%}", f"Indexed: {vector_indexed}") - - # Advanced Analytics Tabs - analytics_tabs = st.tabs([ - "šŸ“ˆ Trends", - "šŸ¢ Suppliers", - "šŸ’° Financial", - "šŸ” Quality", - 
"šŸ“Š Patterns" - ]) - - with analytics_tabs[0]: - st.markdown("#### šŸ“ˆ Spending Trends") - - # Time series analysis - if 'date' in df.columns and df['date'].notna().any(): - df['date_parsed'] = pd.to_datetime(df['date'], errors='coerce') - df_dated = df.dropna(subset=['date_parsed']) - - if len(df_dated) > 0: - # Monthly trends - monthly_data = df_dated.groupby(df_dated['date_parsed'].dt.to_period('M')).agg({ - 'amount': ['sum', 'mean', 'count'] - }).round(2) - - monthly_data.columns = ['Total Amount', 'Average Amount', 'Invoice Count'] - monthly_data.index = monthly_data.index.astype(str) - # Create trend charts - fig_trend = px.line( - x=monthly_data.index, - y=monthly_data['Total Amount'], - title="Monthly Spending Trend", - labels={'x': 'Month', 'y': 'Total Amount (₹)'} - ) - st.plotly_chart(fig_trend, use_container_width=True) - - # Show data table - st.dataframe(monthly_data, use_container_width=True) - else: - st.info("No valid dates found for trend analysis") - else: - st.info("Date information not available for trend analysis") - - with analytics_tabs[1]: - st.markdown("#### šŸ¢ Supplier Analysis") - - # Supplier performance metrics - supplier_analysis = df.groupby('supplier_name').agg({ - 'amount': ['sum', 'mean', 'count'], - 'extraction_confidence': 'mean' - }).round(2) - - supplier_analysis.columns = ['Total Spent', 'Avg Invoice', 'Invoice Count', 'Avg Confidence'] - supplier_analysis = supplier_analysis.sort_values('Total Spent', ascending=False) - - # Top suppliers visualization - top_suppliers = supplier_analysis.head(10) - - col1, col2 = st.columns(2) - - with col1: - fig_suppliers = px.bar( - x=top_suppliers['Total Spent'], - y=top_suppliers.index, - orientation='h', - title="Top 10 Suppliers by Total Spending", - labels={'x': 'Total Spent (₹)', 'y': 'Supplier'} - ) - st.plotly_chart(fig_suppliers, use_container_width=True) - - with col2: - fig_count = px.bar( - x=top_suppliers['Invoice Count'], - y=top_suppliers.index, - orientation='h', - 
title="Top 10 Suppliers by Invoice Volume", - labels={'x': 'Invoice Count', 'y': 'Supplier'} - ) - st.plotly_chart(fig_count, use_container_width=True) - - # Supplier performance table - st.markdown("**Supplier Performance Summary:**") - st.dataframe( - supplier_analysis.head(20), - column_config={ - "Total Spent": st.column_config.NumberColumn("Total Spent", format="₹%.2f"), - "Avg Invoice": st.column_config.NumberColumn("Avg Invoice", format="₹%.2f"), - "Avg Confidence": st.column_config.ProgressColumn("Avg Confidence", min_value=0, max_value=1), - } - ) - - with analytics_tabs[2]: - st.markdown("#### šŸ’° Financial Analysis") - - # Amount distribution - col1, col2 = st.columns(2) - - with col1: - # Amount histogram - fig_hist = px.histogram( - df, - x='amount', - nbins=30, - title="Invoice Amount Distribution", - labels={'x': 'Amount (₹)', 'y': 'Frequency'} - ) - st.plotly_chart(fig_hist, use_container_width=True) - - with col2: - # Box plot for amount ranges - fig_box = px.box( - df, - y='amount', - title="Invoice Amount Range Analysis", - labels={'y': 'Amount (₹)'} - ) - st.plotly_chart(fig_box, use_container_width=True) - - # Financial summary statistics - st.markdown("**Financial Statistics:**") - - financial_stats = { - "Total Value": f"₹{df['amount'].sum():,.2f}", - "Average Invoice": f"₹{df['amount'].mean():,.2f}", - "Median Invoice": f"₹{df['amount'].median():,.2f}", - "Largest Invoice": f"₹{df['amount'].max():,.2f}", - "Smallest Invoice": f"₹{df['amount'].min():,.2f}", - "Standard Deviation": f"₹{df['amount'].std():,.2f}" - } - - stat_cols = st.columns(3) - for i, (stat, value) in enumerate(financial_stats.items()): - with stat_cols[i % 3]: - st.metric(stat, value) - - # High-value invoice analysis - high_value_threshold = df['amount'].quantile(0.9) - high_value_invoices = df[df['amount'] >= high_value_threshold] - - if len(high_value_invoices) > 0: - st.markdown(f"**High-Value Invoices (Top 10% - Above ₹{high_value_threshold:,.2f}):**") - st.dataframe( 
- high_value_invoices[['invoice_number', 'supplier_name', 'amount', 'date']].sort_values('amount', ascending=False), - column_config={ - "amount": st.column_config.NumberColumn("Amount", format="₹%.2f") - } - ) - - with analytics_tabs[3]: - st.markdown("#### šŸ” Data Quality Analysis") - - # Data completeness analysis - completeness = {} - for col in ['invoice_number', 'supplier_name', 'buyer_name', 'amount', 'date']: - if col in df.columns: - completeness[col] = (df[col].notna() & (df[col] != '')).mean() - - # Quality metrics - col1, col2 = st.columns(2) - - with col1: - st.markdown("**Data Completeness:**") - for field, percentage in completeness.items(): - st.progress(percentage, text=f"{field}: {percentage:.1%}") - - with col2: - # Confidence distribution - fig_confidence = px.histogram( - df, - x='extraction_confidence', - nbins=20, - title="Extraction Confidence Distribution", - labels={'x': 'Confidence Score', 'y': 'Count'} - ) - st.plotly_chart(fig_confidence, use_container_width=True) - - # Quality issues identification - st.markdown("**Quality Issues:**") - - issues = [] - - # Missing data - missing_invoice_numbers = (df['invoice_number'].isna() | (df['invoice_number'] == '')).sum() - if missing_invoice_numbers > 0: - issues.append(f"āŒ {missing_invoice_numbers} invoices missing invoice numbers") - - missing_amounts = (df['amount'].isna() | (df['amount'] == 0)).sum() - if missing_amounts > 0: - issues.append(f"āŒ {missing_amounts} invoices missing amounts") - - low_confidence = (df['extraction_confidence'] < 0.5).sum() - if low_confidence > 0: - issues.append(f"āš ļø {low_confidence} invoices with low confidence (<50%)") - - # Display issues - if issues: - for issue in issues: - st.write(issue) - else: - st.success("āœ… No major quality issues detected!") - - with analytics_tabs[4]: - st.markdown("#### šŸ“Š Pattern Analysis") - - # Advanced pattern analysis - col1, col2 = st.columns(2) - - with col1: - # Amount vs Confidence correlation - if len(df) > 
1: - correlation = df['amount'].corr(df['extraction_confidence']) - - fig_scatter = px.scatter( - df, - x='extraction_confidence', - y='amount', - title=f"Amount vs Confidence (Correlation: {correlation:.2f})", - labels={'x': 'Extraction Confidence', 'y': 'Amount (₹)'} - ) - st.plotly_chart(fig_scatter, use_container_width=True) - - with col2: - # Supplier diversity over time - if 'date' in df.columns: - try: - df['date_parsed'] = pd.to_datetime(df['date'], errors='coerce') - df_dated = df.dropna(subset=['date_parsed']) - - if len(df_dated) > 0: - monthly_suppliers = df_dated.groupby(df_dated['date_parsed'].dt.to_period('M'))['supplier_name'].nunique() - - fig_diversity = px.line( - x=monthly_suppliers.index.astype(str), - y=monthly_suppliers.values, - title="Supplier Diversity Over Time", - labels={'x': 'Month', 'y': 'Unique Suppliers'} - ) - st.plotly_chart(fig_diversity, use_container_width=True) - except: - st.info("Could not analyze supplier diversity over time") - - # Pattern insights - st.markdown("**Pattern Insights:**") - - insights = [] - - # Most common amount ranges - amount_ranges = pd.cut(df['amount'], bins=5, labels=['Very Low', 'Low', 'Medium', 'High', 'Very High']) - most_common_range = amount_ranges.value_counts().index[0] - insights.append(f"šŸ“Š Most invoices fall in the '{most_common_range}' amount range") - - # Supplier concentration - top_supplier_share = (df['supplier_name'].value_counts().iloc[0] / len(df)) if len(df) > 0 else 0 - if top_supplier_share > 0.3: - insights.append(f"āš ļø High supplier concentration: Top supplier represents {top_supplier_share:.1%} of invoices") - else: - insights.append(f"āœ… Good supplier diversity: Top supplier represents {top_supplier_share:.1%} of invoices") - - # Confidence patterns - avg_confidence = df['extraction_confidence'].mean() - if avg_confidence > 0.8: - insights.append(f"āœ… High extraction quality: Average confidence is {avg_confidence:.1%}") - elif avg_confidence > 0.6: - 
insights.append(f"āš ļø Moderate extraction quality: Average confidence is {avg_confidence:.1%}") - else: - insights.append(f"āŒ Low extraction quality: Average confidence is {avg_confidence:.1%}") - - for insight in insights: - st.write(insight) - - except Exception as e: - st.error(f"Error in analytics: {e}") - - # ------------------------------------------------------------------------- - # TAB: SEARCH - ADVANCED SEMANTIC SEARCH - # ------------------------------------------------------------------------- - - with tabs[4]: - st.markdown("## šŸ” Advanced Semantic Search") - - vector_store = st.session_state.ultimate_processor.vector_store - - if not vector_store or not vector_store.document_metadata: - st.warning("šŸ” No documents in vector store. Please upload and process some invoices first.") - return - - # Search interface - st.markdown("### šŸŽÆ Natural Language Search") - - col1, col2, col3 = st.columns([3, 1, 1]) - - with col1: - search_query = st.text_input( - "Search Query:", - placeholder="e.g., expensive technology equipment, office supplies under 5000, consulting services from last quarter", - help="Use natural language to describe what you're looking for" - ) - - with col2: - top_k = st.number_input("Results", min_value=1, max_value=50, value=10) - - with col3: - similarity_threshold = st.slider("Min Similarity", 0.0, 1.0, 0.1, 0.05) - - # Advanced filters - with st.expander("šŸ”§ Advanced Filters"): - filter_col1, filter_col2, filter_col3 = st.columns(3) - - with filter_col1: - # Amount filter - amount_range = st.slider( - "Amount Range (₹):", - min_value=0, - max_value=100000, - value=(0, 100000), - step=1000 - ) - - with filter_col2: - # Date filter - date_filter = st.date_input( - "Date Range:", - value=None, - help="Filter by invoice date range" - ) - - with filter_col3: - # Supplier filter - all_suppliers = [meta.get('supplier_name', '') for meta in vector_store.document_metadata if meta.get('supplier_name')] - supplier_filter = 
st.multiselect( - "Suppliers:", - options=list(set(all_suppliers)), - help="Filter by specific suppliers" - ) - - # Perform search - if search_query: - with st.spinner("šŸ” Searching with AI..."): - try: - # Get search results - results = vector_store.semantic_search(search_query, top_k * 2) # Get more for filtering - - # Apply filters - filtered_results = [] - for result in results: - # Similarity filter - if result.similarity_score < similarity_threshold: - continue - - # Amount filter - amount = result.metadata.get('amount', 0) - if not (amount_range[0] <= amount <= amount_range[1]): - continue - - # Supplier filter - if supplier_filter and result.supplier_name not in supplier_filter: - continue - - # Date filter (if implemented) - # Add date filtering logic here if needed - - filtered_results.append(result) - - # Display results - if filtered_results: - st.success(f"šŸŽÆ Found {len(filtered_results)} matching documents") - - # Results summary - col1, col2, col3 = st.columns(3) - with col1: - avg_similarity = sum(r.similarity_score for r in filtered_results) / len(filtered_results) - st.metric("Avg Similarity", f"{avg_similarity:.3f}") - with col2: - total_value = sum(r.metadata.get('amount', 0) for r in filtered_results) - st.metric("Total Value", f"₹{total_value:,.2f}") - with col3: - unique_suppliers = len(set(r.supplier_name for r in filtered_results)) - st.metric("Unique Suppliers", unique_suppliers) - - # Display individual results - for i, result in enumerate(filtered_results[:top_k], 1): - with st.expander( - f"{i}. 
{result.invoice_number} - {result.supplier_name} " - f"(Similarity: {result.similarity_score:.3f})", - expanded=i <= 3 - ): - col1, col2 = st.columns(2) - - with col1: - st.write(f"**Invoice Number:** {result.invoice_number}") - st.write(f"**Supplier:** {result.supplier_name}") - st.write(f"**Amount:** ₹{result.metadata.get('amount', 0):,.2f}") - st.write(f"**Date:** {result.metadata.get('date', 'N/A')}") - - with col2: - st.write(f"**Similarity Score:** {result.similarity_score:.4f}") - st.write(f"**File:** {result.metadata.get('file_name', 'N/A')}") - st.write(f"**Confidence:** {result.metadata.get('extraction_confidence', 0):.1%}") - st.write(f"**Indexed:** {result.metadata.get('timestamp', 'N/A')[:19]}") - - st.write("**Content Preview:**") - st.text_area( - "Document content:", - value=result.content_preview, - height=80, - key=f"content_{i}", - disabled=True - ) - - # Export results - if st.button("šŸ“„ Export Search Results"): - export_data = [] - for result in filtered_results[:top_k]: - export_data.append({ - 'search_query': search_query, - 'invoice_number': result.invoice_number, - 'supplier_name': result.supplier_name, - 'similarity_score': result.similarity_score, - 'amount': result.metadata.get('amount', 0), - 'date': result.metadata.get('date', ''), - 'file_name': result.metadata.get('file_name', ''), - 'content_preview': result.content_preview - }) - - export_df = pd.DataFrame(export_data) - csv_data = export_df.to_csv(index=False) - - st.download_button( - "šŸ“„ Download CSV", - csv_data, - f"search_results_{datetime.now().strftime('%Y%m%d_%H%M')}.csv", - "text/csv" - ) - - else: - st.warning("šŸ” No results found matching your criteria. 
Try:") - st.write("• Broadening your search terms") - st.write("• Lowering the similarity threshold") - st.write("• Removing some filters") - st.write("• Using different keywords") - - except Exception as e: - st.error(f"Search error: {e}") - - # Search suggestions and examples - st.markdown("### šŸ’” Search Examples & Tips") - - example_tabs = st.tabs(["šŸŽÆ By Content", "šŸ’° By Amount", "šŸ¢ By Supplier", "šŸ“… By Time"]) - - with example_tabs[0]: - content_examples = [ - "office supplies and stationery items", - "technology equipment and software", - "consulting and professional services", - "travel and transportation expenses", - "maintenance and repair services" - ] - st.write("**Search by product/service type:**") - for i, example in enumerate(content_examples): - if st.button(f"šŸ” {example}", key=f"content_example_{i}"): - st.text_input("Search Query:", value=example, key="auto_fill_content") - - with example_tabs[1]: - amount_examples = [ - "high value purchases over 50000", - "small expenses under 5000", - "medium range invoices between 10000 and 30000", - "expensive equipment purchases" - ] - st.write("**Search by amount range:**") - for i, example in enumerate(amount_examples): - if st.button(f"šŸ’° {example}", key=f"amount_example_{i}"): - st.text_input("Search Query:", value=example, key="auto_fill_amount") - - with example_tabs[2]: - supplier_examples = [ - "invoices from technology vendors", - "services from consulting companies", - "purchases from office supply stores", - "payments to maintenance contractors" - ] - st.write("**Search by supplier type:**") - for i, example in enumerate(supplier_examples): - if st.button(f"šŸ¢ {example}", key=f"supplier_example_{i}"): - st.text_input("Search Query:", value=example, key="auto_fill_supplier") - - with example_tabs[3]: - time_examples = [ - "recent invoices from this month", - "quarterly expenses and spending", - "annual contract payments", - "recurring monthly services" - ] - st.write("**Search by 
time period:**") - for i, example in enumerate(time_examples): - if st.button(f"šŸ“… {example}", key=f"time_example_{i}"): - st.text_input("Search Query:", value=example, key="auto_fill_time") - - # ------------------------------------------------------------------------- - # TAB: DATA - COMPREHENSIVE DATA MANAGEMENT - # ------------------------------------------------------------------------- - - with tabs[5]: - st.markdown("## šŸ“‹ Data Management & Export") - - # Data overview - try: - json_data = st.session_state.ultimate_chatbot.load_json_data() - invoices = json_data.get("invoices", []) - - if not invoices: - st.warning("šŸ“Š No invoice data available.") - return - - # Convert to DataFrame for display - df_data = [] - for inv in invoices: - df_data.append({ - 'ID': inv.get('id', ''), - 'Invoice Number': inv.get('invoice_number', ''), - 'Supplier': inv.get('supplier_name', ''), - 'Buyer': inv.get('buyer_name', ''), - 'Date': inv.get('date', ''), - 'Amount': inv.get('amount', 0), - 'Quantity': inv.get('quantity', 0), - 'Description': inv.get('product_description', ''), - 'Confidence': inv.get('extraction_info', {}).get('confidence', 0), - 'Method': inv.get('extraction_info', {}).get('extraction_method', ''), - 'File Type': inv.get('file_info', {}).get('file_type', ''), - 'File Size': inv.get('file_info', {}).get('file_size', 0), - 'Vector Indexed': 'Yes' if any(meta.get('invoice_number') == inv.get('invoice_number') - for meta in st.session_state.ultimate_processor.vector_store.document_metadata) else 'No', - 'Created': inv.get('timestamps', {}).get('created_at', '')[:19] - }) - - df = pd.DataFrame(df_data) - - # Data summary - st.markdown("### šŸ“Š Data Summary") - - summary_col1, summary_col2, summary_col3, summary_col4 = st.columns(4) - - with summary_col1: - st.metric("Total Records", len(df)) - st.metric("Complete Records", (df['Invoice Number'].notna() & (df['Invoice Number'] != '')).sum()) - - with summary_col2: - st.metric("Total Value", 
f"₹{df['Amount'].sum():,.2f}") - st.metric("Avg Value", f"₹{df['Amount'].mean():,.2f}") - - with summary_col3: - st.metric("Unique Suppliers", df['Supplier'].nunique()) - st.metric("Unique Buyers", df['Buyer'].nunique()) - - with summary_col4: - st.metric("Avg Confidence", f"{df['Confidence'].mean():.1%}") - st.metric("Vector Indexed", (df['Vector Indexed'] == 'Yes').sum()) - - # Filtering interface - st.markdown("### šŸ” Filter & View Data") - - filter_col1, filter_col2, filter_col3, filter_col4, filter_col5 = st.columns(5) - - with filter_col1: - suppliers = ['All'] + sorted(df['Supplier'].dropna().unique().tolist()) - selected_supplier = st.selectbox("Supplier", suppliers) - - with filter_col2: - buyers = ['All'] + sorted(df['Buyer'].dropna().unique().tolist()) - selected_buyer = st.selectbox("Buyer", buyers) - - with filter_col3: - methods = ['All'] + sorted(df['Method'].dropna().unique().tolist()) - selected_method = st.selectbox("Method", methods) - - with filter_col4: - confidence_options = ["All", "High (>80%)", "Medium (50-80%)", "Low (<50%)"] - confidence_filter = st.selectbox("Confidence", confidence_options) - - with filter_col5: - vector_options = ["All", "Yes", "No"] - vector_filter = st.selectbox("Vector Indexed", vector_options) - - # Apply filters - filtered_df = df.copy() - - if selected_supplier != 'All': - filtered_df = filtered_df[filtered_df['Supplier'] == selected_supplier] - if selected_buyer != 'All': - filtered_df = filtered_df[filtered_df['Buyer'] == selected_buyer] - if selected_method != 'All': - filtered_df = filtered_df[filtered_df['Method'] == selected_method] - if vector_filter != 'All': - filtered_df = filtered_df[filtered_df['Vector Indexed'] == vector_filter] - - if confidence_filter == "High (>80%)": - filtered_df = filtered_df[filtered_df['Confidence'] > 0.8] - elif confidence_filter == "Medium (50-80%)": - filtered_df = filtered_df[(filtered_df['Confidence'] >= 0.5) & (filtered_df['Confidence'] <= 0.8)] - elif 
confidence_filter == "Low (<50%)": - filtered_df = filtered_df[filtered_df['Confidence'] < 0.5] - - # Display filtered data - if len(filtered_df) != len(df): - st.info(f"Showing {len(filtered_df)} of {len(df)} records") - - # Data table with enhanced configuration - st.dataframe( - filtered_df, - use_container_width=True, - column_config={ - "Amount": st.column_config.NumberColumn("Amount", format="₹%.2f"), - "Confidence": st.column_config.ProgressColumn("Confidence", min_value=0, max_value=1), - "File Size": st.column_config.NumberColumn("File Size", format="%d bytes"), - "Vector Indexed": st.column_config.SelectboxColumn("Vector Indexed", options=["Yes", "No"]), - }, - height=400 - ) - - # Export options - st.markdown("### šŸ“„ Export Options") - - export_col1, export_col2, export_col3, export_col4 = st.columns(4) - - with export_col1: - # CSV Export - csv_data = filtered_df.to_csv(index=False) - st.download_button( - "šŸ“Š Export CSV", - csv_data, - f"invoice_data_{datetime.now().strftime('%Y%m%d_%H%M')}.csv", - "text/csv", - use_container_width=True - ) - - with export_col2: - # JSON Export - filtered_invoices = [inv for inv in invoices if inv.get('id') in filtered_df['ID'].values] - filtered_json = { - "metadata": json_data.get("metadata", {}), - "invoices": filtered_invoices, - "export_info": { - "exported_at": datetime.now().isoformat(), - "total_records": len(filtered_invoices), - "filters_applied": { - "supplier": selected_supplier if selected_supplier != 'All' else None, - "buyer": selected_buyer if selected_buyer != 'All' else None, - "method": selected_method if selected_method != 'All' else None, - "confidence": confidence_filter if confidence_filter != 'All' else None, - "vector_indexed": vector_filter if vector_filter != 'All' else None - } - } - } - - st.download_button( - "šŸ“„ Export JSON", - json.dumps(filtered_json, indent=2, ensure_ascii=False), - f"invoice_data_{datetime.now().strftime('%Y%m%d_%H%M')}.json", - "application/json", - 
use_container_width=True - ) - - with export_col3: - # Excel Export (if openpyxl available) - try: - import io - excel_buffer = io.BytesIO() - with pd.ExcelWriter(excel_buffer, engine='openpyxl') as writer: - filtered_df.to_excel(writer, sheet_name='Invoice Data', index=False) - - # Add summary sheet - summary_data = { - 'Metric': ['Total Records', 'Total Value', 'Unique Suppliers', 'Avg Confidence'], - 'Value': [len(filtered_df), f"₹{filtered_df['Amount'].sum():,.2f}", - filtered_df['Supplier'].nunique(), f"{filtered_df['Confidence'].mean():.1%}"] - } - pd.DataFrame(summary_data).to_excel(writer, sheet_name='Summary', index=False) - - st.download_button( - "šŸ“ˆ Export Excel", - excel_buffer.getvalue(), - f"invoice_data_{datetime.now().strftime('%Y%m%d_%H%M')}.xlsx", - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - use_container_width=True - ) - except ImportError: - st.button("šŸ“ˆ Excel Export", disabled=True, help="Install openpyxl for Excel export", use_container_width=True) - - with export_col4: - # Vector Metadata Export - if st.session_state.ultimate_processor.vector_store: - vector_metadata = [meta for meta in st.session_state.ultimate_processor.vector_store.document_metadata - if any(inv.get('invoice_number') == meta.get('invoice_number') for inv in filtered_invoices)] - - st.download_button( - "šŸ” Export Vector Metadata", - json.dumps(vector_metadata, indent=2, ensure_ascii=False), - f"vector_metadata_{datetime.now().strftime('%Y%m%d_%H%M')}.json", - "application/json", - use_container_width=True - ) - # Data management actions - st.markdown("### šŸ› ļø Data Management Actions") - - action_col1, action_col2, action_col3, action_col4 = st.columns(4) - - with action_col1: - if st.button("šŸ”„ Refresh Data", use_container_width=True, key="refresh_data_main"): - st.rerun() - - with action_col2: - if st.button("🧹 Clean Duplicates", use_container_width=True, key="clean_duplicates_main"): - # Find and remove duplicates based on 
invoice number - duplicates = df[df.duplicated('Invoice Number', keep=False)] - if len(duplicates) > 0: - st.warning(f"Found {len(duplicates)} duplicate records") - # Show duplicates for review - st.dataframe(duplicates[['Invoice Number', 'Supplier', 'Amount', 'Created']]) - else: - st.success("No duplicates found!") - - with action_col3: - if st.button("šŸ“Š Validate Data", use_container_width=True,key="validate_data_main"): - # Data validation - validation_results = [] - - # Check for missing critical fields - missing_invoice_numbers = (df['Invoice Number'].isna() | (df['Invoice Number'] == '')).sum() - if missing_invoice_numbers > 0: - validation_results.append(f"āŒ {missing_invoice_numbers} records missing invoice numbers") - - missing_amounts = (df['Amount'].isna() | (df['Amount'] == 0)).sum() - if missing_amounts > 0: - validation_results.append(f"āŒ {missing_amounts} records missing amounts") - - low_confidence = (df['Confidence'] < 0.5).sum() - if low_confidence > 0: - validation_results.append(f"āš ļø {low_confidence} records with low confidence") - - # Check for unusual patterns - very_high_amounts = (df['Amount'] > df['Amount'].quantile(0.99)).sum() - if very_high_amounts > 0: - validation_results.append(f"šŸ” {very_high_amounts} unusually high amounts detected") - - # Display results - if validation_results: - for result in validation_results: - st.write(result) - else: - st.success("āœ… All validations passed!") - - with action_col4: - if st.button("āš ļø Clear All Data", use_container_width=True,key="clear_all_data_main"): - # Confirmation dialog - if st.button("šŸ—‘ļø Confirm Delete All", type="secondary"): - try: - # Clear database - conn = sqlite3.connect(st.session_state.ultimate_processor.db_path) - cursor = conn.cursor() - cursor.execute("DELETE FROM invoices") - cursor.execute("DELETE FROM file_processing_log") - cursor.execute("DELETE FROM processing_summary") - conn.commit() - conn.close() - - # Reset JSON file - 
st.session_state.ultimate_processor.setup_json_storage() - - # Clear vector store - vector_store = st.session_state.ultimate_processor.vector_store - if vector_store: - vector_store.vector_store = faiss.IndexFlatIP(vector_store.embedding_dimension) - vector_store.document_metadata = [] - vector_store.save_vector_store() - - st.success("āœ… All data cleared successfully!") - st.rerun() - except Exception as e: - st.error(f"Error clearing data: {e}") - - except Exception as e: - st.error(f"Error in data management: {e}") - # ------------------------------------------------------------------------- - # TAB: ADMIN - SYSTEM ADMINISTRATION - # ------------------------------------------------------------------------- - - with tabs[6]: - st.markdown("## šŸŽ›ļø System Administration") - - # Admin tabs - admin_tabs = st.tabs([ - "šŸ„ Health Monitor", - "⚔ Performance", - "šŸ’¾ Backup & Restore", - "šŸš€ Deployment", - "šŸ”§ Settings", - "šŸ“Š Logs" - ]) - - with admin_tabs[0]: - create_system_health_dashboard() - - with admin_tabs[1]: - performance_benchmark() - - with admin_tabs[2]: - create_backup_system() - - with admin_tabs[3]: - create_deployment_guide() - - with admin_tabs[4]: - st.markdown("### āš™ļø System Settings") - - # Model settings - st.markdown("#### šŸ¤– AI Model Configuration") - - current_embedding_model = st.session_state.ultimate_processor.vector_store.embedding_model_name - - new_embedding_model = st.selectbox( - "Embedding Model:", - [ - "all-MiniLM-L6-v2", - "all-mpnet-base-v2", - "multi-qa-mpnet-base-dot-v1", - "all-distilroberta-v1", - "paraphrase-multilingual-mpnet-base-v2" - ], - index=0 if current_embedding_model == "all-MiniLM-L6-v2" else 0 - ) - - if st.button("šŸ”„ Update Embedding Model"): - if new_embedding_model != current_embedding_model: - with st.spinner(f"Switching to {new_embedding_model}..."): - try: - # Create new vector store - new_vector_store = InvoiceVectorStore(embedding_model=new_embedding_model) - - # Rebuild with existing 
data - json_data = st.session_state.ultimate_chatbot.load_json_data() - if new_vector_store.rebuild_vector_store(json_data): - st.session_state.ultimate_processor.vector_store = new_vector_store - st.session_state.ultimate_chatbot.vector_store = new_vector_store - st.success(f"āœ… Switched to {new_embedding_model}") - st.rerun() - else: - st.error("Failed to rebuild vector store") - except Exception as e: - st.error(f"Error switching model: {e}") - - # Processing settings - st.markdown("#### āš™ļø Processing Configuration") - - col1, col2 = st.columns(2) - - with col1: - batch_size = st.number_input("Batch Size", min_value=1, max_value=100, value=10) - max_text_length = st.number_input("Max Text Length", min_value=1000, max_value=20000, value=5000) - - with col2: - similarity_threshold = st.slider("Default Similarity Threshold", 0.0, 1.0, 0.1, 0.05) - auto_backup_enabled = st.checkbox("Enable Auto Backup", value=False) - - if st.button("šŸ’¾ Save Settings", key="save_settings_user"): - settings = { - "batch_size": batch_size, - "max_text_length": max_text_length, - "similarity_threshold": similarity_threshold, - "auto_backup_enabled": auto_backup_enabled, - "embedding_model": new_embedding_model, - "updated_at": datetime.now().isoformat() - } - - # Save to session state and file - st.session_state.system_settings = settings - - try: - with open("system_settings.json", "w") as f: - json.dump(settings, f, indent=2) - st.success("āœ… Settings saved successfully!") - except Exception as e: - st.warning(f"Settings saved to session but file save failed: {e}") - - with admin_tabs[5]: - st.markdown("### šŸ“Š System Logs") - - # Log viewer - log_type = st.selectbox("Log Type", ["Application", "Processing", "Errors", "Performance"]) - - # Simulated logs (in real implementation, read from log files) - if log_type == "Application": - logs = [ - f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} - INFO - System initialized successfully", - f"{datetime.now().strftime('%Y-%m-%d 
%H:%M:%S')} - INFO - Vector store loaded with 150 documents", - f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} - INFO - User query processed: 'show me all invoices'", - f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} - INFO - Analytics dashboard accessed", - ] - elif log_type == "Processing": - logs = [ - f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} - INFO - Processing invoice_001.pdf", - f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} - SUCCESS - Extracted data from invoice_001.pdf", - f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} - INFO - Added to vector store", - f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} - INFO - Processing complete", - ] - elif log_type == "Errors": - logs = [ - f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} - ERROR - Failed to process corrupt_file.pdf", - f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} - WARNING - Low extraction confidence for invoice_045.pdf", - f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} - ERROR - Ollama connection timeout", - ] - else: # Performance - logs = [ - f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} - PERF - Query response time: 1.2s", - f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} - PERF - Vector search: 0.8s", - f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} - PERF - Memory usage: 75%", - ] - - # Display logs - log_container = st.container() - with log_container: - for log in logs[-50:]: # Show last 50 entries - if "ERROR" in log: - st.error(log) - elif "WARNING" in log: - st.warning(log) - elif "SUCCESS" in log: - st.success(log) - else: - st.info(log) - - # Log controls - col1, col2, col3 = st.columns(3) - - with col1: - if st.button("šŸ”„ Refresh Logs"): - st.rerun() - - with col2: - if st.button("šŸ“„ Export Logs"): - log_data = "\n".join(logs) - st.download_button( - "Download Log File", - log_data, - f"{log_type.lower()}_logs_{datetime.now().strftime('%Y%m%d_%H%M')}.txt", - "text/plain" - ) - - with col3: - if st.button("🧹 Clear Logs"): - st.info("Logs cleared (simulated)") 
- - # ------------------------------------------------------------------------- - # OPTIONAL: DASHBOARD TAB - # ------------------------------------------------------------------------- - - if st.session_state.get('show_dashboard', False) and len(tabs) > 6: - with tabs[7]: - create_system_dashboard() - - # ------------------------------------------------------------------------- - # FOOTER - # ------------------------------------------------------------------------- - - st.markdown("---") - st.markdown(""" -
-

šŸš€ Ultimate Invoice Processing System - Powered by AI & Vector Search

-

Built with ā¤ļø using Streamlit, spaCy, FAISS, and Ollama

-
- """, unsafe_allow_html=True) - - -# =============================================================================== -# MAIN ENTRY POINT AND CONFIGURATION -# =============================================================================== - -def main(): - """Main entry point - choose between original or ultimate version""" - - # Check if we should run ultimate version - if st.query_params.get("version") == "ultimate" or st.session_state.get("use_ultimate", False): - ultimate_enhanced_main() - else: - # Show version selector - st.title("šŸ“„ Invoice Processing System") - - col1, col2 = st.columns(2) - - with col1: - st.markdown(""" - ### šŸ”§ Standard Version - - Basic invoice processing - - Simple analytics - - Regular features - """) - if st.button("Use Standard Version", use_container_width=True): - enhanced_main() # From previous parts - - with col2: - st.markdown(""" - ### šŸš€ Ultimate Version - - AI-powered extraction - - Semantic search - - Advanced analytics - - Production features - """) - if st.button("Use Ultimate Version", use_container_width=True, type="primary"): - st.session_state.use_ultimate = True - st.rerun() - - -# =============================================================================== -# UTILITY FUNCTIONS AND HELPERS -# =============================================================================== - -def initialize_system(): - """Initialize the complete system with all components""" - try: - # Check dependencies - requirements_met, issues = validate_system_requirements() - if not requirements_met: - st.error("System requirements not met:") - for issue in issues: - st.write(issue) - return False - - # Initialize logging - logger = setup_advanced_logging() - logger.info("System initialization started") - - # Load configuration - try: - with open("system_settings.json", "r") as f: - settings = json.load(f) - st.session_state.system_settings = settings - except FileNotFoundError: - # Use default settings - st.session_state.system_settings 
= { - "batch_size": 10, - "max_text_length": 5000, - "similarity_threshold": 0.1, - "embedding_model": "all-MiniLM-L6-v2" - } - - logger.info("System initialization completed successfully") - return True - - except Exception as e: - st.error(f"System initialization failed: {e}") - return False - -def create_installation_guide(): - """Create comprehensive installation guide""" - st.markdown(""" - ## šŸ› ļø Installation Guide - - ### Quick Start (5 minutes) - - ```bash - # 1. Clone the repository - git clone https://github.com/your-repo/enhanced-invoice-system - cd enhanced-invoice-system - - # 2. Install Python dependencies - pip install -r requirements.txt - - # 3. Install and start Ollama - curl -fsSL https://ollama.com/install.sh | sh - ollama serve - ollama pull mistral:7b - - # 4. Run the application - streamlit run enhanced_main.py - ``` - - ### System Requirements - - Python 3.8+ - - 8GB RAM (16GB recommended) - - 10GB disk space - - Internet connection (for initial setup) - - ### Optional GPU Setup - ```bash - # For GPU acceleration (NVIDIA) - pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 - pip install faiss-gpu - ``` - - ### Docker Setup - ```bash - # Build and run with Docker - docker-compose up --build - ``` - - ### Troubleshooting - - Ensure Ollama is running: `curl http://localhost:11434/api/tags` - - Check Python version: `python --version` - - Verify dependencies: `pip list` - """) - -def generate_api_endpoints(): - """Generate FastAPI endpoints for the system""" - - api_code = ''' -from fastapi import FastAPI, UploadFile, File, HTTPException -from pydantic import BaseModel -from typing import List, Optional -import uvicorn - -app = FastAPI(title="Enhanced Invoice Processing API", version="1.0.0") - -# Initialize the invoice processor -processor = EnhancedInvoiceProcessor() - -class SearchRequest(BaseModel): - query: str - top_k: int = 5 - similarity_threshold: float = 0.1 - -class 
SearchResponse(BaseModel): - results: List[dict] - total_found: int - query_time: float - -@app.post("/api/upload", response_model=dict) -async def upload_invoice(file: UploadFile = File(...)): - """Upload and process an invoice""" - try: - # Save uploaded file - file_content = await file.read() - - # Process with the enhanced processor - result = processor.process_file(file.filename, len(file_content)) - - return { - "success": True, - "invoice_data": result.__dict__, - "message": "Invoice processed successfully" - } - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - -@app.post("/api/search", response_model=SearchResponse) -async def semantic_search(request: SearchRequest): - """Perform semantic search on invoices""" - try: - import time - start_time = time.time() - - # Perform search - results = processor.vector_store.semantic_search( - request.query, - request.top_k - ) - - query_time = time.time() - start_time - - return SearchResponse( - results=[r.__dict__ for r in results], - total_found=len(results), - query_time=query_time - ) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - -@app.get("/api/analytics/summary") -async def get_analytics_summary(): - """Get system analytics summary""" - try: - json_data = processor.load_json_data() - summary = json_data.get("summary", {}) - - return { - "total_invoices": len(json_data.get("invoices", [])), - "total_amount": summary.get("total_amount", 0), - "unique_suppliers": len(summary.get("unique_suppliers", [])), - "processing_stats": summary.get("processing_stats", {}) - } - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - -@app.get("/api/health") -async def health_check(): - """System health check""" - return { - "status": "healthy", - "timestamp": datetime.now().isoformat(), - "components": { - "vector_store": "ok" if processor.vector_store else "error", - "database": "ok" if os.path.exists(processor.db_path) else "error", - 
"embedding_model": "ok" if processor.vector_store.embedding_model else "error" - } - } - -if __name__ == "__main__": - uvicorn.run(app, host="0.0.0.0", port=8000) -''' - - return api_code - -def create_readme_documentation(): - """Generate comprehensive README.md content""" - - readme_content = f''' -# šŸš€ Enhanced Invoice Processing System - -A comprehensive AI-powered invoice processing system with semantic search capabilities, advanced analytics, and production-ready features. - -## ✨ Features - -- **šŸ¤– AI-Powered Extraction**: Uses advanced NLP models for accurate data extraction -- **šŸ” Semantic Search**: Natural language search with vector similarity -- **šŸ“Š Advanced Analytics**: Comprehensive business intelligence dashboards -- **šŸ’¾ Hybrid Storage**: SQLite + JSON + Vector embeddings -- **šŸŽ›ļø Admin Interface**: Complete system management tools -- **šŸš€ Production Ready**: Docker support, monitoring, backup/restore - -## šŸ—ļø Architecture - -``` -ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” -│ PDF Upload │───▶│ AI Extraction │───▶│ Data Storage │ -ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ - │ -ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” -│ Search & Chat │◀───│ Vector Embeddings│◀───│ JSON + SQLite │ -ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ -``` - -## šŸš€ Quick Start - -### Option 1: Local Installation - -1. 
**Install Dependencies** - ```bash - git clone https://github.com/your-repo/enhanced-invoice-system - cd enhanced-invoice-system - pip install -r requirements.txt - ``` - -2. **Setup Ollama** - ```bash - curl -fsSL https://ollama.com/install.sh | sh - ollama serve - ollama pull mistral:7b - ``` - -3. **Run the Application** - ```bash - streamlit run enhanced_main.py - ``` - -### Option 2: Docker - -```bash -docker-compose up --build -``` - -## šŸ“Š Usage Examples - -### Basic Upload and Processing -```python -from enhanced_invoice_pipeline import EnhancedInvoiceProcessor - -# Initialize processor -processor = EnhancedInvoiceProcessor() - -# Process invoice -result = processor.process_file("invoice.pdf") -print(f"Extracted: {{result.invoice_number}}") -``` - -### Semantic Search -```python -# Search invoices -results = processor.vector_store.semantic_search( - "high value technology purchases", - top_k=5 -) - -for result in results: - print(f"Found: {{result.invoice_number}} (Score: {{result.similarity_score}})") -``` - -### Analytics Query -```python -# Get comprehensive summary -chatbot = EnhancedInvoiceChatBot() -response = chatbot.query_database("Show me spending trends over time") -print(response) -``` - -## šŸ› ļø Configuration - -### Environment Variables -```bash -export OLLAMA_HOST=localhost:11434 -export EMBEDDING_MODEL=all-MiniLM-L6-v2 -export VECTOR_STORE_PATH=./data/vectors -export DATABASE_PATH=./data/invoices.db -``` - -### Custom Settings -```json -{{ - "batch_size": 10, - "max_text_length": 5000, - "similarity_threshold": 0.1, - "auto_backup_enabled": true -}} -``` - -## šŸ“” API Reference - -### REST Endpoints - -- `POST /api/upload` - Upload and process invoice -- `POST /api/search` - Semantic search -- `GET /api/analytics/summary` - Get analytics summary -- `GET /api/health` - Health check - -### Python API - -```python -# Core classes -from enhanced_invoice_pipeline import ( - EnhancedInvoiceProcessor, - InvoiceVectorStore, - 
EnhancedInvoiceChatBot -) - -# Initialize components -processor = EnhancedInvoiceProcessor() -vector_store = InvoiceVectorStore() -chatbot = EnhancedInvoiceChatBot() -``` - -## šŸ”§ Advanced Features - -### Custom NER Models -Train your own spaCy NER model for domain-specific extraction: - -```python -# Training data format -TRAINING_DATA = [ - ("Invoice Number: INV-2024-001", {{"entities": [(16, 27, "INVOICE_NUMBER")]}}), - ("Total Amount: $1,250.00", {{"entities": [(14, 23, "TOTAL_AMOUNT")]}}), -] - -# Train and use custom model -nlp = train_custom_ner_model(TRAINING_DATA) -processor = EnhancedInvoiceProcessor(ner_model_path="./custom_model") -``` - -### Vector Store Customization -```python -# Use different embedding models -vector_store = InvoiceVectorStore( - embedding_model="all-mpnet-base-v2" # Higher quality -) - -# Custom similarity search -results = vector_store.semantic_search( - query="office supplies", - top_k=10, - similarity_threshold=0.3 -) -``` - -## šŸ“Š Monitoring & Analytics - -### Built-in Dashboards -- **šŸ“ˆ Processing Analytics**: Success rates, processing times -- **šŸ’° Financial Analytics**: Spending trends, supplier analysis -- **šŸ” Search Analytics**: Query patterns, result quality -- **āš™ļø System Health**: Resource usage, component status - -### Custom Metrics -```python -# Get system statistics -stats = processor.get_system_stats() -print(f"Total processed: {{stats['total_invoices']}}") -print(f"Success rate: {{stats['success_rate']}}") -``` - -## šŸ”’ Security & Compliance - -### Data Protection -- Encrypted storage options -- Access control and authentication -- Audit logging -- GDPR compliance features - -### Deployment Security -```yaml -# docker-compose.yml security settings -services: - invoice-app: - environment: - - SECURE_MODE=true - - SSL_CERT_PATH=/certs/cert.pem - - SSL_KEY_PATH=/certs/key.pem -``` - -## šŸš€ Production Deployment - -### Performance Optimization -- GPU acceleration support -- Batch processing 
optimization -- Caching strategies -- Load balancing ready - -### Scaling Options -- Horizontal scaling with container orchestration -- Database clustering -- Distributed vector storage -- Microservices architecture - -## šŸ¤ Contributing - -1. Fork the repository -2. Create feature branch (`git checkout -b feature/amazing-feature`) -3. Commit changes (`git commit -m 'Add amazing feature'`) -4. Push to branch (`git push origin feature/amazing-feature`) -5. Open a Pull Request - -## šŸ“„ License - -This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. - -## šŸ™ Acknowledgments - -- [spaCy](https://spacy.io/) for NLP capabilities -- [Sentence Transformers](https://www.sbert.net/) for embeddings -- [FAISS](https://faiss.ai/) for vector similarity search -- [Streamlit](https://streamlit.io/) for the web interface -- [Ollama](https://ollama.ai/) for local LLM support - -## šŸ“ž Support - -- šŸ“§ Email: support@your-company.com -- šŸ’¬ Discord: [Your Discord Server] -- šŸ“– Documentation: [Full Documentation] -- šŸ› Issues: [GitHub Issues] - ---- - -**Made with ā¤ļø for the AI community** -''' - - return readme_content - -# =============================================================================== -# FINAL SYSTEM INTEGRATION AND STARTUP -# =============================================================================== - -if __name__ == "__main__": - # Initialize system - if initialize_system(): - # Run main application - main() - else: - st.error("āŒ System initialization failed. Please check the logs and try again.") +#!/usr/bin/env python3 +""" +Enhanced Invoice Processing & Analysis System - Hugging Face Spaces Compatible +A comprehensive system with AI-powered extraction, semantic search, and analytics. 
+ +Author: AI Assistant +Date: 2024 +Version: HuggingFace v1.0 +""" + +# =============================================================================== +# IMPORTS AND HUGGING FACE COMPATIBILITY +# =============================================================================== + +import os +import json +import re +import tempfile +import shutil +import pickle +import numpy as np +from datetime import datetime +from typing import Dict, List, Optional, Tuple +from dataclasses import dataclass +from pathlib import Path +import time +import logging + +# Check if running on Hugging Face Spaces +IS_HF_SPACE = os.getenv("SPACE_ID") is not None + +# Streamlit and core libraries +import streamlit as st +import sqlite3 +import pandas as pd +import plotly.express as px +import plotly.graph_objects as go +import requests + +# Vector storage and embeddings (HF compatible) +try: + import faiss + FAISS_AVAILABLE = True +except ImportError: + FAISS_AVAILABLE = False + st.warning("āš ļø FAISS not available. Vector search will be disabled.") + +try: + from sentence_transformers import SentenceTransformer + SENTENCE_TRANSFORMERS_AVAILABLE = True +except ImportError: + SENTENCE_TRANSFORMERS_AVAILABLE = False + st.warning("āš ļø Sentence Transformers not available. Using fallback methods.") + +try: + import torch + TORCH_AVAILABLE = True +except ImportError: + TORCH_AVAILABLE = False + +# Document processing (simplified for HF) +try: + from docling.document_converter import DocumentConverter + from docling.datamodel.base_models import InputFormat + from docling.datamodel.pipeline_options import PdfPipelineOptions + from docling.document_converter import PdfFormatOption + DOCLING_AVAILABLE = True +except ImportError: + DOCLING_AVAILABLE = False + st.warning("āš ļø Docling not available. 
Using simplified document processing.") + +# Alternative document processing for HF +try: + import pdfplumber + PDF_PROCESSING_AVAILABLE = True +except ImportError: + try: + import PyPDF2 + PDF_PROCESSING_AVAILABLE = True + except ImportError: + PDF_PROCESSING_AVAILABLE = False + +# =============================================================================== +# HUGGING FACE CONFIGURATION +# =============================================================================== + +# Hugging Face Spaces configuration +HF_CONFIG = { + "max_file_size_mb": 10, # Reduced for HF Spaces + "max_concurrent_files": 3, # Reduced for HF Spaces + "timeout_seconds": 30, + "use_cpu_only": True, # Force CPU for HF Spaces + "embedding_model": "all-MiniLM-L6-v2", # Lightweight model + "cache_dir": "./cache", + "data_dir": "./data", + "enable_ollama": False, # Disable Ollama for HF Spaces +} + +# Create necessary directories +os.makedirs(HF_CONFIG["cache_dir"], exist_ok=True) +os.makedirs(HF_CONFIG["data_dir"], exist_ok=True) + +# =============================================================================== +# STREAMLIT CONFIGURATION FOR HUGGING FACE +# =============================================================================== + +st.set_page_config( + page_title="AI Invoice Processing System", + page_icon="šŸ“„", + layout="wide", + initial_sidebar_state="expanded", + menu_items={ + 'Get Help': 'https://huggingface.co/spaces/your-space/help', + 'Report a bug': 'https://huggingface.co/spaces/your-space/issues', + 'About': """ + # AI Invoice Processing System + Built for Hugging Face Spaces with AI-powered extraction and semantic search. 
+ """ + } +) + +# =============================================================================== +# SIMPLIFIED DATA STRUCTURES FOR HF +# =============================================================================== + +@dataclass +class InvoiceData: + """Simplified data structure for extracted invoice information""" + supplier_name: str = "" + buyer_name: str = "" + invoice_number: str = "" + date: str = "" + amount: float = 0.0 + quantity: int = 0 + product_description: str = "" + file_path: str = "" + extraction_confidence: float = 0.0 + processing_method: str = "regex" + +@dataclass +class VectorSearchResult: + """Data structure for vector search results""" + invoice_id: str + invoice_number: str + supplier_name: str + similarity_score: float + content_preview: str + metadata: Dict + +# =============================================================================== +# HUGGING FACE COMPATIBLE VECTOR STORE +# =============================================================================== + +class HuggingFaceVectorStore: + """Simplified vector store compatible with Hugging Face Spaces""" + + def __init__(self, embedding_model: str = "all-MiniLM-L6-v2"): + self.embedding_model_name = embedding_model + self.vector_store_path = os.path.join(HF_CONFIG["data_dir"], "vectors.pkl") + self.metadata_path = os.path.join(HF_CONFIG["data_dir"], "metadata.pkl") + self.embedding_model = None + self.vectors = [] + self.document_metadata = [] + self.embedding_dimension = None + + self.setup_embedding_model() + self.load_vector_store() + + def setup_embedding_model(self): + """Initialize the sentence transformer model""" + if not SENTENCE_TRANSFORMERS_AVAILABLE: + st.warning("āš ļø Sentence Transformers not available. 
Vector search disabled.")
+            return
+
+        try:
+            with st.spinner(f"Loading embedding model: {self.embedding_model_name}..."):
+                self.embedding_model = SentenceTransformer(
+                    self.embedding_model_name,
+                    cache_folder=HF_CONFIG["cache_dir"]
+                )
+
+            # Get embedding dimension (encode() on a list returns shape (1, dim))
+            test_embedding = self.embedding_model.encode(["test"])
+            self.embedding_dimension = test_embedding.shape[1]
+
+            st.success(f"āœ… Embedding model loaded: {self.embedding_model_name}")
+
+        except Exception as e:
+            st.error(f"āŒ Failed to load embedding model: {e}")
+            self.embedding_model = None
+
+    def load_vector_store(self):
+        """Load existing vector store"""
+        try:
+            if os.path.exists(self.vector_store_path) and os.path.exists(self.metadata_path):
+                with open(self.vector_store_path, 'rb') as f:
+                    self.vectors = pickle.load(f)
+
+                with open(self.metadata_path, 'rb') as f:
+                    self.document_metadata = pickle.load(f)
+
+                st.success(f"āœ… Vector store loaded: {len(self.document_metadata)} documents")
+            else:
+                self.vectors = []
+                self.document_metadata = []
+                st.info("šŸ“„ New vector store initialized")
+
+        except Exception as e:
+            st.error(f"āŒ Error loading vector store: {e}")
+            self.vectors = []
+            self.document_metadata = []
+
+    def save_vector_store(self):
+        """Save vector store to disk"""
+        try:
+            with open(self.vector_store_path, 'wb') as f:
+                pickle.dump(self.vectors, f)
+
+            with open(self.metadata_path, 'wb') as f:
+                pickle.dump(self.document_metadata, f)
+
+            return True
+        except Exception as e:
+            st.error(f"Error saving vector store: {e}")
+            return False
+
+    def create_document_text(self, invoice_data: dict, raw_text: str = "") -> str:
+        """Create searchable text from invoice data"""
+        text_parts = []
+
+        for field, value in invoice_data.items():
+            if value and field != 'id':
+                text_parts.append(f"{field}: {value}")
+
+        if raw_text:
+            text_parts.append(f"content: {raw_text[:300]}")
+
+        return " | ".join(text_parts)
+
+    def add_document(self, invoice_data: dict, raw_text: str = "") -> bool:
+        
"""Add a document to the vector store""" + if not self.embedding_model: + return False + + try: + document_text = self.create_document_text(invoice_data, raw_text) + + # Generate embedding + embedding = self.embedding_model.encode(document_text, normalize_embeddings=True) + + # Create metadata + metadata = { + 'invoice_id': invoice_data.get('id', ''), + 'invoice_number': invoice_data.get('invoice_number', ''), + 'supplier_name': invoice_data.get('supplier_name', ''), + 'buyer_name': invoice_data.get('buyer_name', ''), + 'amount': invoice_data.get('amount', 0), + 'date': invoice_data.get('date', ''), + 'file_name': invoice_data.get('file_info', {}).get('file_name', ''), + 'document_text': document_text[:200], + 'timestamp': datetime.now().isoformat() + } + + # Add to store + self.vectors.append(embedding) + self.document_metadata.append(metadata) + + return True + + except Exception as e: + st.error(f"Error adding document to vector store: {e}") + return False + + def semantic_search(self, query: str, top_k: int = 5) -> List[VectorSearchResult]: + """Perform semantic search using cosine similarity""" + if not self.embedding_model or not self.vectors: + return [] + + try: + # Generate query embedding + query_embedding = self.embedding_model.encode(query, normalize_embeddings=True) + + # Calculate similarities + similarities = [] + for i, doc_embedding in enumerate(self.vectors): + similarity = np.dot(query_embedding, doc_embedding) + similarities.append((similarity, i)) + + # Sort by similarity + similarities.sort(reverse=True) + + # Return top results + results = [] + for similarity, idx in similarities[:top_k]: + if similarity > 0.1: # Relevance threshold + metadata = self.document_metadata[idx] + result = VectorSearchResult( + invoice_id=metadata.get('invoice_id', ''), + invoice_number=metadata.get('invoice_number', ''), + supplier_name=metadata.get('supplier_name', ''), + similarity_score=float(similarity), + content_preview=metadata.get('document_text', ''), + 
metadata=metadata + ) + results.append(result) + + return results + + except Exception as e: + st.error(f"Error in semantic search: {e}") + return [] + + def get_stats(self) -> Dict: + """Get vector store statistics""" + return { + 'total_documents': len(self.document_metadata), + 'embedding_dimension': self.embedding_dimension, + 'model_name': self.embedding_model_name, + 'vector_store_size': len(self.vectors) + } + +# =============================================================================== +# SIMPLIFIED DOCUMENT PROCESSING FOR HF +# =============================================================================== + +class HuggingFaceDocumentProcessor: + """Simplified document processor for Hugging Face Spaces""" + + def __init__(self): + self.setup_processors() + + def setup_processors(self): + """Setup available document processors""" + self.processors = {} + + # PDF processing + if PDF_PROCESSING_AVAILABLE: + try: + import pdfplumber + self.processors['pdf'] = self.extract_with_pdfplumber + st.success("āœ… PDF processing available (pdfplumber)") + except ImportError: + try: + import PyPDF2 + self.processors['pdf'] = self.extract_with_pypdf2 + st.success("āœ… PDF processing available (PyPDF2)") + except ImportError: + st.warning("āš ļø No PDF processor available") + + # Text files + self.processors['txt'] = self.extract_text_file + + # Images (basic OCR alternative) + self.processors['image'] = self.extract_image_text + + def extract_with_pdfplumber(self, file_path: str) -> str: + """Extract text using pdfplumber""" + try: + import pdfplumber + text = "" + with pdfplumber.open(file_path) as pdf: + for page in pdf.pages: + page_text = page.extract_text() + if page_text: + text += page_text + "\n" + return text + except Exception as e: + st.error(f"PDF extraction failed: {e}") + return "" + + def extract_with_pypdf2(self, file_path: str) -> str: + """Extract text using PyPDF2""" + try: + import PyPDF2 + text = "" + with open(file_path, 'rb') as file: + 
pdf_reader = PyPDF2.PdfReader(file) + for page in pdf_reader.pages: + text += page.extract_text() + "\n" + return text + except Exception as e: + st.error(f"PDF extraction failed: {e}") + return "" + + def extract_text_file(self, file_path: str) -> str: + """Extract text from text files""" + try: + with open(file_path, 'r', encoding='utf-8') as f: + return f.read() + except Exception as e: + st.error(f"Text file extraction failed: {e}") + return "" + + def extract_image_text(self, file_path: str) -> str: + """Basic image text extraction (placeholder for OCR)""" + st.warning("āš ļø OCR not available in this environment. Please use text-based documents.") + return "" + + def extract_text_from_document(self, file_path: str) -> str: + """Extract text from document based on file type""" + file_ext = Path(file_path).suffix.lower() + + if file_ext == '.pdf': + processor = self.processors.get('pdf') + elif file_ext == '.txt': + processor = self.processors.get('txt') + elif file_ext in ['.jpg', '.jpeg', '.png']: + processor = self.processors.get('image') + else: + st.warning(f"Unsupported file type: {file_ext}") + return "" + + if processor: + return processor(file_path) + else: + st.error(f"No processor available for {file_ext}") + return "" + +# =============================================================================== +# SIMPLIFIED AI EXTRACTION FOR HF +# =============================================================================== + +class HuggingFaceAIExtractor: + """Simplified AI extraction for Hugging Face Spaces""" + + def __init__(self): + self.use_transformers = self.setup_transformers() + + def setup_transformers(self): + """Try to setup Hugging Face transformers for NER""" + try: + from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification + + # Use a lightweight NER model + model_name = "dbmdz/bert-large-cased-finetuned-conll03-english" + + with st.spinner("Loading AI extraction model..."): + self.ner_pipeline = pipeline( + "ner", 
+ model=model_name, + tokenizer=model_name, + aggregation_strategy="simple" + ) + + st.success("āœ… AI extraction model loaded") + return True + + except Exception as e: + st.warning(f"āš ļø AI extraction not available: {e}") + return False + + def extract_with_ai(self, text: str) -> InvoiceData: + """Extract invoice data using AI""" + if not self.use_transformers: + return self.extract_with_regex(text) + + try: + # Use NER to extract entities + entities = self.ner_pipeline(text[:512]) # Limit text length + + invoice_data = InvoiceData() + invoice_data.processing_method = "ai_ner" + + # Extract specific entities (simplified) + for entity in entities: + entity_text = entity['word'].replace('##', '') + + # Simple mapping based on entity types + if entity['entity_group'] == 'ORG': + if not invoice_data.supplier_name: + invoice_data.supplier_name = entity_text + elif not invoice_data.buyer_name: + invoice_data.buyer_name = entity_text + + elif entity['entity_group'] == 'MISC': + if not invoice_data.invoice_number and any(c.isdigit() for c in entity_text): + invoice_data.invoice_number = entity_text + + # Fall back to regex for missing fields + regex_data = self.extract_with_regex(text) + + # Combine results + if not invoice_data.invoice_number: + invoice_data.invoice_number = regex_data.invoice_number + if not invoice_data.amount: + invoice_data.amount = regex_data.amount + if not invoice_data.date: + invoice_data.date = regex_data.date + if not invoice_data.quantity: + invoice_data.quantity = regex_data.quantity + + invoice_data.extraction_confidence = 0.8 + + return invoice_data + + except Exception as e: + st.error(f"AI extraction failed: {e}") + return self.extract_with_regex(text) + + def extract_with_regex(self, text: str) -> InvoiceData: + """Fallback regex extraction""" + invoice_data = InvoiceData() + invoice_data.processing_method = "regex" + + # Enhanced regex patterns + patterns = { + 'invoice_number': [ + r'invoice[#\s]*:?\s*([A-Z0-9\-_]+)', + 
r'inv[#\s]*:?\s*([A-Z0-9\-_]+)', + r'bill[#\s]*:?\s*([A-Z0-9\-_]+)', + r'#([A-Z0-9\-_]{3,})' + ], + 'amount': [ + r'total[:\s]*[\$₹]?([0-9,]+\.?\d*)', + r'amount[:\s]*[\$₹]?([0-9,]+\.?\d*)', + r'[\$₹]([0-9,]+\.?\d*)', + r'([0-9,]+\.?\d*)\s*(?:dollars?|₹|USD|INR)' + ], + 'date': [ + r'date[:\s]*(\d{1,2}[/\-]\d{1,2}[/\-]\d{2,4})', + r'(\d{1,2}[/\-]\d{1,2}[/\-]\d{2,4})', + r'(\d{4}[/\-]\d{1,2}[/\-]\d{1,2})' + ], + 'quantity': [ + r'qty[:\s]*(\d+)', + r'quantity[:\s]*(\d+)', + r'(\d+)\s*(?:pcs?|units?|items?)' + ] + } + + text_lower = text.lower() + + # Extract using patterns + for pattern_list in patterns['invoice_number']: + match = re.search(pattern_list, text_lower, re.IGNORECASE) + if match: + invoice_data.invoice_number = match.group(1).upper() + break + + for pattern in patterns['amount']: + match = re.search(pattern, text_lower, re.IGNORECASE) + if match: + try: + amount_str = match.group(1).replace(',', '') + invoice_data.amount = float(amount_str) + break + except ValueError: + continue + + for pattern in patterns['date']: + match = re.search(pattern, text, re.IGNORECASE) + if match: + invoice_data.date = self.parse_date(match.group(1)) + break + + for pattern in patterns['quantity']: + match = re.search(pattern, text_lower, re.IGNORECASE) + if match: + try: + invoice_data.quantity = int(match.group(1)) + break + except ValueError: + continue + + # Extract company names (basic) + company_patterns = [ + r'(?:from|to|vendor|supplier)[:]\s*([A-Z][A-Za-z\s&,\.]{2,30})', + r'([A-Z][A-Za-z\s&,\.]{3,30})\s*(?:Ltd|Inc|Corp|LLC|Co\.|Company)', + ] + + for pattern in company_patterns: + matches = re.findall(pattern, text) + if matches: + if not invoice_data.supplier_name: + invoice_data.supplier_name = matches[0].strip() + elif len(matches) > 1 and not invoice_data.buyer_name: + invoice_data.buyer_name = matches[1].strip() + + # Extract product description (basic) + desc_patterns = [ + r'description[:]\s*([A-Za-z0-9\s,.-]{10,100})', + 
r'item[:]\s*([A-Za-z0-9\s,.-]{10,100})', + r'service[:]\s*([A-Za-z0-9\s,.-]{10,100})' + ] + + for pattern in desc_patterns: + match = re.search(pattern, text, re.IGNORECASE) + if match: + invoice_data.product_description = match.group(1).strip() + break + + invoice_data.extraction_confidence = 0.6 + return invoice_data + + def parse_date(self, date_str: str) -> str: + """Parse date to YYYY-MM-DD format""" + if not date_str: + return "" + + formats = ['%Y-%m-%d', '%m/%d/%Y', '%d/%m/%Y', '%m-%d-%Y', '%d-%m-%Y', '%Y/%m/%d'] + + for fmt in formats: + try: + parsed_date = datetime.strptime(date_str, fmt) + return parsed_date.strftime('%Y-%m-%d') + except ValueError: + continue + + return date_str + +# =============================================================================== +# MAIN PROCESSOR FOR HUGGING FACE +# =============================================================================== + +class HuggingFaceInvoiceProcessor: + """Main invoice processor optimized for Hugging Face Spaces""" + + def __init__(self): + self.setup_storage() + self.document_processor = HuggingFaceDocumentProcessor() + self.ai_extractor = HuggingFaceAIExtractor() + self.vector_store = HuggingFaceVectorStore() if SENTENCE_TRANSFORMERS_AVAILABLE else None + + # Initialize stats + self.processing_stats = { + 'total_processed': 0, + 'successful': 0, + 'failed': 0, + 'start_time': datetime.now() + } + + def setup_storage(self): + """Setup storage paths""" + self.data_dir = HF_CONFIG["data_dir"] + self.json_path = os.path.join(self.data_dir, "invoices.json") + + # Initialize JSON storage + if not os.path.exists(self.json_path): + initial_data = { + "metadata": { + "created_at": datetime.now().isoformat(), + "version": "hf_v1.0", + "total_invoices": 0 + }, + "invoices": [], + "summary": { + "total_amount": 0.0, + "unique_suppliers": [], + "processing_stats": {"successful": 0, "failed": 0} + } + } + self.save_json_data(initial_data) + + def load_json_data(self) -> dict: + """Load invoice data 
from JSON""" + try: + with open(self.json_path, 'r', encoding='utf-8') as f: + return json.load(f) + except (FileNotFoundError, json.JSONDecodeError): + self.setup_storage() + return self.load_json_data() + + def save_json_data(self, data: dict): + """Save invoice data to JSON""" + try: + with open(self.json_path, 'w', encoding='utf-8') as f: + json.dump(data, f, indent=2, ensure_ascii=False) + except Exception as e: + st.error(f"Error saving data: {e}") + + def process_uploaded_file(self, uploaded_file) -> InvoiceData: + """Process a single uploaded file""" + self.processing_stats['total_processed'] += 1 + + try: + # Check file size + file_size = len(uploaded_file.getvalue()) + if file_size > HF_CONFIG["max_file_size_mb"] * 1024 * 1024: + st.error(f"File too large: {file_size / 1024 / 1024:.2f}MB > {HF_CONFIG['max_file_size_mb']}MB") + self.processing_stats['failed'] += 1 + return InvoiceData() + + # Save temporarily + with tempfile.NamedTemporaryFile(delete=False, suffix=f".{uploaded_file.name.split('.')[-1]}") as tmp_file: + tmp_file.write(uploaded_file.getvalue()) + tmp_file_path = tmp_file.name + + try: + # Extract text + text = self.document_processor.extract_text_from_document(tmp_file_path) + + if not text.strip(): + st.warning(f"No text extracted from {uploaded_file.name}") + self.processing_stats['failed'] += 1 + return InvoiceData() + + # Extract invoice data + invoice_data = self.ai_extractor.extract_with_ai(text) + invoice_data.file_path = uploaded_file.name + + # Save to storage + self.save_invoice_data(invoice_data, text, file_size) + + self.processing_stats['successful'] += 1 + return invoice_data + + finally: + # Cleanup + os.unlink(tmp_file_path) + + except Exception as e: + st.error(f"Error processing {uploaded_file.name}: {e}") + self.processing_stats['failed'] += 1 + return InvoiceData() + + def save_invoice_data(self, invoice_data: InvoiceData, raw_text: str, file_size: int): + """Save invoice data to JSON and vector store""" + try: + # Load 
existing data + data = self.load_json_data() + + # Create invoice record + invoice_record = { + "id": len(data["invoices"]) + 1, + "invoice_number": invoice_data.invoice_number, + "supplier_name": invoice_data.supplier_name, + "buyer_name": invoice_data.buyer_name, + "date": invoice_data.date, + "amount": invoice_data.amount, + "quantity": invoice_data.quantity, + "product_description": invoice_data.product_description, + "file_info": { + "file_name": invoice_data.file_path, + "file_size": file_size + }, + "extraction_info": { + "confidence": invoice_data.extraction_confidence, + "method": invoice_data.processing_method, + "raw_text_preview": raw_text[:300] + }, + "timestamps": { + "created_at": datetime.now().isoformat() + } + } + + # Add to invoices + data["invoices"].append(invoice_record) + + # Update summary + self.update_summary(data) + + # Save JSON + self.save_json_data(data) + + # Add to vector store + if self.vector_store: + self.vector_store.add_document(invoice_record, raw_text) + self.vector_store.save_vector_store() + + except Exception as e: + st.error(f"Error saving invoice data: {e}") + + def update_summary(self, data: dict): + """Update summary statistics""" + invoices = data["invoices"] + + total_amount = sum(inv.get("amount", 0) for inv in invoices) + unique_suppliers = list(set(inv.get("supplier_name", "") for inv in invoices if inv.get("supplier_name"))) + + data["summary"] = { + "total_amount": total_amount, + "unique_suppliers": unique_suppliers, + "processing_stats": { + "successful": self.processing_stats['successful'], + "failed": self.processing_stats['failed'], + "total_processed": self.processing_stats['total_processed'] + } + } + + data["metadata"]["last_updated"] = datetime.now().isoformat() + data["metadata"]["total_invoices"] = len(invoices) + +# =============================================================================== +# SIMPLIFIED CHATBOT FOR HF +# 
=============================================================================== + +class HuggingFaceChatBot: + """Simplified chatbot for Hugging Face Spaces""" + + def __init__(self, processor: HuggingFaceInvoiceProcessor): + self.processor = processor + + def query_database(self, query: str) -> str: + """Process user query and return response""" + try: + data = self.processor.load_json_data() + invoices = data.get("invoices", []) + + if not invoices: + return "No invoice data found. Please upload some invoices first." + + query_lower = query.lower() + + # Handle different query types + if any(phrase in query_lower for phrase in ["summary", "overview", "total"]): + return self.generate_summary(data) + + elif "count" in query_lower or "how many" in query_lower: + return self.handle_count_query(data) + + elif any(phrase in query_lower for phrase in ["amount", "value", "money", "cost"]): + return self.handle_amount_query(data) + + elif any(phrase in query_lower for phrase in ["supplier", "vendor", "company"]): + return self.handle_supplier_query(data, query) + + + elif self.processor.vector_store: + return self.handle_semantic_search(query) + + else: + return self.handle_general_query(data, query) + + except Exception as e: + return f"Error processing query: {e}" + + def generate_summary(self, data: dict) -> str: + """Generate comprehensive summary""" + invoices = data.get("invoices", []) + summary = data.get("summary", {}) + + if not invoices: + return "No invoices found in the system." 
+ + total_amount = summary.get("total_amount", 0) + avg_amount = total_amount / len(invoices) if invoices else 0 + unique_suppliers = len(summary.get("unique_suppliers", [])) + + response = f""" +**šŸ“Š Invoice System Summary** + +• **Total Invoices**: {len(invoices):,} +• **Total Value**: ₹{total_amount:,.2f} +• **Average Invoice**: ₹{avg_amount:,.2f} +• **Unique Suppliers**: {unique_suppliers} + +**šŸ“ˆ Processing Stats** +• **Successful**: {summary.get('processing_stats', {}).get('successful', 0)} +• **Failed**: {summary.get('processing_stats', {}).get('failed', 0)} + +**šŸ” Recent Invoices** +""" + + # Show recent invoices + recent = sorted(invoices, key=lambda x: x.get('timestamps', {}).get('created_at', ''), reverse=True)[:5] + for i, inv in enumerate(recent, 1): + response += f"\n{i}. **{inv.get('invoice_number', 'N/A')}** - {inv.get('supplier_name', 'Unknown')} (₹{inv.get('amount', 0):,.2f})" + + return response + + def handle_count_query(self, data: dict) -> str: + """Handle count-related queries""" + invoices = data.get("invoices", []) + total = len(invoices) + unique_numbers = len(set(inv.get('invoice_number', '') for inv in invoices if inv.get('invoice_number'))) + + return f""" +**šŸ“Š Invoice Count Summary** + +• **Total Records**: {total} +• **Unique Invoice Numbers**: {unique_numbers} +• **Duplicates**: {total - unique_numbers if total > unique_numbers else 0} + +**šŸ“… Processing Timeline** +• **First Invoice**: {invoices[0].get('timestamps', {}).get('created_at', 'N/A')[:10] if invoices else 'N/A'} +• **Latest Invoice**: {invoices[-1].get('timestamps', {}).get('created_at', 'N/A')[:10] if invoices else 'N/A'} +""" + + def handle_amount_query(self, data: dict) -> str: + """Handle amount-related queries""" + invoices = data.get("invoices", []) + amounts = [inv.get('amount', 0) for inv in invoices if inv.get('amount', 0) > 0] + + if not amounts: + return "No amount information found in invoices." 
+ + total_amount = sum(amounts) + avg_amount = total_amount / len(amounts) + max_amount = max(amounts) + min_amount = min(amounts) + + # Find high-value invoices + high_value_threshold = sorted(amounts, reverse=True)[min(4, len(amounts)-1)] if len(amounts) > 5 else max_amount + high_value_invoices = [inv for inv in invoices if inv.get('amount', 0) >= high_value_threshold] + + response = f""" +**šŸ’° Financial Analysis** + +• **Total Amount**: ₹{total_amount:,.2f} +• **Average Amount**: ₹{avg_amount:,.2f} +• **Highest Invoice**: ₹{max_amount:,.2f} +• **Lowest Invoice**: ₹{min_amount:,.2f} + +**šŸŽÆ High-Value Invoices (₹{high_value_threshold:,.2f}+)** +""" + + for i, inv in enumerate(high_value_invoices[:5], 1): + response += f"\n{i}. **{inv.get('invoice_number', 'N/A')}** - {inv.get('supplier_name', 'Unknown')} (₹{inv.get('amount', 0):,.2f})" + + return response + + def handle_supplier_query(self, data: dict, query: str) -> str: + """Handle supplier-related queries""" + invoices = data.get("invoices", []) + + # Count invoices by supplier + supplier_counts = {} + supplier_amounts = {} + + for inv in invoices: + supplier = inv.get('supplier_name', '').strip() + if supplier: + supplier_counts[supplier] = supplier_counts.get(supplier, 0) + 1 + supplier_amounts[supplier] = supplier_amounts.get(supplier, 0) + inv.get('amount', 0) + + if not supplier_counts: + return "No supplier information found in invoices." 
+ + # Sort suppliers by amount + top_suppliers = sorted(supplier_amounts.items(), key=lambda x: x[1], reverse=True)[:10] + + response = f""" +**šŸ¢ Supplier Analysis** + +• **Total Unique Suppliers**: {len(supplier_counts)} +• **Most Active**: {max(supplier_counts, key=supplier_counts.get)} ({supplier_counts[max(supplier_counts, key=supplier_counts.get)]} invoices) + +**šŸ’° Top Suppliers by Amount** +""" + + for i, (supplier, amount) in enumerate(top_suppliers, 1): + count = supplier_counts[supplier] + avg = amount / count if count > 0 else 0 + response += f"\n{i}. **{supplier}** - ₹{amount:,.2f} ({count} invoices, avg: ₹{avg:,.2f})" + + return response + + def handle_semantic_search(self, query: str) -> str: + """Handle semantic search queries""" + try: + results = self.processor.vector_store.semantic_search(query, top_k=5) + + if not results: + return f"No relevant results found for '{query}'. Try different keywords." + + response = f"šŸ” **Semantic Search Results for '{query}'**\n\n" + + for i, result in enumerate(results, 1): + response += f"{i}. 
**{result.invoice_number}** - {result.supplier_name}\n" + response += f" • Similarity: {result.similarity_score:.3f}\n" + response += f" • Amount: ₹{result.metadata.get('amount', 0):,.2f}\n" + response += f" • Preview: {result.content_preview[:100]}...\n\n" + + return response + + except Exception as e: + return f"Semantic search error: {e}" + + def handle_general_query(self, data: dict, query: str) -> str: + """Handle general queries with keyword search""" + invoices = data.get("invoices", []) + query_words = query.lower().split() + + # Simple keyword matching + matching_invoices = [] + for inv in invoices: + text_to_search = ( + inv.get('supplier_name', '') + ' ' + + inv.get('buyer_name', '') + ' ' + + inv.get('product_description', '') + ' ' + + inv.get('extraction_info', {}).get('raw_text_preview', '') + ).lower() + + if any(word in text_to_search for word in query_words): + matching_invoices.append(inv) + + if not matching_invoices: + return f"No invoices found matching '{query}'. Try different keywords or check the summary." + + response = f"šŸ” **Found {len(matching_invoices)} invoices matching '{query}'**\n\n" + + for i, inv in enumerate(matching_invoices[:5], 1): + response += f"{i}. **{inv.get('invoice_number', 'N/A')}** - {inv.get('supplier_name', 'Unknown')}\n" + response += f" • Amount: ₹{inv.get('amount', 0):,.2f}\n" + response += f" • Date: {inv.get('date', 'N/A')}\n\n" + + if len(matching_invoices) > 5: + response += f"... and {len(matching_invoices) - 5} more results." + + return response + +# =============================================================================== +# STREAMLIT APPLICATION FOR HUGGING FACE +# =============================================================================== + +def create_huggingface_app(): + """Main Streamlit application optimized for Hugging Face Spaces""" + + # Custom CSS for better UI + st.markdown(""" + + """, unsafe_allow_html=True) + + # Header + st.markdown('

šŸ“„ AI Invoice Processing System

', unsafe_allow_html=True) + st.markdown(""" +
+

+ AI-Powered Document Processing • Semantic Search • Smart Analytics • Hugging Face Spaces +

+
+ """, unsafe_allow_html=True) + + # Initialize processor + if 'hf_processor' not in st.session_state: + with st.spinner("šŸ”§ Initializing AI Invoice Processor..."): + st.session_state.hf_processor = HuggingFaceInvoiceProcessor() + st.session_state.hf_chatbot = HuggingFaceChatBot(st.session_state.hf_processor) + st.session_state.chat_history = [] + + # Sidebar with system status + with st.sidebar: + st.header("šŸŽ›ļø System Status") + + # Check component status + processor = st.session_state.hf_processor + + # Document processing + if processor.document_processor.processors: + st.markdown('āœ… Document Processing', unsafe_allow_html=True) + else: + st.markdown('āŒ Document Processing', unsafe_allow_html=True) + + # AI extraction + if processor.ai_extractor.use_transformers: + st.markdown('āœ… AI Extraction', unsafe_allow_html=True) + else: + st.markdown('āš ļø Regex Extraction', unsafe_allow_html=True) + + # Vector search + if processor.vector_store and processor.vector_store.embedding_model: + st.markdown('āœ… Semantic Search', unsafe_allow_html=True) + else: + st.markdown('āš ļø Keyword Search Only', unsafe_allow_html=True) + + # Quick stats + st.header("šŸ“Š Quick Stats") + try: + data = processor.load_json_data() + total_invoices = len(data.get("invoices", [])) + total_amount = data.get("summary", {}).get("total_amount", 0) + + st.metric("Total Invoices", total_invoices) + st.metric("Total Value", f"₹{total_amount:,.2f}") + st.metric("Success Rate", f"{processor.processing_stats['successful']}/{processor.processing_stats['total_processed']}") + + except Exception as e: + st.error(f"Stats error: {e}") + + # Processing info + st.header("āš™ļø Processing Info") + st.info(f""" + **Limits for Hugging Face Spaces:** + • Max file size: {HF_CONFIG['max_file_size_mb']}MB + • Max concurrent files: {HF_CONFIG['max_concurrent_files']} + • Timeout: {HF_CONFIG['timeout_seconds']}s + """) + + # Main tabs + tab1, tab2, tab3, tab4 = st.tabs([ + "šŸ“¤ Upload & Process", + 
"šŸ’¬ AI Chat", + "šŸ“Š Analytics", + "šŸ“‹ Data Explorer" + ]) + + # ------------------------------------------------------------------------- + # TAB 1: UPLOAD & PROCESS + # ------------------------------------------------------------------------- + + with tab1: + st.header("šŸ“¤ Upload Invoice Documents") + + # Feature highlights + col1, col2, col3 = st.columns(3) + + with col1: + st.markdown(""" +
+

šŸ¤– AI Extraction

+

Advanced NLP models extract structured data automatically

+
+ """, unsafe_allow_html=True) + + with col2: + st.markdown(""" +
+

šŸ” Smart Search

+

Semantic search finds invoices using natural language

+
+ """, unsafe_allow_html=True) + + with col3: + st.markdown(""" +
+

šŸ“Š Analytics

+

Comprehensive insights and visualizations

+
+ """, unsafe_allow_html=True) + + # File upload interface + st.markdown("### šŸ“ Upload Your Invoices") + + uploaded_files = st.file_uploader( + "Choose invoice files (PDF, TXT supported)", + type=['pdf', 'txt'], + accept_multiple_files=True, + help=f"Maximum file size: {HF_CONFIG['max_file_size_mb']}MB per file" + ) + + if uploaded_files: + # Limit concurrent processing for HF Spaces + if len(uploaded_files) > HF_CONFIG['max_concurrent_files']: + st.warning(f"āš ļø Too many files selected. Processing first {HF_CONFIG['max_concurrent_files']} files.") + uploaded_files = uploaded_files[:HF_CONFIG['max_concurrent_files']] + + st.info(f"šŸ“Š {len(uploaded_files)} files selected") + + if st.button("šŸš€ Process Files", type="primary", use_container_width=True): + progress_bar = st.progress(0) + status_container = st.container() + results_container = st.container() + + successful = 0 + failed = 0 + + for i, uploaded_file in enumerate(uploaded_files): + progress_bar.progress((i + 1) / len(uploaded_files)) + + with status_container: + st.info(f"Processing: {uploaded_file.name}") + + # Process file + result = st.session_state.hf_processor.process_uploaded_file(uploaded_file) + + with results_container: + if result.invoice_number: + successful += 1 + with st.expander(f"āœ… {uploaded_file.name}", expanded=False): + col1, col2 = st.columns(2) + with col1: + st.write(f"**Invoice #:** {result.invoice_number}") + st.write(f"**Supplier:** {result.supplier_name}") + st.write(f"**Amount:** ₹{result.amount:.2f}") + with col2: + st.write(f"**Date:** {result.date}") + st.write(f"**Method:** {result.processing_method}") + st.write(f"**Confidence:** {result.extraction_confidence:.1%}") + else: + failed += 1 + st.warning(f"āš ļø Could not extract data from {uploaded_file.name}") + + # Final status + with status_container: + st.success(f"āœ… Processing complete! 
{successful} successful, {failed} failed") + + if successful > 0: + st.balloons() + + # ------------------------------------------------------------------------- + # TAB 2: AI CHAT + # ------------------------------------------------------------------------- + + with tab2: + st.header("šŸ’¬ AI Chat Interface") + + # Chat interface + user_query = st.chat_input("Ask about your invoices... (e.g., 'show me total spending')") + + if user_query: + # Add user message + st.session_state.chat_history.append({ + "role": "user", + "content": user_query, + "timestamp": datetime.now() + }) + + # Get AI response + with st.spinner("šŸ¤– AI is analyzing..."): + response = st.session_state.hf_chatbot.query_database(user_query) + + st.session_state.chat_history.append({ + "role": "assistant", + "content": response, + "timestamp": datetime.now() + }) + + # Display chat history + for message in st.session_state.chat_history: + with st.chat_message(message["role"]): + st.markdown(message["content"]) + + # Suggested queries + if not st.session_state.chat_history: + st.markdown("### šŸ’” Try These Queries") + + col1, col2 = st.columns(2) + + with col1: + queries = [ + "Show me a summary of all invoices", + "How much have we spent in total?", + "Who are our top suppliers?", + "Find invoices with high amounts" + ] + for i, query in enumerate(queries): + if st.button(query, key=f"query_{i}"): + st.session_state.chat_history.append({"role": "user", "content": query, "timestamp": datetime.now()}) + response = st.session_state.hf_chatbot.query_database(query) + st.session_state.chat_history.append({"role": "assistant", "content": response, "timestamp": datetime.now()}) + st.rerun() + + with col2: + if st.session_state.hf_processor.vector_store: + semantic_queries = [ + "Find technology equipment purchases", + "Show me office supplies", + "Search for consulting services", + "Find maintenance contracts" + ] + for i, query in enumerate(semantic_queries): + if st.button(query, 
key=f"semantic_{i}"): + st.session_state.chat_history.append({"role": "user", "content": query, "timestamp": datetime.now()}) + response = st.session_state.hf_chatbot.query_database(query) + st.session_state.chat_history.append({"role": "assistant", "content": response, "timestamp": datetime.now()}) + st.rerun() + + # ------------------------------------------------------------------------- + # TAB 3: ANALYTICS + # ------------------------------------------------------------------------- + + with tab3: + st.header("šŸ“Š Analytics Dashboard") + + try: + data = st.session_state.hf_processor.load_json_data() + invoices = data.get("invoices", []) + + if not invoices: + st.info("šŸ“Š No data available. Upload some invoices to see analytics.") + return + + # Convert to DataFrame + df_data = [] + for inv in invoices: + df_data.append({ + 'invoice_number': inv.get('invoice_number', ''), + 'supplier_name': inv.get('supplier_name', ''), + 'amount': inv.get('amount', 0), + 'date': inv.get('date', ''), + 'confidence': inv.get('extraction_info', {}).get('confidence', 0) + }) + + df = pd.DataFrame(df_data) + + # Key metrics + col1, col2, col3, col4 = st.columns(4) + + with col1: + st.metric("Total Invoices", len(df)) + with col2: + st.metric("Total Amount", f"₹{df['amount'].sum():,.2f}") + with col3: + st.metric("Avg Amount", f"₹{df['amount'].mean():,.2f}") + with col4: + st.metric("Unique Suppliers", df['supplier_name'].nunique()) + + # Visualizations + if len(df) > 0: + # Amount distribution + fig_hist = px.histogram( + df, + x='amount', + title="Invoice Amount Distribution", + labels={'amount': 'Amount (₹)', 'count': 'Number of Invoices'} + ) + st.plotly_chart(fig_hist, use_container_width=True) + + # Top suppliers + if df['supplier_name'].notna().any(): + supplier_amounts = df.groupby('supplier_name')['amount'].sum().sort_values(ascending=False).head(10) + + fig_suppliers = px.bar( + x=supplier_amounts.values, + y=supplier_amounts.index, + orientation='h', + title="Top 10 
Suppliers by Total Amount", + labels={'x': 'Total Amount (₹)', 'y': 'Supplier'} + ) + st.plotly_chart(fig_suppliers, use_container_width=True) + + # Confidence analysis + fig_confidence = px.histogram( + df, + x='confidence', + title="Extraction Confidence Distribution", + labels={'confidence': 'Confidence Score', 'count': 'Number of Invoices'} + ) + st.plotly_chart(fig_confidence, use_container_width=True) + + except Exception as e: + st.error(f"Analytics error: {e}") + + # ------------------------------------------------------------------------- + # TAB 4: DATA EXPLORER + # ------------------------------------------------------------------------- + + with tab4: + st.header("šŸ“‹ Data Explorer") + + try: + data = st.session_state.hf_processor.load_json_data() + invoices = data.get("invoices", []) + + if not invoices: + st.info("šŸ“Š No data available. Upload some invoices first.") + return + + # Convert to DataFrame for display + df_data = [] + for inv in invoices: + df_data.append({ + 'Invoice Number': inv.get('invoice_number', ''), + 'Supplier': inv.get('supplier_name', ''), + 'Buyer': inv.get('buyer_name', ''), + 'Amount': inv.get('amount', 0), + 'Date': inv.get('date', ''), + 'Confidence': inv.get('extraction_info', {}).get('confidence', 0), + 'Method': inv.get('extraction_info', {}).get('method', ''), + 'File': inv.get('file_info', {}).get('file_name', ''), + 'Created': inv.get('timestamps', {}).get('created_at', '')[:19] + }) + + df = pd.DataFrame(df_data) + + # Filters + col1, col2, col3 = st.columns(3) + + with col1: + suppliers = ['All'] + sorted(df['Supplier'].dropna().unique().tolist()) + selected_supplier = st.selectbox("Filter by Supplier", suppliers) + + with col2: + methods = ['All'] + sorted(df['Method'].dropna().unique().tolist()) + selected_method = st.selectbox("Filter by Method", methods) + + with col3: + min_amount = st.number_input("Min Amount", min_value=0.0, value=0.0) + + # Apply filters + filtered_df = df.copy() + if selected_supplier != 
'All': + filtered_df = filtered_df[filtered_df['Supplier'] == selected_supplier] + if selected_method != 'All': + filtered_df = filtered_df[filtered_df['Method'] == selected_method] + if min_amount > 0: + filtered_df = filtered_df[filtered_df['Amount'] >= min_amount] + + # Display filtered data + st.dataframe( + filtered_df, + use_container_width=True, + column_config={ + "Amount": st.column_config.NumberColumn("Amount", format="₹%.2f"), + "Confidence": st.column_config.ProgressColumn("Confidence", min_value=0, max_value=1) + } + ) + + # Export options + col1, col2 = st.columns(2) + + with col1: + if st.button("šŸ“„ Export CSV", use_container_width=True): + csv_data = filtered_df.to_csv(index=False) + st.download_button( + "Download CSV", + csv_data, + f"invoices_{datetime.now().strftime('%Y%m%d_%H%M')}.csv", + "text/csv" + ) + + with col2: + if st.button("šŸ“„ Export JSON", use_container_width=True): + filtered_invoices = [inv for inv in invoices + if inv.get('invoice_number') in filtered_df['Invoice Number'].values] + + export_data = { + "exported_at": datetime.now().isoformat(), + "total_records": len(filtered_invoices), + "invoices": filtered_invoices + } + + st.download_button( + "Download JSON", + json.dumps(export_data, indent=2), + f"invoices_{datetime.now().strftime('%Y%m%d_%H%M')}.json", + "application/json" + ) + + except Exception as e: + st.error(f"Data explorer error: {e}") + + # Footer + st.markdown("---") + st.markdown(""" +
+

šŸš€ AI Invoice Processing System - Optimized for Hugging Face Spaces

+

Built with ā¤ļø using Streamlit, Transformers, and AI

+
+ """, unsafe_allow_html=True) + +# =============================================================================== +# HUGGING FACE REQUIREMENTS AND CONFIGURATION +# =============================================================================== + +def generate_hf_requirements(): + """Generate requirements.txt optimized for Hugging Face Spaces""" + requirements = """streamlit>=1.28.0 +pandas>=1.5.0 +numpy>=1.21.0 +plotly>=5.0.0 +sentence-transformers>=2.2.0 +transformers>=4.21.0 +torch>=1.13.0 +faiss-cpu>=1.7.0 +pdfplumber>=0.7.0 +requests>=2.28.0 +python-dateutil>=2.8.0 +Pillow>=9.0.0 +""" + return requirements.strip() + +def generate_hf_config(): + """Generate app configuration for Hugging Face Spaces""" + config = { + "title": "AI Invoice Processing System", + "emoji": "šŸ“„", + "colorFrom": "blue", + "colorTo": "purple", + "sdk": "streamlit", + "sdk_version": "1.28.0", + "app_file": "app.py", + "pinned": False, + "python_version": "3.9" + } + return config + +# =============================================================================== +# MAIN APPLICATION ENTRY POINT +# =============================================================================== + +def main(): + """Main entry point for Hugging Face Spaces""" + try: + # Display Hugging Face info if running on HF Spaces + if IS_HF_SPACE: + st.sidebar.info("šŸ¤— Running on Hugging Face Spaces") + + # Create and run the app + create_huggingface_app() + + except Exception as e: + st.error(f"Application error: {e}") + st.info("Please refresh the page or contact support if the error persists.") + +if __name__ == "__main__": + main() \ No newline at end of file