Spaces:

jashdoshi77
/

notebooklm-fast

Running

App Files Files Community

jashdoshi77 committed 4 days ago

Commit

64deb3c

0 Parent(s):

feat: Add AI-powered query understanding with DeepSeek parsing

Browse files

Files changed (25) hide show

.agent/workflows/push-to-huggingface.md +24 -0
.env.example +21 -0
.gitignore +0 -0
Dockerfile +29 -0
README.md +31 -0
app.py +732 -0
config.py +48 -0
find_buckets.py +25 -0
find_users.py +21 -0
migrate_metadata.py +273 -0
requirements.txt +15 -0
services/__init__.py +1 -0
services/auth_service.py +177 -0
services/chroma_service.py +1009 -0
services/date_parser.py +285 -0
services/document_processor.py +336 -0
services/metadata_extractor.py +446 -0
services/number_extractor.py +302 -0
services/ocr_service.py +231 -0
services/rag_service.py +1870 -0
static/css/styles.css +2567 -0
static/images/WhatsApp Image 2025-12-23 at 5.10.00 PM.jpeg +0 -0
static/index.html +411 -0
static/js/app.js +1798 -0
test_chroma.py +16 -0

.agent/workflows/push-to-huggingface.md ADDED Viewed

	@@ -0,0 +1,24 @@

+---
+description: Push changes to Hugging Face Spaces
+---
+# Push to Hugging Face
+// turbo-all
+1. Stage all changes:
+```bash
+git add .
+```
+2. Commit with a message:
+```bash
+git commit -m "Your commit message here"
+```
+3. Push to Hugging Face:
+```bash
+git push https://jashdoshi77:YOUR_HF_TOKEN@huggingface.co/spaces/jashdoshi77/notebooklm-fast master:main
+```
+**Note**: Replace `YOUR_HF_TOKEN` with your Hugging Face token. The Space will automatically rebuild after pushing.

.env.example ADDED Viewed

	@@ -0,0 +1,21 @@

+# Groq API (Ultra-fast inference) - Get key from https://console.groq.com
+GROQ_API_KEY=your_groq_api_key_here
+# Mistral AI API (for OCR) - Get key from https://console.mistral.ai/
+MISTRAL_API_KEY=your_mistral_api_key_here
+# OpenRouter API (fallback)
+OPENROUTER_API_KEY=your_openrouter_api_key_here
+# ChromaDB Cloud Configuration
+# Get your API key from https://www.trychroma.com/
+CHROMA_API_KEY=your_chromadb_api_key_here
+CHROMA_TENANT=your_tenant_id
+CHROMA_DATABASE=your_database_name
+# JWT Secret (change in production)
+JWT_SECRET=Iribl AI-secret-key-change-me-in-production
+# App Configuration
+FLASK_ENV=development
+FLASK_DEBUG=True

.gitignore ADDED Viewed

Binary file (258 Bytes). View file

Dockerfile ADDED Viewed

	@@ -0,0 +1,29 @@

+ FROM python:3.10-slim
+WORKDIR /app
+# Install system dependencies for PyMuPDF and other packages
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    libffi-dev \
+    && rm -rf /var/lib/apt/lists/*
+# Copy requirements first for Docker layer caching
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy application code
+COPY . .
+# Create necessary directories
+RUN mkdir -p uploads chroma_data
+# Hugging Face Spaces uses port 7860
+EXPOSE 7860
+# Set environment variables
+ENV FLASK_ENV=production
+ENV PYTHONUNBUFFERED=1
+# Run with gunicorn for production
+CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--worker-class", "gthread", "--threads", "4", "--workers", "2", "--timeout", "1200", "--access-logfile", "-", "--error-logfile", "-", "app:app"]

README.md ADDED Viewed

	@@ -0,0 +1,31 @@

+---
+title: NotebookLM Fast
+emoji: 📚
+colorFrom: blue
+colorTo: purple
+sdk: docker
+pinned: false
+---
+# NotebookLM Fast
+AI-powered document intelligence platform with RAG (Retrieval Augmented Generation).
+## Features
+- 📄 Upload PDFs, Word docs, Excel files, PowerPoints, and images
+- 💬 Chat with your documents using AI
+- 🗂️ Organize documents in buckets
+- 👥 Admin/Employee role management
+- 🔒 Secure authentication
+## Environment Variables
+Set these as secrets in your Hugging Face Space settings:
+- `DEEPSEEK_API_KEY` - Your DeepSeek API key (primary LLM, read by `config.py`)
+- `OPENROUTER_API_KEY` - Your OpenRouter API key
+- `GROQ_API_KEY` - Your Groq API key
+- `CHROMA_API_KEY` - Your ChromaDB Cloud API key
+- `CHROMA_TENANT` - Your ChromaDB tenant ID
+- `CHROMA_DATABASE` - Your ChromaDB database name
+- `JWT_SECRET` - Secret key for JWT tokens

app.py ADDED Viewed

	@@ -0,0 +1,732 @@

+"""
+NotebookLM Clone - Main Flask Application
+AI-powered document intelligence platform with RAG
+Supports Admin/Employee roles and Bucket organization
+"""
+import os
+import uuid
+from functools import wraps
+from flask import Flask, request, jsonify, send_from_directory, send_file, Response
+from flask_cors import CORS
+from werkzeug.utils import secure_filename
+from config import Config
+from services.auth_service import auth_service
+from services.document_processor import document_processor
+from services.chroma_service import chroma_service
+from services.rag_service import rag_service
+from services.metadata_extractor import metadata_extractor
+# Initialize Flask app
+app = Flask(__name__, static_folder='static')
+app.config['MAX_CONTENT_LENGTH'] = Config.MAX_CONTENT_LENGTH
+CORS(app)
+# Ensure upload directory exists
+os.makedirs(Config.UPLOAD_FOLDER, exist_ok=True)
+# ==================== Auth Decorators ====================
+def require_auth(f):
+    """Decorator to require authentication"""
+    @wraps(f)
+    def decorated(*args, **kwargs):
+        auth_header = request.headers.get('Authorization')
+        if not auth_header or not auth_header.startswith('Bearer '):
+            return jsonify({"error": "Missing or invalid authorization header"}), 401
+        token = auth_header.split(' ')[1]
+        user = auth_service.get_current_user(token)
+        if not user:
+            return jsonify({"error": "Invalid or expired token"}), 401
+        request.current_user = user
+        return f(*args, **kwargs)
+    return decorated
+def require_admin(f):
+    """Decorator to require admin role"""
+    @wraps(f)
+    def decorated(*args, **kwargs):
+        auth_header = request.headers.get('Authorization')
+        if not auth_header or not auth_header.startswith('Bearer '):
+            return jsonify({"error": "Missing or invalid authorization header"}), 401
+        token = auth_header.split(' ')[1]
+        user = auth_service.get_current_user(token)
+        if not user:
+            return jsonify({"error": "Invalid or expired token"}), 401
+        if user.get('role') != 'admin':
+            return jsonify({"error": "Admin access required"}), 403
+        request.current_user = user
+        return f(*args, **kwargs)
+    return decorated
+# ==================== Static Routes ====================
+@app.route('/')
+def index():
+    """Serve ``index.html`` from the app's static folder."""
+    return send_from_directory(app.static_folder, 'index.html')
+@app.route('/<path:path>')
+def serve_static(path):
+    """Serve any other requested path as a file from the app's static folder."""
+    return send_from_directory(app.static_folder, path)
+# ==================== Auth Routes ====================
+@app.route('/api/auth/register/admin', methods=['POST'])
+def register_admin():
+    data = request.get_json()
+    if not data:
+        return jsonify({"error": "No data provided"}), 400
+    username = data.get('username', '').strip()
+    password = data.get('password', '')
+    email = data.get('email', '').strip()
+    result = auth_service.register_admin(username, password, email)
+    if result['success']:
+        return jsonify({
+            "token": result['token'],
+            "user_id": result['user_id'],
+            "username": result['username'],
+            "role": result['role']
+        })
+    else:
+        return jsonify({"error": result['error']}), 400
+@app.route('/api/auth/login', methods=['POST'])
+def login():
+    data = request.get_json()
+    if not data:
+        return jsonify({"error": "No data provided"}), 400
+    username = data.get('username', '').strip()
+    password = data.get('password', '')
+    role = data.get('role', 'admin')
+    result = auth_service.login(username, password, role)
+    if result['success']:
+        return jsonify({
+            "token": result['token'],
+            "user_id": result['user_id'],
+            "username": result['username'],
+            "role": result['role']
+        })
+    else:
+        return jsonify({"error": result['error']}), 401
+@app.route('/api/auth/verify', methods=['GET'])
+@require_auth
+def verify_token():
+    return jsonify({
+        "user_id": request.current_user['user_id'],
+        "username": request.current_user['username'],
+        "role": request.current_user.get('role', 'admin')
+    })
+# ==================== Admin Employee Management ====================
+@app.route('/api/admin/employees', methods=['GET'])
+@require_admin
+def list_employees():
+    employees = auth_service.get_admin_employees(request.current_user['user_id'])
+    return jsonify({"employees": employees})
+@app.route('/api/admin/employees', methods=['POST'])
+@require_admin
+def add_employee():
+    data = request.get_json()
+    if not data:
+        return jsonify({"error": "No data provided"}), 400
+    email = data.get('email', '').strip()
+    password = data.get('password', '')
+    result = auth_service.register_employee(
+        admin_user_id=request.current_user['user_id'],
+        email=email,
+        password=password
+    )
+    if result['success']:
+        return jsonify({"user_id": result['user_id'], "email": result['email']})
+    else:
+        return jsonify({"error": result['error']}), 400
+@app.route('/api/admin/employees/<employee_id>', methods=['DELETE'])
+@require_admin
+def delete_employee(employee_id):
+    success = auth_service.delete_employee(
+        admin_user_id=request.current_user['user_id'],
+        employee_id=employee_id
+    )
+    if success:
+        return jsonify({"success": True})
+    else:
+        return jsonify({"error": "Employee not found or access denied"}), 404
+# ==================== Bucket Routes ====================
+@app.route('/api/buckets', methods=['GET'])
+@require_auth
+def list_buckets():
+    """List all buckets for current user"""
+    buckets = chroma_service.get_user_buckets(request.current_user['user_id'])
+    return jsonify({"buckets": buckets})
+@app.route('/api/buckets', methods=['POST'])
+@require_auth
+def create_bucket():
+    """Create a new bucket"""
+    data = request.get_json()
+    if not data or not data.get('name'):
+        return jsonify({"error": "Bucket name is required"}), 400
+    name = data.get('name', '').strip()
+    description = data.get('description', '').strip()
+    result = chroma_service.create_bucket(
+        user_id=request.current_user['user_id'],
+        name=name,
+        description=description
+    )
+    return jsonify(result)
+@app.route('/api/buckets/<bucket_id>', methods=['DELETE'])
+@require_auth
+def delete_bucket(bucket_id):
+    """Delete a bucket"""
+    success = chroma_service.delete_bucket(
+        bucket_id=bucket_id,
+        user_id=request.current_user['user_id']
+    )
+    if success:
+        return jsonify({"success": True})
+    else:
+        return jsonify({"error": "Bucket not found or access denied"}), 404
+# ==================== Document Routes ====================
+# ==================== Async Processing ====================
+# Global status store: doc_id -> {status, progress, message, result, error}
+processing_status = {}
+def process_document_background(doc_id, user_id, file_path, filename, bucket_id):
+    """Background task for processing documents"""
+    import threading
+    try:
+        processing_status[doc_id] = {
+            "status": "processing",
+            "progress": 10,
+            "message": "Starting processing..."
+        }
+        print(f"[BACKGROUND] Processing file: {filename}")
+        # Step 1: Text Extraction (OCR)
+        processing_status[doc_id]["message"] = "Extracting text (OCR)..."
+        processing_status[doc_id]["progress"] = 20
+        result = document_processor.process(file_path, filename)
+        if not result['success']:
+            processing_status[doc_id] = {
+                "status": "failed",
+                "error": result['error']
+            }
+            if os.path.exists(file_path):
+                os.remove(file_path)
+            return
+        processing_status[doc_id]["progress"] = 50
+        processing_status[doc_id]["message"] = "Storing document..."
+        # Step 2: Store Metadata
+        doc_type = document_processor.get_file_type(filename)
+        chroma_service.store_document(
+            user_id=user_id,
+            doc_id=doc_id,
+            filename=filename,
+            doc_type=doc_type,
+            content=result['text'],
+            bucket_id=bucket_id
+        )
+        processing_status[doc_id]["progress"] = 70
+        processing_status[doc_id]["message"] = "generating embeddings..."
+        # Step 3: Chunking & Embeddings
+        chunk_count = rag_service.process_document(
+            user_id=user_id,
+            doc_id=doc_id,
+            content=result['text'],
+            bucket_id=bucket_id
+        )
+        processing_status[doc_id]["progress"] = 90
+        processing_status[doc_id]["message"] = "Generating summary..."
+        # Step 4: Summary Generation
+        summary_result = rag_service.generate_summary(result['text'], filename)
+        summary = summary_result.get('summary', f'Document: {filename}')
+        # Step 5: Extract and store metadata for aggregate queries (NEW)
+        processing_status[doc_id]["progress"] = 95
+        processing_status[doc_id]["message"] = "Extracting metadata..."
+        try:
+            # Extract structured metadata from document
+            metadata = metadata_extractor.extract_metadata(result['text'], filename)
+            # Store metadata for aggregate queries
+            chroma_service.store_document_metadata(
+                doc_id=doc_id,
+                user_id=user_id,
+                bucket_id=bucket_id,
+                metadata=metadata
+            )
+            # Store summary chunk for aggregate queries
+            chroma_service.store_summary_chunk(
+                doc_id=doc_id,
+                user_id=user_id,
+                summary=summary,
+                bucket_id=bucket_id,
+                filename=filename
+            )
+            print(f"[METADATA] Extracted and stored metadata for {filename}")
+        except Exception as e:
+            print(f"[METADATA] Warning: Failed to extract metadata for {filename}: {e}")
+            # Non-fatal - continue processing
+        # Complete
+        processing_status[doc_id] = {
+            "status": "completed",
+            "progress": 100,
+            "message": "Complete",
+            "result": {
+                "doc_id": doc_id,
+                "filename": filename,
+                "doc_type": doc_type,
+                "bucket_id": bucket_id,
+                "chunk_count": chunk_count,
+                "summary": summary
+            }
+        }
+        print(f"[BACKGROUND] Completed {filename}")
+    except Exception as e:
+        import traceback
+        print(f"[BACKGROUND ERROR] {str(e)}")
+        print(traceback.format_exc())
+        processing_status[doc_id] = {
+            "status": "failed",
+            "error": str(e)
+        }
+        if os.path.exists(file_path):
+            try:
+                os.remove(file_path)
+            except:
+                pass
+@app.route('/api/documents/upload', methods=['POST'])
+@require_auth
+def upload_document():
+    """Upload and process a document (Async)"""
+    if 'file' not in request.files:
+        return jsonify({"error": "No file provided"}), 400
+    file = request.files['file']
+    bucket_id = request.form.get('bucket_id', '')
+    if file.filename == '':
+        return jsonify({"error": "No file selected"}), 400
+    if not document_processor.is_supported(file.filename):
+        return jsonify({"error": "Unsupported file type"}), 400
+    doc_id = str(uuid.uuid4())
+    filename = secure_filename(file.filename)
+    user_folder = os.path.join(Config.UPLOAD_FOLDER, request.current_user['user_id'])
+    os.makedirs(user_folder, exist_ok=True)
+    file_path = os.path.join(user_folder, f"{doc_id}_{filename}")
+    file.save(file_path)
+    # Initialize status
+    processing_status[doc_id] = {
+        "status": "queued",
+        "progress": 0,
+        "message": "Queued for processing..."
+    }
+    # Start background thread
+    import threading
+    thread = threading.Thread(
+        target=process_document_background,
+        args=(doc_id, request.current_user['user_id'], file_path, filename, bucket_id)
+    )
+    thread.daemon = True
+    thread.start()
+    # Return immediately
+    return jsonify({
+        "status": "queued",
+        "doc_id": doc_id,
+        "filename": filename,
+        "message": "Upload accepted, processing in background"
+    }), 202
+@app.route('/api/documents/<doc_id>/status', methods=['GET'])
+@require_auth
+def get_document_status(doc_id):
+    """Get processing status of a document"""
+    status = processing_status.get(doc_id)
+    if not status:
+        # Check if it exists in DB (might be completed and cleared from memory)
+        # For now, just return not found if not in memory or DB check logic here
+        # Simple version:
+        return jsonify({"status": "unknown"}), 404
+    return jsonify(status)
+@app.route('/api/documents/<doc_id>/summary', methods=['GET'])
+@require_auth
+def get_document_summary(doc_id):
+    """Get or generate summary for a document"""
+    doc = chroma_service.get_document(doc_id, request.current_user['user_id'])
+    if not doc:
+        return jsonify({"error": "Document not found"}), 404
+    # Get the full document content from the stored preview
+    # For a more complete summary, we'd need to re-read the document
+    content_preview = doc.get('content_preview', '')
+    # Generate summary
+    summary_result = rag_service.generate_summary(content_preview, doc['filename'])
+    return jsonify({
+        "doc_id": doc_id,
+        "filename": doc['filename'],
+        "summary": summary_result.get('summary', f'Document: {doc["filename"]}'),
+        "success": summary_result.get('success', False)
+    })
+@app.route('/api/documents', methods=['GET'])
+@require_auth
+def list_documents():
+    """List all documents, optionally filtered by bucket"""
+    bucket_id = request.args.get('bucket_id')
+    documents = chroma_service.get_user_documents(
+        request.current_user['user_id'],
+        bucket_id=bucket_id if bucket_id else None
+    )
+    return jsonify({"documents": documents})
+@app.route('/api/documents/<doc_id>', methods=['GET'])
+@require_auth
+def get_document(doc_id):
+    """Get document details"""
+    doc = chroma_service.get_document(doc_id, request.current_user['user_id'])
+    if doc:
+        return jsonify(doc)
+    else:
+        return jsonify({"error": "Document not found"}), 404
+@app.route('/api/documents/<doc_id>/view', methods=['GET'])
+@require_auth
+def view_document(doc_id):
+    """View/download the actual document file"""
+    doc = chroma_service.get_document(doc_id, request.current_user['user_id'])
+    if not doc:
+        return jsonify({"error": "Document not found"}), 404
+    user_folder = os.path.join(Config.UPLOAD_FOLDER, request.current_user['user_id'])
+    # Find the file
+    for f in os.listdir(user_folder):
+        if f.startswith(doc_id):
+            file_path = os.path.join(user_folder, f)
+            return send_file(file_path, as_attachment=False)
+    return jsonify({"error": "File not found on server"}), 404
+@app.route('/api/documents/<doc_id>/bucket', methods=['PUT'])
+@require_auth
+def update_document_bucket(doc_id):
+    """Move document to a different bucket"""
+    data = request.get_json()
+    bucket_id = data.get('bucket_id', '') if data else ''
+    success = chroma_service.update_document_bucket(
+        doc_id=doc_id,
+        user_id=request.current_user['user_id'],
+        bucket_id=bucket_id
+    )
+    if success:
+        return jsonify({"success": True})
+    else:
+        return jsonify({"error": "Document not found or access denied"}), 404
+@app.route('/api/documents/<doc_id>', methods=['DELETE'])
+@require_auth
+def delete_document(doc_id):
+    """Delete a document"""
+    success = chroma_service.delete_document(
+        doc_id=doc_id,
+        user_id=request.current_user['user_id']
+    )
+    if success:
+        user_folder = os.path.join(Config.UPLOAD_FOLDER, request.current_user['user_id'])
+        try:
+            for f in os.listdir(user_folder):
+                if f.startswith(doc_id):
+                    os.remove(os.path.join(user_folder, f))
+                    break
+        except:
+            pass
+        return jsonify({"success": True})
+    else:
+        return jsonify({"error": "Document not found or access denied"}), 404
+# ==================== Chat/RAG Routes ====================
+@app.route('/api/chat', methods=['POST'])
+@require_auth
+def chat():
+    """Process a chat query using RAG with optional bucket filtering"""
+    data = request.get_json()
+    if not data or not data.get('message'):
+        return jsonify({"error": "No message provided"}), 400
+    message = data.get('message', '').strip()
+    doc_ids = data.get('doc_ids')
+    bucket_id = data.get('bucket_id')  # New: filter by bucket
+    conversation_history = data.get('history', [])
+    result = rag_service.query(
+        user_id=request.current_user['user_id'],
+        query=message,
+        doc_ids=doc_ids,
+        bucket_id=bucket_id,
+        conversation_history=conversation_history
+    )
+    if result['success']:
+        return jsonify({
+            "response": result['response'],
+            "model": result.get('model', 'unknown'),
+            "sources": result.get('sources', []),
+            "source_files": result.get('source_files', []),
+            "chunks_used": result.get('chunks_used', 0),
+            "chunks_filtered": result.get('chunks_filtered', 0)
+        })
+    else:
+        return jsonify({"error": result['error']}), 500
+@app.route('/api/chat/stream', methods=['POST'])
+@require_auth
+def chat_stream():
+    """Streaming chat endpoint - sends response chunks as they arrive"""
+    import json
+    import time
+    start_time = time.time()
+    print(f"[STREAM] Endpoint called")
+    data = request.get_json()
+    if not data or not data.get('message'):
+        return jsonify({"error": "No message provided"}), 400
+    message = data.get('message', '').strip()
+    bucket_id = data.get('bucket_id')
+    chat_id = data.get('chat_id', '')  # Get chat_id from request
+    user_id = request.current_user['user_id']
+    print(f"[STREAM] Request parsed in {time.time()-start_time:.2f}s")
+    def generate():
+        # Immediately yield to start the stream
+        yield f"data: {json.dumps({'type': 'start'})}\n\n"
+        sse_chunk_count = 0
+        for chunk in rag_service.query_stream(
+            user_id=user_id,
+            query=message,
+            bucket_id=bucket_id,
+            chat_id=chat_id
+        ):
+            sse_chunk_count += 1
+            if sse_chunk_count <= 5:
+                print(f"[SSE] Sending chunk {sse_chunk_count}: type={chunk.get('type', 'unknown')}")
+            yield f"data: {json.dumps(chunk)}\n\n"
+        print(f"[SSE] Stream complete, sent {sse_chunk_count} chunks total")
+    return Response(
+        generate(),
+        mimetype='text/event-stream',
+        headers={
+            'Cache-Control': 'no-cache',
+            'Connection': 'keep-alive',
+            'X-Accel-Buffering': 'no'
+        }
+    )
+@app.route('/api/chat/clear', methods=['POST'])
+@require_auth
+def clear_chat_memory():
+    """Clear conversation memory for the current user"""
+    data = request.get_json() or {}
+    bucket_id = data.get('bucket_id')
+    success = rag_service.clear_memory(
+        user_id=request.current_user['user_id'],
+        bucket_id=bucket_id
+    )
+    if success:
+        return jsonify({"success": True, "message": "Conversation memory cleared"})
+    else:
+        return jsonify({"error": "Failed to clear memory"}), 500
+@app.route('/api/cleanup/chunks', methods=['POST'])
+@require_auth
+def cleanup_user_chunks():
+    """Clear ALL chunks for the current user - use to fix stale data issues"""
+    deleted_count = chroma_service.clear_all_user_chunks(
+        user_id=request.current_user['user_id']
+    )
+    return jsonify({
+        "success": True,
+        "message": f"Deleted {deleted_count} chunks. Please re-upload your documents."
+    })
+# ==================== Chat History Routes ====================
+@app.route('/api/chats', methods=['GET'])
+@require_auth
+def list_chat_sessions():
+    """Get all chat sessions for current user"""
+    sessions = chroma_service.get_user_chat_sessions(request.current_user['user_id'])
+    return jsonify({"chats": sessions})
+@app.route('/api/chats', methods=['POST'])
+@require_auth
+def save_chat_session():
+    """Save or update a chat session"""
+    data = request.get_json()
+    if not data:
+        return jsonify({"error": "No data provided"}), 400
+    chat_id = data.get('id')
+    topic = data.get('topic', 'Chat')
+    messages = data.get('messages', [])
+    bucket_id = data.get('bucket', '')
+    if not chat_id:
+        return jsonify({"error": "Chat ID is required"}), 400
+    result = chroma_service.save_chat_session(
+        user_id=request.current_user['user_id'],
+        chat_id=chat_id,
+        topic=topic,
+        messages=messages,
+        bucket_id=bucket_id
+    )
+    return jsonify({"success": True, **result})
+@app.route('/api/chats/<chat_id>', methods=['DELETE'])
+@require_auth
+def delete_chat_session(chat_id):
+    """Delete a chat session"""
+    success = chroma_service.delete_chat_session(
+        user_id=request.current_user['user_id'],
+        chat_id=chat_id
+    )
+    if success:
+        return jsonify({"success": True})
+    else:
+        return jsonify({"error": "Chat not found or access denied"}), 404
+# ==================== Health Check ====================
+@app.route('/api/health', methods=['GET'])
+def health_check():
+    """Unauthenticated liveness probe returning a fixed status and version."""
+    return jsonify({"status": "healthy", "version": "1.1.0"})
+# ==================== Main ====================
+if __name__ == '__main__':
+    print("=" * 50)
+    print("NotebookLM Clone - AI Document Intelligence")
+    print("=" * 50)
+    print(f"Upload folder: {Config.UPLOAD_FOLDER}")
+    print(f"ChromaDB Cloud: {Config.CHROMA_TENANT}/{Config.CHROMA_DATABASE}")
+    print("Starting server on http://localhost:5000")
+    print("=" * 50)
+    app.run(host='0.0.0.0', port=5000, debug=True)

config.py ADDED Viewed

	@@ -0,0 +1,48 @@

+import os
+from dotenv import load_dotenv
+load_dotenv()
+class Config:
+    """Static application settings.
+    API keys, the ChromaDB tenant/database and the JWT secret are read from
+    environment variables at import time; everything else is a fixed constant.
+    """
+    # DeepSeek API - PRIMARY (OpenAI-compatible)
+    DEEPSEEK_API_KEY = os.getenv('DEEPSEEK_API_KEY', '')
+    DEEPSEEK_BASE_URL = 'https://api.deepseek.com/v1'
+    DEEPSEEK_MODEL = 'deepseek-chat'  # DeepSeek V3 model
+    # OpenRouter API - For OCR and fallback LLM
+    OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY', '')
+    OPENROUTER_BASE_URL = 'https://openrouter.ai/api/v1'
+    # AI Models (OpenRouter fallback): short alias -> OpenRouter model id
+    MODEL_MAP = {
+        'gemma': 'google/gemma-3-4b-it:free',
+        'mistral': 'mistralai/mistral-small-3.1-24b-instruct:free',
+    }
+    # Use DeepSeek first, then OpenRouter models
+    USE_DEEPSEEK = True  # Set to False to use OpenRouter instead
+    FALLBACK_ORDER = ['gemma', 'mistral']  # Aliases are keys of MODEL_MAP
+    # ChromaDB Cloud Configuration
+    CHROMA_API_KEY = os.getenv('CHROMA_API_KEY', '')
+    CHROMA_TENANT = os.getenv('CHROMA_TENANT', 'default_tenant')
+    CHROMA_DATABASE = os.getenv('CHROMA_DATABASE', 'default_database')
+    CHROMA_HOST = 'api.trychroma.com'  # ChromaDB Cloud endpoint
+    # JWT Configuration
+    # NOTE(review): when JWT_SECRET is unset this falls back to a fixed,
+    # publicly visible string - confirm deployments always set the variable.
+    JWT_SECRET = os.getenv('JWT_SECRET', 'your-secret-key-change-in-production')
+    JWT_EXPIRY_HOURS = 24
+    # Upload Configuration
+    UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), 'uploads')
+    MAX_CONTENT_LENGTH = 200 * 1024 * 1024  # 200MB max file size
+    ALLOWED_EXTENSIONS = {'pdf', 'doc', 'docx', 'ppt', 'pptx', 'xls', 'xlsx', 'txt', 'md', 'png', 'jpg', 'jpeg', 'gif', 'webp'}
+    # Retrieval / generation tuning
+    CHUNK_SIZE = 500  # Smaller chunks for higher precision with many documents
+    CHUNK_OVERLAP = 150  # Good overlap to avoid losing info at boundaries
+    TOP_K_RESULTS = 100  # High - comprehensive retrieval for 3000+ docs across buckets
+    AI_TEMPERATURE = 0.0  # Zero temperature for maximum determinism and accuracy
+    RELEVANCE_THRESHOLD = 3.0  # Higher threshold - include all potentially relevant
+    MAX_CONVERSATION_HISTORY = 20  # Remember more conversation for pronoun context
+    AI_MAX_TOKENS = 4096  # Maximum tokens for detailed responses
+    AI_TIMEOUT = 90  # More time for complex multi-document queries

find_buckets.py ADDED Viewed

	@@ -0,0 +1,25 @@

+"""Helper script to find buckets for a user"""
+import sys
+sys.path.insert(0, '.')
+from services.chroma_service import chroma_service
+# Check both users
+user_ids = ['55c0893720ef38eb', '7ac2ed69d52d2010']
+for user_id in user_ids:
+    print(f"\nUser: {user_id}")
+    print("-" * 40)
+    # Get documents
+    docs = chroma_service.get_user_documents(user_id)
+    print(f"Documents: {len(docs)}")
+    # Get buckets
+    buckets = chroma_service.get_user_buckets(user_id)
+    if buckets:
+        print("Buckets:")
+        for b in buckets:
+            print(f"  - {b['name']} (ID: {b['bucket_id']}, Docs: {b.get('doc_count', 0)})")
+    else:
+        print("No buckets found")

find_users.py ADDED Viewed

	@@ -0,0 +1,21 @@

+"""Helper script to find user IDs"""
+import sys
+sys.path.insert(0, '.')
+from services.chroma_service import chroma_service
+print("Finding users in ChromaDB...")
+users = chroma_service.users_collection.get()
+if not users['ids']:
+    print("No users found!")
+else:
+    print(f"Found {len(users['ids'])} users:")
+    for i in range(len(users['ids'])):
+        user_id = users['ids'][i]
+        username = users['metadatas'][i].get('username', 'N/A')
+        role = users['metadatas'][i].get('role', 'N/A')
+        print(f"  - User ID: {user_id}")
+        print(f"    Username: {username}")
+        print(f"    Role: {role}")
+        print()

migrate_metadata.py ADDED Viewed

	@@ -0,0 +1,273 @@

+"""
+Migration Script - Extract Metadata for Existing Documents
+Run this script once to populate document_metadata and document_summaries
+collections for all existing documents.
+Usage:
+    python migrate_metadata.py --user-id <user_id> [--bucket-id <bucket_id>]
+"""
+import sys
+import time
+import argparse
+from typing import List, Dict
+# Add parent directory to path for imports
+sys.path.insert(0, '.')
+from services.chroma_service import chroma_service
+from services.metadata_extractor import metadata_extractor
+from services.rag_service import rag_service
+def reconstruct_document_content(doc_id: str) -> str:
+    """
+    Reconstruct document content from stored chunks.
+    """
+    chunks = chroma_service.get_document_chunks(doc_id)
+    if not chunks:
+        return ""
+    # Sort by chunk index and combine
+    chunks.sort(key=lambda x: x.get('chunk_index', 0))
+    content = ' '.join(chunk.get('text', '') for chunk in chunks)
+    return content
+def migrate_single_document(doc: Dict, user_id: str, skip_existing: bool = True) -> Dict:
+    """
+    Migrate a single document: extract metadata and create summary.
+    Args:
+        doc: Document dictionary with doc_id, filename, bucket_id
+        user_id: User ID
+        skip_existing: If True, skip documents that already have metadata and summary
+    """
+    doc_id = doc['doc_id']
+    filename = doc.get('filename', '')
+    bucket_id = doc.get('bucket_id', '')
+    result = {
+        'doc_id': doc_id,
+        'filename': filename,
+        'status': 'pending',
+        'metadata_extracted': False,
+        'summary_created': False,
+        'error': None
+    }
+    try:
+        # Check if already migrated (fast skip)
+        if skip_existing:
+            existing_metadata = chroma_service.get_document_metadata(doc_id, user_id)
+            summary_id = f"{doc_id}_summary"
+            existing_summary = chroma_service.summary_chunks_collection.get(ids=[summary_id])
+            if existing_metadata and existing_summary['ids']:
+                result['status'] = 'skipped'
+                result['metadata_extracted'] = True
+                result['summary_created'] = True
+                print(f"  [SKIP] Already migrated: {filename}")
+                return result
+        # Step 1: Reconstruct content from chunks
+        print(f"  Reconstructing content for {filename}...")
+        content = reconstruct_document_content(doc_id)
+        if not content:
+            result['status'] = 'skipped'
+            result['error'] = 'No content found'
+            return result
+        # Step 2: Extract metadata (only if not exists or skip_existing is False)
+        needs_metadata = not skip_existing or not chroma_service.get_document_metadata(doc_id, user_id)
+        if needs_metadata:
+            print(f"  Extracting metadata...")
+            metadata = metadata_extractor.extract_metadata(content, filename)
+            # Store metadata
+            chroma_service.store_document_metadata(
+                doc_id=doc_id,
+                user_id=user_id,
+                bucket_id=bucket_id,
+                metadata=metadata
+            )
+            result['metadata_extracted'] = True
+        else:
+            result['metadata_extracted'] = True  # Already exists
+        # Step 3: Generate and store summary (only if not exists or skip_existing is False)
+        summary_id = f"{doc_id}_summary"
+        existing_summary = chroma_service.summary_chunks_collection.get(ids=[summary_id])
+        needs_summary = not skip_existing or not existing_summary['ids']
+        if needs_summary:
+            print(f"  Generating summary...")
+            try:
+                summary_result = rag_service.generate_summary(content, filename)
+                # Extract the summary string from the result dict
+                if isinstance(summary_result, dict):
+                    summary = summary_result.get('summary', f'Document: {filename}')
+                else:
+                    summary = str(summary_result) if summary_result else f'Document: {filename}'
+                if summary:
+                    chroma_service.store_summary_chunk(
+                        doc_id=doc_id,
+                        user_id=user_id,
+                        summary=summary,
+                        bucket_id=bucket_id,
+                        filename=filename
+                    )
+                    result['summary_created'] = True
+            except Exception as e:
+                print(f"    Warning: Summary generation failed: {e}")
+        else:
+            result['summary_created'] = True  # Already exists
+        result['status'] = 'success'
+        print(f"  [OK] Completed: {filename}")
+    except Exception as e:
+        result['status'] = 'error'
+        # Sanitize error message for console encoding
+        error_msg = str(e).encode('ascii', 'replace').decode('ascii')
+        result['error'] = error_msg
+        print(f"  [ERROR] {filename} - {error_msg}")
+    return result
+def migrate_all_documents(user_id: str, bucket_id: str = None,
+                          batch_size: int = 10, delay: float = 0.5, skip_existing: bool = True):
+    """
+    Migrate all documents for a user/bucket.
+    Args:
+        user_id: User ID to migrate documents for
+        bucket_id: Optional bucket ID to filter by
+        batch_size: Number of documents to process before pausing
+        delay: Seconds to wait between documents (rate limiting)
+        skip_existing: If True, skip documents that already have metadata and summary
+    """
+    print("=" * 60)
+    print("Document Metadata Migration")
+    print("=" * 60)
+    print(f"User ID: {user_id}")
+    print(f"Bucket ID: {bucket_id or 'All buckets'}")
+    print(f"Skip existing: {skip_existing}")
+    print()
+    # Get all documents
+    print("Fetching documents...")
+    documents = chroma_service.get_user_documents(user_id, bucket_id)
+    total_docs = len(documents)
+    print(f"Found {total_docs} documents to process")
+    print()
+    if total_docs == 0:
+        print("No documents found. Exiting.")
+        return
+    # Track results
+    results = {
+        'total': total_docs,
+        'success': 0,
+        'skipped': 0,
+        'already_migrated': 0,
+        'error': 0,
+        'metadata_extracted': 0,
+        'summaries_created': 0
+    }
+    start_time = time.time()
+    # Process documents
+    for i, doc in enumerate(documents, 1):
+        print(f"\n[{i}/{total_docs}] Processing: {doc.get('filename', 'Unknown')}")
+        result = migrate_single_document(doc, user_id, skip_existing=skip_existing)
+        # Update results
+        if result['status'] == 'success':
+            results['success'] += 1
+        elif result['status'] == 'skipped':
+            if result.get('metadata_extracted') and result.get('summary_created'):
+                results['already_migrated'] += 1
+            else:
+                results['skipped'] += 1
+        else:
+            results['error'] += 1
+        if result['metadata_extracted']:
+            results['metadata_extracted'] += 1
+        if result['summary_created']:
+            results['summaries_created'] += 1
+        # Rate limiting
+        if delay > 0:
+            time.sleep(delay)
+        # Progress update every batch_size documents
+        if i % batch_size == 0:
+            elapsed = time.time() - start_time
+            rate = i / elapsed if elapsed > 0 else 0
+            remaining = (total_docs - i) / rate if rate > 0 else 0
+            print(f"\n--- Progress: {i}/{total_docs} ({i/total_docs*100:.1f}%) ---")
+            print(f"    Elapsed: {elapsed:.1f}s | ETA: {remaining:.1f}s")
+            print(f"    Success: {results['success']} | Already migrated: {results['already_migrated']} | Errors: {results['error']}")
+    # Final summary
+    elapsed = time.time() - start_time
+    print("\n" + "=" * 60)
+    print("Migration Complete!")
+    print("=" * 60)
+    print(f"Total documents: {results['total']}")
+    print(f"  [OK] Success: {results['success']}")
+    print(f"  [SKIP] Already migrated: {results['already_migrated']}")
+    print(f"  [SKIP] Skipped (no content): {results['skipped']}")
+    print(f"  [ERR] Errors: {results['error']}")
+    print()
+    print(f"Metadata extracted: {results['metadata_extracted']}")
+    print(f"Summaries created: {results['summaries_created']}")
+    print()
+    print(f"Total time: {elapsed:.1f} seconds")
+    if total_docs > 0:
+        print(f"Average: {elapsed/total_docs:.2f} seconds per document")
+    return results
+def main():
+    parser = argparse.ArgumentParser(description='Migrate existing documents to extract metadata')
+    parser.add_argument('--user-id', required=True, help='User ID to migrate documents for')
+    parser.add_argument('--bucket-id', help='Optional bucket ID to filter by')
+    parser.add_argument('--batch-size', type=int, default=10, help='Batch size for progress updates')
+    parser.add_argument('--delay', type=float, default=0.5, help='Delay between documents (seconds)')
+    parser.add_argument('--dry-run', action='store_true', help='Show what would be done without making changes')
+    args = parser.parse_args()
+    if args.dry_run:
+        print("DRY RUN MODE - No changes will be made")
+        documents = chroma_service.get_user_documents(args.user_id, args.bucket_id)
+        print(f"Would process {len(documents)} documents:")
+        for doc in documents[:10]:
+            print(f"  - {doc.get('filename', 'Unknown')}")
+        if len(documents) > 10:
+            print(f"  ... and {len(documents) - 10} more")
+        return
+    migrate_all_documents(
+        user_id=args.user_id,
+        bucket_id=args.bucket_id,
+        batch_size=args.batch_size,
+        delay=args.delay,
+        skip_existing=True
+    )
+if __name__ == '__main__':
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,15 @@

+flask
+flask-cors
+chromadb
+python-dotenv
+python-docx
+openpyxl
+pandas
+Pillow
+requests
+bcrypt
+PyJWT
+werkzeug
+python-pptx
+pymupdf
+gunicorn

services/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # Services package

services/auth_service.py ADDED Viewed

	@@ -0,0 +1,177 @@

+"""
+Authentication Service with Role-Based Access
+Handles user registration, login, and JWT token management
+Supports Admin and Employee roles
+Uses ChromaDB for user storage
+"""
+import bcrypt
+import jwt
+import time
+from datetime import datetime, timedelta
+from config import Config
+from services.chroma_service import chroma_service
+class AuthService:
+    def __init__(self):
+        self.jwt_secret = Config.JWT_SECRET
+        self.jwt_expiry_hours = Config.JWT_EXPIRY_HOURS
+    def _hash_password(self, password: str) -> str:
+        """Hash password using bcrypt"""
+        salt = bcrypt.gensalt()
+        return bcrypt.hashpw(password.encode('utf-8'), salt).decode('utf-8')
+    def _verify_password(self, password: str, hashed: str) -> bool:
+        """Verify password against hash"""
+        return bcrypt.checkpw(
+            password.encode('utf-8'),
+            hashed.encode('utf-8')
+        )
+    def _generate_token(self, user_id: str, username: str, role: str) -> str:
+        """Generate JWT token with role"""
+        payload = {
+            "user_id": user_id,
+            "username": username,
+            "role": role,
+            "exp": datetime.utcnow() + timedelta(hours=self.jwt_expiry_hours),
+            "iat": datetime.utcnow()
+        }
+        return jwt.encode(payload, self.jwt_secret, algorithm="HS256")
+    def verify_token(self, token: str) -> dict | None:
+        """Verify and decode JWT token"""
+        try:
+            payload = jwt.decode(token, self.jwt_secret, algorithms=["HS256"])
+            return {
+                "user_id": payload['user_id'],
+                "username": payload['username'],
+                "role": payload.get('role', 'employee')
+            }
+        except jwt.ExpiredSignatureError:
+            return None
+        except jwt.InvalidTokenError:
+            return None
+    def register_admin(self, username: str, password: str, email: str = "") -> dict:
+        """
+        Register a new admin user
+        Returns: {"success": bool, "token": str, "user_id": str, "error": str}
+        """
+        # Validate input
+        if not username or len(username) < 3:
+            return {"success": False, "error": "Username must be at least 3 characters"}
+        if not password or len(password) < 6:
+            return {"success": False, "error": "Password must be at least 6 characters"}
+        # Check if user exists
+        existing = chroma_service.get_user(username)
+        if existing:
+            return {"success": False, "error": "Username already exists"}
+        # Hash password and create admin user
+        password_hash = self._hash_password(password)
+        result = chroma_service.create_user(username, password_hash, email, role="admin")
+        if "error" in result:
+            return {"success": False, "error": result['error']}
+        # Generate token
+        token = self._generate_token(result['user_id'], username, "admin")
+        return {
+            "success": True,
+            "token": token,
+            "user_id": result['user_id'],
+            "username": username,
+            "role": "admin"
+        }
+    def register_employee(self, admin_user_id: str, email: str, password: str) -> dict:
+        """
+        Admin registers an employee
+        Returns: {"success": bool, "user_id": str, "error": str}
+        """
+        # Validate input
+        if not email or "@" not in email:
+            return {"success": False, "error": "Valid email is required"}
+        if not password or len(password) < 6:
+            return {"success": False, "error": "Password must be at least 6 characters"}
+        # Check if employee email already exists
+        existing = chroma_service.get_user(email)
+        if existing:
+            return {"success": False, "error": "Employee with this email already exists"}
+        # Hash password and create employee user
+        password_hash = self._hash_password(password)
+        result = chroma_service.create_user(
+            username=email,
+            password_hash=password_hash,
+            email=email,
+            role="employee",
+            admin_id=admin_user_id
+        )
+        if "error" in result:
+            return {"success": False, "error": result['error']}
+        return {
+            "success": True,
+            "user_id": result['user_id'],
+            "email": email
+        }
+    def login(self, username: str, password: str, role: str = "admin") -> dict:
+        """
+        Login user with role check
+        Returns: {"success": bool, "token": str, "user_id": str, "error": str}
+        """
+        # Get user
+        user = chroma_service.get_user(username)
+        if not user:
+            return {"success": False, "error": "Invalid credentials"}
+        # Verify password
+        if not self._verify_password(password, user['password_hash']):
+            return {"success": False, "error": "Invalid credentials"}
+        # Verify role matches
+        user_role = user.get('role', 'admin')
+        if user_role != role:
+            if role == "admin":
+                return {"success": False, "error": "This account is not an admin account"}
+            else:
+                return {"success": False, "error": "This account is not an employee account"}
+        # Generate token
+        token = self._generate_token(user['user_id'], username, user_role)
+        return {
+            "success": True,
+            "token": token,
+            "user_id": user['user_id'],
+            "username": username,
+            "role": user_role
+        }
+    def get_admin_employees(self, admin_user_id: str) -> list:
+        """Get all employees created by an admin"""
+        return chroma_service.get_employees_by_admin(admin_user_id)
+    def delete_employee(self, admin_user_id: str, employee_id: str) -> bool:
+        """Admin deletes an employee"""
+        return chroma_service.delete_employee(admin_user_id, employee_id)
+    def get_current_user(self, token: str) -> dict | None:
+        """Get current user from token"""
+        return self.verify_token(token)
+# Singleton instance
+auth_service = AuthService()

services/chroma_service.py ADDED Viewed

	@@ -0,0 +1,1009 @@

+"""
+ChromaDB Cloud Service - Vector Storage & Retrieval
+With Role-Based User Management and Bucket Organization
+"""
+import chromadb
+from chromadb.utils.embedding_functions import DefaultEmbeddingFunction
+import hashlib
+import time
+from config import Config
+class ChromaService:
+    _instance = None
+    def __new__(cls):
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+            cls._instance._initialize()
+        return cls._instance
+    def _initialize(self):
+        """Initialize ChromaDB Cloud client"""
+        if not Config.CHROMA_API_KEY:
+            raise ValueError(
+                "CHROMA_API_KEY is required! Please set it in your .env file. "
+                "Get your API key from https://www.trychroma.com/"
+            )
+        print(f"Connecting to ChromaDB Cloud...")
+        print(f"Tenant: {Config.CHROMA_TENANT}")
+        print(f"Database: {Config.CHROMA_DATABASE}")
+        # Connect to ChromaDB Cloud using CloudClient
+        self.client = chromadb.CloudClient(
+            tenant=Config.CHROMA_TENANT,
+            database=Config.CHROMA_DATABASE,
+            api_key=Config.CHROMA_API_KEY
+        )
+        print("Connected to ChromaDB Cloud successfully!")
+        # Initialize collections
+        self._init_collections()
+    def _init_collections(self):
+        """Initialize required collections"""
+        # Users collection
+        self.users_collection = self.client.get_or_create_collection(
+            name="users",
+            metadata={"description": "User authentication data with roles"}
+        )
+        # Buckets collection for organizing documents
+        self.buckets_collection = self.client.get_or_create_collection(
+            name="buckets",
+            metadata={"description": "Document buckets for organization"}
+        )
+        # Documents collection
+        self.documents_collection = self.client.get_or_create_collection(
+            name="documents",
+            metadata={"description": "Document metadata and embeddings"}
+        )
+        # Chunks collection for RAG
+        self.chunks_collection = self.client.get_or_create_collection(
+            name="document_chunks",
+            metadata={"description": "Document chunks for RAG retrieval"}
+        )
+        # Conversation history collection for memory
+        self.conversations_collection = self.client.get_or_create_collection(
+            name="conversation_history",
+            metadata={"description": "Persistent conversation memory for context"}
+        )
+        # Chat sessions collection for complete chat history
+        self.chat_sessions_collection = self.client.get_or_create_collection(
+            name="chat_sessions",
+            metadata={"description": "Complete chat session storage"}
+        )
+        # Document metadata collection for structured data (aggregate queries)
+        self.metadata_collection = self.client.get_or_create_collection(
+            name="document_metadata",
+            metadata={"description": "Structured document metadata for aggregate queries"}
+        )
+        # Summary chunks collection for fast aggregate retrieval
+        self.summary_chunks_collection = self.client.get_or_create_collection(
+            name="document_summaries",
+            metadata={"description": "Document summaries for aggregate queries"}
+        )
+        print("Collections initialized: users, buckets, documents, document_chunks, conversation_history, chat_sessions, document_metadata, document_summaries")
+    # ==================== User Operations ====================
+    def create_user(self, username: str, password_hash: str, email: str = "",
+                    role: str = "admin", admin_id: str = None) -> dict:
+        """Create a new user (admin or employee)"""
+        user_id = hashlib.sha256(username.encode()).hexdigest()[:16]
+        existing = self.users_collection.get(ids=[user_id])
+        if existing['ids']:
+            return {"error": "User already exists"}
+        metadata = {
+            "username": username,
+            "password_hash": password_hash,
+            "email": email,
+            "role": role,
+            "created_at": time.time()
+        }
+        if admin_id:
+            metadata["admin_id"] = admin_id
+        self.users_collection.add(
+            ids=[user_id],
+            documents=[username],
+            metadatas=[metadata]
+        )
+        return {"user_id": user_id, "username": username, "role": role}
+    def get_user(self, username: str) -> dict | None:
+        """Get user by username"""
+        user_id = hashlib.sha256(username.encode()).hexdigest()[:16]
+        result = self.users_collection.get(ids=[user_id])
+        if result['ids']:
+            return {
+                "user_id": result['ids'][0],
+                "username": result['metadatas'][0]['username'],
+                "password_hash": result['metadatas'][0]['password_hash'],
+                "email": result['metadatas'][0].get('email', ''),
+                "role": result['metadatas'][0].get('role', 'admin'),
+                "admin_id": result['metadatas'][0].get('admin_id')
+            }
+        return None
+    def get_employees_by_admin(self, admin_id: str) -> list:
+        """Get all employees created by a specific admin"""
+        results = self.users_collection.get(where={"admin_id": admin_id})
+        employees = []
+        for i, user_id in enumerate(results['ids']):
+            employees.append({
+                "user_id": user_id,
+                "email": results['metadatas'][i].get('email', ''),
+                "username": results['metadatas'][i].get('username', ''),
+                "created_at": results['metadatas'][i].get('created_at', 0)
+            })
+        return employees
+    def delete_employee(self, admin_id: str, employee_id: str) -> bool:
+        """Delete an employee (verify admin ownership)"""
+        result = self.users_collection.get(ids=[employee_id])
+        if not result['ids']:
+            return False
+        if result['metadatas'][0].get('admin_id') != admin_id:
+            return False
+        self.users_collection.delete(ids=[employee_id])
+        return True
+    # ==================== Bucket Operations ====================
+    def create_bucket(self, user_id: str, name: str, description: str = "") -> dict:
+        """Create a new bucket for organizing documents"""
+        bucket_id = hashlib.sha256(f"{user_id}_{name}_{time.time()}".encode()).hexdigest()[:16]
+        self.buckets_collection.add(
+            ids=[bucket_id],
+            documents=[name],
+            metadatas=[{
+                "user_id": user_id,
+                "name": name,
+                "description": description,
+                "created_at": time.time()
+            }]
+        )
+        return {"bucket_id": bucket_id, "name": name}
+    def get_user_buckets(self, user_id: str) -> list:
+        """Get all buckets for a user"""
+        results = self.buckets_collection.get(where={"user_id": user_id})
+        buckets = []
+        for i, bucket_id in enumerate(results['ids']):
+            # Count documents in this bucket
+            doc_count = len(self.documents_collection.get(
+                where={"bucket_id": bucket_id}
+            )['ids'])
+            buckets.append({
+                "bucket_id": bucket_id,
+                "name": results['metadatas'][i]['name'],
+                "description": results['metadatas'][i].get('description', ''),
+                "doc_count": doc_count,
+                "created_at": results['metadatas'][i]['created_at']
+            })
+        return buckets
+    def delete_bucket(self, bucket_id: str, user_id: str) -> bool:
+        """Delete a bucket and optionally its documents"""
+        bucket = self.buckets_collection.get(ids=[bucket_id])
+        if not bucket['ids'] or bucket['metadatas'][0]['user_id'] != user_id:
+            return False
+        # Delete bucket
+        self.buckets_collection.delete(ids=[bucket_id])
+        # Update documents to remove bucket_id
+        docs = self.documents_collection.get(where={"bucket_id": bucket_id})
+        for i, doc_id in enumerate(docs['ids']):
+            # Update metadata to remove bucket_id (set to empty)
+            meta = docs['metadatas'][i]
+            meta['bucket_id'] = ""
+            self.documents_collection.update(
+                ids=[doc_id],
+                metadatas=[meta]
+            )
+        return True
+    # ==================== Document Operations ====================
+    def store_document(self, user_id: str, doc_id: str, filename: str,
+                       doc_type: str, content: str, bucket_id: str = "") -> dict:
+        """Store document metadata"""
+        self.documents_collection.add(
+            ids=[doc_id],
+            documents=[content[:1000]],  # Store preview
+            metadatas=[{
+                "user_id": user_id,
+                "filename": filename,
+                "doc_type": doc_type,
+                "bucket_id": bucket_id,
+                "content_length": len(content),
+                "created_at": time.time()
+            }]
+        )
+        return {"doc_id": doc_id, "filename": filename}
+    def update_document_bucket(self, doc_id: str, user_id: str, bucket_id: str) -> bool:
+        """Move document to a different bucket"""
+        doc = self.documents_collection.get(ids=[doc_id])
+        if not doc['ids'] or doc['metadatas'][0]['user_id'] != user_id:
+            return False
+        meta = doc['metadatas'][0]
+        meta['bucket_id'] = bucket_id
+        self.documents_collection.update(
+            ids=[doc_id],
+            metadatas=[meta]
+        )
+        return True
+    def get_user_documents(self, user_id: str, bucket_id: str = None) -> list:
+        """Get all documents for a user, optionally filtered by bucket"""
+        if bucket_id:
+            results = self.documents_collection.get(
+                where={"$and": [{"user_id": user_id}, {"bucket_id": bucket_id}]}
+            )
+        else:
+            results = self.documents_collection.get(where={"user_id": user_id})
+        documents = []
+        for i, doc_id in enumerate(results['ids']):
+            documents.append({
+                "doc_id": doc_id,
+                "filename": results['metadatas'][i]['filename'],
+                "doc_type": results['metadatas'][i]['doc_type'],
+                "bucket_id": results['metadatas'][i].get('bucket_id', ''),
+                "created_at": results['metadatas'][i]['created_at']
+            })
+        return documents
+    def get_document(self, doc_id: str, user_id: str) -> dict | None:
+        """Get a single document by ID"""
+        doc = self.documents_collection.get(ids=[doc_id])
+        if not doc['ids'] or doc['metadatas'][0]['user_id'] != user_id:
+            return None
+        return {
+            "doc_id": doc_id,
+            "filename": doc['metadatas'][0]['filename'],
+            "doc_type": doc['metadatas'][0]['doc_type'],
+            "bucket_id": doc['metadatas'][0].get('bucket_id', ''),
+            "content_preview": doc['documents'][0],
+            "created_at": doc['metadatas'][0]['created_at']
+        }
+    def delete_document(self, doc_id: str, user_id: str) -> bool:
+        """Delete a document and ALL its chunks from the database"""
+        doc = self.documents_collection.get(ids=[doc_id])
+        if not doc['ids'] or doc['metadatas'][0]['user_id'] != user_id:
+            print(f"Document {doc_id} not found or access denied for user {user_id}")
+            return False
+        filename = doc['metadatas'][0].get('filename', 'unknown')
+        print(f"Deleting document: {filename} (ID: {doc_id})")
+        # First, delete all chunks for this document
+        try:
+            chunks = self.chunks_collection.get(where={"doc_id": doc_id})
+            chunk_count = len(chunks['ids']) if chunks['ids'] else 0
+            if chunk_count > 0:
+                print(f"  Deleting {chunk_count} chunks for document {doc_id}...")
+                self.chunks_collection.delete(ids=chunks['ids'])
+                print(f"  Successfully deleted {chunk_count} chunks")
+            else:
+                print(f"  No chunks found for document {doc_id}")
+        except Exception as e:
+            print(f"  Error deleting chunks: {e}")
+            # Continue to delete document even if chunk deletion fails
+        # Then delete the document metadata
+        try:
+            self.documents_collection.delete(ids=[doc_id])
+            print(f"  Successfully deleted document metadata for {doc_id}")
+        except Exception as e:
+            print(f"  Error deleting document metadata: {e}")
+            return False
+        return True
+    def clear_all_user_chunks(self, user_id: str) -> int:
+        """Clear ALL chunks for a user - useful for cleanup after stale data issues"""
+        chunks = self.chunks_collection.get(where={"user_id": user_id})
+        if chunks['ids']:
+            self.chunks_collection.delete(ids=chunks['ids'])
+            return len(chunks['ids'])
+        return 0
+    # ==================== Chunk Operations (RAG) ====================
+    def store_chunks(self, doc_id: str, user_id: str, chunks: list[dict], bucket_id: str = ""):
+        """Store document chunks with embeddings for RAG - one at a time for quota compliance"""
+        if not chunks:
+            return
+        total_chunks = len(chunks)
+        print(f"Storing {total_chunks} chunks for document...")
+        total_chunks = len(chunks)
+        print(f"Storing {total_chunks} chunks for document...")
+        # Batch size for ChromaDB Cloud (max 100 per batch recommended)
+        BATCH_SIZE = 100
+        for i in range(0, total_chunks, BATCH_SIZE):
+            batch = chunks[i:i + BATCH_SIZE]
+            batch_ids = []
+            batch_documents = []
+            batch_metadatas = []
+            for j, chunk in enumerate(batch):
+                # Global index
+                global_idx = i + j
+                chunk_id = f"{doc_id}_chunk_{global_idx}"
+                # Truncate chunk text if too large
+                text = chunk['text']
+                if len(text) > 4000:
+                    text = text[:4000]
+                metadata = {
+                    "doc_id": doc_id,
+                    "user_id": user_id,
+                    "bucket_id": bucket_id,
+                    "chunk_index": global_idx,
+                    "start_char": chunk.get('start', 0),
+                    "end_char": chunk.get('end', 0)
+                }
+                batch_ids.append(chunk_id)
+                batch_documents.append(text)
+                batch_metadatas.append(metadata)
+            try:
+                self.chunks_collection.add(
+                    ids=batch_ids,
+                    documents=batch_documents,
+                    metadatas=batch_metadatas
+                )
+                print(f"  Stored batch {i // BATCH_SIZE + 1} ({len(batch)} chunks)")
+            except Exception as e:
+                print(f"  Error storing batch starting at index {i}: {str(e)[:100]}")
+                # Fallback: try one by one for this failed batch
+                print("  Retrying invalid batch one by one...")
+                for k, (bid, doc, meta) in enumerate(zip(batch_ids, batch_documents, batch_metadatas)):
+                    try:
+                        self.chunks_collection.add(ids=[bid], documents=[doc], metadatas=[meta])
+                    except Exception as inner_e:
+                        print(f"    Failed chunk {i+k}: {str(inner_e)[:50]}")
+    def search_chunks(self, user_id: str, query: str,
+                      doc_ids: list[str] = None, bucket_id: str = None,
+                      top_k: int = 5) -> list[dict]:
+        """Search for relevant chunks with filtering by bucket or documents.
+        IMPORTANT: When bucket_id is provided, ONLY chunks from that bucket are returned.
+        This ensures strict bucket isolation for multi-bucket deployments.
+        """
+        # Build where clause with strict bucket isolation
+        if bucket_id:
+            where_clause = {
+                "$and": [
+                    {"user_id": user_id},
+                    {"bucket_id": bucket_id}
+                ]
+            }
+            print(f"[CHROMA] Strict bucket isolation: searching only bucket '{bucket_id}'")
+        elif doc_ids:
+            where_clause = {
+                "$and": [
+                    {"user_id": user_id},
+                    {"doc_id": {"$in": doc_ids}}
+                ]
+            }
+        else:
+            where_clause = {"user_id": user_id}
+        results = self.chunks_collection.query(
+            query_texts=[query],
+            n_results=top_k,
+            where=where_clause
+        )
+        chunks = []
+        if results['ids'] and results['ids'][0]:
+            for i, chunk_id in enumerate(results['ids'][0]):
+                chunk_bucket = results['metadatas'][0][i].get('bucket_id', '')
+                # Validate bucket isolation (should never happen, but log if it does)
+                if bucket_id and chunk_bucket != bucket_id:
+                    print(f"[CHROMA WARNING] Bucket leak detected! Expected '{bucket_id}', got '{chunk_bucket}'")
+                    continue  # Skip leaked chunks
+                chunks.append({
+                    "chunk_id": chunk_id,
+                    "text": results['documents'][0][i],
+                    "doc_id": results['metadatas'][0][i]['doc_id'],
+                    "bucket_id": chunk_bucket,
+                    "distance": results['distances'][0][i] if results.get('distances') else 0
+                })
+        return chunks
+    def get_bucket_document_list(self, user_id: str, bucket_id: str) -> list[str]:
+        """Get list of document filenames in a bucket for cross-document queries."""
+        if not bucket_id:
+            return []
+        docs = self.get_user_documents(user_id, bucket_id)
+        return [doc.get('filename', 'Unknown') for doc in docs]
+    def get_document_chunks(self, doc_id: str) -> list[dict]:
+        """Get all chunks for a specific document"""
+        results = self.chunks_collection.get(where={"doc_id": doc_id})
+        chunks = []
+        for i, chunk_id in enumerate(results['ids']):
+            chunks.append({
+                "chunk_id": chunk_id,
+                "text": results['documents'][i],
+                "chunk_index": results['metadatas'][i]['chunk_index']
+            })
+        chunks.sort(key=lambda x: x['chunk_index'])
+        return chunks
+    # ==================== Conversation Memory Operations ====================
+    def store_conversation(self, user_id: str, role: str, content: str,
+                           bucket_id: str = "", chat_id: str = "") -> dict:
+        """Store a conversation message for persistent memory"""
+        import time
+        msg_id = f"{user_id}_{int(time.time() * 1000)}"
+        self.conversations_collection.add(
+            ids=[msg_id],
+            documents=[content],
+            metadatas=[{
+                "user_id": user_id,
+                "role": role,  # 'user' or 'assistant'
+                "bucket_id": bucket_id,
+                "chat_id": chat_id,
+                "timestamp": time.time()
+            }]
+        )
+        return {"msg_id": msg_id}
+    def get_conversation_history(self, user_id: str, bucket_id: str = None,
+                                  limit: int = 20) -> list[dict]:
+        """Retrieve conversation history for a user, optionally filtered by bucket"""
+        if bucket_id:
+            where_clause = {
+                "$and": [
+                    {"user_id": user_id},
+                    {"bucket_id": bucket_id}
+                ]
+            }
+        else:
+            where_clause = {"user_id": user_id}
+        results = self.conversations_collection.get(
+            where=where_clause
+        )
+        messages = []
+        for i, msg_id in enumerate(results['ids']):
+            messages.append({
+                "msg_id": msg_id,
+                "role": results['metadatas'][i]['role'],
+                "content": results['documents'][i],
+                "timestamp": results['metadatas'][i]['timestamp'],
+                "bucket_id": results['metadatas'][i].get('bucket_id', ''),
+                "chat_id": results['metadatas'][i].get('chat_id', '')
+            })
+        # Sort by timestamp (newest last) and limit
+        messages.sort(key=lambda x: x['timestamp'])
+        return messages[-limit:]
+    def clear_conversation(self, user_id: str, bucket_id: str = None) -> bool:
+        """Clear conversation history for a user"""
+        if bucket_id:
+            where_clause = {
+                "$and": [
+                    {"user_id": user_id},
+                    {"bucket_id": bucket_id}
+                ]
+            }
+        else:
+            where_clause = {"user_id": user_id}
+        results = self.conversations_collection.get(where=where_clause)
+        if results['ids']:
+            self.conversations_collection.delete(ids=results['ids'])
+        return True
+    # ==================== Chat Session Operations ====================
+    def save_chat_session(self, user_id: str, chat_id: str, topic: str,
+                          messages: list, bucket_id: str = "") -> dict:
+        """Store or update a complete chat session.
+        Note: ChromaDB Cloud has a 16KB document size limit, so we truncate
+        long messages to fit within this constraint.
+        """
+        import json
+        # ChromaDB Cloud has a 16KB (16384 bytes) document size limit
+        MAX_DOC_SIZE = 14000  # Leave buffer for metadata overhead
+        MAX_MESSAGE_LENGTH = 3000  # Max chars per message when truncating
+        # First, try to serialize as-is
+        messages_json = json.dumps(messages)
+        # If too large, truncate individual message contents
+        if len(messages_json) > MAX_DOC_SIZE:
+            truncated_messages = []
+            for msg in messages:
+                truncated_msg = {**msg}
+                if len(msg.get('content', '')) > MAX_MESSAGE_LENGTH:
+                    truncated_msg['content'] = msg['content'][:MAX_MESSAGE_LENGTH] + '... [truncated for storage]'
+                truncated_messages.append(truncated_msg)
+            messages_json = json.dumps(truncated_messages)
+            # If still too large, keep only the last N messages
+            if len(messages_json) > MAX_DOC_SIZE:
+                # Keep first message (context) and last few exchanges
+                keep_count = 10
+                while len(messages_json) > MAX_DOC_SIZE and keep_count > 2:
+                    truncated_messages = truncated_messages[-keep_count:]
+                    messages_json = json.dumps(truncated_messages)
+                    keep_count -= 2
+        metadata = {
+            "user_id": user_id,
+            "topic": topic,
+            "bucket_id": bucket_id,
+            "message_count": len(messages),
+            "timestamp": time.time()
+        }
+        # Check if chat exists
+        existing = self.chat_sessions_collection.get(ids=[chat_id])
+        try:
+            if existing['ids']:
+                # Update existing chat
+                self.chat_sessions_collection.update(
+                    ids=[chat_id],
+                    documents=[messages_json],
+                    metadatas=[metadata]
+                )
+            else:
+                # Add new chat
+                self.chat_sessions_collection.add(
+                    ids=[chat_id],
+                    documents=[messages_json],
+                    metadatas=[metadata]
+                )
+        except Exception as e:
+            # If still failing, store minimal version
+            print(f"[CHAT SAVE] Error saving full chat, storing minimal: {e}")
+            minimal_messages = [{"role": "system", "content": f"Chat with {len(messages)} messages (too large to store)"}]
+            self.chat_sessions_collection.upsert(
+                ids=[chat_id],
+                documents=[json.dumps(minimal_messages)],
+                metadatas=[metadata]
+            )
+        return {"chat_id": chat_id, "topic": topic}
+    def get_user_chat_sessions(self, user_id: str) -> list:
+        """Get all chat sessions for a user"""
+        import json
+        results = self.chat_sessions_collection.get(where={"user_id": user_id})
+        sessions = []
+        for i, chat_id in enumerate(results['ids']):
+            try:
+                messages = json.loads(results['documents'][i])
+            except:
+                messages = []
+            sessions.append({
+                "id": chat_id,
+                "topic": results['metadatas'][i].get('topic', 'Chat'),
+                "messages": messages,
+                "bucket": results['metadatas'][i].get('bucket_id', ''),
+                "timestamp": results['metadatas'][i].get('timestamp', 0)
+            })
+        # Sort by timestamp (newest first)
+        sessions.sort(key=lambda x: x['timestamp'], reverse=True)
+        return sessions
+    def get_chat_session(self, user_id: str, chat_id: str) -> dict | None:
+        """Get a single chat session by ID"""
+        import json
+        result = self.chat_sessions_collection.get(ids=[chat_id])
+        if not result['ids']:
+            return None
+        # Verify ownership
+        if result['metadatas'][0].get('user_id') != user_id:
+            return None
+        try:
+            messages = json.loads(result['documents'][0])
+        except:
+            messages = []
+        return {
+            "id": chat_id,
+            "topic": result['metadatas'][0].get('topic', 'Chat'),
+            "messages": messages,
+            "bucket": result['metadatas'][0].get('bucket_id', ''),
+            "timestamp": result['metadatas'][0].get('timestamp', 0)
+        }
+    def delete_chat_session(self, user_id: str, chat_id: str) -> bool:
+        """Delete a chat session and all associated conversation history"""
+        result = self.chat_sessions_collection.get(ids=[chat_id])
+        if not result['ids']:
+            return False
+        # Verify ownership
+        if result['metadatas'][0].get('user_id') != user_id:
+            return False
+        # Delete the chat session
+        self.chat_sessions_collection.delete(ids=[chat_id])
+        # Also delete all conversation history entries for this chat
+        try:
+            conv_results = self.conversations_collection.get(
+                where={
+                    "$and": [
+                        {"user_id": user_id},
+                        {"chat_id": chat_id}
+                    ]
+                }
+            )
+            if conv_results['ids']:
+                self.conversations_collection.delete(ids=conv_results['ids'])
+                print(f"Deleted {len(conv_results['ids'])} conversation history entries for chat {chat_id}")
+        except Exception as e:
+            print(f"Warning: Could not delete conversation history for chat {chat_id}: {e}")
+        return True
+    # ==================== Document Metadata Operations (Aggregate Queries) ====================
+    def store_document_metadata(self, doc_id: str, user_id: str, bucket_id: str,
+                                 metadata: dict) -> dict:
+        """
+        Store structured metadata for a document.
+        Used for aggregate queries like 'list all manufacturing units'.
+        Args:
+            doc_id: Document ID
+            user_id: User ID
+            bucket_id: Bucket ID
+            metadata: Structured metadata dict
+        """
+        import json
+        # Flatten metadata for ChromaDB (which only supports primitive types in metadata)
+        # Helper functions for safe type conversion
+        def safe_float(value, default=0.0):
+            if value is None:
+                return default
+            if isinstance(value, (int, float)):
+                return float(value)
+            try:
+                # Try to extract numbers from string
+                import re
+                if isinstance(value, str):
+                    # Remove currency symbols and commas
+                    cleaned = re.sub(r'[^\d.]', '', str(value).replace(',', ''))
+                    if cleaned:
+                        return float(cleaned)
+                return default
+            except:
+                return default
+        def safe_int(value, default=0):
+            if value is None:
+                return default
+            if isinstance(value, int):
+                return value
+            try:
+                return int(safe_float(value, default))
+            except:
+                return default
+        flat_metadata = {
+            "doc_id": doc_id,
+            "user_id": user_id,
+            "bucket_id": bucket_id,
+            "document_type": str(metadata.get("document_type", "")),
+            "document_title": str(metadata.get("document_title", "")),
+            "policy_number": str(metadata.get("policy_number", "")),
+            "insurer_name": str(metadata.get("insurer_name", "")),
+            "insured_name": str(metadata.get("insured_name", "")),
+            "broker_name": str(metadata.get("broker_name", "")),
+            "policy_type": str(metadata.get("policy_type", "")),
+            "industry": str(metadata.get("industry", "")),
+            "is_manufacturing": bool(metadata.get("is_manufacturing", False)),
+            "sum_insured": safe_float(metadata.get("sum_insured")),
+            "premium_amount": safe_float(metadata.get("premium_amount")),
+            "policy_start_date": str(metadata.get("policy_start_date", "")),
+            "policy_end_date": str(metadata.get("policy_end_date", "")),
+            "renewal_date": str(metadata.get("renewal_date", "")),
+            "renewal_year": safe_int(metadata.get("renewal_year")),
+            "city": str(metadata.get("city", "")),
+            "state": str(metadata.get("state", "")),
+            "pincode": str(metadata.get("pincode", "")),
+            "property_address": str(metadata.get("property_address", ""))[:500],
+            "created_at": metadata.get("created_at", time.time())
+        }
+        # Store arrays as JSON strings
+        coverage_types = metadata.get("coverage_type", [])
+        flat_metadata["coverage_type_json"] = json.dumps(coverage_types if isinstance(coverage_types, list) else [])
+        keywords = metadata.get("keywords", [])
+        flat_metadata["keywords_json"] = json.dumps(keywords if isinstance(keywords, list) else [])
+        # Create searchable text from metadata
+        searchable_text = f"""
+        {metadata.get('document_title', '')}
+        {metadata.get('insured_name', '')}
+        {metadata.get('insurer_name', '')}
+        {metadata.get('policy_type', '')}
+        {metadata.get('industry', '')}
+        {metadata.get('city', '')} {metadata.get('state', '')}
+        Policy Number: {metadata.get('policy_number', '')}
+        Sum Insured: {metadata.get('sum_insured', '')}
+        """.strip()
+        # Check if metadata already exists for this doc
+        existing = self.metadata_collection.get(ids=[doc_id])
+        if existing['ids']:
+            self.metadata_collection.update(
+                ids=[doc_id],
+                documents=[searchable_text],
+                metadatas=[flat_metadata]
+            )
+        else:
+            self.metadata_collection.add(
+                ids=[doc_id],
+                documents=[searchable_text],
+                metadatas=[flat_metadata]
+            )
+        return {"doc_id": doc_id, "status": "stored"}
+    def get_document_metadata(self, doc_id: str, user_id: str) -> dict | None:
+        """Get metadata for a specific document."""
+        result = self.metadata_collection.get(ids=[doc_id])
+        if not result['ids']:
+            return None
+        meta = result['metadatas'][0]
+        if meta.get('user_id') != user_id:
+            return None
+        return meta
+    def get_all_metadata(self, user_id: str, bucket_id: str = None) -> list[dict]:
+        """
+        Get ALL document metadata for a user/bucket.
+        Used for aggregate queries - returns complete list, no top-K limit.
+        """
+        import json
+        if bucket_id:
+            where_clause = {
+                "$and": [
+                    {"user_id": user_id},
+                    {"bucket_id": bucket_id}
+                ]
+            }
+        else:
+            where_clause = {"user_id": user_id}
+        results = self.metadata_collection.get(where=where_clause)
+        metadata_list = []
+        for i, doc_id in enumerate(results['ids']):
+            meta = results['metadatas'][i]
+            # Parse JSON arrays back
+            try:
+                meta['coverage_type'] = json.loads(meta.get('coverage_type_json', '[]'))
+            except:
+                meta['coverage_type'] = []
+            try:
+                meta['keywords'] = json.loads(meta.get('keywords_json', '[]'))
+            except:
+                meta['keywords'] = []
+            metadata_list.append(meta)
+        return metadata_list
+    def search_metadata(self, user_id: str, bucket_id: str, filters: dict) -> list[dict]:
+        """
+        Search metadata with filters.
+        Supports filtering by: policy_type, industry, is_manufacturing, renewal_year, city, state
+        """
+        # Build where clause
+        conditions = [{"user_id": user_id}]
+        if bucket_id:
+            conditions.append({"bucket_id": bucket_id})
+        for field, value in filters.items():
+            if value is not None and value != "":
+                conditions.append({field: value})
+        if len(conditions) > 1:
+            where_clause = {"$and": conditions}
+        else:
+            where_clause = conditions[0]
+        results = self.metadata_collection.get(where=where_clause)
+        return [results['metadatas'][i] for i in range(len(results['ids']))]
+    def delete_document_metadata(self, doc_id: str) -> bool:
+        """Delete metadata for a document."""
+        try:
+            self.metadata_collection.delete(ids=[doc_id])
+            return True
+        except:
+            return False
+    # ==================== Summary Chunks Operations ====================
+    def store_summary_chunk(self, doc_id: str, user_id: str, summary: str,
+                            bucket_id: str = "", filename: str = "") -> dict:
+        """
+        Store a document summary as a special chunk for aggregate queries.
+        """
+        summary_id = f"{doc_id}_summary"
+        metadata = {
+            "doc_id": doc_id,
+            "user_id": user_id,
+            "bucket_id": bucket_id,
+            "filename": filename,
+            "chunk_type": "summary",
+            "created_at": time.time()
+        }
+        # Check if summary exists
+        existing = self.summary_chunks_collection.get(ids=[summary_id])
+        if existing['ids']:
+            self.summary_chunks_collection.update(
+                ids=[summary_id],
+                documents=[summary],
+                metadatas=[metadata]
+            )
+        else:
+            self.summary_chunks_collection.add(
+                ids=[summary_id],
+                documents=[summary],
+                metadatas=[metadata]
+            )
+        return {"summary_id": summary_id, "status": "stored"}
+    def get_all_summaries(self, user_id: str, bucket_id: str = None) -> list[dict]:
+        """
+        Get ALL document summaries for a user/bucket.
+        Returns complete list - no top-K limit!
+        """
+        if bucket_id:
+            where_clause = {
+                "$and": [
+                    {"user_id": user_id},
+                    {"bucket_id": bucket_id}
+                ]
+            }
+        else:
+            where_clause = {"user_id": user_id}
+        results = self.summary_chunks_collection.get(where=where_clause)
+        summaries = []
+        for i, summary_id in enumerate(results['ids']):
+            summaries.append({
+                "doc_id": results['metadatas'][i]['doc_id'],
+                "filename": results['metadatas'][i].get('filename', ''),
+                "summary": results['documents'][i],
+                "bucket_id": results['metadatas'][i].get('bucket_id', '')
+            })
+        return summaries
+    def search_summaries(self, user_id: str, query: str, bucket_id: str = None,
+                         top_k: int = 50) -> list[dict]:
+        """Search summaries by semantic similarity."""
+        if bucket_id:
+            where_clause = {
+                "$and": [
+                    {"user_id": user_id},
+                    {"bucket_id": bucket_id}
+                ]
+            }
+        else:
+            where_clause = {"user_id": user_id}
+        results = self.summary_chunks_collection.query(
+            query_texts=[query],
+            n_results=top_k,
+            where=where_clause
+        )
+        summaries = []
+        if results['ids'] and results['ids'][0]:
+            for i, summary_id in enumerate(results['ids'][0]):
+                summaries.append({
+                    "doc_id": results['metadatas'][0][i]['doc_id'],
+                    "filename": results['metadatas'][0][i].get('filename', ''),
+                    "summary": results['documents'][0][i],
+                    "distance": results['distances'][0][i] if results.get('distances') else 0
+                })
+        return summaries
+    def delete_summary_chunk(self, doc_id: str) -> bool:
+        """Delete summary chunk for a document."""
+        try:
+            summary_id = f"{doc_id}_summary"
+            self.summary_chunks_collection.delete(ids=[summary_id])
+            return True
+        except:
+            return False
+# Singleton instance: constructed once, when this module is first imported
+chroma_service = ChromaService()

services/date_parser.py ADDED Viewed

	@@ -0,0 +1,285 @@

+"""
+Date Parser Service
+Handles parsing of various date formats commonly found in insurance documents.
+Supports:
+- 1-1-25, 01-01-2025, 1/1/25, 01/01/2025
+- January 1, 2025, Jan 1, 2025, 1 January 2025
+- 2025-01-01 (ISO format)
+- Date ranges and period calculations
+"""
+import re
+from datetime import datetime, timedelta
+from typing import Optional, List, Dict, Tuple
+class DateParser:
+    """Parse and normalize dates from various formats."""
+    # Month name mappings
+    MONTHS = {
+        'january': 1, 'jan': 1,
+        'february': 2, 'feb': 2,
+        'march': 3, 'mar': 3,
+        'april': 4, 'apr': 4,
+        'may': 5,
+        'june': 6, 'jun': 6,
+        'july': 7, 'jul': 7,
+        'august': 8, 'aug': 8,
+        'september': 9, 'sep': 9, 'sept': 9,
+        'october': 10, 'oct': 10,
+        'november': 11, 'nov': 11,
+        'december': 12, 'dec': 12
+    }
+    # Date context keywords for identifying date types
+    DATE_CONTEXTS = {
+        'start': ['start', 'commence', 'inception', 'effective', 'from', 'begins', 'starting'],
+        'end': ['end', 'expiry', 'expire', 'expiration', 'until', 'to', 'ending', 'valid till', 'valid until'],
+        'renewal': ['renewal', 'renew', 'next renewal', 'due for renewal'],
+        'issue': ['issue', 'issued', 'date of issue', 'policy date']
+    }
+    def __init__(self):
+        self._compile_patterns()
+    def _compile_patterns(self):
+        """Compile regex patterns for date extraction."""
+        # DD-MM-YY or DD-MM-YYYY (with - or /)
+        self.pattern_dmy = re.compile(
+            r'\b(\d{1,2})[-/](\d{1,2})[-/](\d{2,4})\b'
+        )
+        # YYYY-MM-DD (ISO format)
+        self.pattern_iso = re.compile(
+            r'\b(\d{4})[-/](\d{1,2})[-/](\d{1,2})\b'
+        )
+        # Month DD, YYYY or DD Month YYYY
+        month_names = '|'.join(self.MONTHS.keys())
+        self.pattern_month_name = re.compile(
+            rf'\b(\d{{1,2}})\s*(?:st|nd|rd|th)?\s*({month_names})[,]?\s*(\d{{4}})\b|'
+            rf'\b({month_names})\s*(\d{{1,2}})(?:st|nd|rd|th)?[,]?\s*(\d{{4}})\b',
+            re.IGNORECASE
+        )
+    def parse_date(self, date_str: str) -> Optional[datetime]:
+        """
+        Parse a date string in various formats to datetime object.
+        Args:
+            date_str: Date string to parse
+        Returns:
+            datetime object or None if parsing fails
+        """
+        if not date_str:
+            return None
+        date_str = str(date_str).strip()
+        # Try ISO format first (YYYY-MM-DD)
+        match = self.pattern_iso.search(date_str)
+        if match:
+            year, month, day = match.groups()
+            try:
+                return datetime(int(year), int(month), int(day))
+            except ValueError:
+                pass
+        # Try DMY format (DD-MM-YY or DD-MM-YYYY)
+        match = self.pattern_dmy.search(date_str)
+        if match:
+            day, month, year = match.groups()
+            year = int(year)
+            # Handle 2-digit years
+            if year < 100:
+                year = 2000 + year if year < 50 else 1900 + year
+            try:
+                return datetime(year, int(month), int(day))
+            except ValueError:
+                # Try swapping day/month for US format
+                try:
+                    return datetime(year, int(day), int(month))
+                except ValueError:
+                    pass
+        # Try month name format
+        match = self.pattern_month_name.search(date_str)
+        if match:
+            groups = match.groups()
+            if groups[0]:  # DD Month YYYY format
+                day, month_name, year = groups[0], groups[1], groups[2]
+            else:  # Month DD, YYYY format
+                month_name, day, year = groups[3], groups[4], groups[5]
+            month = self.MONTHS.get(month_name.lower())
+            if month:
+                try:
+                    return datetime(int(year), month, int(day))
+                except ValueError:
+                    pass
+        return None
+    def extract_dates_from_text(self, text: str) -> List[Dict]:
+        """
+        Extract all dates from text with their context.
+        Args:
+            text: Text to search for dates
+        Returns:
+            List of dicts with date info:
+            [{"date": datetime, "context": "start/end/renewal/issue/unknown",
+              "original": "01-01-2025", "position": 123}]
+        """
+        if not text:
+            return []
+        results = []
+        text_lower = text.lower()
+        # Find all date matches
+        all_matches = []
+        # DMY format
+        for match in self.pattern_dmy.finditer(text):
+            parsed = self.parse_date(match.group())
+            if parsed:
+                all_matches.append({
+                    'date': parsed,
+                    'original': match.group(),
+                    'position': match.start()
+                })
+        # ISO format
+        for match in self.pattern_iso.finditer(text):
+            parsed = self.parse_date(match.group())
+            if parsed:
+                all_matches.append({
+                    'date': parsed,
+                    'original': match.group(),
+                    'position': match.start()
+                })
+        # Month name format
+        for match in self.pattern_month_name.finditer(text):
+            parsed = self.parse_date(match.group())
+            if parsed:
+                all_matches.append({
+                    'date': parsed,
+                    'original': match.group(),
+                    'position': match.start()
+                })
+        # Determine context for each date
+        for match in all_matches:
+            pos = match['position']
+            # Look at surrounding text (100 chars before)
+            context_start = max(0, pos - 100)
+            context_text = text_lower[context_start:pos]
+            date_type = 'unknown'
+            for dtype, keywords in self.DATE_CONTEXTS.items():
+                if any(kw in context_text for kw in keywords):
+                    date_type = dtype
+                    break
+            results.append({
+                'date': match['date'],
+                'date_str': match['date'].strftime('%Y-%m-%d'),
+                'context': date_type,
+                'original': match['original'],
+                'position': pos
+            })
+        # Remove duplicates based on date
+        seen_dates = set()
+        unique_results = []
+        for r in results:
+            date_key = r['date_str']
+            if date_key not in seen_dates:
+                seen_dates.add(date_key)
+                unique_results.append(r)
+        return unique_results
+    def calculate_renewal_date(self, policy_start: datetime,
+                                term_months: int = 12) -> datetime:
+        """
+        Calculate policy renewal date.
+        Args:
+            policy_start: Policy start date
+            term_months: Policy term in months (default 12)
+        Returns:
+            Renewal date (policy_start + term_months)
+        """
+        # Add months
+        new_month = policy_start.month + term_months
+        new_year = policy_start.year + (new_month - 1) // 12
+        new_month = ((new_month - 1) % 12) + 1
+        # Handle day overflow
+        try:
+            return datetime(new_year, new_month, policy_start.day)
+        except ValueError:
+            # Last day of month for dates like Jan 31 + 1 month
+            if new_month == 12:
+                next_month = datetime(new_year + 1, 1, 1)
+            else:
+                next_month = datetime(new_year, new_month + 1, 1)
+            return next_month - timedelta(days=1)
+    def is_date_in_range(self, date: datetime,
+                          year: int = None,
+                          before: datetime = None,
+                          after: datetime = None) -> bool:
+        """
+        Check if date matches filter criteria.
+        Args:
+            date: Date to check
+            year: Match specific year
+            before: Date must be before this
+            after: Date must be after this
+        Returns:
+            True if date matches all criteria
+        """
+        if not date:
+            return False
+        if year and date.year != year:
+            return False
+        if before and date >= before:
+            return False
+        if after and date <= after:
+            return False
+        return True
+    def get_year_from_query(self, query: str) -> Optional[int]:
+        """Extract year from query like 'policies renewing in 2026'."""
+        match = re.search(r'\b(20\d{2})\b', query)
+        if match:
+            return int(match.group(1))
+        # Handle relative years
+        current_year = datetime.now().year
+        if 'this year' in query.lower():
+            return current_year
+        if 'next year' in query.lower():
+            return current_year + 1
+        if 'last year' in query.lower():
+            return current_year - 1
+        return None
+# Singleton instance: constructed once, when this module is first imported
+date_parser = DateParser()

services/document_processor.py ADDED Viewed

	@@ -0,0 +1,336 @@

+"""
+Document Processor Service
+Handles text extraction from various document types:
+- PDF (text extraction + OCR fallback)
+- DOCX (Word documents)
+- PowerPoint (PPT, PPTX)
+- Excel (XLS, XLSX)
+- Images (via OCR)
+- Plain text (TXT, MD)
+"""
+import os
+import io
+from pathlib import Path
+from typing import Optional
+import fitz  # PyMuPDF
+from docx import Document
+from pptx import Presentation
+from pptx.util import Inches
+import pandas as pd
+from PIL import Image
+from services.ocr_service import ocr_service
+from config import Config
+class DocumentProcessor:
+    def __init__(self):
+        """Read the allowed file extensions from ``Config.ALLOWED_EXTENSIONS``."""
+        self.supported_extensions = Config.ALLOWED_EXTENSIONS
+    def get_file_type(self, filename: str) -> str:
+        """Determine file type from extension"""
+        ext = Path(filename).suffix.lower().lstrip('.')
+        type_map = {
+            'pdf': 'pdf',
+            'doc': 'word',
+            'docx': 'word',
+            'ppt': 'powerpoint',
+            'pptx': 'powerpoint',
+            'xls': 'excel',
+            'xlsx': 'excel',
+            'txt': 'text',
+            'md': 'text',
+            'png': 'image',
+            'jpg': 'image',
+            'jpeg': 'image',
+            'gif': 'image',
+            'webp': 'image'
+        }
+        return type_map.get(ext, 'unknown')
+    def is_supported(self, filename: str) -> bool:
+        """Check if file type is supported"""
+        ext = Path(filename).suffix.lower().lstrip('.')
+        return ext in self.supported_extensions
+    def process(self, file_path: str, filename: str) -> dict:
+        """
+        Process a document and extract text
+        Returns: {"success": bool, "text": str, "method": str, "error": str}
+        """
+        file_type = self.get_file_type(filename)
+        try:
+            if file_type == 'pdf':
+                return self._process_pdf(file_path)
+            elif file_type == 'word':
+                return self._process_word(file_path)
+            elif file_type == 'powerpoint':
+                return self._process_pptx(file_path)
+            elif file_type == 'excel':
+                return self._process_excel(file_path)
+            elif file_type == 'image':
+                return self._process_image(file_path)
+            elif file_type == 'text':
+                return self._process_text(file_path)
+            else:
+                return {
+                    "success": False,
+                    "error": f"Unsupported file type: {file_type}"
+                }
+        except Exception as e:
+            return {"success": False, "error": str(e)}
+    def _process_pdf(self, file_path: str) -> dict:
+        """
+        Process PDF - Always use complete OpenRouter vision OCR for best accuracy
+        """
+        try:
+            doc = fitz.open(file_path)
+            total_pages = len(doc)
+            doc.close()
+            print(f"Processing {total_pages} page PDF with OpenRouter vision OCR...")
+            # Use OpenRouter vision models for OCR
+            ocr_result = ocr_service.extract_text_from_pdf(file_path)
+            if ocr_result['success']:
+                print(f"PDF OCR successful")
+                return {
+                    "success": True,
+                    "text": ocr_result['text'],
+                    "method": ocr_result.get('model', 'OpenRouter Vision OCR'),
+                    "page_count": total_pages
+                }
+            else:
+                return {
+                    "success": False,
+                    "error": f"OCR failed: {ocr_result['error']}"
+                }
+        except Exception as e:
+            return {"success": False, "error": f"PDF processing error: {str(e)}"}
+    def _process_pdf_hybrid(self, file_path: str, text_pages: list, ocr_needed_pages: list) -> dict:
+        """
+        Hybrid PDF processing: combine text extraction with OCR for scanned pages only
+        Used as fallback when full PDF OCR fails
+        """
+        try:
+            doc = fitz.open(file_path)
+            total_pages = len(doc)
+            all_pages = {}
+            # Add already extracted text pages
+            for page_num, text in text_pages:
+                all_pages[page_num] = f"--- Page {page_num + 1} ---\n{text}"
+            # OCR the scanned pages in batches
+            print(f"OCR processing {len(ocr_needed_pages)} scanned pages...")
+            for i, page_num in enumerate(ocr_needed_pages):
+                page = doc[page_num]
+                # Render page to image
+                mat = fitz.Matrix(2, 2)  # 2x zoom for better OCR
+                pix = page.get_pixmap(matrix=mat)
+                temp_path = f"{file_path}_page_{page_num}.png"
+                pix.save(temp_path)
+                ocr_result = ocr_service.extract_text(temp_path)
+                # Clean up temp file
+                if os.path.exists(temp_path):
+                    os.remove(temp_path)
+                if ocr_result['success']:
+                    all_pages[page_num] = f"--- Page {page_num + 1} (OCR) ---\n{ocr_result['text']}"
+                else:
+                    all_pages[page_num] = f"--- Page {page_num + 1} ---\n[OCR failed: {ocr_result['error']}]"
+                # Progress logging every 10 pages
+                if (i + 1) % 10 == 0:
+                    print(f"OCR progress: {i + 1}/{len(ocr_needed_pages)} pages")
+            doc.close()
+            # Combine all pages in order
+            text_parts = [all_pages[i] for i in sorted(all_pages.keys())]
+            return {
+                "success": True,
+                "text": "\n\n".join(text_parts),
+                "method": "hybrid (text + OCR)",
+                "page_count": total_pages
+            }
+        except Exception as e:
+            return {"success": False, "error": f"Hybrid PDF processing error: {str(e)}"}
+    def _process_word(self, file_path: str) -> dict:
+        """Process Word documents (DOCX)"""
+        try:
+            doc = Document(file_path)
+            text_parts = []
+            # Extract paragraphs
+            for para in doc.paragraphs:
+                if para.text.strip():
+                    text_parts.append(para.text)
+            # Extract tables
+            for table in doc.tables:
+                table_text = []
+                for row in table.rows:
+                    row_text = [cell.text.strip() for cell in row.cells]
+                    table_text.append(" | ".join(row_text))
+                if table_text:
+                    text_parts.append("\n[Table]\n" + "\n".join(table_text))
+            return {
+                "success": True,
+                "text": "\n\n".join(text_parts),
+                "method": "docx extraction"
+            }
+        except Exception as e:
+            return {"success": False, "error": f"Word processing error: {str(e)}"}
+    def _process_pptx(self, file_path: str) -> dict:
+        """Process PowerPoint files (PPTX) - extracts all text from slides"""
+        try:
+            prs = Presentation(file_path)
+            text_parts = []
+            slide_count = 0
+            for slide_num, slide in enumerate(prs.slides, 1):
+                slide_count += 1
+                slide_text_parts = []
+                # Extract text from all shapes
+                for shape in slide.shapes:
+                    # Text frames (text boxes, titles, etc.)
+                    if shape.has_text_frame:
+                        for paragraph in shape.text_frame.paragraphs:
+                            para_text = ""
+                            for run in paragraph.runs:
+                                para_text += run.text
+                            if para_text.strip():
+                                slide_text_parts.append(para_text.strip())
+                    # Tables in slides
+                    if shape.has_table:
+                        table = shape.table
+                        table_rows = []
+                        for row in table.rows:
+                            row_cells = []
+                            for cell in row.cells:
+                                cell_text = ""
+                                for paragraph in cell.text_frame.paragraphs:
+                                    for run in paragraph.runs:
+                                        cell_text += run.text
+                                row_cells.append(cell_text.strip())
+                            table_rows.append(" | ".join(row_cells))
+                        if table_rows:
+                            slide_text_parts.append("[Table]\n" + "\n".join(table_rows))
+                # Speaker notes
+                if slide.has_notes_slide:
+                    notes_frame = slide.notes_slide.notes_text_frame
+                    if notes_frame:
+                        notes_text = ""
+                        for paragraph in notes_frame.paragraphs:
+                            for run in paragraph.runs:
+                                notes_text += run.text
+                        if notes_text.strip():
+                            slide_text_parts.append(f"[Speaker Notes]\n{notes_text.strip()}")
+                if slide_text_parts:
+                    text_parts.append(f"--- Slide {slide_num} ---\n" + "\n".join(slide_text_parts))
+            if not text_parts:
+                return {
+                    "success": False,
+                    "error": "No text content found in PowerPoint file"
+                }
+            return {
+                "success": True,
+                "text": "\n\n".join(text_parts),
+                "method": "pptx extraction",
+                "slide_count": slide_count
+            }
+        except Exception as e:
+            return {"success": False, "error": f"PowerPoint processing error: {str(e)}"}
+    def _process_excel(self, file_path: str) -> dict:
+        """Process Excel files"""
+        try:
+            # Read all sheets
+            excel_file = pd.ExcelFile(file_path)
+            text_parts = []
+            for sheet_name in excel_file.sheet_names:
+                df = pd.read_excel(excel_file, sheet_name=sheet_name)
+                if not df.empty:
+                    # Convert to string representation
+                    sheet_text = f"=== Sheet: {sheet_name} ===\n"
+                    sheet_text += df.to_string(index=False)
+                    text_parts.append(sheet_text)
+            return {
+                "success": True,
+                "text": "\n\n".join(text_parts),
+                "method": "excel extraction",
+                "sheet_count": len(excel_file.sheet_names)
+            }
+        except Exception as e:
+            return {"success": False, "error": f"Excel processing error: {str(e)}"}
+    def _process_image(self, file_path: str) -> dict:
+        """Process images using OCR"""
+        result = ocr_service.extract_text(file_path)
+        if result['success']:
+            return {
+                "success": True,
+                "text": result['text'],
+                "method": f"OCR ({result.get('model', 'unknown')})"
+            }
+        else:
+            return {"success": False, "error": result['error']}
+    def _process_text(self, file_path: str) -> dict:
+        """Process plain text files"""
+        try:
+            # Try different encodings
+            encodings = ['utf-8', 'latin-1', 'cp1252']
+            for encoding in encodings:
+                try:
+                    with open(file_path, 'r', encoding=encoding) as f:
+                        text = f.read()
+                    return {
+                        "success": True,
+                        "text": text,
+                        "method": f"text read ({encoding})"
+                    }
+                except UnicodeDecodeError:
+                    continue
+            return {"success": False, "error": "Could not decode text file"}
+        except Exception as e:
+            return {"success": False, "error": f"Text processing error: {str(e)}"}
+# Singleton instance
+document_processor = DocumentProcessor()

services/metadata_extractor.py ADDED Viewed

	@@ -0,0 +1,446 @@

+"""
+Metadata Extractor Service
+Extracts structured metadata from insurance policy documents using AI.
+Handles various document formats and naming conventions.
+"""
+import re
+import json
+import requests
+from typing import Optional, Dict, List
+from config import Config
+from services.date_parser import date_parser
+from services.number_extractor import number_extractor
+class MetadataExtractor:
+    """Extract structured metadata from document content using AI and regex."""
+    # Default metadata schema
+    DEFAULT_METADATA = {
+        # Identity
+        "document_type": "",
+        "document_title": "",
+        "policy_number": "",
+        "insurer_name": "",
+        "issue_date": "",
+        # Parties
+        "insured_name": "",
+        "broker_name": "",
+        # Dates
+        "policy_start_date": "",
+        "policy_end_date": "",
+        "renewal_date": "",
+        "renewal_year": None,
+        # Financial
+        "sum_insured": None,
+        "premium_amount": None,
+        "tax_amount": None,
+        "deductible": None,
+        # Risk & Coverage
+        "policy_type": "",
+        "insured_property_type": "",
+        "coverage_type": [],
+        "exclusions_present": False,
+        "add_on_covers": [],
+        # Location & Asset
+        "property_address": "",
+        "city": "",
+        "state": "",
+        "pincode": "",
+        "construction_type": "",
+        # RAG helpers
+        "section_name": "",
+        "clause_reference": "",
+        "page_number": "",
+        "chunk_type": "full_document",
+        # Search helpers
+        "keywords": [],
+        "industry": "",
+        "is_manufacturing": False
+    }
+    # Field name variations commonly found in documents
+    FIELD_VARIATIONS = {
+        'insured_name': [
+            'insured', 'name of insured', 'proposer', 'policyholder',
+            'policy holder', 'insured party', 'insured name', 'name of the insured',
+            'assured', 'name of assured', 'customer name', 'client name'
+        ],
+        'insurer_name': [
+            'insurer', 'insurance company', 'underwriter', 'company name',
+            'issued by', 'insuring company'
+        ],
+        'policy_number': [
+            'policy no', 'policy number', 'policy #', 'certificate no',
+            'certificate number', 'policy ref', 'reference number', 'ref no'
+        ],
+        'sum_insured': [
+            'sum insured', 'total sum insured', 'tsi', 'si', 'insured value',
+            'coverage amount', 'insured amount', 'sum assured', 'cover amount',
+            'amount insured', 'value insured'
+        ],
+        'premium_amount': [
+            'premium', 'total premium', 'net premium', 'gross premium',
+            'annual premium', 'premium payable', 'premium amount'
+        ],
+        'policy_start_date': [
+            'start date', 'commencement', 'inception date', 'effective from',
+            'period from', 'from date', 'valid from', 'cover starts'
+        ],
+        'policy_end_date': [
+            'end date', 'expiry date', 'expiry', 'valid until', 'valid till',
+            'period to', 'to date', 'cover ends', 'expires on'
+        ],
+        'policy_type': [
+            'type of policy', 'policy type', 'cover type', 'insurance type',
+            'class of insurance', 'product name', 'product type', 'scheme name'
+        ],
+        'property_address': [
+            'address', 'risk location', 'location of risk', 'property address',
+            'insured location', 'premises address', 'site address'
+        ]
+    }
+    # Policy type patterns
+    POLICY_TYPES = {
+        'fire': ['fire', 'fire & allied', 'fire insurance', 'sfsp'],
+        'marine': ['marine', 'cargo', 'marine cargo', 'marine hull'],
+        'motor': ['motor', 'vehicle', 'car', 'two wheeler', 'automobile'],
+        'health': ['health', 'mediclaim', 'medical', 'hospitalization'],
+        'life': ['life', 'term', 'endowment', 'ulip'],
+        'property': ['property', 'building', 'structure', 'premises'],
+        'liability': ['liability', 'professional indemnity', 'pi', 'directors'],
+        'engineering': ['engineering', 'car', 'eai', 'cpm', 'boiler', 'machinery'],
+        'personal_accident': ['personal accident', 'pa', 'accident'],
+        'travel': ['travel', 'overseas', 'foreign travel'],
+        'home': ['home', 'householder', 'household'],
+        'group': ['group', 'employee', 'gpa', 'gmc']
+    }
+    # Industry classification patterns
+    INDUSTRY_PATTERNS = {
+        'manufacturing': ['manufacturing', 'factory', 'plant', 'production', 'industrial'],
+        'chemical': ['chemical', 'petrochemical', 'pharmaceutical', 'fertilizer'],
+        'automotive': ['automobile', 'automotive', 'tyre', 'tire', 'vehicle'],
+        'food_processing': ['food', 'beverage', 'dairy', 'agro'],
+        'textile': ['textile', 'garment', 'apparel', 'fabric'],
+        'it_services': ['software', 'it services', 'technology', 'tech'],
+        'banking': ['bank', 'finance', 'nbfc', 'financial services'],
+        'hospitality': ['hotel', 'restaurant', 'hospitality', 'resort'],
+        'healthcare': ['hospital', 'clinic', 'healthcare', 'medical'],
+        'retail': ['retail', 'shop', 'store', 'mall', 'supermarket'],
+        'real_estate': ['real estate', 'construction', 'builder', 'developer'],
+        'education': ['school', 'college', 'university', 'education', 'institute']
+    }
+    def __init__(self):
+        self.deepseek_api_key = getattr(Config, 'DEEPSEEK_API_KEY', '')
+        self.deepseek_base_url = getattr(Config, 'DEEPSEEK_BASE_URL', 'https://api.deepseek.com/v1')
+        self.deepseek_model = getattr(Config, 'DEEPSEEK_MODEL', 'deepseek-chat')
+    def extract_metadata(self, content: str, filename: str = "") -> Dict:
+        """
+        Extract structured metadata from document content.
+        Uses AI for complex extraction with regex fallback.
+        Args:
+            content: Document text content
+            filename: Original filename for context
+        Returns:
+            Dictionary with extracted metadata
+        """
+        # Start with default metadata
+        metadata = self.DEFAULT_METADATA.copy()
+        metadata['document_title'] = filename
+        # Try AI extraction first (more accurate)
+        if self.deepseek_api_key and len(content) > 100:
+            ai_metadata = self._extract_with_ai(content, filename)
+            if ai_metadata:
+                metadata.update({k: v for k, v in ai_metadata.items() if v})
+        # Fill in missing fields with regex extraction
+        metadata = self._extract_with_regex(content, metadata)
+        # Extract dates using date_parser
+        metadata = self._extract_dates(content, metadata)
+        # Extract numbers using number_extractor
+        metadata = self._extract_numbers(content, metadata)
+        # Determine policy type
+        if not metadata.get('policy_type'):
+            metadata['policy_type'] = self._detect_policy_type(content)
+        # Determine industry
+        if not metadata.get('industry'):
+            metadata['industry'] = self._detect_industry(content)
+        # Check if manufacturing
+        metadata['is_manufacturing'] = self._is_manufacturing(content, metadata)
+        # Extract keywords for search
+        metadata['keywords'] = self._extract_keywords(content, filename)
+        return metadata
+    def _extract_with_ai(self, content: str, filename: str) -> Optional[Dict]:
+        """Use DeepSeek AI to extract metadata."""
+        if not self.deepseek_api_key:
+            return None
+        # Truncate content to avoid token limits
+        max_content = content[:15000] if len(content) > 15000 else content
+        prompt = f"""Extract the following metadata from this insurance document. Return ONLY a valid JSON object with no explanation.
+Document filename: {filename}
+Document content:
+{max_content}
+Extract these fields (use empty string if not found, use null for missing numbers):
+{{
+    "document_type": "policy/endorsement/certificate/schedule/etc",
+    "policy_number": "",
+    "insurer_name": "name of insurance company",
+    "insured_name": "name of insured party/policyholder",
+    "broker_name": "",
+    "policy_type": "fire/motor/health/marine/property/liability/etc",
+    "sum_insured": null,
+    "premium_amount": null,
+    "deductible": null,
+    "policy_start_date": "YYYY-MM-DD format",
+    "policy_end_date": "YYYY-MM-DD format",
+    "property_address": "",
+    "city": "",
+    "state": "",
+    "pincode": "",
+    "construction_type": "",
+    "insured_property_type": "",
+    "coverage_type": [],
+    "add_on_covers": [],
+    "industry": ""
+}}
+Return ONLY the JSON object, no markdown, no explanation."""
+        try:
+            response = requests.post(
+                f"{self.deepseek_base_url}/chat/completions",
+                headers={
+                    "Authorization": f"Bearer {self.deepseek_api_key}",
+                    "Content-Type": "application/json"
+                },
+                json={
+                    "model": self.deepseek_model,
+                    "messages": [{"role": "user", "content": prompt}],
+                    "max_tokens": 1000,
+                    "temperature": 0
+                },
+                timeout=30
+            )
+            if response.status_code == 200:
+                data = response.json()
+                ai_response = data['choices'][0]['message']['content'].strip()
+                # Parse JSON from response
+                # Remove markdown code blocks if present
+                if ai_response.startswith('```'):
+                    ai_response = re.sub(r'^```(?:json)?\n?', '', ai_response)
+                    ai_response = re.sub(r'\n?```$', '', ai_response)
+                return json.loads(ai_response)
+        except Exception as e:
+            print(f"[METADATA] AI extraction failed: {e}")
+        return None
+    def _extract_with_regex(self, content: str, metadata: Dict) -> Dict:
+        """Extract metadata using regex patterns."""
+        content_lower = content.lower()
+        # Extract fields using variations
+        for field, variations in self.FIELD_VARIATIONS.items():
+            if metadata.get(field):  # Already extracted
+                continue
+            for variation in variations:
+                # Look for pattern: "variation: value" or "variation - value"
+                pattern = rf'{re.escape(variation)}\s*[:|-]\s*([^\n]+)'
+                match = re.search(pattern, content_lower)
+                if match:
+                    value = match.group(1).strip()
+                    # Clean up the value
+                    value = re.sub(r'\s+', ' ', value)[:200]  # Limit length
+                    if value and len(value) > 2:
+                        metadata[field] = value
+                        break
+        # Extract policy number (often in specific formats)
+        if not metadata.get('policy_number'):
+            # Common policy number patterns
+            patterns = [
+                r'policy\s*(?:no|number|#)?\s*[:.]?\s*([A-Z0-9/-]{5,30})',
+                r'([A-Z]{2,5}[/-]?\d{6,15})',
+                r'(\d{10,20})'
+            ]
+            for pattern in patterns:
+                match = re.search(pattern, content, re.IGNORECASE)
+                if match:
+                    metadata['policy_number'] = match.group(1).strip()
+                    break
+        # Extract pincode
+        if not metadata.get('pincode'):
+            match = re.search(r'\b(\d{6})\b', content)
+            if match:
+                metadata['pincode'] = match.group(1)
+        return metadata
+    def _extract_dates(self, content: str, metadata: Dict) -> Dict:
+        """Extract dates using date_parser."""
+        dates = date_parser.extract_dates_from_text(content)
+        for date_info in dates:
+            context = date_info['context']
+            date_str = date_info['date_str']
+            if context == 'start' and not metadata.get('policy_start_date'):
+                metadata['policy_start_date'] = date_str
+            elif context == 'end' and not metadata.get('policy_end_date'):
+                metadata['policy_end_date'] = date_str
+            elif context == 'renewal' and not metadata.get('renewal_date'):
+                metadata['renewal_date'] = date_str
+            elif context == 'issue' and not metadata.get('issue_date'):
+                metadata['issue_date'] = date_str
+        # Calculate renewal date if not found but we have end date
+        if not metadata.get('renewal_date') and metadata.get('policy_end_date'):
+            end_date = date_parser.parse_date(metadata['policy_end_date'])
+            if end_date:
+                metadata['renewal_date'] = metadata['policy_end_date']
+                metadata['renewal_year'] = end_date.year
+        # Set renewal year
+        if metadata.get('renewal_date') and not metadata.get('renewal_year'):
+            renewal = date_parser.parse_date(metadata['renewal_date'])
+            if renewal:
+                metadata['renewal_year'] = renewal.year
+        return metadata
+    def _extract_numbers(self, content: str, metadata: Dict) -> Dict:
+        """Extract numerical values using number_extractor."""
+        numbers = number_extractor.extract_numbers(content)
+        for num_info in numbers:
+            context = num_info['context']
+            value = num_info['value']
+            if context == 'sum_insured' and not metadata.get('sum_insured'):
+                metadata['sum_insured'] = value
+            elif context == 'premium' and not metadata.get('premium_amount'):
+                metadata['premium_amount'] = value
+            elif context == 'tax' and not metadata.get('tax_amount'):
+                metadata['tax_amount'] = value
+            elif context == 'deductible' and not metadata.get('deductible'):
+                metadata['deductible'] = value
+        # If sum_insured not found, use largest number
+        if not metadata.get('sum_insured'):
+            sum_insured = number_extractor.extract_sum_insured(content)
+            if sum_insured:
+                metadata['sum_insured'] = sum_insured
+        return metadata
+    def _detect_policy_type(self, content: str) -> str:
+        """Detect policy type from content."""
+        content_lower = content.lower()
+        for policy_type, keywords in self.POLICY_TYPES.items():
+            if any(kw in content_lower for kw in keywords):
+                return policy_type
+        return "general"
+    def _detect_industry(self, content: str) -> str:
+        """Detect industry classification from content."""
+        content_lower = content.lower()
+        for industry, keywords in self.INDUSTRY_PATTERNS.items():
+            if any(kw in content_lower for kw in keywords):
+                return industry
+        return ""
+    def _is_manufacturing(self, content: str, metadata: Dict) -> bool:
+        """Check if this is a manufacturing-related policy."""
+        content_lower = content.lower()
+        manufacturing_keywords = [
+            'manufacturing', 'factory', 'plant', 'production', 'industrial',
+            'machinery', 'equipment', 'boiler', 'pressure vessel'
+        ]
+        if metadata.get('industry') == 'manufacturing':
+            return True
+        return any(kw in content_lower for kw in manufacturing_keywords)
+    def _extract_keywords(self, content: str, filename: str) -> List[str]:
+        """Extract keywords for search enhancement."""
+        keywords = []
+        # Add words from filename
+        filename_words = re.findall(r'[A-Za-z]{3,}', filename)
+        keywords.extend([w.lower() for w in filename_words])
+        # Extract capitalized words (likely proper nouns/company names)
+        proper_nouns = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b', content[:5000])
+        keywords.extend([n.lower() for n in proper_nouns[:20]])
+        # Remove duplicates and common words
+        stop_words = {'the', 'and', 'for', 'with', 'this', 'that', 'from', 'are', 'was', 'were'}
+        keywords = list(set(kw for kw in keywords if kw not in stop_words and len(kw) > 2))
+        return keywords[:30]  # Limit to 30 keywords
+    def extract_metadata_batch(self, documents: List[Dict]) -> List[Dict]:
+        """
+        Extract metadata for multiple documents.
+        Args:
+            documents: List of dicts with 'content' and 'filename' keys
+        Returns:
+            List of metadata dicts
+        """
+        results = []
+        for doc in documents:
+            try:
+                metadata = self.extract_metadata(
+                    doc.get('content', ''),
+                    doc.get('filename', '')
+                )
+                metadata['doc_id'] = doc.get('doc_id', '')
+                results.append(metadata)
+            except Exception as e:
+                print(f"[METADATA] Error extracting from {doc.get('filename')}: {e}")
+                results.append({**self.DEFAULT_METADATA, 'doc_id': doc.get('doc_id', '')})
+        return results
+# Singleton instance
+metadata_extractor = MetadataExtractor()

services/number_extractor.py ADDED Viewed

	@@ -0,0 +1,302 @@

+"""
+Number Extractor Service
+Handles extraction and normalization of numerical values from insurance documents.
+Supports:
+- Indian number formats (lakhs, crores)
+- Currency symbols (₹, Rs., INR, USD)
+- Comma-separated numbers
+- Word numbers (One Hundred Million)
+- Percentage values
+"""
+import re
+from typing import Optional, List, Dict, Tuple
+from decimal import Decimal, InvalidOperation
+class NumberExtractor:
+    """Extract and normalize numerical values from text."""
+    # Indian number words
+    WORD_TO_NUMBER = {
+        'zero': 0, 'one': 1, 'two': 2, 'three': 3, 'four': 4,
+        'five': 5, 'six': 6, 'seven': 7, 'eight': 8, 'nine': 9,
+        'ten': 10, 'eleven': 11, 'twelve': 12, 'thirteen': 13,
+        'fourteen': 14, 'fifteen': 15, 'sixteen': 16, 'seventeen': 17,
+        'eighteen': 18, 'nineteen': 19, 'twenty': 20, 'thirty': 30,
+        'forty': 40, 'fifty': 50, 'sixty': 60, 'seventy': 70,
+        'eighty': 80, 'ninety': 90
+    }
+    MAGNITUDE_WORDS = {
+        'hundred': 100,
+        'thousand': 1000,
+        'lakh': 100000,
+        'lac': 100000,
+        'lakhs': 100000,
+        'lacs': 100000,
+        'million': 1000000,
+        'crore': 10000000,
+        'crores': 10000000,
+        'billion': 1000000000
+    }
+    # Currency patterns
+    CURRENCY_PATTERNS = {
+        'INR': [r'₹', r'Rs\.?', r'INR', r'Rupees?'],
+        'USD': [r'\$', r'USD', r'Dollars?'],
+        'EUR': [r'€', r'EUR', r'Euros?']
+    }
+    # Context keywords for identifying number types
+    NUMBER_CONTEXTS = {
+        'sum_insured': ['sum insured', 'total sum insured', 'tsi', 'si', 'insured value',
+                        'coverage amount', 'insured amount', 'sum assured'],
+        'premium': ['premium', 'premium amount', 'total premium', 'net premium',
+                    'gross premium', 'annual premium'],
+        'tax': ['tax', 'gst', 'cgst', 'sgst', 'igst', 'service tax'],
+        'deductible': ['deductible', 'excess', 'franchise']
+    }
+    def __init__(self):
+        """Build the extractor and precompile the regex patterns it uses."""
+        # Sets pattern_currency, pattern_number, pattern_magnitude and pattern_percent
+        self._compile_patterns()
+    def _compile_patterns(self):
+        """Compile regex patterns for number extraction."""
+        # Currency amount: ₹1,00,000 or Rs. 1,00,000.00 or INR 100000
+        currency_symbols = '|'.join(
+            p for patterns in self.CURRENCY_PATTERNS.values() for p in patterns
+        )
+        self.pattern_currency = re.compile(
+            rf'({currency_symbols})\s*([\d,]+(?:\.\d{{1,2}})?)',
+            re.IGNORECASE
+        )
+        # Plain number with commas: 1,00,00,000 or 100,000,000
+        self.pattern_number = re.compile(
+            r'\b([\d,]+(?:\.\d+)?)\b'
+        )
+        # Number with magnitude words: 10 crore, 5.5 lakhs
+        magnitude_words = '|'.join(self.MAGNITUDE_WORDS.keys())
+        self.pattern_magnitude = re.compile(
+            rf'\b([\d,.]+)\s*({magnitude_words})\b',
+            re.IGNORECASE
+        )
+        # Percentage: 10%, 10.5 percent
+        self.pattern_percent = re.compile(
+            r'\b([\d.]+)\s*(?:%|percent|percentage)\b',
+            re.IGNORECASE
+        )
+    def parse_number(self, num_str: str) -> Optional[float]:
+        """
+        Parse a number string to float, handling Indian format.
+        Args:
+            num_str: Number string (e.g., "1,00,000" or "100,000.50")
+        Returns:
+            Float value or None
+        """
+        if not num_str:
+            return None
+        # Remove commas and spaces
+        num_str = str(num_str).replace(',', '').replace(' ', '').strip()
+        try:
+            return float(num_str)
+        except ValueError:
+            return None
+    def parse_indian_number(self, text: str) -> Optional[float]:
+        """
+        Parse Indian number format (lakhs, crores).
+        Args:
+            text: Text like "10 crore" or "5.5 lakhs"
+        Returns:
+            Float value or None
+        """
+        text = text.lower().strip()
+        match = self.pattern_magnitude.search(text)
+        if match:
+            num_part = self.parse_number(match.group(1))
+            magnitude = self.MAGNITUDE_WORDS.get(match.group(2).lower(), 1)
+            if num_part is not None:
+                return num_part * magnitude
+        return None
+    def word_to_number(self, text: str) -> Optional[int]:
+        """
+        Convert word numbers to integers.
+        Args:
+            text: Text like "One Hundred Million"
+        Returns:
+            Integer value or None
+        """
+        text = text.lower().strip()
+        words = text.split()
+        if not words:
+            return None
+        result = 0
+        current = 0
+        for word in words:
+            word = word.strip(',').strip()
+            if word in self.WORD_TO_NUMBER:
+                current += self.WORD_TO_NUMBER[word]
+            elif word in self.MAGNITUDE_WORDS:
+                magnitude = self.MAGNITUDE_WORDS[word]
+                if magnitude >= 1000:
+                    current = (current or 1) * magnitude
+                    result += current
+                    current = 0
+                else:
+                    current *= magnitude
+            elif word == 'and':
+                continue
+            else:
+                # Unknown word, try to parse as number
+                try:
+                    current += int(word)
+                except ValueError:
+                    pass
+        result += current
+        return result if result > 0 else None
+    def extract_numbers(self, text: str) -> List[Dict]:
+        """
+        Extract all numerical values from text with context.
+        Args:
+            text: Text to search for numbers
+        Returns:
+            List of dicts with number info:
+            [{"value": 101000000, "context": "sum_insured", "currency": "INR",
+              "original": "₹10,10,00,000"}]
+        """
+        if not text:
+            return []
+        results = []
+        text_lower = text.lower()
+        # Extract currency amounts
+        for match in self.pattern_currency.finditer(text):
+            currency_symbol = match.group(1)
+            num_str = match.group(2)
+            value = self.parse_number(num_str)
+            if value is not None and value > 0:
+                # Determine currency
+                currency = 'INR'  # Default
+                for curr, patterns in self.CURRENCY_PATTERNS.items():
+                    if any(re.match(p, currency_symbol, re.IGNORECASE) for p in patterns):
+                        currency = curr
+                        break
+                # Determine context
+                context = self._determine_number_context(text_lower, match.start())
+                results.append({
+                    'value': value,
+                    'context': context,
+                    'currency': currency,
+                    'original': match.group(),
+                    'position': match.start()
+                })
+        # Extract numbers with magnitude words (10 crore, 5 lakhs)
+        for match in self.pattern_magnitude.finditer(text):
+            value = self.parse_indian_number(match.group())
+            if value is not None and value > 0:
+                context = self._determine_number_context(text_lower, match.start())
+                results.append({
+                    'value': value,
+                    'context': context,
+                    'currency': 'INR',  # Lakhs/crores are typically INR
+                    'original': match.group(),
+                    'position': match.start()
+                })
+        # Remove duplicates based on position (currency matches often overlap with magnitude)
+        seen_positions = set()
+        unique_results = []
+        for r in sorted(results, key=lambda x: -x['value']):  # Prefer larger values
+            # Check if any existing result overlaps with this one
+            overlaps = False
+            for pos in seen_positions:
+                if abs(r['position'] - pos) < 20:  # Within 20 chars
+                    overlaps = True
+                    break
+            if not overlaps:
+                seen_positions.add(r['position'])
+                unique_results.append(r)
+        return unique_results
+    def _determine_number_context(self, text: str, position: int) -> str:
+        """Determine what type of number this is based on surrounding text."""
+        # Look at 100 chars before the number
+        context_start = max(0, position - 100)
+        context_text = text[context_start:position]
+        for num_type, keywords in self.NUMBER_CONTEXTS.items():
+            if any(kw in context_text for kw in keywords):
+                return num_type
+        return 'unknown'
+    def extract_sum_insured(self, text: str) -> Optional[float]:
+        """Extract the sum insured value from text."""
+        numbers = self.extract_numbers(text)
+        # First, look for explicitly labeled sum insured
+        for num in numbers:
+            if num['context'] == 'sum_insured':
+                return num['value']
+        # Otherwise, return the largest number (likely to be sum insured)
+        if numbers:
+            return max(num['value'] for num in numbers)
+        return None
+    def extract_premium(self, text: str) -> Optional[float]:
+        """Extract the premium amount from text."""
+        numbers = self.extract_numbers(text)
+        for num in numbers:
+            if num['context'] == 'premium':
+                return num['value']
+        return None
+    def calculate_sum(self, values: List[float]) -> float:
+        """Calculate sum of values."""
+        return sum(v for v in values if v is not None)
+    def calculate_average(self, values: List[float]) -> Optional[float]:
+        """Calculate average of values."""
+        valid_values = [v for v in values if v is not None]
+        if valid_values:
+            return sum(valid_values) / len(valid_values)
+        return None
+# Singleton instance: built once at import time (its regex patterns are compiled
+# in __init__) and imported by other services instead of creating new extractors
+number_extractor = NumberExtractor()

services/ocr_service.py ADDED Viewed

	@@ -0,0 +1,231 @@

+"""
+OCR Service - Document Text Extraction via OpenRouter Vision Models
+Handles OCR for images and scanned documents using vision-capable models with fallback
+"""
+import requests
+import base64
+from pathlib import Path
+from config import Config
+class OCRService:
+    def __init__(self):
+        self.api_key = Config.OPENROUTER_API_KEY
+        self.base_url = Config.OPENROUTER_BASE_URL
+        # Vision-capable models for OCR with fallback order
+        # Only models that support image/vision input can be used for OCR
+        self.ocr_models = [
+            "google/gemma-3-27b-it:free",      # Primary - Largest Gemma 3
+            "google/gemma-3-12b-it:free",      # Fallback 1
+            "google/gemma-3-4b-it:free",       # Fallback 2
+            "google/gemma-3n-e4b-it:free",     # Fallback 3
+            "google/gemma-3n-e2b-it:free",     # Fallback 4 - Smallest
+        ]
+    def _encode_image(self, image_path: str) -> str:
+        """Encode image to base64"""
+        with open(image_path, "rb") as f:
+            return base64.b64encode(f.read()).decode('utf-8')
+    def _get_mime_type(self, file_path: str) -> str:
+        """Get MIME type from file extension"""
+        ext = Path(file_path).suffix.lower()
+        mime_types = {
+            '.png': 'image/png',
+            '.jpg': 'image/jpeg',
+            '.jpeg': 'image/jpeg',
+            '.gif': 'image/gif',
+            '.webp': 'image/webp',
+            '.pdf': 'application/pdf'
+        }
+        return mime_types.get(ext, 'image/png')
+    def _call_ocr_model(self, image_data: str, mime_type: str, model: str = None) -> dict:
+        """Call OpenRouter vision model for OCR"""
+        if not self.api_key:
+            return {"success": False, "error": "OpenRouter API key not configured"}
+        headers = {
+            "Authorization": f"Bearer {self.api_key}",
+            "Content-Type": "application/json",
+            "HTTP-Referer": "https://notebooklm-fast.hf.space",
+            "X-Title": "NotebookLM Fast OCR"
+        }
+        # OCR prompt for thorough text extraction
+        ocr_prompt = """You are a precise OCR system. Extract EVERY SINGLE piece of text from this image/document with 100% accuracy.
+CRITICAL INSTRUCTIONS:
+1. Extract ALL text - do not skip or miss ANY section, heading, paragraph, or text block
+2. Include ALL sections (e.g., Education, Experience, Skills, Contact, Summary, Projects, etc.)
+3. Preserve the exact structure and hierarchy of the document
+4. Include all names, dates, numbers, addresses, phone numbers, emails, URLs
+5. Include text from headers, footers, sidebars, and any text boxes
+6. For tables, use markdown table format with all rows and columns
+7. For bullet points and lists, preserve the list structure
+8. Include any small text, footnotes, or captions
+OUTPUT FORMAT:
+- Return ONLY the extracted text, explanations
+- Maintain the original reading order (top to bottom, left to right)
+- Use markdown formatting for structure (headers, lists, tables)
+- Separate sections clearly with line breaks
+IMPORTANT: Do not summarize or paraphrase. Extract the EXACT text as it appears."""
+        payload = {
+            "model": model or self.ocr_models[0],
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:{mime_type};base64,{image_data}"
+                            }
+                        },
+                        {
+                            "type": "text",
+                            "text": ocr_prompt
+                        }
+                    ]
+                }
+            ],
+            "max_tokens": 4096,
+            "temperature": 0.1  # Low temperature for accurate extraction
+        }
+        try:
+            response = requests.post(
+                f"{self.base_url}/chat/completions",
+                headers=headers,
+                json=payload,
+                timeout=120  # Longer timeout for OCR
+            )
+            if response.status_code == 200:
+                data = response.json()
+                text = data.get('choices', [{}])[0].get('message', {}).get('content', '')
+                if text:
+                    return {"success": True, "text": text, "model": model or self.ocr_models[0]}
+                else:
+                    return {"success": False, "error": "No text extracted from response"}
+            else:
+                return {
+                    "success": False,
+                    "error": f"OpenRouter API error: {response.status_code} - {response.text}"
+                }
+        except requests.exceptions.Timeout:
+            return {"success": False, "error": "Request timed out. Please try again."}
+        except Exception as e:
+            return {"success": False, "error": str(e)}
+    def _call_ocr_with_fallback(self, image_data: str, mime_type: str) -> dict:
+        """Try OCR with fallback models"""
+        last_error = None
+        for model in self.ocr_models:
+            print(f"Attempting OCR with {model}...")
+            result = self._call_ocr_model(image_data, mime_type, model)
+            if result['success']:
+                print(f"OCR successful with {model}")
+                return result
+            else:
+                last_error = result.get('error', 'Unknown error')
+                print(f"OCR failed with {model}: {last_error}")
+                continue
+        return {"success": False, "error": f"All OCR models failed. Last error: {last_error}"}
+    def extract_text_from_pdf(self, pdf_path: str) -> dict:
+        """
+        Extract text from entire PDF using OpenRouter vision models.
+        Converts PDF pages to images and processes them.
+        """
+        import fitz  # PyMuPDF
+        import os
+        try:
+            doc = fitz.open(pdf_path)
+            total_pages = len(doc)
+            print(f"Processing {total_pages} page PDF with OpenRouter vision OCR...")
+            all_text = []
+            for page_num in range(total_pages):
+                print(f"Processing page {page_num + 1}/{total_pages}...")
+                page = doc.load_page(page_num)
+                # Render page to image at good resolution for OCR
+                mat = fitz.Matrix(2, 2)  # 2x zoom for better quality
+                pix = page.get_pixmap(matrix=mat)
+                img_data = pix.tobytes("png")
+                # Encode to base64
+                image_base64 = base64.b64encode(img_data).decode('utf-8')
+                # OCR the page with fallback
+                result = self._call_ocr_with_fallback(image_base64, 'image/png')
+                if result['success']:
+                    all_text.append(f"--- Page {page_num + 1} ---\n{result['text']}")
+                else:
+                    all_text.append(f"--- Page {page_num + 1} ---\n[Error extracting text: {result['error']}]")
+            doc.close()
+            combined_text = "\n\n".join(all_text)
+            return {
+                "success": True,
+                "text": combined_text,
+                "model": "OpenRouter Vision OCR"
+            }
+        except Exception as e:
+            return {"success": False, "error": f"Error processing PDF: {str(e)}"}
+    def _process_pdf_in_batches(self, pdf_path: str, total_pages: int) -> dict:
+        """Split PDF into chunks and process sequentially - kept for compatibility"""
+        return self.extract_text_from_pdf(pdf_path)
+    def _send_pdf_to_api(self, pdf_path: str) -> dict:
+        """Process PDF by converting to images - OpenRouter doesn't have native PDF support"""
+        return self.extract_text_from_pdf(pdf_path)
+    def extract_text(self, image_path: str) -> dict:
+        """
+        Extract text from image using OpenRouter vision models with fallback
+        """
+        image_data = self._encode_image(image_path)
+        mime_type = self._get_mime_type(image_path)
+        print(f"Attempting OCR with OpenRouter vision models...")
+        result = self._call_ocr_with_fallback(image_data, mime_type)
+        if result['success']:
+            print(f"OCR successful with {result.get('model', 'OpenRouter')}")
+        else:
+            print(f"OCR failed: {result['error']}")
+        return result
+    def extract_text_from_pdf_page(self, page_image_data: bytes,
+                                    page_num: int) -> dict:
+        """Extract text from a PDF page image"""
+        image_data = base64.b64encode(page_image_data).decode('utf-8')
+        print(f"Extracting text from PDF page {page_num} with OpenRouter vision OCR...")
+        result = self._call_ocr_with_fallback(image_data, 'image/png')
+        return result
+# Singleton instance: created at import time, reading the OpenRouter settings
+# from Config in OCRService.__init__
+ocr_service = OCRService()

services/rag_service.py ADDED Viewed

	@@ -0,0 +1,1870 @@

+"""
+RAG Service - Retrieval Augmented Generation
+Handles:
+- Text chunking with overlap
+- GraphRAG-based context assembly
+- Query processing with AI response generation
+- Aggregate queries across all documents
+- Date-based filtering and calculations
+"""
+import requests
+import re
+from typing import Optional, List, Dict
+from config import Config
+from services.chroma_service import chroma_service
+from services.date_parser import date_parser
+from services.number_extractor import number_extractor
+class RAGService:
+    def __init__(self):
+        # DeepSeek API (primary - highly capable)
+        self.deepseek_api_key = getattr(Config, 'DEEPSEEK_API_KEY', '')
+        self.deepseek_base_url = getattr(Config, 'DEEPSEEK_BASE_URL', 'https://api.deepseek.com/v1')
+        self.deepseek_model = getattr(Config, 'DEEPSEEK_MODEL', 'deepseek-chat')
+        self.use_deepseek = getattr(Config, 'USE_DEEPSEEK', True) and self.deepseek_api_key
+        # OpenRouter API (fallback)
+        self.api_key = Config.OPENROUTER_API_KEY
+        self.base_url = Config.OPENROUTER_BASE_URL
+        self.model_map = Config.MODEL_MAP
+        self.fallback_order = Config.FALLBACK_ORDER
+        # RAG settings
+        self.chunk_size = Config.CHUNK_SIZE
+        self.chunk_overlap = Config.CHUNK_OVERLAP
+        self.top_k = Config.TOP_K_RESULTS
+        self.temperature = Config.AI_TEMPERATURE
+        self.relevance_threshold = Config.RELEVANCE_THRESHOLD
+        self.max_history = Config.MAX_CONVERSATION_HISTORY
+        self.max_tokens = getattr(Config, 'AI_MAX_TOKENS', 1024)
+        self.timeout = getattr(Config, 'AI_TIMEOUT', 15)
+    def chunk_text(self, text: str) -> list[dict]:
+        """
+        Split text into overlapping chunks for better retrieval
+        Uses sentence-aware chunking for coherence
+        """
+        # Clean and normalize text
+        text = re.sub(r'\n{3,}', '\n\n', text)
+        text = text.strip()
+        if len(text) <= self.chunk_size:
+            return [{"text": text, "start": 0, "end": len(text)}]
+        chunks = []
+        sentences = self._split_into_sentences(text)
+        current_chunk = ""
+        current_start = 0
+        char_pos = 0
+        for sentence in sentences:
+            sentence_len = len(sentence)
+            if len(current_chunk) + sentence_len <= self.chunk_size:
+                current_chunk += sentence
+            else:
+                if current_chunk:
+                    chunks.append({
+                        "text": current_chunk.strip(),
+                        "start": current_start,
+                        "end": char_pos
+                    })
+                # Start new chunk with overlap
+                overlap_start = max(0, len(current_chunk) - self.chunk_overlap)
+                current_chunk = current_chunk[overlap_start:] + sentence
+                current_start = char_pos - (len(current_chunk) - sentence_len)
+            char_pos += sentence_len
+        # Add final chunk
+        if current_chunk.strip():
+            chunks.append({
+                "text": current_chunk.strip(),
+                "start": current_start,
+                "end": char_pos
+            })
+        return chunks
+    def _split_into_sentences(self, text: str) -> list[str]:
+        """Split text into sentences while preserving delimiters"""
+        # Simple sentence splitting
+        pattern = r'(?<=[.!?])\s+(?=[A-Z])'
+        sentences = re.split(pattern, text)
+        return [s + ' ' for s in sentences]
+    def process_document(self, user_id: str, doc_id: str, content: str, bucket_id: str = ""):
+        """
+        Process document for RAG:
+        1. Chunk the text
+        2. Store chunks in ChromaDB
+        """
+        chunks = self.chunk_text(content)
+        chroma_service.store_chunks(doc_id, user_id, chunks, bucket_id)
+        return len(chunks)
+    def _expand_query(self, query: str) -> list[str]:
+        """
+        Generate query variations for better retrieval.
+        Extracts key terms and creates multiple search angles.
+        """
+        import re
+        queries = [query]
+        query_lower = query.lower()
+        # Map numbers to words for module/section matching
+        word_map = {
+            '1': 'one', '2': 'two', '3': 'three', '4': 'four',
+            '5': 'five', '6': 'six', '7': 'seven', '8': 'eight',
+            '9': 'nine', '10': 'ten', '11': 'eleven', '12': 'twelve'
+        }
+        # Extract key terms (nouns, proper nouns) - words that are likely searchable
+        # Remove common question words and stop words
+        stop_words = {'what', 'who', 'where', 'when', 'why', 'how', 'is', 'are', 'was', 'were',
+                      'the', 'a', 'an', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by',
+                      'from', 'about', 'tell', 'me', 'can', 'you', 'please', 'give', 'show',
+                      'list', 'all', 'find', 'get', 'does', 'do', 'did', 'has', 'have', 'had',
+                      'this', 'that', 'these', 'those', 'and', 'or', 'but', 'if', 'then',
+                      'there', 'their', 'they', 'them', 'it', 'its', 'be', 'been', 'being',
+                      'any', 'some', 'my', 'your', 'our', 'his', 'her', 'which', 'each'}
+        # Extract potential key terms (2+ chars, not stop words)
+        words = re.findall(r'\b[a-zA-Z]{2,}\b', query_lower)
+        key_terms = [w for w in words if w not in stop_words]
+        # Add each key term as a separate query for direct matching
+        for term in key_terms[:5]:  # Top 5 key terms
+            if len(term) > 3:  # Only meaningful terms
+                queries.append(term)
+        # Add combinations of key terms
+        if len(key_terms) >= 2:
+            queries.append(' '.join(key_terms[:3]))  # First 3 key terms
+        # Find module/section references and create variations
+        patterns = [
+            (r'module\s*(\d+)', 'module'),
+            (r'section\s*(\d+)', 'section'),
+            (r'chapter\s*(\d+)', 'chapter'),
+            (r'part\s*(\d+)', 'part'),
+        ]
+        for pattern, prefix in patterns:
+            match = re.search(pattern, query_lower)
+            if match:
+                num = match.group(1)
+                # Add number word version: "module five"
+                if num in word_map:
+                    queries.append(query_lower.replace(f'{prefix} {num}', f'{prefix} {word_map[num]}'))
+                # Add just the module reference: "module 5"
+                queries.append(f'{prefix} {num}')
+                # Add numbered list format: "5." or "5)"
+                queries.append(f'{num}.')
+                queries.append(f'{num})')
+                break
+        # Add question without question words for direct info retrieval
+        simplified = ' '.join(key_terms)
+        if simplified and simplified != query_lower:
+            queries.append(simplified)
+        # Deduplicate and limit
+        seen = set()
+        unique_queries = []
+        for q in queries:
+            q_clean = q.lower().strip()
+            if q_clean and q_clean not in seen and len(q_clean) > 1:
+                seen.add(q_clean)
+                unique_queries.append(q)
+        return unique_queries[:8]  # Increased to 8 variations for better coverage
+    def _detect_document_reference(self, query: str, available_docs: list[dict]) -> list[str]:
+        """
+        Detect if user is asking about a specific document by name.
+        Returns list of matching doc_ids to prioritize in search.
+        """
+        query_lower = query.lower()
+        matching_doc_ids = []
+        for doc in available_docs:
+            filename = doc.get('filename', '')
+            if not filename:
+                continue
+            # Remove extension and normalize
+            name_parts = filename.lower().replace('.pdf', '').replace('.docx', '').replace('.xlsx', '').replace('.pptx', '').replace('.txt', '').replace('.md', '')
+            # Check if document name appears in query
+            # Handle common patterns like "the ABC document", "from XYZ file", "in document ABC"
+            if name_parts in query_lower or any(part in query_lower for part in name_parts.split('_') if len(part) > 3):
+                matching_doc_ids.append(doc.get('doc_id'))
+        return matching_doc_ids
+    def _parse_query_with_ai(self, query: str) -> dict:
+        """
+        Use DeepSeek AI to understand query intent and extract structured parameters.
+        This replaces hardcoded pattern matching with intelligent query understanding.
+        Returns dict with:
+        - intent: list|count|rank|calculate|compare|specific|summarize
+        - needs_metadata: True if needs aggregate data across all documents
+        - filters: dict of field->value filters
+        - sort_by: field to sort by (or None)
+        - sort_order: 'desc' or 'asc'
+        - limit: number of results (or None for all)
+        - calculation: sum|average|max|min (or None)
+        - calculation_field: field for calculation
+        """
+        import json
+        system_prompt = """You are a query parser for an insurance document system.
+Analyze the user's question and extract structured parameters to help retrieve the right data.
+Available fields for filtering:
+- is_manufacturing (boolean): True if asking about manufacturing industry/sector
+- policy_type (string): fire, marine, motor, health, liability, property, engineering, etc.
+- industry (string): manufacturing, retail, IT, healthcare, construction, food, textile, etc.
+- insurer_name (string): insurance company name
+- insured_name (string): policyholder/company name
+- broker_name (string): broker or agent name
+- city (string): city name
+- state (string): state name
+- renewal_year (integer): 2024, 2025, 2026, etc.
+Available fields for sorting:
+- premium_amount: net premium, gross premium, premium
+- sum_insured: coverage amount, insured value
+- renewal_date: renewal date, expiry date
+- policy_start_date: inception date, start date
+Return ONLY valid JSON (no markdown, no explanation):
+{
+  "intent": "list|count|rank|calculate|compare|specific|summarize",
+  "needs_metadata": true or false,
+  "filters": {"field_name": "value"},
+  "sort_by": "field_name" or null,
+  "sort_order": "desc" or "asc",
+  "limit": number or null,
+  "calculation": "sum|average|max|min|count" or null,
+  "calculation_field": "premium_amount|sum_insured" or null
+}
+Examples:
+Query: "top 5 manufacturing policies by premium"
+{"intent":"rank","needs_metadata":true,"filters":{"is_manufacturing":true},"sort_by":"premium_amount","sort_order":"desc","limit":5,"calculation":null,"calculation_field":null}
+Query: "total sum insured for all fire policies"
+{"intent":"calculate","needs_metadata":true,"filters":{"policy_type":"fire"},"sort_by":null,"sort_order":"desc","limit":null,"calculation":"sum","calculation_field":"sum_insured"}
+Query: "what is covered in the ABC policy document?"
+{"intent":"specific","needs_metadata":false,"filters":{},"sort_by":null,"sort_order":"desc","limit":null,"calculation":null,"calculation_field":null}
+Query: "list all policies renewing in 2026"
+{"intent":"list","needs_metadata":true,"filters":{"renewal_year":2026},"sort_by":"renewal_date","sort_order":"asc","limit":null,"calculation":null,"calculation_field":null}
+Query: "how many manufacturing companies do we have?"
+{"intent":"count","needs_metadata":true,"filters":{"is_manufacturing":true},"sort_by":null,"sort_order":"desc","limit":null,"calculation":"count","calculation_field":null}
+Query: "compare policy A and policy B"
+{"intent":"compare","needs_metadata":false,"filters":{},"sort_by":null,"sort_order":"desc","limit":null,"calculation":null,"calculation_field":null}"""
+        messages = [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": f"Parse this query: {query}"}
+        ]
+        try:
+            # Use non-streaming call for quick parsing
+            response = self._call_deepseek_sync(messages, max_tokens=300)
+            # Parse JSON response
+            parsed = json.loads(response.strip())
+            print(f"[AI QUERY PARSER] Parsed: {json.dumps(parsed, indent=2)}")
+            return parsed
+        except Exception as e:
+            print(f"[AI QUERY PARSER] Error: {e}, falling back to pattern matching")
+            # Fallback to basic detection
+            return {
+                "intent": "specific",
+                "needs_metadata": False,
+                "filters": {},
+                "sort_by": None,
+                "sort_order": "desc",
+                "limit": None,
+                "calculation": None,
+                "calculation_field": None
+            }
+    def _call_deepseek_sync(self, messages: list, max_tokens: int = 500) -> str:
+        """Synchronous call to DeepSeek for quick operations like query parsing."""
+        import requests
+        if not self.deepseek_api_key:
+            raise Exception("DeepSeek API key not configured")
+        response = requests.post(
+            "https://api.deepseek.com/v1/chat/completions",
+            headers={
+                "Authorization": f"Bearer {self.deepseek_api_key}",
+                "Content-Type": "application/json"
+            },
+            json={
+                "model": "deepseek-chat",
+                "messages": messages,
+                "max_tokens": max_tokens,
+                "temperature": 0.1  # Low temperature for consistent parsing
+            },
+            timeout=15
+        )
+        if response.status_code == 200:
+            return response.json()['choices'][0]['message']['content']
+        else:
+            raise Exception(f"DeepSeek API error: {response.status_code}")
+    def _detect_query_type(self, query: str, history: list[dict] = None) -> str:
+        """
+        Detect the type of query to optimize retrieval and response.
+        Returns: 'specific', 'aggregate', 'calculation', 'date_filter',
+                 'cross_document', 'followup', 'comparison', 'general'
+        NEW TYPES:
+        - 'aggregate': List all, count all, common across all documents
+        - 'calculation': Math operations (sum, average, total of numbers)
+        - 'date_filter': Date-based filtering (policies renewing in 2026)
+        """
+        query_lower = query.lower().strip()
+        # AGGREGATE patterns - queries that need to scan ALL documents
+        aggregate_patterns = [
+            'list all', 'give me all', 'show all', 'all policies', 'all documents',
+            'every policy', 'every document', 'all the policies', 'all the documents',
+            'how many policies', 'how many documents', 'count all', 'total number of',
+            'all manufacturing', 'all companies', 'all insured', 'all insurers',
+            'common', 'across all', 'in all documents', 'throughout all',
+            'summarize all', 'overview of all', 'complete list', 'full list',
+            'what are the', 'what policies', 'which companies', 'which policies'
+        ]
+        # CALCULATION patterns - queries needing math operations
+        calculation_patterns = [
+            'total sum', 'sum of', 'add up', 'combined', 'aggregate',
+            'total insured', 'total premium', 'total value', 'total amount',
+            'calculate', 'average', 'mean', 'maximum', 'minimum', 'highest', 'lowest',
+            'what is the total', 'how much total', 'sum insured across',
+            'cumulative', 'grand total'
+        ]
+        # DATE FILTER patterns - queries filtering by dates
+        date_patterns = [
+            'renew in', 'renewal in', 'expiring in', 'expire in', 'expiry in',
+            'renewing in 2024', 'renewing in 2025', 'renewing in 2026', 'renewing in 2027',
+            'expiring in 2024', 'expiring in 2025', 'expiring in 2026', 'expiring in 2027',
+            'policies in 2024', 'policies in 2025', 'policies in 2026', 'policies in 2027',
+            'before 2025', 'after 2025', 'before 2026', 'after 2026',
+            'next year', 'this year', 'last year', 'next month',
+            'valid until', 'valid till', 'due for renewal'
+        ]
+        # Followup indicators - pronouns and references to previous context
+        followup_patterns = [
+            'it', 'this', 'that', 'these', 'those', 'the same', 'same one',
+            'mentioned', 'above', 'earlier', 'previous', 'last one',
+            'for it', 'about it', 'of it', 'its ', "it's", 'for this', 'for that'
+        ]
+        # Cross-document patterns (legacy - now mostly covered by aggregate)
+        cross_doc_patterns = [
+            'other documents', 'other policies', 'other files',
+            'which documents', 'which files',
+            'similar to', 'related to', 'like this one'
+        ]
+        # Comparison patterns
+        comparison_patterns = [
+            'compare', 'difference between', 'versus', ' vs ', 'differ',
+            'same as', 'similar to', 'contrast', 'both', 'either'
+        ]
+        # Check patterns in priority order
+        # 1. Aggregate queries (highest priority for "list all" type queries)
+        for pattern in aggregate_patterns:
+            if pattern in query_lower:
+                print(f"[QUERY TYPE] Detected AGGREGATE: matched '{pattern}'")
+                return 'aggregate'
+        # 2. Calculation queries
+        for pattern in calculation_patterns:
+            if pattern in query_lower:
+                print(f"[QUERY TYPE] Detected CALCULATION: matched '{pattern}'")
+                return 'calculation'
+        # 3. Date filter queries
+        for pattern in date_patterns:
+            if pattern in query_lower:
+                print(f"[QUERY TYPE] Detected DATE_FILTER: matched '{pattern}'")
+                return 'date_filter'
+        # 4. Followup queries (short queries with pronouns)
+        for pattern in followup_patterns:
+            if pattern in query_lower and len(query) < 100:
+                return 'followup'
+        # 5. Cross-document queries
+        for pattern in cross_doc_patterns:
+            if pattern in query_lower:
+                return 'cross_document'
+        # 6. Comparison queries
+        for pattern in comparison_patterns:
+            if pattern in query_lower:
+                return 'comparison'
+        # If there's recent history and query is short, likely a followup
+        if history and len(history) > 0 and len(query) < 50:
+            words = query_lower.split()
+            if words and words[0] in ['what', 'who', 'when', 'where', 'why', 'how', 'is', 'are', 'does', 'do', 'can']:
+                return 'followup'
+        return 'general'
+    def _handle_aggregate_query(self, user_id: str, bucket_id: str, query: str) -> dict:
+        """
+        Handle aggregate queries by retrieving ALL document metadata/summaries.
+        Used for 'list all', 'how many', etc.
+        Returns dict with context built from ALL documents.
+        """
+        print(f"[AGGREGATE] Handling aggregate query: {query[:50]}...")
+        # Get ALL metadata for this bucket
+        all_metadata = chroma_service.get_all_metadata(user_id, bucket_id)
+        # Get ALL summaries too
+        all_summaries = chroma_service.get_all_summaries(user_id, bucket_id)
+        print(f"[AGGREGATE] Retrieved {len(all_metadata)} metadata records, {len(all_summaries)} summaries")
+        # Build context from metadata
+        context_parts = []
+        # For large datasets, use a more compact format to avoid token limits
+        if len(all_metadata) > 50:
+            print(f"[AGGREGATE] Large dataset ({len(all_metadata)} docs) - using compact format")
+            # Compact format for large datasets
+            for i, meta in enumerate(all_metadata, 1):
+                entry = f"{i}. {meta.get('document_title', 'Unknown')} | Insured: {meta.get('insured_name', 'N/A')} | Type: {meta.get('policy_type', 'N/A')} | Industry: {meta.get('industry', 'N/A')} | Sum: {meta.get('sum_insured', 0)} | Mfg: {meta.get('is_manufacturing', False)}"
+                context_parts.append(entry)
+        else:
+            # Full format for smaller datasets
+            for i, meta in enumerate(all_metadata, 1):
+                doc_id = meta.get('doc_id', '')
+                filename = meta.get('document_title', 'Unknown Document')
+                # Find matching summary
+                summary = ""
+                for s in all_summaries:
+                    if s.get('doc_id') == doc_id:
+                        summary = s.get('summary', '')
+                        break
+                # Build document entry
+                entry = f"""
+=== Document {i}: {filename} ===
+- Policy Number: {meta.get('policy_number', 'N/A')}
+- Insured: {meta.get('insured_name', 'N/A')}
+- Insurer: {meta.get('insurer_name', 'N/A')}
+- Policy Type: {meta.get('policy_type', 'N/A')}
+- Industry: {meta.get('industry', 'N/A')}
+- Sum Insured: {meta.get('sum_insured', 'N/A')}
+- Premium: {meta.get('premium_amount', 'N/A')}
+- Start Date: {meta.get('policy_start_date', 'N/A')}
+- End Date: {meta.get('policy_end_date', 'N/A')}
+- Renewal Date: {meta.get('renewal_date', 'N/A')}
+- Location: {meta.get('city', '')}, {meta.get('state', '')}
+- Is Manufacturing: {meta.get('is_manufacturing', False)}
+Summary: {summary[:300] if summary else 'No summary available'}
+"""
+                context_parts.append(entry.strip())
+        context = '\n'.join(context_parts)
+        print(f"[AGGREGATE] Context length: {len(context)} characters")
+        return {
+            'context': context,
+            'metadata': all_metadata,
+            'total_documents': len(all_metadata),
+            'sources': {m.get('doc_id'): m.get('document_title') for m in all_metadata}
+        }
+    def _handle_calculation_query(self, user_id: str, bucket_id: str, query: str) -> dict:
+        """
+        Handle calculation queries by getting all metadata and performing math.
+        Used for 'total sum insured', 'average premium', etc.
+        """
+        print(f"[CALCULATION] Handling calculation query: {query[:50]}...")
+        query_lower = query.lower()
+        # Get all metadata
+        all_metadata = chroma_service.get_all_metadata(user_id, bucket_id)
+        # Determine what to calculate
+        calc_results = {}
+        # Sum insured calculations
+        if 'sum insured' in query_lower or 'insured' in query_lower:
+            values = [m.get('sum_insured', 0) for m in all_metadata if m.get('sum_insured')]
+            calc_results['sum_insured'] = {
+                'total': sum(values),
+                'count': len(values),
+                'average': sum(values) / len(values) if values else 0,
+                'max': max(values) if values else 0,
+                'min': min(values) if values else 0
+            }
+        # Premium calculations
+        if 'premium' in query_lower:
+            values = [m.get('premium_amount', 0) for m in all_metadata if m.get('premium_amount')]
+            calc_results['premium'] = {
+                'total': sum(values),
+                'count': len(values),
+                'average': sum(values) / len(values) if values else 0,
+                'max': max(values) if values else 0,
+                'min': min(values) if values else 0
+            }
+        # Policy count by type
+        if 'type' in query_lower or 'policies' in query_lower:
+            type_counts = {}
+            for m in all_metadata:
+                pt = m.get('policy_type', 'unknown')
+                type_counts[pt] = type_counts.get(pt, 0) + 1
+            calc_results['policy_types'] = type_counts
+        # Build context with calculation results
+        context = f"""
+=== CALCULATION RESULTS FOR {len(all_metadata)} DOCUMENTS ===
+"""
+        if 'sum_insured' in calc_results:
+            si = calc_results['sum_insured']
+            context += f"""
+## Sum Insured Analysis
+- **Total Sum Insured**: ₹{si['total']:,.2f}
+- **Number of policies with sum insured**: {si['count']}
+- **Average Sum Insured**: ₹{si['average']:,.2f}
+- **Maximum Sum Insured**: ₹{si['max']:,.2f}
+- **Minimum Sum Insured**: ₹{si['min']:,.2f}
+"""
+        if 'premium' in calc_results:
+            pm = calc_results['premium']
+            context += f"""
+## Premium Analysis
+- **Total Premium**: ₹{pm['total']:,.2f}
+- **Number of policies with premium**: {pm['count']}
+- **Average Premium**: ₹{pm['average']:,.2f}
+- **Maximum Premium**: ₹{pm['max']:,.2f}
+- **Minimum Premium**: ₹{pm['min']:,.2f}
+"""
+        if 'policy_types' in calc_results:
+            context += "\n## Policy Types Breakdown\n"
+            for pt, count in sorted(calc_results['policy_types'].items(), key=lambda x: -x[1]):
+                context += f"- **{pt.title()}**: {count} policies\n"
+        return {
+            'context': context,
+            'calculations': calc_results,
+            'total_documents': len(all_metadata),
+            'sources': {m.get('doc_id'): m.get('document_title') for m in all_metadata}
+        }
+    def _handle_date_filter_query(self, user_id: str, bucket_id: str, query: str) -> dict:
+        """
+        Handle date-based filter queries.
+        Used for 'policies renewing in 2026', 'expiring this year', etc.
+        """
+        print(f"[DATE FILTER] Handling date query: {query[:50]}...")
+        # Extract year from query
+        target_year = date_parser.get_year_from_query(query)
+        # Get all metadata
+        all_metadata = chroma_service.get_all_metadata(user_id, bucket_id)
+        # Filter by date criteria
+        matching_docs = []
+        query_lower = query.lower()
+        for meta in all_metadata:
+            matches = False
+            if 'renew' in query_lower and target_year:
+                renewal_year = meta.get('renewal_year', 0)
+                # Also check end date
+                if not renewal_year and meta.get('policy_end_date'):
+                    end_date = date_parser.parse_date(meta.get('policy_end_date'))
+                    if end_date:
+                        renewal_year = end_date.year
+                if renewal_year == target_year:
+                    matches = True
+            elif 'expir' in query_lower and target_year:
+                end_date_str = meta.get('policy_end_date', '')
+                if end_date_str:
+                    end_date = date_parser.parse_date(end_date_str)
+                    if end_date and end_date.year == target_year:
+                        matches = True
+            elif 'start' in query_lower and target_year:
+                start_date_str = meta.get('policy_start_date', '')
+                if start_date_str:
+                    start_date = date_parser.parse_date(start_date_str)
+                    if start_date and start_date.year == target_year:
+                        matches = True
+            if matches:
+                matching_docs.append(meta)
+        print(f"[DATE FILTER] Found {len(matching_docs)} documents matching year {target_year}")
+        # Build context from matching documents
+        context_parts = []
+        context_parts.append(f"=== POLICIES MATCHING DATE CRITERIA (Year: {target_year}) ===\n")
+        context_parts.append(f"Found {len(matching_docs)} policies:\n")
+        for i, meta in enumerate(matching_docs, 1):
+            entry = f"""
+{i}. **{meta.get('document_title', 'Unknown')}**
+   - Insured: {meta.get('insured_name', 'N/A')}
+   - Policy Type: {meta.get('policy_type', 'N/A')}
+   - Start: {meta.get('policy_start_date', 'N/A')}
+   - End: {meta.get('policy_end_date', 'N/A')}
+   - Renewal: {meta.get('renewal_date', 'N/A')}
+   - Sum Insured: {meta.get('sum_insured', 'N/A')}
+"""
+            context_parts.append(entry)
+        return {
+            'context': '\n'.join(context_parts),
+            'matching_documents': matching_docs,
+            'target_year': target_year,
+            'total_matches': len(matching_docs),
+            'sources': {m.get('doc_id'): m.get('document_title') for m in matching_docs}
+        }
+    def _handle_metadata_query(self, user_id: str, bucket_id: str,
+                                query: str, parsed: dict) -> dict:
+        """
+        Handle queries using AI-parsed parameters for intelligent filtering and sorting.
+        This is the new AI-powered approach that replaces pattern-based routing.
+        Args:
+            user_id: User ID
+            bucket_id: Bucket ID
+            query: Original query text
+            parsed: AI-parsed parameters with filters, sort, limit, etc.
+        """
+        print(f"[METADATA QUERY] Using AI-parsed parameters: {parsed}")
+        # Get ALL metadata for this bucket
+        all_metadata = chroma_service.get_all_metadata(user_id, bucket_id)
+        total_before_filter = len(all_metadata)
+        print(f"[METADATA QUERY] Starting with {total_before_filter} documents")
+        # Apply AI-extracted filters
+        filters = parsed.get('filters', {})
+        for field, value in filters.items():
+            if value is None or value == '':
+                continue
+            if field == 'is_manufacturing' and value:
+                all_metadata = [m for m in all_metadata if m.get('is_manufacturing', False)]
+                print(f"[METADATA QUERY] Filtered by manufacturing: {len(all_metadata)} remaining")
+            elif field == 'industry':
+                all_metadata = [m for m in all_metadata
+                              if str(value).lower() in str(m.get('industry', '')).lower()]
+                print(f"[METADATA QUERY] Filtered by industry '{value}': {len(all_metadata)} remaining")
+            elif field == 'policy_type':
+                all_metadata = [m for m in all_metadata
+                              if str(value).lower() in str(m.get('policy_type', '')).lower()]
+                print(f"[METADATA QUERY] Filtered by policy_type '{value}': {len(all_metadata)} remaining")
+            elif field in ['city', 'state', 'insurer_name', 'insured_name', 'broker_name']:
+                all_metadata = [m for m in all_metadata
+                              if str(value).lower() in str(m.get(field, '')).lower()]
+                print(f"[METADATA QUERY] Filtered by {field} '{value}': {len(all_metadata)} remaining")
+            elif field == 'renewal_year':
+                target_year = int(value) if isinstance(value, (int, str)) else None
+                if target_year:
+                    all_metadata = [m for m in all_metadata if m.get('renewal_year') == target_year]
+                    print(f"[METADATA QUERY] Filtered by renewal_year {target_year}: {len(all_metadata)} remaining")
+        # Apply AI-extracted sorting
+        sort_by = parsed.get('sort_by')
+        if sort_by and sort_by in ['premium_amount', 'sum_insured', 'renewal_date', 'policy_start_date']:
+            reverse = parsed.get('sort_order', 'desc') == 'desc'
+            all_metadata.sort(key=lambda x: x.get(sort_by, 0) or 0, reverse=reverse)
+            print(f"[METADATA QUERY] Sorted by {sort_by} {'desc' if reverse else 'asc'}")
+        # Apply AI-extracted limit
+        limit = parsed.get('limit')
+        if limit and isinstance(limit, int) and limit > 0:
+            all_metadata = all_metadata[:limit]
+            print(f"[METADATA QUERY] Limited to top {limit}")
+        # Handle calculations
+        calc_result = None
+        if parsed.get('intent') == 'calculate' or parsed.get('calculation'):
+            calc_type = parsed.get('calculation', 'sum')
+            calc_field = parsed.get('calculation_field', 'premium_amount')
+            values = [m.get(calc_field, 0) or 0 for m in all_metadata]
+            if calc_type == 'sum':
+                calc_result = {'type': 'sum', 'field': calc_field, 'value': sum(values)}
+            elif calc_type == 'average' and values:
+                calc_result = {'type': 'average', 'field': calc_field, 'value': sum(values) / len(values)}
+            elif calc_type == 'max' and values:
+                calc_result = {'type': 'max', 'field': calc_field, 'value': max(values)}
+            elif calc_type == 'min' and values:
+                calc_result = {'type': 'min', 'field': calc_field, 'value': min(values)}
+            elif calc_type == 'count':
+                calc_result = {'type': 'count', 'field': 'documents', 'value': len(all_metadata)}
+        # Handle count intent
+        if parsed.get('intent') == 'count' and not calc_result:
+            calc_result = {'type': 'count', 'field': 'documents', 'value': len(all_metadata)}
+        # Build context
+        context_parts = []
+        # Add calculation result if any
+        if calc_result:
+            if calc_result['type'] == 'count':
+                context_parts.append(f"**Total Count: {calc_result['value']} documents**\n")
+            else:
+                context_parts.append(f"**{calc_result['type'].title()} of {calc_result['field']}: ₹{calc_result['value']:,.2f}**\n")
+        # Add filtered results summary
+        filter_desc = ', '.join(f"{k}={v}" for k, v in filters.items() if v)
+        if filter_desc:
+            context_parts.append(f"Filtered by: {filter_desc}")
+        context_parts.append(f"Showing {len(all_metadata)} of {total_before_filter} total documents\n")
+        # Build document list
+        if len(all_metadata) > 0:
+            context_parts.append("---\n**Matching Documents:**\n")
+            for i, meta in enumerate(all_metadata, 1):
+                # Use rich format for smaller sets, compact for larger
+                if len(all_metadata) <= 20:
+                    entry = f"""
+**{i}. {meta.get('document_title', 'Unknown')}**
+- Insured: {meta.get('insured_name', 'N/A')}
+- Insurer: {meta.get('insurer_name', 'N/A')}
+- Policy Type: {meta.get('policy_type', 'N/A')}
+- Industry: {meta.get('industry', 'N/A')}
+- Sum Insured: ₹{meta.get('sum_insured', 0):,.2f}
+- Premium: ₹{meta.get('premium_amount', 0):,.2f}
+- Renewal: {meta.get('renewal_date', 'N/A')}
+- Location: {meta.get('city', '')}, {meta.get('state', '')}
+"""
+                else:
+                    # Compact format for large sets
+                    entry = f"{i}. {meta.get('document_title', 'Unknown')} | {meta.get('insured_name', 'N/A')} | ₹{meta.get('premium_amount', 0):,.0f} | {meta.get('policy_type', 'N/A')}"
+                context_parts.append(entry)
+        context = '\n'.join(context_parts)
+        print(f"[METADATA QUERY] Final context: {len(context)} chars, {len(all_metadata)} docs")
+        return {
+            'context': context,
+            'metadata': all_metadata,
+            'total_documents': len(all_metadata),
+            'total_before_filter': total_before_filter,
+            'calculation': calc_result,
+            'parsed': parsed,
+            'sources': {m.get('doc_id'): m.get('document_title') for m in all_metadata}
+        }
+    def _stream_metadata_query(self, user_id: str, bucket_id: str,
+                                query: str, parsed: dict, chat_id: str = ""):
+        """
+        Stream responses for AI-parsed metadata queries.
+        Uses intelligent filtering, sorting, and calculations based on AI-extracted parameters.
+        This is the new AI-powered streaming handler that replaces pattern-based routing.
+        Args:
+            user_id: User ID
+            bucket_id: Bucket ID
+            query: Original query text
+            parsed: AI-parsed parameters with intent, filters, sort, limit, etc.
+            chat_id: Chat session ID for conversation storage
+        """
+        print(f"[METADATA STREAM] Handling AI-parsed query: intent={parsed.get('intent')}")
+        # Step 1: Get filtered, sorted, and calculated metadata using AI-parsed parameters
+        result = self._handle_metadata_query(user_id, bucket_id, query, parsed)
+        context = result.get('context', '')
+        sources = result.get('sources', {})
+        total_docs = result.get('total_documents', 0)
+        total_before = result.get('total_before_filter', 0)
+        calculation = result.get('calculation')
+        # Check if we have any data
+        if not context or total_docs == 0:
+            yield {
+                "type": "error",
+                "content": "No document metadata found. Please run the migration script to extract metadata from your documents."
+            }
+            return
+        # Send sources first
+        yield {
+            "type": "sources",
+            "sources": list(sources.keys()),
+            "source_files": list(sources.values())
+        }
+        # Step 2: Build AI prompt based on parsed intent
+        intent = parsed.get('intent', 'list')
+        if intent == 'count':
+            system_prompt = f"""You are Iribl AI, a document analysis assistant answering a COUNT query.
+CRITICAL INSTRUCTIONS:
+1. The count has been computed: {total_docs} documents match the criteria.
+2. State the count clearly and directly.
+3. If filters were applied, mention what was filtered.
+4. Brief context about what was counted is helpful."""
+        elif intent == 'calculate':
+            calc_info = ""
+            if calculation:
+                calc_info = f"\nPre-computed: {calculation.get('type').title()} of {calculation.get('field')} = ₹{calculation.get('value', 0):,.2f}"
+            system_prompt = f"""You are Iribl AI, a document analysis assistant performing CALCULATIONS across {total_docs} documents.
+CRITICAL INSTRUCTIONS:
+1. The calculation results have been computed from {total_docs} documents.{calc_info}
+2. Present the numbers clearly with proper formatting (₹ for currency, commas for thousands).
+3. Explain what the numbers mean in business context.
+4. Include document counts to show the calculation scope.
+Present the data accurately - these are pre-computed from actual document metadata."""
+        elif intent == 'rank':
+            limit = parsed.get('limit', total_docs)
+            sort_by = parsed.get('sort_by', 'premium_amount')
+            sort_order = parsed.get('sort_order', 'desc')
+            system_prompt = f"""You are Iribl AI, a document analysis assistant answering a RANKING query.
+CRITICAL INSTRUCTIONS:
+1. You have been given the top {limit} documents sorted by {sort_by} ({sort_order}).
+2. Present them as a clear ranked list with the ranking number.
+3. Highlight the key metric ({sort_by}) for each item.
+4. Format nicely with headers, bold for values, and bullet points.
+5. Include all {limit} items - do not truncate."""
+        elif intent == 'compare':
+            system_prompt = f"""You are Iribl AI, a document analysis assistant answering a COMPARISON query.
+CRITICAL INSTRUCTIONS:
+1. You have metadata for {total_docs} relevant documents.
+2. Create a clear comparison highlighting differences and similarities.
+3. Use tables or side-by-side format where helpful.
+4. Focus on the key metrics mentioned in the query.
+5. Be thorough but organized."""
+        else:  # list, summarize, or other
+            system_prompt = f"""You are Iribl AI, a document analysis assistant. You are answering a query that requires information from {total_docs} documents.
+CRITICAL INSTRUCTIONS:
+1. You have been given metadata for {total_docs} documents (from {total_before} total).
+2. Your answer must be COMPREHENSIVE - include ALL relevant items from the data provided.
+3. Format your response clearly with headers, bullet points, and bold text.
+4. For "list" queries, actually list ALL matching items with key details.
+5. Organize information logically (by type, by company, by date, etc.).
+6. For "summarize" queries, provide a concise overview with key statistics.
+Do NOT say information is missing - you have the filtered list. Do NOT ask for more documents."""
+        # Step 3: Build messages
+        messages = [{"role": "system", "content": system_prompt}]
+        user_message = f"""Based on the following document metadata and any calculations, answer my question.
+DOCUMENT DATA:
+{context}
+QUESTION: {query}
+Instructions: Provide a complete, well-formatted answer based on ALL the data above."""
+        messages.append({"role": "user", "content": user_message})
+        # Step 4: Stream response using DeepSeek or fallback
+        full_response = ""
+        chunk_count = 0
+        if self.use_deepseek:
+            print("[METADATA STREAM] Using DeepSeek for response")
+            for chunk in self._call_deepseek_streaming(messages):
+                if "error" in chunk:
+                    print(f"[METADATA STREAM] DeepSeek failed, falling back...")
+                    break
+                if "chunk" in chunk:
+                    full_response += chunk["chunk"]
+                    chunk_count += 1
+                    if chunk_count <= 3:
+                        print(f"[METADATA YIELD] Chunk {chunk_count}: {chunk['chunk'][:30]}...")
+                    yield {"type": "content", "content": chunk["chunk"]}
+        print(f"[METADATA STREAM] DeepSeek streaming done, yielded {chunk_count} chunks")
+        # Fallback to OpenRouter if DeepSeek failed or not available
+        if not full_response:
+            print("[METADATA STREAM] Falling back to OpenRouter")
+            for model_key in self.fallback_order:
+                try:
+                    for chunk in self._call_ai_model_streaming(model_key, messages):
+                        if "error" in chunk:
+                            continue
+                        if "chunk" in chunk:
+                            full_response += chunk["chunk"]
+                            chunk_count += 1
+                            yield {"type": "content", "content": chunk["chunk"]}
+                    if full_response:
+                        break
+                except Exception as e:
+                    print(f"[METADATA STREAM] Model {model_key} failed: {e}")
+                    continue
+        # Step 5: Store conversation
+        if full_response and chat_id:
+            try:
+                chroma_service.store_conversation(
+                    user_id=user_id,
+                    role="user",
+                    content=query,
+                    bucket_id=bucket_id or "",
+                    chat_id=chat_id
+                )
+                chroma_service.store_conversation(
+                    user_id=user_id,
+                    role="assistant",
+                    content=full_response,
+                    bucket_id=bucket_id or "",
+                    chat_id=chat_id
+                )
+            except Exception as e:
+                print(f"[METADATA STREAM] Failed to store conversation: {e}")
+        # Send done signal with metadata about the query
+        yield {
+            "type": "done",
+            "query_type": "metadata",
+            "intent": parsed.get('intent'),
+            "total_documents": total_docs,
+            "total_before_filter": total_before
+        }
+    def _stream_specialized_query(self, user_id: str, bucket_id: str,
+                                   query: str, query_type: str, chat_id: str = ""):
+        """
+        Stream responses for specialized queries (aggregate, calculation, date_filter).
+        Uses metadata/summaries instead of top-K chunk retrieval.
+        This preserves the existing flow for specific/comparison/general queries.
+        """
+        import time
+        print(f"[SPECIALIZED QUERY] Handling {query_type} query")
+        # Step 1: Get context from appropriate handler
+        if query_type == 'aggregate':
+            result = self._handle_aggregate_query(user_id, bucket_id, query)
+        elif query_type == 'calculation':
+            result = self._handle_calculation_query(user_id, bucket_id, query)
+        elif query_type == 'date_filter':
+            result = self._handle_date_filter_query(user_id, bucket_id, query)
+        else:
+            yield {"type": "error", "content": f"Unknown query type: {query_type}"}
+            return
+        context = result.get('context', '')
+        sources = result.get('sources', {})
+        total_docs = result.get('total_documents', result.get('total_matches', 0))
+        # Check if we have any data
+        if not context or total_docs == 0:
+            yield {
+                "type": "error",
+                "content": "No document metadata found. Please run the migration script to extract metadata from your documents."
+            }
+            return
+        # Send sources first
+        yield {
+            "type": "sources",
+            "sources": list(sources.keys()),
+            "source_files": list(sources.values())
+        }
+        # Step 2: Build AI prompt for specialized query
+        if query_type == 'aggregate':
+            system_prompt = f"""You are Iribl AI, a document analysis assistant. You are answering an AGGREGATE query that requires information from ALL {total_docs} documents.
+CRITICAL INSTRUCTIONS:
+1. You have been given metadata and summaries for ALL {total_docs} documents in the bucket.
+2. Your answer must be COMPREHENSIVE - include ALL relevant items from the data provided.
+3. Format your response clearly with headers, bullet points, and bold text.
+4. For "list all" queries, actually list ALL matching items.
+5. For "how many" queries, give exact counts.
+6. Organize information logically (by type, by company, by date, etc.).
+Do NOT say information is missing - you have the full list. Do NOT ask for more documents."""
+        elif query_type == 'calculation':
+            system_prompt = f"""You are Iribl AI, a document analysis assistant performing CALCULATIONS across {total_docs} documents.
+CRITICAL INSTRUCTIONS:
+1. The calculation results have already been computed from all documents.
+2. Present the numbers clearly with proper formatting (₹ for currency, commas for thousands).
+3. Explain what the numbers mean in business context.
+4. If asked for totals, provide grand totals.
+5. If asked for averages, provide averages with context.
+6. Include document counts to show the calculation scope.
+Present the data accurately - these are pre-computed from actual document metadata."""
+        elif query_type == 'date_filter':
+            total_matches = result.get('total_matches', 0)
+            target_year = result.get('target_year', 'N/A')
+            system_prompt = f"""You are Iribl AI, a document analysis assistant answering a DATE-BASED query.
+CRITICAL INSTRUCTIONS:
+1. You have been given {total_matches} policies matching the date criteria (year {target_year}).
+2. List ALL matching policies with their relevant dates.
+3. Format the response as a clear list with key details.
+4. If no matches found, say so explicitly.
+5. Include date-relevant details: start date, end date, renewal date.
+Present ALL matching documents - do not truncate the list."""
+        # Step 3: Build messages
+        messages = [{"role": "system", "content": system_prompt}]
+        # Add context and query
+        user_message = f"""Based on the following document metadata and calculations, answer my question.
+DOCUMENT DATA:
+{context}
+QUESTION: {query}
+Instructions: Provide a complete, well-formatted answer based on ALL the data above."""
+        messages.append({"role": "user", "content": user_message})
+        # Step 4: Stream response using DeepSeek or fallback
+        full_response = ""
+        chunk_count = 0
+        if self.use_deepseek:
+            print("[SPECIALIZED QUERY] Using DeepSeek for response")
+            for chunk in self._call_deepseek_streaming(messages):
+                if "error" in chunk:
+                    # Fallback to OpenRouter
+                    print(f"[SPECIALIZED QUERY] DeepSeek failed, falling back...")
+                    break
+                if "chunk" in chunk:
+                    full_response += chunk["chunk"]
+                    chunk_count += 1
+                    if chunk_count <= 3:
+                        print(f"[SPECIALIZED YIELD] Chunk {chunk_count}: {chunk['chunk'][:30]}...")
+                    yield {"type": "content", "content": chunk["chunk"]}
+        print(f"[SPECIALIZED QUERY] DeepSeek streaming done, yielded {chunk_count} chunks")
+        # Fallback to OpenRouter if DeepSeek failed or not available
+        if not full_response:
+            print("[SPECIALIZED QUERY] Falling back to OpenRouter")
+            for model_key in self.fallback_order:
+                try:
+                    for chunk in self._call_ai_model_streaming(model_key, messages):
+                        if "error" in chunk:
+                            continue
+                        if "chunk" in chunk:
+                            full_response += chunk["chunk"]
+                            chunk_count += 1
+                            yield {"type": "content", "content": chunk["chunk"]}
+                    if full_response:
+                        break
+                except Exception as e:
+                    print(f"[SPECIALIZED QUERY] Model {model_key} failed: {e}")
+                    continue
+        # Step 5: Store conversation
+        if full_response and chat_id:
+            try:
+                chroma_service.store_conversation(
+                    user_id=user_id,
+                    role="user",
+                    content=query,
+                    bucket_id=bucket_id or "",
+                    chat_id=chat_id
+                )
+                chroma_service.store_conversation(
+                    user_id=user_id,
+                    role="assistant",
+                    content=full_response,
+                    bucket_id=bucket_id or "",
+                    chat_id=chat_id
+                )
+            except Exception as e:
+                print(f"[SPECIALIZED QUERY] Failed to store conversation: {e}")
+        # Send done signal
+        yield {"type": "done", "query_type": query_type, "total_documents": total_docs}
+    def _build_conversation_context(self, history: list[dict], query: str) -> str:
+        """
+        Build a context summary from conversation history for pronoun resolution.
+        Extracts key entities and topics from recent messages.
+        """
+        if not history:
+            return ""
+        # Get last 4 messages (2 Q&A pairs)
+        recent = history[-4:] if len(history) >= 4 else history
+        context_parts = []
+        for msg in recent:
+            role = msg.get('role', 'user')
+            content = msg.get('content', '')[:500]  # Truncate long messages
+            if role == 'user':
+                context_parts.append(f"User asked: {content}")
+            else:
+                # Extract key info from assistant response (first 300 chars)
+                context_parts.append(f"Assistant answered about: {content[:300]}...")
+        if context_parts:
+            return "\n".join(context_parts)
+        return ""
+    def _build_graph_context(self, chunks: list[dict],
+                             user_id: str) -> list[dict]:
+        """
+        Build graph-based context from retrieved chunks
+        Expands context by including related chunks and document metadata
+        """
+        enhanced_chunks = []
+        seen_docs = set()
+        for chunk in chunks:
+            doc_id = chunk['doc_id']
+            # Get document metadata if not seen
+            if doc_id not in seen_docs:
+                seen_docs.add(doc_id)
+                # Get adjacent chunks for context
+                all_doc_chunks = chroma_service.get_document_chunks(doc_id)
+                # Find current chunk index
+                chunk_id = chunk['chunk_id']
+                current_idx = None
+                for i, c in enumerate(all_doc_chunks):
+                    if c['chunk_id'] == chunk_id:
+                        current_idx = i
+                        break
+                # Include surrounding chunks for graph context
+                if current_idx is not None:
+                    start_idx = max(0, current_idx - 1)
+                    end_idx = min(len(all_doc_chunks), current_idx + 2)
+                    for i in range(start_idx, end_idx):
+                        if all_doc_chunks[i]['chunk_id'] != chunk_id:
+                            enhanced_chunks.append({
+                                **all_doc_chunks[i],
+                                'doc_id': doc_id,
+                                'is_context': True
+                            })
+            enhanced_chunks.append({**chunk, 'is_context': False})
+        return enhanced_chunks
+    def _call_ai_model(self, model_key: str, messages: list[dict]) -> dict:
+        """Call AI model via OpenRouter"""
+        model_id = self.model_map.get(model_key)
+        if not model_id:
+            return {"success": False, "error": f"Unknown model: {model_key}"}
+        headers = {
+            "Authorization": f"Bearer {self.api_key}",
+            "Content-Type": "application/json",
+            "HTTP-Referer": "http://localhost:5000",
+            "X-Title": "NotebookLM Clone"
+        }
+        payload = {
+            "model": model_id,
+            "messages": messages,
+            "max_tokens": self.max_tokens,
+            "temperature": self.temperature
+        }
+        try:
+            response = requests.post(
+                f"{self.base_url}/chat/completions",
+                headers=headers,
+                json=payload,
+                timeout=self.timeout
+            )
+            if response.status_code == 200:
+                data = response.json()
+                text = data['choices'][0]['message']['content']
+                return {"success": True, "response": text, "model": model_key}
+            else:
+                return {
+                    "success": False,
+                    "error": f"API error: {response.status_code}"
+                }
+        except Exception as e:
+            return {"success": False, "error": str(e)}
+    def _call_ai_model_streaming(self, model_key: str, messages: list[dict]):
+        """Call AI model with streaming - yields text chunks as they arrive"""
+        model_id = self.model_map.get(model_key)
+        if not model_id:
+            yield {"error": f"Unknown model: {model_key}"}
+            return
+        headers = {
+            "Authorization": f"Bearer {self.api_key}",
+            "Content-Type": "application/json",
+            "HTTP-Referer": "http://localhost:5000",
+            "X-Title": "NotebookLM Clone"
+        }
+        payload = {
+            "model": model_id,
+            "messages": messages,
+            "max_tokens": self.max_tokens,
+            "temperature": self.temperature,
+            "stream": True
+        }
+        try:
+            response = requests.post(
+                f"{self.base_url}/chat/completions",
+                headers=headers,
+                json=payload,
+                timeout=self.timeout,
+                stream=True
+            )
+            if response.status_code == 200:
+                for line in response.iter_lines():
+                    if line:
+                        line_text = line.decode('utf-8')
+                        if line_text.startswith('data: '):
+                            data_str = line_text[6:]
+                            if data_str.strip() == '[DONE]':
+                                break
+                            try:
+                                import json
+                                data = json.loads(data_str)
+                                delta = data.get('choices', [{}])[0].get('delta', {})
+                                content = delta.get('content', '')
+                                if content:
+                                    yield {"chunk": content, "model": model_key}
+                            except:
+                                pass
+            else:
+                yield {"error": f"API error: {response.status_code}"}
+        except Exception as e:
+            yield {"error": str(e)}
+    def _call_deepseek_streaming(self, messages: list[dict]):
+        """Call DeepSeek API with streaming - highly capable model"""
+        if not self.deepseek_api_key:
+            print("[DEEPSEEK] No API key configured")
+            yield {"error": "DeepSeek API key not configured"}
+            return
+        print(f"[DEEPSEEK] Calling model: {self.deepseek_model}")
+        headers = {
+            "Authorization": f"Bearer {self.deepseek_api_key}",
+            "Content-Type": "application/json"
+        }
+        payload = {
+            "model": self.deepseek_model,
+            "messages": messages,
+            "max_tokens": self.max_tokens,
+            "temperature": self.temperature,
+            "stream": True
+        }
+        try:
+            import time
+            start = time.time()
+            response = requests.post(
+                f"{self.deepseek_base_url}/chat/completions",
+                headers=headers,
+                json=payload,
+                timeout=60,  # DeepSeek may need more time for complex queries
+                stream=True
+            )
+            print(f"[DEEPSEEK] Response status: {response.status_code} in {time.time()-start:.2f}s")
+            if response.status_code == 200:
+                chunk_count = 0
+                for line in response.iter_lines():
+                    if line:
+                        line_text = line.decode('utf-8')
+                        if line_text.startswith('data: '):
+                            data_str = line_text[6:]
+                            if data_str.strip() == '[DONE]':
+                                print(f"[DEEPSEEK] Streaming complete, yielded {chunk_count} chunks")
+                                break
+                            try:
+                                import json
+                                data = json.loads(data_str)
+                                delta = data.get('choices', [{}])[0].get('delta', {})
+                                content = delta.get('content', '')
+                                if content:
+                                    chunk_count += 1
+                                    if chunk_count <= 3:
+                                        print(f"[DEEPSEEK] Chunk {chunk_count}: {content[:50]}...")
+                                    yield {"chunk": content, "model": "deepseek"}
+                            except Exception as parse_error:
+                                print(f"[DEEPSEEK] Parse error: {parse_error}")
+                                pass
+                if chunk_count == 0:
+                    print(f"[DEEPSEEK] WARNING: No chunks received from stream")
+            else:
+                print(f"[DEEPSEEK] Error: {response.text[:200]}")
+                yield {"error": f"DeepSeek API error: {response.status_code}"}
+        except Exception as e:
+            print(f"[DEEPSEEK] Exception: {e}")
+            yield {"error": str(e)}
+    def query(self, user_id: str, query: str,
+              doc_ids: list[str] = None,
+              bucket_id: str = None,
+              conversation_history: list[dict] = None) -> dict:
+        """
+        Answer a question with retrieval-augmented generation (non-streaming).
+        Steps:
+        1. Search for relevant chunks (optionally filtered by doc_ids / bucket)
+        2. Filter by relevance threshold, falling back to the top 5 hits
+        3. Build a source-labelled context (graph expansion is skipped for speed)
+        4. Load persistent conversation memory plus any caller-supplied history
+        5. Generate the AI response, trying each model in self.fallback_order
+        6. Store the user/assistant exchange in conversation memory
+        Returns a dict with "success". When nothing is retrieved it carries a
+        canned "response" and empty "sources"; when a model answers it carries
+        "response", "model", "sources", "source_files", "chunks_used" and
+        "chunks_filtered"; when every model fails it carries "error".
+        """
+        # Step 1: Retrieve relevant chunks
+        chunks = chroma_service.search_chunks(
+            user_id=user_id,
+            query=query,
+            doc_ids=doc_ids,
+            bucket_id=bucket_id,
+            top_k=self.top_k
+        )
+        # Step 2: Filter chunks by relevance threshold (lower distance = more relevant)
+        # If threshold filters everything, use original chunks
+        # NOTE(review): a chunk without a 'distance' key defaults to 0 and always passes
+        relevant_chunks = [
+            chunk for chunk in chunks
+            if chunk.get('distance', 0) < self.relevance_threshold
+        ]
+        # Fallback: if threshold is too strict, use top chunks anyway
+        if not relevant_chunks and chunks:
+            relevant_chunks = chunks[:5]  # Use top 5 most relevant
+        if not relevant_chunks:
+            # Store user question even if no answer
+            chroma_service.store_conversation(
+                user_id=user_id,
+                role="user",
+                content=query,
+                bucket_id=bucket_id or ""
+            )
+            no_info_response = "I don't have any relevant information in your documents to answer this question. Please upload some documents first or ask about a topic covered in your uploaded documents."
+            chroma_service.store_conversation(
+                user_id=user_id,
+                role="assistant",
+                content=no_info_response,
+                bucket_id=bucket_id or ""
+            )
+            # Reported as a success: the canned message is the answer
+            return {
+                "success": True,
+                "response": no_info_response,
+                "sources": []
+            }
+        # Step 3: Skip graph expansion for speed - use chunks directly
+        enhanced_chunks = [{'doc_id': c['doc_id'], 'text': c['text'], 'is_context': False} for c in relevant_chunks]
+        # Step 4: Prepare context for AI with document sources
+        context_parts = []
+        sources = {}  # doc_id -> filename mapping
+        for chunk in enhanced_chunks:
+            doc_id = chunk['doc_id']
+            # Get document filename for source attribution (looked up once per document)
+            if doc_id not in sources:
+                doc_info = chroma_service.get_document(doc_id, user_id)
+                # Fall back to the raw doc_id when the document record is missing
+                sources[doc_id] = doc_info['filename'] if doc_info else doc_id
+            # Include source in context for better attribution
+            source_label = f"[Source: {sources[doc_id]}]"
+            context_parts.append(f"{source_label}\n{chunk['text']}")
+        context = "\n\n---\n\n".join(context_parts)
+        # Step 5: Build messages with cross-document intelligence prompt
+        system_prompt = """You are Iribl AI, a document analysis assistant. You MUST follow these rules strictly:
+**CROSS-DOCUMENT INTELLIGENCE (CRITICAL):**
+1. SYNTHESIZE information from ALL relevant document sections
+2. If documents have CONFLICTING information, state both clearly
+3. Never confuse or mix up information between different documents
+**ACCURACY RULES:**
+1. ONLY answer using information from the DOCUMENT CONTEXT provided below
+2. NEVER use external knowledge, training data, or make assumptions
+3. If the answer is NOT in the documents, say: "This information is not found in your documents."
+**FORMATTING:**
+- Use **bold** for key terms and important values
+- Use headers (##) for multi-topic answers
+- Use bullet points with each item on its own line
+- For tables, use proper markdown: | col | col | with |---| separator
+**RESPONSE LENGTH:**
+- Simple questions: 2-4 sentences
+- Lists: Complete list from ALL documents
+- Analysis: Structured response with headers
+**IMPORTANT: Do NOT list document names or sources at the end of your response.**
+You are answering questions about the user's uploaded documents ONLY."""
+        messages = [{"role": "system", "content": system_prompt}]
+        # Step 6: Load minimal conversation history for speed
+        stored_history = chroma_service.get_conversation_history(
+            user_id=user_id,
+            bucket_id=bucket_id,
+            limit=self.max_history
+        )
+        # Add only last 4 messages for context (speed optimized)
+        for msg in stored_history[-4:]:
+            messages.append({
+                "role": msg['role'],
+                "content": msg['content']
+            })
+        # Also add any session-based conversation history if provided
+        if conversation_history:
+            for msg in conversation_history[-6:]:
+                # Avoid duplicates (dict equality: only exact role/content-only matches are skipped)
+                if msg not in messages:
+                    messages.append(msg)
+        # Add current query with context
+        user_message = f"""Based on the following document sections, answer my question accurately.
+DOCUMENT SECTIONS:
+{context}
+QUESTION: {query}
+Instructions: Synthesize from multiple documents if relevant. Be detailed but concise. Do NOT mention document names or sources at the end."""
+        messages.append({"role": "user", "content": user_message})
+        # Step 7: Generate response with fallback (first model that succeeds wins)
+        for model_key in self.fallback_order:
+            result = self._call_ai_model(model_key, messages)
+            if result['success']:
+                # Step 8: Store conversation in persistent memory
+                # NOTE(review): no chat_id is passed here, unlike the streaming path
+                chroma_service.store_conversation(
+                    user_id=user_id,
+                    role="user",
+                    content=query,
+                    bucket_id=bucket_id or ""
+                )
+                chroma_service.store_conversation(
+                    user_id=user_id,
+                    role="assistant",
+                    content=result['response'],
+                    bucket_id=bucket_id or ""
+                )
+                return {
+                    "success": True,
+                    "response": result['response'],
+                    "model": result['model'],
+                    "sources": list(sources.keys()),
+                    "source_files": list(sources.values()),
+                    "chunks_used": len(enhanced_chunks),
+                    # Number of retrieved chunks dropped before building the context
+                    "chunks_filtered": len(chunks) - len(relevant_chunks)
+                }
+        # Every model in the fallback chain failed
+        return {
+            "success": False,
+            "error": "All AI models failed to generate a response"
+        }
+    def query_stream(self, user_id: str, query: str,
+                     doc_ids: list[str] = None,
+                     bucket_id: str = None,
+                     chat_id: str = ""):
+        """
+        Streaming version of query - yields response chunks as they arrive.
+        Returns generator for SSE streaming.
+        ENHANCED: Now uses AI-powered query parsing to understand intent, filters, sorting, and limits.
+        Routes to metadata handler for aggregate queries, regular RAG for specific document queries.
+        On the regular RAG path the generator yields, in order:
+        - {"type": "sources", "sources": [...], "source_files": [...]}
+        - {"type": "chunk", "content": text} for each streamed piece of the answer
+        - {"type": "done", "model": name}, or {"type": "error", "content": message}
+        When the parsed query needs metadata, all events come from
+        self._stream_metadata_query instead.
+        """
+        import time
+        # Step 0: AI-powered query parsing - understand intent and extract structured parameters
+        parsed = self._parse_query_with_ai(query)
+        print(f"[QUERY ROUTING] AI-parsed query: {parsed}")
+        # Route based on AI-parsed intent
+        # needs_metadata = True means query requires aggregate data across all documents
+        if parsed.get('needs_metadata', False):
+            yield from self._stream_metadata_query(user_id, bucket_id, query, parsed, chat_id)
+            return
+        # For all other query types (specific, comparison, followup, general),
+        # continue with existing top-K chunk retrieval logic
+        # Step 1: Expand query for better retrieval (handles "module 5" -> "module five", etc.)
+        expanded_queries = self._expand_query(query)
+        print(f"[DEBUG] Query expansion: {expanded_queries}")
+        # Step 1.5: Detect if user is asking about a specific document by name
+        user_docs = chroma_service.get_user_documents(user_id, bucket_id)
+        referenced_doc_ids = self._detect_document_reference(query, user_docs)
+        if referenced_doc_ids:
+            print(f"[DEBUG] Detected document reference in query: {referenced_doc_ids}")
+            # When the caller gave no doc_ids, the referenced documents become the
+            # doc_ids filter passed to search_chunks; an explicit doc_ids wins
+            if doc_ids is None:
+                doc_ids = referenced_doc_ids
+        # Step 2: Retrieve chunks using all query variations and merge unique results
+        t1 = time.time()
+        all_chunks = []
+        seen_chunk_ids = set()
+        for q in expanded_queries:
+            chunks = chroma_service.search_chunks(
+                user_id=user_id,
+                query=q,
+                doc_ids=doc_ids,
+                bucket_id=bucket_id,
+                top_k=self.top_k
+            )
+            for chunk in chunks:
+                # Deduplicate by chunk_id, or by the first 50 characters of text when it is absent
+                chunk_id = chunk.get('chunk_id', chunk['text'][:50])
+                if chunk_id not in seen_chunk_ids:
+                    seen_chunk_ids.add(chunk_id)
+                    all_chunks.append(chunk)
+        # Sort by relevance (distance) and limit
+        all_chunks.sort(key=lambda x: x.get('distance', 0))
+        chunks = all_chunks[:self.top_k]
+        print(f"[TIMING] ChromaDB search with expansion: {time.time()-t1:.2f}s")
+        # Debug: Show what chunks we're getting
+        print(f"[DEBUG] Retrieved {len(chunks)} unique chunks from {len(expanded_queries)} queries:")
+        for i, c in enumerate(chunks[:5]):  # Show first 5
+            print(f"  Chunk {i+1} (dist={c.get('distance', 0):.3f}): {c['text'][:100]}...")
+        # Step 3: Use ALL retrieved chunks - do not filter aggressively
+        # For 64+ documents, we need comprehensive coverage
+        relevant_chunks = chunks  # Use all retrieved chunks
+        # Only apply minimal filtering if we have way too many chunks
+        # (chunks holds at most self.top_k items, so this only triggers when top_k exceeds 100)
+        if len(relevant_chunks) > 100:
+            # Keep only chunks with reasonable similarity
+            relevant_chunks = [c for c in chunks if c.get('distance', 0) < self.relevance_threshold]
+            if not relevant_chunks:
+                relevant_chunks = chunks[:80]  # Fallback to top 80
+        if not relevant_chunks:
+            yield {"type": "error", "content": "No relevant documents found. Please upload documents first."}
+            return
+        # Step 4: Build context with prominent document source labels for cross-document intelligence
+        t2 = time.time()
+        context_parts = []
+        sources = {}
+        for i, chunk in enumerate(relevant_chunks, 1):
+            doc_id = chunk['doc_id']
+            filename = chunk.get('filename', 'Document')
+            # Get filename from chroma if not in chunk
+            # NOTE(review): this lookup runs per chunk, not per document
+            if filename == 'Document':
+                doc_info = chroma_service.get_document(doc_id, user_id)
+                if doc_info:
+                    filename = doc_info.get('filename', 'Document')
+            sources[doc_id] = filename
+            # Add prominent document source label with chunk number for cross-document intelligence
+            section = f"=== DOCUMENT: {filename} (Section {i}) ===\n{chunk['text']}"
+            context_parts.append(section)
+        context = "\n\n" + "\n\n".join(context_parts)
+        print(f"[TIMING] Context build: {time.time()-t2:.2f}s")
+        print(f"[DEBUG] Context length: {len(context)} chars, chunks: {len(relevant_chunks)}")
+        # Send sources first
+        yield {"type": "sources", "sources": list(sources.keys()), "source_files": list(sources.values())}
+        # Step 5: Load conversation history for this chat (CRITICAL FOR MEMORY)
+        stored_history = []
+        if chat_id:
+            try:
+                all_history = chroma_service.get_conversation_history(
+                    user_id=user_id,
+                    bucket_id=bucket_id,
+                    limit=50  # Get more, filter by chat_id
+                )
+                # Filter to only this chat's messages; messages without a chat_id
+                # are kept when their bucket_id matches the current bucket
+                stored_history = [msg for msg in all_history
+                                  if msg.get('chat_id', '') == chat_id or
+                                  (not msg.get('chat_id') and msg.get('bucket_id', '') == (bucket_id or ''))]
+                stored_history = stored_history[-self.max_history:]
+                print(f"[DEBUG] Loaded {len(stored_history)} history messages for chat {chat_id}")
+            except Exception as e:
+                # History is best-effort: on failure the query proceeds without memory
+                print(f"[DEBUG] Failed to load history: {e}")
+        # Step 6: Detect query type and build conversation context
+        query_type = self._detect_query_type(query, stored_history)
+        conversation_context = self._build_conversation_context(stored_history, query)
+        print(f"[DEBUG] Query type: {query_type}, has conversation context: {bool(conversation_context)}")
+        # List the retrieved source filenames for cross-document / comparison queries
+        doc_list = ""
+        if query_type in ['cross_document', 'comparison']:
+            doc_names = list(sources.values())
+            if doc_names:
+                doc_list = f"\n\nDOCUMENTS IN THIS BUCKET: {', '.join(set(doc_names))}"
+        # Step 7: Build messages with PRODUCTION-GRADE conversational prompt
+        system_prompt = """You are Iribl AI, a smart document assistant. Be conversational, precise, and THOROUGH.
+## FINDING INFORMATION (CRITICAL)
+1. Search EVERY document section before saying something isn't there
+2. Look for ALL types of values: per-item amounts, TOTALS, AGGREGATES, counts, numbers of people/items
+3. Information may be phrased differently - "total sum insured", "aggregate SI", "Sum Insured" could all refer to different values
+4. When asked about "total" - look for aggregate/overall amounts, not per-unit amounts
+5. When asked "how many" - look for counts, numbers, quantities in the documents
+6. NEVER say "not mentioned" unless you've checked every single section and truly cannot find it
+## RESPONSE QUALITY
+1. NEVER start with preambles like "Based on a thorough review..." - just answer directly
+2. If user says "it", "this", "that" - refer to previous conversation for context
+3. Provide COMPLETE answers - include ALL relevant details, numbers, and figures
+4. When numbers exist - mention BOTH per-unit AND total/aggregate if available
+5. Format responses clearly with bold, bullets, and structure
+## ACCURACY RULES
+1. Only answer from the documents provided - never use external knowledge
+2. When asked about Person A, only give Person A's info - never mix up entities
+3. If documents conflict, state both versions
+## FORMATTING
+- **Bold** for names, numbers, key terms
+- Bullet points for lists (comprehensive, include all items)
+- Tables for comparisons
+- No document source lists at the end
+When asked about numbers/totals/counts - SEARCH THOROUGHLY and provide ALL relevant figures found in the documents."""
+        messages = [{"role": "system", "content": system_prompt}]
+        # Add conversation history for memory (CRITICAL for pronoun resolution)
+        for msg in stored_history:
+            messages.append({
+                "role": msg['role'],
+                "content": msg['content']
+            })
+        # Build user message with context injection for pronouns
+        context_injection = ""
+        if query_type == 'followup' and conversation_context:
+            context_injection = f"""
+CONVERSATION CONTEXT (use this to understand pronouns like "it", "this", "that"):
+{conversation_context}
+"""
+        user_message = f"""{context_injection}DOCUMENT SECTIONS (search ALL of these thoroughly):
+{context}{doc_list}
+QUESTION: {query}
+INSTRUCTIONS:
+- Answer directly and completely
+- Include ALL relevant numbers, totals, counts, and details from the documents
+- If this is a follow-up, use conversation history to understand what I'm referring to
+- For number questions: look for per-unit values, totals, aggregates, and counts - include all that are relevant"""
+        messages.append({"role": "user", "content": user_message})
+        # Step 8: Stream the response - Try DeepSeek first (highly capable), then OpenRouter
+        full_response = ""
+        model_used = None
+        # Try DeepSeek first if available
+        if self.use_deepseek:
+            for chunk_data in self._call_deepseek_streaming(messages):
+                if "error" in chunk_data:
+                    break  # Fall through to OpenRouter
+                if "chunk" in chunk_data:
+                    full_response += chunk_data["chunk"]
+                    model_used = chunk_data["model"]
+                    yield {"type": "chunk", "content": chunk_data["chunk"]}
+        # Fallback to OpenRouter if DeepSeek produced no text at all
+        # (a partial DeepSeek answer followed by an error is kept as-is)
+        if not full_response:
+            for model_key in self.fallback_order:
+                had_response = False
+                for chunk_data in self._call_ai_model_streaming(model_key, messages):
+                    if "error" in chunk_data:
+                        break
+                    if "chunk" in chunk_data:
+                        had_response = True
+                        full_response += chunk_data["chunk"]
+                        model_used = chunk_data["model"]
+                        yield {"type": "chunk", "content": chunk_data["chunk"]}
+                # Stop at the first model that produced any text
+                if had_response:
+                    break
+        if full_response:
+            # Store conversation with chat_id for proper linking
+            # NOTE(review): a storage failure here raises before the "done" event is sent
+            chroma_service.store_conversation(user_id, "user", query, bucket_id or "", chat_id)
+            chroma_service.store_conversation(user_id, "assistant", full_response, bucket_id or "", chat_id)
+            yield {"type": "done", "model": model_used}
+        else:
+            yield {"type": "error", "content": "Failed to generate response"}
+    def clear_memory(self, user_id: str, bucket_id: str = None) -> bool:
+        """Clear conversation memory for a user"""
+        return chroma_service.clear_conversation(user_id, bucket_id)
+    def generate_summary(self, content: str, filename: str = "") -> dict:
+        """
+        Generate a short summary (2-3 sentences) of the document content.
+        Uses DeepSeek as primary, with OpenRouter fallback.
+        """
+        # Truncate content if too long (use first ~4000 chars for summary)
+        truncated_content = content[:4000] if len(content) > 4000 else content
+        summary_prompt = f"""Please provide a concise 2-3 sentence summary of the following document.
+Focus on the main topic, key points, and purpose of the document.
+Do not include any preamble like "This document..." - just state the summary directly.
+Document: {filename}
+Content:
+{truncated_content}
+Summary:"""
+        messages = [
+            {"role": "system", "content": "You are a document summarization assistant. Provide brief, accurate summaries in 2-3 sentences."},
+            {"role": "user", "content": summary_prompt}
+        ]
+        # Try DeepSeek first if available
+        if self.use_deepseek:
+            try:
+                import requests
+                headers = {
+                    "Authorization": f"Bearer {self.deepseek_api_key}",
+                    "Content-Type": "application/json"
+                }
+                payload = {
+                    "model": self.deepseek_model,
+                    "messages": messages,
+                    "max_tokens": 200,
+                    "temperature": 0.3
+                }
+                response = requests.post(
+                    f"{self.deepseek_base_url}/chat/completions",
+                    headers=headers,
+                    json=payload,
+                    timeout=30
+                )
+                if response.status_code == 200:
+                    data = response.json()
+                    text = data['choices'][0]['message']['content']
+                    return {
+                        "success": True,
+                        "summary": text.strip(),
+                        "model": "deepseek"
+                    }
+            except Exception as e:
+                print(f"[DEEPSEEK SUMMARY] Error: {e}")
+        # Fallback to OpenRouter models
+        for model_key in self.fallback_order:
+            result = self._call_ai_model(model_key, messages)
+            if result['success']:
+                return {
+                    "success": True,
+                    "summary": result['response'].strip(),
+                    "model": result['model']
+                }
+        return {
+            "success": False,
+            "error": "Failed to generate summary with all models",
+            "summary": f"Document: {filename}"  # Fallback summary
+        }
+# Module-level singleton: RAGService is constructed once, when this module is first imported
+rag_service = RAGService()

static/css/styles.css ADDED Viewed

	@@ -0,0 +1,2567 @@

+/* ==================== CSS Variables & Root Styles ==================== */
+/* Design tokens declared as custom properties on :root; referenced elsewhere via var(--name) */
+:root {
+    /* Dark Mode Color Palette (darkest to lightest surface) */
+    --bg-darkest: #0a0a0a;
+    --bg-dark: #121212;
+    --bg-medium: #1a1a1a;
+    --bg-light: #242424;
+    --bg-lighter: #2d2d2d;
+    --bg-hover: #363636;
+    /* Accent Colors */
+    --accent-primary: #ffffff;
+    --accent-secondary: #e0e0e0;
+    --accent-muted: #888888;
+    /* Glass Effect (low-alpha white fill/border, half-opaque black shadow) */
+    --glass-bg: rgba(255, 255, 255, 0.03);
+    --glass-border: rgba(255, 255, 255, 0.08);
+    --glass-shadow: rgba(0, 0, 0, 0.5);
+    /* Text Colors (white at decreasing opacity) */
+    --text-primary: #ffffff;
+    --text-secondary: rgba(255, 255, 255, 0.7);
+    --text-muted: rgba(255, 255, 255, 0.4);
+    /* Status Colors */
+    --success: #4ade80;
+    --error: #f87171;
+    --info: #60a5fa;
+    /* Border Radii */
+    --radius-sm: 6px;
+    --radius-md: 10px;
+    --radius-lg: 16px;
+    --radius-xl: 24px;
+    /* Transitions (duration + easing; "bounce" uses an overshooting cubic-bezier) */
+    --transition-fast: 0.15s ease;
+    --transition-smooth: 0.3s ease;
+    --transition-bounce: 0.3s cubic-bezier(0.68, -0.55, 0.265, 1.55);
+    /* Sidebar Width (expanded and collapsed) */
+    --sidebar-width: 300px;
+    --sidebar-collapsed: 50px;
+}
+/* ==================== Global Styles ==================== */
+* {
+    margin: 0;
+    padding: 0;
+    box-sizing: border-box;
+}
+html {
+    font-size: 16px;
+    scroll-behavior: smooth;
+    height: 100vh;
+    overflow: hidden;
+}
+body {
+    font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
+    background: var(--bg-darkest);
+    color: var(--text-primary);
+    height: 100vh;
+    overflow: hidden;
+}
+/* ==================== Glass Panels ==================== */
+.glass-panel {
+    background: var(--bg-dark);
+    border: 1px solid var(--glass-border);
+    border-radius: var(--radius-lg);
+    box-shadow: 0 4px 20px var(--glass-shadow);
+}
+/* ==================== App Container ==================== */
+.app-container {
+    position: relative;
+    z-index: 10;
+    height: 100vh;
+    display: flex;
+    flex-direction: column;
+    overflow: hidden;
+}
+/* ==================== Main Layout ==================== */
+.main-content {
+    /* Flex item of the app container, and itself a flex row for the panels */
+    display: flex;
+    flex: 1;
+    gap: 1rem;
+    padding: 1rem;
+    /* Viewport-tall, but allowed to shrink below its content height */
+    height: 100vh;
+    min-height: 0;
+    /* Clip here; any scrolling has to come from descendants */
+    overflow: hidden;
+}
+/* ==================== Dual Sidebars ==================== */
+.sidebar {
+    width: var(--sidebar-width);
+    height: 100%;
+    /* Fill available height */
+    display: flex;
+    flex-direction: column;
+    flex-shrink: 0;
+    position: relative;
+    transition: width var(--transition-smooth), opacity var(--transition-smooth);
+}
+.sidebar.collapsed {
+    width: var(--sidebar-collapsed);
+}
+.sidebar.collapsed .sidebar-content {
+    opacity: 0;
+    pointer-events: none;
+}
+.sidebar.collapsed .sidebar-toggle .toggle-icon {
+    transform: rotate(180deg);
+}
+.sidebar-content {
+    flex: 1;
+    display: flex;
+    flex-direction: column;
+    gap: 0.75rem;
+    overflow-y: auto;
+    overflow-x: hidden;
+    transition: opacity var(--transition-smooth);
+}
+.sidebar-content::-webkit-scrollbar {
+    width: 4px;
+}
+.sidebar-content::-webkit-scrollbar-thumb {
+    background: var(--bg-hover);
+    border-radius: 2px;
+}
+/* Sidebar Toggle Button */
+.sidebar-toggle {
+    position: absolute;
+    top: 50%;
+    transform: translateY(-50%);
+    width: 24px;
+    height: 48px;
+    background: var(--bg-light);
+    border: 1px solid var(--glass-border);
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    cursor: pointer;
+    z-index: 10;
+    transition: all var(--transition-fast);
+}
+.sidebar-toggle:hover {
+    background: var(--bg-hover);
+}
+.toggle-icon {
+    font-size: 0.7rem;
+    color: var(--text-muted);
+    transition: transform var(--transition-smooth);
+}
+.sidebar-left .sidebar-toggle {
+    right: -12px;
+    border-radius: 0 var(--radius-sm) var(--radius-sm) 0;
+}
+.sidebar-right .sidebar-toggle {
+    left: -12px;
+    border-radius: var(--radius-sm) 0 0 var(--radius-sm);
+}
+/* ==================== Sidebar Sections ==================== */
+.sidebar-section {
+    padding: 1rem;
+}
+.section-header {
+    display: flex;
+    align-items: center;
+    justify-content: space-between;
+    cursor: pointer;
+    user-select: none;
+}
+.section-header:hover .collapse-icon {
+    color: var(--text-primary);
+}
+.sidebar-title {
+    font-size: 0.75rem;
+    font-weight: 600;
+    text-transform: uppercase;
+    letter-spacing: 0.5px;
+    color: var(--text-muted);
+    display: flex;
+    align-items: center;
+    gap: 0.5rem;
+    margin: 0;
+}
+.section-actions {
+    display: flex;
+    align-items: center;
+    gap: 0.25rem;
+}
+.collapse-icon {
+    font-size: 0.6rem;
+    color: var(--text-muted);
+    transition: transform var(--transition-smooth), color var(--transition-fast);
+}
+.collapsible.collapsed .collapse-icon {
+    transform: rotate(-90deg);
+}
+.section-body {
+    margin-top: 0.75rem;
+    max-height: 500px;
+    overflow: hidden;
+    transition: max-height var(--transition-smooth), opacity var(--transition-smooth), margin var(--transition-smooth);
+}
+.collapsible.collapsed .section-body {
+    max-height: 0;
+    opacity: 0;
+    margin-top: 0;
+}
+/* ==================== User Section ==================== */
+.user-section {
+    padding: 0.75rem 1rem !important;
+}
+.user-info-row {
+    display: flex;
+    align-items: center;
+    justify-content: space-between;
+}
+.user-badge {
+    display: flex;
+    align-items: center;
+    gap: 0.5rem;
+}
+.user-avatar {
+    width: 32px;
+    height: 32px;
+    background: var(--bg-hover);
+    border-radius: 50%;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    font-weight: 600;
+    font-size: 0.85rem;
+}
+.user-details {
+    display: flex;
+    flex-direction: column;
+    gap: 0.1rem;
+}
+.user-details span:first-child {
+    font-size: 0.9rem;
+    font-weight: 500;
+}
+.user-role {
+    font-size: 0.7rem;
+    color: var(--text-muted);
+}
+/* ==================== Custom Animated Dropdown ==================== */
+.custom-select {
+    position: relative;
+    width: 100%;
+    margin-bottom: 0.75rem;
+}
+.custom-select.compact {
+    margin-bottom: 0;
+    width: auto;
+    min-width: 180px;
+}
+.select-trigger {
+    display: flex;
+    align-items: center;
+    justify-content: space-between;
+    padding: 0.65rem 1rem;
+    background: var(--bg-medium);
+    border: 1px solid var(--glass-border);
+    border-radius: var(--radius-md);
+    cursor: pointer;
+    transition: all var(--transition-fast);
+}
+.select-trigger:hover {
+    background: var(--bg-light);
+    border-color: rgba(255, 255, 255, 0.15);
+}
+.custom-select.open .select-trigger {
+    border-color: rgba(255, 255, 255, 0.2);
+    border-radius: var(--radius-md) var(--radius-md) 0 0;
+}
+.select-value {
+    font-size: 0.85rem;
+    color: var(--text-secondary);
+    white-space: nowrap;
+    overflow: hidden;
+    text-overflow: ellipsis;
+}
+.select-arrow {
+    font-size: 0.6rem;
+    color: var(--text-muted);
+    transition: transform var(--transition-smooth);
+    margin-left: 0.5rem;
+}
+.custom-select.open .select-arrow {
+    transform: rotate(180deg);
+}
+.select-options {
+    position: absolute;
+    top: 100%;
+    left: 0;
+    right: 0;
+    background: var(--bg-medium);
+    border: 1px solid var(--glass-border);
+    border-top: none;
+    border-radius: 0 0 var(--radius-md) var(--radius-md);
+    max-height: 0;
+    overflow: hidden;
+    opacity: 0;
+    z-index: 100;
+    transition: max-height var(--transition-smooth), opacity var(--transition-fast);
+    box-shadow: 0 8px 20px rgba(0, 0, 0, 0.4);
+}
+.custom-select.open .select-options {
+    max-height: 200px;
+    opacity: 1;
+    overflow-y: auto;
+}
+.select-options::-webkit-scrollbar {
+    width: 4px;
+}
+.select-options::-webkit-scrollbar-thumb {
+    background: var(--bg-hover);
+    border-radius: 2px;
+}
+.select-option {
+    padding: 0.6rem 1rem;
+    font-size: 0.85rem;
+    color: var(--text-secondary);
+    cursor: pointer;
+    transition: all var(--transition-fast);
+    display: flex;
+    align-items: center;
+    gap: 0.5rem;
+}
+.select-option:hover {
+    background: var(--bg-light);
+    color: var(--text-primary);
+}
+.select-option.active {
+    background: var(--bg-hover);
+    color: var(--text-primary);
+}
+.select-option .option-icon {
+    font-size: 1rem;
+}
+/* ==================== Buckets List ==================== */
+.buckets-list {
+    display: flex;
+    flex-direction: column;
+    gap: 0.25rem;
+    max-height: 180px;
+    overflow-y: auto;
+}
+.bucket-item {
+    display: flex;
+    align-items: center;
+    gap: 0.5rem;
+    padding: 0.5rem 0.75rem;
+    border-radius: var(--radius-sm);
+    cursor: pointer;
+    transition: all var(--transition-fast);
+}
+.bucket-item:hover {
+    background: var(--bg-light);
+}
+.bucket-item.active {
+    background: var(--bg-light);
+    border-left: 2px solid var(--accent-primary);
+}
+.bucket-name {
+    flex: 1;
+    font-size: 0.85rem;
+}
+.bucket-count {
+    font-size: 0.7rem;
+    color: var(--text-muted);
+    background: var(--bg-hover);
+    padding: 0.1rem 0.4rem;
+    border-radius: var(--radius-sm);
+}
+/* Delete control of a bucket row: hidden until the row is hovered or
+   contains keyboard focus. */
+.bucket-delete {
+    opacity: 0;
+    padding: 0.2rem;
+    font-size: 0.7rem;
+    transition: opacity var(--transition-fast);
+}
+.bucket-item:hover .bucket-delete {
+    opacity: 1;
+}
+/* Kept as a separate rule so a browser that rejects :focus-within does not
+   also drop the hover rule above. Without this the control stays invisible
+   when it is reached with the keyboard. */
+.bucket-item:focus-within .bucket-delete {
+    opacity: 1;
+}
+/* ==================== Upload Zone ==================== */
+.upload-zone {
+    padding: 1.5rem;
+    border: 1px dashed rgba(255, 255, 255, 0.15);
+    border-radius: var(--radius-md);
+    text-align: center;
+    cursor: pointer;
+    transition: all var(--transition-smooth);
+    background: var(--bg-medium);
+}
+.upload-zone:hover,
+.upload-zone.dragover {
+    border-color: rgba(255, 255, 255, 0.3);
+    background: var(--bg-light);
+    transform: scale(1.02);
+}
+.upload-icon {
+    font-size: 2rem;
+    margin-bottom: 0.5rem;
+}
+.upload-title {
+    font-size: 0.9rem;
+    font-weight: 600;
+    margin-bottom: 0.25rem;
+}
+.upload-subtitle {
+    font-size: 0.75rem;
+    color: var(--text-muted);
+}
+.progress-info {
+    display: flex;
+    align-items: center;
+    gap: 0.5rem;
+    margin-bottom: 0.5rem;
+}
+.progress-bar {
+    height: 4px;
+    background: var(--bg-hover);
+    border-radius: 2px;
+    overflow: hidden;
+}
+.progress-fill {
+    height: 100%;
+    background: var(--accent-primary);
+    width: 0%;
+    transition: width var(--transition-smooth);
+}
+/* Cancel Upload Button */
+.btn-cancel-upload {
+    margin-top: 0.75rem;
+    width: 100%;
+    padding: 0.5rem 1rem;
+    background: rgba(248, 113, 113, 0.15);
+    color: var(--error);
+    border: 1px solid rgba(248, 113, 113, 0.3);
+    border-radius: var(--radius-md);
+    font-size: 0.8rem;
+    font-weight: 500;
+    cursor: pointer;
+    transition: all var(--transition-fast);
+}
+.btn-cancel-upload:hover {
+    background: rgba(248, 113, 113, 0.25);
+    border-color: rgba(248, 113, 113, 0.5);
+    transform: translateY(-1px);
+}
+/* ==================== Documents Section (Right Sidebar) ==================== */
+/* The documents panel and the chat-history panel use identical sizing rules,
+   so they are declared together: flexible column panels capped at 50% height. */
+.documents-section,
+.chat-history-section {
+    display: flex;
+    flex-direction: column;
+    flex: 1;
+    min-height: 0;
+    max-height: 50%;
+    transition: all var(--transition-smooth);
+}
+/* Collapsed panels stop growing and size to their content */
+.documents-section.collapsed,
+.chat-history-section.collapsed {
+    flex: 0 0 auto;
+    min-height: auto;
+}
+/* Header spacing: present while expanded, removed while collapsed */
+.documents-section .section-header,
+.chat-history-section .section-header {
+    padding-bottom: 0.5rem;
+}
+.documents-section.collapsed .section-header,
+.chat-history-section.collapsed .section-header {
+    padding-bottom: 0;
+}
+/* Panel bodies fill the remaining height and clip their overflow */
+.documents-body,
+.chat-history-body {
+    flex: 1;
+    overflow: hidden;
+}
+.documents-list {
+    height: 100%;
+    overflow-y: auto;
+    display: flex;
+    flex-direction: column;
+    gap: 0.4rem;
+    padding-right: 0.25rem;
+}
+.doc-count {
+    margin-left: auto;
+    font-size: 0.7rem;
+    opacity: 0.6;
+}
+.document-item {
+    display: flex;
+    align-items: center;
+    gap: 0.5rem;
+    padding: 0.6rem 0.75rem;
+    background: transparent;
+    border: 1px solid transparent;
+    border-radius: var(--radius-md);
+    cursor: pointer;
+    transition: all var(--transition-fast);
+    position: relative;
+}
+.document-item:hover {
+    background: var(--bg-light);
+    border-color: var(--glass-border);
+}
+.doc-icon {
+    width: 32px;
+    height: 32px;
+    border-radius: var(--radius-sm);
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    font-size: 1rem;
+    background: var(--bg-hover);
+}
+.doc-info {
+    flex: 1;
+    min-width: 0;
+}
+.doc-name {
+    font-size: 0.8rem;
+    font-weight: 500;
+    white-space: nowrap;
+    overflow: hidden;
+    text-overflow: ellipsis;
+}
+.doc-meta {
+    font-size: 0.65rem;
+    color: var(--text-muted);
+    margin-top: 0.1rem;
+}
+/* Per-document action controls: hidden until the row is hovered or contains
+   keyboard focus. */
+.doc-view,
+.doc-delete {
+    opacity: 0;
+    padding: 0.25rem;
+    font-size: 0.8rem;
+    transition: opacity var(--transition-fast);
+}
+.document-item:hover .doc-view,
+.document-item:hover .doc-delete {
+    opacity: 1;
+}
+/* Kept as a separate rule so a browser that rejects :focus-within does not
+   also drop the hover rule above. Without this the controls stay invisible
+   when they are reached with the keyboard. */
+.document-item:focus-within .doc-view,
+.document-item:focus-within .doc-delete {
+    opacity: 1;
+}
+.doc-view:hover {
+    color: var(--info);
+}
+.doc-delete:hover {
+    color: var(--error);
+}
+/* ==================== Chat Container ==================== */
+.chat-container {
+    flex: 1;
+    display: flex;
+    flex-direction: column;
+    min-width: 0;
+    min-height: 0;
+    /* Critical: allows flex child to shrink */
+    overflow: hidden;
+    height: 100%;
+    /* Ensure it takes full height */
+}
+/* ==================== Chat Bucket Filter ==================== */
+.chat-bucket-filter {
+    display: flex;
+    align-items: center;
+    gap: 0.75rem;
+    padding: 0.75rem 1.25rem;
+    border-bottom: 1px solid var(--glass-border);
+    background: var(--bg-dark);
+    flex-shrink: 0;
+    /* Prevent filter bar from shrinking */
+}
+.filter-label {
+    font-size: 0.8rem;
+    color: var(--text-muted);
+}
+/* New Chat Button */
+.btn-new-chat {
+    margin-left: auto;
+    background: var(--accent-primary);
+    color: var(--bg-darkest);
+    padding: 0.4rem 0.75rem;
+    font-size: 0.75rem;
+    font-weight: 600;
+    border-radius: var(--radius-md);
+    white-space: nowrap;
+    transition: all var(--transition-fast);
+}
+.btn-new-chat:hover {
+    background: var(--accent-secondary);
+    transform: translateY(-1px);
+}
+/* Clear Chat Button */
+.btn-clear-chat {
+    background: var(--bg-light);
+    color: var(--text-secondary);
+    padding: 0.4rem 0.75rem;
+    font-size: 0.75rem;
+    font-weight: 600;
+    border-radius: var(--radius-md);
+    border: 1px solid var(--glass-border);
+    white-space: nowrap;
+    transition: all var(--transition-fast);
+}
+.btn-clear-chat:hover {
+    background: var(--bg-hover);
+    color: var(--text-primary);
+    transform: translateY(-1px);
+}
+/* Chat History List */
+.chat-history-list {
+    display: flex;
+    flex-direction: column;
+    gap: 0.35rem;
+}
+.chat-history-item {
+    display: flex;
+    align-items: center;
+    gap: 0.5rem;
+    padding: 0.5rem 0.6rem;
+    background: var(--bg-medium);
+    border: 1px solid transparent;
+    border-radius: var(--radius-md);
+    cursor: pointer;
+    transition: all var(--transition-fast);
+}
+.chat-history-item:hover {
+    background: var(--bg-light);
+    border-color: var(--glass-border);
+}
+.chat-history-item.active {
+    background: var(--bg-light);
+    border-color: var(--accent-muted);
+}
+.chat-history-icon {
+    font-size: 0.9rem;
+    flex-shrink: 0;
+}
+.chat-history-info {
+    flex: 1;
+    min-width: 0;
+}
+.chat-history-topic {
+    font-size: 0.8rem;
+    font-weight: 500;
+    white-space: nowrap;
+    overflow: hidden;
+    text-overflow: ellipsis;
+}
+.chat-history-date {
+    font-size: 0.65rem;
+    color: var(--text-muted);
+    margin-top: 0.1rem;
+}
+/* Delete control of a chat-history row: hidden until the row is hovered or
+   contains keyboard focus. */
+.chat-history-delete {
+    opacity: 0;
+    padding: 0.2rem;
+    font-size: 0.75rem;
+    transition: opacity var(--transition-fast);
+}
+.chat-history-item:hover .chat-history-delete {
+    opacity: 1;
+}
+/* Kept as a separate rule so a browser that rejects :focus-within does not
+   also drop the hover rule above. Without this the control stays invisible
+   when it is reached with the keyboard. */
+.chat-history-item:focus-within .chat-history-delete {
+    opacity: 1;
+}
+.chat-history-delete:hover {
+    color: var(--error);
+}
+/* ==================== Chat Messages ==================== */
+.chat-messages {
+    flex: 1;
+    overflow-y: auto;
+    overflow-x: hidden;
+    padding: 1rem;
+    display: flex;
+    flex-direction: column;
+    gap: 1rem;
+    min-height: 0;
+    /* Critical: allows scrolling to work */
+}
+/* Custom scrollbar for chat messages */
+.chat-messages::-webkit-scrollbar {
+    width: 6px;
+}
+.chat-messages::-webkit-scrollbar-track {
+    background: transparent;
+}
+.chat-messages::-webkit-scrollbar-thumb {
+    background: var(--bg-hover);
+    border-radius: 3px;
+}
+.chat-messages::-webkit-scrollbar-thumb:hover {
+    background: var(--bg-lighter);
+}
+.message {
+    display: flex;
+    gap: 0.75rem;
+    max-width: 85%;
+    animation: messageSlide 0.3s ease-out;
+}
+@keyframes messageSlide {
+    from {
+        opacity: 0;
+        transform: translateY(10px);
+    }
+    to {
+        opacity: 1;
+        transform: translateY(0);
+    }
+}
+.message.user {
+    align-self: flex-end;
+    flex-direction: row-reverse;
+}
+.message-avatar {
+    width: 32px;
+    height: 32px;
+    border-radius: 50%;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    flex-shrink: 0;
+    font-size: 0.9rem;
+    background: var(--bg-light);
+    border: 1px solid var(--glass-border);
+}
+.message-content {
+    padding: 1rem 1.25rem;
+    border-radius: var(--radius-lg);
+    font-size: 0.9rem;
+    line-height: 1.6;
+}
+.message.user .message-content {
+    background: var(--accent-primary);
+    color: var(--bg-darkest);
+    border-bottom-right-radius: 4px;
+}
+.message.assistant .message-content {
+    background: linear-gradient(135deg, var(--bg-light) 0%, var(--bg-medium) 100%);
+    border: 1px solid var(--glass-border);
+    border-bottom-left-radius: 4px;
+    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.15);
+}
+.message-sources {
+    margin-top: 0.5rem;
+    padding-top: 0.5rem;
+    border-top: 1px solid rgba(255, 255, 255, 0.1);
+    font-size: 0.75rem;
+    color: var(--text-muted);
+}
+.source-tag {
+    display: inline-block;
+    padding: 0.1rem 0.4rem;
+    background: var(--bg-hover);
+    border-radius: var(--radius-sm);
+    margin-left: 0.25rem;
+}
+/* ==================== Markdown Styling in Messages ==================== */
+.message-content h1,
+.message-content h2,
+.message-content h3,
+.message-content h4,
+.message-content .msg-header {
+    font-weight: 600;
+    color: var(--text-primary);
+    margin: 1.25rem 0 0.6rem 0;
+    line-height: 1.4;
+}
+.message-content h1 {
+    font-size: 1.25rem;
+    background: linear-gradient(90deg, var(--accent-primary), var(--accent-secondary));
+    -webkit-background-clip: text;
+    -webkit-text-fill-color: transparent;
+    background-clip: text;
+    padding-bottom: 0.5rem;
+    border-bottom: 2px solid rgba(168, 85, 247, 0.3);
+}
+.message-content h2 {
+    font-size: 1.1rem;
+    color: var(--accent-secondary);
+    border-bottom: 1px solid rgba(168, 85, 247, 0.2);
+    padding-bottom: 0.4rem;
+}
+.message-content h3 {
+    font-size: 1rem;
+    color: var(--info);
+}
+.message-content h4 {
+    font-size: 0.95rem;
+    font-weight: 600;
+    color: var(--text-secondary);
+    margin: 0.9rem 0 0.4rem 0;
+}
+.message-content h1:first-child,
+.message-content h2:first-child,
+.message-content h3:first-child,
+.message-content h4:first-child,
+.message-content .msg-header:first-child {
+    margin-top: 0;
+}
+.message-content p,
+.message-content .msg-para {
+    margin: 0.75rem 0;
+    line-height: 1.75;
+}
+.message-content p:first-child,
+.message-content .msg-para:first-child {
+    margin-top: 0;
+}
+/* ==================== Enhanced Lists ==================== */
+.message-content .formatted-list {
+    margin: 1rem 0;
+    padding-left: 0;
+    list-style: none;
+}
+.message-content ol.formatted-list {
+    counter-reset: item;
+}
+.message-content .formatted-list li {
+    position: relative;
+    padding: 0.5rem 0.75rem 0.5rem 2.25rem;
+    margin: 0.35rem 0;
+    background: rgba(255, 255, 255, 0.02);
+    border-radius: var(--radius-md);
+    border-left: 3px solid transparent;
+    line-height: 1.65;
+    transition: all 0.2s ease;
+}
+.message-content .formatted-list li:hover {
+    background: rgba(255, 255, 255, 0.04);
+}
+.message-content .formatted-list li.numbered {
+    border-left-color: var(--accent-primary);
+}
+.message-content .formatted-list li.bullet {
+    border-left-color: var(--info);
+}
+.message-content .formatted-list li.numbered .list-num {
+    position: absolute;
+    left: 0.6rem;
+    font-weight: 700;
+    color: var(--accent-primary);
+    font-size: 0.9rem;
+}
+.message-content .formatted-list li.bullet::before {
+    content: "▸";
+    position: absolute;
+    left: 0.75rem;
+    color: var(--info);
+    font-size: 0.85em;
+    font-weight: 600;
+}
+.message-content .formatted-list.sub-list {
+    margin: 0.5rem 0 0.5rem 1.5rem;
+}
+.message-content .formatted-list.sub-list li {
+    padding-left: 1.75rem;
+    background: transparent;
+    border-left: 2px solid rgba(168, 85, 247, 0.3);
+}
+.message-content .formatted-list.sub-list li::before {
+    content: "○";
+    position: absolute;
+    left: 0.5rem;
+    color: var(--accent-muted);
+    font-size: 0.7em;
+}
+/* Legacy list support */
+.message-content ul,
+.message-content ol {
+    margin: 0.75rem 0;
+    padding-left: 1.5rem;
+}
+.message-content li {
+    margin: 0.4rem 0;
+    padding-left: 0.5rem;
+    line-height: 1.6;
+}
+.message-content ul li::marker {
+    color: var(--info);
+}
+.message-content ol li::marker {
+    color: var(--accent-secondary);
+    font-weight: 600;
+}
+/* Nested lists */
+.message-content ul ul,
+.message-content ol ol,
+.message-content ul ol,
+.message-content ol ul {
+    margin: 0.25rem 0 0.25rem 1rem;
+}
+/* ==================== Premium Tables ==================== */
+.message-content .table-wrapper {
+    margin: 1rem 0;
+    border-radius: var(--radius-md);
+    overflow-x: auto;
+    overflow-y: hidden;
+    max-width: 100%;
+    box-shadow: 0 2px 12px rgba(0, 0, 0, 0.2);
+    border: 1px solid rgba(255, 255, 255, 0.08);
+}
+.message-content table {
+    width: 100%;
+    border-collapse: collapse;
+    font-size: 0.8rem;
+    background: rgba(0, 0, 0, 0.2);
+    table-layout: auto;
+}
+.message-content thead {
+    background: linear-gradient(135deg, rgba(168, 85, 247, 0.2) 0%, rgba(96, 165, 250, 0.15) 100%);
+}
+.message-content th {
+    padding: 0.6rem 0.75rem;
+    font-weight: 600;
+    color: var(--text-primary);
+    text-align: left;
+    border-bottom: 2px solid rgba(168, 85, 247, 0.3);
+    text-transform: uppercase;
+    font-size: 0.7rem;
+    letter-spacing: 0.3px;
+    white-space: nowrap;
+}
+/* Table body cells: width-capped, with long unbroken content allowed to wrap */
+.message-content td {
+    padding: 0.5rem 0.75rem;
+    border-bottom: 1px solid rgba(255, 255, 255, 0.05);
+    color: var(--text-secondary);
+    /* `word-break: break-word` is deprecated; it is kept only as a fallback
+       for browsers without `overflow-wrap: anywhere`, its standard
+       replacement. */
+    word-break: break-word;
+    overflow-wrap: anywhere;
+    max-width: 200px;
+}
+.message-content tbody tr {
+    transition: background 0.2s ease;
+}
+.message-content tbody tr:hover {
+    background: rgba(168, 85, 247, 0.08);
+}
+.message-content tbody tr:nth-child(even) {
+    background: rgba(255, 255, 255, 0.02);
+}
+.message-content tbody tr:nth-child(even):hover {
+    background: rgba(168, 85, 247, 0.08);
+}
+.message-content tbody tr:last-child td {
+    border-bottom: none;
+}
+/* ==================== Code Blocks ==================== */
+.message-content .code-block {
+    margin: 1rem 0;
+    padding: 1rem 1.25rem;
+    background: linear-gradient(135deg, rgba(0, 0, 0, 0.4) 0%, rgba(0, 0, 0, 0.3) 100%);
+    border: 1px solid rgba(255, 255, 255, 0.08);
+    border-radius: var(--radius-lg);
+    overflow-x: auto;
+    font-family: 'Consolas', 'Monaco', 'Fira Code', monospace;
+    font-size: 0.85rem;
+    line-height: 1.6;
+}
+.message-content .code-block code {
+    background: none;
+    padding: 0;
+    border: none;
+    color: var(--info);
+}
+/* Inline code */
+.message-content code.inline-code,
+.message-content code {
+    background: rgba(96, 165, 250, 0.12);
+    padding: 0.2rem 0.5rem;
+    border-radius: var(--radius-sm);
+    font-family: 'Consolas', 'Monaco', monospace;
+    font-size: 0.85em;
+    color: var(--info);
+    border: 1px solid rgba(96, 165, 250, 0.2);
+}
+/* Bold and emphasis */
+.message-content strong,
+.message-content b {
+    font-weight: 700;
+    color: var(--text-primary);
+}
+.message-content em,
+.message-content i {
+    font-style: italic;
+    color: var(--text-secondary);
+}
+/* ==================== Dividers ==================== */
+.message-content hr.divider {
+    border: none;
+    height: 1px;
+    background: linear-gradient(90deg, transparent, rgba(168, 85, 247, 0.4), transparent);
+    margin: 1.5rem 0;
+}
+/* ==================== Blockquotes ==================== */
+.message-content blockquote {
+    border-left: 4px solid var(--accent-primary);
+    margin: 1rem 0;
+    padding: 0.75rem 1.25rem;
+    background: linear-gradient(135deg, rgba(168, 85, 247, 0.08) 0%, rgba(96, 165, 250, 0.05) 100%);
+    border-radius: 0 var(--radius-md) var(--radius-md) 0;
+    font-style: italic;
+    color: var(--text-secondary);
+}
+/* ==================== Typing Indicator ==================== */
+.typing-indicator {
+    display: flex;
+    gap: 0.75rem;
+    padding: 1rem;
+}
+.typing-dots {
+    display: flex;
+    gap: 4px;
+    padding: 0.75rem 1rem;
+    background: var(--bg-light);
+    border: 1px solid var(--glass-border);
+    border-radius: var(--radius-lg);
+}
+.typing-dot {
+    width: 6px;
+    height: 6px;
+    background: var(--text-muted);
+    border-radius: 50%;
+    animation: typingBounce 1.4s infinite ease-in-out;
+}
+.typing-dot:nth-child(1) {
+    animation-delay: 0s;
+}
+.typing-dot:nth-child(2) {
+    animation-delay: 0.2s;
+}
+.typing-dot:nth-child(3) {
+    animation-delay: 0.4s;
+}
+@keyframes typingBounce {
+    0%,
+    80%,
+    100% {
+        transform: scale(0.6);
+        opacity: 0.4;
+    }
+    40% {
+        transform: scale(1);
+        opacity: 1;
+    }
+}
+/* ==================== Chat Input ==================== */
+.chat-input-container {
+    padding: 1rem;
+    background: var(--bg-dark);
+    border-top: 1px solid var(--glass-border);
+}
+.chat-input-wrapper {
+    display: flex;
+    gap: 0.75rem;
+    align-items: flex-end;
+}
+.chat-input {
+    flex: 1;
+    padding: 0.75rem 1rem;
+    background: var(--bg-medium);
+    border: 1px solid var(--glass-border);
+    border-radius: var(--radius-lg);
+    color: var(--text-primary);
+    font-size: 0.9rem;
+    resize: none;
+    max-height: 150px;
+    font-family: inherit;
+    transition: all var(--transition-fast);
+}
+.chat-input:focus {
+    outline: none;
+    border-color: rgba(255, 255, 255, 0.2);
+    background: var(--bg-light);
+}
+.chat-input::placeholder {
+    color: var(--text-muted);
+}
+.send-btn {
+    width: 44px;
+    height: 44px;
+    border-radius: 50%;
+    background: var(--accent-primary);
+    border: none;
+    color: var(--bg-darkest);
+    font-size: 1.1rem;
+    cursor: pointer;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    transition: all var(--transition-fast);
+}
+.send-btn:hover {
+    transform: scale(1.05);
+}
+.send-btn:disabled {
+    opacity: 0.3;
+    cursor: not-allowed;
+    transform: none;
+}
+/* Stop Generation Button */
+/* Round error-coloured button with a continuously pulsing shadow ring */
+.stop-btn {
+    width: 44px;
+    height: 44px;
+    border-radius: 50%;
+    background: var(--error);
+    border: none;
+    color: white;
+    font-size: 1rem;
+    cursor: pointer;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    transition: all var(--transition-fast);
+    animation: pulse-stop 1.5s ease-in-out infinite;
+}
+.stop-btn:hover {
+    transform: scale(1.1);
+    background: #dc2626;
+}
+/* Shadow ring that expands from 0 to 8px while fading out */
+@keyframes pulse-stop {
+    0%, 100% {
+        box-shadow: 0 0 0 0 rgba(239, 68, 68, 0.4);
+    }
+    50% {
+        box-shadow: 0 0 0 8px rgba(239, 68, 68, 0);
+    }
+}
+/* Honour the user's "reduce motion" preference: drop the endless pulse */
+@media (prefers-reduced-motion: reduce) {
+    .stop-btn {
+        animation: none;
+    }
+}
+/* ==================== Welcome Screen ==================== */
+.welcome-screen {
+    flex: 1;
+    display: flex;
+    flex-direction: column;
+    align-items: center;
+    justify-content: center;
+    text-align: center;
+    padding: 2rem;
+}
+.welcome-icon {
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    margin-bottom: 1rem;
+}
+.welcome-title {
+    font-size: 1.5rem;
+    font-weight: 700;
+    margin-bottom: 0.5rem;
+}
+.welcome-subtitle {
+    font-size: 0.9rem;
+    color: var(--text-secondary);
+    max-width: 400px;
+}
+.welcome-features {
+    display: flex;
+    gap: 0.75rem;
+    margin-top: 1.5rem;
+}
+.feature-card {
+    padding: 1rem;
+    background: var(--bg-medium);
+    border: 1px solid var(--glass-border);
+    border-radius: var(--radius-lg);
+    width: 100px;
+    text-align: center;
+    transition: all var(--transition-smooth);
+}
+.feature-card:hover {
+    transform: translateY(-3px);
+    background: var(--bg-light);
+}
+.feature-icon {
+    font-size: 1.5rem;
+    margin-bottom: 0.25rem;
+}
+.feature-title {
+    font-size: 0.75rem;
+    font-weight: 600;
+}
+/* ==================== Modal ==================== */
+.modal-overlay {
+    position: fixed;
+    top: 0;
+    left: 0;
+    right: 0;
+    bottom: 0;
+    background: rgba(0, 0, 0, 0.8);
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    z-index: 1000;
+    opacity: 0;
+    visibility: hidden;
+    transition: all var(--transition-smooth);
+}
+.modal-overlay.active {
+    opacity: 1;
+    visibility: visible;
+}
+.modal {
+    background: var(--bg-dark);
+    border: 1px solid var(--glass-border);
+    border-radius: var(--radius-xl);
+    padding: 2rem;
+    width: 100%;
+    max-width: 400px;
+    transform: scale(0.95) translateY(20px);
+    transition: transform var(--transition-smooth);
+}
+.modal-overlay.active .modal {
+    transform: scale(1) translateY(0);
+}
+.modal-header {
+    text-align: center;
+    margin-bottom: 1.5rem;
+}
+.modal-logo {
+    width: 50px;
+    height: 50px;
+    background: var(--bg-light);
+    border: 1px solid var(--glass-border);
+    border-radius: var(--radius-lg);
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    font-size: 1.5rem;
+    margin: 0 auto 1rem;
+}
+.modal-title {
+    font-size: 1.25rem;
+    font-weight: 700;
+}
+.modal-subtitle {
+    font-size: 0.8rem;
+    color: var(--text-muted);
+    margin-top: 0.25rem;
+}
+/* ==================== Auth Tabs ==================== */
+/* Tab bar container shared by .role-tabs and .auth-tabs */
+.auth-tabs,
+.role-tabs {
+    display: flex;
+    margin-bottom: 1rem;
+    padding: 4px;
+    border-radius: var(--radius-md);
+    background: var(--bg-medium);
+}
+/* Individual tab: equal-width, borderless, muted until active */
+.auth-tab,
+.role-tab {
+    flex: 1;
+    padding: 0.65rem;
+    border: none;
+    border-radius: var(--radius-sm);
+    background: transparent;
+    color: var(--text-muted);
+    font-size: 0.85rem;
+    font-weight: 600;
+    cursor: pointer;
+    transition: all var(--transition-fast);
+}
+/* Active role tab: slightly lighter fill with primary text */
+.role-tab.active {
+    background: var(--bg-light);
+    color: var(--text-primary);
+}
+/* Active auth tab: inverted colours (accent fill, dark text) */
+.auth-tab.active {
+    background: var(--accent-primary);
+    color: var(--bg-darkest);
+}
+/* ==================== Form Styles ==================== */
+/* Vertical spacing between consecutive fields */
+.form-group {
+    margin-bottom: 1rem;
+}
+/* Field caption, rendered as a block on its own line */
+.form-label {
+    display: block;
+    font-size: 0.8rem;
+    font-weight: 500;
+    margin-bottom: 0.4rem;
+    color: var(--text-secondary);
+}
+/* Inputs span the full width of their container */
+.form-input {
+    width: 100%;
+    padding: 0.7rem 1rem;
+    background: var(--bg-medium);
+    border: 1px solid var(--glass-border);
+    border-radius: var(--radius-md);
+    color: var(--text-primary);
+    font-size: 0.9rem;
+    transition: all var(--transition-fast);
+}
+/* Focus state: the default outline is removed, so focus is indicated only by
+   the lighter border and background.
+   NOTE(review): confirm this is visible enough for keyboard users. */
+.form-input:focus {
+    outline: none;
+    border-color: rgba(255, 255, 255, 0.2);
+    background: var(--bg-light);
+}
+.form-input::placeholder {
+    color: var(--text-muted);
+}
+/* Inline validation / error message shown below a field group */
+.form-error {
+    font-size: 0.8rem;
+    color: var(--error);
+    margin-top: 0.4rem;
+}
+/* Full-width submit button used in the auth forms */
+.auth-btn {
+    width: 100%;
+    padding: 0.8rem;
+    margin-top: 0.5rem;
+}
+/* Button row at the bottom of a modal; buttons share the width equally */
+.modal-actions {
+    display: flex;
+    gap: 0.75rem;
+    margin-top: 1rem;
+}
+.modal-actions .btn {
+    flex: 1;
+}
+/* ==================== Buttons ==================== */
+/* Base button: borderless, rounded, semi-bold label. Variants below override
+   colors, padding and border as needed. */
+.btn {
+    cursor: pointer;
+    position: relative;
+    border: none;
+    border-radius: var(--radius-md);
+    padding: 0.6rem 1.2rem;
+    font-weight: 600;
+    font-size: 0.85rem;
+    transition: all var(--transition-fast);
+}
+/* Primary: accent fill, lifts by 1px on hover */
+.btn-primary {
+    color: var(--bg-darkest);
+    background: var(--accent-primary);
+}
+.btn-primary:hover {
+    transform: translateY(-1px);
+    background: var(--accent-secondary);
+}
+/* Secondary: outlined light surface */
+.btn-secondary {
+    color: var(--text-primary);
+    border: 1px solid var(--glass-border);
+    background: var(--bg-light);
+}
+.btn-secondary:hover {
+    background: var(--bg-hover);
+}
+/* Ghost: transparent, tightly padded button */
+.btn-ghost {
+    padding: 0.4rem;
+    color: var(--text-muted);
+    background: transparent;
+}
+.btn-ghost:hover {
+    background: var(--bg-light);
+    color: var(--text-primary);
+}
+/* Logout: small red-tinted button */
+.btn-logout {
+    font-size: 0.75rem;
+    padding: 0.35rem 0.75rem;
+    color: #f87171;
+    border: 1px solid rgba(248, 113, 113, 0.3);
+    background: rgba(248, 113, 113, 0.15);
+}
+.btn-logout:hover {
+    border-color: rgba(248, 113, 113, 0.5);
+    background: rgba(248, 113, 113, 0.25);
+}
+/* ==================== Document Viewer Modal ==================== */
+/* Larger modal variant: a column layout with a header row on top and a
+   content area that takes the remaining height. Padding is zeroed here
+   because the header and content areas carry their own. */
+.doc-viewer-modal {
+    width: 90%;
+    max-width: 900px;
+    height: 80vh;
+    padding: 0;
+    display: flex;
+    flex-direction: column;
+}
+/* Header row: title on the left, controls on the right */
+.doc-viewer-header {
+    display: flex;
+    justify-content: space-between;
+    align-items: center;
+    padding: 1rem 1.5rem;
+    border-bottom: 1px solid var(--glass-border);
+}
+/* Long titles are truncated with an ellipsis instead of wrapping */
+.doc-viewer-header h3 {
+    font-size: 1rem;
+    font-weight: 600;
+    white-space: nowrap;
+    overflow: hidden;
+    text-overflow: ellipsis;
+}
+/* Content area: fills the remaining height, centers its child and scrolls
+   when the child is larger than the area */
+.doc-viewer-content {
+    flex: 1;
+    overflow: auto;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    padding: 1rem;
+    background: var(--bg-medium);
+}
+/* Embedded frames and images never exceed the content area */
+.doc-viewer-content iframe,
+.doc-viewer-content img {
+    max-width: 100%;
+    max-height: 100%;
+}
+/* Plain-text preview: fills the content area and scrolls independently */
+.doc-text-preview {
+    width: 100%;
+    height: 100%;
+    overflow: auto;
+    padding: 1rem;
+}
+/* Preserve line breaks but wrap long lines rather than scroll horizontally */
+.doc-text-preview pre {
+    white-space: pre-wrap;
+    word-wrap: break-word;
+    font-size: 0.85rem;
+    line-height: 1.6;
+    color: var(--text-secondary);
+}
+/* ==================== Empty State ==================== */
+/* Centered, muted placeholder block */
+.empty-state {
+    color: var(--text-muted);
+    padding: 2rem;
+    text-align: center;
+}
+.empty-icon {
+    margin-bottom: 0.5rem;
+    font-size: 2rem;
+}
+.empty-text {
+    font-size: 0.8rem;
+}
+/* Compact variant: tighter padding and smaller text */
+.empty-state.small {
+    padding: 0.75rem;
+}
+.empty-state.small .empty-text {
+    font-size: 0.75rem;
+}
+/* ==================== Loading ==================== */
+/* 18px ring spinner: a faint full ring whose top edge is recolored with the
+   accent color, rotated continuously by the spin keyframes. The
+   border-top-color line must stay after the border shorthand it overrides. */
+.loading-spinner {
+    width: 18px;
+    height: 18px;
+    border: 2px solid rgba(255, 255, 255, 0.2);
+    border-top-color: var(--accent-primary);
+    border-radius: 50%;
+    animation: spin 0.7s linear infinite;
+}
+/* One full clockwise turn */
+@keyframes spin {
+    to {
+        transform: rotate(360deg);
+    }
+}
+/* ==================== Toast ==================== */
+/* Fixed stack in the bottom-right corner; z-index 2000 is higher than the
+   mobile nav (1000) defined later in this file */
+.toast-container {
+    position: fixed;
+    bottom: 1.5rem;
+    right: 1.5rem;
+    z-index: 2000;
+    display: flex;
+    flex-direction: column;
+    gap: 0.5rem;
+}
+/* A single notification card; slides in from the right when it appears */
+.toast {
+    display: flex;
+    align-items: center;
+    gap: 0.75rem;
+    padding: 0.75rem 1rem;
+    background: var(--bg-dark);
+    border: 1px solid var(--glass-border);
+    border-radius: var(--radius-md);
+    animation: toastSlide 0.3s ease-out;
+    box-shadow: 0 4px 15px rgba(0, 0, 0, 0.4);
+}
+/* Entrance: fade in while moving 50px leftward into place */
+@keyframes toastSlide {
+    from {
+        opacity: 0;
+        transform: translateX(50px);
+    }
+    to {
+        opacity: 1;
+        transform: translateX(0);
+    }
+}
+.toast-message {
+    font-size: 0.85rem;
+}
+/* Unstyled dismiss button */
+.toast-close {
+    background: none;
+    border: none;
+    color: var(--text-muted);
+    cursor: pointer;
+    padding: 0.25rem;
+}
+/* ==================== Utility Classes ==================== */
+/* Flex layout helpers */
+.flex {
+    display: flex;
+}
+.items-center {
+    align-items: center;
+}
+/* Spacing helpers */
+.gap-2 {
+    gap: 0.5rem;
+}
+.mt-3 {
+    margin-top: 0.75rem;
+}
+/* Force-hide an element; !important makes it win over any other display rule */
+.hidden {
+    display: none !important;
+}
+/* ==================== Document Summary Panel ==================== */
+/* Gradient card that slides down into view; position: relative anchors the
+   absolutely positioned close button below */
+.summary-panel {
+    position: relative;
+    background: linear-gradient(135deg, var(--bg-medium), var(--bg-light));
+    border: 1px solid var(--glass-border);
+    border-radius: var(--radius-lg);
+    padding: 1.25rem;
+    margin-bottom: 1rem;
+    animation: summarySlideIn 0.3s ease-out;
+    box-shadow: 0 4px 20px rgba(0, 0, 0, 0.3);
+}
+/* Entrance: fade in while dropping 10px into place */
+@keyframes summarySlideIn {
+    from {
+        opacity: 0;
+        transform: translateY(-10px);
+    }
+    to {
+        opacity: 1;
+        transform: translateY(0);
+    }
+}
+/* Icon + title row */
+.summary-header {
+    display: flex;
+    align-items: center;
+    gap: 0.5rem;
+    margin-bottom: 0.75rem;
+}
+.summary-icon {
+    font-size: 1.25rem;
+}
+/* Title takes the remaining width and is truncated with an ellipsis */
+.summary-title {
+    font-size: 0.9rem;
+    font-weight: 600;
+    color: var(--text-primary);
+    flex: 1;
+    white-space: nowrap;
+    overflow: hidden;
+    text-overflow: ellipsis;
+}
+/* Right padding keeps the text clear of the close button in the corner */
+.summary-content {
+    padding-right: 1.5rem;
+}
+.summary-text {
+    font-size: 0.9rem;
+    line-height: 1.6;
+    color: var(--text-secondary);
+}
+/* Close button pinned to the panel's top-right corner; dimmed until hovered */
+.summary-close {
+    position: absolute;
+    top: 0.75rem;
+    right: 0.75rem;
+    background: none;
+    border: none;
+    color: var(--text-muted);
+    cursor: pointer;
+    padding: 0.25rem;
+    font-size: 0.9rem;
+    transition: color var(--transition-fast);
+    opacity: 0.6;
+}
+.summary-close:hover {
+    color: var(--text-primary);
+    opacity: 1;
+}
+/* ==================== Selected Document State ==================== */
+/* Highlight for the currently selected document row */
+.document-item.selected {
+    background: var(--bg-light);
+    border-color: var(--accent-primary);
+    box-shadow: 0 0 0 1px rgba(255, 255, 255, 0.15);
+}
+/* 3px accent bar along the left edge of the selected row.
+   NOTE(review): relies on .document-item being a positioned ancestor; that
+   rule is outside this section - confirm. */
+.document-item.selected::before {
+    content: '';
+    position: absolute;
+    left: 0;
+    top: 0;
+    bottom: 0;
+    width: 3px;
+    background: var(--accent-primary);
+    border-radius: var(--radius-sm) 0 0 var(--radius-sm);
+}
+/* Emphasize the selected document's name */
+.document-item.selected .doc-name {
+    color: var(--text-primary);
+    font-weight: 600;
+}
+/* ==================== Mobile Navigation Bar ==================== */
+/* Fixed bottom bar, hidden by default; the mobile media query switches it to
+   display: flex. The padding-bottom line must stay after the padding
+   shorthand, which would otherwise reset it. */
+.mobile-nav {
+    display: none;
+    position: fixed;
+    bottom: 0;
+    left: 0;
+    right: 0;
+    height: 70px;
+    background: var(--bg-dark);
+    border-top: 1px solid var(--glass-border);
+    z-index: 1000;
+    justify-content: space-around;
+    align-items: center;
+    padding: 0 1rem;
+    padding-bottom: env(safe-area-inset-bottom, 0);
+    box-shadow: 0 -4px 20px rgba(0, 0, 0, 0.3);
+}
+/* Nav button: icon stacked above a small label */
+.mobile-nav-btn {
+    display: flex;
+    flex-direction: column;
+    align-items: center;
+    justify-content: center;
+    gap: 0.25rem;
+    background: transparent;
+    border: none;
+    color: var(--text-muted);
+    padding: 0.5rem 1.5rem;
+    border-radius: var(--radius-md);
+    cursor: pointer;
+    transition: all var(--transition-fast);
+    min-width: 70px;
+}
+/* Press feedback: shrink slightly while the button is held */
+.mobile-nav-btn:active {
+    transform: scale(0.95);
+}
+/* Currently selected destination */
+.mobile-nav-btn.active {
+    color: var(--accent-primary);
+}
+.mobile-nav-btn .nav-icon {
+    font-size: 1.5rem;
+    line-height: 1;
+}
+.mobile-nav-btn .nav-label {
+    font-size: 0.65rem;
+    font-weight: 600;
+    text-transform: uppercase;
+    letter-spacing: 0.5px;
+}
+/* ==================== Mobile Backdrop ==================== */
+/* Full-viewport dimming layer. Not rendered by default (the mobile media
+   query switches it to display: block); even then it stays transparent and
+   hidden until it gains the .active class, fading via opacity/visibility. */
+.mobile-backdrop {
+    display: none;
+    position: fixed;
+    top: 0;
+    left: 0;
+    right: 0;
+    bottom: 0;
+    background: rgba(0, 0, 0, 0.7);
+    z-index: 500;
+    opacity: 0;
+    visibility: hidden;
+    transition: opacity var(--transition-smooth), visibility var(--transition-smooth);
+}
+/* Shown state */
+.mobile-backdrop.active {
+    opacity: 1;
+    visibility: visible;
+}
+/* ==================== Tablet Breakpoint (768px - 1024px) ==================== */
+/* The query has no lower bound, so these rules also apply below 768px; the
+   mobile media query that follows overrides them where the two overlap. */
+@media screen and (max-width: 1024px) {
+    /* Narrower sidebars */
+    :root {
+        --sidebar-width: 260px;
+    }
+    /* Tighter page padding and gaps */
+    .main-content {
+        padding: 0.75rem;
+        gap: 0.75rem;
+    }
+    .sidebar-section {
+        padding: 0.75rem;
+    }
+    .chat-bucket-filter {
+        padding: 0.6rem 1rem;
+        gap: 0.5rem;
+    }
+    /* Slightly smaller welcome text */
+    .welcome-title {
+        font-size: 1.25rem;
+    }
+    .welcome-subtitle {
+        font-size: 0.85rem;
+    }
+}
+/* ==================== Mobile Breakpoint (<= 768px) ==================== */
+@media screen and (max-width: 768px) {
+    /* Sidebars become 85vw-wide drawers; the collapsed width is zero */
+    :root {
+        --sidebar-width: 85vw;
+        --sidebar-collapsed: 0px;
+    }
+    /* Show mobile navigation */
+    .mobile-nav {
+        display: flex;
+    }
+    /* Render the backdrop element; it stays invisible until it gains .active */
+    .mobile-backdrop {
+        display: block;
+    }
+    /* Ensure app container is above backdrop (backdrop z-index is 500) */
+    .app-container {
+        z-index: 600;
+    }
+    /* Adjust main layout for mobile: a single full-height column with no
+       page-level scrolling */
+    .main-content {
+        padding: 0;
+        gap: 0;
+        flex-direction: column;
+        height: 100vh;
+        overflow: hidden;
+    }
+    /* ===== Off-Canvas Sidebars ===== */
+    /* Sidebars are fixed drawers that sit above the chat (z-index 900) but
+       below the mobile nav (z-index 1000). They are translated fully
+       off-screen until .mobile-open is added. */
+    .sidebar {
+        position: fixed;
+        top: 0;
+        bottom: 70px;
+        /* Above mobile nav */
+        width: var(--sidebar-width);
+        max-width: 320px;
+        z-index: 900;
+        transition: transform var(--transition-smooth);
+        border-radius: 0;
+        background: var(--bg-dark);
+        /* Solid background to prevent blur */
+        box-shadow: 0 0 30px rgba(0, 0, 0, 0.5);
+    }
+    /* Drawer content is always visible and interactive on mobile */
+    .sidebar .sidebar-content {
+        opacity: 1;
+        pointer-events: auto;
+        padding: 1rem;
+        padding-bottom: 2rem;
+    }
+    /* Left drawer: parked off the left edge */
+    .sidebar-left {
+        left: 0;
+        transform: translateX(-100%);
+        border-right: 1px solid var(--glass-border);
+    }
+    .sidebar-left.mobile-open {
+        transform: translateX(0);
+    }
+    /* Right drawer: parked off the right edge */
+    .sidebar-right {
+        right: 0;
+        transform: translateX(100%);
+        border-left: 1px solid var(--glass-border);
+    }
+    .sidebar-right.mobile-open {
+        transform: translateX(0);
+    }
+    /* Hide desktop sidebar toggles on mobile */
+    .sidebar-toggle {
+        display: none;
+    }
+    /* ===== Chat Container Full Width ===== */
+    /* Edge-to-edge column; 70px is the mobile nav's base height */
+    .chat-container {
+        border-radius: 0;
+        border: none;
+        height: calc(100vh - 70px);
+        /* Full height minus mobile nav */
+        display: flex;
+        flex-direction: column;
+    }
+    /* ===== Simplified Chat Header ===== */
+    /* Filter bar may wrap onto several lines */
+    .chat-bucket-filter {
+        padding: 0.75rem;
+        gap: 0.5rem;
+        flex-wrap: wrap;
+    }
+    /* Drop the text label to save horizontal space */
+    .filter-label {
+        display: none;
+    }
+    /* The compact dropdown stretches to fill the row, never below 120px */
+    .chat-bucket-filter .custom-select.compact {
+        flex: 1;
+        min-width: 120px;
+    }
+    /* Smaller header action buttons */
+    .btn-new-chat,
+    .btn-clear-chat {
+        padding: 0.5rem 0.6rem;
+        font-size: 0.7rem;
+    }
+    .btn-new-chat {
+        margin-left: 0;
+    }
+    /* ===== Chat Messages ===== */
+    /* The message list takes the remaining column height; min-height: 0 lets
+       it shrink below its content size inside the flex column */
+    .chat-messages {
+        padding: 0.75rem;
+        gap: 0.75rem;
+        flex: 1;
+        min-height: 0;
+    }
+    /* Bubbles may use almost the full width */
+    .message {
+        max-width: 92%;
+    }
+    .message-content {
+        padding: 0.875rem 1rem;
+        font-size: 0.875rem;
+    }
+    .message-avatar {
+        width: 28px;
+        height: 28px;
+        font-size: 0.8rem;
+    }
+    /* ===== Chat Input ===== */
+    /* flex-shrink: 0 keeps the input row at its natural height */
+    .chat-input-container {
+        padding: 0.75rem;
+        margin-bottom: 70px;
+        /* Space for mobile nav */
+        background: var(--bg-dark);
+        border-top: 1px solid var(--glass-border);
+        flex-shrink: 0;
+    }
+    .chat-input {
+        font-size: 16px;
+        /* Prevents iOS zoom on focus */
+        padding: 0.875rem 1rem;
+    }
+    /* 48px square send button */
+    .send-btn {
+        width: 48px;
+        height: 48px;
+        font-size: 1.2rem;
+    }
+    /* ===== Welcome Screen ===== */
+    .welcome-screen {
+        padding: 1.5rem 1rem;
+    }
+    /* !important is needed to override a width set with higher priority
+       elsewhere - presumably an inline style; TODO confirm */
+    .welcome-icon img {
+        width: 160px !important;
+    }
+    .welcome-title {
+        font-size: 1.2rem;
+    }
+    .welcome-subtitle {
+        font-size: 0.85rem;
+        max-width: 300px;
+    }
+    /* Feature cards wrap onto multiple centered rows */
+    .welcome-features {
+        flex-wrap: wrap;
+        justify-content: center;
+    }
+    .feature-card {
+        width: 85px;
+        padding: 0.75rem;
+    }
+    .feature-icon {
+        font-size: 1.25rem;
+    }
+    .feature-title {
+        font-size: 0.7rem;
+    }
+    /* ===== Modal Responsiveness ===== */
+    /* Modals drop the desktop max-width, use nearly the full screen width,
+       and scroll internally once taller than 90% of the viewport */
+    .modal {
+        width: 95%;
+        max-width: none;
+        margin: 1rem;
+        padding: 1.5rem;
+        max-height: 90vh;
+        overflow-y: auto;
+    }
+    .modal-title {
+        font-size: 1.1rem;
+    }
+    .modal-subtitle {
+        font-size: 0.75rem;
+    }
+    .form-input {
+        font-size: 16px;
+        /* Prevents iOS zoom */
+        padding: 0.875rem 1rem;
+    }
+    /* Taller submit button */
+    .auth-btn {
+        padding: 1rem;
+        font-size: 0.9rem;
+    }
+    /* Stack the action buttons vertically */
+    .modal-actions {
+        flex-direction: column;
+    }
+    /* ===== Document Viewer Modal ===== */
+    /* Full-screen viewer on mobile. The viewer element also carries the .modal
+       class, whose mobile rules (in this media query and in the 480px one) set
+       padding, margin and max-height with the same single-class specificity
+       and would otherwise win or cap the size. The compound selector outranks
+       them, so the resets below apply at every mobile width. */
+    .modal.doc-viewer-modal {
+        width: 100%;
+        height: 100%;
+        max-width: 100%;
+        max-height: none;
+        margin: 0;
+        padding: 0;
+        border-radius: 0;
+    }
+    .doc-viewer-header {
+        padding: 0.875rem 1rem;
+    }
+    .doc-viewer-header h3 {
+        font-size: 0.9rem;
+    }
+    /* ===== Sidebar Content Adjustments ===== */
+    /* Sections take their natural height instead of flexing or being capped */
+    .documents-section,
+    .chat-history-section {
+        max-height: none;
+        flex: 0 0 auto;
+        min-height: 0;
+    }
+    .section-body {
+        max-height: 80vh;
+        /* Cap each section body at 80% of the viewport height on mobile */
+    }
+    .sidebar-section {
+        padding: 1rem;
+    }
+    .sidebar-title {
+        font-size: 0.8rem;
+    }
+    /* !important overrides a padding declared with higher priority elsewhere */
+    .user-section {
+        padding: 1rem !important;
+    }
+    .user-avatar {
+        width: 36px;
+        height: 36px;
+        font-size: 0.9rem;
+    }
+    /* First span inside .user-details */
+    .user-details span:first-child {
+        font-size: 0.95rem;
+    }
+    .user-role {
+        font-size: 0.75rem;
+    }
+    /* Slightly larger logout button */
+    .btn-logout {
+        padding: 0.4rem 0.8rem;
+        font-size: 0.8rem;
+    }
+    /* ===== Upload Zone ===== */
+    .upload-zone {
+        padding: 1.25rem;
+    }
+    .upload-icon {
+        font-size: 1.75rem;
+    }
+    .upload-title {
+        font-size: 0.85rem;
+    }
+    /* ===== Document & Chat History Items ===== */
+    .document-item {
+        padding: 0.75rem;
+    }
+    .doc-icon {
+        width: 36px;
+        height: 36px;
+    }
+    .doc-name {
+        font-size: 0.85rem;
+    }
+    /* Row action buttons are forced fully opaque here because there is no
+       hover on touch screens */
+    .doc-view,
+    .doc-delete {
+        opacity: 1;
+        /* Always visible on mobile */
+        padding: 0.5rem;
+        font-size: 0.9rem;
+    }
+    .chat-history-item {
+        padding: 0.75rem;
+    }
+    .chat-history-delete {
+        opacity: 1;
+        /* Always visible on mobile */
+    }
+    /* ===== Bucket Items ===== */
+    .bucket-item {
+        padding: 0.75rem;
+    }
+    .bucket-name {
+        font-size: 0.9rem;
+    }
+    .bucket-delete {
+        opacity: 1;
+        /* Always visible on mobile */
+    }
+    /* ===== Custom Dropdowns ===== */
+    /* Larger padding and text for the dropdown trigger and options */
+    .select-trigger {
+        padding: 0.75rem 1rem;
+    }
+    .select-value {
+        font-size: 0.9rem;
+    }
+    .select-option {
+        padding: 0.875rem 1rem;
+        font-size: 0.9rem;
+    }
+    /* ===== Toast Notifications ===== */
+    /* Toasts span the screen width and sit above the bottom nav. The nav grows
+       by the bottom safe-area inset on notched devices, so the offset adds the
+       same inset. Browsers without env() drop the second declaration and keep
+       the plain 80px fallback. */
+    .toast-container {
+        bottom: 80px;
+        /* Above mobile nav */
+        bottom: calc(80px + env(safe-area-inset-bottom, 0px));
+        left: 1rem;
+        right: 1rem;
+    }
+    .toast {
+        width: 100%;
+    }
+    /* ===== Summary Panel ===== */
+    .summary-panel {
+        padding: 1rem;
+        margin-bottom: 0.75rem;
+    }
+    .summary-title {
+        font-size: 0.85rem;
+    }
+    .summary-text {
+        font-size: 0.85rem;
+    }
+    /* ===== Tables in Messages ===== */
+    /* Negative side margins widen the wrapper beyond the bubble's content box;
+       tables keep a 400px minimum width and scroll horizontally inside it */
+    .message-content .table-wrapper {
+        margin: 1rem -0.5rem;
+        border-radius: var(--radius-md);
+        overflow-x: auto;
+    }
+    .message-content table {
+        font-size: 0.8rem;
+        min-width: 400px;
+    }
+    .message-content th,
+    .message-content td {
+        padding: 0.6rem 0.75rem;
+    }
+    /* ===== Code Blocks ===== */
+    /* Slight negative side margins widen code blocks the same way */
+    .message-content .code-block {
+        padding: 0.875rem 1rem;
+        font-size: 0.8rem;
+        margin: 0.75rem -0.25rem;
+    }
+    /* ===== Lists ===== */
+    /* 2rem of left padding on each list item */
+    .message-content .formatted-list li {
+        padding: 0.5rem 0.5rem 0.5rem 2rem;
+    }
+}
+/* ==================== Small Mobile (<= 480px) ==================== */
+/* Further tightening on top of the mobile rules above */
+@media screen and (max-width: 480px) {
+    /* Shorter nav with less horizontal padding */
+    .mobile-nav {
+        height: 65px;
+        padding: 0 0.5rem;
+    }
+    .mobile-nav-btn {
+        min-width: 60px;
+        padding: 0.4rem 1rem;
+    }
+    .mobile-nav-btn .nav-icon {
+        font-size: 1.35rem;
+    }
+    .mobile-nav-btn .nav-label {
+        font-size: 0.6rem;
+    }
+    /* Drawers cover the full screen width */
+    .sidebar {
+        max-width: 100%;
+        width: 100%;
+    }
+    .chat-bucket-filter {
+        padding: 0.6rem;
+    }
+    .message {
+        max-width: 95%;
+    }
+    .message-content {
+        padding: 0.75rem 0.875rem;
+        font-size: 0.85rem;
+    }
+    .message-avatar {
+        width: 26px;
+        height: 26px;
+    }
+    /* !important mirrors the 768px rule for the same image */
+    .welcome-icon img {
+        width: 140px !important;
+    }
+    .welcome-title {
+        font-size: 1.1rem;
+    }
+    .welcome-subtitle {
+        font-size: 0.8rem;
+    }
+    /* Tighter modal padding and margin */
+    .modal {
+        padding: 1.25rem;
+        margin: 0.5rem;
+    }
+    .role-tabs,
+    .auth-tabs {
+        padding: 3px;
+    }
+    .role-tab,
+    .auth-tab {
+        padding: 0.6rem 0.5rem;
+        font-size: 0.8rem;
+    }
+    .form-label {
+        font-size: 0.75rem;
+    }
+}
+/* ==================== Landscape Mobile ==================== */
+/* Vertical space is scarce in landscape: the nav shrinks to 55px and shows
+   icons only, and the areas that reserve room for it use the same 55px */
+@media screen and (max-width: 768px) and (orientation: landscape) {
+    .mobile-nav {
+        height: 55px;
+    }
+    .main-content {
+        padding-bottom: 55px;
+    }
+    .sidebar {
+        bottom: 55px;
+    }
+    /* Icon-only buttons */
+    .mobile-nav-btn .nav-label {
+        display: none;
+    }
+    .mobile-nav-btn .nav-icon {
+        font-size: 1.5rem;
+    }
+    /* Welcome content is laid out side by side instead of stacked */
+    .welcome-screen {
+        padding: 1rem;
+        flex-direction: row;
+        gap: 2rem;
+    }
+    /* The row gap handles spacing, so the individual margins are removed */
+    .welcome-icon,
+    .welcome-title,
+    .welcome-subtitle {
+        margin: 0;
+    }
+}
+/* ==================== Touch Device Optimizations ==================== */
+/* Matches devices whose primary input cannot hover and has coarse pointing
+   accuracy, regardless of screen width */
+@media (hover: none) and (pointer: coarse) {
+    /* Larger touch targets */
+    .btn {
+        min-height: 44px;
+    }
+    .btn-ghost {
+        min-width: 44px;
+        min-height: 44px;
+    }
+    .document-item,
+    .bucket-item,
+    .chat-history-item {
+        min-height: 48px;
+    }
+    /* Remove hover effects that don't work on touch */
+    .upload-zone:hover {
+        transform: none;
+    }
+    .feature-card:hover {
+        transform: none;
+    }
+    /* Always show action buttons */
+    .doc-view,
+    .doc-delete,
+    .bucket-delete,
+    .chat-history-delete {
+        opacity: 1;
+    }
+}
+/* ==================== Safe Area Support (iPhone X+) ==================== */
+/* Grows the bottom nav, and the space reserved for it, by the device's bottom
+   safe-area inset. This block comes after the 480px and landscape media
+   queries and uses selectors of the same specificity, so its 70px-based values
+   would override their smaller nav heights. Those breakpoints are therefore
+   restated here, in the same order, with the inset added. */
+@supports (padding-bottom: env(safe-area-inset-bottom)) {
+    .mobile-nav {
+        padding-bottom: env(safe-area-inset-bottom);
+        height: calc(70px + env(safe-area-inset-bottom));
+    }
+    @media screen and (max-width: 768px) {
+        .main-content {
+            padding-bottom: calc(70px + env(safe-area-inset-bottom));
+        }
+        .sidebar {
+            bottom: calc(70px + env(safe-area-inset-bottom));
+        }
+    }
+    /* Small mobile: keep the 65px nav */
+    @media screen and (max-width: 480px) {
+        .mobile-nav {
+            height: calc(65px + env(safe-area-inset-bottom));
+        }
+    }
+    /* Landscape mobile: keep the 55px nav and the matching offsets */
+    @media screen and (max-width: 768px) and (orientation: landscape) {
+        .mobile-nav {
+            height: calc(55px + env(safe-area-inset-bottom));
+        }
+        .main-content {
+            padding-bottom: calc(55px + env(safe-area-inset-bottom));
+        }
+        .sidebar {
+            bottom: calc(55px + env(safe-area-inset-bottom));
+        }
+    }
+}
+/* ==================== Reduced Motion ==================== */
+/* Honors the user's reduced-motion preference: movement-based transitions
+   and entrance animations are switched off so elements simply appear. */
+@media (prefers-reduced-motion: reduce) {
+    .sidebar,
+    .mobile-backdrop,
+    .modal,
+    .message {
+        transition: none;
+    }
+    /* .toast and .summary-panel use slide-in keyframes (toastSlide and
+       summarySlideIn); without the animation they render in their final state */
+    .typing-dot,
+    .toast,
+    .summary-panel {
+        animation: none;
+    }
+}

static/images/WhatsApp Image 2025-12-23 at 5.10.00 PM.jpeg ADDED Viewed

static/index.html ADDED Viewed

	@@ -0,0 +1,411 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <!-- Encoding, responsive viewport and page description -->
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <meta name="description" content="AI-powered document intelligence platform with bucket organization.">
+    <title>Iribl AI - Document Intelligence</title>
+    <!-- Inter webfont: preconnect to both Google Fonts hosts, then load the stylesheet -->
+    <link rel="preconnect" href="https://fonts.googleapis.com">
+    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
+    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap" rel="stylesheet">
+    <!-- Application stylesheet, referenced by a root-relative path -->
+    <link rel="stylesheet" href="/css/styles.css">
+    <!-- Favicon: an inline SVG data URI that renders an emoji glyph -->
+    <link rel="icon" type="image/svg+xml"
+        href="data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'><text y='.9em' font-size='90'>🧠</text></svg>">
+</head>
+<body>
+    <!-- Toast Container: empty in the markup and presumably filled by script.
+         Marked as a polite live region so added messages are announced by
+         screen readers. -->
+    <div class="toast-container" id="toastContainer" role="status" aria-live="polite"></div>
+    <!-- Mobile Backdrop Overlay -->
+    <div class="mobile-backdrop" id="mobileBackdrop"></div>
+    <!-- Mobile Bottom Navigation: the emoji icons are decorative, so they are
+         hidden from assistive technology; each button keeps its text label and
+         title as its accessible name. -->
+    <nav class="mobile-nav" id="mobileNav" aria-label="Mobile navigation">
+        <button type="button" class="mobile-nav-btn" id="mobileLeftToggle" title="Menu">
+            <span class="nav-icon" aria-hidden="true">☰</span>
+            <span class="nav-label">Menu</span>
+        </button>
+        <button type="button" class="mobile-nav-btn active" id="mobileChatToggle" title="Chat">
+            <span class="nav-icon" aria-hidden="true">💬</span>
+            <span class="nav-label">Chat</span>
+        </button>
+        <button type="button" class="mobile-nav-btn" id="mobileRightToggle" title="Documents">
+            <span class="nav-icon" aria-hidden="true">📚</span>
+            <span class="nav-label">Docs</span>
+        </button>
+    </nav>
+    <!-- Auth Modal: role switcher, sign-in / sign-up switcher, and three forms
+         (admin login, admin registration, employee login). Only the login
+         form is visible initially; the other two start with the "hidden"
+         class. Every label is tied to its input via for/id, and autocomplete
+         hints tell browsers and password managers what each field holds. -->
+    <div class="modal-overlay" id="authModal">
+        <div class="modal glass-panel" role="dialog" aria-labelledby="authModalTitle">
+            <div class="modal-header">
+                <div class="modal-logo" aria-hidden="true">🧠</div>
+                <h2 class="modal-title" id="authModalTitle">Welcome to Iribl AI</h2>
+                <p class="modal-subtitle">Your intelligent document companion</p>
+            </div>
+            <div class="role-tabs">
+                <button class="role-tab active" data-role="admin">👔 Admin</button>
+                <button class="role-tab" data-role="employee">Employee</button>
+            </div>
+            <div class="auth-tabs" id="authTabs">
+                <button class="auth-tab active" data-tab="login">Sign In</button>
+                <button class="auth-tab" data-tab="register">Sign Up</button>
+            </div>
+            <!-- Admin sign-in -->
+            <form id="loginForm" class="auth-form">
+                <div class="form-group">
+                    <label class="form-label" for="loginUsername">Username</label>
+                    <input type="text" class="form-input" id="loginUsername" name="username"
+                        placeholder="Enter your username" autocomplete="username" required>
+                </div>
+                <div class="form-group">
+                    <label class="form-label" for="loginPassword">Password</label>
+                    <input type="password" class="form-input" id="loginPassword" name="password"
+                        placeholder="Enter your password" autocomplete="current-password" required>
+                </div>
+                <div id="loginError" class="form-error hidden"></div>
+                <button type="submit" class="btn btn-primary auth-btn">
+                    <span class="btn-text">Sign In</span>
+                    <span class="btn-loader hidden">
+                        <div class="loading-spinner"></div>
+                    </span>
+                </button>
+            </form>
+            <!-- Admin registration -->
+            <form id="registerForm" class="auth-form hidden">
+                <div class="form-group">
+                    <label class="form-label" for="registerUsername">Username</label>
+                    <input type="text" class="form-input" id="registerUsername" name="username"
+                        placeholder="Choose a username" autocomplete="username" required minlength="3">
+                </div>
+                <div class="form-group">
+                    <label class="form-label" for="registerEmail">Email (optional)</label>
+                    <input type="email" class="form-input" id="registerEmail" name="email"
+                        placeholder="your@email.com" autocomplete="email">
+                </div>
+                <div class="form-group">
+                    <label class="form-label" for="registerPassword">Password</label>
+                    <input type="password" class="form-input" id="registerPassword" name="password"
+                        placeholder="Create a password" autocomplete="new-password" required minlength="6">
+                </div>
+                <div id="registerError" class="form-error hidden"></div>
+                <button type="submit" class="btn btn-primary auth-btn">
+                    <span class="btn-text">Create Admin Account</span>
+                    <span class="btn-loader hidden">
+                        <div class="loading-spinner"></div>
+                    </span>
+                </button>
+            </form>
+            <!-- Employee sign-in (email acts as the account identifier) -->
+            <form id="employeeLoginForm" class="auth-form hidden">
+                <div class="form-group">
+                    <label class="form-label" for="employeeLoginEmail">Email</label>
+                    <input type="email" class="form-input" id="employeeLoginEmail" name="email"
+                        placeholder="Enter your work email" autocomplete="username" required>
+                </div>
+                <div class="form-group">
+                    <label class="form-label" for="employeeLoginPassword">Password</label>
+                    <input type="password" class="form-input" id="employeeLoginPassword" name="password"
+                        placeholder="Enter your password" autocomplete="current-password" required>
+                </div>
+                <div id="employeeLoginError" class="form-error hidden"></div>
+                <button type="submit" class="btn btn-primary auth-btn">
+                    <span class="btn-text">Sign In as Employee</span>
+                    <span class="btn-loader hidden">
+                        <div class="loading-spinner"></div>
+                    </span>
+                </button>
+            </form>
+        </div>
+    </div>
+    <!-- Add Employee Modal: collects an email and password for a new employee.
+         Labels are tied to their inputs via for/id. The fields describe
+         someone else's credentials, so autocomplete is set to keep the browser
+         from offering the current user's saved login. -->
+    <div class="modal-overlay" id="addEmployeeModal">
+        <div class="modal glass-panel" role="dialog" aria-labelledby="addEmployeeModalTitle">
+            <div class="modal-header">
+                <h2 class="modal-title" id="addEmployeeModalTitle">➕ Add Employee</h2>
+                <p class="modal-subtitle">Create login credentials for a new employee</p>
+            </div>
+            <form id="addEmployeeForm" class="auth-form">
+                <div class="form-group">
+                    <label class="form-label" for="addEmployeeEmail">Employee Email</label>
+                    <input type="email" class="form-input" id="addEmployeeEmail" name="email"
+                        placeholder="employee@company.com" autocomplete="off" required>
+                </div>
+                <div class="form-group">
+                    <label class="form-label" for="addEmployeePassword">Password</label>
+                    <input type="password" class="form-input" id="addEmployeePassword" name="password"
+                        placeholder="Create a password" autocomplete="new-password" required minlength="6">
+                </div>
+                <div id="addEmployeeError" class="form-error hidden"></div>
+                <div class="modal-actions">
+                    <button type="button" class="btn btn-secondary" id="cancelAddEmployee">Cancel</button>
+                    <button type="submit" class="btn btn-primary">
+                        <span class="btn-text">Add Employee</span>
+                        <span class="btn-loader hidden">
+                            <div class="loading-spinner"></div>
+                        </span>
+                    </button>
+                </div>
+            </form>
+        </div>
+    </div>
+    <!-- Create Bucket Modal: a required name and an optional description.
+         Labels are tied to their inputs via for/id so assistive technology
+         announces them and clicking a label focuses its field. -->
+    <div class="modal-overlay" id="createBucketModal">
+        <div class="modal glass-panel" role="dialog" aria-labelledby="createBucketModalTitle">
+            <div class="modal-header">
+                <h2 class="modal-title" id="createBucketModalTitle">📁 Create Bucket</h2>
+                <p class="modal-subtitle">Organize your documents into buckets</p>
+            </div>
+            <form id="createBucketForm" class="auth-form">
+                <div class="form-group">
+                    <label class="form-label" for="createBucketName">Bucket Name</label>
+                    <input type="text" class="form-input" id="createBucketName" name="name"
+                        placeholder="e.g., Project Alpha" required>
+                </div>
+                <div class="form-group">
+                    <label class="form-label" for="createBucketDescription">Description (optional)</label>
+                    <input type="text" class="form-input" id="createBucketDescription" name="description"
+                        placeholder="Brief description...">
+                </div>
+                <div id="createBucketError" class="form-error hidden"></div>
+                <div class="modal-actions">
+                    <button type="button" class="btn btn-secondary" id="cancelCreateBucket">Cancel</button>
+                    <button type="submit" class="btn btn-primary">
+                        <span class="btn-text">Create Bucket</span>
+                        <span class="btn-loader hidden">
+                            <div class="loading-spinner"></div>
+                        </span>
+                    </button>
+                </div>
+            </form>
+        </div>
+    </div>
+    <!-- Document Viewer Modal: a title bar with a close button above a content
+         area that initially holds a loading spinner. The close button's only
+         visible content is a "✕" glyph, so it carries an explicit accessible
+         name; the dialog is labelled by its title. -->
+    <div class="modal-overlay" id="docViewerModal">
+        <div class="modal glass-panel doc-viewer-modal" role="dialog" aria-labelledby="docViewerTitle">
+            <div class="doc-viewer-header">
+                <h3 id="docViewerTitle">Document</h3>
+                <button type="button" class="btn btn-ghost" id="closeDocViewer"
+                    aria-label="Close document viewer">✕</button>
+            </div>
+            <div class="doc-viewer-content" id="docViewerContent">
+                <div class="loading-spinner"></div>
+            </div>
+        </div>
+    </div>
+    <!-- Main App Container -->
+    <div class="app-container" id="appContainer">
+        <main class="main-content">
+            <!-- LEFT SIDEBAR -->
+            <aside class="sidebar sidebar-left" id="leftSidebar">
+                <div class="sidebar-toggle" id="leftToggle" title="Toggle sidebar">
+                    <span class="toggle-icon">◀</span>
+                </div>
+                <div class="sidebar-content">
+                    <!-- User Info -->
+                    <section class="sidebar-section glass-panel user-section">
+                        <div class="user-info-row">
+                            <div class="user-badge">
+                                <div class="user-avatar" id="userAvatar">U</div>
+                                <div class="user-details">
+                                    <span id="userName">User</span>
+                                    <span class="user-role" id="userRole">Admin</span>
+                                </div>
+                            </div>
+                            <button class="btn btn-logout" id="logoutBtn" title="Sign Out">logout</button>
+                        </div>
+                    </section>
+                    <!-- Admin: Employees -->
+                    <section class="sidebar-section glass-panel collapsible hidden" id="adminSection">
+                        <div class="section-header" data-target="employeesList">
+                            <h3 class="sidebar-title"><span></span> Employees</h3>
+                            <div class="section-actions">
+                                <button class="btn btn-ghost" id="addEmployeeBtn" title="Add">➕</button>
+                                <span class="collapse-icon">▼</span>
+                            </div>
+                        </div>
+                        <div class="section-body" id="employeesList">
+                            <div class="empty-state small">
+                                <div class="empty-text">No employees</div>
+                            </div>
+                        </div>
+                    </section>
+                    <!-- Buckets -->
+                    <section class="sidebar-section glass-panel collapsible">
+                        <div class="section-header" data-target="bucketsBody">
+                            <h3 class="sidebar-title"><span>📁</span> Buckets</h3>
+                            <div class="section-actions">
+                                <button class="btn btn-ghost" id="createBucketBtn" title="Create">➕</button>
+                                <span class="collapse-icon">▼</span>
+                            </div>
+                        </div>
+                        <div class="section-body" id="bucketsBody">
+                            <div class="buckets-list" id="bucketsList">
+                                <div class="bucket-item active" data-id="">
+                                    <span class="bucket-name">📂 All Documents</span>
+                                </div>
+                            </div>
+                        </div>
+                    </section>
+                    <!-- Upload -->
+                    <section class="sidebar-section glass-panel collapsible">
+                        <div class="section-header" data-target="uploadBody">
+                            <h3 class="sidebar-title"><span></span> Upload</h3>
+                            <span class="collapse-icon">▼</span>
+                        </div>
+                        <div class="section-body" id="uploadBody">
+                            <div class="custom-select" id="uploadBucketWrapper">
+                                <div class="select-trigger" id="uploadBucketTrigger">
+                                    <span class="select-value">No Bucket (General)</span>
+                                    <span class="select-arrow">▼</span>
+                                </div>
+                                <div class="select-options" id="uploadBucketOptions"></div>
+                                <input type="hidden" id="uploadBucketSelect" value="">
+                            </div>
+                            <div class="upload-zone" id="uploadZone">
+                                <input type="file" id="fileInput" hidden multiple
+                                    accept=".pdf,.doc,.docx,.ppt,.pptx,.xls,.xlsx,.txt,.md,.png,.jpg,.jpeg,.gif,.webp">
+                                <div class="upload-icon">📁</div>
+                                <div class="upload-title">Drop files here</div>
+                                <div class="upload-subtitle">or click to browse</div>
+                            </div>
+                            <div id="uploadProgress" class="hidden">
+                                <div class="progress-info">
+                                    <div class="loading-spinner"></div>
+                                    <span id="uploadStatus">Uploading...</span>
+                                </div>
+                                <div class="progress-bar">
+                                    <div class="progress-fill" id="progressFill"></div>
+                                </div>
+                                <button class="btn btn-cancel-upload" id="cancelUploadBtn" title="Cancel Upload">✕
+                                    Cancel</button>
+                            </div>
+                        </div>
+                    </section>
+                </div>
+            </aside>
+            <!-- CHAT AREA (CENTER) -->
+            <section class="chat-container glass-panel">
+                <!-- Bucket Filter -->
+                <div class="chat-bucket-filter">
+                    <span class="filter-label">🔍 Querying:</span>
+                    <div class="custom-select compact" id="chatBucketWrapper">
+                        <div class="select-trigger" id="chatBucketTrigger">
+                            <span class="select-value">All Documents</span>
+                            <span class="select-arrow">▼</span>
+                        </div>
+                        <div class="select-options" id="chatBucketOptions"></div>
+                        <input type="hidden" id="chatBucketSelect" value="">
+                    </div>
+                    <button class="btn btn-new-chat" id="newChatBtn" title="Start New Chat">➕ New Chat</button>
+                    <button class="btn btn-clear-chat" id="clearChatBtnTop" title="Clear Current Chat">Clear</button>
+                </div>
+                <!-- Messages -->
+                <div class="chat-messages" id="chatMessages">
+                    <!-- Document Summary Panel -->
+                    <div class="summary-panel hidden" id="summaryPanel">
+                        <div class="summary-header">
+                            <span class="summary-icon">📄</span>
+                            <span class="summary-title" id="summaryTitle">Document Summary</span>
+                        </div>
+                        <div class="summary-content" id="summaryContent">
+                            <div class="summary-text" id="summaryText"></div>
+                        </div>
+                        <button class="summary-close" id="summaryClose" title="Close summary">✕</button>
+                    </div>
+                    <div class="welcome-screen" id="welcomeScreen">
+                        <div class="welcome-icon"><img src="/images/WhatsApp Image 2025-12-23 at 5.10.00 PM.jpeg"
+                                alt="Logo"
+                                style="width: 220px; height: auto; filter: invert(1); mix-blend-mode: lighten;"></div>
+                        <h2 class="welcome-title">Welcome to Iribl AI</h2>
+                        <p class="welcome-subtitle">
+                            Upload documents, organize into buckets, and ask questions.
+                        </p>
+                    </div>
+                </div>
+                <!-- Typing Indicator -->
+                <div class="typing-indicator hidden" id="typingIndicator">
+                    <div class="message-avatar">🧠</div>
+                    <div class="typing-dots">
+                        <div class="typing-dot"></div>
+                        <div class="typing-dot"></div>
+                        <div class="typing-dot"></div>
+                    </div>
+                </div>
+                <!-- Chat Input -->
+                <div class="chat-input-container">
+                    <div class="chat-input-wrapper">
+                        <textarea class="chat-input" id="chatInput" placeholder="Ask anything about your documents..."
+                            rows="1"></textarea>
+                        <button class="send-btn" id="sendBtn" disabled title="Send">➤</button>
+                        <button class="stop-btn hidden" id="stopBtn" title="Stop generating">■</button>
+                    </div>
+                </div>
+            </section>
+            <!-- RIGHT SIDEBAR -->
+            <aside class="sidebar sidebar-right" id="rightSidebar">
+                <div class="sidebar-toggle" id="rightToggle" title="Toggle sidebar">
+                    <span class="toggle-icon">▶</span>
+                </div>
+                <div class="sidebar-content">
+                    <!-- Documents -->
+                    <section class="sidebar-section glass-panel documents-section collapsible">
+                        <div class="section-header" data-target="documentsBody">
+                            <h3 class="sidebar-title">
+                                <span>📚</span> Documents
+                                <span id="docCount" class="doc-count">(0)</span>
+                            </h3>
+                            <span class="collapse-icon">▼</span>
+                        </div>
+                        <div class="section-body documents-body" id="documentsBody">
+                            <div class="documents-list" id="documentsList">
+                                <div class="empty-state">
+                                    <div class="empty-icon">📭</div>
+                                    <div class="empty-text">No documents yet</div>
+                                </div>
+                            </div>
+                        </div>
+                    </section>
+                    <!-- Chat History -->
+                    <section class="sidebar-section glass-panel chat-history-section collapsible">
+                        <div class="section-header" data-target="chatHistoryBody">
+                            <h3 class="sidebar-title">
+                                <span>💬</span> Chat History
+                                <span id="chatHistoryCount" class="doc-count">(0)</span>
+                            </h3>
+                            <div class="section-actions">
+                                <button class="btn btn-ghost" id="clearChatBtn" title="Clear current chat">🗑️</button>
+                                <span class="collapse-icon">▼</span>
+                            </div>
+                        </div>
+                        <div class="section-body chat-history-body" id="chatHistoryBody">
+                            <div class="chat-history-list" id="chatHistoryList">
+                                <div class="empty-state small">
+                                    <div class="empty-text">No chats yet</div>
+                                </div>
+                            </div>
+                        </div>
+                    </section>
+                </div>
+            </aside>
+        </main>
+    </div>
+    <script src="/js/app.js"></script>
+</body>
+</html>

static/js/app.js ADDED Viewed

	@@ -0,0 +1,1798 @@

+/**
+ * Iribl AI - Document Intelligence Application
+ * With Dual Sidebars, Collapsible Sections, and Animated Dropdowns
+ */
+// ==================== App State ====================
+const state = {
+    token: localStorage.getItem('Iribl AI_token'),
+    user: JSON.parse(localStorage.getItem('Iribl AI_user') || 'null'),
+    documents: [],
+    buckets: [],
+    employees: [],
+    messages: [],
+    summaries: {},  // doc_id -> summary text cache
+    selectedDocument: null,  // Currently selected document for summary display
+    selectedBucket: '',
+    chatBucket: '',
+    isLoading: false,
+    currentRole: 'admin',
+    // Chat History
+    chatHistory: JSON.parse(localStorage.getItem('Iribl AI_chat_history') || '[]'),
+    currentChatId: null,
+    // Upload cancellation
+    uploadCancelled: false,
+    currentUploadAbortController: null,
+    // Stream abort controller for stopping generation
+    streamAbortController: null
+};
+// ==================== DOM Elements ====================
+const elements = {
+    // Auth
+    authModal: document.getElementById('authModal'),
+    loginForm: document.getElementById('loginForm'),
+    registerForm: document.getElementById('registerForm'),
+    employeeLoginForm: document.getElementById('employeeLoginForm'),
+    authTabs: document.getElementById('authTabs'),
+    loginError: document.getElementById('loginError'),
+    registerError: document.getElementById('registerError'),
+    employeeLoginError: document.getElementById('employeeLoginError'),
+    // Modals
+    addEmployeeModal: document.getElementById('addEmployeeModal'),
+    addEmployeeForm: document.getElementById('addEmployeeForm'),
+    addEmployeeError: document.getElementById('addEmployeeError'),
+    addEmployeeBtn: document.getElementById('addEmployeeBtn'),
+    cancelAddEmployee: document.getElementById('cancelAddEmployee'),
+    createBucketModal: document.getElementById('createBucketModal'),
+    createBucketForm: document.getElementById('createBucketForm'),
+    createBucketError: document.getElementById('createBucketError'),
+    createBucketBtn: document.getElementById('createBucketBtn'),
+    cancelCreateBucket: document.getElementById('cancelCreateBucket'),
+    docViewerModal: document.getElementById('docViewerModal'),
+    docViewerTitle: document.getElementById('docViewerTitle'),
+    docViewerContent: document.getElementById('docViewerContent'),
+    closeDocViewer: document.getElementById('closeDocViewer'),
+    // Sidebars
+    leftSidebar: document.getElementById('leftSidebar'),
+    rightSidebar: document.getElementById('rightSidebar'),
+    leftToggle: document.getElementById('leftToggle'),
+    rightToggle: document.getElementById('rightToggle'),
+    // App
+    appContainer: document.getElementById('appContainer'),
+    userName: document.getElementById('userName'),
+    userAvatar: document.getElementById('userAvatar'),
+    userRole: document.getElementById('userRole'),
+    logoutBtn: document.getElementById('logoutBtn'),
+    // Admin
+    adminSection: document.getElementById('adminSection'),
+    employeesList: document.getElementById('employeesList'),
+    // Buckets
+    bucketsList: document.getElementById('bucketsList'),
+    // Custom Dropdowns
+    uploadBucketWrapper: document.getElementById('uploadBucketWrapper'),
+    uploadBucketTrigger: document.getElementById('uploadBucketTrigger'),
+    uploadBucketOptions: document.getElementById('uploadBucketOptions'),
+    uploadBucketSelect: document.getElementById('uploadBucketSelect'),
+    chatBucketWrapper: document.getElementById('chatBucketWrapper'),
+    chatBucketTrigger: document.getElementById('chatBucketTrigger'),
+    chatBucketOptions: document.getElementById('chatBucketOptions'),
+    chatBucketSelect: document.getElementById('chatBucketSelect'),
+    // Upload
+    uploadZone: document.getElementById('uploadZone'),
+    fileInput: document.getElementById('fileInput'),
+    uploadProgress: document.getElementById('uploadProgress'),
+    uploadStatus: document.getElementById('uploadStatus'),
+    progressFill: document.getElementById('progressFill'),
+    cancelUploadBtn: document.getElementById('cancelUploadBtn'),
+    // Documents
+    documentsList: document.getElementById('documentsList'),
+    docCount: document.getElementById('docCount'),
+    // Chat
+    chatMessages: document.getElementById('chatMessages'),
+    welcomeScreen: document.getElementById('welcomeScreen'),
+    chatInput: document.getElementById('chatInput'),
+    sendBtn: document.getElementById('sendBtn'),
+    stopBtn: document.getElementById('stopBtn'),
+    typingIndicator: document.getElementById('typingIndicator'),
+    toastContainer: document.getElementById('toastContainer'),
+    // Summary Panel
+    summaryPanel: document.getElementById('summaryPanel'),
+    summaryTitle: document.getElementById('summaryTitle'),
+    summaryText: document.getElementById('summaryText'),
+    summaryClose: document.getElementById('summaryClose'),
+    // Chat History
+    newChatBtn: document.getElementById('newChatBtn'),
+    clearChatBtn: document.getElementById('clearChatBtn'),
+    clearChatBtnTop: document.getElementById('clearChatBtnTop'),
+    chatHistoryList: document.getElementById('chatHistoryList'),
+    chatHistoryCount: document.getElementById('chatHistoryCount'),
+    // Mobile Navigation
+    mobileNav: document.getElementById('mobileNav'),
+    mobileBackdrop: document.getElementById('mobileBackdrop'),
+    mobileLeftToggle: document.getElementById('mobileLeftToggle'),
+    mobileChatToggle: document.getElementById('mobileChatToggle'),
+    mobileRightToggle: document.getElementById('mobileRightToggle')
+};
+// ==================== Toast ====================
+/**
+ * Show a transient toast notification inside the toast container.
+ *
+ * The toast is built from DOM nodes and `message` is assigned via
+ * `textContent`, so text coming from the server (e.g. API error strings)
+ * is always rendered literally and can never inject markup.
+ *
+ * @param {string} message - Text to display.
+ * @param {string} [type='info'] - 'success', 'error' or 'info'; also used as
+ *     a CSS class on the toast. Unknown types fall back to the info icon.
+ */
+function showToast(message, type = 'info') {
+    const icons = { success: '✅', error: '❌', info: 'ℹ️' };
+    const toast = document.createElement('div');
+    toast.className = `toast ${type}`;
+    const icon = document.createElement('span');
+    icon.className = 'toast-icon';
+    // An unrecognised type would otherwise show the literal text "undefined".
+    icon.textContent = icons[type] || icons.info;
+    const text = document.createElement('span');
+    text.className = 'toast-message';
+    text.textContent = message;
+    const closeBtn = document.createElement('button');
+    closeBtn.className = 'toast-close';
+    closeBtn.textContent = '✕';
+    closeBtn.addEventListener('click', () => toast.remove());
+    toast.append(icon, text, closeBtn);
+    elements.toastContainer.appendChild(toast);
+    // Auto-dismiss after 4 seconds unless the user already closed it.
+    setTimeout(() => { if (toast.parentElement) toast.remove(); }, 4000);
+}
+// ==================== Sidebar Toggle ====================
+function initSidebars() {
+    elements.leftToggle.addEventListener('click', () => {
+        elements.leftSidebar.classList.toggle('collapsed');
+        const icon = elements.leftToggle.querySelector('.toggle-icon');
+        icon.textContent = elements.leftSidebar.classList.contains('collapsed') ? '▶' : '◀';
+    });
+    elements.rightToggle.addEventListener('click', () => {
+        elements.rightSidebar.classList.toggle('collapsed');
+        const icon = elements.rightToggle.querySelector('.toggle-icon');
+        icon.textContent = elements.rightSidebar.classList.contains('collapsed') ? '◀' : '▶';
+    });
+}
+// ==================== Mobile Navigation ====================
+function initMobileNavigation() {
+    // Check if we're on mobile
+    const isMobile = () => window.innerWidth <= 768;
+    // Close all sidebars on mobile
+    function closeMobileSidebars() {
+        elements.leftSidebar.classList.remove('mobile-open');
+        elements.rightSidebar.classList.remove('mobile-open');
+        elements.mobileBackdrop.classList.remove('active');
+        document.body.style.overflow = '';
+        // Reset nav button active states
+        elements.mobileLeftToggle.classList.remove('active');
+        elements.mobileRightToggle.classList.remove('active');
+        elements.mobileChatToggle.classList.add('active');
+    }
+    // Open left sidebar (Menu)
+    function openLeftSidebar() {
+        closeMobileSidebars();
+        elements.leftSidebar.classList.add('mobile-open');
+        elements.mobileBackdrop.classList.add('active');
+        document.body.style.overflow = 'hidden';
+        elements.mobileLeftToggle.classList.add('active');
+        elements.mobileChatToggle.classList.remove('active');
+    }
+    // Open right sidebar (Docs)
+    function openRightSidebar() {
+        closeMobileSidebars();
+        elements.rightSidebar.classList.add('mobile-open');
+        elements.mobileBackdrop.classList.add('active');
+        document.body.style.overflow = 'hidden';
+        elements.mobileRightToggle.classList.add('active');
+        elements.mobileChatToggle.classList.remove('active');
+    }
+    // Mobile nav button handlers
+    elements.mobileLeftToggle.addEventListener('click', () => {
+        if (elements.leftSidebar.classList.contains('mobile-open')) {
+            closeMobileSidebars();
+        } else {
+            openLeftSidebar();
+        }
+    });
+    elements.mobileChatToggle.addEventListener('click', () => {
+        closeMobileSidebars();
+    });
+    elements.mobileRightToggle.addEventListener('click', () => {
+        if (elements.rightSidebar.classList.contains('mobile-open')) {
+            closeMobileSidebars();
+        } else {
+            openRightSidebar();
+        }
+    });
+    // Close sidebar when backdrop is clicked
+    elements.mobileBackdrop.addEventListener('click', closeMobileSidebars);
+    // Close sidebar on window resize to desktop
+    window.addEventListener('resize', () => {
+        if (!isMobile()) {
+            closeMobileSidebars();
+            // Reset any mobile-specific classes
+            elements.leftSidebar.classList.remove('mobile-open');
+            elements.rightSidebar.classList.remove('mobile-open');
+        }
+    });
+    // Close sidebar when starting a new chat or after uploading (for better UX)
+    const originalStartNewChat = window.startNewChat;
+    if (typeof originalStartNewChat === 'function') {
+        window.startNewChat = function () {
+            if (isMobile()) closeMobileSidebars();
+            return originalStartNewChat.apply(this, arguments);
+        };
+    }
+    // Handle swipe gestures (optional enhancement)
+    let touchStartX = 0;
+    let touchEndX = 0;
+    document.addEventListener('touchstart', (e) => {
+        touchStartX = e.changedTouches[0].screenX;
+    }, { passive: true });
+    document.addEventListener('touchend', (e) => {
+        if (!isMobile()) return;
+        touchEndX = e.changedTouches[0].screenX;
+        const swipeDistance = touchEndX - touchStartX;
+        const minSwipeDistance = 80;
+        // Swipe right from left edge - open left sidebar
+        if (touchStartX < 30 && swipeDistance > minSwipeDistance) {
+            openLeftSidebar();
+        }
+        // Swipe left from right edge - open right sidebar
+        if (touchStartX > window.innerWidth - 30 && swipeDistance < -minSwipeDistance) {
+            openRightSidebar();
+        }
+        // Swipe to close sidebars
+        if (elements.leftSidebar.classList.contains('mobile-open') && swipeDistance < -minSwipeDistance) {
+            closeMobileSidebars();
+        }
+        if (elements.rightSidebar.classList.contains('mobile-open') && swipeDistance > minSwipeDistance) {
+            closeMobileSidebars();
+        }
+    }, { passive: true });
+}
+// ==================== Collapsible Sections ====================
+function initCollapsible() {
+    document.querySelectorAll('.collapsible .section-header').forEach(header => {
+        header.addEventListener('click', (e) => {
+            // Don't toggle if clicking on action buttons
+            if (e.target.closest('.btn')) return;
+            const section = header.closest('.collapsible');
+            section.classList.toggle('collapsed');
+        });
+    });
+}
+// ==================== Custom Dropdowns ====================
+function initCustomDropdowns() {
+    // Close dropdowns when clicking outside
+    document.addEventListener('click', (e) => {
+        document.querySelectorAll('.custom-select.open').forEach(select => {
+            if (!select.contains(e.target)) {
+                select.classList.remove('open');
+            }
+        });
+    });
+    // Upload bucket dropdown
+    elements.uploadBucketTrigger.addEventListener('click', (e) => {
+        e.stopPropagation();
+        elements.uploadBucketWrapper.classList.toggle('open');
+        elements.chatBucketWrapper.classList.remove('open');
+    });
+    // Chat bucket dropdown
+    elements.chatBucketTrigger.addEventListener('click', (e) => {
+        e.stopPropagation();
+        elements.chatBucketWrapper.classList.toggle('open');
+        elements.uploadBucketWrapper.classList.remove('open');
+    });
+}
+function updateDropdownOptions() {
+    // Upload dropdown options
+    let uploadOptions = `<div class="select-option active" data-value=""><span class="option-icon">📂</span> No Bucket (General)</div>`;
+    uploadOptions += state.buckets.map(b =>
+        `<div class="select-option" data-value="${b.bucket_id}"><span class="option-icon">📁</span> ${b.name}</div>`
+    ).join('');
+    elements.uploadBucketOptions.innerHTML = uploadOptions;
+    // Chat dropdown options
+    let chatOptions = `<div class="select-option active" data-value=""><span class="option-icon">📂</span> All Documents</div>`;
+    chatOptions += state.buckets.map(b =>
+        `<div class="select-option" data-value="${b.bucket_id}"><span class="option-icon">📁</span> ${b.name}</div>`
+    ).join('');
+    elements.chatBucketOptions.innerHTML = chatOptions;
+    // Add click handlers
+    elements.uploadBucketOptions.querySelectorAll('.select-option').forEach(opt => {
+        opt.addEventListener('click', () => {
+            const value = opt.dataset.value;
+            elements.uploadBucketSelect.value = value;
+            elements.uploadBucketTrigger.querySelector('.select-value').textContent = opt.textContent.trim();
+            elements.uploadBucketOptions.querySelectorAll('.select-option').forEach(o => o.classList.remove('active'));
+            opt.classList.add('active');
+            elements.uploadBucketWrapper.classList.remove('open');
+        });
+    });
+    elements.chatBucketOptions.querySelectorAll('.select-option').forEach(opt => {
+        opt.addEventListener('click', () => {
+            const value = opt.dataset.value;
+            elements.chatBucketSelect.value = value;
+            state.chatBucket = value;
+            elements.chatBucketTrigger.querySelector('.select-value').textContent = opt.textContent.trim();
+            elements.chatBucketOptions.querySelectorAll('.select-option').forEach(o => o.classList.remove('active'));
+            opt.classList.add('active');
+            elements.chatBucketWrapper.classList.remove('open');
+        });
+    });
+}
+// ==================== Auth ====================
+function showAuthModal() {
+    elements.authModal.classList.add('active');
+    elements.appContainer.style.filter = 'blur(5px)';
+}
+function hideAuthModal() {
+    elements.authModal.classList.remove('active');
+    elements.appContainer.style.filter = '';
+}
+function updateAuthUI() {
+    if (state.user) {
+        elements.userName.textContent = state.user.username;
+        elements.userAvatar.textContent = state.user.username.charAt(0).toUpperCase();
+        elements.userRole.textContent = state.user.role === 'admin' ? 'Admin' : 'Employee';
+        if (state.user.role === 'admin') {
+            elements.adminSection.classList.remove('hidden');
+            loadEmployees();
+        } else {
+            elements.adminSection.classList.add('hidden');
+        }
+        hideAuthModal();
+    } else {
+        showAuthModal();
+    }
+}
+// Role tabs
+document.querySelectorAll('.role-tab').forEach(tab => {
+    tab.addEventListener('click', () => {
+        document.querySelectorAll('.role-tab').forEach(t => t.classList.remove('active'));
+        tab.classList.add('active');
+        state.currentRole = tab.dataset.role;
+        if (state.currentRole === 'admin') {
+            elements.authTabs.classList.remove('hidden');
+            elements.loginForm.classList.remove('hidden');
+            elements.registerForm.classList.add('hidden');
+            elements.employeeLoginForm.classList.add('hidden');
+        } else {
+            elements.authTabs.classList.add('hidden');
+            elements.loginForm.classList.add('hidden');
+            elements.registerForm.classList.add('hidden');
+            elements.employeeLoginForm.classList.remove('hidden');
+        }
+    });
+});
+// Auth tabs
+document.querySelectorAll('.auth-tab').forEach(tab => {
+    tab.addEventListener('click', () => {
+        document.querySelectorAll('.auth-tab').forEach(t => t.classList.remove('active'));
+        tab.classList.add('active');
+        const tabName = tab.dataset.tab;
+        elements.loginForm.classList.toggle('hidden', tabName !== 'login');
+        elements.registerForm.classList.toggle('hidden', tabName !== 'register');
+    });
+});
+// Admin Login
+elements.loginForm.addEventListener('submit', async (e) => {
+    e.preventDefault();
+    const formData = new FormData(e.target);
+    const btn = e.target.querySelector('.auth-btn');
+    btn.querySelector('.btn-text').classList.add('hidden');
+    btn.querySelector('.btn-loader').classList.remove('hidden');
+    elements.loginError.classList.add('hidden');
+    try {
+        const response = await fetch('/api/auth/login', {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({ username: formData.get('username'), password: formData.get('password'), role: 'admin' })
+        });
+        const data = await response.json();
+        if (response.ok) {
+            state.token = data.token;
+            state.user = { user_id: data.user_id, username: data.username, role: data.role };
+            localStorage.setItem('Iribl AI_token', state.token);
+            localStorage.setItem('Iribl AI_user', JSON.stringify(state.user));
+            updateAuthUI();
+            loadBuckets();
+            loadDocuments();
+            loadChatHistoryFromServer();
+            showToast('Welcome back!', 'success');
+        } else {
+            elements.loginError.textContent = data.error;
+            elements.loginError.classList.remove('hidden');
+        }
+    } catch (error) {
+        elements.loginError.textContent = 'Connection error';
+        elements.loginError.classList.remove('hidden');
+    }
+    btn.querySelector('.btn-text').classList.remove('hidden');
+    btn.querySelector('.btn-loader').classList.add('hidden');
+});
+// Admin Register
+elements.registerForm.addEventListener('submit', async (e) => {
+    e.preventDefault();
+    const formData = new FormData(e.target);
+    const btn = e.target.querySelector('.auth-btn');
+    btn.querySelector('.btn-text').classList.add('hidden');
+    btn.querySelector('.btn-loader').classList.remove('hidden');
+    elements.registerError.classList.add('hidden');
+    try {
+        const response = await fetch('/api/auth/register/admin', {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({ username: formData.get('username'), email: formData.get('email'), password: formData.get('password') })
+        });
+        const data = await response.json();
+        if (response.ok) {
+            state.token = data.token;
+            state.user = { user_id: data.user_id, username: data.username, role: data.role };
+            localStorage.setItem('Iribl AI_token', state.token);
+            localStorage.setItem('Iribl AI_user', JSON.stringify(state.user));
+            updateAuthUI();
+            loadBuckets();
+            loadDocuments();
+            loadChatHistoryFromServer();
+            showToast('Account created!', 'success');
+        } else {
+            elements.registerError.textContent = data.error;
+            elements.registerError.classList.remove('hidden');
+        }
+    } catch (error) {
+        elements.registerError.textContent = 'Connection error';
+        elements.registerError.classList.remove('hidden');
+    }
+    btn.querySelector('.btn-text').classList.remove('hidden');
+    btn.querySelector('.btn-loader').classList.add('hidden');
+});
+// Employee Login
+elements.employeeLoginForm.addEventListener('submit', async (e) => {
+    e.preventDefault();
+    const formData = new FormData(e.target);
+    const btn = e.target.querySelector('.auth-btn');
+    btn.querySelector('.btn-text').classList.add('hidden');
+    btn.querySelector('.btn-loader').classList.remove('hidden');
+    elements.employeeLoginError.classList.add('hidden');
+    try {
+        const response = await fetch('/api/auth/login', {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({ username: formData.get('email'), password: formData.get('password'), role: 'employee' })
+        });
+        const data = await response.json();
+        if (response.ok) {
+            state.token = data.token;
+            state.user = { user_id: data.user_id, username: data.username, role: data.role };
+            localStorage.setItem('Iribl AI_token', state.token);
+            localStorage.setItem('Iribl AI_user', JSON.stringify(state.user));
+            updateAuthUI();
+            loadBuckets();
+            loadDocuments();
+            loadChatHistoryFromServer();
+            showToast('Welcome!', 'success');
+        } else {
+            elements.employeeLoginError.textContent = data.error;
+            elements.employeeLoginError.classList.remove('hidden');
+        }
+    } catch (error) {
+        elements.employeeLoginError.textContent = 'Connection error';
+        elements.employeeLoginError.classList.remove('hidden');
+    }
+    btn.querySelector('.btn-text').classList.remove('hidden');
+    btn.querySelector('.btn-loader').classList.add('hidden');
+});
+// Logout
+elements.logoutBtn.addEventListener('click', () => {
+    state.token = null;
+    state.user = null;
+    state.documents = [];
+    state.buckets = [];
+    state.messages = [];
+    localStorage.removeItem('Iribl AI_token');
+    localStorage.removeItem('Iribl AI_user');
+    updateAuthUI();
+    renderDocuments();
+    renderMessages();
+    showToast('Logged out', 'info');
+});
+// ==================== Employees ====================
+async function loadEmployees() {
+    if (!state.token || state.user?.role !== 'admin') return;
+    try {
+        const response = await fetch('/api/admin/employees', { headers: { 'Authorization': `Bearer ${state.token}` } });
+        if (response.ok) {
+            const data = await response.json();
+            state.employees = data.employees;
+            renderEmployees();
+        }
+    } catch (error) { console.error('Failed to load employees:', error); }
+}
+function renderEmployees() {
+    if (state.employees.length === 0) {
+        elements.employeesList.innerHTML = `<div class="empty-state small"><div class="empty-text">No employees</div></div>`;
+        return;
+    }
+    elements.employeesList.innerHTML = state.employees.map(emp => `
+        <div class="employee-item">
+            <span class="employee-email">${emp.email || emp.username}</span>
+            <button class="btn btn-ghost" onclick="deleteEmployee('${emp.user_id}')" title="Remove">🗑️</button>
+        </div>
+    `).join('');
+}
+elements.addEmployeeBtn.addEventListener('click', (e) => {
+    e.stopPropagation();
+    elements.addEmployeeModal.classList.add('active');
+    elements.addEmployeeError.classList.add('hidden');
+    elements.addEmployeeForm.reset();
+});
+elements.cancelAddEmployee.addEventListener('click', () => elements.addEmployeeModal.classList.remove('active'));
+elements.addEmployeeForm.addEventListener('submit', async (e) => {
+    e.preventDefault();
+    const formData = new FormData(e.target);
+    const btn = e.target.querySelector('.btn-primary');
+    btn.querySelector('.btn-text').classList.add('hidden');
+    btn.querySelector('.btn-loader').classList.remove('hidden');
+    try {
+        const response = await fetch('/api/admin/employees', {
+            method: 'POST',
+            headers: { 'Authorization': `Bearer ${state.token}`, 'Content-Type': 'application/json' },
+            body: JSON.stringify({ email: formData.get('email'), password: formData.get('password') })
+        });
+        const data = await response.json();
+        if (response.ok) {
+            elements.addEmployeeModal.classList.remove('active');
+            loadEmployees();
+            showToast('Employee added!', 'success');
+        } else {
+            elements.addEmployeeError.textContent = data.error;
+            elements.addEmployeeError.classList.remove('hidden');
+        }
+    } catch (error) {
+        elements.addEmployeeError.textContent = 'Connection error';
+        elements.addEmployeeError.classList.remove('hidden');
+    }
+    btn.querySelector('.btn-text').classList.remove('hidden');
+    btn.querySelector('.btn-loader').classList.add('hidden');
+});
+async function deleteEmployee(employeeId) {
+    try {
+        const response = await fetch(`/api/admin/employees/${employeeId}`, { method: 'DELETE', headers: { 'Authorization': `Bearer ${state.token}` } });
+        if (response.ok) {
+            state.employees = state.employees.filter(e => e.user_id !== employeeId);
+            renderEmployees();
+            showToast('Employee removed', 'success');
+        }
+    } catch (error) { showToast('Failed to remove employee', 'error'); }
+}
+// ==================== Buckets ====================
+async function loadBuckets() {
+    if (!state.token) return;
+    try {
+        const response = await fetch('/api/buckets', { headers: { 'Authorization': `Bearer ${state.token}` } });
+        if (response.ok) {
+            const data = await response.json();
+            state.buckets = data.buckets;
+            renderBuckets();
+            updateDropdownOptions();
+        }
+    } catch (error) { console.error('Failed to load buckets:', error); }
+}
+function renderBuckets() {
+    let html = `<div class="bucket-item ${state.selectedBucket === '' ? 'active' : ''}" onclick="selectBucket('')">
+        <span class="bucket-name">📂 All Documents</span>
+    </div>`;
+    html += state.buckets.map(b => `
+        <div class="bucket-item ${state.selectedBucket === b.bucket_id ? 'active' : ''}" data-id="${b.bucket_id}">
+            <span class="bucket-name" onclick="selectBucket('${b.bucket_id}')">📁 ${b.name}</span>
+            <span class="bucket-count">${b.doc_count}</span>
+            <button class="btn btn-ghost bucket-delete" onclick="event.stopPropagation(); deleteBucket('${b.bucket_id}')">🗑️</button>
+        </div>
+    `).join('');
+    elements.bucketsList.innerHTML = html;
+}
+function selectBucket(bucketId) {
+    state.selectedBucket = bucketId;
+    state.chatBucket = bucketId;  // Sync chat bucket filter
+    // Get bucket name for display
+    const bucketName = bucketId ?
+        (state.buckets.find(b => b.bucket_id === bucketId)?.name || 'Selected Bucket') :
+        '';
+    const displayName = bucketId ? bucketName : 'All Documents';
+    const uploadDisplayName = bucketId ? bucketName : 'No Bucket (General)';
+    // Sync upload bucket dropdown
+    elements.uploadBucketSelect.value = bucketId;
+    elements.uploadBucketTrigger.querySelector('.select-value').textContent = uploadDisplayName;
+    elements.uploadBucketOptions.querySelectorAll('.select-option').forEach(opt => {
+        opt.classList.toggle('active', opt.dataset.value === bucketId);
+    });
+    // Sync chat bucket dropdown
+    elements.chatBucketSelect.value = bucketId;
+    elements.chatBucketTrigger.querySelector('.select-value').textContent = displayName;
+    elements.chatBucketOptions.querySelectorAll('.select-option').forEach(opt => {
+        opt.classList.toggle('active', opt.dataset.value === bucketId);
+    });
+    // Render all filtered components
+    renderBuckets();
+    loadDocuments();
+    renderChatHistory();  // Re-render to filter by bucket
+}
+elements.createBucketBtn.addEventListener('click', (e) => {
+    e.stopPropagation();
+    elements.createBucketModal.classList.add('active');
+    elements.createBucketError.classList.add('hidden');
+    elements.createBucketForm.reset();
+});
+elements.cancelCreateBucket.addEventListener('click', () => elements.createBucketModal.classList.remove('active'));
+elements.createBucketForm.addEventListener('submit', async (e) => {
+    e.preventDefault();
+    const formData = new FormData(e.target);
+    const btn = e.target.querySelector('.btn-primary');
+    btn.querySelector('.btn-text').classList.add('hidden');
+    btn.querySelector('.btn-loader').classList.remove('hidden');
+    try {
+        const response = await fetch('/api/buckets', {
+            method: 'POST',
+            headers: { 'Authorization': `Bearer ${state.token}`, 'Content-Type': 'application/json' },
+            body: JSON.stringify({ name: formData.get('name'), description: formData.get('description') })
+        });
+        const data = await response.json();
+        if (response.ok) {
+            elements.createBucketModal.classList.remove('active');
+            loadBuckets();
+            showToast('Bucket created!', 'success');
+        } else {
+            elements.createBucketError.textContent = data.error;
+            elements.createBucketError.classList.remove('hidden');
+        }
+    } catch (error) {
+        elements.createBucketError.textContent = 'Connection error';
+        elements.createBucketError.classList.remove('hidden');
+    }
+    btn.querySelector('.btn-text').classList.remove('hidden');
+    btn.querySelector('.btn-loader').classList.add('hidden');
+});
+async function deleteBucket(bucketId) {
+    try {
+        const response = await fetch(`/api/buckets/${bucketId}`, { method: 'DELETE', headers: { 'Authorization': `Bearer ${state.token}` } });
+        if (response.ok) {
+            if (state.selectedBucket === bucketId) state.selectedBucket = '';
+            loadBuckets();
+            loadDocuments();
+            showToast('Bucket deleted', 'success');
+        }
+    } catch (error) { showToast('Failed to delete bucket', 'error'); }
+}
+// ==================== Documents ====================
+async function loadDocuments() {
+    if (!state.token) return;
+    try {
+        let url = '/api/documents';
+        if (state.selectedBucket) url += `?bucket_id=${state.selectedBucket}`;
+        const response = await fetch(url, { headers: { 'Authorization': `Bearer ${state.token}` } });
+        if (response.ok) {
+            const data = await response.json();
+            state.documents = data.documents;
+            renderDocuments();
+        }
+    } catch (error) { console.error('Failed to load documents:', error); }
+}
+function renderDocuments() {
+    elements.docCount.textContent = `(${state.documents.length})`;
+    if (state.documents.length === 0) {
+        elements.documentsList.innerHTML = `<div class="empty-state"><div class="empty-icon">📭</div><div class="empty-text">No documents yet</div></div>`;
+        return;
+    }
+    const icons = { pdf: '📕', word: '📘', powerpoint: '📙', excel: '📗', image: '🖼️', text: '📄' };
+    elements.documentsList.innerHTML = state.documents.map(doc => `
+        <div class="document-item ${state.selectedDocument === doc.doc_id ? 'selected' : ''}" data-id="${doc.doc_id}" onclick="selectDocument('${doc.doc_id}')">
+            <div class="doc-icon">${icons[doc.doc_type] || '📄'}</div>
+            <div class="doc-info">
+                <div class="doc-name">${doc.filename}</div>
+                <div class="doc-meta">${formatDate(doc.created_at)}</div>
+            </div>
+            <button class="btn btn-ghost doc-view" onclick="event.stopPropagation(); viewDocument('${doc.doc_id}', '${doc.filename}')" title="View">👁️</button>
+            <button class="btn btn-ghost doc-delete" onclick="event.stopPropagation(); deleteDocument('${doc.doc_id}')" title="Delete">🗑️</button>
+        </div>
+    `).join('');
+}
+function formatDate(timestamp) {
+    const date = new Date(timestamp * 1000);
+    const now = new Date();
+    const diff = now - date;
+    if (diff < 60000) return 'Just now';
+    if (diff < 3600000) return `${Math.floor(diff / 60000)}m ago`;
+    if (diff < 86400000) return `${Math.floor(diff / 3600000)}h ago`;
+    return date.toLocaleDateString();
+}
+async function deleteDocument(docId) {
+    try {
+        const response = await fetch(`/api/documents/${docId}`, { method: 'DELETE', headers: { 'Authorization': `Bearer ${state.token}` } });
+        if (response.ok) {
+            state.documents = state.documents.filter(d => d.doc_id !== docId);
+            // Clear selection if deleted doc was selected
+            if (state.selectedDocument === docId) {
+                state.selectedDocument = null;
+                hideSummary();
+            }
+            // Remove from summaries cache
+            delete state.summaries[docId];
+            renderDocuments();
+            loadBuckets();
+            showToast('Document deleted', 'success');
+        }
+    } catch (error) { showToast('Failed to delete', 'error'); }
+}
+// ==================== Document Summary ====================
+function selectDocument(docId) {
+    state.selectedDocument = docId;
+    renderDocuments();
+    displaySummary(docId);
+}
+async function displaySummary(docId) {
+    const doc = state.documents.find(d => d.doc_id === docId);
+    if (!doc) return;
+    // Check if summary is cached
+    if (state.summaries[docId]) {
+        showSummaryPanel(doc.filename, state.summaries[docId].summary);
+    } else {
+        // Show loading state
+        showSummaryPanel(doc.filename, 'Generating summary...');
+        // Fetch summary from server
+        await fetchSummary(docId);
+    }
+}
+async function fetchSummary(docId) {
+    try {
+        const response = await fetch(`/api/documents/${docId}/summary`, {
+            headers: { 'Authorization': `Bearer ${state.token}` }
+        });
+        const data = await response.json();
+        if (response.ok && data.summary) {
+            // Cache the summary
+            state.summaries[docId] = {
+                summary: data.summary,
+                filename: data.filename
+            };
+            // Update display if still selected
+            if (state.selectedDocument === docId) {
+                showSummaryPanel(data.filename, data.summary);
+            }
+        } else {
+            // Show error state
+            if (state.selectedDocument === docId) {
+                showSummaryPanel(data.filename || 'Document', 'Unable to generate summary.');
+            }
+        }
+    } catch (error) {
+        console.error('Failed to fetch summary:', error);
+        if (state.selectedDocument === docId) {
+            const doc = state.documents.find(d => d.doc_id === docId);
+            showSummaryPanel(doc?.filename || 'Document', 'Failed to load summary.');
+        }
+    }
+}
+function showSummaryPanel(filename, summaryText) {
+    elements.summaryPanel.classList.remove('hidden');
+    elements.summaryTitle.textContent = filename;
+    elements.summaryText.textContent = summaryText;
+}
+function hideSummary() {
+    elements.summaryPanel.classList.add('hidden');
+    state.selectedDocument = null;
+    renderDocuments();
+}
+function initSummaryPanel() {
+    elements.summaryClose.addEventListener('click', hideSummary);
+}
+// ==================== Document Viewer ====================
+async function viewDocument(docId, filename) {
+    try {
+        // Fetch the document with proper authorization
+        const response = await fetch(`/api/documents/${docId}/view`, {
+            headers: { 'Authorization': `Bearer ${state.token}` }
+        });
+        if (!response.ok) {
+            showToast('Failed to load document', 'error');
+            return;
+        }
+        // Get the blob and create a URL
+        const blob = await response.blob();
+        const blobUrl = URL.createObjectURL(blob);
+        // Open in a new tab
+        window.open(blobUrl, '_blank');
+    } catch (error) {
+        console.error('Failed to view document:', error);
+        showToast('Failed to open document', 'error');
+    }
+}
+elements.closeDocViewer.addEventListener('click', () => elements.docViewerModal.classList.remove('active'));
+// ==================== Upload ====================
+let currentPollInterval = null;  // Track the current polling interval for cancellation
+function initUpload() {
+    elements.uploadZone.addEventListener('click', () => elements.fileInput.click());
+    elements.fileInput.addEventListener('change', (e) => {
+        if (e.target.files.length > 0) uploadFiles(Array.from(e.target.files));
+    });
+    elements.uploadZone.addEventListener('dragover', (e) => { e.preventDefault(); elements.uploadZone.classList.add('dragover'); });
+    elements.uploadZone.addEventListener('dragleave', () => elements.uploadZone.classList.remove('dragover'));
+    elements.uploadZone.addEventListener('drop', (e) => {
+        e.preventDefault();
+        elements.uploadZone.classList.remove('dragover');
+        if (e.dataTransfer.files.length > 0) uploadFiles(Array.from(e.dataTransfer.files));
+    });
+    // Cancel upload button
+    elements.cancelUploadBtn.addEventListener('click', cancelUpload);
+}
+function cancelUpload() {
+    state.uploadCancelled = true;
+    // Abort any ongoing fetch request
+    if (state.currentUploadAbortController) {
+        state.currentUploadAbortController.abort();
+        state.currentUploadAbortController = null;
+    }
+    // Clear any polling interval
+    if (currentPollInterval) {
+        clearInterval(currentPollInterval);
+        currentPollInterval = null;
+    }
+    // Reset UI
+    elements.uploadProgress.classList.add('hidden');
+    elements.uploadZone.style.pointerEvents = '';
+    elements.fileInput.value = '';
+    elements.progressFill.style.width = '0%';
+    showToast('Upload cancelled', 'info');
+}
+async function uploadFiles(files) {
+    // Reset cancellation state
+    state.uploadCancelled = false;
+    elements.uploadProgress.classList.remove('hidden');
+    elements.uploadZone.style.pointerEvents = 'none';
+    const bucketId = elements.uploadBucketSelect.value;
+    let completed = 0;
+    // Process files sequentially to avoid overwhelming the client,
+    // but the server handles them in background.
+    for (const file of files) {
+        // Check if cancelled before processing each file
+        if (state.uploadCancelled) {
+            break;
+        }
+        elements.uploadStatus.textContent = `Uploading ${file.name}...`;
+        elements.progressFill.style.width = '10%'; // Initial progress
+        const formData = new FormData();
+        formData.append('file', file);
+        formData.append('bucket_id', bucketId);
+        // Create abort controller for this request
+        state.currentUploadAbortController = new AbortController();
+        try {
+            // Initial upload request
+            const response = await fetch('/api/documents/upload', {
+                method: 'POST',
+                headers: { 'Authorization': `Bearer ${state.token}` },
+                body: formData,
+                signal: state.currentUploadAbortController.signal
+            });
+            if (response.status === 202) {
+                // Async processing started
+                const data = await response.json();
+                await pollUploadStatus(data.doc_id, file.name);
+                if (!state.uploadCancelled) {
+                    completed++;
+                }
+            } else if (response.ok) {
+                // Instant completion (legacy or small file)
+                const data = await response.json();
+                handleUploadSuccess(data);
+                completed++;
+            } else {
+                const data = await response.json();
+                showToast(`Failed: ${file.name} - ${data.error}`, 'error');
+            }
+        } catch (e) {
+            if (e.name === 'AbortError') {
+                // Upload was cancelled by user
+                break;
+            }
+            console.error(e);
+            showToast(`Failed to upload ${file.name}`, 'error');
+        }
+    }
+    // Clean up abort controller
+    state.currentUploadAbortController = null;
+    // Only update UI if not cancelled (cancelUpload already handles UI reset)
+    if (!state.uploadCancelled) {
+        elements.uploadProgress.classList.add('hidden');
+        elements.uploadZone.style.pointerEvents = '';
+        elements.fileInput.value = '';
+        elements.progressFill.style.width = '0%';
+        // Load documents first, then show summary
+        await loadDocuments();
+        loadBuckets();
+    }
+}
+async function pollUploadStatus(docId, filename) {
+    return new Promise((resolve, reject) => {
+        currentPollInterval = setInterval(async () => {
+            // Check if cancelled
+            if (state.uploadCancelled) {
+                clearInterval(currentPollInterval);
+                currentPollInterval = null;
+                resolve();
+                return;
+            }
+            try {
+                const response = await fetch(`/api/documents/${docId}/status`, {
+                    headers: { 'Authorization': `Bearer ${state.token}` }
+                });
+                if (response.ok) {
+                    const statusData = await response.json();
+                    // Update UI
+                    elements.uploadStatus.textContent = `Processing ${filename}: ${statusData.message || '...'}`;
+                    // Map 0-100 progress to UI width (keeping 10% buffer)
+                    if (statusData.progress) {
+                        elements.progressFill.style.width = `${Math.max(10, statusData.progress)}%`;
+                    }
+                    if (statusData.status === 'completed') {
+                        clearInterval(currentPollInterval);
+                        currentPollInterval = null;
+                        if (statusData.result) {
+                            handleUploadSuccess(statusData.result);
+                        }
+                        resolve();
+                    } else if (statusData.status === 'failed') {
+                        clearInterval(currentPollInterval);
+                        currentPollInterval = null;
+                        showToast(`Processing failed: ${filename} - ${statusData.error}`, 'error');
+                        resolve(); // Resolve anyway to continue with next file
+                    }
+                } else {
+                    // Status check failed - might be network glitch, ignore once
+                }
+            } catch (e) {
+                console.error("Polling error", e);
+                // Continue polling despite error
+            }
+        }, 2000); // Check every 2 seconds
+    });
+}
+function handleUploadSuccess(data) {
+    showToast(`Ready: ${data.filename}`, 'success');
+    // Cache the summary
+    if (data.summary) {
+        state.summaries[data.doc_id] = {
+            summary: data.summary,
+            filename: data.filename
+        };
+    }
+    // Auto-display this document
+    state.selectedDocument = data.doc_id;
+    // We will re-render documents shortly after this returns
+    if (data.summary) {
+        // Defer slightly to ensure DOM is ready if needed
+        setTimeout(() => {
+            showSummaryPanel(data.filename, data.summary);
+        }, 500);
+    }
+}
+// ==================== Chat ====================
+function initChat() {
+    elements.chatInput.addEventListener('input', () => {
+        elements.chatInput.style.height = 'auto';
+        elements.chatInput.style.height = Math.min(elements.chatInput.scrollHeight, 150) + 'px';
+        elements.sendBtn.disabled = !elements.chatInput.value.trim();
+    });
+    elements.chatInput.addEventListener('keydown', (e) => {
+        if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); sendMessage(); }
+    });
+    elements.sendBtn.addEventListener('click', sendMessage);
+    // Stop generation button
+    elements.stopBtn.addEventListener('click', stopGeneration);
+}
+function stopGeneration() {
+    if (state.streamAbortController) {
+        state.streamAbortController.abort();
+        state.streamAbortController = null;
+    }
+    // Hide stop button, show send button
+    elements.stopBtn.classList.add('hidden');
+    elements.sendBtn.classList.remove('hidden');
+    elements.typingIndicator.classList.add('hidden');
+    state.isLoading = false;
+    // Add a note that generation was stopped
+    if (state.messages.length > 0) {
+        const lastMsg = state.messages[state.messages.length - 1];
+        if (lastMsg.role === 'assistant' && lastMsg.content) {
+            lastMsg.content += '\n\n*[Generation stopped]*';
+            renderMessages();
+            saveCurrentChat();
+        }
+    }
+    showToast('Generation stopped', 'info');
+}
+async function sendMessage() {
+    const message = elements.chatInput.value.trim();
+    if (!message || state.isLoading) return;
+    elements.chatInput.value = '';
+    elements.chatInput.style.height = 'auto';
+    elements.sendBtn.disabled = true;
+    elements.welcomeScreen.classList.add('hidden');
+    // Create a chat ID if this is the first message
+    if (state.messages.length === 0 && !state.currentChatId) {
+        state.currentChatId = Date.now().toString();
+    }
+    const targetChatId = state.currentChatId;
+    addMessage('user', message);
+    elements.typingIndicator.classList.remove('hidden');
+    state.isLoading = true;
+    scrollToBottom();
+    // Show stop button, hide send button
+    elements.sendBtn.classList.add('hidden');
+    elements.stopBtn.classList.remove('hidden');
+    // Create abort controller for this request
+    state.streamAbortController = new AbortController();
+    try {
+        // Use streaming endpoint for instant response
+        const response = await fetch('/api/chat/stream', {
+            method: 'POST',
+            headers: {
+                'Authorization': `Bearer ${state.token}`,
+                'Content-Type': 'application/json'
+            },
+            body: JSON.stringify({
+                message: message,
+                bucket_id: state.chatBucket || null,
+                chat_id: state.currentChatId
+            }),
+            signal: state.streamAbortController.signal
+        });
+        if (!response.ok) {
+            throw new Error('Stream request failed');
+        }
+        elements.typingIndicator.classList.add('hidden');
+        // Create a placeholder message for streaming
+        let streamingContent = '';
+        let sources = [];
+        // Add empty assistant message and get reference to its content element
+        state.messages.push({ role: 'assistant', content: '', sources: [] });
+        renderMessages();
+        scrollToBottom();
+        // Get direct reference to the streaming message element for fast updates
+        const messageElements = elements.chatMessages.querySelectorAll('.message.assistant .message-content');
+        const streamingElement = messageElements[messageElements.length - 1];
+        const reader = response.body.getReader();
+        const decoder = new TextDecoder();
+        // Throttle DOM updates for smooth rendering (update every 50ms max)
+        let lastUpdateTime = 0;
+        let pendingUpdate = false;
+        const UPDATE_INTERVAL = 50; // ms
+        while (true) {
+            const { done, value } = await reader.read();
+            if (done) break;
+            const text = decoder.decode(value);
+            const lines = text.split('\n');
+            for (const line of lines) {
+                if (line.startsWith('data: ')) {
+                    try {
+                        const data = JSON.parse(line.slice(6));
+                        if (data.type === 'sources') {
+                            sources = data.sources || [];
+                        } else if (data.type === 'chunk' || data.type === 'content') {
+                            // Support both 'chunk' (legacy) and 'content' (specialized queries)
+                            streamingContent += data.content;
+                            // Update state for saving later
+                            state.messages[state.messages.length - 1].content = streamingContent;
+                            state.messages[state.messages.length - 1].sources = sources;
+                            // Throttled DOM update for smooth rendering
+                            const now = Date.now();
+                            if (now - lastUpdateTime >= UPDATE_INTERVAL) {
+                                if (streamingElement) {
+                                    streamingElement.innerHTML = formatContent(streamingContent);
+                                }
+                                lastUpdateTime = now;
+                                pendingUpdate = false;
+                            } else {
+                                pendingUpdate = true;
+                            }
+                            // No auto-scroll during streaming - stay at current position
+                        } else if (data.type === 'done') {
+                            // Final update with any pending content
+                            if (pendingUpdate && streamingElement) {
+                                streamingElement.innerHTML = formatContent(streamingContent);
+                            }
+                            // Streaming complete - do final render for proper formatting
+                            renderMessages();
+                            saveCurrentChat();
+                            // No auto-scroll - user stays at current position
+                        } else if (data.type === 'error') {
+                            state.messages[state.messages.length - 1].content = data.content || 'Error generating response';
+                            renderMessages();
+                        }
+                    } catch (e) {
+                        // Skip malformed JSON
+                    }
+                }
+            }
+        }
+    } catch (err) {
+        elements.typingIndicator.classList.add('hidden');
+        // Only show error if not aborted by user
+        if (err.name !== 'AbortError') {
+            addMessageToChat(targetChatId, 'assistant', 'Connection error. Please try again.');
+        }
+    }
+    // Cleanup: hide stop button, show send button
+    elements.stopBtn.classList.add('hidden');
+    elements.sendBtn.classList.remove('hidden');
+    state.streamAbortController = null;
+    state.isLoading = false;
+    // No auto-scroll - user stays at current position
+}
+function addMessage(role, content, sources = []) {
+    // Create a new chat ID if this is the first message
+    if (state.messages.length === 0 && !state.currentChatId) {
+        state.currentChatId = Date.now().toString();
+    }
+    state.messages.push({ role, content, sources });
+    renderMessages();
+    // Auto-save after assistant responds (complete exchange)
+    if (role === 'assistant') {
+        saveCurrentChat();
+    }
+}
+// Add message to a specific chat (handles case where user switched chats during loading)
+function addMessageToChat(chatId, role, content, sources = []) {
+    // If this is the current chat, add directly
+    if (chatId === state.currentChatId) {
+        state.messages.push({ role, content, sources });
+        renderMessages();
+        saveCurrentChat();
+    } else {
+        // Add to the chat in history
+        const chatIndex = state.chatHistory.findIndex(c => c.id === chatId);
+        if (chatIndex >= 0) {
+            state.chatHistory[chatIndex].messages.push({ role, content, sources });
+            saveChatHistory();
+            syncChatToServer(state.chatHistory[chatIndex]);
+            renderChatHistory();
+            showToast('Response added to previous chat', 'info');
+        }
+    }
+}
+function renderMessages() {
+    // Preserve summary panel state before re-rendering
+    const summaryVisible = !elements.summaryPanel.classList.contains('hidden');
+    const summaryTitle = elements.summaryTitle.textContent;
+    const summaryText = elements.summaryText.textContent;
+    if (state.messages.length === 0) {
+        // Clear chat messages and show welcome screen
+        elements.chatMessages.innerHTML = '';
+        elements.welcomeScreen.classList.remove('hidden');
+        elements.chatMessages.appendChild(elements.welcomeScreen);
+        // Re-show summary if it was visible
+        if (summaryVisible) {
+            elements.summaryPanel.classList.remove('hidden');
+        }
+        return;
+    }
+    elements.welcomeScreen.classList.add('hidden');
+    const html = state.messages.map((msg, i) => {
+        const avatar = msg.role === 'user' ? (state.user?.username?.charAt(0).toUpperCase() || 'U') : '🧠';
+        return `<div class="message ${msg.role}"><div class="message-avatar">${avatar}</div><div class="message-content">${formatContent(msg.content)}</div></div>`;
+    }).join('');
+    // Build full content with summary panel and welcome screen
+    const summaryPanelHTML = `
+        <div class="summary-panel ${summaryVisible ? '' : 'hidden'}" id="summaryPanel">
+            <div class="summary-header">
+                <span class="summary-icon">📄</span>
+                <span class="summary-title" id="summaryTitle">${summaryTitle}</span>
+            </div>
+            <div class="summary-content" id="summaryContent">
+                <div class="summary-text" id="summaryText">${summaryText}</div>
+            </div>
+            <button class="summary-close" id="summaryClose" title="Close summary">✕</button>
+        </div>
+    `;
+    elements.chatMessages.innerHTML = summaryPanelHTML + html + elements.welcomeScreen.outerHTML;
+    document.getElementById('welcomeScreen')?.classList.add('hidden');
+    // Re-bind summary panel elements and event listener
+    elements.summaryPanel = document.getElementById('summaryPanel');
+    elements.summaryTitle = document.getElementById('summaryTitle');
+    elements.summaryText = document.getElementById('summaryText');
+    elements.summaryClose = document.getElementById('summaryClose');
+    elements.summaryClose.addEventListener('click', hideSummary);
+}
+function formatContent(content) {
+    // Enhanced markdown parsing for beautiful formatting
+    let html = content;
+    // Escape HTML special characters first (except for already parsed markdown)
+    // Skip this if content looks like it's already HTML
+    if (!html.includes('<table') && !html.includes('<div')) {
+        // Don't escape - let markdown do its thing
+    }
+    // Code blocks: ```code```
+    html = html.replace(/```(\w*)\n?([\s\S]*?)```/g, (match, lang, code) => {
+        return `<pre class="code-block${lang ? ' lang-' + lang : ''}"><code>${code.trim()}</code></pre>`;
+    });
+    // Tables: | Header | Header |
+    html = html.replace(/(?:^|\n)(\|.+\|)\n(\|[-:\s|]+\|)\n((?:\|.+\|\n?)+)/gm, (match, headerRow, sepRow, bodyRows) => {
+        const headers = headerRow.split('|').filter(cell => cell.trim()).map(cell =>
+            `<th>${cell.trim()}</th>`
+        ).join('');
+        const rows = bodyRows.trim().split('\n').map(row => {
+            const cells = row.split('|').filter(cell => cell.trim()).map(cell =>
+                `<td>${cell.trim()}</td>`
+            ).join('');
+            return `<tr>${cells}</tr>`;
+        }).join('');
+        return `<div class="table-wrapper"><table><thead><tr>${headers}</tr></thead><tbody>${rows}</tbody></table></div>`;
+    });
+    // Headers: ### Header, ## Header, # Header
+    html = html.replace(/^#### (.+)$/gm, '<h4>$1</h4>');
+    html = html.replace(/^### (.+)$/gm, '<h3>$1</h3>');
+    html = html.replace(/^## (.+)$/gm, '<h2>$1</h2>');
+    html = html.replace(/^# (.+)$/gm, '<h1>$1</h1>');
+    // Bold headers at start of line (NotebookLM style)
+    html = html.replace(/^(\*\*[^*]+\*\*):?\s*$/gm, '<h4>$1</h4>');
+    // Bold text: **text**
+    html = html.replace(/\*\*([^*]+)\*\*/g, '<strong>$1</strong>');
+    // Italic text: *text*
+    html = html.replace(/(?<!\*)\*([^*]+)\*(?!\*)/g, '<em>$1</em>');
+    // Inline code: `code`
+    html = html.replace(/`([^`]+)`/g, '<code class="inline-code">$1</code>');
+    // Horizontal rule: --- or ***
+    html = html.replace(/^[-*]{3,}$/gm, '<hr class="divider">');
+    // Numbered lists: 1. Item, 2. Item, etc.
+    html = html.replace(/^(\d+)\.\s+(.+)$/gm, '<li class="numbered"><span class="list-num">$1.</span> $2</li>');
+    // Bullet points: • Item or - Item or * Item at start of line
+    html = html.replace(/^[\•\-\*]\s+(.+)$/gm, '<li class="bullet">$1</li>');
+    // Sub-bullets with indentation (2+ spaces before bullet)
+    html = html.replace(/^[\s]{2,}[\•\-\*]\s+(.+)$/gm, '<li class="sub-bullet">$1</li>');
+    // Wrap consecutive numbered list items
+    html = html.replace(/(<li class="numbered">[\s\S]*?<\/li>\n?)+/g, '<ol class="formatted-list">$&</ol>');
+    // Wrap consecutive bullet items
+    html = html.replace(/(<li class="bullet">[\s\S]*?<\/li>\n?)+/g, '<ul class="formatted-list">$&</ul>');
+    // Wrap consecutive sub-bullet items
+    html = html.replace(/(<li class="sub-bullet">[\s\S]*?<\/li>\n?)+/g, '<ul class="formatted-list sub-list">$&</ul>');
+    // Blockquotes: > text
+    html = html.replace(/^>\s+(.+)$/gm, '<blockquote>$1</blockquote>');
+    // Merge consecutive blockquotes
+    html = html.replace(/<\/blockquote>\n<blockquote>/g, '<br>');
+    // Double newlines become paragraph breaks
+    html = html.replace(/\n\n+/g, '</p><p>');
+    // Single newlines become line breaks (but not inside lists)
+    html = html.replace(/\n/g, '<br>');
+    // Clean up br tags in lists, headers, tables
+    html = html.replace(/<br><li/g, '<li');
+    html = html.replace(/<\/li><br>/g, '</li>');
+    html = html.replace(/<br><h/g, '<h');
+    html = html.replace(/<\/h(\d)><br>/g, '</h$1>');
+    html = html.replace(/<br><ul/g, '<ul');
+    html = html.replace(/<br><ol/g, '<ol');
+    html = html.replace(/<\/ul><br>/g, '</ul>');
+    html = html.replace(/<\/ol><br>/g, '</ol>');
+    html = html.replace(/<br><table/g, '<table');
+    html = html.replace(/<\/table><br>/g, '</table>');
+    html = html.replace(/<br><div class="table/g, '<div class="table');
+    html = html.replace(/<\/div><br>/g, '</div>');
+    html = html.replace(/<br><pre/g, '<pre');
+    html = html.replace(/<\/pre><br>/g, '</pre>');
+    html = html.replace(/<br><hr/g, '<hr');
+    html = html.replace(/<hr[^>]*><br>/g, '<hr class="divider">');
+    html = html.replace(/<br><blockquote/g, '<blockquote');
+    html = html.replace(/<\/blockquote><br>/g, '</blockquote>');
+    // Wrap in paragraph
+    html = '<p>' + html + '</p>';
+    // Clean up empty paragraphs
+    html = html.replace(/<p><\/p>/g, '');
+    html = html.replace(/<p>(\s|<br>)*<\/p>/g, '');
+    html = html.replace(/<p><(h\d|ul|ol|table|div|pre|hr|blockquote)/g, '<$1');
+    html = html.replace(/<\/(h\d|ul|ol|table|div|pre|blockquote)><\/p>/g, '</$1>');
+    html = html.replace(/<p><hr/g, '<hr');
+    return html;
+}
+function scrollToBottom() {
+    elements.chatMessages.scrollTop = elements.chatMessages.scrollHeight;
+}
+// ==================== Token Verification ====================
+async function verifyToken() {
+    if (!state.token) { showAuthModal(); return; }
+    try {
+        const response = await fetch('/api/auth/verify', { headers: { 'Authorization': `Bearer ${state.token}` } });
+        if (response.ok) {
+            const data = await response.json();
+            state.user = data;
+            localStorage.setItem('Iribl AI_user', JSON.stringify(state.user));
+            updateAuthUI();
+            loadBuckets();
+            loadDocuments();
+            // Load chat history from server database
+            loadChatHistoryFromServer();
+        } else {
+            state.token = null;
+            state.user = null;
+            localStorage.removeItem('Iribl AI_token');
+            localStorage.removeItem('Iribl AI_user');
+            showAuthModal();
+        }
+    } catch { showAuthModal(); }
+}
+// ==================== Chat History ====================
+function generateChatTopic(messages) {
+    // Get the first user message as the topic
+    const firstUserMsg = messages.find(m => m.role === 'user');
+    if (firstUserMsg) {
+        // Truncate to first 40 chars
+        let topic = firstUserMsg.content.substring(0, 40);
+        if (firstUserMsg.content.length > 40) topic += '...';
+        return topic;
+    }
+    return 'New Conversation';
+}
+function saveChatHistory() {
+    localStorage.setItem('Iribl AI_chat_history', JSON.stringify(state.chatHistory));
+}
+// Sync chat to server
+async function syncChatToServer(chatData) {
+    if (!state.token) return;
+    try {
+        await fetch('/api/chats', {
+            method: 'POST',
+            headers: {
+                'Authorization': `Bearer ${state.token}`,
+                'Content-Type': 'application/json'
+            },
+            body: JSON.stringify(chatData)
+        });
+    } catch (error) {
+        console.error('Failed to sync chat to server:', error);
+    }
+}
+// Load chat history from server
+async function loadChatHistoryFromServer() {
+    if (!state.token) return;
+    try {
+        const response = await fetch('/api/chats', {
+            headers: { 'Authorization': `Bearer ${state.token}` }
+        });
+        if (response.ok) {
+            const data = await response.json();
+            if (data.chats && data.chats.length > 0) {
+                // Merge server chats with local (server takes priority)
+                state.chatHistory = data.chats;
+                saveChatHistory();  // Update local storage
+                renderChatHistory();
+            }
+        }
+    } catch (error) {
+        console.error('Failed to load chats from server:', error);
+    }
+}
+// Delete chat from server
+async function deleteChatFromServer(chatId) {
+    if (!state.token) return;
+    try {
+        await fetch(`/api/chats/${chatId}`, {
+            method: 'DELETE',
+            headers: { 'Authorization': `Bearer ${state.token}` }
+        });
+    } catch (error) {
+        console.error('Failed to delete chat from server:', error);
+    }
+}
+function saveCurrentChat() {
+    // Only save if there are messages
+    if (state.messages.length === 0) return null;
+    const chatId = state.currentChatId || Date.now().toString();
+    const topic = generateChatTopic(state.messages);
+    // Check if this chat already exists
+    const existingIndex = state.chatHistory.findIndex(c => c.id === chatId);
+    const chatData = {
+        id: chatId,
+        topic: topic,
+        messages: [...state.messages],
+        timestamp: Date.now(),
+        bucket: state.chatBucket
+    };
+    if (existingIndex >= 0) {
+        // Update existing chat
+        state.chatHistory[existingIndex] = chatData;
+    } else {
+        // Add new chat at the beginning
+        state.chatHistory.unshift(chatData);
+    }
+    saveChatHistory();
+    renderChatHistory();
+    // Sync to server
+    syncChatToServer(chatData);
+    return chatId;
+}
+function startNewChat() {
+    // Warn if AI is still generating
+    if (state.isLoading) {
+        showToast('AI is still responding - response will go to current chat', 'info');
+    }
+    // Save current chat first if it has messages
+    if (state.messages.length > 0) {
+        saveCurrentChat();
+    }
+    // Clear current chat
+    state.messages = [];
+    state.currentChatId = null;
+    // Reset UI
+    renderMessages();
+    elements.welcomeScreen.classList.remove('hidden');
+    hideSummary();
+    renderChatHistory();
+    showToast('Started new chat', 'info');
+}
+function loadChatFromHistory(chatId) {
+    // Warn if AI is still generating
+    if (state.isLoading) {
+        showToast('AI is still responding - response will go to current chat', 'info');
+    }
+    // Save current chat first if it has messages
+    if (state.messages.length > 0 && state.currentChatId !== chatId) {
+        saveCurrentChat();
+    }
+    const chat = state.chatHistory.find(c => c.id === chatId);
+    if (!chat) return;
+    // Load the chat
+    state.messages = [...chat.messages];
+    state.currentChatId = chat.id;
+    state.chatBucket = chat.bucket || '';
+    // Update bucket dropdown
+    if (elements.chatBucketSelect) {
+        elements.chatBucketSelect.value = state.chatBucket;
+        const bucketName = state.chatBucket ?
+            state.buckets.find(b => b.bucket_id === state.chatBucket)?.name || 'Selected Bucket' :
+            'All Documents';
+        elements.chatBucketTrigger.querySelector('.select-value').textContent = bucketName;
+    }
+    // Render messages
+    renderMessages();
+    // Show/hide welcome screen based on whether chat has messages
+    if (state.messages.length === 0) {
+        elements.welcomeScreen.classList.remove('hidden');
+    } else {
+        elements.welcomeScreen.classList.add('hidden');
+    }
+    renderChatHistory();
+    scrollToBottom();
+}
+function deleteChatFromHistory(chatId) {
+    event.stopPropagation();
+    state.chatHistory = state.chatHistory.filter(c => c.id !== chatId);
+    // If deleting current chat, clear it
+    if (state.currentChatId === chatId) {
+        state.messages = [];
+        state.currentChatId = null;
+        renderMessages();
+        elements.welcomeScreen.classList.remove('hidden');
+    }
+    saveChatHistory();
+    renderChatHistory();
+    // Delete from server
+    deleteChatFromServer(chatId);
+    showToast('Chat deleted', 'success');
+}
+function renderChatHistory() {
+    // Filter chats by selected bucket
+    let filteredChats = state.chatHistory;
+    if (state.selectedBucket) {
+        filteredChats = state.chatHistory.filter(chat =>
+            chat.bucket === state.selectedBucket ||
+            // Also include chats with no bucket for backwards compatibility
+            (!chat.bucket && !state.selectedBucket)
+        );
+    }
+    const count = filteredChats.length;
+    const totalCount = state.chatHistory.length;
+    // Show filtered count vs total if filtering is active
+    elements.chatHistoryCount.textContent = state.selectedBucket && count !== totalCount ?
+        `(${count}/${totalCount})` : `(${totalCount})`;
+    if (count === 0) {
+        elements.chatHistoryList.innerHTML = state.selectedBucket ?
+            `<div class="empty-state small"><div class="empty-text">No chats in this bucket</div></div>` :
+            `<div class="empty-state small"><div class="empty-text">No chats yet</div></div>`;
+        return;
+    }
+    elements.chatHistoryList.innerHTML = filteredChats.map(chat => {
+        const isActive = state.currentChatId === chat.id;
+        const date = formatDate(chat.timestamp / 1000);
+        return `
+            <div class="chat-history-item ${isActive ? 'active' : ''}" onclick="loadChatFromHistory('${chat.id}')">
+                <span class="chat-history-icon">💬</span>
+                <div class="chat-history-info">
+                    <div class="chat-history-topic">${chat.topic}</div>
+                    <div class="chat-history-date">${date}</div>
+                </div>
+                <button class="btn btn-ghost chat-history-delete" onclick="deleteChatFromHistory('${chat.id}')" title="Delete">🗑️</button>
+            </div>
+        `;
+    }).join('');
+}
+function clearCurrentChat() {
+    // Warn if AI is still generating
+    if (state.isLoading) {
+        showToast('AI is still responding - response will go to current chat', 'info');
+    }
+    // If there's a current chat, clear its messages but keep it in history
+    if (state.currentChatId) {
+        const chatIndex = state.chatHistory.findIndex(c => c.id === state.currentChatId);
+        if (chatIndex >= 0) {
+            // Clear the messages in history
+            state.chatHistory[chatIndex].messages = [];
+            saveChatHistory();
+            // Sync cleared chat to server
+            syncChatToServer(state.chatHistory[chatIndex]);
+        }
+    }
+    // Clear current chat messages
+    state.messages = [];
+    // Reset UI
+    renderMessages();
+    elements.welcomeScreen.classList.remove('hidden');
+    hideSummary();
+    renderChatHistory();
+    showToast('Chat cleared', 'info');
+}
+function initChatHistory() {
+    // New Chat button handler
+    elements.newChatBtn.addEventListener('click', startNewChat);
+    // Clear Chat button handler (sidebar)
+    elements.clearChatBtn.addEventListener('click', (e) => {
+        e.stopPropagation();
+        clearCurrentChat();
+    });
+    // Clear Chat button handler (top)
+    elements.clearChatBtnTop.addEventListener('click', clearCurrentChat);
+    // Render existing history
+    renderChatHistory();
+    // Auto-save current chat when sending messages (hook into sendMessage)
+    // This is handled by updating currentChatId after first message
+}
+// ==================== Init ====================
+function init() {
+    initSidebars();
+    initMobileNavigation();
+    initCollapsible();
+    initCustomDropdowns();
+    initUpload();
+    initChat();
+    initSummaryPanel();
+    initChatHistory();
+    verifyToken();
+}
+document.addEventListener('DOMContentLoaded', init);

test_chroma.py ADDED Viewed

	@@ -0,0 +1,16 @@

+# Test ChromaDB Cloud connection
+import chromadb
+import os
+from dotenv import load_dotenv
+load_dotenv()
+# Use CloudClient - the correct way to connect to ChromaDB Cloud
+client = chromadb.CloudClient(
+    tenant="jash_doshi_211294",
+    database="visionextract",
+    api_key=os.getenv("CHROMA_API_KEY")
+)
+print("Connected successfully!")
+print("Collections:", client.list_collections())