Spaces:
Running
Running
jashdoshi77
commited on
Commit
·
64deb3c
0
Parent(s):
feat: Add AI-powered query understanding with DeepSeek parsing
Browse files- .agent/workflows/push-to-huggingface.md +24 -0
- .env.example +21 -0
- .gitignore +0 -0
- Dockerfile +29 -0
- README.md +31 -0
- app.py +732 -0
- config.py +48 -0
- find_buckets.py +25 -0
- find_users.py +21 -0
- migrate_metadata.py +273 -0
- requirements.txt +15 -0
- services/__init__.py +1 -0
- services/auth_service.py +177 -0
- services/chroma_service.py +1009 -0
- services/date_parser.py +285 -0
- services/document_processor.py +336 -0
- services/metadata_extractor.py +446 -0
- services/number_extractor.py +302 -0
- services/ocr_service.py +231 -0
- services/rag_service.py +1870 -0
- static/css/styles.css +2567 -0
- static/images/WhatsApp Image 2025-12-23 at 5.10.00 PM.jpeg +0 -0
- static/index.html +411 -0
- static/js/app.js +1798 -0
- test_chroma.py +16 -0
.agent/workflows/push-to-huggingface.md
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
description: Push changes to Hugging Face Spaces
|
| 3 |
+
---
|
| 4 |
+
|
| 5 |
+
# Push to Hugging Face
|
| 6 |
+
|
| 7 |
+
// turbo-all
|
| 8 |
+
|
| 9 |
+
1. Stage all changes:
|
| 10 |
+
```bash
|
| 11 |
+
git add .
|
| 12 |
+
```
|
| 13 |
+
|
| 14 |
+
2. Commit with a message:
|
| 15 |
+
```bash
|
| 16 |
+
git commit -m "Your commit message here"
|
| 17 |
+
```
|
| 18 |
+
|
| 19 |
+
3. Push to Hugging Face:
|
| 20 |
+
```bash
|
| 21 |
+
git push https://jashdoshi77:YOUR_HF_TOKEN@huggingface.co/spaces/jashdoshi77/notebooklm-fast master:main
|
| 22 |
+
```
|
| 23 |
+
|
| 24 |
+
**Note**: Replace `YOUR_HF_TOKEN` with a Hugging Face access token that has write access to the Space, and never commit the real token to the repository. The Space rebuilds automatically after each push.
|
.env.example
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Groq API (Ultra-fast inference) - Get key from https://console.groq.com
|
| 2 |
+
GROQ_API_KEY=your_groq_api_key_here
|
| 3 |
+
|
| 4 |
+
# Mistral AI API (for OCR) - Get key from https://console.mistral.ai/
|
| 5 |
+
MISTRAL_API_KEY=your_mistral_api_key_here
|
| 6 |
+
|
| 7 |
+
# OpenRouter API (fallback)
|
| 8 |
+
OPENROUTER_API_KEY=your_openrouter_api_key_here
|
| 9 |
+
|
| 10 |
+
# ChromaDB Cloud Configuration
|
| 11 |
+
# Get your API key from https://www.trychroma.com/
|
| 12 |
+
CHROMA_API_KEY=your_chromadb_api_key_here
|
| 13 |
+
CHROMA_TENANT=your_tenant_id
|
| 14 |
+
CHROMA_DATABASE=your_database_name
|
| 15 |
+
|
| 16 |
+
# JWT Secret (change in production)
|
| 17 |
+
JWT_SECRET=Iribl AI-secret-key-change-me-in-production
|
| 18 |
+
|
| 19 |
+
# App Configuration
|
| 20 |
+
FLASK_ENV=development
|
| 21 |
+
FLASK_DEBUG=True
|
.gitignore
ADDED
|
Binary file (258 Bytes). View file
|
|
|
Dockerfile
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.10-slim
|
| 2 |
+
|
| 3 |
+
WORKDIR /app
|
| 4 |
+
|
| 5 |
+
# Install system dependencies for PyMuPDF and other packages
|
| 6 |
+
RUN apt-get update && apt-get install -y \
|
| 7 |
+
build-essential \
|
| 8 |
+
libffi-dev \
|
| 9 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 10 |
+
|
| 11 |
+
# Copy requirements first for Docker layer caching
|
| 12 |
+
COPY requirements.txt .
|
| 13 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 14 |
+
|
| 15 |
+
# Copy application code
|
| 16 |
+
COPY . .
|
| 17 |
+
|
| 18 |
+
# Create necessary directories
|
| 19 |
+
RUN mkdir -p uploads chroma_data
|
| 20 |
+
|
| 21 |
+
# Hugging Face Spaces uses port 7860
|
| 22 |
+
EXPOSE 7860
|
| 23 |
+
|
| 24 |
+
# Set environment variables
|
| 25 |
+
ENV FLASK_ENV=production
|
| 26 |
+
ENV PYTHONUNBUFFERED=1
|
| 27 |
+
|
| 28 |
+
# Run with gunicorn for production
|
| 29 |
+
CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--worker-class", "gthread", "--threads", "4", "--workers", "2", "--timeout", "1200", "--access-logfile", "-", "--error-logfile", "-", "app:app"]
|
README.md
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: NotebookLM Fast
|
| 3 |
+
emoji: 📚
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
---
|
| 9 |
+
|
| 10 |
+
# NotebookLM Fast
|
| 11 |
+
|
| 12 |
+
AI-powered document intelligence platform with RAG (Retrieval Augmented Generation).
|
| 13 |
+
|
| 14 |
+
## Features
|
| 15 |
+
|
| 16 |
+
- 📄 Upload PDFs, Word docs, Excel files, PowerPoints, and images
|
| 17 |
+
- 💬 Chat with your documents using AI
|
| 18 |
+
- 🗂️ Organize documents in buckets
|
| 19 |
+
- 👥 Admin/Employee role management
|
| 20 |
+
- 🔒 Secure authentication
|
| 21 |
+
|
| 22 |
+
## Environment Variables
|
| 23 |
+
|
| 24 |
+
Set these as secrets in your Hugging Face Space settings:
|
| 25 |
+
|
| 26 |
+
- `OPENROUTER_API_KEY` - Your OpenRouter API key
|
| 27 |
+
- `GROQ_API_KEY` - Your Groq API key
- `MISTRAL_API_KEY` - Your Mistral AI API key (used for OCR)
|
| 28 |
+
- `CHROMA_API_KEY` - Your ChromaDB Cloud API key
|
| 29 |
+
- `CHROMA_TENANT` - Your ChromaDB tenant ID
|
| 30 |
+
- `CHROMA_DATABASE` - Your ChromaDB database name
|
| 31 |
+
- `JWT_SECRET` - Secret key for JWT tokens
|
app.py
ADDED
|
@@ -0,0 +1,732 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
NotebookLM Clone - Main Flask Application
|
| 3 |
+
AI-powered document intelligence platform with RAG
|
| 4 |
+
Supports Admin/Employee roles and Bucket organization
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import uuid
|
| 9 |
+
from functools import wraps
|
| 10 |
+
from flask import Flask, request, jsonify, send_from_directory, send_file, Response
|
| 11 |
+
from flask_cors import CORS
|
| 12 |
+
from werkzeug.utils import secure_filename
|
| 13 |
+
|
| 14 |
+
from config import Config
|
| 15 |
+
from services.auth_service import auth_service
|
| 16 |
+
from services.document_processor import document_processor
|
| 17 |
+
from services.chroma_service import chroma_service
|
| 18 |
+
from services.rag_service import rag_service
|
| 19 |
+
from services.metadata_extractor import metadata_extractor
|
| 20 |
+
|
| 21 |
+
# Flask application object; static files are looked up in the 'static' folder.
app = Flask(__name__, static_folder='static')
# Request-size limit comes from Config.
app.config.update(MAX_CONTENT_LENGTH=Config.MAX_CONTENT_LENGTH)
# Enable CORS on the app with the extension's default settings.
CORS(app)

# Upload handlers write under Config.UPLOAD_FOLDER, so create it at import time.
os.makedirs(Config.UPLOAD_FOLDER, exist_ok=True)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
# ==================== Auth Decorators ====================
|
| 31 |
+
|
| 32 |
+
def _authenticate_request():
    """Resolve the user for the current request from its Bearer token.

    Returns:
        A ``(user, error)`` pair. On success ``user`` is the value returned by
        ``auth_service.get_current_user`` and ``error`` is ``None``. On
        failure ``user`` is ``None`` and ``error`` is a ``(response, 401)``
        tuple that a view can return directly.
    """
    auth_header = request.headers.get('Authorization')

    if not auth_header or not auth_header.startswith('Bearer '):
        return None, (jsonify({"error": "Missing or invalid authorization header"}), 401)

    # Split on any run of whitespace so "Bearer  <token>" (repeated spaces)
    # still yields the token instead of an empty string.
    parts = auth_header.split()
    token = parts[1] if len(parts) > 1 else ''
    user = auth_service.get_current_user(token)

    if not user:
        return None, (jsonify({"error": "Invalid or expired token"}), 401)

    return user, None


def require_auth(f):
    """Decorator to require authentication.

    Responds with 401 when the Authorization header is missing or malformed,
    or when the token does not resolve to a user. Otherwise the user is
    attached to the request as ``request.current_user`` and the wrapped view
    is called.
    """
    @wraps(f)
    def decorated(*args, **kwargs):
        user, error = _authenticate_request()
        if error is not None:
            return error

        request.current_user = user
        return f(*args, **kwargs)

    return decorated


def require_admin(f):
    """Decorator to require admin role.

    Performs the same authentication as ``require_auth`` and additionally
    responds with 403 unless the user's ``role`` is ``'admin'``.
    """
    @wraps(f)
    def decorated(*args, **kwargs):
        user, error = _authenticate_request()
        if error is not None:
            return error

        if user.get('role') != 'admin':
            return jsonify({"error": "Admin access required"}), 403

        request.current_user = user
        return f(*args, **kwargs)

    return decorated
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
# ==================== Static Routes ====================
|
| 78 |
+
|
| 79 |
+
@app.route('/')
def index():
    """Serve index.html from the application's static folder."""
    static_root = app.static_folder
    return send_from_directory(static_root, 'index.html')
|
| 82 |
+
|
| 83 |
+
@app.route('/<path:path>')
def serve_static(path):
    """Serve the requested path from the application's static folder."""
    static_root = app.static_folder
    return send_from_directory(static_root, path)
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
# ==================== Auth Routes ====================
|
| 89 |
+
|
| 90 |
+
@app.route('/api/auth/register/admin', methods=['POST'])
def register_admin():
    """Register an admin account from a JSON request body.

    Reads ``username``, ``password`` and ``email`` from the body and delegates
    to ``auth_service.register_admin``.

    Returns:
        JSON with ``token``, ``user_id``, ``username`` and ``role`` when the
        service reports success; otherwise ``{"error": ...}`` with status 400.
    """
    data = request.get_json()

    if not data or not isinstance(data, dict):
        return jsonify({"error": "No data provided"}), 400

    # A JSON null or a non-string value would crash on .strip(); treat
    # missing/null as empty and reject any other non-string type.
    username = data.get('username') or ''
    password = data.get('password') or ''
    email = data.get('email') or ''
    if not all(isinstance(value, str) for value in (username, password, email)):
        return jsonify({"error": "Invalid data provided"}), 400

    result = auth_service.register_admin(username.strip(), password, email.strip())

    if not result['success']:
        return jsonify({"error": result['error']}), 400

    return jsonify({
        "token": result['token'],
        "user_id": result['user_id'],
        "username": result['username'],
        "role": result['role']
    })
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
@app.route('/api/auth/login', methods=['POST'])
def login():
    """Authenticate a user from a JSON request body.

    Reads ``username``, ``password`` and ``role`` (defaulting to ``'admin'``)
    from the body and delegates to ``auth_service.login``.

    Returns:
        JSON with ``token``, ``user_id``, ``username`` and ``role`` when the
        service reports success; ``{"error": ...}`` with status 401 when it
        does not, or status 400 when the body is missing or malformed.
    """
    data = request.get_json()

    if not data or not isinstance(data, dict):
        return jsonify({"error": "No data provided"}), 400

    # A JSON null or a non-string value would crash on .strip(); treat
    # missing/null as empty and reject any other non-string type.
    username = data.get('username') or ''
    password = data.get('password') or ''
    if not isinstance(username, str) or not isinstance(password, str):
        return jsonify({"error": "Invalid data provided"}), 400

    role = data.get('role', 'admin')

    result = auth_service.login(username.strip(), password, role)

    if not result['success']:
        return jsonify({"error": result['error']}), 401

    return jsonify({
        "token": result['token'],
        "user_id": result['user_id'],
        "username": result['username'],
        "role": result['role']
    })
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
@app.route('/api/auth/verify', methods=['GET'])
@require_auth
def verify_token():
    """Return the identity that ``require_auth`` attached to the request."""
    user = request.current_user
    payload = {
        "user_id": user['user_id'],
        "username": user['username'],
        "role": user.get('role', 'admin'),
    }
    return jsonify(payload)
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
# ==================== Admin Employee Management ====================
|
| 149 |
+
|
| 150 |
+
@app.route('/api/admin/employees', methods=['GET'])
@require_admin
def list_employees():
    """Return the employees that auth_service reports for the calling admin."""
    admin_id = request.current_user['user_id']
    return jsonify({"employees": auth_service.get_admin_employees(admin_id)})
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
@app.route('/api/admin/employees', methods=['POST'])
@require_admin
def add_employee():
    """Create an employee account under the calling admin.

    Reads ``email`` and ``password`` from the JSON body and delegates to
    ``auth_service.register_employee``.

    Returns:
        JSON with ``user_id`` and ``email`` when the service reports success;
        otherwise ``{"error": ...}`` with status 400.
    """
    data = request.get_json()

    if not data or not isinstance(data, dict):
        return jsonify({"error": "No data provided"}), 400

    # A JSON null or a non-string value would crash on .strip(); treat
    # missing/null as empty and reject any other non-string type.
    email = data.get('email') or ''
    password = data.get('password') or ''
    if not isinstance(email, str) or not isinstance(password, str):
        return jsonify({"error": "Invalid data provided"}), 400

    result = auth_service.register_employee(
        admin_user_id=request.current_user['user_id'],
        email=email.strip(),
        password=password
    )

    if not result['success']:
        return jsonify({"error": result['error']}), 400

    return jsonify({"user_id": result['user_id'], "email": result['email']})
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
@app.route('/api/admin/employees/<employee_id>', methods=['DELETE'])
@require_admin
def delete_employee(employee_id):
    """Delete an employee on behalf of the calling admin.

    Responds 404 when ``auth_service.delete_employee`` returns a falsy value.
    """
    admin_id = request.current_user['user_id']
    removed = auth_service.delete_employee(
        admin_user_id=admin_id,
        employee_id=employee_id
    )

    if not removed:
        return jsonify({"error": "Employee not found or access denied"}), 404

    return jsonify({"success": True})
|
| 192 |
+
|
| 193 |
+
|
| 194 |
+
# ==================== Bucket Routes ====================
|
| 195 |
+
|
| 196 |
+
@app.route('/api/buckets', methods=['GET'])
@require_auth
def list_buckets():
    """Return the buckets chroma_service reports for the current user."""
    owner_id = request.current_user['user_id']
    return jsonify({"buckets": chroma_service.get_user_buckets(owner_id)})
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
@app.route('/api/buckets', methods=['POST'])
@require_auth
def create_bucket():
    """Create a new bucket for the current user.

    Reads ``name`` (required) and ``description`` (optional) from the JSON
    body and delegates to ``chroma_service.create_bucket``.

    Returns:
        The service result as JSON, or ``{"error": ...}`` with status 400
        when no usable name was supplied.
    """
    data = request.get_json()

    raw_name = data.get('name') if isinstance(data, dict) else None
    # Validate after stripping so a whitespace-only name is rejected instead
    # of creating a bucket with an empty name.
    name = raw_name.strip() if isinstance(raw_name, str) else ''
    if not name:
        return jsonify({"error": "Bucket name is required"}), 400

    # A null or non-string description is treated as "no description".
    raw_description = data.get('description')
    description = raw_description.strip() if isinstance(raw_description, str) else ''

    result = chroma_service.create_bucket(
        user_id=request.current_user['user_id'],
        name=name,
        description=description
    )

    return jsonify(result)
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
@app.route('/api/buckets/<bucket_id>', methods=['DELETE'])
@require_auth
def delete_bucket(bucket_id):
    """Delete one of the current user's buckets.

    Responds 404 when ``chroma_service.delete_bucket`` returns a falsy value.
    """
    owner_id = request.current_user['user_id']
    removed = chroma_service.delete_bucket(bucket_id=bucket_id, user_id=owner_id)

    if not removed:
        return jsonify({"error": "Bucket not found or access denied"}), 404

    return jsonify({"success": True})
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
# ==================== Document Routes ====================
|
| 241 |
+
|
| 242 |
+
# ==================== Async Processing ====================
|
| 243 |
+
|
| 244 |
+
# Global status store: doc_id -> {status, progress, message, result, error}
# NOTE(review): this dict lives in the memory of a single process. The
# Dockerfile in this commit starts gunicorn with "--workers 2", so a status
# poll may reach a worker that never saw the upload - confirm and consider a
# shared store.
processing_status = {}


def _discard_upload(file_path):
    """Best-effort removal of an uploaded file after processing failed."""
    try:
        os.remove(file_path)
    except FileNotFoundError:
        # Nothing to clean up.
        pass
    except OSError as exc:
        # Cleanup must never mask the processing error already recorded.
        print(f"[BACKGROUND] Could not remove {file_path}: {exc}")


def process_document_background(doc_id, user_id, file_path, filename, bucket_id):
    """Background task for processing documents.

    Runs, in order: text extraction (``document_processor.process``),
    document storage (``chroma_service.store_document``), chunking and
    embedding (``rag_service.process_document``), summary generation
    (``rag_service.generate_summary``), then metadata and summary-chunk
    storage. Progress is published through ``processing_status[doc_id]``.

    Args:
        doc_id: Identifier of the document; also the key in ``processing_status``.
        user_id: Identifier of the uploading user, passed to the services.
        file_path: Path of the saved upload on disk.
        filename: Name of the upload as passed to the processing services.
        bucket_id: Bucket identifier forwarded to the storage services.

    Returns:
        None. The outcome is written to ``processing_status[doc_id]`` with
        status ``"completed"`` (plus a ``result`` dict) or ``"failed"``
        (plus an ``error`` string). On failure the uploaded file is removed.
    """
    try:
        processing_status[doc_id] = {
            "status": "processing",
            "progress": 10,
            "message": "Starting processing..."
        }

        print(f"[BACKGROUND] Processing file: {filename}")

        # Step 1: Text Extraction (OCR)
        processing_status[doc_id]["message"] = "Extracting text (OCR)..."
        processing_status[doc_id]["progress"] = 20

        result = document_processor.process(file_path, filename)

        if not result['success']:
            processing_status[doc_id] = {
                "status": "failed",
                "error": result['error']
            }
            # Cleanup is best-effort so a removal error cannot overwrite the
            # extraction error recorded above.
            _discard_upload(file_path)
            return

        processing_status[doc_id]["progress"] = 50
        processing_status[doc_id]["message"] = "Storing document..."

        # Step 2: Store Metadata
        doc_type = document_processor.get_file_type(filename)
        chroma_service.store_document(
            user_id=user_id,
            doc_id=doc_id,
            filename=filename,
            doc_type=doc_type,
            content=result['text'],
            bucket_id=bucket_id
        )

        processing_status[doc_id]["progress"] = 70
        processing_status[doc_id]["message"] = "generating embeddings..."

        # Step 3: Chunking & Embeddings
        chunk_count = rag_service.process_document(
            user_id=user_id,
            doc_id=doc_id,
            content=result['text'],
            bucket_id=bucket_id
        )

        processing_status[doc_id]["progress"] = 90
        processing_status[doc_id]["message"] = "Generating summary..."

        # Step 4: Summary Generation
        summary_result = rag_service.generate_summary(result['text'], filename)
        summary = summary_result.get('summary', f'Document: {filename}')

        # Step 5: Extract and store metadata for aggregate queries
        processing_status[doc_id]["progress"] = 95
        processing_status[doc_id]["message"] = "Extracting metadata..."

        try:
            # Extract structured metadata from document
            metadata = metadata_extractor.extract_metadata(result['text'], filename)

            # Store metadata for aggregate queries
            chroma_service.store_document_metadata(
                doc_id=doc_id,
                user_id=user_id,
                bucket_id=bucket_id,
                metadata=metadata
            )

            # Store summary chunk for aggregate queries
            chroma_service.store_summary_chunk(
                doc_id=doc_id,
                user_id=user_id,
                summary=summary,
                bucket_id=bucket_id,
                filename=filename
            )
            print(f"[METADATA] Extracted and stored metadata for {filename}")
        except Exception as e:
            # Non-fatal - the document is already stored and searchable.
            print(f"[METADATA] Warning: Failed to extract metadata for {filename}: {e}")

        # Complete
        processing_status[doc_id] = {
            "status": "completed",
            "progress": 100,
            "message": "Complete",
            "result": {
                "doc_id": doc_id,
                "filename": filename,
                "doc_type": doc_type,
                "bucket_id": bucket_id,
                "chunk_count": chunk_count,
                "summary": summary
            }
        }
        print(f"[BACKGROUND] Completed {filename}")

    except Exception as e:
        # Boundary of the worker thread: record the failure for the status
        # endpoint instead of letting the thread die silently.
        import traceback
        print(f"[BACKGROUND ERROR] {str(e)}")
        print(traceback.format_exc())
        processing_status[doc_id] = {
            "status": "failed",
            "error": str(e)
        }
        _discard_upload(file_path)
|
| 365 |
+
|
| 366 |
+
# doc_id -> user_id of the uploader. Kept next to processing_status so the
# status endpoint can refuse to show one user's progress to another user.
_processing_owners = {}


@app.route('/api/documents/upload', methods=['POST'])
@require_auth
def upload_document():
    """Upload and process a document (Async).

    Saves the multipart ``file`` under the user's upload folder as
    ``<doc_id>_<filename>``, records a ``"queued"`` entry in
    ``processing_status`` and starts ``process_document_background`` in a
    daemon thread.

    Returns:
        202 with ``status``, ``doc_id``, ``filename`` and ``message`` once the
        upload is accepted; 400 with ``{"error": ...}`` when the file is
        missing, unnamed or of an unsupported type.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file provided"}), 400

    file = request.files['file']
    bucket_id = request.form.get('bucket_id', '')

    if file.filename == '':
        return jsonify({"error": "No file selected"}), 400

    if not document_processor.is_supported(file.filename):
        return jsonify({"error": "Unsupported file type"}), 400

    user_id = request.current_user['user_id']
    doc_id = str(uuid.uuid4())
    filename = secure_filename(file.filename)

    if not document_processor.is_supported(filename):
        # secure_filename keeps only ASCII, so a name with a non-ASCII stem
        # can lose its extension (or come back empty). Rebuild a safe name
        # from the original extension so later steps still see the type.
        # NOTE(review): assumes is_supported inspects the extension - confirm.
        extension = os.path.splitext(file.filename)[1]
        filename = secure_filename(f"document{extension}")

    user_folder = os.path.join(Config.UPLOAD_FOLDER, user_id)
    os.makedirs(user_folder, exist_ok=True)

    file_path = os.path.join(user_folder, f"{doc_id}_{filename}")
    file.save(file_path)

    # Remember who owns this job before any status becomes visible.
    _processing_owners[doc_id] = user_id

    # Initialize status
    processing_status[doc_id] = {
        "status": "queued",
        "progress": 0,
        "message": "Queued for processing..."
    }

    # Start background thread
    import threading
    thread = threading.Thread(
        target=process_document_background,
        args=(doc_id, user_id, file_path, filename, bucket_id)
    )
    thread.daemon = True
    thread.start()

    # Return immediately
    return jsonify({
        "status": "queued",
        "doc_id": doc_id,
        "filename": filename,
        "message": "Upload accepted, processing in background"
    }), 202


@app.route('/api/documents/<doc_id>/status', methods=['GET'])
@require_auth
def get_document_status(doc_id):
    """Get processing status of a document.

    Returns:
        The ``processing_status`` entry for ``doc_id`` as JSON, or
        ``{"status": "unknown"}`` with status 404 when there is no entry or
        the entry was recorded for a different user.
    """
    status = processing_status.get(doc_id)
    owner_id = _processing_owners.get(doc_id)

    # Entries without a recorded owner stay visible, so any status written by
    # code outside this block keeps working. A recorded owner must match.
    owned_by_other = owner_id is not None and owner_id != request.current_user['user_id']

    if not status or owned_by_other:
        # Only the in-memory store is consulted; completed documents that are
        # no longer in memory are reported as unknown.
        return jsonify({"status": "unknown"}), 404

    return jsonify(status)
|
| 428 |
+
|
| 429 |
+
|
| 430 |
+
@app.route('/api/documents/<doc_id>/summary', methods=['GET'])
@require_auth
def get_document_summary(doc_id):
    """Generate a summary for one of the current user's documents.

    The summary is produced from the document's stored ``content_preview``,
    not from the original file. Responds 404 when the document lookup
    returns nothing.
    """
    owner_id = request.current_user['user_id']
    doc = chroma_service.get_document(doc_id, owner_id)

    if not doc:
        return jsonify({"error": "Document not found"}), 404

    filename = doc['filename']
    preview_text = doc.get('content_preview', '')

    # Summarise whatever preview text was stored with the document.
    summary_result = rag_service.generate_summary(preview_text, filename)

    payload = {
        "doc_id": doc_id,
        "filename": filename,
        "summary": summary_result.get('summary', f'Document: {filename}'),
        "success": summary_result.get('success', False)
    }
    return jsonify(payload)
|
| 452 |
+
|
| 453 |
+
|
| 454 |
+
@app.route('/api/documents', methods=['GET'])
@require_auth
def list_documents():
    """List the current user's documents.

    An optional ``bucket_id`` query parameter narrows the listing; an absent
    or empty value is passed to the service as ``None``.
    """
    requested_bucket = request.args.get('bucket_id')
    owner_id = request.current_user['user_id']
    documents = chroma_service.get_user_documents(
        owner_id,
        bucket_id=requested_bucket or None
    )
    return jsonify({"documents": documents})
|
| 464 |
+
|
| 465 |
+
|
| 466 |
+
@app.route('/api/documents/<doc_id>', methods=['GET'])
@require_auth
def get_document(doc_id):
    """Return the stored record for one of the current user's documents.

    Responds 404 when the lookup returns nothing.
    """
    owner_id = request.current_user['user_id']
    doc = chroma_service.get_document(doc_id, owner_id)

    if not doc:
        return jsonify({"error": "Document not found"}), 404

    return jsonify(doc)
|
| 476 |
+
|
| 477 |
+
|
| 478 |
+
@app.route('/api/documents/<doc_id>/view', methods=['GET'])
@require_auth
def view_document(doc_id):
    """View/download the actual document file.

    Looks the document up for the current user, then searches the user's
    upload folder for a file whose name starts with ``doc_id``.

    Returns:
        The file sent inline, or ``{"error": ...}`` with status 404 when the
        document record or the file on disk cannot be found.
    """
    doc = chroma_service.get_document(doc_id, request.current_user['user_id'])

    if not doc:
        return jsonify({"error": "Document not found"}), 404

    user_folder = os.path.join(Config.UPLOAD_FOLDER, request.current_user['user_id'])

    # The folder can be missing even though the record exists (for example
    # when the upload directory was wiped). Report 404 instead of letting
    # os.listdir raise and return a 500.
    stored_names = os.listdir(user_folder) if os.path.isdir(user_folder) else []

    # Find the file
    for stored_name in stored_names:
        if stored_name.startswith(doc_id):
            file_path = os.path.join(user_folder, stored_name)
            return send_file(file_path, as_attachment=False)

    return jsonify({"error": "File not found on server"}), 404
|
| 496 |
+
|
| 497 |
+
|
| 498 |
+
@app.route('/api/documents/<doc_id>/bucket', methods=['PUT'])
@require_auth
def update_document_bucket(doc_id):
    """Move one of the current user's documents to another bucket.

    The target comes from ``bucket_id`` in the JSON body and defaults to an
    empty string. Responds 404 when the service returns a falsy value.
    """
    payload = request.get_json()
    if payload:
        target_bucket = payload.get('bucket_id', '')
    else:
        target_bucket = ''

    moved = chroma_service.update_document_bucket(
        doc_id=doc_id,
        user_id=request.current_user['user_id'],
        bucket_id=target_bucket
    )

    if not moved:
        return jsonify({"error": "Document not found or access denied"}), 404

    return jsonify({"success": True})
|
| 515 |
+
|
| 516 |
+
|
| 517 |
+
@app.route('/api/documents/<doc_id>', methods=['DELETE'])
@require_auth
def delete_document(doc_id):
    """Delete a document.

    Removes the record through ``chroma_service.delete_document`` and then,
    on a best-effort basis, the first file in the user's upload folder whose
    name starts with ``doc_id``.

    Returns:
        ``{"success": True}`` when the service reports success, otherwise
        ``{"error": ...}`` with status 404.
    """
    success = chroma_service.delete_document(
        doc_id=doc_id,
        user_id=request.current_user['user_id']
    )

    if not success:
        return jsonify({"error": "Document not found or access denied"}), 404

    user_folder = os.path.join(Config.UPLOAD_FOLDER, request.current_user['user_id'])
    try:
        for stored_name in os.listdir(user_folder):
            if stored_name.startswith(doc_id):
                os.remove(os.path.join(user_folder, stored_name))
                break
    except OSError as exc:
        # The record is already gone; a leftover file is not worth failing
        # the request for, but it should not disappear silently either.
        print(f"[DELETE] Could not remove file for {doc_id}: {exc}")

    return jsonify({"success": True})
|
| 539 |
+
|
| 540 |
+
|
| 541 |
+
# ==================== Chat/RAG Routes ====================
|
| 542 |
+
|
| 543 |
+
@app.route('/api/chat', methods=['POST'])
@require_auth
def chat():
    """Process a chat query using RAG with optional bucket filtering.

    Reads ``message`` (required), ``doc_ids``, ``bucket_id`` and ``history``
    from the JSON body and delegates to ``rag_service.query``.

    Returns:
        JSON with ``response``, ``model``, ``sources``, ``source_files``,
        ``chunks_used`` and ``chunks_filtered`` when the service reports
        success; ``{"error": ...}`` with status 500 when it does not, or
        status 400 when no usable message was supplied.
    """
    data = request.get_json()

    raw_message = data.get('message') if isinstance(data, dict) else None
    # Validate after stripping so a whitespace-only or non-string message is
    # rejected instead of reaching the RAG service (or crashing on .strip()).
    message = raw_message.strip() if isinstance(raw_message, str) else ''
    if not message:
        return jsonify({"error": "No message provided"}), 400

    doc_ids = data.get('doc_ids')
    bucket_id = data.get('bucket_id')  # filter by bucket
    conversation_history = data.get('history', [])

    result = rag_service.query(
        user_id=request.current_user['user_id'],
        query=message,
        doc_ids=doc_ids,
        bucket_id=bucket_id,
        conversation_history=conversation_history
    )

    if not result['success']:
        return jsonify({"error": result['error']}), 500

    return jsonify({
        "response": result['response'],
        "model": result.get('model', 'unknown'),
        "sources": result.get('sources', []),
        "source_files": result.get('source_files', []),
        "chunks_used": result.get('chunks_used', 0),
        "chunks_filtered": result.get('chunks_filtered', 0)
    })
|
| 576 |
+
|
| 577 |
+
|
| 578 |
+
@app.route('/api/chat/stream', methods=['POST'])
@require_auth
def chat_stream():
    """Streaming chat endpoint - sends response chunks as they arrive.

    Reads ``message`` (required), ``bucket_id`` and ``chat_id`` from the JSON
    body and relays every chunk yielded by ``rag_service.query_stream`` as a
    server-sent event (``data: <json>`` followed by a blank line).

    Returns:
        A ``text/event-stream`` response, or ``{"error": ...}`` with status
        400 when no usable message was supplied.
    """
    import json
    import time

    start_time = time.time()
    print("[STREAM] Endpoint called")

    data = request.get_json()

    raw_message = data.get('message') if isinstance(data, dict) else None
    # Validate after stripping so a whitespace-only or non-string message is
    # rejected before a stream is opened.
    message = raw_message.strip() if isinstance(raw_message, str) else ''
    if not message:
        return jsonify({"error": "No message provided"}), 400

    bucket_id = data.get('bucket_id')
    chat_id = data.get('chat_id', '')  # Get chat_id from request
    # Read the user now: the generator below runs while the response is
    # being streamed, so it only uses these captured locals.
    user_id = request.current_user['user_id']

    print(f"[STREAM] Request parsed in {time.time()-start_time:.2f}s")

    def generate():
        # Immediately yield to start the stream
        yield f"data: {json.dumps({'type': 'start'})}\n\n"

        sse_chunk_count = 0
        for chunk in rag_service.query_stream(
            user_id=user_id,
            query=message,
            bucket_id=bucket_id,
            chat_id=chat_id
        ):
            sse_chunk_count += 1
            # Log only the first few chunks to keep the output readable.
            if sse_chunk_count <= 5:
                print(f"[SSE] Sending chunk {sse_chunk_count}: type={chunk.get('type', 'unknown')}")
            yield f"data: {json.dumps(chunk)}\n\n"

        print(f"[SSE] Stream complete, sent {sse_chunk_count} chunks total")

    return Response(
        generate(),
        mimetype='text/event-stream',
        headers={
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'X-Accel-Buffering': 'no'
        }
    )
|
| 627 |
+
|
| 628 |
+
|
| 629 |
+
@app.route('/api/chat/clear', methods=['POST'])
@require_auth
def clear_chat_memory():
    """Clear conversation memory for the current user.

    The JSON body is optional; when present, its ``bucket_id`` value is
    forwarded to ``rag_service.clear_memory``.

    Returns:
        A success JSON payload when the service reports success, otherwise
        HTTP 500 with an error message.
    """
    # silent=True makes a missing or non-JSON body yield None instead of an
    # aborted request, so the ``or {}`` fallback can actually take effect.
    data = request.get_json(silent=True) or {}
    # A JSON array/scalar body carries no bucket_id; treat it as "no filter".
    bucket_id = data.get('bucket_id') if isinstance(data, dict) else None

    success = rag_service.clear_memory(
        user_id=request.current_user['user_id'],
        bucket_id=bucket_id
    )

    if success:
        return jsonify({"success": True, "message": "Conversation memory cleared"})
    else:
        return jsonify({"error": "Failed to clear memory"}), 500
|
| 645 |
+
|
| 646 |
+
|
| 647 |
+
@app.route('/api/cleanup/chunks', methods=['POST'])
@require_auth
def cleanup_user_chunks():
    """Delete every stored chunk belonging to the authenticated user.

    Intended as a recovery tool for stale data; the response message tells
    the user to re-upload their documents afterwards.
    """
    current_user_id = request.current_user['user_id']
    removed = chroma_service.clear_all_user_chunks(user_id=current_user_id)

    payload = {
        "success": True,
        "message": f"Deleted {removed} chunks. Please re-upload your documents.",
    }
    return jsonify(payload)
|
| 658 |
+
|
| 659 |
+
|
| 660 |
+
|
| 661 |
+
# ==================== Chat History Routes ====================
|
| 662 |
+
|
| 663 |
+
@app.route('/api/chats', methods=['GET'])
@require_auth
def list_chat_sessions():
    """Return the authenticated user's chat sessions under the ``chats`` key."""
    current_user_id = request.current_user['user_id']
    user_chats = chroma_service.get_user_chat_sessions(current_user_id)
    return jsonify({"chats": user_chats})
|
| 669 |
+
|
| 670 |
+
|
| 671 |
+
@app.route('/api/chats', methods=['POST'])
@require_auth
def save_chat_session():
    """Create or update a chat session for the authenticated user.

    Reads ``id`` (required), ``topic`` (default ``'Chat'``), ``messages``
    (default empty list) and ``bucket`` (default empty string) from the JSON
    body and passes them to ``chroma_service.save_chat_session``.

    Returns:
        The service result merged into a ``{"success": True, ...}`` payload,
        or HTTP 400 when the body is empty or has no chat id.
    """
    payload = request.get_json()
    if not payload:
        return jsonify({"error": "No data provided"}), 400

    chat_id = payload.get('id')
    if not chat_id:
        return jsonify({"error": "Chat ID is required"}), 400

    saved = chroma_service.save_chat_session(
        user_id=request.current_user['user_id'],
        chat_id=chat_id,
        topic=payload.get('topic', 'Chat'),
        messages=payload.get('messages', []),
        bucket_id=payload.get('bucket', ''),
    )

    # Keys returned by the service are merged last, as in the stored result.
    return jsonify({"success": True, **saved})
|
| 697 |
+
|
| 698 |
+
|
| 699 |
+
@app.route('/api/chats/<chat_id>', methods=['DELETE'])
@require_auth
def delete_chat_session(chat_id):
    """Delete one of the authenticated user's chat sessions.

    Args:
        chat_id: identifier taken from the URL path.

    Returns:
        ``{"success": True}`` when the service reports the deletion,
        otherwise HTTP 404.
    """
    deleted = chroma_service.delete_chat_session(
        user_id=request.current_user['user_id'],
        chat_id=chat_id,
    )

    if not deleted:
        return jsonify({"error": "Chat not found or access denied"}), 404
    return jsonify({"success": True})
|
| 712 |
+
|
| 713 |
+
|
| 714 |
+
# ==================== Health Check ====================
|
| 715 |
+
|
| 716 |
+
@app.route('/api/health', methods=['GET'])
def health_check():
    """Report a static health status and version; requires no authentication."""
    status_payload = {"status": "healthy", "version": "1.1.0"}
    return jsonify(status_payload)
|
| 719 |
+
|
| 720 |
+
|
| 721 |
+
# ==================== Main ====================
|
| 722 |
+
|
| 723 |
+
if __name__ == '__main__':
    # Startup banner for direct (non-WSGI-server) runs.
    print("=" * 50)
    print("NotebookLM Clone - AI Document Intelligence")
    print("=" * 50)
    print(f"Upload folder: {Config.UPLOAD_FOLDER}")
    print(f"ChromaDB Cloud: {Config.CHROMA_TENANT}/{Config.CHROMA_DATABASE}")
    print("Starting server on http://localhost:5000")
    print("=" * 50)

    # SECURITY: the interactive debugger enabled by debug=True allows
    # arbitrary code execution, and this server binds to all interfaces.
    # Debug mode is therefore opt-in: set FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')

    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)
|
config.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
|
| 4 |
+
load_dotenv()
|
| 5 |
+
|
| 6 |
+
class Config:
    """Central application settings.

    Secrets and deployment-specific values are read from environment
    variables; everything else is a fixed constant.
    """

    # --- DeepSeek (primary LLM, OpenAI-compatible endpoint) ---
    DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY", "")
    DEEPSEEK_BASE_URL = "https://api.deepseek.com/v1"
    DEEPSEEK_MODEL = "deepseek-chat"

    # --- OpenRouter (OCR and fallback LLM) ---
    OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
    OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"

    # Short alias -> OpenRouter model identifier.
    MODEL_MAP = {
        "gemma": "google/gemma-3-4b-it:free",
        "mistral": "mistralai/mistral-small-3.1-24b-instruct:free",
    }

    # Prefer DeepSeek; set to False to use OpenRouter instead.
    USE_DEEPSEEK = True
    # Aliases from MODEL_MAP, in the order they should be tried.
    FALLBACK_ORDER = ["gemma", "mistral"]

    # --- ChromaDB Cloud ---
    CHROMA_API_KEY = os.environ.get("CHROMA_API_KEY", "")
    CHROMA_TENANT = os.environ.get("CHROMA_TENANT", "default_tenant")
    CHROMA_DATABASE = os.environ.get("CHROMA_DATABASE", "default_database")
    CHROMA_HOST = "api.trychroma.com"

    # --- JWT ---
    # NOTE(review): the fallback below is a publicly known placeholder; make
    # sure JWT_SECRET is always set in any real deployment.
    JWT_SECRET = os.environ.get("JWT_SECRET", "your-secret-key-change-in-production")
    JWT_EXPIRY_HOURS = 24

    # --- Uploads ---
    # Stored in an "uploads" directory next to this file.
    UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), "uploads")
    MAX_CONTENT_LENGTH = 200 * 1024 ** 2  # 200MB max file size
    ALLOWED_EXTENSIONS = {
        "pdf", "doc", "docx", "ppt", "pptx", "xls", "xlsx",
        "txt", "md", "png", "jpg", "jpeg", "gif", "webp",
    }

    # --- Retrieval / generation tuning ---
    CHUNK_SIZE = 500  # smaller chunks for higher precision
    CHUNK_OVERLAP = 150  # overlap so information at chunk boundaries is kept
    TOP_K_RESULTS = 100  # retrieve broadly across many documents
    AI_TEMPERATURE = 0.0  # deterministic generation
    # NOTE(review): units/semantics of this threshold are defined by the
    # consumer of this setting - confirm there.
    RELEVANCE_THRESHOLD = 3.0
    MAX_CONVERSATION_HISTORY = 20  # prior turns kept for context
    AI_MAX_TOKENS = 4096  # cap on response length
    AI_TIMEOUT = 90  # presumably seconds - TODO confirm against the caller
|
find_buckets.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Helper script to find buckets for a user"""
|
| 2 |
+
import sys
|
| 3 |
+
sys.path.insert(0, '.')
|
| 4 |
+
|
| 5 |
+
from services.chroma_service import chroma_service
|
| 6 |
+
|
| 7 |
+
# Users to inspect: pass user IDs as command-line arguments, or fall back to
# the two IDs this helper was originally written for.
DEFAULT_USER_IDS = ['55c0893720ef38eb', '7ac2ed69d52d2010']
user_ids = sys.argv[1:] or DEFAULT_USER_IDS

for user_id in user_ids:
    print(f"\nUser: {user_id}")
    print("-" * 40)

    # Document count for this user
    docs = chroma_service.get_user_documents(user_id)
    print(f"Documents: {len(docs)}")

    # Buckets for this user, one line each
    buckets = chroma_service.get_user_buckets(user_id)
    if buckets:
        print("Buckets:")
        for b in buckets:
            print(f" - {b['name']} (ID: {b['bucket_id']}, Docs: {b.get('doc_count', 0)})")
    else:
        print("No buckets found")
|
find_users.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Helper script to find user IDs"""
|
| 2 |
+
import sys
|
| 3 |
+
sys.path.insert(0, '.')
|
| 4 |
+
|
| 5 |
+
from services.chroma_service import chroma_service
|
| 6 |
+
|
| 7 |
+
print("Finding users in ChromaDB...")
users = chroma_service.users_collection.get()

user_ids = users['ids']
if not user_ids:
    print("No users found!")
else:
    print(f"Found {len(user_ids)} users:")
    # Walk ids and metadata in lockstep instead of indexing both lists.
    for user_id, metadata in zip(user_ids, users['metadatas']):
        # A record stored without metadata would otherwise crash on .get().
        metadata = metadata or {}
        username = metadata.get('username', 'N/A')
        role = metadata.get('role', 'N/A')
        print(f" - User ID: {user_id}")
        print(f" Username: {username}")
        print(f" Role: {role}")
        print()
|
migrate_metadata.py
ADDED
|
@@ -0,0 +1,273 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Migration Script - Extract Metadata for Existing Documents
|
| 3 |
+
Run this script once to populate document_metadata and document_summaries
|
| 4 |
+
collections for all existing documents.
|
| 5 |
+
|
| 6 |
+
Usage:
|
| 7 |
+
python migrate_metadata.py --user-id <user_id> [--bucket-id <bucket_id>]
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import sys
|
| 11 |
+
import time
|
| 12 |
+
import argparse
|
| 13 |
+
from typing import List, Dict
|
| 14 |
+
|
| 15 |
+
# Add parent directory to path for imports
|
| 16 |
+
sys.path.insert(0, '.')
|
| 17 |
+
|
| 18 |
+
from services.chroma_service import chroma_service
|
| 19 |
+
from services.metadata_extractor import metadata_extractor
|
| 20 |
+
from services.rag_service import rag_service
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def reconstruct_document_content(doc_id: str) -> str:
    """Rebuild a document's text by joining its stored chunks in order.

    Args:
        doc_id: identifier passed to ``chroma_service.get_document_chunks``.

    Returns:
        The chunk texts joined with single spaces, ordered by each chunk's
        ``chunk_index`` (missing indexes sort as 0), or an empty string when
        the document has no chunks.
    """
    stored_chunks = chroma_service.get_document_chunks(doc_id)
    if not stored_chunks:
        return ""

    # Order the pieces in place by their position in the source document.
    stored_chunks.sort(key=lambda piece: piece.get('chunk_index', 0))

    texts = [piece.get('text', '') for piece in stored_chunks]
    return ' '.join(texts)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def migrate_single_document(doc: Dict, user_id: str, skip_existing: bool = True) -> Dict:
    """Extract metadata and create a summary for one document.

    Args:
        doc: Document dictionary with ``doc_id`` (required) and optional
            ``filename`` and ``bucket_id``.
        user_id: Owner of the document.
        skip_existing: If True, metadata and/or summary that already exist
            are left untouched; if False, both are regenerated.

    Returns:
        A result dictionary with keys ``doc_id``, ``filename``, ``status``
        (``'success'``, ``'skipped'`` or ``'error'``),
        ``metadata_extracted``, ``summary_created`` and ``error``.
        A summary-generation failure is reported as a warning only; the
        document still ends with status ``'success'`` and
        ``summary_created`` left False.
    """
    doc_id = doc['doc_id']
    filename = doc.get('filename', '')
    bucket_id = doc.get('bucket_id', '')

    result = {
        'doc_id': doc_id,
        'filename': filename,
        'status': 'pending',
        'metadata_extracted': False,
        'summary_created': False,
        'error': None
    }

    try:
        # Look up what already exists exactly once. Each lookup is a call
        # into the storage service, so the answers are reused below instead
        # of being fetched again per step.
        has_metadata = False
        has_summary = False
        if skip_existing:
            has_metadata = bool(chroma_service.get_document_metadata(doc_id, user_id))
            summary_id = f"{doc_id}_summary"
            existing_summary = chroma_service.summary_chunks_collection.get(ids=[summary_id])
            has_summary = bool(existing_summary['ids'])

            # Fast skip: nothing to do for a fully migrated document.
            if has_metadata and has_summary:
                result['status'] = 'skipped'
                result['metadata_extracted'] = True
                result['summary_created'] = True
                print(f" [SKIP] Already migrated: {filename}")
                return result

        # Step 1: Reconstruct content from chunks
        print(f" Reconstructing content for {filename}...")
        content = reconstruct_document_content(doc_id)

        if not content:
            result['status'] = 'skipped'
            result['error'] = 'No content found'
            return result

        # Step 2: Extract and store metadata unless it already exists
        # (has_metadata is always False when skip_existing is False).
        if not has_metadata:
            print(" Extracting metadata...")
            metadata = metadata_extractor.extract_metadata(content, filename)

            chroma_service.store_document_metadata(
                doc_id=doc_id,
                user_id=user_id,
                bucket_id=bucket_id,
                metadata=metadata
            )
        result['metadata_extracted'] = True

        # Step 3: Generate and store the summary unless it already exists
        if has_summary:
            result['summary_created'] = True  # Already exists
        else:
            print(" Generating summary...")
            try:
                summary_result = rag_service.generate_summary(content, filename)
                # The service may return a dict holding the summary or a
                # plain value; fall back to a filename-based placeholder.
                if isinstance(summary_result, dict):
                    summary = summary_result.get('summary', f'Document: {filename}')
                else:
                    summary = str(summary_result) if summary_result else f'Document: {filename}'

                if summary:
                    chroma_service.store_summary_chunk(
                        doc_id=doc_id,
                        user_id=user_id,
                        summary=summary,
                        bucket_id=bucket_id,
                        filename=filename
                    )
                    result['summary_created'] = True
            except Exception as e:
                # Deliberately best-effort: a failed summary must not fail
                # the whole document.
                print(f" Warning: Summary generation failed: {e}")

        result['status'] = 'success'
        print(f" [OK] Completed: {filename}")

    except Exception as e:
        result['status'] = 'error'
        # Sanitize error message for console encoding
        error_msg = str(e).encode('ascii', 'replace').decode('ascii')
        result['error'] = error_msg
        print(f" [ERROR] {filename} - {error_msg}")

    return result
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def migrate_all_documents(user_id: str, bucket_id: str = None,
                          batch_size: int = 10, delay: float = 0.5, skip_existing: bool = True):
    """Run the metadata/summary migration over a user's documents.

    Args:
        user_id: User whose documents are migrated.
        bucket_id: Optional bucket to restrict the migration to.
        batch_size: A progress report is printed every ``batch_size`` documents.
        delay: Seconds to sleep after each document (rate limiting).
        skip_existing: Passed through to ``migrate_single_document``.

    Returns:
        A dictionary of counters (total, success, skipped, already_migrated,
        error, metadata_extracted, summaries_created), or None when the user
        has no documents.
    """
    rule = "=" * 60
    print(rule)
    print("Document Metadata Migration")
    print(rule)
    print(f"User ID: {user_id}")
    print(f"Bucket ID: {bucket_id or 'All buckets'}")
    print(f"Skip existing: {skip_existing}")
    print()

    # Collect the work list
    print("Fetching documents...")
    documents = chroma_service.get_user_documents(user_id, bucket_id)
    total_docs = len(documents)

    print(f"Found {total_docs} documents to process")
    print()

    if total_docs == 0:
        print("No documents found. Exiting.")
        return

    # Running counters for the final report
    tally = {
        'total': total_docs,
        'success': 0,
        'skipped': 0,
        'already_migrated': 0,
        'error': 0,
        'metadata_extracted': 0,
        'summaries_created': 0
    }

    started_at = time.time()

    for position, doc in enumerate(documents, 1):
        print(f"\n[{position}/{total_docs}] Processing: {doc.get('filename', 'Unknown')}")

        outcome = migrate_single_document(doc, user_id, skip_existing=skip_existing)

        # Map the per-document status onto exactly one counter. A "skipped"
        # document with both flags set was already migrated; otherwise it
        # was skipped for lack of content.
        status = outcome['status']
        if status == 'success':
            counter = 'success'
        elif status == 'skipped':
            fully_done = outcome.get('metadata_extracted') and outcome.get('summary_created')
            counter = 'already_migrated' if fully_done else 'skipped'
        else:
            counter = 'error'
        tally[counter] += 1

        if outcome['metadata_extracted']:
            tally['metadata_extracted'] += 1
        if outcome['summary_created']:
            tally['summaries_created'] += 1

        # Rate limiting
        if delay > 0:
            time.sleep(delay)

        # Periodic progress report with a simple linear ETA
        if position % batch_size == 0:
            elapsed = time.time() - started_at
            rate = position / elapsed if elapsed > 0 else 0
            remaining = (total_docs - position) / rate if rate > 0 else 0
            print(f"\n--- Progress: {position}/{total_docs} ({position/total_docs*100:.1f}%) ---")
            print(f" Elapsed: {elapsed:.1f}s | ETA: {remaining:.1f}s")
            print(f" Success: {tally['success']} | Already migrated: {tally['already_migrated']} | Errors: {tally['error']}")

    # Final report
    elapsed = time.time() - started_at
    print("\n" + rule)
    print("Migration Complete!")
    print(rule)
    print(f"Total documents: {tally['total']}")
    print(f" [OK] Success: {tally['success']}")
    print(f" [SKIP] Already migrated: {tally['already_migrated']}")
    print(f" [SKIP] Skipped (no content): {tally['skipped']}")
    print(f" [ERR] Errors: {tally['error']}")
    print()
    print(f"Metadata extracted: {tally['metadata_extracted']}")
    print(f"Summaries created: {tally['summaries_created']}")
    print()
    print(f"Total time: {elapsed:.1f} seconds")
    if total_docs > 0:
        print(f"Average: {elapsed/total_docs:.2f} seconds per document")

    return tally
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
def main():
    """Parse command-line options and run (or preview) the migration.

    With ``--dry-run`` only the number of documents and up to ten filenames
    are printed; nothing is written.
    """
    cli = argparse.ArgumentParser(description='Migrate existing documents to extract metadata')
    cli.add_argument('--user-id', required=True, help='User ID to migrate documents for')
    cli.add_argument('--bucket-id', help='Optional bucket ID to filter by')
    cli.add_argument('--batch-size', type=int, default=10, help='Batch size for progress updates')
    cli.add_argument('--delay', type=float, default=0.5, help='Delay between documents (seconds)')
    cli.add_argument('--dry-run', action='store_true', help='Show what would be done without making changes')

    options = cli.parse_args()

    if not options.dry_run:
        migrate_all_documents(
            user_id=options.user_id,
            bucket_id=options.bucket_id,
            batch_size=options.batch_size,
            delay=options.delay,
            skip_existing=True
        )
        return

    # Dry run: list what would be processed, capped at ten filenames.
    print("DRY RUN MODE - No changes will be made")
    pending = chroma_service.get_user_documents(options.user_id, options.bucket_id)
    print(f"Would process {len(pending)} documents:")
    for entry in pending[:10]:
        print(f" - {entry.get('filename', 'Unknown')}")
    overflow = len(pending) - 10
    if overflow > 0:
        print(f" ... and {overflow} more")


if __name__ == '__main__':
    main()
|
requirements.txt
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
flask
|
| 2 |
+
flask-cors
|
| 3 |
+
chromadb
|
| 4 |
+
python-dotenv
|
| 5 |
+
python-docx
|
| 6 |
+
openpyxl
|
| 7 |
+
pandas
|
| 8 |
+
Pillow
|
| 9 |
+
requests
|
| 10 |
+
bcrypt
|
| 11 |
+
PyJWT
|
| 12 |
+
werkzeug
|
| 13 |
+
python-pptx
|
| 14 |
+
pymupdf
|
| 15 |
+
gunicorn
|
services/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Services package
|
services/auth_service.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Authentication Service with Role-Based Access
|
| 3 |
+
Handles user registration, login, and JWT token management
|
| 4 |
+
Supports Admin and Employee roles
|
| 5 |
+
Uses ChromaDB for user storage
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import bcrypt
|
| 9 |
+
import jwt
|
| 10 |
+
import time
|
| 11 |
+
from datetime import datetime, timedelta
|
| 12 |
+
from config import Config
|
| 13 |
+
from services.chroma_service import chroma_service
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class AuthService:
|
| 17 |
+
def __init__(self):
|
| 18 |
+
self.jwt_secret = Config.JWT_SECRET
|
| 19 |
+
self.jwt_expiry_hours = Config.JWT_EXPIRY_HOURS
|
| 20 |
+
|
| 21 |
+
def _hash_password(self, password: str) -> str:
|
| 22 |
+
"""Hash password using bcrypt"""
|
| 23 |
+
salt = bcrypt.gensalt()
|
| 24 |
+
return bcrypt.hashpw(password.encode('utf-8'), salt).decode('utf-8')
|
| 25 |
+
|
| 26 |
+
def _verify_password(self, password: str, hashed: str) -> bool:
|
| 27 |
+
"""Verify password against hash"""
|
| 28 |
+
return bcrypt.checkpw(
|
| 29 |
+
password.encode('utf-8'),
|
| 30 |
+
hashed.encode('utf-8')
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
def _generate_token(self, user_id: str, username: str, role: str) -> str:
|
| 34 |
+
"""Generate JWT token with role"""
|
| 35 |
+
payload = {
|
| 36 |
+
"user_id": user_id,
|
| 37 |
+
"username": username,
|
| 38 |
+
"role": role,
|
| 39 |
+
"exp": datetime.utcnow() + timedelta(hours=self.jwt_expiry_hours),
|
| 40 |
+
"iat": datetime.utcnow()
|
| 41 |
+
}
|
| 42 |
+
return jwt.encode(payload, self.jwt_secret, algorithm="HS256")
|
| 43 |
+
|
| 44 |
+
def verify_token(self, token: str) -> dict | None:
|
| 45 |
+
"""Verify and decode JWT token"""
|
| 46 |
+
try:
|
| 47 |
+
payload = jwt.decode(token, self.jwt_secret, algorithms=["HS256"])
|
| 48 |
+
return {
|
| 49 |
+
"user_id": payload['user_id'],
|
| 50 |
+
"username": payload['username'],
|
| 51 |
+
"role": payload.get('role', 'employee')
|
| 52 |
+
}
|
| 53 |
+
except jwt.ExpiredSignatureError:
|
| 54 |
+
return None
|
| 55 |
+
except jwt.InvalidTokenError:
|
| 56 |
+
return None
|
| 57 |
+
|
| 58 |
+
def register_admin(self, username: str, password: str, email: str = "") -> dict:
|
| 59 |
+
"""
|
| 60 |
+
Register a new admin user
|
| 61 |
+
Returns: {"success": bool, "token": str, "user_id": str, "error": str}
|
| 62 |
+
"""
|
| 63 |
+
# Validate input
|
| 64 |
+
if not username or len(username) < 3:
|
| 65 |
+
return {"success": False, "error": "Username must be at least 3 characters"}
|
| 66 |
+
|
| 67 |
+
if not password or len(password) < 6:
|
| 68 |
+
return {"success": False, "error": "Password must be at least 6 characters"}
|
| 69 |
+
|
| 70 |
+
# Check if user exists
|
| 71 |
+
existing = chroma_service.get_user(username)
|
| 72 |
+
if existing:
|
| 73 |
+
return {"success": False, "error": "Username already exists"}
|
| 74 |
+
|
| 75 |
+
# Hash password and create admin user
|
| 76 |
+
password_hash = self._hash_password(password)
|
| 77 |
+
result = chroma_service.create_user(username, password_hash, email, role="admin")
|
| 78 |
+
|
| 79 |
+
if "error" in result:
|
| 80 |
+
return {"success": False, "error": result['error']}
|
| 81 |
+
|
| 82 |
+
# Generate token
|
| 83 |
+
token = self._generate_token(result['user_id'], username, "admin")
|
| 84 |
+
|
| 85 |
+
return {
|
| 86 |
+
"success": True,
|
| 87 |
+
"token": token,
|
| 88 |
+
"user_id": result['user_id'],
|
| 89 |
+
"username": username,
|
| 90 |
+
"role": "admin"
|
| 91 |
+
}
|
| 92 |
+
|
| 93 |
+
def register_employee(self, admin_user_id: str, email: str, password: str) -> dict:
|
| 94 |
+
"""
|
| 95 |
+
Admin registers an employee
|
| 96 |
+
Returns: {"success": bool, "user_id": str, "error": str}
|
| 97 |
+
"""
|
| 98 |
+
# Validate input
|
| 99 |
+
if not email or "@" not in email:
|
| 100 |
+
return {"success": False, "error": "Valid email is required"}
|
| 101 |
+
|
| 102 |
+
if not password or len(password) < 6:
|
| 103 |
+
return {"success": False, "error": "Password must be at least 6 characters"}
|
| 104 |
+
|
| 105 |
+
# Check if employee email already exists
|
| 106 |
+
existing = chroma_service.get_user(email)
|
| 107 |
+
if existing:
|
| 108 |
+
return {"success": False, "error": "Employee with this email already exists"}
|
| 109 |
+
|
| 110 |
+
# Hash password and create employee user
|
| 111 |
+
password_hash = self._hash_password(password)
|
| 112 |
+
result = chroma_service.create_user(
|
| 113 |
+
username=email,
|
| 114 |
+
password_hash=password_hash,
|
| 115 |
+
email=email,
|
| 116 |
+
role="employee",
|
| 117 |
+
admin_id=admin_user_id
|
| 118 |
+
)
|
| 119 |
+
|
| 120 |
+
if "error" in result:
|
| 121 |
+
return {"success": False, "error": result['error']}
|
| 122 |
+
|
| 123 |
+
return {
|
| 124 |
+
"success": True,
|
| 125 |
+
"user_id": result['user_id'],
|
| 126 |
+
"email": email
|
| 127 |
+
}
|
| 128 |
+
|
| 129 |
+
def login(self, username: str, password: str, role: str = "admin") -> dict:
|
| 130 |
+
"""
|
| 131 |
+
Login user with role check
|
| 132 |
+
Returns: {"success": bool, "token": str, "user_id": str, "error": str}
|
| 133 |
+
"""
|
| 134 |
+
# Get user
|
| 135 |
+
user = chroma_service.get_user(username)
|
| 136 |
+
|
| 137 |
+
if not user:
|
| 138 |
+
return {"success": False, "error": "Invalid credentials"}
|
| 139 |
+
|
| 140 |
+
# Verify password
|
| 141 |
+
if not self._verify_password(password, user['password_hash']):
|
| 142 |
+
return {"success": False, "error": "Invalid credentials"}
|
| 143 |
+
|
| 144 |
+
# Verify role matches
|
| 145 |
+
user_role = user.get('role', 'admin')
|
| 146 |
+
if user_role != role:
|
| 147 |
+
if role == "admin":
|
| 148 |
+
return {"success": False, "error": "This account is not an admin account"}
|
| 149 |
+
else:
|
| 150 |
+
return {"success": False, "error": "This account is not an employee account"}
|
| 151 |
+
|
| 152 |
+
# Generate token
|
| 153 |
+
token = self._generate_token(user['user_id'], username, user_role)
|
| 154 |
+
|
| 155 |
+
return {
|
| 156 |
+
"success": True,
|
| 157 |
+
"token": token,
|
| 158 |
+
"user_id": user['user_id'],
|
| 159 |
+
"username": username,
|
| 160 |
+
"role": user_role
|
| 161 |
+
}
|
| 162 |
+
|
| 163 |
+
def get_admin_employees(self, admin_user_id: str) -> list:
|
| 164 |
+
"""Get all employees created by an admin"""
|
| 165 |
+
return chroma_service.get_employees_by_admin(admin_user_id)
|
| 166 |
+
|
| 167 |
+
def delete_employee(self, admin_user_id: str, employee_id: str) -> bool:
|
| 168 |
+
"""Admin deletes an employee"""
|
| 169 |
+
return chroma_service.delete_employee(admin_user_id, employee_id)
|
| 170 |
+
|
| 171 |
+
def get_current_user(self, token: str) -> dict | None:
|
| 172 |
+
"""Get current user from token"""
|
| 173 |
+
return self.verify_token(token)
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
# Singleton instance
|
| 177 |
+
auth_service = AuthService()
|
services/chroma_service.py
ADDED
|
@@ -0,0 +1,1009 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ChromaDB Cloud Service - Vector Storage & Retrieval
|
| 3 |
+
With Role-Based User Management and Bucket Organization
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import chromadb
|
| 7 |
+
from chromadb.utils.embedding_functions import DefaultEmbeddingFunction
|
| 8 |
+
import hashlib
|
| 9 |
+
import time
|
| 10 |
+
from config import Config
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class ChromaService:
|
| 14 |
+
_instance = None
|
| 15 |
+
|
| 16 |
+
def __new__(cls):
    """Return the shared ChromaService instance, creating it on first use.

    The instance is stored on ``cls._instance`` only after ``_initialize()``
    has completed. If initialization raises (for example the ``ValueError``
    for a missing API key), nothing is cached and the next call tries again
    instead of handing back a half-initialized object.
    """
    if cls._instance is None:
        instance = super().__new__(cls)
        # Initialize before publishing: on failure _instance stays None.
        instance._initialize()
        cls._instance = instance
    return cls._instance
|
| 21 |
+
|
| 22 |
+
def _initialize(self):
    """Connect to ChromaDB Cloud and set up the collections used by this service.

    Raises:
        ValueError: If ``Config.CHROMA_API_KEY`` is empty or unset.
    """
    if not Config.CHROMA_API_KEY:
        message = (
            "CHROMA_API_KEY is required! Please set it in your .env file. "
            "Get your API key from https://www.trychroma.com/"
        )
        raise ValueError(message)

    print("Connecting to ChromaDB Cloud...")
    print(f"Tenant: {Config.CHROMA_TENANT}")
    print(f"Database: {Config.CHROMA_DATABASE}")

    # CloudClient takes the tenant, database and API key from Config.
    connection_settings = {
        "tenant": Config.CHROMA_TENANT,
        "database": Config.CHROMA_DATABASE,
        "api_key": Config.CHROMA_API_KEY,
    }
    self.client = chromadb.CloudClient(**connection_settings)

    print("Connected to ChromaDB Cloud successfully!")

    # Collections are created (or fetched) right after the connection is up.
    self._init_collections()
|
| 45 |
+
|
| 46 |
+
def _init_collections(self):
    """Fetch or create every collection the service uses and bind each to an attribute."""
    # (attribute on self, collection name, description stored as collection metadata)
    layout = (
        ("users_collection", "users",
         "User authentication data with roles"),
        ("buckets_collection", "buckets",
         "Document buckets for organization"),
        ("documents_collection", "documents",
         "Document metadata and embeddings"),
        ("chunks_collection", "document_chunks",
         "Document chunks for RAG retrieval"),
        ("conversations_collection", "conversation_history",
         "Persistent conversation memory for context"),
        ("chat_sessions_collection", "chat_sessions",
         "Complete chat session storage"),
        ("metadata_collection", "document_metadata",
         "Structured document metadata for aggregate queries"),
        ("summary_chunks_collection", "document_summaries",
         "Document summaries for aggregate queries"),
    )

    for attribute, collection_name, description in layout:
        collection = self.client.get_or_create_collection(
            name=collection_name,
            metadata={"description": description}
        )
        setattr(self, attribute, collection)

    print("Collections initialized: users, buckets, documents, document_chunks, conversation_history, chat_sessions, document_metadata, document_summaries")
|
| 97 |
+
|
| 98 |
+
# ==================== User Operations ====================
|
| 99 |
+
|
| 100 |
+
def create_user(self, username: str, password_hash: str, email: str = "",
                role: str = "admin", admin_id: str = None) -> dict:
    """Register a user record (admin or employee) in the users collection.

    The user id is the first 16 hex characters of the SHA-256 of the username.
    Returns ``{"error": ...}`` if that id is already present, otherwise the
    new ``user_id``, ``username`` and ``role``.
    """
    user_id = hashlib.sha256(username.encode()).hexdigest()[:16]

    if self.users_collection.get(ids=[user_id])['ids']:
        return {"error": "User already exists"}

    record = dict(
        username=username,
        password_hash=password_hash,
        email=email,
        role=role,
        created_at=time.time(),
    )
    # admin_id is only stored when a non-empty value is supplied.
    if admin_id:
        record["admin_id"] = admin_id

    self.users_collection.add(ids=[user_id], documents=[username], metadatas=[record])

    return {"user_id": user_id, "username": username, "role": role}
|
| 127 |
+
|
| 128 |
+
def get_user(self, username: str) -> dict | None:
|
| 129 |
+
"""Get user by username"""
|
| 130 |
+
user_id = hashlib.sha256(username.encode()).hexdigest()[:16]
|
| 131 |
+
result = self.users_collection.get(ids=[user_id])
|
| 132 |
+
|
| 133 |
+
if result['ids']:
|
| 134 |
+
return {
|
| 135 |
+
"user_id": result['ids'][0],
|
| 136 |
+
"username": result['metadatas'][0]['username'],
|
| 137 |
+
"password_hash": result['metadatas'][0]['password_hash'],
|
| 138 |
+
"email": result['metadatas'][0].get('email', ''),
|
| 139 |
+
"role": result['metadatas'][0].get('role', 'admin'),
|
| 140 |
+
"admin_id": result['metadatas'][0].get('admin_id')
|
| 141 |
+
}
|
| 142 |
+
return None
|
| 143 |
+
|
| 144 |
+
def get_employees_by_admin(self, admin_id: str) -> list:
    """List the users whose stored ``admin_id`` equals the given admin id."""
    matches = self.users_collection.get(where={"admin_id": admin_id})

    return [
        {
            "user_id": employee_id,
            "email": meta.get('email', ''),
            "username": meta.get('username', ''),
            "created_at": meta.get('created_at', 0)
        }
        for employee_id, meta in zip(matches['ids'], matches['metadatas'])
    ]
|
| 157 |
+
|
| 158 |
+
def delete_employee(self, admin_id: str, employee_id: str) -> bool:
    """Remove an employee record, but only when it is linked to the given admin.

    Returns True when the record was deleted, False when it does not exist or
    its stored ``admin_id`` differs from ``admin_id``.
    """
    record = self.users_collection.get(ids=[employee_id])

    belongs_to_admin = (
        bool(record['ids'])
        and record['metadatas'][0].get('admin_id') == admin_id
    )
    if belongs_to_admin:
        self.users_collection.delete(ids=[employee_id])
    return belongs_to_admin
|
| 170 |
+
|
| 171 |
+
# ==================== Bucket Operations ====================
|
| 172 |
+
|
| 173 |
+
def create_bucket(self, user_id: str, name: str, description: str = "") -> dict:
    """Create a bucket record owned by ``user_id`` and return its id and name."""
    # The current time is part of the hashed seed, so the id is not derived
    # from user and name alone.
    seed = f"{user_id}_{name}_{time.time()}"
    bucket_id = hashlib.sha256(seed.encode()).hexdigest()[:16]

    details = {
        "user_id": user_id,
        "name": name,
        "description": description,
        "created_at": time.time()
    }
    self.buckets_collection.add(ids=[bucket_id], documents=[name], metadatas=[details])

    return {"bucket_id": bucket_id, "name": name}
|
| 189 |
+
|
| 190 |
+
def get_user_buckets(self, user_id: str) -> list:
    """Return the user's buckets, each annotated with its document count."""
    owned = self.buckets_collection.get(where={"user_id": user_id})

    def describe(bucket_id, meta):
        # One documents query per bucket, used only to count its members.
        members = self.documents_collection.get(where={"bucket_id": bucket_id})
        return {
            "bucket_id": bucket_id,
            "name": meta['name'],
            "description": meta.get('description', ''),
            "doc_count": len(members['ids']),
            "created_at": meta['created_at']
        }

    return [
        describe(bucket_id, meta)
        for bucket_id, meta in zip(owned['ids'], owned['metadatas'])
    ]
|
| 209 |
+
|
| 210 |
+
def delete_bucket(self, bucket_id: str, user_id: str) -> bool:
    """Delete a bucket owned by ``user_id`` and detach the documents it held.

    Documents are kept; their ``bucket_id`` metadata is reset to an empty
    string. Returns False when the bucket is missing or owned by someone else.
    """
    found = self.buckets_collection.get(ids=[bucket_id])
    if not (found['ids'] and found['metadatas'][0]['user_id'] == user_id):
        return False

    self.buckets_collection.delete(ids=[bucket_id])

    # Clear the bucket reference on every document that pointed at it.
    detached = self.documents_collection.get(where={"bucket_id": bucket_id})
    for doc_id, meta in zip(detached['ids'], detached['metadatas']):
        meta['bucket_id'] = ""
        self.documents_collection.update(ids=[doc_id], metadatas=[meta])

    return True
|
| 231 |
+
|
| 232 |
+
# ==================== Document Operations ====================
|
| 233 |
+
|
| 234 |
+
def store_document(self, user_id: str, doc_id: str, filename: str,
                   doc_type: str, content: str, bucket_id: str = "") -> dict:
    """Record a document in the documents collection.

    Only the first 1000 characters of ``content`` are stored as a preview;
    the full length is kept in the ``content_length`` metadata field.
    """
    preview = content[:1000]
    details = {
        "user_id": user_id,
        "filename": filename,
        "doc_type": doc_type,
        "bucket_id": bucket_id,
        "content_length": len(content),
        "created_at": time.time()
    }
    self.documents_collection.add(ids=[doc_id], documents=[preview], metadatas=[details])
    return {"doc_id": doc_id, "filename": filename}
|
| 250 |
+
|
| 251 |
+
def update_document_bucket(self, doc_id: str, user_id: str, bucket_id: str) -> bool:
    """Reassign a document to ``bucket_id``; returns False if it is missing or not the user's."""
    found = self.documents_collection.get(ids=[doc_id])
    if not (found['ids'] and found['metadatas'][0]['user_id'] == user_id):
        return False

    # Rewrite the full metadata record with only the bucket reference changed.
    updated_meta = found['metadatas'][0]
    updated_meta['bucket_id'] = bucket_id
    self.documents_collection.update(ids=[doc_id], metadatas=[updated_meta])
    return True
|
| 265 |
+
|
| 266 |
+
def get_user_documents(self, user_id: str, bucket_id: str = None) -> list:
    """List a user's documents; a non-empty ``bucket_id`` narrows the list to that bucket."""
    criteria = {"user_id": user_id}
    if bucket_id:
        criteria = {"$and": [{"user_id": user_id}, {"bucket_id": bucket_id}]}

    found = self.documents_collection.get(where=criteria)

    return [
        {
            "doc_id": doc_id,
            "filename": meta['filename'],
            "doc_type": meta['doc_type'],
            "bucket_id": meta.get('bucket_id', ''),
            "created_at": meta['created_at']
        }
        for doc_id, meta in zip(found['ids'], found['metadatas'])
    ]
|
| 285 |
+
|
| 286 |
+
def get_document(self, doc_id: str, user_id: str) -> dict | None:
|
| 287 |
+
"""Get a single document by ID"""
|
| 288 |
+
doc = self.documents_collection.get(ids=[doc_id])
|
| 289 |
+
if not doc['ids'] or doc['metadatas'][0]['user_id'] != user_id:
|
| 290 |
+
return None
|
| 291 |
+
|
| 292 |
+
return {
|
| 293 |
+
"doc_id": doc_id,
|
| 294 |
+
"filename": doc['metadatas'][0]['filename'],
|
| 295 |
+
"doc_type": doc['metadatas'][0]['doc_type'],
|
| 296 |
+
"bucket_id": doc['metadatas'][0].get('bucket_id', ''),
|
| 297 |
+
"content_preview": doc['documents'][0],
|
| 298 |
+
"created_at": doc['metadatas'][0]['created_at']
|
| 299 |
+
}
|
| 300 |
+
|
| 301 |
+
def delete_document(self, doc_id: str, user_id: str) -> bool:
    """Delete a document record together with the chunks stored for it.

    Returns False when the document is missing, belongs to another user, or
    the document record itself cannot be deleted. A failure while deleting
    chunks is logged and does not stop the document record from being removed.
    """
    record = self.documents_collection.get(ids=[doc_id])
    if not (record['ids'] and record['metadatas'][0]['user_id'] == user_id):
        print(f"Document {doc_id} not found or access denied for user {user_id}")
        return False

    filename = record['metadatas'][0].get('filename', 'unknown')
    print(f"Deleting document: {filename} (ID: {doc_id})")

    # Step 1: chunks (best effort).
    try:
        matching = self.chunks_collection.get(where={"doc_id": doc_id})
        chunk_count = len(matching['ids']) if matching['ids'] else 0

        if chunk_count > 0:
            print(f" Deleting {chunk_count} chunks for document {doc_id}...")
            self.chunks_collection.delete(ids=matching['ids'])
            print(f" Successfully deleted {chunk_count} chunks")
        else:
            print(f" No chunks found for document {doc_id}")
    except Exception as e:
        # Deliberately not fatal: the document record is still removed below.
        print(f" Error deleting chunks: {e}")

    # Step 2: the document record itself.
    try:
        self.documents_collection.delete(ids=[doc_id])
    except Exception as e:
        print(f" Error deleting document metadata: {e}")
        return False
    print(f" Successfully deleted document metadata for {doc_id}")

    return True
|
| 335 |
+
|
| 336 |
+
def clear_all_user_chunks(self, user_id: str) -> int:
    """Delete every chunk stored for ``user_id`` and return how many were removed."""
    stale = self.chunks_collection.get(where={"user_id": user_id})
    if not stale['ids']:
        return 0

    self.chunks_collection.delete(ids=stale['ids'])
    return len(stale['ids'])
|
| 343 |
+
|
| 344 |
+
# ==================== Chunk Operations (RAG) ====================
|
| 345 |
+
|
| 346 |
+
def store_chunks(self, doc_id: str, user_id: str, chunks: list[dict], bucket_id: str = ""):
    """Store a document's chunks in the chunks collection, in batches.

    Each chunk dict must carry ``text`` and may carry ``start`` / ``end``
    offsets (both default to 0). Chunk ids are ``{doc_id}_chunk_{index}``,
    where ``index`` is the chunk's position in ``chunks``. Text longer than
    4000 characters is truncated before storage.

    A batch that fails to store is retried chunk by chunk; chunks that still
    fail are logged and skipped. Nothing is returned and nothing is raised
    for storage errors.
    """
    if not chunks:
        return

    total_chunks = len(chunks)
    print(f"Storing {total_chunks} chunks for document...")

    # Batch size for ChromaDB Cloud (max 100 per batch recommended)
    BATCH_SIZE = 100
    MAX_CHUNK_CHARS = 4000

    for start in range(0, total_chunks, BATCH_SIZE):
        batch = chunks[start:start + BATCH_SIZE]

        batch_ids = [f"{doc_id}_chunk_{start + offset}" for offset in range(len(batch))]
        batch_documents = [chunk['text'][:MAX_CHUNK_CHARS] for chunk in batch]
        batch_metadatas = [
            {
                "doc_id": doc_id,
                "user_id": user_id,
                "bucket_id": bucket_id,
                "chunk_index": start + offset,
                "start_char": chunk.get('start', 0),
                "end_char": chunk.get('end', 0)
            }
            for offset, chunk in enumerate(batch)
        ]

        try:
            self.chunks_collection.add(
                ids=batch_ids,
                documents=batch_documents,
                metadatas=batch_metadatas
            )
            print(f" Stored batch {start // BATCH_SIZE + 1} ({len(batch)} chunks)")
        except Exception as e:
            print(f" Error storing batch starting at index {start}: {str(e)[:100]}")
            # Fallback: one bad chunk should not sink the other 99, so retry
            # this batch one chunk at a time.
            print(" Retrying invalid batch one by one...")
            for offset, (chunk_id, text, meta) in enumerate(
                    zip(batch_ids, batch_documents, batch_metadatas)):
                try:
                    self.chunks_collection.add(ids=[chunk_id], documents=[text], metadatas=[meta])
                except Exception as inner_e:
                    print(f" Failed chunk {start + offset}: {str(inner_e)[:50]}")
|
| 405 |
+
|
| 406 |
+
def search_chunks(self, user_id: str, query: str,
                  doc_ids: list[str] = None, bucket_id: str = None,
                  top_k: int = 5) -> list[dict]:
    """Query the chunks collection for text relevant to ``query``.

    Results are always restricted to ``user_id``. A non-empty ``bucket_id``
    additionally restricts them to that bucket and takes precedence over
    ``doc_ids``; otherwise a non-empty ``doc_ids`` restricts them to those
    documents. Each hit carries ``chunk_id``, ``text``, ``doc_id``,
    ``bucket_id`` and ``distance`` (0 when the query returns no distances).
    """
    owner_filter = {"user_id": user_id}
    if bucket_id:
        where_clause = {"$and": [owner_filter, {"bucket_id": bucket_id}]}
        print(f"[CHROMA] Strict bucket isolation: searching only bucket '{bucket_id}'")
    elif doc_ids:
        where_clause = {"$and": [owner_filter, {"doc_id": {"$in": doc_ids}}]}
    else:
        where_clause = owner_filter

    results = self.chunks_collection.query(
        query_texts=[query],
        n_results=top_k,
        where=where_clause
    )

    if not (results['ids'] and results['ids'][0]):
        return []

    hits = []
    for position, chunk_id in enumerate(results['ids'][0]):
        meta = results['metadatas'][0][position]
        chunk_bucket = meta.get('bucket_id', '')

        # Second line of defense: drop anything the filter should have excluded.
        if bucket_id and chunk_bucket != bucket_id:
            print(f"[CHROMA WARNING] Bucket leak detected! Expected '{bucket_id}', got '{chunk_bucket}'")
            continue

        if results.get('distances'):
            distance = results['distances'][0][position]
        else:
            distance = 0

        hits.append({
            "chunk_id": chunk_id,
            "text": results['documents'][0][position],
            "doc_id": meta['doc_id'],
            "bucket_id": chunk_bucket,
            "distance": distance
        })

    return hits
|
| 458 |
+
|
| 459 |
+
def get_bucket_document_list(self, user_id: str, bucket_id: str) -> list[str]:
    """Return the filenames of the user's documents in a bucket ([] for an empty bucket id)."""
    filenames = []
    if bucket_id:
        for entry in self.get_user_documents(user_id, bucket_id):
            filenames.append(entry.get('filename', 'Unknown'))
    return filenames
|
| 466 |
+
|
| 467 |
+
def get_document_chunks(self, doc_id: str) -> list[dict]:
    """Return all stored chunks of a document, ordered by ``chunk_index``."""
    found = self.chunks_collection.get(where={"doc_id": doc_id})

    unordered = [
        {
            "chunk_id": chunk_id,
            "text": text,
            "chunk_index": meta['chunk_index']
        }
        for chunk_id, text, meta in zip(found['ids'], found['documents'], found['metadatas'])
    ]
    return sorted(unordered, key=lambda item: item['chunk_index'])
|
| 481 |
+
|
| 482 |
+
# ==================== Conversation Memory Operations ====================
|
| 483 |
+
|
| 484 |
+
def store_conversation(self, user_id: str, role: str, content: str,
                       bucket_id: str = "", chat_id: str = "") -> dict:
    """Store one conversation message and return its generated ``msg_id``.

    The id has the form ``{user_id}_{epoch_ms}_{random8}``. The random suffix
    keeps ids distinct when two messages for the same user are stored within
    the same millisecond, which a timestamp-only id cannot guarantee.
    """
    import time
    import uuid

    now = time.time()
    msg_id = f"{user_id}_{int(now * 1000)}_{uuid.uuid4().hex[:8]}"

    self.conversations_collection.add(
        ids=[msg_id],
        documents=[content],
        metadatas=[{
            "user_id": user_id,
            "role": role,  # 'user' or 'assistant'
            "bucket_id": bucket_id,
            "chat_id": chat_id,
            "timestamp": now
        }]
    )
    return {"msg_id": msg_id}
|
| 502 |
+
|
| 503 |
+
def get_conversation_history(self, user_id: str, bucket_id: str = None,
                             limit: int = 20) -> list[dict]:
    """Return the user's most recent conversation messages, oldest first.

    A non-empty ``bucket_id`` restricts the history to that bucket. At most
    ``limit`` messages are returned; a ``limit`` of zero or less yields an
    empty list (a plain ``messages[-limit:]`` would return everything for 0).
    """
    if limit <= 0:
        return []

    if bucket_id:
        where_clause = {
            "$and": [
                {"user_id": user_id},
                {"bucket_id": bucket_id}
            ]
        }
    else:
        where_clause = {"user_id": user_id}

    results = self.conversations_collection.get(where=where_clause)

    messages = [
        {
            "msg_id": msg_id,
            "role": meta['role'],
            "content": content,
            "timestamp": meta['timestamp'],
            "bucket_id": meta.get('bucket_id', ''),
            "chat_id": meta.get('chat_id', '')
        }
        for msg_id, content, meta in zip(
            results['ids'], results['documents'], results['metadatas'])
    ]

    # Sort by timestamp (newest last) and keep only the tail.
    messages.sort(key=lambda m: m['timestamp'])
    return messages[-limit:]
|
| 534 |
+
|
| 535 |
+
def clear_conversation(self, user_id: str, bucket_id: str = None) -> bool:
    """Delete the user's stored conversation messages, optionally only for one bucket.

    Always returns True, whether or not any messages matched.
    """
    scope = {"user_id": user_id}
    if bucket_id:
        scope = {"$and": [{"user_id": user_id}, {"bucket_id": bucket_id}]}

    matching_ids = self.conversations_collection.get(where=scope)['ids']
    if matching_ids:
        self.conversations_collection.delete(ids=matching_ids)
    return True
|
| 551 |
+
|
| 552 |
+
# ==================== Chat Session Operations ====================
|
| 553 |
+
|
| 554 |
+
def save_chat_session(self, user_id: str, chat_id: str, topic: str,
                      messages: list, bucket_id: str = "") -> dict:
    """Store or update a complete chat session.

    The messages are serialized to JSON and stored as the session document.
    Note: ChromaDB Cloud has a 16KB document size limit, so an oversized
    session is shrunk in two steps:

    1. every string ``content`` longer than 3000 characters is truncated;
    2. if that is not enough, only the most recent messages are kept.

    If the write still fails, a single placeholder message is stored instead.
    ``message_count`` in the metadata always reflects the original number of
    messages. The caller's ``messages`` list is not modified.

    NOTE(review): an existing session with the same ``chat_id`` is overwritten
    without checking that it belongs to ``user_id`` — confirm this is intended.
    """
    import json

    # ChromaDB Cloud has a 16KB (16384 bytes) document size limit
    MAX_DOC_SIZE = 14000  # Leave buffer for metadata overhead
    MAX_MESSAGE_LENGTH = 3000  # Max chars per message when truncating

    # First, try to serialize as-is
    messages_json = json.dumps(messages)

    # If too large, truncate individual message contents
    if len(messages_json) > MAX_DOC_SIZE:
        truncated_messages = []
        for msg in messages:
            truncated_msg = {**msg}
            content = msg.get('content')
            # Only string content can be measured and sliced; None or
            # structured content is passed through untouched instead of
            # raising TypeError.
            if isinstance(content, str) and len(content) > MAX_MESSAGE_LENGTH:
                truncated_msg['content'] = content[:MAX_MESSAGE_LENGTH] + '... [truncated for storage]'
            truncated_messages.append(truncated_msg)
        messages_json = json.dumps(truncated_messages)

        # If still too large, keep only the most recent messages, shrinking
        # the window (10, 8, 6, 4) until the document fits.
        if len(messages_json) > MAX_DOC_SIZE:
            keep_count = 10
            while len(messages_json) > MAX_DOC_SIZE and keep_count > 2:
                truncated_messages = truncated_messages[-keep_count:]
                messages_json = json.dumps(truncated_messages)
                keep_count -= 2

    metadata = {
        "user_id": user_id,
        "topic": topic,
        "bucket_id": bucket_id,
        "message_count": len(messages),
        "timestamp": time.time()
    }

    # Check if chat exists
    existing = self.chat_sessions_collection.get(ids=[chat_id])

    try:
        if existing['ids']:
            # Update existing chat
            self.chat_sessions_collection.update(
                ids=[chat_id],
                documents=[messages_json],
                metadatas=[metadata]
            )
        else:
            # Add new chat
            self.chat_sessions_collection.add(
                ids=[chat_id],
                documents=[messages_json],
                metadatas=[metadata]
            )
    except Exception as e:
        # If still failing, store minimal version
        print(f"[CHAT SAVE] Error saving full chat, storing minimal: {e}")
        minimal_messages = [{"role": "system", "content": f"Chat with {len(messages)} messages (too large to store)"}]
        self.chat_sessions_collection.upsert(
            ids=[chat_id],
            documents=[json.dumps(minimal_messages)],
            metadatas=[metadata]
        )

    return {"chat_id": chat_id, "topic": topic}
|
| 626 |
+
|
| 627 |
+
def get_user_chat_sessions(self, user_id: str) -> list:
    """Return all chat sessions stored for a user, newest first.

    Each session has ``id``, ``topic``, ``messages``, ``bucket`` and
    ``timestamp``. A session whose stored document is missing or is not
    valid JSON is still listed, with an empty ``messages`` list.
    """
    import json

    results = self.chat_sessions_collection.get(where={"user_id": user_id})

    sessions = []
    for i, chat_id in enumerate(results['ids']):
        try:
            messages = json.loads(results['documents'][i])
        except (ValueError, TypeError, LookupError):
            # Bad JSON, a None document, or a missing entry: degrade to an
            # empty history. Unlike a bare ``except``, this lets
            # KeyboardInterrupt / SystemExit propagate.
            messages = []

        meta = results['metadatas'][i]
        sessions.append({
            "id": chat_id,
            "topic": meta.get('topic', 'Chat'),
            "messages": messages,
            "bucket": meta.get('bucket_id', ''),
            "timestamp": meta.get('timestamp', 0)
        })

    # Sort by timestamp (newest first)
    sessions.sort(key=lambda x: x['timestamp'], reverse=True)
    return sessions
|
| 651 |
+
|
| 652 |
+
def get_chat_session(self, user_id: str, chat_id: str) -> dict | None:
|
| 653 |
+
"""Get a single chat session by ID"""
|
| 654 |
+
import json
|
| 655 |
+
|
| 656 |
+
result = self.chat_sessions_collection.get(ids=[chat_id])
|
| 657 |
+
|
| 658 |
+
if not result['ids']:
|
| 659 |
+
return None
|
| 660 |
+
|
| 661 |
+
# Verify ownership
|
| 662 |
+
if result['metadatas'][0].get('user_id') != user_id:
|
| 663 |
+
return None
|
| 664 |
+
|
| 665 |
+
try:
|
| 666 |
+
messages = json.loads(result['documents'][0])
|
| 667 |
+
except:
|
| 668 |
+
messages = []
|
| 669 |
+
|
| 670 |
+
return {
|
| 671 |
+
"id": chat_id,
|
| 672 |
+
"topic": result['metadatas'][0].get('topic', 'Chat'),
|
| 673 |
+
"messages": messages,
|
| 674 |
+
"bucket": result['metadatas'][0].get('bucket_id', ''),
|
| 675 |
+
"timestamp": result['metadatas'][0].get('timestamp', 0)
|
| 676 |
+
}
|
| 677 |
+
|
| 678 |
+
def delete_chat_session(self, user_id: str, chat_id: str) -> bool:
    """Remove a chat session together with its conversation history.

    Returns ``False`` when the chat does not exist or is stored under a
    different ``user_id``; otherwise deletes it and returns ``True``.
    Cleaning up the related conversation-history entries is best effort:
    a failure there is printed as a warning and does not change the result.
    """
    lookup = self.chat_sessions_collection.get(ids=[chat_id])

    # Unknown chat, or a chat owned by somebody else: nothing is deleted.
    if not lookup['ids'] or lookup['metadatas'][0].get('user_id') != user_id:
        return False

    self.chat_sessions_collection.delete(ids=[chat_id])

    # History entries are keyed by both the owner and the chat.
    history_filter = {"$and": [{"user_id": user_id}, {"chat_id": chat_id}]}
    try:
        history = self.conversations_collection.get(where=history_filter)
        history_ids = history['ids']
        if history_ids:
            self.conversations_collection.delete(ids=history_ids)
            print(f"Deleted {len(history_ids)} conversation history entries for chat {chat_id}")
    except Exception as err:
        print(f"Warning: Could not delete conversation history for chat {chat_id}: {err}")

    return True
|
| 709 |
+
|
| 710 |
+
# ==================== Document Metadata Operations (Aggregate Queries) ====================
|
| 711 |
+
|
| 712 |
+
def store_document_metadata(self, doc_id: str, user_id: str, bucket_id: str,
                            metadata: dict) -> dict:
    """
    Store structured metadata for a document.
    Used for aggregate queries like 'list all manufacturing units'.

    The metadata is flattened to primitive values before it is written to
    ``self.metadata_collection``; list fields are stored as JSON strings.
    An existing record with the same ``doc_id`` is updated, otherwise a new
    one is added.

    Args:
        doc_id: Document ID (also used as the record ID)
        user_id: User ID
        bucket_id: Bucket ID
        metadata: Structured metadata dict

    Returns:
        ``{"doc_id": doc_id, "status": "stored"}``
    """
    import json
    import re

    # First numeric token in a string: digits with optional thousands commas
    # and an optional decimal part, or a bare ".5" style fraction that is not
    # glued to a preceding letter (so the dot in "Rs.500" is not a decimal).
    number_pattern = re.compile(r'\d[\d,]*(?:\.\d+)?|(?<![A-Za-z])\.\d+')

    def safe_float(value, default=0.0):
        """Convert a number or a currency-like string to float."""
        if value is None:
            return default
        if isinstance(value, (int, float)):
            return float(value)
        if isinstance(value, str):
            # Stripping every non-digit/non-dot character (the previous
            # approach) turned "Rs. 500000" into ".500000" == 0.5; take the
            # first numeric token instead.
            found = number_pattern.search(value)
            if found:
                try:
                    return float(found.group().replace(',', ''))
                except ValueError:
                    return default
        return default

    def safe_int(value, default=0):
        """Convert a number or numeric string to int."""
        if value is None:
            return default
        if isinstance(value, int):
            return value
        try:
            return int(safe_float(value, default))
        except (ValueError, OverflowError):
            # NaN / infinity cannot be converted to int.
            return default

    def safe_str(key):
        """Return the field as text; a missing or None value becomes ''."""
        value = metadata.get(key)
        # str(None) would store the literal text "None".
        return "" if value is None else str(value)

    created_at = metadata.get("created_at")
    if created_at is None:
        created_at = time.time()

    # Flatten metadata for ChromaDB (which only supports primitive types in metadata)
    flat_metadata = {
        "doc_id": doc_id,
        "user_id": user_id,
        "bucket_id": bucket_id,
        "document_type": safe_str("document_type"),
        "document_title": safe_str("document_title"),
        "policy_number": safe_str("policy_number"),
        "insurer_name": safe_str("insurer_name"),
        "insured_name": safe_str("insured_name"),
        "broker_name": safe_str("broker_name"),
        "policy_type": safe_str("policy_type"),
        "industry": safe_str("industry"),
        "is_manufacturing": bool(metadata.get("is_manufacturing", False)),
        "sum_insured": safe_float(metadata.get("sum_insured")),
        "premium_amount": safe_float(metadata.get("premium_amount")),
        "policy_start_date": safe_str("policy_start_date"),
        "policy_end_date": safe_str("policy_end_date"),
        "renewal_date": safe_str("renewal_date"),
        "renewal_year": safe_int(metadata.get("renewal_year")),
        "city": safe_str("city"),
        "state": safe_str("state"),
        "pincode": safe_str("pincode"),
        "property_address": safe_str("property_address")[:500],
        "created_at": created_at
    }

    # Store arrays as JSON strings; anything that is not a list is stored as [].
    coverage_types = metadata.get("coverage_type", [])
    flat_metadata["coverage_type_json"] = json.dumps(
        coverage_types if isinstance(coverage_types, list) else []
    )

    keywords = metadata.get("keywords", [])
    flat_metadata["keywords_json"] = json.dumps(keywords if isinstance(keywords, list) else [])

    # Create searchable text from metadata (this is the stored document body).
    searchable_text = "\n".join([
        safe_str("document_title"),
        safe_str("insured_name"),
        safe_str("insurer_name"),
        safe_str("policy_type"),
        safe_str("industry"),
        f"{safe_str('city')} {safe_str('state')}",
        f"Policy Number: {safe_str('policy_number')}",
        f"Sum Insured: {safe_str('sum_insured')}",
    ]).strip()

    # Check if metadata already exists for this doc
    existing = self.metadata_collection.get(ids=[doc_id])

    if existing['ids']:
        self.metadata_collection.update(
            ids=[doc_id],
            documents=[searchable_text],
            metadatas=[flat_metadata]
        )
    else:
        self.metadata_collection.add(
            ids=[doc_id],
            documents=[searchable_text],
            metadatas=[flat_metadata]
        )

    return {"doc_id": doc_id, "status": "stored"}
|
| 818 |
+
|
| 819 |
+
def get_document_metadata(self, doc_id: str, user_id: str) -> dict | None:
|
| 820 |
+
"""Get metadata for a specific document."""
|
| 821 |
+
result = self.metadata_collection.get(ids=[doc_id])
|
| 822 |
+
|
| 823 |
+
if not result['ids']:
|
| 824 |
+
return None
|
| 825 |
+
|
| 826 |
+
meta = result['metadatas'][0]
|
| 827 |
+
if meta.get('user_id') != user_id:
|
| 828 |
+
return None
|
| 829 |
+
|
| 830 |
+
return meta
|
| 831 |
+
|
| 832 |
+
def get_all_metadata(self, user_id: str, bucket_id: str = None) -> list[dict]:
    """
    Get ALL document metadata for a user/bucket.
    Used for aggregate queries - returns complete list, no top-K limit.

    Args:
        user_id: Owner of the documents.
        bucket_id: Optional bucket to restrict the result to; when falsy,
            records from all of the user's buckets are returned.

    Returns:
        The stored metadata dicts, each extended in place with
        ``coverage_type`` and ``keywords`` decoded from their
        ``*_json`` string fields (``[]`` when the field is missing or not
        valid JSON).
    """
    import json

    def decode(raw):
        """Decode a JSON string field, falling back to an empty list."""
        try:
            return json.loads(raw)
        except (TypeError, ValueError):
            # Only decoding problems are tolerated; other errors propagate.
            return []

    if bucket_id:
        where_clause = {
            "$and": [
                {"user_id": user_id},
                {"bucket_id": bucket_id}
            ]
        }
    else:
        where_clause = {"user_id": user_id}

    results = self.metadata_collection.get(where=where_clause)

    metadata_list = []
    for _doc_id, meta in zip(results['ids'], results['metadatas']):
        # Parse JSON arrays back
        meta['coverage_type'] = decode(meta.get('coverage_type_json', '[]'))
        meta['keywords'] = decode(meta.get('keywords_json', '[]'))
        metadata_list.append(meta)

    return metadata_list
|
| 869 |
+
|
| 870 |
+
def search_metadata(self, user_id: str, bucket_id: str, filters: dict) -> list[dict]:
    """
    Return the metadata records that match every given filter.

    The query always matches on ``user_id``; ``bucket_id`` is added when it
    is truthy. Each entry of ``filters`` whose value is neither ``None`` nor
    an empty string becomes an equality condition on that metadata field.
    Supports filtering by: policy_type, industry, is_manufacturing, renewal_year, city, state
    """
    conditions = [{"user_id": user_id}]
    if bucket_id:
        conditions.append({"bucket_id": bucket_id})

    # Skip filters the caller left unset.
    conditions.extend(
        {field: value}
        for field, value in filters.items()
        if value is not None and value != ""
    )

    # A lone condition is passed as-is; several are combined with "$and".
    where_clause = conditions[0] if len(conditions) == 1 else {"$and": conditions}

    results = self.metadata_collection.get(where=where_clause)

    return [results['metadatas'][idx] for idx, _ in enumerate(results['ids'])]
|
| 893 |
+
|
| 894 |
+
def delete_document_metadata(self, doc_id: str) -> bool:
    """Delete metadata for a document.

    Returns:
        ``True`` when the delete call completed, ``False`` when it raised.
        The failure reason is printed so it is not lost silently.
    """
    try:
        self.metadata_collection.delete(ids=[doc_id])
        return True
    except Exception as e:
        # Best effort: report the failure to the caller as False, but keep
        # the reason visible and let interpreter-level signals
        # (KeyboardInterrupt, SystemExit) propagate.
        print(f"Warning: Could not delete metadata for document {doc_id}: {e}")
        return False
|
| 901 |
+
|
| 902 |
+
# ==================== Summary Chunks Operations ====================
|
| 903 |
+
|
| 904 |
+
def store_summary_chunk(self, doc_id: str, user_id: str, summary: str,
                        bucket_id: str = "", filename: str = "") -> dict:
    """
    Save a document's summary as a dedicated chunk for aggregate queries.

    The chunk is stored under the ID ``"<doc_id>_summary"``. If a chunk with
    that ID already exists it is updated, otherwise it is added.

    Returns:
        ``{"summary_id": <id>, "status": "stored"}``
    """
    summary_id = f"{doc_id}_summary"

    chunk_meta = dict(
        doc_id=doc_id,
        user_id=user_id,
        bucket_id=bucket_id,
        filename=filename,
        chunk_type="summary",
        created_at=time.time(),
    )

    collection = self.summary_chunks_collection
    already_stored = collection.get(ids=[summary_id])['ids']

    # Same payload either way; only the write operation differs.
    write = collection.update if already_stored else collection.add
    write(ids=[summary_id], documents=[summary], metadatas=[chunk_meta])

    return {"summary_id": summary_id, "status": "stored"}
|
| 937 |
+
|
| 938 |
+
def get_all_summaries(self, user_id: str, bucket_id: str = None) -> list[dict]:
    """
    Fetch every stored document summary for a user, optionally limited to
    one bucket. No top-K limit is applied.

    Returns:
        One dict per summary with the keys ``doc_id``, ``filename``,
        ``summary`` and ``bucket_id``.
    """
    owner_filter = {"user_id": user_id}
    if bucket_id:
        where_clause = {"$and": [owner_filter, {"bucket_id": bucket_id}]}
    else:
        where_clause = owner_filter

    results = self.summary_chunks_collection.get(where=where_clause)

    return [
        {
            "doc_id": results['metadatas'][pos]['doc_id'],
            "filename": results['metadatas'][pos].get('filename', ''),
            "summary": results['documents'][pos],
            "bucket_id": results['metadatas'][pos].get('bucket_id', '')
        }
        for pos, _ in enumerate(results['ids'])
    ]
|
| 965 |
+
|
| 966 |
+
def search_summaries(self, user_id: str, query: str, bucket_id: str = None,
                     top_k: int = 50) -> list[dict]:
    """Query the summary collection with ``query`` and return up to ``top_k`` hits.

    The search is limited to the user's summaries, and to ``bucket_id`` when
    one is given. Each hit is a dict with the keys ``doc_id``, ``filename``,
    ``summary`` and ``distance`` (``0`` when the result carries no distances).
    """
    owner_filter = {"user_id": user_id}
    if bucket_id:
        where_clause = {"$and": [owner_filter, {"bucket_id": bucket_id}]}
    else:
        where_clause = owner_filter

    results = self.summary_chunks_collection.query(
        query_texts=[query],
        n_results=top_k,
        where=where_clause
    )

    # Results are nested one level per query text; only one query was sent.
    if not results['ids'] or not results['ids'][0]:
        return []

    hits = []
    for pos, _ in enumerate(results['ids'][0]):
        hit_meta = results['metadatas'][0][pos]
        if results.get('distances'):
            distance = results['distances'][0][pos]
        else:
            distance = 0
        hits.append({
            "doc_id": hit_meta['doc_id'],
            "filename": hit_meta.get('filename', ''),
            "summary": results['documents'][0][pos],
            "distance": distance
        })

    return hits
|
| 996 |
+
|
| 997 |
+
def delete_summary_chunk(self, doc_id: str) -> bool:
    """Delete summary chunk for a document.

    The chunk is addressed by the ID ``"<doc_id>_summary"``.

    Returns:
        ``True`` when the delete call completed, ``False`` when it raised.
        The failure reason is printed so it is not lost silently.
    """
    summary_id = f"{doc_id}_summary"
    try:
        self.summary_chunks_collection.delete(ids=[summary_id])
        return True
    except Exception as e:
        # Best effort: report the failure to the caller as False, but keep
        # the reason visible and let interpreter-level signals
        # (KeyboardInterrupt, SystemExit) propagate.
        print(f"Warning: Could not delete summary chunk {summary_id}: {e}")
        return False
|
| 1005 |
+
|
| 1006 |
+
|
| 1007 |
+
# Singleton instance
|
| 1008 |
+
chroma_service = ChromaService()
|
| 1009 |
+
|
services/date_parser.py
ADDED
|
@@ -0,0 +1,285 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Date Parser Service
|
| 3 |
+
Handles parsing of various date formats commonly found in insurance documents.
|
| 4 |
+
Supports:
|
| 5 |
+
- 1-1-25, 01-01-2025, 1/1/25, 01/01/2025
|
| 6 |
+
- January 1, 2025, Jan 1, 2025, 1 January 2025
|
| 7 |
+
- 2025-01-01 (ISO format)
|
| 8 |
+
- Date ranges and period calculations
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import re
|
| 12 |
+
from datetime import datetime, timedelta
|
| 13 |
+
from typing import Optional, List, Dict, Tuple
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class DateParser:
    """Parse and normalize dates from various formats.

    Supported inputs are numeric day-month-year dates (``1-1-25``,
    ``01/01/2025``), ISO dates (``2025-01-01``) and dates with month names
    (``January 1, 2025``, ``1 Jan 2025``). All results are naive
    ``datetime`` objects at midnight.
    """

    # Month name mappings
    MONTHS = {
        'january': 1, 'jan': 1,
        'february': 2, 'feb': 2,
        'march': 3, 'mar': 3,
        'april': 4, 'apr': 4,
        'may': 5,
        'june': 6, 'jun': 6,
        'july': 7, 'jul': 7,
        'august': 8, 'aug': 8,
        'september': 9, 'sep': 9, 'sept': 9,
        'october': 10, 'oct': 10,
        'november': 11, 'nov': 11,
        'december': 12, 'dec': 12
    }

    # Date context keywords for identifying date types
    DATE_CONTEXTS = {
        'start': ['start', 'commence', 'inception', 'effective', 'from', 'begins', 'starting'],
        'end': ['end', 'expiry', 'expire', 'expiration', 'until', 'to', 'ending', 'valid till', 'valid until'],
        'renewal': ['renewal', 'renew', 'next renewal', 'due for renewal'],
        'issue': ['issue', 'issued', 'date of issue', 'policy date']
    }

    def __init__(self):
        self._compile_patterns()

    def _compile_patterns(self):
        """Compile regex patterns for date extraction."""
        # DD-MM-YY or DD-MM-YYYY (with - or /). The year must have exactly
        # two or four digits; a three-digit year is not a date.
        self.pattern_dmy = re.compile(
            r'\b(\d{1,2})[-/](\d{1,2})[-/](\d{4}|\d{2})\b'
        )

        # YYYY-MM-DD (ISO format)
        self.pattern_iso = re.compile(
            r'\b(\d{4})[-/](\d{1,2})[-/](\d{1,2})\b'
        )

        # Month DD, YYYY or DD Month YYYY
        month_names = '|'.join(self.MONTHS.keys())
        self.pattern_month_name = re.compile(
            rf'\b(\d{{1,2}})\s*(?:st|nd|rd|th)?\s*({month_names})[,]?\s*(\d{{4}})\b|'
            rf'\b({month_names})\s*(\d{{1,2}})(?:st|nd|rd|th)?[,]?\s*(\d{{4}})\b',
            re.IGNORECASE
        )

        # One whole-word pattern per date type. Whole-word matching keeps
        # short keywords such as "to" or "end" from firing inside "total" or
        # "endorsement"; the optional suffix still accepts simple
        # inflections ("expires", "renewed", "commencement").
        self._context_patterns = {
            dtype: re.compile(
                r'\b(?:' + '|'.join(re.escape(kw) for kw in keywords)
                + r')(?:s|d|ed|ing|ment)?\b'
            )
            for dtype, keywords in self.DATE_CONTEXTS.items()
        }

    def _context_for(self, preceding_text: str) -> str:
        """Return the date type whose keyword occurs closest before the date."""
        best_type = 'unknown'
        best_end = -1
        for dtype, pattern in self._context_patterns.items():
            last_end = -1
            for hit in pattern.finditer(preceding_text):
                last_end = hit.end()
            # The keyword nearest to the date wins, so in
            # "start date: X, expiry date: Y" the second date is an end date.
            if last_end > best_end:
                best_type, best_end = dtype, last_end
        return best_type

    def parse_date(self, date_str: str) -> Optional[datetime]:
        """
        Parse a date string in various formats to datetime object.

        Formats are tried in this order: ISO (YYYY-MM-DD), numeric
        day-month-year, month name. For numeric dates the day is assumed to
        come first; if that is not a valid date, month-first is tried.
        Two-digit years below 50 map to 20xx, the rest to 19xx.

        Args:
            date_str: Date string to parse

        Returns:
            datetime object or None if parsing fails
        """
        if not date_str:
            return None

        date_str = str(date_str).strip()

        # Try ISO format first (YYYY-MM-DD)
        match = self.pattern_iso.search(date_str)
        if match:
            year, month, day = match.groups()
            try:
                return datetime(int(year), int(month), int(day))
            except ValueError:
                pass

        # Try DMY format (DD-MM-YY or DD-MM-YYYY)
        match = self.pattern_dmy.search(date_str)
        if match:
            day, month, year = match.groups()
            year = int(year)
            # Handle 2-digit years
            if year < 100:
                year = 2000 + year if year < 50 else 1900 + year
            try:
                return datetime(year, int(month), int(day))
            except ValueError:
                # Try swapping day/month for US format
                try:
                    return datetime(year, int(day), int(month))
                except ValueError:
                    pass

        # Try month name format
        match = self.pattern_month_name.search(date_str)
        if match:
            groups = match.groups()
            if groups[0]:  # DD Month YYYY format
                day, month_name, year = groups[0], groups[1], groups[2]
            else:  # Month DD, YYYY format
                month_name, day, year = groups[3], groups[4], groups[5]

            month = self.MONTHS.get(month_name.lower())
            if month:
                try:
                    return datetime(int(year), month, int(day))
                except ValueError:
                    pass

        return None

    def extract_dates_from_text(self, text: str) -> List[Dict]:
        """
        Extract all dates from text with their context.

        The context of a date is decided from the 100 characters before it:
        the date type whose keyword appears closest to the date wins, and
        keywords only count as whole words. When the same calendar date
        occurs more than once, only the first occurrence found is kept.

        Args:
            text: Text to search for dates

        Returns:
            List of dicts with date info:
            [{"date": datetime, "date_str": "2025-01-01",
              "context": "start/end/renewal/issue/unknown",
              "original": "01-01-2025", "position": 123}]
        """
        if not text:
            return []

        text_lower = text.lower()

        # Find all date matches, one pattern family after another
        all_matches = []
        for pattern in (self.pattern_dmy, self.pattern_iso, self.pattern_month_name):
            for match in pattern.finditer(text):
                parsed = self.parse_date(match.group())
                if parsed:
                    all_matches.append({
                        'date': parsed,
                        'original': match.group(),
                        'position': match.start()
                    })

        seen_dates = set()
        unique_results = []
        for match in all_matches:
            date_key = match['date'].strftime('%Y-%m-%d')
            # Remove duplicates based on date
            if date_key in seen_dates:
                continue
            seen_dates.add(date_key)

            pos = match['position']
            # Look at surrounding text (100 chars before)
            context_text = text_lower[max(0, pos - 100):pos]

            unique_results.append({
                'date': match['date'],
                'date_str': date_key,
                'context': self._context_for(context_text),
                'original': match['original'],
                'position': pos
            })

        return unique_results

    def calculate_renewal_date(self, policy_start: datetime,
                               term_months: int = 12) -> datetime:
        """
        Calculate policy renewal date.

        Args:
            policy_start: Policy start date
            term_months: Policy term in months (default 12)

        Returns:
            Renewal date (policy_start + term_months). When the start day
            does not exist in the target month (e.g. Jan 31 + 1 month), the
            last day of that month is returned.
        """
        # Add months
        new_month = policy_start.month + term_months
        new_year = policy_start.year + (new_month - 1) // 12
        new_month = ((new_month - 1) % 12) + 1

        # Handle day overflow
        try:
            return datetime(new_year, new_month, policy_start.day)
        except ValueError:
            # Last day of month for dates like Jan 31 + 1 month
            if new_month == 12:
                next_month = datetime(new_year + 1, 1, 1)
            else:
                next_month = datetime(new_year, new_month + 1, 1)
            return next_month - timedelta(days=1)

    def is_date_in_range(self, date: datetime,
                         year: Optional[int] = None,
                         before: Optional[datetime] = None,
                         after: Optional[datetime] = None) -> bool:
        """
        Check if date matches filter criteria.

        Args:
            date: Date to check
            year: Match specific year
            before: Date must be before this (exclusive)
            after: Date must be after this (exclusive)

        Returns:
            True if date matches all criteria; False when ``date`` is falsy
        """
        if not date:
            return False

        if year and date.year != year:
            return False

        if before and date >= before:
            return False

        if after and date <= after:
            return False

        return True

    def get_year_from_query(self, query: str) -> Optional[int]:
        """Extract year from query like 'policies renewing in 2026'.

        An explicit four-digit year starting with 20 takes precedence;
        otherwise 'this year', 'next year' and 'last year' are resolved
        against the current year. Returns None when nothing matches or the
        query is empty.
        """
        if not query:
            return None

        match = re.search(r'\b(20\d{2})\b', query)
        if match:
            return int(match.group(1))

        # Handle relative years
        current_year = datetime.now().year
        lowered = query.lower()
        if 'this year' in lowered:
            return current_year
        if 'next year' in lowered:
            return current_year + 1
        if 'last year' in lowered:
            return current_year - 1

        return None


# Singleton instance
date_parser = DateParser()
|
services/document_processor.py
ADDED
|
@@ -0,0 +1,336 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Document Processor Service
|
| 3 |
+
Handles text extraction from various document types:
|
| 4 |
+
- PDF (text extraction + OCR fallback)
|
| 5 |
+
- DOCX (Word documents)
|
| 6 |
+
- Excel (XLS, XLSX) and PowerPoint (PPT, PPTX)
|
| 7 |
+
- Images (via OCR)
|
| 8 |
+
- Plain text (TXT, MD)
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import os
|
| 12 |
+
import io
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
from typing import Optional
|
| 15 |
+
import fitz # PyMuPDF
|
| 16 |
+
from docx import Document
|
| 17 |
+
from pptx import Presentation
|
| 18 |
+
from pptx.util import Inches
|
| 19 |
+
import pandas as pd
|
| 20 |
+
from PIL import Image
|
| 21 |
+
|
| 22 |
+
from services.ocr_service import ocr_service
|
| 23 |
+
from config import Config
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class DocumentProcessor:
|
| 27 |
+
def __init__(self):
    """Remember which file extensions this processor accepts."""
    # The accepted extensions are read from Config.ALLOWED_EXTENSIONS.
    self.supported_extensions = Config.ALLOWED_EXTENSIONS
|
| 29 |
+
|
| 30 |
+
def get_file_type(self, filename: str) -> str:
    """Classify a file by its extension.

    The extension is compared case-insensitively. Returns one of 'pdf',
    'word', 'powerpoint', 'excel', 'text' or 'image', and 'unknown' for any
    other (or missing) extension.
    """
    families = {
        'pdf': ('pdf',),
        'word': ('doc', 'docx'),
        'powerpoint': ('ppt', 'pptx'),
        'excel': ('xls', 'xlsx'),
        'text': ('txt', 'md'),
        'image': ('png', 'jpg', 'jpeg', 'gif', 'webp'),
    }

    suffix = Path(filename).suffix.lower().lstrip('.')

    for family, extensions in families.items():
        if suffix in extensions:
            return family
    return 'unknown'
|
| 52 |
+
|
| 53 |
+
def is_supported(self, filename: str) -> bool:
    """Tell whether the file's extension is one of ``self.supported_extensions``.

    The extension is lower-cased and stripped of its leading dot before the
    membership test.
    """
    suffix = Path(filename).suffix
    normalized = suffix.lower().lstrip('.')
    return normalized in self.supported_extensions
|
| 57 |
+
|
| 58 |
+
def process(self, file_path: str, filename: str) -> dict:
    """
    Extract text from a document by delegating to the handler for its type.

    The type is derived from ``filename`` via ``get_file_type``; the handler
    receives ``file_path``. Any exception raised by a handler is converted
    into a failure result instead of propagating.

    Returns: {"success": bool, "text": str, "method": str, "error": str}
    """
    file_type = self.get_file_type(filename)

    # File family -> name of the method that processes it.
    handler_names = {
        'pdf': '_process_pdf',
        'word': '_process_word',
        'powerpoint': '_process_pptx',
        'excel': '_process_excel',
        'image': '_process_image',
        'text': '_process_text',
    }

    try:
        handler_name = handler_names.get(file_type)
        if handler_name is None:
            return {
                "success": False,
                "error": f"Unsupported file type: {file_type}"
            }
        return getattr(self, handler_name)(file_path)
    except Exception as e:
        return {"success": False, "error": str(e)}
|
| 85 |
+
|
| 86 |
+
def _process_pdf(self, file_path: str) -> dict:
    """
    Extract the text of a PDF through the OCR service.

    The PDF is opened only to count its pages; all text comes from
    ``ocr_service.extract_text_from_pdf``. On success the result carries
    ``text``, ``method`` (the model name reported by the OCR service, or
    'OpenRouter Vision OCR') and ``page_count``; on failure it carries an
    ``error`` message. Exceptions are reported as a failure result.
    """
    try:
        pdf = fitz.open(file_path)
        page_total = len(pdf)
        pdf.close()

        print(f"Processing {page_total} page PDF with OpenRouter vision OCR...")

        # Use OpenRouter vision models for OCR
        outcome = ocr_service.extract_text_from_pdf(file_path)

        if not outcome['success']:
            return {
                "success": False,
                "error": f"OCR failed: {outcome['error']}"
            }

        print("PDF OCR successful")
        return {
            "success": True,
            "text": outcome['text'],
            "method": outcome.get('model', 'OpenRouter Vision OCR'),
            "page_count": page_total
        }

    except Exception as exc:
        return {"success": False, "error": f"PDF processing error: {str(exc)}"}
|
| 116 |
+
|
| 117 |
+
def _process_pdf_hybrid(self, file_path: str, text_pages: list, ocr_needed_pages: list) -> dict:
    """
    Hybrid PDF processing: combine text extraction with OCR for scanned pages only.

    Used as fallback when full PDF OCR fails.

    Args:
        file_path: Path to the PDF file on disk.
        text_pages: Iterable of (page_num, text) pairs for pages whose text was
            already extracted. ``page_num`` is the index used to order pages;
            it is shown as ``page_num + 1`` in the output headers.
        ocr_needed_pages: Page indexes (as accepted by ``doc[page_num]``) that
            must be rendered to an image and sent through OCR.

    Returns:
        On success: {"success": True, "text": str, "method": str, "page_count": int}
        On failure: {"success": False, "error": str}
    """
    try:
        doc = fitz.open(file_path)
        try:
            total_pages = len(doc)
            all_pages = {}

            # Add already extracted text pages
            for page_num, text in text_pages:
                all_pages[page_num] = f"--- Page {page_num + 1} ---\n{text}"

            # OCR the scanned pages one at a time
            print(f"OCR processing {len(ocr_needed_pages)} scanned pages...")

            for i, page_num in enumerate(ocr_needed_pages):
                page = doc[page_num]

                # Render page to image
                mat = fitz.Matrix(2, 2)  # 2x zoom for better OCR
                pix = page.get_pixmap(matrix=mat)

                temp_path = f"{file_path}_page_{page_num}.png"
                try:
                    pix.save(temp_path)
                    ocr_result = ocr_service.extract_text(temp_path)
                finally:
                    # Always remove the rendered image, even when saving or
                    # OCR raises, so failed runs do not leave files behind.
                    if os.path.exists(temp_path):
                        os.remove(temp_path)

                if ocr_result['success']:
                    all_pages[page_num] = f"--- Page {page_num + 1} (OCR) ---\n{ocr_result['text']}"
                else:
                    error_text = ocr_result.get('error', 'unknown error')
                    all_pages[page_num] = f"--- Page {page_num + 1} ---\n[OCR failed: {error_text}]"

                # Progress logging every 10 pages
                if (i + 1) % 10 == 0:
                    print(f"OCR progress: {i + 1}/{len(ocr_needed_pages)} pages")
        finally:
            # Close the document on every path, including exceptions.
            doc.close()

        # Combine all pages in order
        text_parts = [all_pages[page_num] for page_num in sorted(all_pages)]

        return {
            "success": True,
            "text": "\n\n".join(text_parts),
            "method": "hybrid (text + OCR)",
            "page_count": total_pages
        }

    except Exception as e:
        return {"success": False, "error": f"Hybrid PDF processing error: {str(e)}"}
|
| 173 |
+
|
| 174 |
+
def _process_word(self, file_path: str) -> dict:
    """
    Extract text from a Word document (DOCX).

    Non-blank paragraphs are collected first, then every table is appended
    as a "[Table]" section with one line per row and cells joined by " | ".

    Args:
        file_path: Path to the DOCX file on disk.

    Returns:
        On success: {"success": True, "text": str, "method": "docx extraction"}
        On failure: {"success": False, "error": str}
    """
    try:
        word_doc = Document(file_path)

        # Paragraphs that contain something other than whitespace
        sections = [p.text for p in word_doc.paragraphs if p.text.strip()]

        # Tables, rendered row by row
        for tbl in word_doc.tables:
            rendered_rows = [
                " | ".join(cell.text.strip() for cell in row.cells)
                for row in tbl.rows
            ]
            if rendered_rows:
                sections.append("\n[Table]\n" + "\n".join(rendered_rows))

        return {
            "success": True,
            "text": "\n\n".join(sections),
            "method": "docx extraction"
        }

    except Exception as e:
        return {"success": False, "error": f"Word processing error: {str(e)}"}
|
| 203 |
+
|
| 204 |
+
def _process_pptx(self, file_path: str) -> dict:
    """
    Extract all text from a PowerPoint file (PPTX), slide by slide.

    For each slide this collects text-frame paragraphs, tables (as a
    "[Table]" section with cells joined by " | ") and speaker notes (as a
    "[Speaker Notes]" section). Slides with no text are omitted from the
    output but still counted in ``slide_count``.

    Args:
        file_path: Path to the PPTX file on disk.

    Returns:
        On success: {"success": True, "text": str, "method": "pptx extraction",
        "slide_count": int}
        On failure (including a deck with no text): {"success": False, "error": str}
    """

    def runs_text(paragraphs) -> str:
        # Concatenate the text of every run in the given paragraphs, no separator.
        return "".join(run.text for para in paragraphs for run in para.runs)

    try:
        presentation = Presentation(file_path)
        rendered_slides = []
        slide_count = 0

        for slide_num, slide in enumerate(presentation.slides, 1):
            slide_count = slide_num
            pieces = []

            for shape in slide.shapes:
                # Text frames (text boxes, titles, etc.): one entry per paragraph
                if shape.has_text_frame:
                    for para in shape.text_frame.paragraphs:
                        para_text = runs_text([para])
                        if para_text.strip():
                            pieces.append(para_text.strip())

                # Tables in slides: one line per row
                if shape.has_table:
                    row_lines = [
                        " | ".join(
                            runs_text(cell.text_frame.paragraphs).strip()
                            for cell in row.cells
                        )
                        for row in shape.table.rows
                    ]
                    if row_lines:
                        pieces.append("[Table]\n" + "\n".join(row_lines))

            # Speaker notes
            if slide.has_notes_slide:
                notes_frame = slide.notes_slide.notes_text_frame
                if notes_frame:
                    notes_text = runs_text(notes_frame.paragraphs)
                    if notes_text.strip():
                        pieces.append(f"[Speaker Notes]\n{notes_text.strip()}")

            if pieces:
                rendered_slides.append(f"--- Slide {slide_num} ---\n" + "\n".join(pieces))

        if not rendered_slides:
            return {
                "success": False,
                "error": "No text content found in PowerPoint file"
            }

        return {
            "success": True,
            "text": "\n\n".join(rendered_slides),
            "method": "pptx extraction",
            "slide_count": slide_count
        }

    except Exception as e:
        return {"success": False, "error": f"PowerPoint processing error: {str(e)}"}
|
| 271 |
+
|
| 272 |
+
def _process_excel(self, file_path: str) -> dict:
|
| 273 |
+
"""Process Excel files"""
|
| 274 |
+
try:
|
| 275 |
+
# Read all sheets
|
| 276 |
+
excel_file = pd.ExcelFile(file_path)
|
| 277 |
+
text_parts = []
|
| 278 |
+
|
| 279 |
+
for sheet_name in excel_file.sheet_names:
|
| 280 |
+
df = pd.read_excel(excel_file, sheet_name=sheet_name)
|
| 281 |
+
|
| 282 |
+
if not df.empty:
|
| 283 |
+
# Convert to string representation
|
| 284 |
+
sheet_text = f"=== Sheet: {sheet_name} ===\n"
|
| 285 |
+
sheet_text += df.to_string(index=False)
|
| 286 |
+
text_parts.append(sheet_text)
|
| 287 |
+
|
| 288 |
+
return {
|
| 289 |
+
"success": True,
|
| 290 |
+
"text": "\n\n".join(text_parts),
|
| 291 |
+
"method": "excel extraction",
|
| 292 |
+
"sheet_count": len(excel_file.sheet_names)
|
| 293 |
+
}
|
| 294 |
+
|
| 295 |
+
except Exception as e:
|
| 296 |
+
return {"success": False, "error": f"Excel processing error: {str(e)}"}
|
| 297 |
+
|
| 298 |
+
def _process_image(self, file_path: str) -> dict:
    """
    Extract text from an image by delegating to ``ocr_service.extract_text``.

    Args:
        file_path: Path to the image file on disk.

    Returns:
        On success: {"success": True, "text": str, "method": "OCR (<model>)"},
        where <model> falls back to "unknown" when the OCR result names none.
        On failure: {"success": False, "error": str}
    """
    ocr_outcome = ocr_service.extract_text(file_path)

    # Guard clause: pass the OCR error straight through.
    if not ocr_outcome['success']:
        return {"success": False, "error": ocr_outcome['error']}

    model_name = ocr_outcome.get('model', 'unknown')
    return {
        "success": True,
        "text": ocr_outcome['text'],
        "method": f"OCR ({model_name})"
    }
|
| 310 |
+
|
| 311 |
+
def _process_text(self, file_path: str) -> dict:
|
| 312 |
+
"""Process plain text files"""
|
| 313 |
+
try:
|
| 314 |
+
# Try different encodings
|
| 315 |
+
encodings = ['utf-8', 'latin-1', 'cp1252']
|
| 316 |
+
|
| 317 |
+
for encoding in encodings:
|
| 318 |
+
try:
|
| 319 |
+
with open(file_path, 'r', encoding=encoding) as f:
|
| 320 |
+
text = f.read()
|
| 321 |
+
return {
|
| 322 |
+
"success": True,
|
| 323 |
+
"text": text,
|
| 324 |
+
"method": f"text read ({encoding})"
|
| 325 |
+
}
|
| 326 |
+
except UnicodeDecodeError:
|
| 327 |
+
continue
|
| 328 |
+
|
| 329 |
+
return {"success": False, "error": "Could not decode text file"}
|
| 330 |
+
|
| 331 |
+
except Exception as e:
|
| 332 |
+
return {"success": False, "error": f"Text processing error: {str(e)}"}
|
| 333 |
+
|
| 334 |
+
|
| 335 |
+
# Singleton instance
|
| 336 |
+
document_processor = DocumentProcessor()  # instantiated once, when this module is first imported
|
services/metadata_extractor.py
ADDED
|
@@ -0,0 +1,446 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Metadata Extractor Service
|
| 3 |
+
Extracts structured metadata from insurance policy documents using AI.
|
| 4 |
+
Handles various document formats and naming conventions.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import re
|
| 8 |
+
import json
|
| 9 |
+
import requests
|
| 10 |
+
from typing import Optional, Dict, List
|
| 11 |
+
from config import Config
|
| 12 |
+
from services.date_parser import date_parser
|
| 13 |
+
from services.number_extractor import number_extractor
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class MetadataExtractor:
    """Extract structured metadata from document content using AI and regex.

    The extraction pipeline (see ``extract_metadata``) layers several sources,
    each one only filling fields that are still empty:

    1. DeepSeek chat completion (when an API key is configured)
    2. "label: value" regex matching driven by ``FIELD_VARIATIONS``
    3. ``date_parser`` for dates and ``number_extractor`` for amounts
    4. keyword classification for policy type and industry
    """

    # Default metadata schema
    DEFAULT_METADATA = {
        # Identity
        "document_type": "",
        "document_title": "",
        "policy_number": "",
        "insurer_name": "",
        "issue_date": "",

        # Parties
        "insured_name": "",
        "broker_name": "",

        # Dates
        "policy_start_date": "",
        "policy_end_date": "",
        "renewal_date": "",
        "renewal_year": None,

        # Financial
        "sum_insured": None,
        "premium_amount": None,
        "tax_amount": None,
        "deductible": None,

        # Risk & Coverage
        "policy_type": "",
        "insured_property_type": "",
        "coverage_type": [],
        "exclusions_present": False,
        "add_on_covers": [],

        # Location & Asset
        "property_address": "",
        "city": "",
        "state": "",
        "pincode": "",
        "construction_type": "",

        # RAG helpers
        "section_name": "",
        "clause_reference": "",
        "page_number": "",
        "chunk_type": "full_document",

        # Search helpers
        "keywords": [],
        "industry": "",
        "is_manufacturing": False
    }

    # Field name variations commonly found in documents
    FIELD_VARIATIONS = {
        'insured_name': [
            'insured', 'name of insured', 'proposer', 'policyholder',
            'policy holder', 'insured party', 'insured name', 'name of the insured',
            'assured', 'name of assured', 'customer name', 'client name'
        ],
        'insurer_name': [
            'insurer', 'insurance company', 'underwriter', 'company name',
            'issued by', 'insuring company'
        ],
        'policy_number': [
            'policy no', 'policy number', 'policy #', 'certificate no',
            'certificate number', 'policy ref', 'reference number', 'ref no'
        ],
        'sum_insured': [
            'sum insured', 'total sum insured', 'tsi', 'si', 'insured value',
            'coverage amount', 'insured amount', 'sum assured', 'cover amount',
            'amount insured', 'value insured'
        ],
        'premium_amount': [
            'premium', 'total premium', 'net premium', 'gross premium',
            'annual premium', 'premium payable', 'premium amount'
        ],
        'policy_start_date': [
            'start date', 'commencement', 'inception date', 'effective from',
            'period from', 'from date', 'valid from', 'cover starts'
        ],
        'policy_end_date': [
            'end date', 'expiry date', 'expiry', 'valid until', 'valid till',
            'period to', 'to date', 'cover ends', 'expires on'
        ],
        'policy_type': [
            'type of policy', 'policy type', 'cover type', 'insurance type',
            'class of insurance', 'product name', 'product type', 'scheme name'
        ],
        'property_address': [
            'address', 'risk location', 'location of risk', 'property address',
            'insured location', 'premises address', 'site address'
        ]
    }

    # Policy type patterns
    POLICY_TYPES = {
        'fire': ['fire', 'fire & allied', 'fire insurance', 'sfsp'],
        'marine': ['marine', 'cargo', 'marine cargo', 'marine hull'],
        'motor': ['motor', 'vehicle', 'car', 'two wheeler', 'automobile'],
        'health': ['health', 'mediclaim', 'medical', 'hospitalization'],
        'life': ['life', 'term', 'endowment', 'ulip'],
        'property': ['property', 'building', 'structure', 'premises'],
        'liability': ['liability', 'professional indemnity', 'pi', 'directors'],
        'engineering': ['engineering', 'car', 'eai', 'cpm', 'boiler', 'machinery'],
        'personal_accident': ['personal accident', 'pa', 'accident'],
        'travel': ['travel', 'overseas', 'foreign travel'],
        'home': ['home', 'householder', 'household'],
        'group': ['group', 'employee', 'gpa', 'gmc']
    }

    # Industry classification patterns
    INDUSTRY_PATTERNS = {
        'manufacturing': ['manufacturing', 'factory', 'plant', 'production', 'industrial'],
        'chemical': ['chemical', 'petrochemical', 'pharmaceutical', 'fertilizer'],
        'automotive': ['automobile', 'automotive', 'tyre', 'tire', 'vehicle'],
        'food_processing': ['food', 'beverage', 'dairy', 'agro'],
        'textile': ['textile', 'garment', 'apparel', 'fabric'],
        'it_services': ['software', 'it services', 'technology', 'tech'],
        'banking': ['bank', 'finance', 'nbfc', 'financial services'],
        'hospitality': ['hotel', 'restaurant', 'hospitality', 'resort'],
        'healthcare': ['hospital', 'clinic', 'healthcare', 'medical'],
        'retail': ['retail', 'shop', 'store', 'mall', 'supermarket'],
        'real_estate': ['real estate', 'construction', 'builder', 'developer'],
        'education': ['school', 'college', 'university', 'education', 'institute']
    }

    def __init__(self):
        # Each setting falls back to a default when Config does not define it.
        self.deepseek_api_key = getattr(Config, 'DEEPSEEK_API_KEY', '')
        self.deepseek_base_url = getattr(Config, 'DEEPSEEK_BASE_URL', 'https://api.deepseek.com/v1')
        self.deepseek_model = getattr(Config, 'DEEPSEEK_MODEL', 'deepseek-chat')

    @classmethod
    def _fresh_metadata(cls) -> Dict:
        """Return a copy of DEFAULT_METADATA whose list values are independent.

        A plain ``dict.copy()`` is shallow: every result would share the same
        ``coverage_type`` / ``add_on_covers`` / ``keywords`` list objects with
        the class-level default, so mutating one result would corrupt all
        later ones.
        """
        return {
            key: list(value) if isinstance(value, list) else value
            for key, value in cls.DEFAULT_METADATA.items()
        }

    @staticmethod
    def _contains_keyword(text_lower: str, keywords: List[str]) -> bool:
        """Return True if any keyword occurs in ``text_lower`` at a word start.

        Every keyword must begin at a word boundary, so 'tire' no longer
        matches "entire". Abbreviation-length keywords (3 characters or
        fewer, e.g. 'pi', 'pa', 'car') must also end at a word boundary, so
        they no longer match inside "expiry", "company" or "cargo". Longer
        keywords may be followed by a suffix ("bank" still matches "banking").
        """
        for keyword in keywords:
            tail = r'\b' if len(keyword) <= 3 else ''
            if re.search(rf'\b{re.escape(keyword)}{tail}', text_lower):
                return True
        return False

    def extract_metadata(self, content: str, filename: str = "") -> Dict:
        """
        Extract structured metadata from document content.
        Uses AI for complex extraction with regex fallback.

        Args:
            content: Document text content (None is treated as empty)
            filename: Original filename for context

        Returns:
            Dictionary with extracted metadata, containing every key of
            DEFAULT_METADATA (plus any extra truthy keys the AI returned)
        """
        content = content or ""

        # Start with default metadata (lists copied, see _fresh_metadata)
        metadata = self._fresh_metadata()
        metadata['document_title'] = filename

        # Try AI extraction first (more accurate); skipped for very short text
        if self.deepseek_api_key and len(content) > 100:
            ai_metadata = self._extract_with_ai(content, filename)
            if ai_metadata:
                # Only truthy values overwrite defaults, so empty strings and
                # nulls from the model do not erase anything.
                metadata.update({k: v for k, v in ai_metadata.items() if v})

        # Fill in missing fields with regex extraction
        metadata = self._extract_with_regex(content, metadata)

        # Extract dates using date_parser
        metadata = self._extract_dates(content, metadata)

        # Extract numbers using number_extractor
        metadata = self._extract_numbers(content, metadata)

        # Determine policy type
        if not metadata.get('policy_type'):
            metadata['policy_type'] = self._detect_policy_type(content)

        # Determine industry
        if not metadata.get('industry'):
            metadata['industry'] = self._detect_industry(content)

        # Check if manufacturing
        metadata['is_manufacturing'] = self._is_manufacturing(content, metadata)

        # Extract keywords for search
        metadata['keywords'] = self._extract_keywords(content, filename)

        return metadata

    def _extract_with_ai(self, content: str, filename: str) -> Optional[Dict]:
        """Use DeepSeek AI to extract metadata.

        Returns:
            The parsed JSON object from the model, or None when no API key is
            configured, the request fails, the HTTP status is not 200, or the
            reply is not a JSON object.
        """
        if not self.deepseek_api_key:
            return None

        # Truncate content to avoid token limits
        max_content = content[:15000]

        prompt = f"""Extract the following metadata from this insurance document. Return ONLY a valid JSON object with no explanation.

Document filename: {filename}
Document content:
{max_content}

Extract these fields (use empty string if not found, use null for missing numbers):
{{
    "document_type": "policy/endorsement/certificate/schedule/etc",
    "policy_number": "",
    "insurer_name": "name of insurance company",
    "insured_name": "name of insured party/policyholder",
    "broker_name": "",
    "policy_type": "fire/motor/health/marine/property/liability/etc",
    "sum_insured": null,
    "premium_amount": null,
    "deductible": null,
    "policy_start_date": "YYYY-MM-DD format",
    "policy_end_date": "YYYY-MM-DD format",
    "property_address": "",
    "city": "",
    "state": "",
    "pincode": "",
    "construction_type": "",
    "insured_property_type": "",
    "coverage_type": [],
    "add_on_covers": [],
    "industry": ""
}}

Return ONLY the JSON object, no markdown, no explanation."""

        try:
            response = requests.post(
                f"{self.deepseek_base_url}/chat/completions",
                headers={
                    "Authorization": f"Bearer {self.deepseek_api_key}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": self.deepseek_model,
                    "messages": [{"role": "user", "content": prompt}],
                    "max_tokens": 1000,
                    "temperature": 0
                },
                timeout=30
            )

            if response.status_code != 200:
                # Log instead of failing silently so API problems are visible.
                print(f"[METADATA] AI extraction failed: HTTP {response.status_code}")
                return None

            data = response.json()
            ai_response = data['choices'][0]['message']['content'].strip()

            # Parse JSON from response
            # Remove markdown code blocks if present
            if ai_response.startswith('```'):
                ai_response = re.sub(r'^```(?:json)?\n?', '', ai_response)
                ai_response = re.sub(r'\n?```$', '', ai_response)

            parsed = json.loads(ai_response)
            if isinstance(parsed, dict):
                return parsed

            # A JSON list/string/number would crash the caller's .items().
            print("[METADATA] AI extraction failed: response was not a JSON object")
        except Exception as e:
            print(f"[METADATA] AI extraction failed: {e}")

        return None

    def _extract_with_regex(self, content: str, metadata: Dict) -> Dict:
        """Extract metadata using regex patterns.

        Fills only fields that are still empty in ``metadata`` (mutated in
        place and returned). Matching is case-insensitive but runs on the
        original text, so extracted values keep their original casing.

        NOTE(review): 'sum_insured' and 'premium_amount' are listed in
        FIELD_VARIATIONS, so this step can store the raw matched text in
        those fields, which then blocks ``_extract_numbers`` from filling
        them — confirm whether downstream code expects numbers there.
        """
        # Extract fields using variations
        for field, variations in self.FIELD_VARIATIONS.items():
            if metadata.get(field):  # Already extracted
                continue

            for variation in variations:
                # Look for pattern: "variation: value" or "variation - value".
                # The leading \b stops short labels such as 'si' from
                # matching in the middle of another word.
                pattern = rf'\b{re.escape(variation)}\s*[:|-]\s*([^\n]+)'
                match = re.search(pattern, content, re.IGNORECASE)
                if match:
                    value = match.group(1).strip()
                    # Clean up the value
                    value = re.sub(r'\s+', ' ', value)[:200]  # Limit length
                    if value and len(value) > 2:
                        metadata[field] = value
                        break

        # Extract policy number (often in specific formats)
        if not metadata.get('policy_number'):
            # Common policy number patterns, most specific first
            patterns = [
                r'policy\s*(?:no|number|#)?\s*[:.]?\s*([A-Z0-9/-]{5,30})',
                r'([A-Z]{2,5}[/-]?\d{6,15})',
                r'(\d{10,20})'
            ]
            for pattern in patterns:
                match = re.search(pattern, content, re.IGNORECASE)
                if match:
                    metadata['policy_number'] = match.group(1).strip()
                    break

        # Extract pincode (first standalone 6-digit number in the text)
        if not metadata.get('pincode'):
            match = re.search(r'\b(\d{6})\b', content)
            if match:
                metadata['pincode'] = match.group(1)

        return metadata

    def _extract_dates(self, content: str, metadata: Dict) -> Dict:
        """Extract dates using date_parser.

        Each date returned by ``date_parser.extract_dates_from_text`` is
        routed by its 'context' value into the matching still-empty field.
        When no renewal date is found, the policy end date is reused.
        """
        dates = date_parser.extract_dates_from_text(content)

        # Date context -> metadata field it fills
        context_fields = {
            'start': 'policy_start_date',
            'end': 'policy_end_date',
            'renewal': 'renewal_date',
            'issue': 'issue_date',
        }

        for date_info in dates:
            field = context_fields.get(date_info['context'])
            if field and not metadata.get(field):
                metadata[field] = date_info['date_str']

        # Calculate renewal date if not found but we have end date
        if not metadata.get('renewal_date') and metadata.get('policy_end_date'):
            end_date = date_parser.parse_date(metadata['policy_end_date'])
            if end_date:
                metadata['renewal_date'] = metadata['policy_end_date']
                metadata['renewal_year'] = end_date.year

        # Set renewal year
        if metadata.get('renewal_date') and not metadata.get('renewal_year'):
            renewal = date_parser.parse_date(metadata['renewal_date'])
            if renewal:
                metadata['renewal_year'] = renewal.year

        return metadata

    def _extract_numbers(self, content: str, metadata: Dict) -> Dict:
        """Extract numerical values using number_extractor.

        Each number returned by ``number_extractor.extract_numbers`` is
        routed by its 'context' value into the matching still-empty field.
        """
        numbers = number_extractor.extract_numbers(content)

        # Number context -> metadata field it fills
        context_fields = {
            'sum_insured': 'sum_insured',
            'premium': 'premium_amount',
            'tax': 'tax_amount',
            'deductible': 'deductible',
        }

        for num_info in numbers:
            field = context_fields.get(num_info['context'])
            if field and not metadata.get(field):
                metadata[field] = num_info['value']

        # If sum_insured still not found, fall back to the dedicated extractor
        if not metadata.get('sum_insured'):
            sum_insured = number_extractor.extract_sum_insured(content)
            if sum_insured:
                metadata['sum_insured'] = sum_insured

        return metadata

    def _detect_policy_type(self, content: str) -> str:
        """Detect policy type from content.

        Returns the first POLICY_TYPES key (in declaration order) with a
        keyword present in the text, or "general" when none match.
        """
        content_lower = content.lower()

        for policy_type, keywords in self.POLICY_TYPES.items():
            if self._contains_keyword(content_lower, keywords):
                return policy_type

        return "general"

    def _detect_industry(self, content: str) -> str:
        """Detect industry classification from content.

        Returns the first INDUSTRY_PATTERNS key (in declaration order) with
        a keyword present in the text, or "" when none match.
        """
        content_lower = content.lower()

        for industry, keywords in self.INDUSTRY_PATTERNS.items():
            if self._contains_keyword(content_lower, keywords):
                return industry

        return ""

    def _is_manufacturing(self, content: str, metadata: Dict) -> bool:
        """Check if this is a manufacturing-related policy.

        True when the detected industry is 'manufacturing' or when any
        manufacturing keyword occurs in the text.
        """
        if metadata.get('industry') == 'manufacturing':
            return True

        manufacturing_keywords = [
            'manufacturing', 'factory', 'plant', 'production', 'industrial',
            'machinery', 'equipment', 'boiler', 'pressure vessel'
        ]
        return self._contains_keyword(content.lower(), manufacturing_keywords)

    def _extract_keywords(self, content: str, filename: str) -> List[str]:
        """Extract keywords for search enhancement.

        Returns up to 30 lower-cased keywords: words from the filename first,
        then capitalized phrases from the first 5000 characters of content.
        Order is deterministic (first occurrence wins).
        """
        # Words from filename (3+ letters)
        filename_words = re.findall(r'[A-Za-z]{3,}', filename or "")

        # Capitalized words (likely proper nouns/company names)
        proper_nouns = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b', content[:5000])

        candidates = [w.lower() for w in filename_words]
        candidates.extend(n.lower() for n in proper_nouns[:20])

        # Remove duplicates and common words. dict.fromkeys keeps first-seen
        # order; a set would make the [:30] truncation pick arbitrary entries.
        stop_words = {'the', 'and', 'for', 'with', 'this', 'that', 'from', 'are', 'was', 'were'}
        keywords = list(dict.fromkeys(
            kw for kw in candidates if kw not in stop_words and len(kw) > 2
        ))

        return keywords[:30]  # Limit to 30 keywords

    def extract_metadata_batch(self, documents: List[Dict]) -> List[Dict]:
        """
        Extract metadata for multiple documents.

        A failure on one document is logged and replaced by default metadata,
        so the result always has one entry per input document.

        Args:
            documents: List of dicts with 'content' and 'filename' keys
                (an optional 'doc_id' is copied into the result)

        Returns:
            List of metadata dicts
        """
        results = []
        for doc in documents:
            try:
                metadata = self.extract_metadata(
                    doc.get('content', ''),
                    doc.get('filename', '')
                )
                metadata['doc_id'] = doc.get('doc_id', '')
                results.append(metadata)
            except Exception as e:
                print(f"[METADATA] Error extracting from {doc.get('filename')}: {e}")
                # Fresh copy so fallback entries do not share list objects.
                results.append({**self._fresh_metadata(), 'doc_id': doc.get('doc_id', '')})

        return results
|
| 443 |
+
|
| 444 |
+
|
| 445 |
+
# Singleton instance
|
| 446 |
+
metadata_extractor = MetadataExtractor()  # instantiated once, when this module is first imported
|
services/number_extractor.py
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Number Extractor Service
|
| 3 |
+
Handles extraction and normalization of numerical values from insurance documents.
|
| 4 |
+
Supports:
|
| 5 |
+
- Indian number formats (lakhs, crores)
|
| 6 |
+
- Currency symbols (₹, Rs., INR, USD)
|
| 7 |
+
- Comma-separated numbers
|
| 8 |
+
- Word numbers (One Hundred Million)
|
| 9 |
+
- Percentage values
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import re
|
| 13 |
+
from typing import Optional, List, Dict, Tuple
|
| 14 |
+
from decimal import Decimal, InvalidOperation
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class NumberExtractor:
    """Extract and normalize numerical values from text.

    Recognizes currency amounts ("Rs. 1,00,000"), digit+magnitude amounts
    ("10 crore", "5.5 lakhs"), spelled-out numbers ("One Hundred Million")
    and percentages, and tags each extracted amount with the nearest
    context keyword found before it (sum insured, premium, tax, deductible).
    """

    # Cardinal number words (units, teens and tens)
    WORD_TO_NUMBER = {
        'zero': 0, 'one': 1, 'two': 2, 'three': 3, 'four': 4,
        'five': 5, 'six': 6, 'seven': 7, 'eight': 8, 'nine': 9,
        'ten': 10, 'eleven': 11, 'twelve': 12, 'thirteen': 13,
        'fourteen': 14, 'fifteen': 15, 'sixteen': 16, 'seventeen': 17,
        'eighteen': 18, 'nineteen': 19, 'twenty': 20, 'thirty': 30,
        'forty': 40, 'fifty': 50, 'sixty': 60, 'seventy': 70,
        'eighty': 80, 'ninety': 90
    }

    # Multipliers, covering both international and Indian naming
    MAGNITUDE_WORDS = {
        'hundred': 100,
        'thousand': 1000,
        'lakh': 100000,
        'lac': 100000,
        'lakhs': 100000,
        'lacs': 100000,
        'million': 1000000,
        'crore': 10000000,
        'crores': 10000000,
        'billion': 1000000000
    }

    # Currency patterns (regex fragments, matched case-insensitively)
    CURRENCY_PATTERNS = {
        'INR': [r'₹', r'Rs\.?', r'INR', r'Rupees?'],
        'USD': [r'\$', r'USD', r'Dollars?'],
        'EUR': [r'€', r'EUR', r'Euros?']
    }

    # Context keywords for identifying number types
    NUMBER_CONTEXTS = {
        'sum_insured': ['sum insured', 'total sum insured', 'tsi', 'si', 'insured value',
                        'coverage amount', 'insured amount', 'sum assured'],
        'premium': ['premium', 'premium amount', 'total premium', 'net premium',
                    'gross premium', 'annual premium'],
        'tax': ['tax', 'gst', 'cgst', 'sgst', 'igst', 'service tax'],
        'deductible': ['deductible', 'excess', 'franchise']
    }

    def __init__(self):
        self._compile_patterns()

    def _compile_patterns(self):
        """Compile regex patterns for number extraction."""

        def guarded(symbol_pattern: str) -> str:
            # Alphabetic symbols (Rs, INR, USD, ...) must not start in the
            # middle of a word, otherwise "years 2023" is read as "Rs 2023".
            # Glyph symbols (₹, $, €) stay unguarded so "US$100" still works.
            if symbol_pattern[0].isalpha():
                return rf'(?<![A-Za-z]){symbol_pattern}'
            return symbol_pattern

        # Currency amount: ₹1,00,000 or Rs. 1,00,000.00 or INR 100000
        currency_symbols = '|'.join(
            guarded(p) for patterns in self.CURRENCY_PATTERNS.values() for p in patterns
        )
        self.pattern_currency = re.compile(
            rf'({currency_symbols})\s*([\d,]+(?:\.\d{{1,2}})?)',
            re.IGNORECASE
        )

        # Plain number with commas: 1,00,00,000 or 100,000,000
        self.pattern_number = re.compile(
            r'\b([\d,]+(?:\.\d+)?)\b'
        )

        # Number with magnitude words: 10 crore, 5.5 lakhs
        # (longest alternatives first so "lakhs" is preferred over "lakh")
        magnitude_words = '|'.join(
            sorted(self.MAGNITUDE_WORDS.keys(), key=len, reverse=True)
        )
        self.pattern_magnitude = re.compile(
            rf'\b([\d,.]+)\s*({magnitude_words})\b',
            re.IGNORECASE
        )

        # Percentage: 10%, 10.5 percent
        # The word boundary applies only to the word forms: "%" is not a word
        # character, so a trailing \b after it would reject "10% " and "10%".
        self.pattern_percent = re.compile(
            r'\b([\d.]+)\s*(?:%|percent(?:age)?\b)',
            re.IGNORECASE
        )

        # One matcher per currency code, used to map a captured symbol back
        # to its code.
        self._currency_matchers = [
            (code, re.compile('|'.join(patterns), re.IGNORECASE))
            for code, patterns in self.CURRENCY_PATTERNS.items()
        ]

        # One matcher per context type: whole-word keywords (optionally
        # plural), tolerant of any whitespace between the words of a keyword.
        self._context_matchers = {}
        for num_type, keywords in self.NUMBER_CONTEXTS.items():
            alternatives = '|'.join(
                r'\s+'.join(re.escape(part) for part in keyword.split())
                for keyword in sorted(keywords, key=len, reverse=True)
            )
            self._context_matchers[num_type] = re.compile(
                rf'\b(?:{alternatives})(?:e?s)?\b',
                re.IGNORECASE
            )

    def _resolve_currency(self, symbol: str) -> str:
        """Map a captured currency symbol/word to its currency code."""
        for code, matcher in self._currency_matchers:
            if matcher.fullmatch(symbol):
                return code
        return 'INR'  # Default

    def parse_number(self, num_str: str) -> Optional[float]:
        """
        Parse a number string to float, handling Indian format.

        Args:
            num_str: Number string (e.g., "1,00,000" or "100,000.50")

        Returns:
            Float value, or None when the input is empty, not numeric,
            or not a finite number (e.g. "nan", "inf").
        """
        if num_str is None:
            return None

        # Remove commas and spaces
        cleaned = str(num_str).replace(',', '').replace(' ', '').strip()
        if not cleaned:
            return None

        try:
            value = float(cleaned)
        except ValueError:
            return None

        # float() accepts "nan"/"inf"; those are never valid amounts
        if value != value or value in (float('inf'), float('-inf')):
            return None
        return value

    def parse_indian_number(self, text: str) -> Optional[float]:
        """
        Parse Indian number format (lakhs, crores).

        Only the first "<digits> <magnitude word>" occurrence is used.

        Args:
            text: Text like "10 crore" or "5.5 lakhs"

        Returns:
            Float value or None
        """
        if not text:
            return None

        match = self.pattern_magnitude.search(text.lower().strip())
        if match:
            num_part = self.parse_number(match.group(1))
            magnitude = self.MAGNITUDE_WORDS.get(match.group(2).lower(), 1)
            if num_part is not None:
                return num_part * magnitude

        return None

    def word_to_number(self, text: str) -> Optional[int]:
        """
        Convert word numbers to integers.

        Handles hyphenated words ("twenty-one"), a bare magnitude
        ("hundred" -> 100) and stacked magnitudes as used in Indian
        documents ("two thousand crore" -> 20000000000). Words that are
        neither number words nor integer digits are ignored.

        Args:
            text: Text like "One Hundred Million"

        Returns:
            Integer value, or None when nothing positive could be parsed
        """
        if not text:
            return None

        words = text.lower().replace('-', ' ').split()
        if not words:
            return None

        groups = []   # completed "<n> <magnitude>" groups, e.g. [10000000, 2000000]
        current = 0   # value accumulated since the last large magnitude

        for raw_word in words:
            word = raw_word.strip(',.;:()')

            if word in self.WORD_TO_NUMBER:
                current += self.WORD_TO_NUMBER[word]
            elif word in self.MAGNITUDE_WORDS:
                magnitude = self.MAGNITUDE_WORDS[word]
                if magnitude >= 1000:
                    # A larger magnitude scales everything smaller that came
                    # before it: "two thousand five hundred crore" is
                    # (2000 + 500) * crore, not 2000 + 500 * crore.
                    pending = current
                    while groups and groups[-1] < magnitude:
                        pending += groups.pop()
                    groups.append((pending or 1) * magnitude)
                    current = 0
                else:
                    current = (current or 1) * magnitude
            elif word == 'and':
                continue
            else:
                # Unknown word, try to parse as number
                try:
                    current += int(word)
                except ValueError:
                    pass

        result = sum(groups) + current
        return result if result > 0 else None

    def extract_numbers(self, text: str) -> List[Dict]:
        """
        Extract all numerical values from text with context.

        Args:
            text: Text to search for numbers

        Returns:
            List of dicts with number info, largest value first:
            [{"value": 101000000, "context": "sum_insured", "currency": "INR",
              "original": "₹10,10,00,000", "position": 13}]
        """
        if not text:
            return []

        candidates = []
        # Currency seen directly in front of a number, keyed by the offset at
        # which that number starts; lets "USD 5 million" keep its currency.
        currency_by_offset = {}

        # Extract currency amounts
        for match in self.pattern_currency.finditer(text):
            currency = self._resolve_currency(match.group(1))
            currency_by_offset[match.start(2)] = currency
            value = self.parse_number(match.group(2))

            if value is not None and value > 0:
                candidates.append({
                    'value': value,
                    'context': self._determine_number_context(text, match.start()),
                    'currency': currency,
                    'original': match.group(),
                    'position': match.start()
                })

        # Extract numbers with magnitude words (10 crore, 5 lakhs)
        for match in self.pattern_magnitude.finditer(text):
            value = self.parse_indian_number(match.group())
            if value is not None and value > 0:
                candidates.append({
                    'value': value,
                    'context': self._determine_number_context(text, match.start()),
                    # Lakhs/crores are typically INR unless a symbol says otherwise
                    'currency': currency_by_offset.get(match.start(1), 'INR'),
                    'original': match.group(),
                    'position': match.start()
                })

        # Remove duplicates: a currency match ("Rs. 10") and a magnitude match
        # ("10 crore") can cover the same digits. Keep the larger value and
        # drop only candidates whose character span really overlaps it, so
        # distinct amounts that merely sit close together are preserved.
        kept_spans = []
        unique_results = []
        for candidate in sorted(candidates, key=lambda c: -c['value']):
            start = candidate['position']
            end = start + len(candidate['original'])
            if all(end <= k_start or start >= k_end for k_start, k_end in kept_spans):
                kept_spans.append((start, end))
                unique_results.append(candidate)

        return unique_results

    def _determine_number_context(self, text: str, position: int) -> str:
        """Determine what type of number this is based on surrounding text.

        Looks at the 100 characters before ``position`` and returns the
        context type whose keyword ends closest to the number, or
        'unknown' when no keyword is found. Matching is case-insensitive
        and on whole words, so 'si' does not fire inside "basis".
        """
        window_start = max(0, position - 100)

        best_type = 'unknown'
        best_end = -1
        for num_type, matcher in self._context_matchers.items():
            # pos/endpos (rather than slicing) keeps \b aware of the
            # character just before the window.
            for hit in matcher.finditer(text, window_start, position):
                if hit.end() > best_end:
                    best_type, best_end = num_type, hit.end()

        return best_type

    def extract_sum_insured(self, text: str) -> Optional[float]:
        """Extract the sum insured value from text.

        Prefers an amount labelled as sum insured; otherwise falls back to
        the largest amount found. Returns None when no amount is found.
        """
        numbers = self.extract_numbers(text)

        # First, look for explicitly labeled sum insured
        for num in numbers:
            if num['context'] == 'sum_insured':
                return num['value']

        # Otherwise, return the largest number (likely to be sum insured)
        if numbers:
            return max(num['value'] for num in numbers)

        return None

    def extract_premium(self, text: str) -> Optional[float]:
        """Extract the premium amount from text, or None if none is labelled."""
        for num in self.extract_numbers(text):
            if num['context'] == 'premium':
                return num['value']

        return None

    def calculate_sum(self, values: List[float]) -> float:
        """Calculate sum of values, ignoring None entries."""
        return sum(v for v in values if v is not None)

    def calculate_average(self, values: List[float]) -> Optional[float]:
        """Calculate average of the non-None values, or None if there are none."""
        valid_values = [v for v in values if v is not None]
        if valid_values:
            return sum(valid_values) / len(valid_values)
        return None
|
| 299 |
+
|
| 300 |
+
|
| 301 |
+
# Singleton instance
|
| 302 |
+
number_extractor = NumberExtractor()  # shared instance; services.rag_service imports this name
|
services/ocr_service.py
ADDED
|
@@ -0,0 +1,231 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
OCR Service - Document Text Extraction via OpenRouter Vision Models
|
| 3 |
+
Handles OCR for images and scanned documents using vision-capable models with fallback
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import requests
|
| 7 |
+
import base64
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from config import Config
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class OCRService:
    """OCR for images and PDFs via OpenRouter vision-capable chat models.

    Every public method returns a dict: ``{"success": True, "text": ...,
    "model": ...}`` on success or ``{"success": False, "error": ...}`` on
    failure. Models in ``self.ocr_models`` are tried in order until one
    returns text.
    """

    def __init__(self):
        self.api_key = Config.OPENROUTER_API_KEY
        self.base_url = Config.OPENROUTER_BASE_URL

        # Vision-capable models for OCR with fallback order
        # Only models that support image/vision input can be used for OCR
        self.ocr_models = [
            "google/gemma-3-27b-it:free",      # Primary - Largest Gemma 3
            "google/gemma-3-12b-it:free",      # Fallback 1
            "google/gemma-3-4b-it:free",       # Fallback 2
            "google/gemma-3n-e4b-it:free",     # Fallback 3
            "google/gemma-3n-e2b-it:free",     # Fallback 4 - Smallest
        ]

    def _encode_image(self, image_path: str) -> str:
        """Read the file at ``image_path`` and return it base64-encoded.

        Raises:
            OSError: if the file cannot be opened or read.
        """
        with open(image_path, "rb") as f:
            return base64.b64encode(f.read()).decode('utf-8')

    def _get_mime_type(self, file_path: str) -> str:
        """Get MIME type from file extension (defaults to image/png)."""
        ext = Path(file_path).suffix.lower()
        mime_types = {
            '.png': 'image/png',
            '.jpg': 'image/jpeg',
            '.jpeg': 'image/jpeg',
            '.gif': 'image/gif',
            '.webp': 'image/webp',
            '.pdf': 'application/pdf'
        }
        return mime_types.get(ext, 'image/png')

    def _call_ocr_model(self, image_data: str, mime_type: str, model: str = None) -> dict:
        """Call one OpenRouter vision model for OCR.

        Args:
            image_data: Base64-encoded image bytes.
            mime_type: MIME type used in the data URL.
            model: Model id; defaults to the first entry of ``self.ocr_models``.

        Returns:
            Result dict (see class docstring). Never raises for HTTP,
            timeout or response-shape problems; they are reported in "error".
        """
        if not self.api_key:
            return {"success": False, "error": "OpenRouter API key not configured"}

        model_name = model or self.ocr_models[0]

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://notebooklm-fast.hf.space",
            "X-Title": "NotebookLM Fast OCR"
        }

        # OCR prompt for thorough text extraction
        ocr_prompt = """You are a precise OCR system. Extract EVERY SINGLE piece of text from this image/document with 100% accuracy.

CRITICAL INSTRUCTIONS:
1. Extract ALL text - do not skip or miss ANY section, heading, paragraph, or text block
2. Include ALL sections (e.g., Education, Experience, Skills, Contact, Summary, Projects, etc.)
3. Preserve the exact structure and hierarchy of the document
4. Include all names, dates, numbers, addresses, phone numbers, emails, URLs
5. Include text from headers, footers, sidebars, and any text boxes
6. For tables, use markdown table format with all rows and columns
7. For bullet points and lists, preserve the list structure
8. Include any small text, footnotes, or captions

OUTPUT FORMAT:
- Return ONLY the extracted text, no explanations
- Maintain the original reading order (top to bottom, left to right)
- Use markdown formatting for structure (headers, lists, tables)
- Separate sections clearly with line breaks

IMPORTANT: Do not summarize or paraphrase. Extract the EXACT text as it appears."""

        payload = {
            "model": model_name,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mime_type};base64,{image_data}"
                            }
                        },
                        {
                            "type": "text",
                            "text": ocr_prompt
                        }
                    ]
                }
            ],
            "max_tokens": 4096,
            "temperature": 0.1  # Low temperature for accurate extraction
        }

        try:
            response = requests.post(
                f"{self.base_url}/chat/completions",
                headers=headers,
                json=payload,
                timeout=120  # Longer timeout for OCR
            )

            if response.status_code != 200:
                return {
                    "success": False,
                    "error": f"OpenRouter API error: {response.status_code} - {response.text}"
                }

            data = response.json()
            # Tolerate an empty "choices" list and a null message/content
            choices = data.get('choices') or [{}]
            message = choices[0].get('message') or {}
            text = message.get('content') or ''

            if text:
                return {"success": True, "text": text, "model": model_name}
            return {"success": False, "error": "No text extracted from response"}
        except requests.exceptions.Timeout:
            return {"success": False, "error": "Request timed out. Please try again."}
        except Exception as e:
            return {"success": False, "error": str(e)}

    def _call_ocr_with_fallback(self, image_data: str, mime_type: str) -> dict:
        """Try OCR with each model in ``self.ocr_models`` until one succeeds."""
        last_error = None

        for model in self.ocr_models:
            print(f"Attempting OCR with {model}...")
            result = self._call_ocr_model(image_data, mime_type, model)

            if result['success']:
                print(f"OCR successful with {model}")
                return result

            last_error = result.get('error', 'Unknown error')
            print(f"OCR failed with {model}: {last_error}")

        return {"success": False, "error": f"All OCR models failed. Last error: {last_error}"}

    def extract_text_from_pdf(self, pdf_path: str) -> dict:
        """
        Extract text from entire PDF using OpenRouter vision models.
        Converts PDF pages to images and processes them.

        Pages that fail OCR are represented by an inline error marker; the
        call as a whole fails only when the PDF cannot be processed or when
        no page at all could be read.
        """
        try:
            # Imported inside the try so a missing PyMuPDF install is reported
            # as an error dict like every other failure.
            import fitz  # PyMuPDF

            doc = fitz.open(pdf_path)
            try:
                total_pages = len(doc)

                print(f"Processing {total_pages} page PDF with OpenRouter vision OCR...")

                all_text = []
                pages_ok = 0
                last_error = None
                zoom = fitz.Matrix(2, 2)  # 2x zoom for better quality

                for page_num in range(total_pages):
                    print(f"Processing page {page_num + 1}/{total_pages}...")

                    # Render page to image at good resolution for OCR
                    page = doc.load_page(page_num)
                    pix = page.get_pixmap(matrix=zoom)
                    img_data = pix.tobytes("png")

                    # Encode to base64
                    image_base64 = base64.b64encode(img_data).decode('utf-8')

                    # OCR the page with fallback
                    result = self._call_ocr_with_fallback(image_base64, 'image/png')

                    if result['success']:
                        pages_ok += 1
                        all_text.append(f"--- Page {page_num + 1} ---\n{result['text']}")
                    else:
                        last_error = result['error']
                        all_text.append(f"--- Page {page_num + 1} ---\n[Error extracting text: {result['error']}]")
            finally:
                # Always release the document, even if a page raised
                doc.close()

            if total_pages and not pages_ok:
                # Nothing was extracted: do not report error markers as text
                return {
                    "success": False,
                    "error": f"OCR failed for all {total_pages} page(s). Last error: {last_error}"
                }

            combined_text = "\n\n".join(all_text)

            return {
                "success": True,
                "text": combined_text,
                "model": "OpenRouter Vision OCR"
            }

        except Exception as e:
            return {"success": False, "error": f"Error processing PDF: {str(e)}"}

    def _process_pdf_in_batches(self, pdf_path: str, total_pages: int) -> dict:
        """Split PDF into chunks and process sequentially - kept for compatibility"""
        # total_pages is unused; the page count is read from the PDF itself
        return self.extract_text_from_pdf(pdf_path)

    def _send_pdf_to_api(self, pdf_path: str) -> dict:
        """Process PDF by converting to images - OpenRouter doesn't have native PDF support"""
        return self.extract_text_from_pdf(pdf_path)

    def extract_text(self, image_path: str) -> dict:
        """
        Extract text from image using OpenRouter vision models with fallback

        An unreadable file is reported as a failed result rather than an
        exception, matching the other methods of this class.
        """
        try:
            image_data = self._encode_image(image_path)
        except OSError as e:
            return {"success": False, "error": f"Error reading image: {str(e)}"}
        mime_type = self._get_mime_type(image_path)

        print("Attempting OCR with OpenRouter vision models...")
        result = self._call_ocr_with_fallback(image_data, mime_type)

        if result['success']:
            print(f"OCR successful with {result.get('model', 'OpenRouter')}")
        else:
            print(f"OCR failed: {result['error']}")

        return result

    def extract_text_from_pdf_page(self, page_image_data: bytes,
                                   page_num: int) -> dict:
        """Extract text from a PDF page image (raw PNG bytes)."""
        image_data = base64.b64encode(page_image_data).decode('utf-8')

        print(f"Extracting text from PDF page {page_num} with OpenRouter vision OCR...")
        result = self._call_ocr_with_fallback(image_data, 'image/png')
        return result
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
# Singleton instance
|
| 231 |
+
ocr_service = OCRService()  # shared module-level instance; its __init__ reads Config when this module is imported
|
services/rag_service.py
ADDED
|
@@ -0,0 +1,1870 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
RAG Service - Retrieval Augmented Generation
|
| 3 |
+
Handles:
|
| 4 |
+
- Text chunking with overlap
|
| 5 |
+
- GraphRAG-based context assembly
|
| 6 |
+
- Query processing with AI response generation
|
| 7 |
+
- Aggregate queries across all documents
|
| 8 |
+
- Date-based filtering and calculations
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import requests
|
| 12 |
+
import re
|
| 13 |
+
from typing import Optional, List, Dict
|
| 14 |
+
from config import Config
|
| 15 |
+
from services.chroma_service import chroma_service
|
| 16 |
+
from services.date_parser import date_parser
|
| 17 |
+
from services.number_extractor import number_extractor
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class RAGService:
|
| 21 |
+
def __init__(self):
    """Load provider credentials and retrieval settings from ``Config``.

    DeepSeek settings are read with ``getattr`` so they may be absent from
    ``Config``; the OpenRouter and RAG settings are read as plain attributes
    and must exist.
    """
    # DeepSeek API (primary - highly capable)
    self.deepseek_api_key = getattr(Config, 'DEEPSEEK_API_KEY', '')
    self.deepseek_base_url = getattr(Config, 'DEEPSEEK_BASE_URL', 'https://api.deepseek.com/v1')
    self.deepseek_model = getattr(Config, 'DEEPSEEK_MODEL', 'deepseek-chat')
    # Coerce to bool: ``flag and key`` would otherwise store the API key
    # string itself in a field that reads like a simple on/off switch.
    self.use_deepseek = bool(getattr(Config, 'USE_DEEPSEEK', True) and self.deepseek_api_key)

    # OpenRouter API (fallback)
    self.api_key = Config.OPENROUTER_API_KEY
    self.base_url = Config.OPENROUTER_BASE_URL
    self.model_map = Config.MODEL_MAP
    self.fallback_order = Config.FALLBACK_ORDER

    # RAG settings
    self.chunk_size = Config.CHUNK_SIZE
    self.chunk_overlap = Config.CHUNK_OVERLAP
    self.top_k = Config.TOP_K_RESULTS
    self.temperature = Config.AI_TEMPERATURE
    self.relevance_threshold = Config.RELEVANCE_THRESHOLD
    self.max_history = Config.MAX_CONVERSATION_HISTORY
    self.max_tokens = getattr(Config, 'AI_MAX_TOKENS', 1024)
    self.timeout = getattr(Config, 'AI_TIMEOUT', 15)
|
| 43 |
+
|
| 44 |
+
def chunk_text(self, text: str) -> list[dict]:
    """Break *text* into overlapping, sentence-aligned chunks.

    Runs of three or more newlines are collapsed to two and the text is
    stripped first. Text no longer than ``self.chunk_size`` comes back as a
    single chunk. Otherwise sentences are accumulated until the next one
    would exceed ``self.chunk_size``; the finished chunk is emitted and the
    next chunk is seeded with the last ``self.chunk_overlap`` characters of
    the previous one.

    Returns:
        A list of dicts with ``text``, ``start`` and ``end`` keys. Offsets
        are counted over the sentence strings returned by
        ``_split_into_sentences``.
    """
    normalized = re.sub(r'\n{3,}', '\n\n', text).strip()
    limit = self.chunk_size

    # Short input: nothing to split.
    if len(normalized) <= limit:
        return [{"text": normalized, "start": 0, "end": len(normalized)}]

    pieces = []
    buffer = ""
    buffer_start = 0
    cursor = 0  # running length of all sentences consumed so far

    for sentence in self._split_into_sentences(normalized):
        width = len(sentence)

        if len(buffer) + width > limit:
            # Flush what we have (if anything) before starting a new chunk.
            if buffer:
                pieces.append({
                    "text": buffer.strip(),
                    "start": buffer_start,
                    "end": cursor,
                })

            # Carry the tail of the previous chunk forward as overlap.
            tail_from = max(0, len(buffer) - self.chunk_overlap)
            buffer = buffer[tail_from:] + sentence
            buffer_start = cursor - (len(buffer) - width)
        else:
            buffer += sentence

        cursor += width

    # Whatever is left over forms the last chunk.
    if buffer.strip():
        pieces.append({
            "text": buffer.strip(),
            "start": buffer_start,
            "end": cursor,
        })

    return pieces
|
| 92 |
+
|
| 93 |
+
def _split_into_sentences(self, text: str) -> list[str]:
|
| 94 |
+
"""Split text into sentences while preserving delimiters"""
|
| 95 |
+
# Simple sentence splitting
|
| 96 |
+
pattern = r'(?<=[.!?])\s+(?=[A-Z])'
|
| 97 |
+
sentences = re.split(pattern, text)
|
| 98 |
+
return [s + ' ' for s in sentences]
|
| 99 |
+
|
| 100 |
+
def process_document(self, user_id: str, doc_id: str, content: str, bucket_id: str = ""):
    """Chunk a document and hand the chunks to ChromaDB.

    Args:
        user_id: Owner of the document.
        doc_id: Identifier the chunks are stored under.
        content: Full document text to split with ``chunk_text``.
        bucket_id: Optional bucket forwarded to ``store_chunks``.

    Returns:
        The number of chunks produced.
    """
    pieces = self.chunk_text(content)
    chroma_service.store_chunks(doc_id, user_id, pieces, bucket_id)
    chunk_count = len(pieces)
    return chunk_count
|
| 109 |
+
|
| 110 |
+
def _expand_query(self, query: str) -> list[str]:
|
| 111 |
+
"""
|
| 112 |
+
Generate query variations for better retrieval.
|
| 113 |
+
Extracts key terms and creates multiple search angles.
|
| 114 |
+
"""
|
| 115 |
+
import re
|
| 116 |
+
queries = [query]
|
| 117 |
+
query_lower = query.lower()
|
| 118 |
+
|
| 119 |
+
# Map numbers to words for module/section matching
|
| 120 |
+
word_map = {
|
| 121 |
+
'1': 'one', '2': 'two', '3': 'three', '4': 'four',
|
| 122 |
+
'5': 'five', '6': 'six', '7': 'seven', '8': 'eight',
|
| 123 |
+
'9': 'nine', '10': 'ten', '11': 'eleven', '12': 'twelve'
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
+
# Extract key terms (nouns, proper nouns) - words that are likely searchable
|
| 127 |
+
# Remove common question words and stop words
|
| 128 |
+
stop_words = {'what', 'who', 'where', 'when', 'why', 'how', 'is', 'are', 'was', 'were',
|
| 129 |
+
'the', 'a', 'an', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by',
|
| 130 |
+
'from', 'about', 'tell', 'me', 'can', 'you', 'please', 'give', 'show',
|
| 131 |
+
'list', 'all', 'find', 'get', 'does', 'do', 'did', 'has', 'have', 'had',
|
| 132 |
+
'this', 'that', 'these', 'those', 'and', 'or', 'but', 'if', 'then',
|
| 133 |
+
'there', 'their', 'they', 'them', 'it', 'its', 'be', 'been', 'being',
|
| 134 |
+
'any', 'some', 'my', 'your', 'our', 'his', 'her', 'which', 'each'}
|
| 135 |
+
|
| 136 |
+
# Extract potential key terms (2+ chars, not stop words)
|
| 137 |
+
words = re.findall(r'\b[a-zA-Z]{2,}\b', query_lower)
|
| 138 |
+
key_terms = [w for w in words if w not in stop_words]
|
| 139 |
+
|
| 140 |
+
# Add each key term as a separate query for direct matching
|
| 141 |
+
for term in key_terms[:5]: # Top 5 key terms
|
| 142 |
+
if len(term) > 3: # Only meaningful terms
|
| 143 |
+
queries.append(term)
|
| 144 |
+
|
| 145 |
+
# Add combinations of key terms
|
| 146 |
+
if len(key_terms) >= 2:
|
| 147 |
+
queries.append(' '.join(key_terms[:3])) # First 3 key terms
|
| 148 |
+
|
| 149 |
+
# Find module/section references and create variations
|
| 150 |
+
patterns = [
|
| 151 |
+
(r'module\s*(\d+)', 'module'),
|
| 152 |
+
(r'section\s*(\d+)', 'section'),
|
| 153 |
+
(r'chapter\s*(\d+)', 'chapter'),
|
| 154 |
+
(r'part\s*(\d+)', 'part'),
|
| 155 |
+
]
|
| 156 |
+
|
| 157 |
+
for pattern, prefix in patterns:
|
| 158 |
+
match = re.search(pattern, query_lower)
|
| 159 |
+
if match:
|
| 160 |
+
num = match.group(1)
|
| 161 |
+
# Add number word version: "module five"
|
| 162 |
+
if num in word_map:
|
| 163 |
+
queries.append(query_lower.replace(f'{prefix} {num}', f'{prefix} {word_map[num]}'))
|
| 164 |
+
# Add just the module reference: "module 5"
|
| 165 |
+
queries.append(f'{prefix} {num}')
|
| 166 |
+
# Add numbered list format: "5." or "5)"
|
| 167 |
+
queries.append(f'{num}.')
|
| 168 |
+
queries.append(f'{num})')
|
| 169 |
+
break
|
| 170 |
+
|
| 171 |
+
# Add question without question words for direct info retrieval
|
| 172 |
+
simplified = ' '.join(key_terms)
|
| 173 |
+
if simplified and simplified != query_lower:
|
| 174 |
+
queries.append(simplified)
|
| 175 |
+
|
| 176 |
+
# Deduplicate and limit
|
| 177 |
+
seen = set()
|
| 178 |
+
unique_queries = []
|
| 179 |
+
for q in queries:
|
| 180 |
+
q_clean = q.lower().strip()
|
| 181 |
+
if q_clean and q_clean not in seen and len(q_clean) > 1:
|
| 182 |
+
seen.add(q_clean)
|
| 183 |
+
unique_queries.append(q)
|
| 184 |
+
|
| 185 |
+
return unique_queries[:8] # Increased to 8 variations for better coverage
|
| 186 |
+
|
| 187 |
+
def _detect_document_reference(self, query: str, available_docs: list[dict]) -> list[str]:
|
| 188 |
+
"""
|
| 189 |
+
Detect if user is asking about a specific document by name.
|
| 190 |
+
Returns list of matching doc_ids to prioritize in search.
|
| 191 |
+
"""
|
| 192 |
+
query_lower = query.lower()
|
| 193 |
+
matching_doc_ids = []
|
| 194 |
+
|
| 195 |
+
for doc in available_docs:
|
| 196 |
+
filename = doc.get('filename', '')
|
| 197 |
+
if not filename:
|
| 198 |
+
continue
|
| 199 |
+
|
| 200 |
+
# Remove extension and normalize
|
| 201 |
+
name_parts = filename.lower().replace('.pdf', '').replace('.docx', '').replace('.xlsx', '').replace('.pptx', '').replace('.txt', '').replace('.md', '')
|
| 202 |
+
|
| 203 |
+
# Check if document name appears in query
|
| 204 |
+
# Handle common patterns like "the ABC document", "from XYZ file", "in document ABC"
|
| 205 |
+
if name_parts in query_lower or any(part in query_lower for part in name_parts.split('_') if len(part) > 3):
|
| 206 |
+
matching_doc_ids.append(doc.get('doc_id'))
|
| 207 |
+
|
| 208 |
+
return matching_doc_ids
|
| 209 |
+
|
| 210 |
+
def _parse_query_with_ai(self, query: str) -> dict:
|
| 211 |
+
"""
|
| 212 |
+
Use DeepSeek AI to understand query intent and extract structured parameters.
|
| 213 |
+
This replaces hardcoded pattern matching with intelligent query understanding.
|
| 214 |
+
|
| 215 |
+
Returns dict with:
|
| 216 |
+
- intent: list|count|rank|calculate|compare|specific|summarize
|
| 217 |
+
- needs_metadata: True if needs aggregate data across all documents
|
| 218 |
+
- filters: dict of field->value filters
|
| 219 |
+
- sort_by: field to sort by (or None)
|
| 220 |
+
- sort_order: 'desc' or 'asc'
|
| 221 |
+
- limit: number of results (or None for all)
|
| 222 |
+
- calculation: sum|average|max|min (or None)
|
| 223 |
+
- calculation_field: field for calculation
|
| 224 |
+
"""
|
| 225 |
+
import json
|
| 226 |
+
|
| 227 |
+
system_prompt = """You are a query parser for an insurance document system.
|
| 228 |
+
Analyze the user's question and extract structured parameters to help retrieve the right data.
|
| 229 |
+
|
| 230 |
+
Available fields for filtering:
|
| 231 |
+
- is_manufacturing (boolean): True if asking about manufacturing industry/sector
|
| 232 |
+
- policy_type (string): fire, marine, motor, health, liability, property, engineering, etc.
|
| 233 |
+
- industry (string): manufacturing, retail, IT, healthcare, construction, food, textile, etc.
|
| 234 |
+
- insurer_name (string): insurance company name
|
| 235 |
+
- insured_name (string): policyholder/company name
|
| 236 |
+
- broker_name (string): broker or agent name
|
| 237 |
+
- city (string): city name
|
| 238 |
+
- state (string): state name
|
| 239 |
+
- renewal_year (integer): 2024, 2025, 2026, etc.
|
| 240 |
+
|
| 241 |
+
Available fields for sorting:
|
| 242 |
+
- premium_amount: net premium, gross premium, premium
|
| 243 |
+
- sum_insured: coverage amount, insured value
|
| 244 |
+
- renewal_date: renewal date, expiry date
|
| 245 |
+
- policy_start_date: inception date, start date
|
| 246 |
+
|
| 247 |
+
Return ONLY valid JSON (no markdown, no explanation):
|
| 248 |
+
{
|
| 249 |
+
"intent": "list|count|rank|calculate|compare|specific|summarize",
|
| 250 |
+
"needs_metadata": true or false,
|
| 251 |
+
"filters": {"field_name": "value"},
|
| 252 |
+
"sort_by": "field_name" or null,
|
| 253 |
+
"sort_order": "desc" or "asc",
|
| 254 |
+
"limit": number or null,
|
| 255 |
+
"calculation": "sum|average|max|min|count" or null,
|
| 256 |
+
"calculation_field": "premium_amount|sum_insured" or null
|
| 257 |
+
}
|
| 258 |
+
|
| 259 |
+
Examples:
|
| 260 |
+
Query: "top 5 manufacturing policies by premium"
|
| 261 |
+
{"intent":"rank","needs_metadata":true,"filters":{"is_manufacturing":true},"sort_by":"premium_amount","sort_order":"desc","limit":5,"calculation":null,"calculation_field":null}
|
| 262 |
+
|
| 263 |
+
Query: "total sum insured for all fire policies"
|
| 264 |
+
{"intent":"calculate","needs_metadata":true,"filters":{"policy_type":"fire"},"sort_by":null,"sort_order":"desc","limit":null,"calculation":"sum","calculation_field":"sum_insured"}
|
| 265 |
+
|
| 266 |
+
Query: "what is covered in the ABC policy document?"
|
| 267 |
+
{"intent":"specific","needs_metadata":false,"filters":{},"sort_by":null,"sort_order":"desc","limit":null,"calculation":null,"calculation_field":null}
|
| 268 |
+
|
| 269 |
+
Query: "list all policies renewing in 2026"
|
| 270 |
+
{"intent":"list","needs_metadata":true,"filters":{"renewal_year":2026},"sort_by":"renewal_date","sort_order":"asc","limit":null,"calculation":null,"calculation_field":null}
|
| 271 |
+
|
| 272 |
+
Query: "how many manufacturing companies do we have?"
|
| 273 |
+
{"intent":"count","needs_metadata":true,"filters":{"is_manufacturing":true},"sort_by":null,"sort_order":"desc","limit":null,"calculation":"count","calculation_field":null}
|
| 274 |
+
|
| 275 |
+
Query: "compare policy A and policy B"
|
| 276 |
+
{"intent":"compare","needs_metadata":false,"filters":{},"sort_by":null,"sort_order":"desc","limit":null,"calculation":null,"calculation_field":null}"""
|
| 277 |
+
|
| 278 |
+
messages = [
|
| 279 |
+
{"role": "system", "content": system_prompt},
|
| 280 |
+
{"role": "user", "content": f"Parse this query: {query}"}
|
| 281 |
+
]
|
| 282 |
+
|
| 283 |
+
try:
|
| 284 |
+
# Use non-streaming call for quick parsing
|
| 285 |
+
response = self._call_deepseek_sync(messages, max_tokens=300)
|
| 286 |
+
|
| 287 |
+
# Parse JSON response
|
| 288 |
+
parsed = json.loads(response.strip())
|
| 289 |
+
print(f"[AI QUERY PARSER] Parsed: {json.dumps(parsed, indent=2)}")
|
| 290 |
+
return parsed
|
| 291 |
+
|
| 292 |
+
except Exception as e:
|
| 293 |
+
print(f"[AI QUERY PARSER] Error: {e}, falling back to pattern matching")
|
| 294 |
+
# Fallback to basic detection
|
| 295 |
+
return {
|
| 296 |
+
"intent": "specific",
|
| 297 |
+
"needs_metadata": False,
|
| 298 |
+
"filters": {},
|
| 299 |
+
"sort_by": None,
|
| 300 |
+
"sort_order": "desc",
|
| 301 |
+
"limit": None,
|
| 302 |
+
"calculation": None,
|
| 303 |
+
"calculation_field": None
|
| 304 |
+
}
|
| 305 |
+
|
| 306 |
+
def _call_deepseek_sync(self, messages: list, max_tokens: int = 500) -> str:
|
| 307 |
+
"""Synchronous call to DeepSeek for quick operations like query parsing."""
|
| 308 |
+
import requests
|
| 309 |
+
|
| 310 |
+
if not self.deepseek_api_key:
|
| 311 |
+
raise Exception("DeepSeek API key not configured")
|
| 312 |
+
|
| 313 |
+
response = requests.post(
|
| 314 |
+
"https://api.deepseek.com/v1/chat/completions",
|
| 315 |
+
headers={
|
| 316 |
+
"Authorization": f"Bearer {self.deepseek_api_key}",
|
| 317 |
+
"Content-Type": "application/json"
|
| 318 |
+
},
|
| 319 |
+
json={
|
| 320 |
+
"model": "deepseek-chat",
|
| 321 |
+
"messages": messages,
|
| 322 |
+
"max_tokens": max_tokens,
|
| 323 |
+
"temperature": 0.1 # Low temperature for consistent parsing
|
| 324 |
+
},
|
| 325 |
+
timeout=15
|
| 326 |
+
)
|
| 327 |
+
|
| 328 |
+
if response.status_code == 200:
|
| 329 |
+
return response.json()['choices'][0]['message']['content']
|
| 330 |
+
else:
|
| 331 |
+
raise Exception(f"DeepSeek API error: {response.status_code}")
|
| 332 |
+
|
| 333 |
+
def _detect_query_type(self, query: str, history: list[dict] = None) -> str:
|
| 334 |
+
"""
|
| 335 |
+
Detect the type of query to optimize retrieval and response.
|
| 336 |
+
Returns: 'specific', 'aggregate', 'calculation', 'date_filter',
|
| 337 |
+
'cross_document', 'followup', 'comparison', 'general'
|
| 338 |
+
|
| 339 |
+
NEW TYPES:
|
| 340 |
+
- 'aggregate': List all, count all, common across all documents
|
| 341 |
+
- 'calculation': Math operations (sum, average, total of numbers)
|
| 342 |
+
- 'date_filter': Date-based filtering (policies renewing in 2026)
|
| 343 |
+
"""
|
| 344 |
+
query_lower = query.lower().strip()
|
| 345 |
+
|
| 346 |
+
# AGGREGATE patterns - queries that need to scan ALL documents
|
| 347 |
+
aggregate_patterns = [
|
| 348 |
+
'list all', 'give me all', 'show all', 'all policies', 'all documents',
|
| 349 |
+
'every policy', 'every document', 'all the policies', 'all the documents',
|
| 350 |
+
'how many policies', 'how many documents', 'count all', 'total number of',
|
| 351 |
+
'all manufacturing', 'all companies', 'all insured', 'all insurers',
|
| 352 |
+
'common', 'across all', 'in all documents', 'throughout all',
|
| 353 |
+
'summarize all', 'overview of all', 'complete list', 'full list',
|
| 354 |
+
'what are the', 'what policies', 'which companies', 'which policies'
|
| 355 |
+
]
|
| 356 |
+
|
| 357 |
+
# CALCULATION patterns - queries needing math operations
|
| 358 |
+
calculation_patterns = [
|
| 359 |
+
'total sum', 'sum of', 'add up', 'combined', 'aggregate',
|
| 360 |
+
'total insured', 'total premium', 'total value', 'total amount',
|
| 361 |
+
'calculate', 'average', 'mean', 'maximum', 'minimum', 'highest', 'lowest',
|
| 362 |
+
'what is the total', 'how much total', 'sum insured across',
|
| 363 |
+
'cumulative', 'grand total'
|
| 364 |
+
]
|
| 365 |
+
|
| 366 |
+
# DATE FILTER patterns - queries filtering by dates
|
| 367 |
+
date_patterns = [
|
| 368 |
+
'renew in', 'renewal in', 'expiring in', 'expire in', 'expiry in',
|
| 369 |
+
'renewing in 2024', 'renewing in 2025', 'renewing in 2026', 'renewing in 2027',
|
| 370 |
+
'expiring in 2024', 'expiring in 2025', 'expiring in 2026', 'expiring in 2027',
|
| 371 |
+
'policies in 2024', 'policies in 2025', 'policies in 2026', 'policies in 2027',
|
| 372 |
+
'before 2025', 'after 2025', 'before 2026', 'after 2026',
|
| 373 |
+
'next year', 'this year', 'last year', 'next month',
|
| 374 |
+
'valid until', 'valid till', 'due for renewal'
|
| 375 |
+
]
|
| 376 |
+
|
| 377 |
+
# Followup indicators - pronouns and references to previous context
|
| 378 |
+
followup_patterns = [
|
| 379 |
+
'it', 'this', 'that', 'these', 'those', 'the same', 'same one',
|
| 380 |
+
'mentioned', 'above', 'earlier', 'previous', 'last one',
|
| 381 |
+
'for it', 'about it', 'of it', 'its ', "it's", 'for this', 'for that'
|
| 382 |
+
]
|
| 383 |
+
|
| 384 |
+
# Cross-document patterns (legacy - now mostly covered by aggregate)
|
| 385 |
+
cross_doc_patterns = [
|
| 386 |
+
'other documents', 'other policies', 'other files',
|
| 387 |
+
'which documents', 'which files',
|
| 388 |
+
'similar to', 'related to', 'like this one'
|
| 389 |
+
]
|
| 390 |
+
|
| 391 |
+
# Comparison patterns
|
| 392 |
+
comparison_patterns = [
|
| 393 |
+
'compare', 'difference between', 'versus', ' vs ', 'differ',
|
| 394 |
+
'same as', 'similar to', 'contrast', 'both', 'either'
|
| 395 |
+
]
|
| 396 |
+
|
| 397 |
+
# Check patterns in priority order
|
| 398 |
+
# 1. Aggregate queries (highest priority for "list all" type queries)
|
| 399 |
+
for pattern in aggregate_patterns:
|
| 400 |
+
if pattern in query_lower:
|
| 401 |
+
print(f"[QUERY TYPE] Detected AGGREGATE: matched '{pattern}'")
|
| 402 |
+
return 'aggregate'
|
| 403 |
+
|
| 404 |
+
# 2. Calculation queries
|
| 405 |
+
for pattern in calculation_patterns:
|
| 406 |
+
if pattern in query_lower:
|
| 407 |
+
print(f"[QUERY TYPE] Detected CALCULATION: matched '{pattern}'")
|
| 408 |
+
return 'calculation'
|
| 409 |
+
|
| 410 |
+
# 3. Date filter queries
|
| 411 |
+
for pattern in date_patterns:
|
| 412 |
+
if pattern in query_lower:
|
| 413 |
+
print(f"[QUERY TYPE] Detected DATE_FILTER: matched '{pattern}'")
|
| 414 |
+
return 'date_filter'
|
| 415 |
+
|
| 416 |
+
# 4. Followup queries (short queries with pronouns)
|
| 417 |
+
for pattern in followup_patterns:
|
| 418 |
+
if pattern in query_lower and len(query) < 100:
|
| 419 |
+
return 'followup'
|
| 420 |
+
|
| 421 |
+
# 5. Cross-document queries
|
| 422 |
+
for pattern in cross_doc_patterns:
|
| 423 |
+
if pattern in query_lower:
|
| 424 |
+
return 'cross_document'
|
| 425 |
+
|
| 426 |
+
# 6. Comparison queries
|
| 427 |
+
for pattern in comparison_patterns:
|
| 428 |
+
if pattern in query_lower:
|
| 429 |
+
return 'comparison'
|
| 430 |
+
|
| 431 |
+
# If there's recent history and query is short, likely a followup
|
| 432 |
+
if history and len(history) > 0 and len(query) < 50:
|
| 433 |
+
words = query_lower.split()
|
| 434 |
+
if words and words[0] in ['what', 'who', 'when', 'where', 'why', 'how', 'is', 'are', 'does', 'do', 'can']:
|
| 435 |
+
return 'followup'
|
| 436 |
+
|
| 437 |
+
return 'general'
|
| 438 |
+
|
| 439 |
+
def _handle_aggregate_query(self, user_id: str, bucket_id: str, query: str) -> dict:
    """Build an LLM context covering every document in a bucket.

    Used for 'list all', 'how many' and similar questions. All metadata
    records and summaries are fetched from ``chroma_service``. With more
    than 50 metadata records each document is rendered as one compact line;
    otherwise each gets a multi-line entry that also carries the first 300
    characters of its summary (matched by ``doc_id``).

    Returns:
        Dict with ``context`` (the joined text), ``metadata`` (the raw
        records), ``total_documents`` and ``sources`` (doc_id -> title).
    """
    print(f"[AGGREGATE] Handling aggregate query: {query[:50]}...")

    # Metadata first, then summaries, for this bucket
    all_metadata = chroma_service.get_all_metadata(user_id, bucket_id)
    all_summaries = chroma_service.get_all_summaries(user_id, bucket_id)

    print(f"[AGGREGATE] Retrieved {len(all_metadata)} metadata records, {len(all_summaries)} summaries")

    if len(all_metadata) > 50:
        # Large dataset: one line per document to stay within token limits
        print(f"[AGGREGATE] Large dataset ({len(all_metadata)} docs) - using compact format")
        context_parts = [
            f"{i}. {meta.get('document_title', 'Unknown')} | Insured: {meta.get('insured_name', 'N/A')} | Type: {meta.get('policy_type', 'N/A')} | Industry: {meta.get('industry', 'N/A')} | Sum: {meta.get('sum_insured', 0)} | Mfg: {meta.get('is_manufacturing', False)}"
            for i, meta in enumerate(all_metadata, 1)
        ]
    else:
        # Small dataset: full entry per document
        context_parts = []
        for i, meta in enumerate(all_metadata, 1):
            doc_id = meta.get('doc_id', '')
            filename = meta.get('document_title', 'Unknown Document')

            # First summary whose doc_id matches, or empty when none does
            summary = next(
                (s.get('summary', '') for s in all_summaries if s.get('doc_id') == doc_id),
                "",
            )

            entry_lines = [
                f"=== Document {i}: {filename} ===",
                f"- Policy Number: {meta.get('policy_number', 'N/A')}",
                f"- Insured: {meta.get('insured_name', 'N/A')}",
                f"- Insurer: {meta.get('insurer_name', 'N/A')}",
                f"- Policy Type: {meta.get('policy_type', 'N/A')}",
                f"- Industry: {meta.get('industry', 'N/A')}",
                f"- Sum Insured: {meta.get('sum_insured', 'N/A')}",
                f"- Premium: {meta.get('premium_amount', 'N/A')}",
                f"- Start Date: {meta.get('policy_start_date', 'N/A')}",
                f"- End Date: {meta.get('policy_end_date', 'N/A')}",
                f"- Renewal Date: {meta.get('renewal_date', 'N/A')}",
                f"- Location: {meta.get('city', '')}, {meta.get('state', '')}",
                f"- Is Manufacturing: {meta.get('is_manufacturing', False)}",
                f"Summary: {summary[:300] if summary else 'No summary available'}",
            ]
            context_parts.append('\n'.join(entry_lines).strip())

    context = '\n'.join(context_parts)
    print(f"[AGGREGATE] Context length: {len(context)} characters")

    sources = {m.get('doc_id'): m.get('document_title') for m in all_metadata}
    return {
        'context': context,
        'metadata': all_metadata,
        'total_documents': len(all_metadata),
        'sources': sources
    }
|
| 507 |
+
|
| 508 |
+
def _handle_calculation_query(self, user_id: str, bucket_id: str, query: str) -> dict:
    """Compute totals/averages over all document metadata in a bucket.

    Used for 'total sum insured', 'average premium' and similar questions.
    Which figures are computed depends on words in the query:
    ``insured`` -> sum-insured statistics, ``premium`` -> premium
    statistics, ``type`` or ``policies`` -> count of documents per
    policy type. Only truthy field values take part in the statistics.

    Returns:
        Dict with ``context`` (markdown-style report), ``calculations``
        (the raw figures), ``total_documents`` and ``sources``
        (doc_id -> title).
    """
    print(f"[CALCULATION] Handling calculation query: {query[:50]}...")

    query_lower = query.lower()
    all_metadata = chroma_service.get_all_metadata(user_id, bucket_id)

    def field_stats(field):
        """Total/count/average/max/min over the truthy values of *field*."""
        values = [m.get(field, 0) for m in all_metadata if m.get(field)]
        total = sum(values)
        return {
            'total': total,
            'count': len(values),
            'average': total / len(values) if values else 0,
            'max': max(values) if values else 0,
            'min': min(values) if values else 0
        }

    calc_results = {}

    # 'sum insured' always contains 'insured', so one test covers both
    if 'insured' in query_lower:
        calc_results['sum_insured'] = field_stats('sum_insured')

    if 'premium' in query_lower:
        calc_results['premium'] = field_stats('premium_amount')

    # Policy count by type
    if 'type' in query_lower or 'policies' in query_lower:
        type_counts = {}
        for m in all_metadata:
            pt = m.get('policy_type', 'unknown')
            type_counts[pt] = type_counts.get(pt, 0) + 1
        calc_results['policy_types'] = type_counts

    # Assemble the report section by section
    sections = [f"\n=== CALCULATION RESULTS FOR {len(all_metadata)} DOCUMENTS ===\n\n"]

    if 'sum_insured' in calc_results:
        si = calc_results['sum_insured']
        sections.append(
            "\n## Sum Insured Analysis\n"
            f"- **Total Sum Insured**: ₹{si['total']:,.2f}\n"
            f"- **Number of policies with sum insured**: {si['count']}\n"
            f"- **Average Sum Insured**: ₹{si['average']:,.2f}\n"
            f"- **Maximum Sum Insured**: ₹{si['max']:,.2f}\n"
            f"- **Minimum Sum Insured**: ₹{si['min']:,.2f}\n"
        )

    if 'premium' in calc_results:
        pm = calc_results['premium']
        sections.append(
            "\n## Premium Analysis\n"
            f"- **Total Premium**: ₹{pm['total']:,.2f}\n"
            f"- **Number of policies with premium**: {pm['count']}\n"
            f"- **Average Premium**: ₹{pm['average']:,.2f}\n"
            f"- **Maximum Premium**: ₹{pm['max']:,.2f}\n"
            f"- **Minimum Premium**: ₹{pm['min']:,.2f}\n"
        )

    if 'policy_types' in calc_results:
        sections.append("\n## Policy Types Breakdown\n")
        # Most frequent policy type first
        ranked = sorted(calc_results['policy_types'].items(), key=lambda x: -x[1])
        for pt, count in ranked:
            sections.append(f"- **{pt.title()}**: {count} policies\n")

    return {
        'context': ''.join(sections),
        'calculations': calc_results,
        'total_documents': len(all_metadata),
        'sources': {m.get('doc_id'): m.get('document_title') for m in all_metadata}
    }
|
| 592 |
+
|
| 593 |
+
def _handle_date_filter_query(self, user_id: str, bucket_id: str, query: str) -> dict:
    """Select documents whose policy dates fall in the year named in *query*.

    Used for 'policies renewing in 2026', 'expiring this year', etc. The
    target year comes from ``date_parser.get_year_from_query``. Which date
    is compared depends on the wording of the query:

    - contains ``renew``: ``renewal_year``, falling back to the year of
      ``policy_end_date`` when no renewal year is stored
    - contains ``expir``: year of ``policy_end_date``
    - contains ``start``: year of ``policy_start_date``
    - none of the above (e.g. "policies in 2026"): a document matches when
      any of those three years equals the target. Without this fallback
      such queries could never match a document.

    When no year can be extracted from the query, nothing matches.

    Returns:
        Dict with ``context``, ``matching_documents``, ``target_year``,
        ``total_matches`` and ``sources`` (doc_id -> title).
    """
    print(f"[DATE FILTER] Handling date query: {query[:50]}...")

    # Extract year from query
    target_year = date_parser.get_year_from_query(query)

    # Get all metadata
    all_metadata = chroma_service.get_all_metadata(user_id, bucket_id)

    query_lower = query.lower()

    def year_of(date_value):
        """Year of a stored date string, or None if empty/unparseable."""
        if not date_value:
            return None
        parsed = date_parser.parse_date(date_value)
        return parsed.year if parsed else None

    def renewal_year_of(meta):
        """Stored renewal year, else the year of the policy end date."""
        return meta.get('renewal_year', 0) or year_of(meta.get('policy_end_date'))

    def is_match(meta):
        if 'renew' in query_lower:
            return renewal_year_of(meta) == target_year
        if 'expir' in query_lower:
            return year_of(meta.get('policy_end_date', '')) == target_year
        if 'start' in query_lower:
            return year_of(meta.get('policy_start_date', '')) == target_year
        # No explicit date kind in the query: accept a hit on any date.
        return target_year in (
            renewal_year_of(meta),
            year_of(meta.get('policy_end_date', '')),
            year_of(meta.get('policy_start_date', '')),
        )

    # Filter by date criteria; without a target year nothing can match
    matching_docs = [m for m in all_metadata if is_match(m)] if target_year else []

    print(f"[DATE FILTER] Found {len(matching_docs)} documents matching year {target_year}")

    # Build context from matching documents
    context_parts = []
    context_parts.append(f"=== POLICIES MATCHING DATE CRITERIA (Year: {target_year}) ===\n")
    context_parts.append(f"Found {len(matching_docs)} policies:\n")

    for i, meta in enumerate(matching_docs, 1):
        entry = f"""
{i}. **{meta.get('document_title', 'Unknown')}**
- Insured: {meta.get('insured_name', 'N/A')}
- Policy Type: {meta.get('policy_type', 'N/A')}
- Start: {meta.get('policy_start_date', 'N/A')}
- End: {meta.get('policy_end_date', 'N/A')}
- Renewal: {meta.get('renewal_date', 'N/A')}
- Sum Insured: {meta.get('sum_insured', 'N/A')}
"""
        context_parts.append(entry)

    return {
        'context': '\n'.join(context_parts),
        'matching_documents': matching_docs,
        'target_year': target_year,
        'total_matches': len(matching_docs),
        'sources': {m.get('doc_id'): m.get('document_title') for m in matching_docs}
    }
|
| 668 |
+
|
| 669 |
+
def _handle_metadata_query(self, user_id: str, bucket_id: str,
                           query: str, parsed: dict) -> dict:
    """
    Handle queries using AI-parsed parameters for intelligent filtering and sorting.
    This is the new AI-powered approach that replaces pattern-based routing.

    Loads every metadata record of the bucket, then applies, in order, the
    parsed filters, the sort, the limit and the optional calculation, and
    finally renders the surviving records into a text context.

    Args:
        user_id: User ID
        bucket_id: Bucket ID
        query: Original query text (not consulted by this method)
        parsed: AI-parsed parameters. Keys read here: 'filters', 'sort_by',
            'sort_order', 'limit', 'intent', 'calculation', 'calculation_field'.
            Keys that are present but null fall back to their defaults.

    Returns:
        dict with keys 'context', 'metadata', 'total_documents',
        'total_before_filter', 'calculation', 'parsed' and 'sources'.
    """
    text_filter_fields = ('industry', 'policy_type', 'city', 'state',
                          'insurer_name', 'insured_name', 'broker_name')
    numeric_sort_fields = ('premium_amount', 'sum_insured')
    date_sort_fields = ('renewal_date', 'policy_start_date')

    def _as_number(raw):
        """Return raw as a number; numeric text is parsed, anything else counts as 0."""
        if isinstance(raw, (int, float)):
            return raw
        if isinstance(raw, str):
            try:
                return float(raw.replace(',', '').strip())
            except ValueError:
                return 0
        return 0

    def _as_year(raw):
        """Return raw as an int year, or None when it cannot be interpreted."""
        try:
            return int(raw)
        except (TypeError, ValueError):
            return None

    print(f"[METADATA QUERY] Using AI-parsed parameters: {parsed}")

    # Get ALL metadata for this bucket
    all_metadata = chroma_service.get_all_metadata(user_id, bucket_id)
    total_before_filter = len(all_metadata)

    print(f"[METADATA QUERY] Starting with {total_before_filter} documents")

    # Apply AI-extracted filters ('filters' may be present but null)
    filters = parsed.get('filters') or {}
    for field, value in filters.items():
        if value is None or value == '':
            continue

        if field == 'is_manufacturing':
            # Only a truthy flag filters; False means "no constraint".
            if value:
                all_metadata = [m for m in all_metadata if m.get('is_manufacturing', False)]
                print(f"[METADATA QUERY] Filtered by manufacturing: {len(all_metadata)} remaining")

        elif field in text_filter_fields:
            # Case-insensitive substring match on the stored value.
            needle = str(value).lower()
            all_metadata = [m for m in all_metadata
                            if needle in str(m.get(field, '')).lower()]
            print(f"[METADATA QUERY] Filtered by {field} '{value}': {len(all_metadata)} remaining")

        elif field == 'renewal_year':
            # A non-numeric year is ignored instead of raising ValueError.
            target_year = _as_year(value)
            if target_year:
                all_metadata = [m for m in all_metadata
                                if _as_year(m.get('renewal_year')) == target_year]
                print(f"[METADATA QUERY] Filtered by renewal_year {target_year}: {len(all_metadata)} remaining")

    # Apply AI-extracted sorting
    sort_by = parsed.get('sort_by')
    if sort_by in numeric_sort_fields or sort_by in date_sort_fields:
        reverse = (parsed.get('sort_order') or 'desc') == 'desc'
        if sort_by in numeric_sort_fields:
            def sort_key(m):
                return _as_number(m.get(sort_by))
        else:
            # Dates are compared as text; a missing date sorts as '' so that
            # str and int are never compared against each other.
            def sort_key(m):
                return str(m.get(sort_by) or '')
        all_metadata = sorted(all_metadata, key=sort_key, reverse=reverse)
        print(f"[METADATA QUERY] Sorted by {sort_by} {'desc' if reverse else 'asc'}")

    # Apply AI-extracted limit
    limit = parsed.get('limit')
    if limit and isinstance(limit, int) and limit > 0:
        all_metadata = all_metadata[:limit]
        print(f"[METADATA QUERY] Limited to top {limit}")

    # Handle calculations
    calc_result = None
    if parsed.get('intent') == 'calculate' or parsed.get('calculation'):
        calc_type = parsed.get('calculation') or 'sum'
        calc_field = parsed.get('calculation_field') or 'premium_amount'
        values = [_as_number(m.get(calc_field)) for m in all_metadata]

        if calc_type == 'sum':
            calc_result = {'type': 'sum', 'field': calc_field, 'value': sum(values)}
        elif calc_type == 'average' and values:
            calc_result = {'type': 'average', 'field': calc_field, 'value': sum(values) / len(values)}
        elif calc_type == 'max' and values:
            calc_result = {'type': 'max', 'field': calc_field, 'value': max(values)}
        elif calc_type == 'min' and values:
            calc_result = {'type': 'min', 'field': calc_field, 'value': min(values)}
        elif calc_type == 'count':
            calc_result = {'type': 'count', 'field': 'documents', 'value': len(all_metadata)}

    # Handle count intent
    if parsed.get('intent') == 'count' and not calc_result:
        calc_result = {'type': 'count', 'field': 'documents', 'value': len(all_metadata)}

    # Build context
    context_parts = []

    # Add calculation result if any
    if calc_result:
        if calc_result['type'] == 'count':
            context_parts.append(f"**Total Count: {calc_result['value']} documents**\n")
        else:
            context_parts.append(f"**{calc_result['type'].title()} of {calc_result['field']}: ₹{calc_result['value']:,.2f}**\n")

    # Add filtered results summary
    filter_desc = ', '.join(f"{k}={v}" for k, v in filters.items() if v)
    if filter_desc:
        context_parts.append(f"Filtered by: {filter_desc}")
    context_parts.append(f"Showing {len(all_metadata)} of {total_before_filter} total documents\n")

    # Build document list
    if all_metadata:
        context_parts.append("---\n**Matching Documents:**\n")

        # Use rich format for smaller sets, compact for larger
        use_rich_format = len(all_metadata) <= 20

        for i, meta in enumerate(all_metadata, 1):
            premium = _as_number(meta.get('premium_amount'))
            if use_rich_format:
                sum_insured = _as_number(meta.get('sum_insured'))
                entry = "\n".join([
                    "",
                    f"**{i}. {meta.get('document_title', 'Unknown')}**",
                    f"- Insured: {meta.get('insured_name', 'N/A')}",
                    f"- Insurer: {meta.get('insurer_name', 'N/A')}",
                    f"- Policy Type: {meta.get('policy_type', 'N/A')}",
                    f"- Industry: {meta.get('industry', 'N/A')}",
                    f"- Sum Insured: ₹{sum_insured:,.2f}",
                    f"- Premium: ₹{premium:,.2f}",
                    f"- Renewal: {meta.get('renewal_date', 'N/A')}",
                    f"- Location: {meta.get('city', '')}, {meta.get('state', '')}",
                    "",
                ])
            else:
                # Compact format for large sets
                entry = f"{i}. {meta.get('document_title', 'Unknown')} | {meta.get('insured_name', 'N/A')} | ₹{premium:,.0f} | {meta.get('policy_type', 'N/A')}"

            context_parts.append(entry)

    context = '\n'.join(context_parts)
    print(f"[METADATA QUERY] Final context: {len(context)} chars, {len(all_metadata)} docs")

    return {
        'context': context,
        'metadata': all_metadata,
        'total_documents': len(all_metadata),
        'total_before_filter': total_before_filter,
        'calculation': calc_result,
        'parsed': parsed,
        'sources': {m.get('doc_id'): m.get('document_title') for m in all_metadata}
    }
|
| 807 |
+
|
| 808 |
+
def _stream_metadata_query(self, user_id: str, bucket_id: str,
                           query: str, parsed: dict, chat_id: str = ""):
    """
    Stream responses for AI-parsed metadata queries.
    Uses intelligent filtering, sorting, and calculations based on AI-extracted parameters.

    This is the new AI-powered streaming handler that replaces pattern-based routing.

    Args:
        user_id: User ID
        bucket_id: Bucket ID
        query: Original query text
        parsed: AI-parsed parameters with intent, filters, sort, limit, etc.
        chat_id: Chat session ID for conversation storage; when empty the
            exchange is not stored.

    Yields:
        Event dicts keyed by "type": one "error" event (then the generator
        stops), or a "sources" event, zero or more "content" events and a
        final "done" event.
    """
    print(f"[METADATA STREAM] Handling AI-parsed query: intent={parsed.get('intent')}")

    # Step 1: Get filtered, sorted, and calculated metadata using AI-parsed parameters
    result = self._handle_metadata_query(user_id, bucket_id, query, parsed)

    context = result.get('context', '')
    sources = result.get('sources', {})
    total_docs = result.get('total_documents', 0)
    total_before = result.get('total_before_filter', 0)
    calculation = result.get('calculation')

    # Check if we have any data
    if not context or total_docs == 0:
        if total_before:
            # Metadata exists but the filters excluded everything; pointing
            # the user at the migration script would be misleading here.
            message = (f"No documents matched the requested filters "
                       f"({total_before} documents were searched). "
                       f"Try broadening your query.")
        else:
            message = "No document metadata found. Please run the migration script to extract metadata from your documents."
        yield {
            "type": "error",
            "content": message
        }
        return

    # Send sources first
    yield {
        "type": "sources",
        "sources": list(sources.keys()),
        "source_files": list(sources.values())
    }

    # Step 2: Build AI prompt based on parsed intent
    intent = parsed.get('intent', 'list')

    if intent == 'count':
        system_prompt = f"""You are Iribl AI, a document analysis assistant answering a COUNT query.

CRITICAL INSTRUCTIONS:
1. The count has been computed: {total_docs} documents match the criteria.
2. State the count clearly and directly.
3. If filters were applied, mention what was filtered.
4. Brief context about what was counted is helpful."""

    elif intent == 'calculate':
        calc_info = ""
        if calculation:
            calc_type = str(calculation.get('type') or '')
            calc_value = calculation.get('value') or 0
            if calc_type == 'count':
                # A count is a number of documents, not a currency amount.
                calc_info = f"\nPre-computed: Count of {calculation.get('field')} = {calc_value}"
            else:
                calc_info = f"\nPre-computed: {calc_type.title()} of {calculation.get('field')} = ₹{calc_value:,.2f}"
        system_prompt = f"""You are Iribl AI, a document analysis assistant performing CALCULATIONS across {total_docs} documents.

CRITICAL INSTRUCTIONS:
1. The calculation results have been computed from {total_docs} documents.{calc_info}
2. Present the numbers clearly with proper formatting (₹ for currency, commas for thousands).
3. Explain what the numbers mean in business context.
4. Include document counts to show the calculation scope.

Present the data accurately - these are pre-computed from actual document metadata."""

    elif intent == 'rank':
        # The limit has already been applied upstream, so total_docs is the
        # number of ranked items actually present in the context. Using it
        # avoids "top None" for a null limit and avoids asking the model for
        # more items than exist.
        ranked_count = total_docs
        sort_by = parsed.get('sort_by') or 'premium_amount'
        sort_order = parsed.get('sort_order') or 'desc'
        system_prompt = f"""You are Iribl AI, a document analysis assistant answering a RANKING query.

CRITICAL INSTRUCTIONS:
1. You have been given the top {ranked_count} documents sorted by {sort_by} ({sort_order}).
2. Present them as a clear ranked list with the ranking number.
3. Highlight the key metric ({sort_by}) for each item.
4. Format nicely with headers, bold for values, and bullet points.
5. Include all {ranked_count} items - do not truncate."""

    elif intent == 'compare':
        system_prompt = f"""You are Iribl AI, a document analysis assistant answering a COMPARISON query.

CRITICAL INSTRUCTIONS:
1. You have metadata for {total_docs} relevant documents.
2. Create a clear comparison highlighting differences and similarities.
3. Use tables or side-by-side format where helpful.
4. Focus on the key metrics mentioned in the query.
5. Be thorough but organized."""

    else:  # list, summarize, or other
        system_prompt = f"""You are Iribl AI, a document analysis assistant. You are answering a query that requires information from {total_docs} documents.

CRITICAL INSTRUCTIONS:
1. You have been given metadata for {total_docs} documents (from {total_before} total).
2. Your answer must be COMPREHENSIVE - include ALL relevant items from the data provided.
3. Format your response clearly with headers, bullet points, and bold text.
4. For "list" queries, actually list ALL matching items with key details.
5. Organize information logically (by type, by company, by date, etc.).
6. For "summarize" queries, provide a concise overview with key statistics.

Do NOT say information is missing - you have the filtered list. Do NOT ask for more documents."""

    # Step 3: Build messages
    messages = [{"role": "system", "content": system_prompt}]

    user_message = f"""Based on the following document metadata and any calculations, answer my question.

DOCUMENT DATA:
{context}

QUESTION: {query}

Instructions: Provide a complete, well-formatted answer based on ALL the data above."""

    messages.append({"role": "user", "content": user_message})

    # Step 4: Stream response using DeepSeek or fallback
    full_response = ""
    chunk_count = 0

    if self.use_deepseek:
        print("[METADATA STREAM] Using DeepSeek for response")
        for chunk in self._call_deepseek_streaming(messages):
            if "error" in chunk:
                print("[METADATA STREAM] DeepSeek failed, falling back...")
                break
            if "chunk" in chunk:
                full_response += chunk["chunk"]
                chunk_count += 1
                if chunk_count <= 3:
                    print(f"[METADATA YIELD] Chunk {chunk_count}: {chunk['chunk'][:30]}...")
                yield {"type": "content", "content": chunk["chunk"]}

        print(f"[METADATA STREAM] DeepSeek streaming done, yielded {chunk_count} chunks")

    # Fallback to OpenRouter if DeepSeek failed or not available
    if not full_response:
        print("[METADATA STREAM] Falling back to OpenRouter")
        for model_key in self.fallback_order:
            try:
                for chunk in self._call_ai_model_streaming(model_key, messages):
                    if "error" in chunk:
                        continue
                    if "chunk" in chunk:
                        full_response += chunk["chunk"]
                        chunk_count += 1
                        yield {"type": "content", "content": chunk["chunk"]}
                if full_response:
                    break
            except Exception as e:
                print(f"[METADATA STREAM] Model {model_key} failed: {e}")
                continue

    # Step 5: Store conversation
    if full_response and chat_id:
        try:
            chroma_service.store_conversation(
                user_id=user_id,
                role="user",
                content=query,
                bucket_id=bucket_id or "",
                chat_id=chat_id
            )
            chroma_service.store_conversation(
                user_id=user_id,
                role="assistant",
                content=full_response,
                bucket_id=bucket_id or "",
                chat_id=chat_id
            )
        except Exception as e:
            # Storage is best-effort; the answer has already been streamed.
            print(f"[METADATA STREAM] Failed to store conversation: {e}")

    # Send done signal with metadata about the query
    yield {
        "type": "done",
        "query_type": "metadata",
        "intent": parsed.get('intent'),
        "total_documents": total_docs,
        "total_before_filter": total_before
    }
|
| 990 |
+
|
| 991 |
+
def _stream_specialized_query(self, user_id: str, bucket_id: str,
                              query: str, query_type: str, chat_id: str = ""):
    """
    Stream responses for specialized queries (aggregate, calculation, date_filter).
    Uses metadata/summaries instead of top-K chunk retrieval.

    This preserves the existing flow for specific/comparison/general queries.

    Args:
        user_id: User ID
        bucket_id: Bucket ID
        query: Original query text
        query_type: One of 'aggregate', 'calculation' or 'date_filter';
            any other value yields a single "error" event.
        chat_id: Chat session ID for conversation storage; when empty the
            exchange is not stored.

    Yields:
        Event dicts keyed by "type": one "error" event (then the generator
        stops), or a "sources" event, zero or more "content" events and a
        final "done" event.
    """
    print(f"[SPECIALIZED QUERY] Handling {query_type} query")

    # Step 1: Get context from appropriate handler.
    # Handlers are looked up by name so only the one needed is resolved.
    handler_names = {
        'aggregate': '_handle_aggregate_query',
        'calculation': '_handle_calculation_query',
        'date_filter': '_handle_date_filter_query',
    }
    handler_name = handler_names.get(query_type)
    if handler_name is None:
        yield {"type": "error", "content": f"Unknown query type: {query_type}"}
        return
    result = getattr(self, handler_name)(user_id, bucket_id, query)

    context = result.get('context', '')
    sources = result.get('sources', {})
    total_docs = result.get('total_documents', result.get('total_matches', 0))

    # Check if we have any data
    if not context or total_docs == 0:
        if query_type == 'date_filter' and context:
            # The date handler ran but nothing matched: say so, rather than
            # claiming that no metadata exists at all.
            target_year = result.get('target_year')
            criteria = f" (year {target_year})" if target_year else ""
            message = (f"No policies matched the date criteria{criteria}. "
                       f"If you expected matches, make sure document metadata has been "
                       f"extracted by running the migration script.")
        else:
            message = "No document metadata found. Please run the migration script to extract metadata from your documents."
        yield {
            "type": "error",
            "content": message
        }
        return

    # Send sources first
    yield {
        "type": "sources",
        "sources": list(sources.keys()),
        "source_files": list(sources.values())
    }

    # Step 2: Build AI prompt for specialized query
    if query_type == 'aggregate':
        system_prompt = f"""You are Iribl AI, a document analysis assistant. You are answering an AGGREGATE query that requires information from ALL {total_docs} documents.

CRITICAL INSTRUCTIONS:
1. You have been given metadata and summaries for ALL {total_docs} documents in the bucket.
2. Your answer must be COMPREHENSIVE - include ALL relevant items from the data provided.
3. Format your response clearly with headers, bullet points, and bold text.
4. For "list all" queries, actually list ALL matching items.
5. For "how many" queries, give exact counts.
6. Organize information logically (by type, by company, by date, etc.).

Do NOT say information is missing - you have the full list. Do NOT ask for more documents."""

    elif query_type == 'calculation':
        system_prompt = f"""You are Iribl AI, a document analysis assistant performing CALCULATIONS across {total_docs} documents.

CRITICAL INSTRUCTIONS:
1. The calculation results have already been computed from all documents.
2. Present the numbers clearly with proper formatting (₹ for currency, commas for thousands).
3. Explain what the numbers mean in business context.
4. If asked for totals, provide grand totals.
5. If asked for averages, provide averages with context.
6. Include document counts to show the calculation scope.

Present the data accurately - these are pre-computed from actual document metadata."""

    else:  # date_filter (unknown types returned above)
        total_matches = result.get('total_matches', 0)
        target_year = result.get('target_year', 'N/A')
        system_prompt = f"""You are Iribl AI, a document analysis assistant answering a DATE-BASED query.

CRITICAL INSTRUCTIONS:
1. You have been given {total_matches} policies matching the date criteria (year {target_year}).
2. List ALL matching policies with their relevant dates.
3. Format the response as a clear list with key details.
4. If no matches found, say so explicitly.
5. Include date-relevant details: start date, end date, renewal date.

Present ALL matching documents - do not truncate the list."""

    # Step 3: Build messages
    messages = [{"role": "system", "content": system_prompt}]

    # Add context and query
    user_message = f"""Based on the following document metadata and calculations, answer my question.

DOCUMENT DATA:
{context}

QUESTION: {query}

Instructions: Provide a complete, well-formatted answer based on ALL the data above."""

    messages.append({"role": "user", "content": user_message})

    # Step 4: Stream response using DeepSeek or fallback
    full_response = ""
    chunk_count = 0

    if self.use_deepseek:
        print("[SPECIALIZED QUERY] Using DeepSeek for response")
        for chunk in self._call_deepseek_streaming(messages):
            if "error" in chunk:
                # Fallback to OpenRouter
                print("[SPECIALIZED QUERY] DeepSeek failed, falling back...")
                break
            if "chunk" in chunk:
                full_response += chunk["chunk"]
                chunk_count += 1
                if chunk_count <= 3:
                    print(f"[SPECIALIZED YIELD] Chunk {chunk_count}: {chunk['chunk'][:30]}...")
                yield {"type": "content", "content": chunk["chunk"]}

        print(f"[SPECIALIZED QUERY] DeepSeek streaming done, yielded {chunk_count} chunks")

    # Fallback to OpenRouter if DeepSeek failed or not available
    if not full_response:
        print("[SPECIALIZED QUERY] Falling back to OpenRouter")
        for model_key in self.fallback_order:
            try:
                for chunk in self._call_ai_model_streaming(model_key, messages):
                    if "error" in chunk:
                        continue
                    if "chunk" in chunk:
                        full_response += chunk["chunk"]
                        chunk_count += 1
                        yield {"type": "content", "content": chunk["chunk"]}
                if full_response:
                    break
            except Exception as e:
                print(f"[SPECIALIZED QUERY] Model {model_key} failed: {e}")
                continue

    # Step 5: Store conversation
    if full_response and chat_id:
        try:
            chroma_service.store_conversation(
                user_id=user_id,
                role="user",
                content=query,
                bucket_id=bucket_id or "",
                chat_id=chat_id
            )
            chroma_service.store_conversation(
                user_id=user_id,
                role="assistant",
                content=full_response,
                bucket_id=bucket_id or "",
                chat_id=chat_id
            )
        except Exception as e:
            # Storage is best-effort; the answer has already been streamed.
            print(f"[SPECIALIZED QUERY] Failed to store conversation: {e}")

    # Send done signal
    yield {"type": "done", "query_type": query_type, "total_documents": total_docs}
|
| 1148 |
+
|
| 1149 |
+
def _build_conversation_context(self, history: list[dict], query: str) -> str:
    """
    Summarise the tail of a conversation as plain text for pronoun resolution.

    Only the last four messages (two Q&A pairs) are used. User messages are
    quoted up to 500 characters; any other role is quoted up to 300
    characters followed by "...". The `query` argument is not consulted.

    Returns:
        One line per message joined by newlines, or "" when there is no history.
    """
    if not history:
        return ""

    def _describe(message: dict) -> str:
        # Truncate long messages before deciding how to label them.
        text = message.get('content', '')[:500]
        if message.get('role', 'user') == 'user':
            return f"User asked: {text}"
        # Assistant replies keep only their first 300 characters.
        return f"Assistant answered about: {text[:300]}..."

    # A slice shorter than four simply yields the whole history.
    return "\n".join(_describe(message) for message in history[-4:])
|
| 1174 |
+
|
| 1175 |
+
def _build_graph_context(self, chunks: list[dict],
                         user_id: str) -> list[dict]:
    """
    Expand retrieved chunks with their immediate neighbours.

    For the first retrieved chunk of each document, the chunks directly
    before and after it in that document are added, flagged
    'is_context': True. Every retrieved chunk is then added, flagged
    'is_context': False. The `user_id` argument is not consulted.
    """
    expanded = []
    visited_docs = set()

    for hit in chunks:
        doc_id = hit['doc_id']

        # Neighbours are fetched once per document, for its first hit only.
        if doc_id not in visited_docs:
            visited_docs.add(doc_id)

            doc_chunks = chroma_service.get_document_chunks(doc_id)
            hit_id = hit['chunk_id']

            # Position of the hit inside its document, if it can be found.
            position = next(
                (idx for idx, candidate in enumerate(doc_chunks)
                 if candidate['chunk_id'] == hit_id),
                None,
            )

            if position is not None:
                # One chunk either side; the slice clamps at the document end.
                for neighbour in doc_chunks[max(0, position - 1):position + 2]:
                    if neighbour['chunk_id'] == hit_id:
                        continue
                    expanded.append({
                        **neighbour,
                        'doc_id': doc_id,
                        'is_context': True
                    })

        expanded.append({**hit, 'is_context': False})

    return expanded
|
| 1219 |
+
|
| 1220 |
+
def _call_ai_model(self, model_key: str, messages: list[dict]) -> dict:
    """
    Call AI model via OpenRouter (non-streaming).

    Returns:
        {"success": True, "response": <text>, "model": model_key} on HTTP 200,
        otherwise {"success": False, "error": <message>} for an unknown
        model key, a non-200 status or any exception raised by the request.
    """
    model_id = self.model_map.get(model_key)
    if not model_id:
        return {"success": False, "error": f"Unknown model: {model_key}"}

    request_headers = {
        "Authorization": f"Bearer {self.api_key}",
        "Content-Type": "application/json",
        "HTTP-Referer": "http://localhost:5000",
        "X-Title": "NotebookLM Clone"
    }
    request_body = {
        "model": model_id,
        "messages": messages,
        "max_tokens": self.max_tokens,
        "temperature": self.temperature
    }

    try:
        reply = requests.post(
            f"{self.base_url}/chat/completions",
            headers=request_headers,
            json=request_body,
            timeout=self.timeout
        )

        # Guard clause: anything but 200 is reported as an API error.
        if reply.status_code != 200:
            return {
                "success": False,
                "error": f"API error: {reply.status_code}"
            }

        answer = reply.json()['choices'][0]['message']['content']
        return {"success": True, "response": answer, "model": model_key}
    except Exception as e:
        return {"success": False, "error": str(e)}
|
| 1259 |
+
|
| 1260 |
+
def _call_ai_model_streaming(self, model_key: str, messages: list[dict]):
    """
    Call AI model with streaming - yields text chunks as they arrive.

    Yields:
        {"chunk": <text>, "model": model_key} for every non-empty content
        delta, or a single {"error": <message>} for an unknown model key,
        a non-200 status or an exception raised by the request/stream.
    """
    # Function-scope import (the original imported json here too, but once
    # per streamed line).
    import json

    model_id = self.model_map.get(model_key)
    if not model_id:
        yield {"error": f"Unknown model: {model_key}"}
        return

    headers = {
        "Authorization": f"Bearer {self.api_key}",
        "Content-Type": "application/json",
        "HTTP-Referer": "http://localhost:5000",
        "X-Title": "NotebookLM Clone"
    }

    payload = {
        "model": model_id,
        "messages": messages,
        "max_tokens": self.max_tokens,
        "temperature": self.temperature,
        "stream": True
    }

    response = None
    try:
        response = requests.post(
            f"{self.base_url}/chat/completions",
            headers=headers,
            json=payload,
            timeout=self.timeout,
            stream=True
        )

        if response.status_code != 200:
            yield {"error": f"API error: {response.status_code}"}
            return

        for line in response.iter_lines():
            if not line:
                continue
            line_text = line.decode('utf-8')
            if not line_text.startswith('data: '):
                continue
            data_str = line_text[6:]
            if data_str.strip() == '[DONE]':
                break

            # Malformed or unexpectedly shaped events are skipped. The yield
            # is kept outside this try so that closing the generator
            # (GeneratorExit) is never swallowed by the parse handler.
            try:
                data = json.loads(data_str)
                delta = data.get('choices', [{}])[0].get('delta', {})
                content = delta.get('content', '')
            except (ValueError, AttributeError, IndexError, KeyError, TypeError):
                continue

            if content:
                yield {"chunk": content, "model": model_key}
    except Exception as e:
        yield {"error": str(e)}
    finally:
        # A streamed response holds its connection until it is fully read
        # or closed; close it even when the consumer stops early.
        if response is not None:
            response.close()
|
| 1312 |
+
|
| 1313 |
+
def _call_deepseek_streaming(self, messages: list[dict]):
    """
    Call DeepSeek API with streaming - highly capable model.

    Yields:
        {"chunk": <text>, "model": "deepseek"} for every non-empty content
        delta, or a single {"error": <message>} when no API key is
        configured, the API answers with a non-200 status, or the
        request/stream raises.
    """
    # Function-scope imports (the original imported these inside the body,
    # json once per streamed line).
    import json
    import time

    if not self.deepseek_api_key:
        print("[DEEPSEEK] No API key configured")
        yield {"error": "DeepSeek API key not configured"}
        return

    print(f"[DEEPSEEK] Calling model: {self.deepseek_model}")

    headers = {
        "Authorization": f"Bearer {self.deepseek_api_key}",
        "Content-Type": "application/json"
    }

    payload = {
        "model": self.deepseek_model,
        "messages": messages,
        "max_tokens": self.max_tokens,
        "temperature": self.temperature,
        "stream": True
    }

    response = None
    try:
        start = time.time()
        response = requests.post(
            f"{self.deepseek_base_url}/chat/completions",
            headers=headers,
            json=payload,
            timeout=60,  # DeepSeek may need more time for complex queries
            stream=True
        )

        print(f"[DEEPSEEK] Response status: {response.status_code} in {time.time()-start:.2f}s")

        if response.status_code != 200:
            print(f"[DEEPSEEK] Error: {response.text[:200]}")
            yield {"error": f"DeepSeek API error: {response.status_code}"}
            return

        chunk_count = 0
        for line in response.iter_lines():
            if not line:
                continue
            line_text = line.decode('utf-8')
            if not line_text.startswith('data: '):
                continue
            data_str = line_text[6:]
            if data_str.strip() == '[DONE]':
                print(f"[DEEPSEEK] Streaming complete, yielded {chunk_count} chunks")
                break

            # Only parsing is guarded here; the yield stays outside so that
            # exceptions delivered at the yield point are not misreported
            # as parse errors.
            try:
                data = json.loads(data_str)
                delta = data.get('choices', [{}])[0].get('delta', {})
                content = delta.get('content', '')
            except Exception as parse_error:
                print(f"[DEEPSEEK] Parse error: {parse_error}")
                continue

            if content:
                chunk_count += 1
                if chunk_count <= 3:
                    print(f"[DEEPSEEK] Chunk {chunk_count}: {content[:50]}...")
                yield {"chunk": content, "model": "deepseek"}

        if chunk_count == 0:
            print("[DEEPSEEK] WARNING: No chunks received from stream")
    except Exception as e:
        print(f"[DEEPSEEK] Exception: {e}")
        yield {"error": str(e)}
    finally:
        # A streamed response holds its connection until it is fully read
        # or closed; close it even when the consumer stops early.
        if response is not None:
            response.close()
|
| 1379 |
+
|
| 1380 |
+
def query(self, user_id: str, query: str,
          doc_ids: list[str] = None,
          bucket_id: str = None,
          conversation_history: list[dict] = None) -> dict:
    """
    Answer a question from the user's documents (non-streaming RAG path).

    Flow:
    1. Search for candidate chunks (optionally restricted by doc_ids / bucket_id)
    2. Keep chunks whose distance is below self.relevance_threshold; when the
       threshold rejects everything, fall back to the first 5 search results
    3. Build a source-labelled context (graph expansion is skipped for speed)
    4. Add stored history (last 4) and session history (last 6) to the prompt
    5. Try each model in self.fallback_order until one succeeds
    6. Persist the question/answer pair in conversation memory

    Returns:
        On success: "success", "response", "sources" (plus "model",
        "source_files", "chunks_used", "chunks_filtered" when a model answered).
        On total model failure: "success": False and "error".
    """
    memory_bucket = bucket_id or ""

    def remember(role, text):
        # Single place that writes one message to persistent memory.
        chroma_service.store_conversation(
            user_id=user_id,
            role=role,
            content=text,
            bucket_id=memory_bucket
        )

    # --- Retrieval ---------------------------------------------------------
    hits = chroma_service.search_chunks(
        user_id=user_id,
        query=query,
        doc_ids=doc_ids,
        bucket_id=bucket_id,
        top_k=self.top_k
    )

    # Lower distance = more relevant; chunks without a distance count as 0.
    selected = [hit for hit in hits if hit.get('distance', 0) < self.relevance_threshold]
    if not selected and hits:
        # Threshold was too strict: keep the first 5 results instead.
        selected = hits[:5]

    # --- Nothing to answer from --------------------------------------------
    if not selected:
        no_info_response = "I don't have any relevant information in your documents to answer this question. Please upload some documents first or ask about a topic covered in your uploaded documents."
        remember("user", query)
        remember("assistant", no_info_response)
        return {
            "success": True,
            "response": no_info_response,
            "sources": []
        }

    # --- Context assembly --------------------------------------------------
    enhanced_chunks = [
        {'doc_id': hit['doc_id'], 'text': hit['text'], 'is_context': False}
        for hit in selected
    ]

    sources = {}  # doc_id -> filename (or doc_id when the document is not found)
    context_parts = []
    for entry in enhanced_chunks:
        doc_id = entry['doc_id']
        if doc_id not in sources:
            doc_info = chroma_service.get_document(doc_id, user_id)
            sources[doc_id] = doc_info['filename'] if doc_info else doc_id
        context_parts.append(f"[Source: {sources[doc_id]}]\n{entry['text']}")

    context = "\n\n---\n\n".join(context_parts)

    # --- Prompt ------------------------------------------------------------
    system_prompt = """You are Iribl AI, a document analysis assistant. You MUST follow these rules strictly:

**CROSS-DOCUMENT INTELLIGENCE (CRITICAL):**
1. SYNTHESIZE information from ALL relevant document sections
2. If documents have CONFLICTING information, state both clearly
3. Never confuse or mix up information between different documents

**ACCURACY RULES:**
1. ONLY answer using information from the DOCUMENT CONTEXT provided below
2. NEVER use external knowledge, training data, or make assumptions
3. If the answer is NOT in the documents, say: "This information is not found in your documents."

**FORMATTING:**
- Use **bold** for key terms and important values
- Use headers (##) for multi-topic answers
- Use bullet points with each item on its own line
- For tables, use proper markdown: | col | col | with |---| separator

**RESPONSE LENGTH:**
- Simple questions: 2-4 sentences
- Lists: Complete list from ALL documents
- Analysis: Structured response with headers

**IMPORTANT: Do NOT list document names or sources at the end of your response.**

You are answering questions about the user's uploaded documents ONLY."""

    messages = [{"role": "system", "content": system_prompt}]

    # Persistent memory: fetch up to max_history, but only replay the last 4.
    stored_history = chroma_service.get_conversation_history(
        user_id=user_id,
        bucket_id=bucket_id,
        limit=self.max_history
    )
    for past in stored_history[-4:]:
        messages.append({"role": past['role'], "content": past['content']})

    # Session history from the caller: last 6, skipping exact duplicates.
    for past in (conversation_history[-6:] if conversation_history else []):
        if past not in messages:
            messages.append(past)

    user_message = f"""Based on the following document sections, answer my question accurately.

DOCUMENT SECTIONS:
{context}

QUESTION: {query}

Instructions: Synthesize from multiple documents if relevant. Be detailed but concise. Do NOT mention document names or sources at the end."""

    messages.append({"role": "user", "content": user_message})

    # --- Generation with model fallback ------------------------------------
    for model_key in self.fallback_order:
        result = self._call_ai_model(model_key, messages)
        if not result['success']:
            continue

        # First model that succeeds wins; record the exchange and return.
        remember("user", query)
        remember("assistant", result['response'])

        return {
            "success": True,
            "response": result['response'],
            "model": result['model'],
            "sources": list(sources.keys()),
            "source_files": list(sources.values()),
            "chunks_used": len(enhanced_chunks),
            "chunks_filtered": len(hits) - len(selected)
        }

    return {
        "success": False,
        "error": "All AI models failed to generate a response"
    }
|
| 1550 |
+
|
| 1551 |
+
def query_stream(self, user_id: str, query: str,
                 doc_ids: list[str] = None,
                 bucket_id: str = None,
                 chat_id: str = ""):
    """
    Streaming version of query - yields response events as they arrive.

    The query is first parsed by self._parse_query_with_ai. When the parse
    result has needs_metadata set, the request is delegated entirely to
    self._stream_metadata_query. Otherwise chunks are retrieved for every
    expanded query variation, merged, and sent to the model together with
    this chat's stored history.

    Yields dicts with a "type" key:
      - {"type": "sources", "sources": [...], "source_files": [...]}
      - {"type": "chunk", "content": str} for each streamed piece of text
      - {"type": "done", "model": str} after a response was produced and stored
      - {"type": "error", "content": str} when nothing was retrieved/generated

    Note: if a provider fails after it has already streamed some text, that
    partial text is what gets stored and "done" is still emitted (chunks that
    were already yielded cannot be retracted); a warning is printed.
    """
    import time

    # Step 0: AI-powered query parsing - understand intent and extract structured parameters
    parsed = self._parse_query_with_ai(query)
    print(f"[QUERY ROUTING] AI-parsed query: {parsed}")

    # needs_metadata = True means query requires aggregate data across all documents
    if parsed.get('needs_metadata', False):
        yield from self._stream_metadata_query(user_id, bucket_id, query, parsed, chat_id)
        return

    # All other query types continue with top-K chunk retrieval.

    # Step 1: Expand query for better retrieval (handles "module 5" -> "module five", etc.)
    expanded_queries = self._expand_query(query)
    print(f"[DEBUG] Query expansion: {expanded_queries}")

    # Step 1.5: Detect if user is asking about a specific document by name
    user_docs = chroma_service.get_user_documents(user_id, bucket_id)
    referenced_doc_ids = self._detect_document_reference(query, user_docs)
    if referenced_doc_ids:
        print(f"[DEBUG] Detected document reference in query: {referenced_doc_ids}")
        # Restrict the search to the referenced documents, unless the caller
        # already chose an explicit doc_ids filter.
        if doc_ids is None:
            doc_ids = referenced_doc_ids

    # Step 2: Retrieve chunks using all query variations and merge unique results
    t1 = time.time()
    all_chunks = []
    seen_chunk_ids = set()

    for q in expanded_queries:
        chunks = chroma_service.search_chunks(
            user_id=user_id,
            query=q,
            doc_ids=doc_ids,
            bucket_id=bucket_id,
            top_k=self.top_k
        )
        for chunk in chunks:
            chunk_id = chunk.get('chunk_id')
            if chunk_id is None:
                # No id available: identify the chunk by document + full text.
                # (A short text prefix would merge distinct chunks that merely
                # start the same way, e.g. repeated table headers.)
                chunk_id = (chunk.get('doc_id'), chunk['text'])
            if chunk_id not in seen_chunk_ids:
                seen_chunk_ids.add(chunk_id)
                all_chunks.append(chunk)

    # Sort by relevance (distance) and limit
    all_chunks.sort(key=lambda x: x.get('distance', 0))
    chunks = all_chunks[:self.top_k]

    print(f"[TIMING] ChromaDB search with expansion: {time.time()-t1:.2f}s")

    # Debug: Show what chunks we're getting
    print(f"[DEBUG] Retrieved {len(chunks)} unique chunks from {len(expanded_queries)} queries:")
    for i, c in enumerate(chunks[:5]):  # Show first 5
        print(f"  Chunk {i+1} (dist={c.get('distance', 0):.3f}): {c['text'][:100]}...")

    # Step 3: Use ALL retrieved chunks - do not filter aggressively
    relevant_chunks = chunks

    # Only apply the relevance threshold if we have way too many chunks
    if len(relevant_chunks) > 100:
        relevant_chunks = [c for c in chunks if c.get('distance', 0) < self.relevance_threshold]
        if not relevant_chunks:
            relevant_chunks = chunks[:80]  # Fallback to top 80

    if not relevant_chunks:
        yield {"type": "error", "content": "No relevant documents found. Please upload documents first."}
        return

    # Step 4: Build context with prominent document source labels for cross-document intelligence
    t2 = time.time()
    context_parts = []
    sources = {}
    looked_up_filenames = {}  # doc_id -> filename, so each document is fetched at most once

    for i, chunk in enumerate(relevant_chunks, 1):
        doc_id = chunk['doc_id']
        filename = chunk.get('filename', 'Document')

        # Get filename from chroma if not in chunk
        if filename == 'Document':
            if doc_id not in looked_up_filenames:
                doc_info = chroma_service.get_document(doc_id, user_id)
                looked_up_filenames[doc_id] = (
                    doc_info.get('filename', 'Document') if doc_info else 'Document'
                )
            filename = looked_up_filenames[doc_id]

        sources[doc_id] = filename
        # Add prominent document source label with chunk number for cross-document intelligence
        section = f"=== DOCUMENT: {filename} (Section {i}) ===\n{chunk['text']}"
        context_parts.append(section)

    context = "\n\n" + "\n\n".join(context_parts)
    print(f"[TIMING] Context build: {time.time()-t2:.2f}s")
    print(f"[DEBUG] Context length: {len(context)} chars, chunks: {len(relevant_chunks)}")

    # Send sources first
    yield {"type": "sources", "sources": list(sources.keys()), "source_files": list(sources.values())}

    # Step 5: Load conversation history for this chat (CRITICAL FOR MEMORY)
    stored_history = []
    if chat_id:
        try:
            all_history = chroma_service.get_conversation_history(
                user_id=user_id,
                bucket_id=bucket_id,
                limit=50  # Get more, filter by chat_id
            )
            # Keep this chat's messages, plus messages without a chat_id that
            # belong to the same bucket.
            stored_history = [msg for msg in all_history
                              if msg.get('chat_id', '') == chat_id or
                              (not msg.get('chat_id') and msg.get('bucket_id', '') == (bucket_id or ''))]
            stored_history = stored_history[-self.max_history:]
            print(f"[DEBUG] Loaded {len(stored_history)} history messages for chat {chat_id}")
        except Exception as e:
            # Best effort: a history failure must not block answering.
            print(f"[DEBUG] Failed to load history: {e}")

    # Step 6: Detect query type and build conversation context
    query_type = self._detect_query_type(query, stored_history)
    conversation_context = self._build_conversation_context(stored_history, query)
    print(f"[DEBUG] Query type: {query_type}, has conversation context: {bool(conversation_context)}")

    # Names of the documents that contributed chunks, for cross-document queries
    doc_list = ""
    if query_type in ['cross_document', 'comparison']:
        doc_names = list(sources.values())
        if doc_names:
            # dict.fromkeys de-duplicates while keeping a stable order, so the
            # same inputs always produce the same prompt.
            doc_list = f"\n\nDOCUMENTS IN THIS BUCKET: {', '.join(dict.fromkeys(doc_names))}"

    # Step 7: Build messages with PRODUCTION-GRADE conversational prompt
    system_prompt = """You are Iribl AI, a smart document assistant. Be conversational, precise, and THOROUGH.

## FINDING INFORMATION (CRITICAL)
1. Search EVERY document section before saying something isn't there
2. Look for ALL types of values: per-item amounts, TOTALS, AGGREGATES, counts, numbers of people/items
3. Information may be phrased differently - "total sum insured", "aggregate SI", "Sum Insured" could all refer to different values
4. When asked about "total" - look for aggregate/overall amounts, not per-unit amounts
5. When asked "how many" - look for counts, numbers, quantities in the documents
6. NEVER say "not mentioned" unless you've checked every single section and truly cannot find it

## RESPONSE QUALITY
1. NEVER start with preambles like "Based on a thorough review..." - just answer directly
2. If user says "it", "this", "that" - refer to previous conversation for context
3. Provide COMPLETE answers - include ALL relevant details, numbers, and figures
4. When numbers exist - mention BOTH per-unit AND total/aggregate if available
5. Format responses clearly with bold, bullets, and structure

## ACCURACY RULES
1. Only answer from the documents provided - never use external knowledge
2. When asked about Person A, only give Person A's info - never mix up entities
3. If documents conflict, state both versions

## FORMATTING
- **Bold** for names, numbers, key terms
- Bullet points for lists (comprehensive, include all items)
- Tables for comparisons
- No document source lists at the end

When asked about numbers/totals/counts - SEARCH THOROUGHLY and provide ALL relevant figures found in the documents."""

    messages = [{"role": "system", "content": system_prompt}]

    # Add conversation history for memory (CRITICAL for pronoun resolution)
    for msg in stored_history:
        messages.append({
            "role": msg['role'],
            "content": msg['content']
        })

    # Build user message with context injection for pronouns
    context_injection = ""
    if query_type == 'followup' and conversation_context:
        context_injection = f"""
CONVERSATION CONTEXT (use this to understand pronouns like "it", "this", "that"):
{conversation_context}

"""

    user_message = f"""{context_injection}DOCUMENT SECTIONS (search ALL of these thoroughly):
{context}{doc_list}

QUESTION: {query}

INSTRUCTIONS:
- Answer directly and completely
- Include ALL relevant numbers, totals, counts, and details from the documents
- If this is a follow-up, use conversation history to understand what I'm referring to
- For number questions: look for per-unit values, totals, aggregates, and counts - include all that are relevant"""

    messages.append({"role": "user", "content": user_message})

    # Step 8: Stream the response - try DeepSeek first, then the fallback models
    full_response = ""
    model_used = None

    # Try DeepSeek first if available
    if self.use_deepseek:
        for chunk_data in self._call_deepseek_streaming(messages):
            if "error" in chunk_data:
                if full_response:
                    print("[DEEPSEEK] WARNING: stream failed after partial output; response may be truncated")
                break  # Fall through to the fallback models (only used if nothing was streamed)
            if "chunk" in chunk_data:
                full_response += chunk_data["chunk"]
                model_used = chunk_data["model"]
                yield {"type": "chunk", "content": chunk_data["chunk"]}

    # Fall back to the models in self.fallback_order if DeepSeek produced nothing
    if not full_response:
        for model_key in self.fallback_order:
            had_response = False
            for chunk_data in self._call_ai_model_streaming(model_key, messages):
                if "error" in chunk_data:
                    if had_response:
                        print(f"[DEBUG] WARNING: model {model_key} failed after partial output; response may be truncated")
                    break
                if "chunk" in chunk_data:
                    had_response = True
                    full_response += chunk_data["chunk"]
                    model_used = chunk_data["model"]
                    yield {"type": "chunk", "content": chunk_data["chunk"]}

            # Text has already been sent to the client, so do not start
            # another model on top of it.
            if had_response:
                break

    if full_response:
        # Store conversation with chat_id for proper linking
        chroma_service.store_conversation(user_id, "user", query, bucket_id or "", chat_id)
        chroma_service.store_conversation(user_id, "assistant", full_response, bucket_id or "", chat_id)
        yield {"type": "done", "model": model_used}
    else:
        yield {"type": "error", "content": "Failed to generate response"}
|
| 1792 |
+
|
| 1793 |
+
def clear_memory(self, user_id: str, bucket_id: str = None) -> bool:
    """
    Clear conversation memory for a user.

    Delegates to chroma_service.clear_conversation, passing user_id and
    bucket_id through unchanged, and returns whatever that call returns.
    """
    outcome = chroma_service.clear_conversation(user_id, bucket_id)
    return outcome
|
| 1796 |
+
|
| 1797 |
+
def generate_summary(self, content: str, filename: str = "") -> dict:
    """
    Generate a short summary (2-3 sentences) of the document content.

    Uses DeepSeek first when self.use_deepseek is set, then each model in
    self.fallback_order via self._call_ai_model.

    Args:
        content: Document text; only the first 4000 characters are sent.
            None is treated as empty text.
        filename: Document name included in the prompt and in the
            placeholder summary used when every model fails.

    Returns:
        {"success": True, "summary": str, "model": str} on success, or
        {"success": False, "error": str, "summary": "Document: <filename>"}
        when no model produced a summary.
    """
    # Slicing already handles short strings; `or ""` tolerates None.
    truncated_content = (content or "")[:4000]

    summary_prompt = f"""Please provide a concise 2-3 sentence summary of the following document.
Focus on the main topic, key points, and purpose of the document.
Do not include any preamble like "This document..." - just state the summary directly.

Document: {filename}
Content:
{truncated_content}

Summary:"""

    messages = [
        {"role": "system", "content": "You are a document summarization assistant. Provide brief, accurate summaries in 2-3 sentences."},
        {"role": "user", "content": summary_prompt}
    ]

    # Try DeepSeek first if available
    if self.use_deepseek:
        try:
            import requests
            headers = {
                "Authorization": f"Bearer {self.deepseek_api_key}",
                "Content-Type": "application/json"
            }
            payload = {
                "model": self.deepseek_model,
                "messages": messages,
                "max_tokens": 200,
                "temperature": 0.3
            }
            response = requests.post(
                f"{self.deepseek_base_url}/chat/completions",
                headers=headers,
                json=payload,
                timeout=30
            )
            if response.status_code == 200:
                data = response.json()
                text = data['choices'][0]['message']['content']
                summary = (text or "").strip()
                if summary:
                    return {
                        "success": True,
                        "summary": summary,
                        "model": "deepseek"
                    }
                # An empty completion is not a usable summary; try the fallbacks.
                print("[DEEPSEEK SUMMARY] Error: empty summary returned")
            else:
                # Log the failure instead of falling through silently.
                print(f"[DEEPSEEK SUMMARY] Error: {response.status_code} {response.text[:200]}")
        except Exception as e:
            # Best effort: any DeepSeek failure falls through to the fallback models.
            print(f"[DEEPSEEK SUMMARY] Error: {e}")

    # Fallback models
    for model_key in self.fallback_order:
        result = self._call_ai_model(model_key, messages)
        if result['success']:
            return {
                "success": True,
                "summary": result['response'].strip(),
                "model": result['model']
            }

    return {
        "success": False,
        "error": "Failed to generate summary with all models",
        "summary": f"Document: {filename}"  # Fallback summary
    }
|
| 1866 |
+
|
| 1867 |
+
|
| 1868 |
+
# Singleton instance: one module-level RAGService, constructed when this module is imported.
rag_service = RAGService()
|
| 1870 |
+
|
static/css/styles.css
ADDED
|
@@ -0,0 +1,2567 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* ==================== CSS Variables & Root Styles ==================== */
|
| 2 |
+
:root {
|
| 3 |
+
/* Dark Mode Color Palette */
|
| 4 |
+
--bg-darkest: #0a0a0a;
|
| 5 |
+
--bg-dark: #121212;
|
| 6 |
+
--bg-medium: #1a1a1a;
|
| 7 |
+
--bg-light: #242424;
|
| 8 |
+
--bg-lighter: #2d2d2d;
|
| 9 |
+
--bg-hover: #363636;
|
| 10 |
+
|
| 11 |
+
/* Accent Colors */
|
| 12 |
+
--accent-primary: #ffffff;
|
| 13 |
+
--accent-secondary: #e0e0e0;
|
| 14 |
+
--accent-muted: #888888;
|
| 15 |
+
|
| 16 |
+
/* Glass Effect */
|
| 17 |
+
--glass-bg: rgba(255, 255, 255, 0.03);
|
| 18 |
+
--glass-border: rgba(255, 255, 255, 0.08);
|
| 19 |
+
--glass-shadow: rgba(0, 0, 0, 0.5);
|
| 20 |
+
|
| 21 |
+
/* Text Colors */
|
| 22 |
+
--text-primary: #ffffff;
|
| 23 |
+
--text-secondary: rgba(255, 255, 255, 0.7);
|
| 24 |
+
--text-muted: rgba(255, 255, 255, 0.4);
|
| 25 |
+
|
| 26 |
+
/* Status Colors */
|
| 27 |
+
--success: #4ade80;
|
| 28 |
+
--error: #f87171;
|
| 29 |
+
--info: #60a5fa;
|
| 30 |
+
|
| 31 |
+
/* Spacing */
|
| 32 |
+
--radius-sm: 6px;
|
| 33 |
+
--radius-md: 10px;
|
| 34 |
+
--radius-lg: 16px;
|
| 35 |
+
--radius-xl: 24px;
|
| 36 |
+
|
| 37 |
+
/* Transitions */
|
| 38 |
+
--transition-fast: 0.15s ease;
|
| 39 |
+
--transition-smooth: 0.3s ease;
|
| 40 |
+
--transition-bounce: 0.3s cubic-bezier(0.68, -0.55, 0.265, 1.55);
|
| 41 |
+
|
| 42 |
+
/* Sidebar Width */
|
| 43 |
+
--sidebar-width: 300px;
|
| 44 |
+
--sidebar-collapsed: 50px;
|
| 45 |
+
}
|
| 46 |
+
|
| 47 |
+
/* ==================== Global Styles ==================== */
|
| 48 |
+
* {
|
| 49 |
+
margin: 0;
|
| 50 |
+
padding: 0;
|
| 51 |
+
box-sizing: border-box;
|
| 52 |
+
}
|
| 53 |
+
|
| 54 |
+
html {
|
| 55 |
+
font-size: 16px;
|
| 56 |
+
scroll-behavior: smooth;
|
| 57 |
+
height: 100vh;
|
| 58 |
+
overflow: hidden;
|
| 59 |
+
}
|
| 60 |
+
|
| 61 |
+
body {
|
| 62 |
+
font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
|
| 63 |
+
background: var(--bg-darkest);
|
| 64 |
+
color: var(--text-primary);
|
| 65 |
+
height: 100vh;
|
| 66 |
+
overflow: hidden;
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
/* ==================== Glass Panels ==================== */
|
| 70 |
+
.glass-panel {
|
| 71 |
+
background: var(--bg-dark);
|
| 72 |
+
border: 1px solid var(--glass-border);
|
| 73 |
+
border-radius: var(--radius-lg);
|
| 74 |
+
box-shadow: 0 4px 20px var(--glass-shadow);
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
+
/* ==================== App Container ==================== */
|
| 78 |
+
.app-container {
|
| 79 |
+
position: relative;
|
| 80 |
+
z-index: 10;
|
| 81 |
+
height: 100vh;
|
| 82 |
+
display: flex;
|
| 83 |
+
flex-direction: column;
|
| 84 |
+
overflow: hidden;
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
/* ==================== Main Layout ==================== */
|
| 88 |
+
.main-content {
|
| 89 |
+
flex: 1;
|
| 90 |
+
display: flex;
|
| 91 |
+
padding: 1rem;
|
| 92 |
+
gap: 1rem;
|
| 93 |
+
height: calc(100vh - 0px);
|
| 94 |
+
/* Full viewport height */
|
| 95 |
+
min-height: 0;
|
| 96 |
+
/* Allow flex children to shrink */
|
| 97 |
+
overflow: hidden;
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
+
/* ==================== Dual Sidebars ==================== */
|
| 101 |
+
.sidebar {
|
| 102 |
+
width: var(--sidebar-width);
|
| 103 |
+
height: 100%;
|
| 104 |
+
/* Fill available height */
|
| 105 |
+
display: flex;
|
| 106 |
+
flex-direction: column;
|
| 107 |
+
flex-shrink: 0;
|
| 108 |
+
position: relative;
|
| 109 |
+
transition: width var(--transition-smooth), opacity var(--transition-smooth);
|
| 110 |
+
}
|
| 111 |
+
|
| 112 |
+
.sidebar.collapsed {
|
| 113 |
+
width: var(--sidebar-collapsed);
|
| 114 |
+
}
|
| 115 |
+
|
| 116 |
+
.sidebar.collapsed .sidebar-content {
|
| 117 |
+
opacity: 0;
|
| 118 |
+
pointer-events: none;
|
| 119 |
+
}
|
| 120 |
+
|
| 121 |
+
.sidebar.collapsed .sidebar-toggle .toggle-icon {
|
| 122 |
+
transform: rotate(180deg);
|
| 123 |
+
}
|
| 124 |
+
|
| 125 |
+
.sidebar-content {
|
| 126 |
+
flex: 1;
|
| 127 |
+
display: flex;
|
| 128 |
+
flex-direction: column;
|
| 129 |
+
gap: 0.75rem;
|
| 130 |
+
overflow-y: auto;
|
| 131 |
+
overflow-x: hidden;
|
| 132 |
+
transition: opacity var(--transition-smooth);
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
+
.sidebar-content::-webkit-scrollbar {
|
| 136 |
+
width: 4px;
|
| 137 |
+
}
|
| 138 |
+
|
| 139 |
+
.sidebar-content::-webkit-scrollbar-thumb {
|
| 140 |
+
background: var(--bg-hover);
|
| 141 |
+
border-radius: 2px;
|
| 142 |
+
}
|
| 143 |
+
|
| 144 |
+
/* Sidebar Toggle Button */
|
| 145 |
+
.sidebar-toggle {
|
| 146 |
+
position: absolute;
|
| 147 |
+
top: 50%;
|
| 148 |
+
transform: translateY(-50%);
|
| 149 |
+
width: 24px;
|
| 150 |
+
height: 48px;
|
| 151 |
+
background: var(--bg-light);
|
| 152 |
+
border: 1px solid var(--glass-border);
|
| 153 |
+
display: flex;
|
| 154 |
+
align-items: center;
|
| 155 |
+
justify-content: center;
|
| 156 |
+
cursor: pointer;
|
| 157 |
+
z-index: 10;
|
| 158 |
+
transition: all var(--transition-fast);
|
| 159 |
+
}
|
| 160 |
+
|
| 161 |
+
.sidebar-toggle:hover {
|
| 162 |
+
background: var(--bg-hover);
|
| 163 |
+
}
|
| 164 |
+
|
| 165 |
+
.toggle-icon {
|
| 166 |
+
font-size: 0.7rem;
|
| 167 |
+
color: var(--text-muted);
|
| 168 |
+
transition: transform var(--transition-smooth);
|
| 169 |
+
}
|
| 170 |
+
|
| 171 |
+
.sidebar-left .sidebar-toggle {
|
| 172 |
+
right: -12px;
|
| 173 |
+
border-radius: 0 var(--radius-sm) var(--radius-sm) 0;
|
| 174 |
+
}
|
| 175 |
+
|
| 176 |
+
.sidebar-right .sidebar-toggle {
|
| 177 |
+
left: -12px;
|
| 178 |
+
border-radius: var(--radius-sm) 0 0 var(--radius-sm);
|
| 179 |
+
}
|
| 180 |
+
|
| 181 |
+
/* ==================== Sidebar Sections ==================== */
|
| 182 |
+
.sidebar-section {
|
| 183 |
+
padding: 1rem;
|
| 184 |
+
}
|
| 185 |
+
|
| 186 |
+
.section-header {
|
| 187 |
+
display: flex;
|
| 188 |
+
align-items: center;
|
| 189 |
+
justify-content: space-between;
|
| 190 |
+
cursor: pointer;
|
| 191 |
+
user-select: none;
|
| 192 |
+
}
|
| 193 |
+
|
| 194 |
+
.section-header:hover .collapse-icon {
|
| 195 |
+
color: var(--text-primary);
|
| 196 |
+
}
|
| 197 |
+
|
| 198 |
+
.sidebar-title {
|
| 199 |
+
font-size: 0.75rem;
|
| 200 |
+
font-weight: 600;
|
| 201 |
+
text-transform: uppercase;
|
| 202 |
+
letter-spacing: 0.5px;
|
| 203 |
+
color: var(--text-muted);
|
| 204 |
+
display: flex;
|
| 205 |
+
align-items: center;
|
| 206 |
+
gap: 0.5rem;
|
| 207 |
+
margin: 0;
|
| 208 |
+
}
|
| 209 |
+
|
| 210 |
+
.section-actions {
|
| 211 |
+
display: flex;
|
| 212 |
+
align-items: center;
|
| 213 |
+
gap: 0.25rem;
|
| 214 |
+
}
|
| 215 |
+
|
| 216 |
+
.collapse-icon {
|
| 217 |
+
font-size: 0.6rem;
|
| 218 |
+
color: var(--text-muted);
|
| 219 |
+
transition: transform var(--transition-smooth), color var(--transition-fast);
|
| 220 |
+
}
|
| 221 |
+
|
| 222 |
+
.collapsible.collapsed .collapse-icon {
|
| 223 |
+
transform: rotate(-90deg);
|
| 224 |
+
}
|
| 225 |
+
|
| 226 |
+
.section-body {
|
| 227 |
+
margin-top: 0.75rem;
|
| 228 |
+
max-height: 500px;
|
| 229 |
+
overflow: hidden;
|
| 230 |
+
transition: max-height var(--transition-smooth), opacity var(--transition-smooth), margin var(--transition-smooth);
|
| 231 |
+
}
|
| 232 |
+
|
| 233 |
+
.collapsible.collapsed .section-body {
|
| 234 |
+
max-height: 0;
|
| 235 |
+
opacity: 0;
|
| 236 |
+
margin-top: 0;
|
| 237 |
+
}
|
| 238 |
+
|
| 239 |
+
/* ==================== User Section ==================== */
|
| 240 |
+
.user-section {
|
| 241 |
+
padding: 0.75rem 1rem !important;
|
| 242 |
+
}
|
| 243 |
+
|
| 244 |
+
.user-info-row {
|
| 245 |
+
display: flex;
|
| 246 |
+
align-items: center;
|
| 247 |
+
justify-content: space-between;
|
| 248 |
+
}
|
| 249 |
+
|
| 250 |
+
.user-badge {
|
| 251 |
+
display: flex;
|
| 252 |
+
align-items: center;
|
| 253 |
+
gap: 0.5rem;
|
| 254 |
+
}
|
| 255 |
+
|
| 256 |
+
.user-avatar {
|
| 257 |
+
width: 32px;
|
| 258 |
+
height: 32px;
|
| 259 |
+
background: var(--bg-hover);
|
| 260 |
+
border-radius: 50%;
|
| 261 |
+
display: flex;
|
| 262 |
+
align-items: center;
|
| 263 |
+
justify-content: center;
|
| 264 |
+
font-weight: 600;
|
| 265 |
+
font-size: 0.85rem;
|
| 266 |
+
}
|
| 267 |
+
|
| 268 |
+
.user-details {
|
| 269 |
+
display: flex;
|
| 270 |
+
flex-direction: column;
|
| 271 |
+
gap: 0.1rem;
|
| 272 |
+
}
|
| 273 |
+
|
| 274 |
+
.user-details span:first-child {
|
| 275 |
+
font-size: 0.9rem;
|
| 276 |
+
font-weight: 500;
|
| 277 |
+
}
|
| 278 |
+
|
| 279 |
+
.user-role {
|
| 280 |
+
font-size: 0.7rem;
|
| 281 |
+
color: var(--text-muted);
|
| 282 |
+
}
|
| 283 |
+
|
| 284 |
+
/* ==================== Custom Animated Dropdown ==================== */
|
| 285 |
+
.custom-select {
|
| 286 |
+
position: relative;
|
| 287 |
+
width: 100%;
|
| 288 |
+
margin-bottom: 0.75rem;
|
| 289 |
+
}
|
| 290 |
+
|
| 291 |
+
.custom-select.compact {
|
| 292 |
+
margin-bottom: 0;
|
| 293 |
+
width: auto;
|
| 294 |
+
min-width: 180px;
|
| 295 |
+
}
|
| 296 |
+
|
| 297 |
+
.select-trigger {
|
| 298 |
+
display: flex;
|
| 299 |
+
align-items: center;
|
| 300 |
+
justify-content: space-between;
|
| 301 |
+
padding: 0.65rem 1rem;
|
| 302 |
+
background: var(--bg-medium);
|
| 303 |
+
border: 1px solid var(--glass-border);
|
| 304 |
+
border-radius: var(--radius-md);
|
| 305 |
+
cursor: pointer;
|
| 306 |
+
transition: all var(--transition-fast);
|
| 307 |
+
}
|
| 308 |
+
|
| 309 |
+
.select-trigger:hover {
|
| 310 |
+
background: var(--bg-light);
|
| 311 |
+
border-color: rgba(255, 255, 255, 0.15);
|
| 312 |
+
}
|
| 313 |
+
|
| 314 |
+
.custom-select.open .select-trigger {
|
| 315 |
+
border-color: rgba(255, 255, 255, 0.2);
|
| 316 |
+
border-radius: var(--radius-md) var(--radius-md) 0 0;
|
| 317 |
+
}
|
| 318 |
+
|
| 319 |
+
.select-value {
|
| 320 |
+
font-size: 0.85rem;
|
| 321 |
+
color: var(--text-secondary);
|
| 322 |
+
white-space: nowrap;
|
| 323 |
+
overflow: hidden;
|
| 324 |
+
text-overflow: ellipsis;
|
| 325 |
+
}
|
| 326 |
+
|
| 327 |
+
.select-arrow {
|
| 328 |
+
font-size: 0.6rem;
|
| 329 |
+
color: var(--text-muted);
|
| 330 |
+
transition: transform var(--transition-smooth);
|
| 331 |
+
margin-left: 0.5rem;
|
| 332 |
+
}
|
| 333 |
+
|
| 334 |
+
.custom-select.open .select-arrow {
|
| 335 |
+
transform: rotate(180deg);
|
| 336 |
+
}
|
| 337 |
+
|
| 338 |
+
.select-options {
|
| 339 |
+
position: absolute;
|
| 340 |
+
top: 100%;
|
| 341 |
+
left: 0;
|
| 342 |
+
right: 0;
|
| 343 |
+
background: var(--bg-medium);
|
| 344 |
+
border: 1px solid var(--glass-border);
|
| 345 |
+
border-top: none;
|
| 346 |
+
border-radius: 0 0 var(--radius-md) var(--radius-md);
|
| 347 |
+
max-height: 0;
|
| 348 |
+
overflow: hidden;
|
| 349 |
+
opacity: 0;
|
| 350 |
+
z-index: 100;
|
| 351 |
+
transition: max-height var(--transition-smooth), opacity var(--transition-fast);
|
| 352 |
+
box-shadow: 0 8px 20px rgba(0, 0, 0, 0.4);
|
| 353 |
+
}
|
| 354 |
+
|
| 355 |
+
.custom-select.open .select-options {
|
| 356 |
+
max-height: 200px;
|
| 357 |
+
opacity: 1;
|
| 358 |
+
overflow-y: auto;
|
| 359 |
+
}
|
| 360 |
+
|
| 361 |
+
.select-options::-webkit-scrollbar {
|
| 362 |
+
width: 4px;
|
| 363 |
+
}
|
| 364 |
+
|
| 365 |
+
.select-options::-webkit-scrollbar-thumb {
|
| 366 |
+
background: var(--bg-hover);
|
| 367 |
+
border-radius: 2px;
|
| 368 |
+
}
|
| 369 |
+
|
| 370 |
+
.select-option {
|
| 371 |
+
padding: 0.6rem 1rem;
|
| 372 |
+
font-size: 0.85rem;
|
| 373 |
+
color: var(--text-secondary);
|
| 374 |
+
cursor: pointer;
|
| 375 |
+
transition: all var(--transition-fast);
|
| 376 |
+
display: flex;
|
| 377 |
+
align-items: center;
|
| 378 |
+
gap: 0.5rem;
|
| 379 |
+
}
|
| 380 |
+
|
| 381 |
+
.select-option:hover {
|
| 382 |
+
background: var(--bg-light);
|
| 383 |
+
color: var(--text-primary);
|
| 384 |
+
}
|
| 385 |
+
|
| 386 |
+
.select-option.active {
|
| 387 |
+
background: var(--bg-hover);
|
| 388 |
+
color: var(--text-primary);
|
| 389 |
+
}
|
| 390 |
+
|
| 391 |
+
.select-option .option-icon {
|
| 392 |
+
font-size: 1rem;
|
| 393 |
+
}
|
| 394 |
+
|
| 395 |
+
/* ==================== Buckets List ==================== */
|
| 396 |
+
.buckets-list {
|
| 397 |
+
display: flex;
|
| 398 |
+
flex-direction: column;
|
| 399 |
+
gap: 0.25rem;
|
| 400 |
+
max-height: 180px;
|
| 401 |
+
overflow-y: auto;
|
| 402 |
+
}
|
| 403 |
+
|
| 404 |
+
.bucket-item {
|
| 405 |
+
display: flex;
|
| 406 |
+
align-items: center;
|
| 407 |
+
gap: 0.5rem;
|
| 408 |
+
padding: 0.5rem 0.75rem;
|
| 409 |
+
border-radius: var(--radius-sm);
|
| 410 |
+
cursor: pointer;
|
| 411 |
+
transition: all var(--transition-fast);
|
| 412 |
+
}
|
| 413 |
+
|
| 414 |
+
.bucket-item:hover {
|
| 415 |
+
background: var(--bg-light);
|
| 416 |
+
}
|
| 417 |
+
|
| 418 |
+
.bucket-item.active {
|
| 419 |
+
background: var(--bg-light);
|
| 420 |
+
border-left: 2px solid var(--accent-primary);
|
| 421 |
+
}
|
| 422 |
+
|
| 423 |
+
.bucket-name {
|
| 424 |
+
flex: 1;
|
| 425 |
+
font-size: 0.85rem;
|
| 426 |
+
}
|
| 427 |
+
|
| 428 |
+
.bucket-count {
|
| 429 |
+
font-size: 0.7rem;
|
| 430 |
+
color: var(--text-muted);
|
| 431 |
+
background: var(--bg-hover);
|
| 432 |
+
padding: 0.1rem 0.4rem;
|
| 433 |
+
border-radius: var(--radius-sm);
|
| 434 |
+
}
|
| 435 |
+
|
| 436 |
+
.bucket-delete {
|
| 437 |
+
opacity: 0;
|
| 438 |
+
padding: 0.2rem;
|
| 439 |
+
font-size: 0.7rem;
|
| 440 |
+
transition: opacity var(--transition-fast);
|
| 441 |
+
}
|
| 442 |
+
|
| 443 |
+
.bucket-item:hover .bucket-delete {
|
| 444 |
+
opacity: 1;
|
| 445 |
+
}
|
| 446 |
+
|
| 447 |
+
/* ==================== Upload Zone ==================== */
|
| 448 |
+
.upload-zone {
|
| 449 |
+
padding: 1.5rem;
|
| 450 |
+
border: 1px dashed rgba(255, 255, 255, 0.15);
|
| 451 |
+
border-radius: var(--radius-md);
|
| 452 |
+
text-align: center;
|
| 453 |
+
cursor: pointer;
|
| 454 |
+
transition: all var(--transition-smooth);
|
| 455 |
+
background: var(--bg-medium);
|
| 456 |
+
}
|
| 457 |
+
|
| 458 |
+
.upload-zone:hover,
|
| 459 |
+
.upload-zone.dragover {
|
| 460 |
+
border-color: rgba(255, 255, 255, 0.3);
|
| 461 |
+
background: var(--bg-light);
|
| 462 |
+
transform: scale(1.02);
|
| 463 |
+
}
|
| 464 |
+
|
| 465 |
+
.upload-icon {
|
| 466 |
+
font-size: 2rem;
|
| 467 |
+
margin-bottom: 0.5rem;
|
| 468 |
+
}
|
| 469 |
+
|
| 470 |
+
.upload-title {
|
| 471 |
+
font-size: 0.9rem;
|
| 472 |
+
font-weight: 600;
|
| 473 |
+
margin-bottom: 0.25rem;
|
| 474 |
+
}
|
| 475 |
+
|
| 476 |
+
.upload-subtitle {
|
| 477 |
+
font-size: 0.75rem;
|
| 478 |
+
color: var(--text-muted);
|
| 479 |
+
}
|
| 480 |
+
|
| 481 |
+
.progress-info {
|
| 482 |
+
display: flex;
|
| 483 |
+
align-items: center;
|
| 484 |
+
gap: 0.5rem;
|
| 485 |
+
margin-bottom: 0.5rem;
|
| 486 |
+
}
|
| 487 |
+
|
| 488 |
+
.progress-bar {
|
| 489 |
+
height: 4px;
|
| 490 |
+
background: var(--bg-hover);
|
| 491 |
+
border-radius: 2px;
|
| 492 |
+
overflow: hidden;
|
| 493 |
+
}
|
| 494 |
+
|
| 495 |
+
.progress-fill {
|
| 496 |
+
height: 100%;
|
| 497 |
+
background: var(--accent-primary);
|
| 498 |
+
width: 0%;
|
| 499 |
+
transition: width var(--transition-smooth);
|
| 500 |
+
}
|
| 501 |
+
|
| 502 |
+
/* Cancel Upload Button */
|
| 503 |
+
.btn-cancel-upload {
|
| 504 |
+
margin-top: 0.75rem;
|
| 505 |
+
width: 100%;
|
| 506 |
+
padding: 0.5rem 1rem;
|
| 507 |
+
background: rgba(248, 113, 113, 0.15);
|
| 508 |
+
color: var(--error);
|
| 509 |
+
border: 1px solid rgba(248, 113, 113, 0.3);
|
| 510 |
+
border-radius: var(--radius-md);
|
| 511 |
+
font-size: 0.8rem;
|
| 512 |
+
font-weight: 500;
|
| 513 |
+
cursor: pointer;
|
| 514 |
+
transition: all var(--transition-fast);
|
| 515 |
+
}
|
| 516 |
+
|
| 517 |
+
.btn-cancel-upload:hover {
|
| 518 |
+
background: rgba(248, 113, 113, 0.25);
|
| 519 |
+
border-color: rgba(248, 113, 113, 0.5);
|
| 520 |
+
transform: translateY(-1px);
|
| 521 |
+
}
|
| 522 |
+
|
| 523 |
+
/* ==================== Documents Section (Right Sidebar) ==================== */
|
| 524 |
+
.documents-section {
|
| 525 |
+
flex: 1;
|
| 526 |
+
display: flex;
|
| 527 |
+
flex-direction: column;
|
| 528 |
+
min-height: 0;
|
| 529 |
+
max-height: 50%;
|
| 530 |
+
transition: all var(--transition-smooth);
|
| 531 |
+
}
|
| 532 |
+
|
| 533 |
+
/* ==================== Chat History Section (Right Sidebar) ==================== */
|
| 534 |
+
.chat-history-section {
|
| 535 |
+
flex: 1;
|
| 536 |
+
display: flex;
|
| 537 |
+
flex-direction: column;
|
| 538 |
+
min-height: 0;
|
| 539 |
+
max-height: 50%;
|
| 540 |
+
transition: all var(--transition-smooth);
|
| 541 |
+
}
|
| 542 |
+
|
| 543 |
+
.chat-history-section.collapsed {
|
| 544 |
+
flex: 0 0 auto;
|
| 545 |
+
min-height: auto;
|
| 546 |
+
}
|
| 547 |
+
|
| 548 |
+
.chat-history-section.collapsed .section-header {
|
| 549 |
+
padding-bottom: 0;
|
| 550 |
+
}
|
| 551 |
+
|
| 552 |
+
.chat-history-section .section-header {
|
| 553 |
+
padding-bottom: 0.5rem;
|
| 554 |
+
}
|
| 555 |
+
|
| 556 |
+
.documents-section.collapsed {
|
| 557 |
+
flex: 0 0 auto;
|
| 558 |
+
min-height: auto;
|
| 559 |
+
}
|
| 560 |
+
|
| 561 |
+
.documents-section.collapsed .section-header {
|
| 562 |
+
padding-bottom: 0;
|
| 563 |
+
}
|
| 564 |
+
|
| 565 |
+
.documents-section .section-header {
|
| 566 |
+
padding-bottom: 0.5rem;
|
| 567 |
+
}
|
| 568 |
+
|
| 569 |
+
.documents-body {
|
| 570 |
+
flex: 1;
|
| 571 |
+
overflow: hidden;
|
| 572 |
+
}
|
| 573 |
+
|
| 574 |
+
.chat-history-body {
|
| 575 |
+
flex: 1;
|
| 576 |
+
overflow: hidden;
|
| 577 |
+
}
|
| 578 |
+
|
| 579 |
+
.documents-list {
|
| 580 |
+
height: 100%;
|
| 581 |
+
overflow-y: auto;
|
| 582 |
+
display: flex;
|
| 583 |
+
flex-direction: column;
|
| 584 |
+
gap: 0.4rem;
|
| 585 |
+
padding-right: 0.25rem;
|
| 586 |
+
}
|
| 587 |
+
|
| 588 |
+
.doc-count {
|
| 589 |
+
margin-left: auto;
|
| 590 |
+
font-size: 0.7rem;
|
| 591 |
+
opacity: 0.6;
|
| 592 |
+
}
|
| 593 |
+
|
| 594 |
+
.document-item {
|
| 595 |
+
display: flex;
|
| 596 |
+
align-items: center;
|
| 597 |
+
gap: 0.5rem;
|
| 598 |
+
padding: 0.6rem 0.75rem;
|
| 599 |
+
background: transparent;
|
| 600 |
+
border: 1px solid transparent;
|
| 601 |
+
border-radius: var(--radius-md);
|
| 602 |
+
cursor: pointer;
|
| 603 |
+
transition: all var(--transition-fast);
|
| 604 |
+
position: relative;
|
| 605 |
+
}
|
| 606 |
+
|
| 607 |
+
.document-item:hover {
|
| 608 |
+
background: var(--bg-light);
|
| 609 |
+
border-color: var(--glass-border);
|
| 610 |
+
}
|
| 611 |
+
|
| 612 |
+
.doc-icon {
|
| 613 |
+
width: 32px;
|
| 614 |
+
height: 32px;
|
| 615 |
+
border-radius: var(--radius-sm);
|
| 616 |
+
display: flex;
|
| 617 |
+
align-items: center;
|
| 618 |
+
justify-content: center;
|
| 619 |
+
font-size: 1rem;
|
| 620 |
+
background: var(--bg-hover);
|
| 621 |
+
}
|
| 622 |
+
|
| 623 |
+
.doc-info {
|
| 624 |
+
flex: 1;
|
| 625 |
+
min-width: 0;
|
| 626 |
+
}
|
| 627 |
+
|
| 628 |
+
.doc-name {
|
| 629 |
+
font-size: 0.8rem;
|
| 630 |
+
font-weight: 500;
|
| 631 |
+
white-space: nowrap;
|
| 632 |
+
overflow: hidden;
|
| 633 |
+
text-overflow: ellipsis;
|
| 634 |
+
}
|
| 635 |
+
|
| 636 |
+
.doc-meta {
|
| 637 |
+
font-size: 0.65rem;
|
| 638 |
+
color: var(--text-muted);
|
| 639 |
+
margin-top: 0.1rem;
|
| 640 |
+
}
|
| 641 |
+
|
| 642 |
+
.doc-view,
|
| 643 |
+
.doc-delete {
|
| 644 |
+
opacity: 0;
|
| 645 |
+
padding: 0.25rem;
|
| 646 |
+
font-size: 0.8rem;
|
| 647 |
+
transition: opacity var(--transition-fast);
|
| 648 |
+
}
|
| 649 |
+
|
| 650 |
+
.document-item:hover .doc-view,
|
| 651 |
+
.document-item:hover .doc-delete {
|
| 652 |
+
opacity: 1;
|
| 653 |
+
}
|
| 654 |
+
|
| 655 |
+
.doc-view:hover {
|
| 656 |
+
color: var(--info);
|
| 657 |
+
}
|
| 658 |
+
|
| 659 |
+
.doc-delete:hover {
|
| 660 |
+
color: var(--error);
|
| 661 |
+
}
|
| 662 |
+
|
| 663 |
+
/* ==================== Chat Container ==================== */
|
| 664 |
+
.chat-container {
|
| 665 |
+
flex: 1;
|
| 666 |
+
display: flex;
|
| 667 |
+
flex-direction: column;
|
| 668 |
+
min-width: 0;
|
| 669 |
+
min-height: 0;
|
| 670 |
+
/* Critical: allows flex child to shrink */
|
| 671 |
+
overflow: hidden;
|
| 672 |
+
height: 100%;
|
| 673 |
+
/* Ensure it takes full height */
|
| 674 |
+
}
|
| 675 |
+
|
| 676 |
+
/* ==================== Chat Bucket Filter ==================== */
|
| 677 |
+
.chat-bucket-filter {
|
| 678 |
+
display: flex;
|
| 679 |
+
align-items: center;
|
| 680 |
+
gap: 0.75rem;
|
| 681 |
+
padding: 0.75rem 1.25rem;
|
| 682 |
+
border-bottom: 1px solid var(--glass-border);
|
| 683 |
+
background: var(--bg-dark);
|
| 684 |
+
flex-shrink: 0;
|
| 685 |
+
/* Prevent filter bar from shrinking */
|
| 686 |
+
}
|
| 687 |
+
|
| 688 |
+
.filter-label {
|
| 689 |
+
font-size: 0.8rem;
|
| 690 |
+
color: var(--text-muted);
|
| 691 |
+
}
|
| 692 |
+
|
| 693 |
+
/* New Chat Button */
|
| 694 |
+
.btn-new-chat {
|
| 695 |
+
margin-left: auto;
|
| 696 |
+
background: var(--accent-primary);
|
| 697 |
+
color: var(--bg-darkest);
|
| 698 |
+
padding: 0.4rem 0.75rem;
|
| 699 |
+
font-size: 0.75rem;
|
| 700 |
+
font-weight: 600;
|
| 701 |
+
border-radius: var(--radius-md);
|
| 702 |
+
white-space: nowrap;
|
| 703 |
+
transition: all var(--transition-fast);
|
| 704 |
+
}
|
| 705 |
+
|
| 706 |
+
.btn-new-chat:hover {
|
| 707 |
+
background: var(--accent-secondary);
|
| 708 |
+
transform: translateY(-1px);
|
| 709 |
+
}
|
| 710 |
+
|
| 711 |
+
/* Clear Chat Button */
|
| 712 |
+
.btn-clear-chat {
|
| 713 |
+
background: var(--bg-light);
|
| 714 |
+
color: var(--text-secondary);
|
| 715 |
+
padding: 0.4rem 0.75rem;
|
| 716 |
+
font-size: 0.75rem;
|
| 717 |
+
font-weight: 600;
|
| 718 |
+
border-radius: var(--radius-md);
|
| 719 |
+
border: 1px solid var(--glass-border);
|
| 720 |
+
white-space: nowrap;
|
| 721 |
+
transition: all var(--transition-fast);
|
| 722 |
+
}
|
| 723 |
+
|
| 724 |
+
.btn-clear-chat:hover {
|
| 725 |
+
background: var(--bg-hover);
|
| 726 |
+
color: var(--text-primary);
|
| 727 |
+
transform: translateY(-1px);
|
| 728 |
+
}
|
| 729 |
+
|
| 730 |
+
/* Chat History List */
|
| 731 |
+
.chat-history-list {
|
| 732 |
+
display: flex;
|
| 733 |
+
flex-direction: column;
|
| 734 |
+
gap: 0.35rem;
|
| 735 |
+
}
|
| 736 |
+
|
| 737 |
+
.chat-history-item {
|
| 738 |
+
display: flex;
|
| 739 |
+
align-items: center;
|
| 740 |
+
gap: 0.5rem;
|
| 741 |
+
padding: 0.5rem 0.6rem;
|
| 742 |
+
background: var(--bg-medium);
|
| 743 |
+
border: 1px solid transparent;
|
| 744 |
+
border-radius: var(--radius-md);
|
| 745 |
+
cursor: pointer;
|
| 746 |
+
transition: all var(--transition-fast);
|
| 747 |
+
}
|
| 748 |
+
|
| 749 |
+
.chat-history-item:hover {
|
| 750 |
+
background: var(--bg-light);
|
| 751 |
+
border-color: var(--glass-border);
|
| 752 |
+
}
|
| 753 |
+
|
| 754 |
+
.chat-history-item.active {
|
| 755 |
+
background: var(--bg-light);
|
| 756 |
+
border-color: var(--accent-muted);
|
| 757 |
+
}
|
| 758 |
+
|
| 759 |
+
.chat-history-icon {
|
| 760 |
+
font-size: 0.9rem;
|
| 761 |
+
flex-shrink: 0;
|
| 762 |
+
}
|
| 763 |
+
|
| 764 |
+
.chat-history-info {
|
| 765 |
+
flex: 1;
|
| 766 |
+
min-width: 0;
|
| 767 |
+
}
|
| 768 |
+
|
| 769 |
+
.chat-history-topic {
|
| 770 |
+
font-size: 0.8rem;
|
| 771 |
+
font-weight: 500;
|
| 772 |
+
white-space: nowrap;
|
| 773 |
+
overflow: hidden;
|
| 774 |
+
text-overflow: ellipsis;
|
| 775 |
+
}
|
| 776 |
+
|
| 777 |
+
.chat-history-date {
|
| 778 |
+
font-size: 0.65rem;
|
| 779 |
+
color: var(--text-muted);
|
| 780 |
+
margin-top: 0.1rem;
|
| 781 |
+
}
|
| 782 |
+
|
| 783 |
+
.chat-history-delete {
|
| 784 |
+
opacity: 0;
|
| 785 |
+
padding: 0.2rem;
|
| 786 |
+
font-size: 0.75rem;
|
| 787 |
+
transition: opacity var(--transition-fast);
|
| 788 |
+
}
|
| 789 |
+
|
| 790 |
+
.chat-history-item:hover .chat-history-delete {
|
| 791 |
+
opacity: 1;
|
| 792 |
+
}
|
| 793 |
+
|
| 794 |
+
.chat-history-delete:hover {
|
| 795 |
+
color: var(--error);
|
| 796 |
+
}
|
| 797 |
+
|
| 798 |
+
|
| 799 |
+
/* ==================== Chat Messages ==================== */
/* Scrollable column of message rows; scrolls vertically, horizontal overflow is clipped. */
.chat-messages {
    flex: 1;
    overflow-y: auto;
    overflow-x: hidden;
    padding: 1rem;
    display: flex;
    flex-direction: column;
    gap: 1rem;
    min-height: 0;
    /* Critical: allows scrolling to work */
}

/* Custom scrollbar for chat messages (WebKit/Blink pseudo-elements only) */
.chat-messages::-webkit-scrollbar {
    width: 6px;
}

.chat-messages::-webkit-scrollbar-track {
    background: transparent;
}

.chat-messages::-webkit-scrollbar-thumb {
    background: var(--bg-hover);
    border-radius: 3px;
}

.chat-messages::-webkit-scrollbar-thumb:hover {
    background: var(--bg-lighter);
}

/* One message row: avatar + bubble, capped at 85% of the column and slid in on creation. */
.message {
    display: flex;
    gap: 0.75rem;
    max-width: 85%;
    animation: messageSlide 0.3s ease-out;
}

@keyframes messageSlide {
    from {
        opacity: 0;
        transform: translateY(10px);
    }

    to {
        opacity: 1;
        transform: translateY(0);
    }
}

/* User rows are pushed to the right edge with the avatar on the right of the bubble. */
.message.user {
    align-self: flex-end;
    flex-direction: row-reverse;
}

/* Fixed-size circular avatar; flex-shrink: 0 keeps it from being squeezed by the bubble. */
.message-avatar {
    width: 32px;
    height: 32px;
    border-radius: 50%;
    display: flex;
    align-items: center;
    justify-content: center;
    flex-shrink: 0;
    font-size: 0.9rem;
    background: var(--bg-light);
    border: 1px solid var(--glass-border);
}

/*
 * Message bubble.
 * min-width: 0 overrides the flex-item default (min-width: auto), which would
 * otherwise stop the bubble from shrinking below its widest child: a wide table
 * or code block would push it past .message's 85% cap and be clipped by
 * .chat-messages (overflow-x: hidden) instead of scrolling inside its own
 * overflow-x: auto wrapper.
 * overflow-wrap: break-word lets long unbroken strings (URLs, hashes) wrap
 * inside the bubble instead of overflowing it.
 * NOTE(review): assumes .message-content is a direct flex child of .message - confirm in markup.
 */
.message-content {
    min-width: 0;
    padding: 1rem 1.25rem;
    border-radius: var(--radius-lg);
    font-size: 0.9rem;
    line-height: 1.6;
    overflow-wrap: break-word;
}

/* User bubble: accent fill, squared-off corner on the bottom right. */
.message.user .message-content {
    background: var(--accent-primary);
    color: var(--bg-darkest);
    border-bottom-right-radius: 4px;
}

/* Assistant bubble: gradient surface with border and shadow, squared-off corner on the bottom left. */
.message.assistant .message-content {
    background: linear-gradient(135deg, var(--bg-light) 0%, var(--bg-medium) 100%);
    border: 1px solid var(--glass-border);
    border-bottom-left-radius: 4px;
    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.15);
}

/* Source list under a message, separated by a thin top rule. */
.message-sources {
    margin-top: 0.5rem;
    padding-top: 0.5rem;
    border-top: 1px solid rgba(255, 255, 255, 0.1);
    font-size: 0.75rem;
    color: var(--text-muted);
}

/* Individual source chip. */
.source-tag {
    display: inline-block;
    padding: 0.1rem 0.4rem;
    background: var(--bg-hover);
    border-radius: var(--radius-sm);
    margin-left: 0.25rem;
}
|
| 902 |
+
|
| 903 |
+
/* ==================== Markdown Styling in Messages ==================== */
/* Baseline shared by h1-h4 and the .msg-header class inside a message bubble. */
.message-content h1,
.message-content h2,
.message-content h3,
.message-content h4,
.message-content .msg-header {
    margin: 1.25rem 0 0.6rem 0;
    line-height: 1.4;
    font-weight: 600;
    color: var(--text-primary);
}

/*
 * h1: gradient-filled text with an underline rule.
 * The background shorthand stays ahead of the *-clip longhands because the
 * shorthand resets background-clip.
 */
.message-content h1 {
    font-size: 1.25rem;
    background: linear-gradient(90deg, var(--accent-primary), var(--accent-secondary));
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
    padding-bottom: 0.5rem;
    border-bottom: 2px solid rgba(168, 85, 247, 0.3);
}

/* h2: accent-coloured with a lighter underline rule. */
.message-content h2 {
    font-size: 1.1rem;
    color: var(--accent-secondary);
    padding-bottom: 0.4rem;
    border-bottom: 1px solid rgba(168, 85, 247, 0.2);
}

.message-content h3 {
    font-size: 1rem;
    color: var(--info);
}

/* h4: smaller, secondary colour, tighter margins than the shared baseline. */
.message-content h4 {
    margin: 0.9rem 0 0.4rem 0;
    font-size: 0.95rem;
    font-weight: 600;
    color: var(--text-secondary);
}

/* A heading that opens the bubble gets no top margin. */
.message-content h1:first-child,
.message-content h2:first-child,
.message-content h3:first-child,
.message-content h4:first-child,
.message-content .msg-header:first-child {
    margin-top: 0;
}

/* Paragraphs, both native <p> and the .msg-para class. */
.message-content p,
.message-content .msg-para {
    line-height: 1.75;
    margin: 0.75rem 0;
}

/* A paragraph that opens the bubble gets no top margin. */
.message-content p:first-child,
.message-content .msg-para:first-child {
    margin-top: 0;
}
|
| 962 |
+
|
| 963 |
+
/* ==================== Enhanced Lists ==================== */
/* .formatted-list removes native markers; items draw their own via .list-num or ::before. */
.message-content .formatted-list {
    list-style: none;
    margin: 1rem 0;
    padding-left: 0;
}

.message-content ol.formatted-list {
    counter-reset: item;
}

/* Card-like list item; position: relative anchors the absolutely positioned marker. */
.message-content .formatted-list li {
    position: relative;
    margin: 0.35rem 0;
    padding: 0.5rem 0.75rem 0.5rem 2.25rem;
    line-height: 1.65;
    background: rgba(255, 255, 255, 0.02);
    border-left: 3px solid transparent;
    border-radius: var(--radius-md);
    transition: all 0.2s ease;
}

.message-content .formatted-list li:hover {
    background: rgba(255, 255, 255, 0.04);
}

/* The left border colour distinguishes numbered items from bullet items. */
.message-content .formatted-list li.numbered {
    border-left-color: var(--accent-primary);
}

.message-content .formatted-list li.bullet {
    border-left-color: var(--info);
}

/* Number label placed in the item's left padding. */
.message-content .formatted-list li.numbered .list-num {
    position: absolute;
    left: 0.6rem;
    font-size: 0.9rem;
    font-weight: 700;
    color: var(--accent-primary);
}

/* Bullet glyph placed in the item's left padding. */
.message-content .formatted-list li.bullet::before {
    content: "▸";
    position: absolute;
    left: 0.75rem;
    font-size: 0.85em;
    font-weight: 600;
    color: var(--info);
}

/* Sub-lists: indented, flat background, thinner left border, hollow-circle marker. */
.message-content .formatted-list.sub-list {
    margin: 0.5rem 0 0.5rem 1.5rem;
}

.message-content .formatted-list.sub-list li {
    padding-left: 1.75rem;
    background: transparent;
    border-left: 2px solid rgba(168, 85, 247, 0.3);
}

.message-content .formatted-list.sub-list li::before {
    content: "○";
    position: absolute;
    left: 0.5rem;
    font-size: 0.7em;
    color: var(--accent-muted);
}

/* Legacy list support: plain ul/ol without the .formatted-list class */
.message-content ul,
.message-content ol {
    margin: 0.75rem 0;
    padding-left: 1.5rem;
}

.message-content li {
    margin: 0.4rem 0;
    padding-left: 0.5rem;
    line-height: 1.6;
}

.message-content ul li::marker {
    color: var(--info);
}

.message-content ol li::marker {
    font-weight: 600;
    color: var(--accent-secondary);
}

/* Nested lists */
.message-content ul ul,
.message-content ol ol,
.message-content ul ol,
.message-content ol ul {
    margin: 0.25rem 0 0.25rem 1rem;
}
|
| 1061 |
+
|
| 1062 |
+
/* ==================== Premium Tables ==================== */
/* Wrapper that scrolls horizontally so a wide table never widens the bubble. */
.message-content .table-wrapper {
    margin: 1rem 0;
    border-radius: var(--radius-md);
    overflow-x: auto;
    overflow-y: hidden;
    max-width: 100%;
    box-shadow: 0 2px 12px rgba(0, 0, 0, 0.2);
    border: 1px solid rgba(255, 255, 255, 0.08);
}

.message-content table {
    width: 100%;
    border-collapse: collapse;
    font-size: 0.8rem;
    background: rgba(0, 0, 0, 0.2);
    table-layout: auto;
}

.message-content thead {
    background: linear-gradient(135deg, rgba(168, 85, 247, 0.2) 0%, rgba(96, 165, 250, 0.15) 100%);
}

/* Header cells: small uppercase labels that never wrap. */
.message-content th {
    padding: 0.6rem 0.75rem;
    font-weight: 600;
    color: var(--text-primary);
    text-align: left;
    border-bottom: 2px solid rgba(168, 85, 247, 0.3);
    text-transform: uppercase;
    font-size: 0.7rem;
    letter-spacing: 0.3px;
    white-space: nowrap;
}

/*
 * Body cells are capped at 200px and may break inside long words.
 * overflow-wrap: anywhere replaces the deprecated `word-break: break-word`,
 * which is specified to behave the same as `word-break: normal` combined with
 * `overflow-wrap: anywhere`.
 */
.message-content td {
    padding: 0.5rem 0.75rem;
    border-bottom: 1px solid rgba(255, 255, 255, 0.05);
    color: var(--text-secondary);
    overflow-wrap: anywhere;
    max-width: 200px;
}

.message-content tbody tr {
    transition: background 0.2s ease;
}

.message-content tbody tr:hover {
    background: rgba(168, 85, 247, 0.08);
}

/* Zebra striping. */
.message-content tbody tr:nth-child(even) {
    background: rgba(255, 255, 255, 0.02);
}

/* Needed because the striping rule above would otherwise override the hover rule on even rows. */
.message-content tbody tr:nth-child(even):hover {
    background: rgba(168, 85, 247, 0.08);
}

.message-content tbody tr:last-child td {
    border-bottom: none;
}
|
| 1124 |
+
|
| 1125 |
+
/* ==================== Code Blocks ==================== */
/* Fenced code container; long lines scroll horizontally. */
.message-content .code-block {
    margin: 1rem 0;
    padding: 1rem 1.25rem;
    overflow-x: auto;
    font-family: 'Consolas', 'Monaco', 'Fira Code', monospace;
    font-size: 0.85rem;
    line-height: 1.6;
    background: linear-gradient(135deg, rgba(0, 0, 0, 0.4) 0%, rgba(0, 0, 0, 0.3) 100%);
    border: 1px solid rgba(255, 255, 255, 0.08);
    border-radius: var(--radius-lg);
}

/* <code> inside a block drops the inline-chip background, padding and border. */
.message-content .code-block code {
    padding: 0;
    color: var(--info);
    background: none;
    border: none;
}

/* Inline code */
.message-content code.inline-code,
.message-content code {
    padding: 0.2rem 0.5rem;
    font-family: 'Consolas', 'Monaco', monospace;
    font-size: 0.85em;
    color: var(--info);
    background: rgba(96, 165, 250, 0.12);
    border: 1px solid rgba(96, 165, 250, 0.2);
    border-radius: var(--radius-sm);
}

/* Bold and emphasis */
.message-content strong,
.message-content b {
    color: var(--text-primary);
    font-weight: 700;
}

.message-content em,
.message-content i {
    color: var(--text-secondary);
    font-style: italic;
}
|
| 1169 |
+
|
| 1170 |
+
/* ==================== Dividers ==================== */
/* 1px rule drawn as a gradient that fades to transparent at both ends. */
.message-content hr.divider {
    height: 1px;
    margin: 1.5rem 0;
    border: none;
    background: linear-gradient(90deg, transparent, rgba(168, 85, 247, 0.4), transparent);
}
|
| 1177 |
+
|
| 1178 |
+
/* ==================== Blockquotes ==================== */
/* Accent bar on the left, rounded corners on the right side only. */
.message-content blockquote {
    margin: 1rem 0;
    padding: 0.75rem 1.25rem;
    color: var(--text-secondary);
    font-style: italic;
    background: linear-gradient(135deg, rgba(168, 85, 247, 0.08) 0%, rgba(96, 165, 250, 0.05) 100%);
    border-left: 4px solid var(--accent-primary);
    border-radius: 0 var(--radius-md) var(--radius-md) 0;
}
|
| 1188 |
+
|
| 1189 |
+
/* ==================== Typing Indicator ==================== */
.typing-indicator {
    display: flex;
    gap: 0.75rem;
    padding: 1rem;
}

/* Pill that holds the animated dots. */
.typing-dots {
    display: flex;
    gap: 4px;
    padding: 0.75rem 1rem;
    background: var(--bg-light);
    border: 1px solid var(--glass-border);
    border-radius: var(--radius-lg);
}

.typing-dot {
    width: 6px;
    height: 6px;
    border-radius: 50%;
    background: var(--text-muted);
    animation: typingBounce 1.4s infinite ease-in-out;
}

/* Each dot starts 0.2s after the previous one. */
.typing-dot:nth-child(1) {
    animation-delay: 0s;
}

.typing-dot:nth-child(2) {
    animation-delay: 0.2s;
}

.typing-dot:nth-child(3) {
    animation-delay: 0.4s;
}

/* Small and faint at rest; full size and opaque at the 40% mark. */
@keyframes typingBounce {
    0%, 80%, 100% {
        opacity: 0.4;
        transform: scale(0.6);
    }

    40% {
        opacity: 1;
        transform: scale(1);
    }
}
|
| 1239 |
+
|
| 1240 |
+
/* ==================== Chat Input ==================== */
.chat-input-container {
    padding: 1rem;
    background: var(--bg-dark);
    border-top: 1px solid var(--glass-border);
}

/* Input and action button share a row, aligned to the bottom edge. */
.chat-input-wrapper {
    display: flex;
    align-items: flex-end;
    gap: 0.75rem;
}

/* Text area: fills the row, cannot be resized by the user, capped at 150px tall. */
.chat-input {
    flex: 1;
    max-height: 150px;
    padding: 0.75rem 1rem;
    resize: none;
    font-family: inherit;
    font-size: 0.9rem;
    color: var(--text-primary);
    background: var(--bg-medium);
    border: 1px solid var(--glass-border);
    border-radius: var(--radius-lg);
    transition: all var(--transition-fast);
}

/* The default outline is removed; focus is shown by the border and background change. */
.chat-input:focus {
    outline: none;
    background: var(--bg-light);
    border-color: rgba(255, 255, 255, 0.2);
}

.chat-input::placeholder {
    color: var(--text-muted);
}

/* Circular send button. */
.send-btn {
    display: flex;
    align-items: center;
    justify-content: center;
    width: 44px;
    height: 44px;
    font-size: 1.1rem;
    color: var(--bg-darkest);
    background: var(--accent-primary);
    border: none;
    border-radius: 50%;
    cursor: pointer;
    transition: all var(--transition-fast);
}

.send-btn:hover {
    transform: scale(1.05);
}

/* Declared after :hover so a disabled button does not scale when hovered. */
.send-btn:disabled {
    opacity: 0.3;
    cursor: not-allowed;
    transform: none;
}

/* Stop Generation Button: same footprint as the send button, with a pulsing ring. */
.stop-btn {
    display: flex;
    align-items: center;
    justify-content: center;
    width: 44px;
    height: 44px;
    font-size: 1rem;
    color: white;
    background: var(--error);
    border: none;
    border-radius: 50%;
    cursor: pointer;
    transition: all var(--transition-fast);
    animation: pulse-stop 1.5s ease-in-out infinite;
}

.stop-btn:hover {
    background: #dc2626;
    transform: scale(1.1);
}

/* Ring expands from 0 to 8px while fading out, then resets. */
@keyframes pulse-stop {
    0%,
    100% {
        box-shadow: 0 0 0 0 rgba(239, 68, 68, 0.4);
    }

    50% {
        box-shadow: 0 0 0 8px rgba(239, 68, 68, 0);
    }
}
|
| 1332 |
+
|
| 1333 |
+
/* ==================== Welcome Screen ==================== */
/* Fills the available space and centres its content on both axes. */
.welcome-screen {
    flex: 1;
    display: flex;
    flex-direction: column;
    justify-content: center;
    align-items: center;
    padding: 2rem;
    text-align: center;
}

.welcome-icon {
    display: flex;
    justify-content: center;
    align-items: center;
    margin-bottom: 1rem;
}

.welcome-title {
    margin-bottom: 0.5rem;
    font-size: 1.5rem;
    font-weight: 700;
}

.welcome-subtitle {
    max-width: 400px;
    font-size: 0.9rem;
    color: var(--text-secondary);
}

/* Row of feature cards under the subtitle. */
.welcome-features {
    display: flex;
    gap: 0.75rem;
    margin-top: 1.5rem;
}

.feature-card {
    width: 100px;
    padding: 1rem;
    text-align: center;
    background: var(--bg-medium);
    border: 1px solid var(--glass-border);
    border-radius: var(--radius-lg);
    transition: all var(--transition-smooth);
}

/* Lifts the card slightly and lightens its background. */
.feature-card:hover {
    background: var(--bg-light);
    transform: translateY(-3px);
}

.feature-icon {
    margin-bottom: 0.25rem;
    font-size: 1.5rem;
}

.feature-title {
    font-size: 0.75rem;
    font-weight: 600;
}
|
| 1393 |
+
|
| 1394 |
+
/* ==================== Modal ==================== */
/*
 * Full-viewport dimmer that centres its child. It is hidden via
 * opacity + visibility so the transition can animate; adding .active shows it.
 */
.modal-overlay {
    position: fixed;
    top: 0;
    right: 0;
    bottom: 0;
    left: 0;
    z-index: 1000;
    display: flex;
    justify-content: center;
    align-items: center;
    background: rgba(0, 0, 0, 0.8);
    opacity: 0;
    visibility: hidden;
    transition: all var(--transition-smooth);
}

.modal-overlay.active {
    opacity: 1;
    visibility: visible;
}

/* Dialog box: starts slightly scaled down and shifted 20px lower. */
.modal {
    width: 100%;
    max-width: 400px;
    padding: 2rem;
    background: var(--bg-dark);
    border: 1px solid var(--glass-border);
    border-radius: var(--radius-xl);
    transform: scale(0.95) translateY(20px);
    transition: transform var(--transition-smooth);
}

/* Settles to full size and original position when the overlay is active. */
.modal-overlay.active .modal {
    transform: scale(1) translateY(0);
}

.modal-header {
    margin-bottom: 1.5rem;
    text-align: center;
}

/* Square logo tile, horizontally centred via auto side margins. */
.modal-logo {
    display: flex;
    justify-content: center;
    align-items: center;
    width: 50px;
    height: 50px;
    margin: 0 auto 1rem;
    font-size: 1.5rem;
    background: var(--bg-light);
    border: 1px solid var(--glass-border);
    border-radius: var(--radius-lg);
}

.modal-title {
    font-size: 1.25rem;
    font-weight: 700;
}

.modal-subtitle {
    margin-top: 0.25rem;
    font-size: 0.8rem;
    color: var(--text-muted);
}
|
| 1459 |
+
|
| 1460 |
+
/* ==================== Auth Tabs ==================== */
/* Segmented-control track shared by the role and auth tab groups. */
.role-tabs,
.auth-tabs {
    display: flex;
    margin-bottom: 1rem;
    padding: 4px;
    background: var(--bg-medium);
    border-radius: var(--radius-md);
}

/* Individual tab: equal-width, transparent until active. */
.role-tab,
.auth-tab {
    flex: 1;
    padding: 0.65rem;
    font-size: 0.85rem;
    font-weight: 600;
    color: var(--text-muted);
    background: transparent;
    border: none;
    border-radius: var(--radius-sm);
    cursor: pointer;
    transition: all var(--transition-fast);
}

/* Active state shared by both tab kinds. */
.role-tab.active,
.auth-tab.active {
    color: var(--text-primary);
    background: var(--bg-light);
}

/* Declared later with equal specificity, so the active auth tab ends up with the accent colours. */
.auth-tab.active {
    color: var(--bg-darkest);
    background: var(--accent-primary);
}
|
| 1494 |
+
|
| 1495 |
+
/* ==================== Form Styles ==================== */
.form-group {
    margin-bottom: 1rem;
}

.form-label {
    display: block;
    margin-bottom: 0.4rem;
    font-size: 0.8rem;
    font-weight: 500;
    color: var(--text-secondary);
}

/* Full-width text input. */
.form-input {
    width: 100%;
    padding: 0.7rem 1rem;
    font-size: 0.9rem;
    color: var(--text-primary);
    background: var(--bg-medium);
    border: 1px solid var(--glass-border);
    border-radius: var(--radius-md);
    transition: all var(--transition-fast);
}

/* The default outline is removed; focus is shown by the border and background change. */
.form-input:focus {
    outline: none;
    background: var(--bg-light);
    border-color: rgba(255, 255, 255, 0.2);
}

.form-input::placeholder {
    color: var(--text-muted);
}

/* Validation message under a field. */
.form-error {
    margin-top: 0.4rem;
    font-size: 0.8rem;
    color: var(--error);
}

/* Full-width submit button of the auth form. */
.auth-btn {
    width: 100%;
    margin-top: 0.5rem;
    padding: 0.8rem;
}

/* Button row at the bottom of a modal; the buttons share the width equally. */
.modal-actions {
    display: flex;
    gap: 0.75rem;
    margin-top: 1rem;
}

.modal-actions .btn {
    flex: 1;
}
|
| 1550 |
+
|
| 1551 |
+
/* ==================== Buttons ==================== */
/* Base button; the variants below set colours and, in places, padding. */
.btn {
    position: relative;
    padding: 0.6rem 1.2rem;
    font-size: 0.85rem;
    font-weight: 600;
    border: none;
    border-radius: var(--radius-md);
    cursor: pointer;
    transition: all var(--transition-fast);
}

/* Primary: accent fill. */
.btn-primary {
    color: var(--bg-darkest);
    background: var(--accent-primary);
}

.btn-primary:hover {
    background: var(--accent-secondary);
    transform: translateY(-1px);
}

/* Secondary: bordered, neutral fill. */
.btn-secondary {
    color: var(--text-primary);
    background: var(--bg-light);
    border: 1px solid var(--glass-border);
}

.btn-secondary:hover {
    background: var(--bg-hover);
}

/* Ghost: no fill until hovered, compact padding. */
.btn-ghost {
    padding: 0.4rem;
    color: var(--text-muted);
    background: transparent;
}

.btn-ghost:hover {
    color: var(--text-primary);
    background: var(--bg-light);
}

/* Logout: small red-tinted button. */
.btn-logout {
    padding: 0.35rem 0.75rem;
    font-size: 0.75rem;
    color: #f87171;
    background: rgba(248, 113, 113, 0.15);
    border: 1px solid rgba(248, 113, 113, 0.3);
}

.btn-logout:hover {
    background: rgba(248, 113, 113, 0.25);
    border-color: rgba(248, 113, 113, 0.5);
}
|
| 1606 |
+
|
| 1607 |
+
/* ==================== Document Viewer Modal ==================== */
/* Column layout (header + content), 80vh tall, no padding of its own. */
.doc-viewer-modal {
    display: flex;
    flex-direction: column;
    width: 90%;
    max-width: 900px;
    height: 80vh;
    padding: 0;
}

.doc-viewer-header {
    display: flex;
    align-items: center;
    justify-content: space-between;
    padding: 1rem 1.5rem;
    border-bottom: 1px solid var(--glass-border);
}

/* Title is kept on one line and cut with an ellipsis. */
.doc-viewer-header h3 {
    overflow: hidden;
    font-size: 1rem;
    font-weight: 600;
    white-space: nowrap;
    text-overflow: ellipsis;
}

/* Content area takes the remaining height, centres its child and scrolls on overflow. */
.doc-viewer-content {
    flex: 1;
    display: flex;
    align-items: center;
    justify-content: center;
    padding: 1rem;
    overflow: auto;
    background: var(--bg-medium);
}

/* Embedded previews never exceed the content area. */
.doc-viewer-content iframe,
.doc-viewer-content img {
    max-height: 100%;
    max-width: 100%;
}

/* Text preview fills the content area and scrolls on its own. */
.doc-text-preview {
    width: 100%;
    height: 100%;
    padding: 1rem;
    overflow: auto;
}

/* Preserves whitespace but wraps long lines and breaks over-long words. */
.doc-text-preview pre {
    font-size: 0.85rem;
    line-height: 1.6;
    color: var(--text-secondary);
    white-space: pre-wrap;
    word-wrap: break-word;
}
|
| 1663 |
+
|
| 1664 |
+
/* ==================== Empty State ==================== */
/* Centred, muted placeholder; the .small modifier tightens padding and text. */
.empty-state {
    padding: 2rem;
    color: var(--text-muted);
    text-align: center;
}

.empty-state.small {
    padding: 0.75rem;
}

.empty-icon {
    margin-bottom: 0.5rem;
    font-size: 2rem;
}

.empty-text {
    font-size: 0.8rem;
}

.empty-state.small .empty-text {
    font-size: 0.75rem;
}
|
| 1687 |
+
|
| 1688 |
+
/* ==================== Loading ==================== */
/*
 * Ring spinner: a faint circular border whose top edge is recoloured with the
 * accent. border-top-color must follow the border shorthand, which resets it.
 */
.loading-spinner {
    width: 18px;
    height: 18px;
    border: 2px solid rgba(255, 255, 255, 0.2);
    border-top-color: var(--accent-primary);
    border-radius: 50%;
    animation: spin 0.7s linear infinite;
}

@keyframes spin {
    100% {
        transform: rotate(360deg);
    }
}
|
| 1703 |
+
|
| 1704 |
+
/* ==================== Toast ==================== */
/* Fixed stack in the bottom-right corner; z-index 2000 is above the modal overlay's 1000. */
.toast-container {
    position: fixed;
    right: 1.5rem;
    bottom: 1.5rem;
    z-index: 2000;
    display: flex;
    flex-direction: column;
    gap: 0.5rem;
}

.toast {
    display: flex;
    align-items: center;
    gap: 0.75rem;
    padding: 0.75rem 1rem;
    background: var(--bg-dark);
    border: 1px solid var(--glass-border);
    border-radius: var(--radius-md);
    box-shadow: 0 4px 15px rgba(0, 0, 0, 0.4);
    animation: toastSlide 0.3s ease-out;
}

/* Slides in from 50px to the right while fading in. */
@keyframes toastSlide {
    0% {
        opacity: 0;
        transform: translateX(50px);
    }

    100% {
        opacity: 1;
        transform: translateX(0);
    }
}

.toast-message {
    font-size: 0.85rem;
}

/* Close control with no background or border of its own. */
.toast-close {
    padding: 0.25rem;
    color: var(--text-muted);
    background: none;
    border: none;
    cursor: pointer;
}
|
| 1750 |
+
|
| 1751 |
+
/* ==================== Utility Classes ==================== */
/* Single-purpose helpers. .hidden uses !important so it wins over any component display rule. */
.hidden {
    display: none !important;
}

.flex {
    display: flex;
}

.items-center {
    align-items: center;
}

/* 0.5rem gap */
.gap-2 {
    gap: 0.5rem;
}

/* 0.75rem top margin */
.mt-3 {
    margin-top: 0.75rem;
}
|
| 1771 |
+
|
| 1772 |
+
/* ==================== Document Summary Panel ==================== */
/* Card that slides in from above; position: relative anchors .summary-close. */
.summary-panel {
    position: relative;
    margin-bottom: 1rem;
    padding: 1.25rem;
    background: linear-gradient(135deg, var(--bg-medium), var(--bg-light));
    border: 1px solid var(--glass-border);
    border-radius: var(--radius-lg);
    box-shadow: 0 4px 20px rgba(0, 0, 0, 0.3);
    animation: summarySlideIn 0.3s ease-out;
}

@keyframes summarySlideIn {
    0% {
        opacity: 0;
        transform: translateY(-10px);
    }

    100% {
        opacity: 1;
        transform: translateY(0);
    }
}

.summary-header {
    display: flex;
    align-items: center;
    gap: 0.5rem;
    margin-bottom: 0.75rem;
}

.summary-icon {
    font-size: 1.25rem;
}

/* Title takes the remaining header width and is cut with an ellipsis. */
.summary-title {
    flex: 1;
    overflow: hidden;
    font-size: 0.9rem;
    font-weight: 600;
    color: var(--text-primary);
    white-space: nowrap;
    text-overflow: ellipsis;
}

/* Right padding leaves space at the edge where .summary-close is positioned. */
.summary-content {
    padding-right: 1.5rem;
}

.summary-text {
    font-size: 0.9rem;
    line-height: 1.6;
    color: var(--text-secondary);
}

/* Close control pinned to the panel's top-right corner, dimmed until hovered. */
.summary-close {
    position: absolute;
    top: 0.75rem;
    right: 0.75rem;
    padding: 0.25rem;
    font-size: 0.9rem;
    color: var(--text-muted);
    background: none;
    border: none;
    cursor: pointer;
    opacity: 0.6;
    transition: color var(--transition-fast);
}

.summary-close:hover {
    color: var(--text-primary);
    opacity: 1;
}
|
| 1845 |
+
|
| 1846 |
+
/* ==================== Selected Document State ==================== */
/* Highlighted row: lighter background, accent border and a faint outer ring. */
.document-item.selected {
    background: var(--bg-light);
    border-color: var(--accent-primary);
    box-shadow: 0 0 0 1px rgba(255, 255, 255, 0.15);
}

/*
 * 3px accent bar along the full height of the left edge.
 * NOTE(review): the bar is absolutely positioned, so it relies on .document-item
 * (declared outside this section) being a positioned element - confirm.
 */
.document-item.selected::before {
    content: '';
    position: absolute;
    top: 0;
    bottom: 0;
    left: 0;
    width: 3px;
    background: var(--accent-primary);
    border-radius: var(--radius-sm) 0 0 var(--radius-sm);
}

.document-item.selected .doc-name {
    font-weight: 600;
    color: var(--text-primary);
}
|
| 1868 |
+
|
| 1869 |
+
/* ==================== Mobile Navigation Bar ==================== */
/*
 * Bottom tab bar, hidden here by default (display: none).
 * padding-bottom must follow the padding shorthand: it replaces the shorthand's
 * bottom value with the device's safe-area inset.
 */
.mobile-nav {
    display: none;
    position: fixed;
    right: 0;
    bottom: 0;
    left: 0;
    z-index: 1000;
    height: 70px;
    justify-content: space-around;
    align-items: center;
    padding: 0 1rem;
    padding-bottom: env(safe-area-inset-bottom, 0);
    background: var(--bg-dark);
    border-top: 1px solid var(--glass-border);
    box-shadow: 0 -4px 20px rgba(0, 0, 0, 0.3);
}

/* Tab button: icon stacked above its label. */
.mobile-nav-btn {
    display: flex;
    flex-direction: column;
    justify-content: center;
    align-items: center;
    gap: 0.25rem;
    min-width: 70px;
    padding: 0.5rem 1.5rem;
    color: var(--text-muted);
    background: transparent;
    border: none;
    border-radius: var(--radius-md);
    cursor: pointer;
    transition: all var(--transition-fast);
}

/* Press feedback. */
.mobile-nav-btn:active {
    transform: scale(0.95);
}

/* Tab carrying the .active class. */
.mobile-nav-btn.active {
    color: var(--accent-primary);
}

.mobile-nav-btn .nav-icon {
    font-size: 1.5rem;
    line-height: 1;
}

.mobile-nav-btn .nav-label {
    font-size: 0.65rem;
    font-weight: 600;
    letter-spacing: 0.5px;
    text-transform: uppercase;
}
|
| 1922 |
+
|
| 1923 |
+
/* ==================== Mobile Backdrop ==================== */
/*
 * Full-viewport dimmer, hidden here by default (display: none).
 * Opacity and visibility are both transitioned so it can fade; .active shows it.
 */
.mobile-backdrop {
    display: none;
    position: fixed;
    top: 0;
    right: 0;
    bottom: 0;
    left: 0;
    z-index: 500;
    background: rgba(0, 0, 0, 0.7);
    opacity: 0;
    visibility: hidden;
    transition: opacity var(--transition-smooth), visibility var(--transition-smooth);
}

.mobile-backdrop.active {
    opacity: 1;
    visibility: visible;
}
|
| 1942 |
+
|
| 1943 |
+
/* ==================== Tablet Breakpoint (<= 1024px) ==================== */
/*
 * The query has no lower bound, so these rules apply to every viewport up to
 * 1024px wide, including widths below 768px unless a later rule overrides them.
 */
@media screen and (max-width: 1024px) {
    /* Narrower sidebar. */
    :root {
        --sidebar-width: 260px;
    }

    /* Tighter spacing around the main layout. */
    .main-content {
        gap: 0.75rem;
        padding: 0.75rem;
    }

    .sidebar-section {
        padding: 0.75rem;
    }

    .chat-bucket-filter {
        gap: 0.5rem;
        padding: 0.6rem 1rem;
    }

    /* Smaller welcome typography. */
    .welcome-title {
        font-size: 1.25rem;
    }

    .welcome-subtitle {
        font-size: 0.85rem;
    }
}
|
| 1971 |
+
|
| 1972 |
+
/* ==================== Mobile Breakpoint (<= 768px) ==================== */
|
| 1973 |
+
@media screen and (max-width: 768px) {
|
| 1974 |
+
:root {
|
| 1975 |
+
--sidebar-width: 85vw;
|
| 1976 |
+
--sidebar-collapsed: 0px;
|
| 1977 |
+
}
|
| 1978 |
+
|
| 1979 |
+
/* Show mobile navigation */
|
| 1980 |
+
.mobile-nav {
|
| 1981 |
+
display: flex;
|
| 1982 |
+
}
|
| 1983 |
+
|
| 1984 |
+
.mobile-backdrop {
|
| 1985 |
+
display: block;
|
| 1986 |
+
}
|
| 1987 |
+
|
| 1988 |
+
/* Ensure app container is above backdrop */
|
| 1989 |
+
.app-container {
|
| 1990 |
+
z-index: 600;
|
| 1991 |
+
}
|
| 1992 |
+
|
| 1993 |
+
/* Adjust main layout for mobile */
|
| 1994 |
+
.main-content {
|
| 1995 |
+
padding: 0;
|
| 1996 |
+
gap: 0;
|
| 1997 |
+
flex-direction: column;
|
| 1998 |
+
height: 100vh;
|
| 1999 |
+
overflow: hidden;
|
| 2000 |
+
}
|
| 2001 |
+
|
| 2002 |
+
/* ===== Off-Canvas Sidebars ===== */
|
| 2003 |
+
.sidebar {
|
| 2004 |
+
position: fixed;
|
| 2005 |
+
top: 0;
|
| 2006 |
+
bottom: 70px;
|
| 2007 |
+
/* Above mobile nav */
|
| 2008 |
+
width: var(--sidebar-width);
|
| 2009 |
+
max-width: 320px;
|
| 2010 |
+
z-index: 900;
|
| 2011 |
+
transition: transform var(--transition-smooth);
|
| 2012 |
+
border-radius: 0;
|
| 2013 |
+
background: var(--bg-dark);
|
| 2014 |
+
/* Solid background to prevent blur */
|
| 2015 |
+
box-shadow: 0 0 30px rgba(0, 0, 0, 0.5);
|
| 2016 |
+
}
|
| 2017 |
+
|
| 2018 |
+
.sidebar .sidebar-content {
|
| 2019 |
+
opacity: 1;
|
| 2020 |
+
pointer-events: auto;
|
| 2021 |
+
padding: 1rem;
|
| 2022 |
+
padding-bottom: 2rem;
|
| 2023 |
+
}
|
| 2024 |
+
|
| 2025 |
+
.sidebar-left {
|
| 2026 |
+
left: 0;
|
| 2027 |
+
transform: translateX(-100%);
|
| 2028 |
+
border-right: 1px solid var(--glass-border);
|
| 2029 |
+
}
|
| 2030 |
+
|
| 2031 |
+
.sidebar-left.mobile-open {
|
| 2032 |
+
transform: translateX(0);
|
| 2033 |
+
}
|
| 2034 |
+
|
| 2035 |
+
.sidebar-right {
|
| 2036 |
+
right: 0;
|
| 2037 |
+
transform: translateX(100%);
|
| 2038 |
+
border-left: 1px solid var(--glass-border);
|
| 2039 |
+
}
|
| 2040 |
+
|
| 2041 |
+
.sidebar-right.mobile-open {
|
| 2042 |
+
transform: translateX(0);
|
| 2043 |
+
}
|
| 2044 |
+
|
| 2045 |
+
/* Hide desktop sidebar toggles on mobile */
|
| 2046 |
+
.sidebar-toggle {
|
| 2047 |
+
display: none;
|
| 2048 |
+
}
|
| 2049 |
+
|
| 2050 |
+
/* ===== Chat Container Full Width ===== */
|
| 2051 |
+
.chat-container {
|
| 2052 |
+
border-radius: 0;
|
| 2053 |
+
border: none;
|
| 2054 |
+
height: calc(100vh - 70px);
|
| 2055 |
+
/* Full height minus mobile nav */
|
| 2056 |
+
display: flex;
|
| 2057 |
+
flex-direction: column;
|
| 2058 |
+
}
|
| 2059 |
+
|
| 2060 |
+
/* ===== Simplified Chat Header ===== */
|
| 2061 |
+
.chat-bucket-filter {
|
| 2062 |
+
padding: 0.75rem;
|
| 2063 |
+
gap: 0.5rem;
|
| 2064 |
+
flex-wrap: wrap;
|
| 2065 |
+
}
|
| 2066 |
+
|
| 2067 |
+
.filter-label {
|
| 2068 |
+
display: none;
|
| 2069 |
+
}
|
| 2070 |
+
|
| 2071 |
+
.chat-bucket-filter .custom-select.compact {
|
| 2072 |
+
flex: 1;
|
| 2073 |
+
min-width: 120px;
|
| 2074 |
+
}
|
| 2075 |
+
|
| 2076 |
+
.btn-new-chat,
|
| 2077 |
+
.btn-clear-chat {
|
| 2078 |
+
padding: 0.5rem 0.6rem;
|
| 2079 |
+
font-size: 0.7rem;
|
| 2080 |
+
}
|
| 2081 |
+
|
| 2082 |
+
.btn-new-chat {
|
| 2083 |
+
margin-left: 0;
|
| 2084 |
+
}
|
| 2085 |
+
|
| 2086 |
+
/* ===== Chat Messages ===== */
|
| 2087 |
+
.chat-messages {
|
| 2088 |
+
padding: 0.75rem;
|
| 2089 |
+
gap: 0.75rem;
|
| 2090 |
+
flex: 1;
|
| 2091 |
+
min-height: 0;
|
| 2092 |
+
}
|
| 2093 |
+
|
| 2094 |
+
.message {
|
| 2095 |
+
max-width: 92%;
|
| 2096 |
+
}
|
| 2097 |
+
|
| 2098 |
+
.message-content {
|
| 2099 |
+
padding: 0.875rem 1rem;
|
| 2100 |
+
font-size: 0.875rem;
|
| 2101 |
+
}
|
| 2102 |
+
|
| 2103 |
+
.message-avatar {
|
| 2104 |
+
width: 28px;
|
| 2105 |
+
height: 28px;
|
| 2106 |
+
font-size: 0.8rem;
|
| 2107 |
+
}
|
| 2108 |
+
|
| 2109 |
+
/* ===== Chat Input ===== */
|
| 2110 |
+
.chat-input-container {
|
| 2111 |
+
padding: 0.75rem;
|
| 2112 |
+
margin-bottom: 70px;
|
| 2113 |
+
/* Space for mobile nav */
|
| 2114 |
+
background: var(--bg-dark);
|
| 2115 |
+
border-top: 1px solid var(--glass-border);
|
| 2116 |
+
flex-shrink: 0;
|
| 2117 |
+
}
|
| 2118 |
+
|
| 2119 |
+
.chat-input {
|
| 2120 |
+
font-size: 16px;
|
| 2121 |
+
/* Prevents iOS zoom on focus */
|
| 2122 |
+
padding: 0.875rem 1rem;
|
| 2123 |
+
}
|
| 2124 |
+
|
| 2125 |
+
.send-btn {
|
| 2126 |
+
width: 48px;
|
| 2127 |
+
height: 48px;
|
| 2128 |
+
font-size: 1.2rem;
|
| 2129 |
+
}
|
| 2130 |
+
|
| 2131 |
+
/* ===== Welcome Screen ===== */
|
| 2132 |
+
.welcome-screen {
|
| 2133 |
+
padding: 1.5rem 1rem;
|
| 2134 |
+
}
|
| 2135 |
+
|
| 2136 |
+
.welcome-icon img {
|
| 2137 |
+
width: 160px !important;
|
| 2138 |
+
}
|
| 2139 |
+
|
| 2140 |
+
.welcome-title {
|
| 2141 |
+
font-size: 1.2rem;
|
| 2142 |
+
}
|
| 2143 |
+
|
| 2144 |
+
.welcome-subtitle {
|
| 2145 |
+
font-size: 0.85rem;
|
| 2146 |
+
max-width: 300px;
|
| 2147 |
+
}
|
| 2148 |
+
|
| 2149 |
+
.welcome-features {
|
| 2150 |
+
flex-wrap: wrap;
|
| 2151 |
+
justify-content: center;
|
| 2152 |
+
}
|
| 2153 |
+
|
| 2154 |
+
.feature-card {
|
| 2155 |
+
width: 85px;
|
| 2156 |
+
padding: 0.75rem;
|
| 2157 |
+
}
|
| 2158 |
+
|
| 2159 |
+
.feature-icon {
|
| 2160 |
+
font-size: 1.25rem;
|
| 2161 |
+
}
|
| 2162 |
+
|
| 2163 |
+
.feature-title {
|
| 2164 |
+
font-size: 0.7rem;
|
| 2165 |
+
}
|
| 2166 |
+
|
| 2167 |
+
/* ===== Modal Responsiveness ===== */
|
| 2168 |
+
.modal {
|
| 2169 |
+
width: 95%;
|
| 2170 |
+
max-width: none;
|
| 2171 |
+
margin: 1rem;
|
| 2172 |
+
padding: 1.5rem;
|
| 2173 |
+
max-height: 90vh;
|
| 2174 |
+
overflow-y: auto;
|
| 2175 |
+
}
|
| 2176 |
+
|
| 2177 |
+
.modal-title {
|
| 2178 |
+
font-size: 1.1rem;
|
| 2179 |
+
}
|
| 2180 |
+
|
| 2181 |
+
.modal-subtitle {
|
| 2182 |
+
font-size: 0.75rem;
|
| 2183 |
+
}
|
| 2184 |
+
|
| 2185 |
+
.form-input {
|
| 2186 |
+
font-size: 16px;
|
| 2187 |
+
/* Prevents iOS zoom */
|
| 2188 |
+
padding: 0.875rem 1rem;
|
| 2189 |
+
}
|
| 2190 |
+
|
| 2191 |
+
.auth-btn {
|
| 2192 |
+
padding: 1rem;
|
| 2193 |
+
font-size: 0.9rem;
|
| 2194 |
+
}
|
| 2195 |
+
|
| 2196 |
+
.modal-actions {
|
| 2197 |
+
flex-direction: column;
|
| 2198 |
+
}
|
| 2199 |
+
|
| 2200 |
+
/* ===== Document Viewer Modal ===== */
|
| 2201 |
+
.doc-viewer-modal {
|
| 2202 |
+
width: 100%;
|
| 2203 |
+
height: 100%;
|
| 2204 |
+
max-width: 100%;
|
| 2205 |
+
border-radius: 0;
|
| 2206 |
+
}
|
| 2207 |
+
|
| 2208 |
+
.doc-viewer-header {
|
| 2209 |
+
padding: 0.875rem 1rem;
|
| 2210 |
+
}
|
| 2211 |
+
|
| 2212 |
+
.doc-viewer-header h3 {
|
| 2213 |
+
font-size: 0.9rem;
|
| 2214 |
+
}
|
| 2215 |
+
|
| 2216 |
+
/* ===== Sidebar Content Adjustments ===== */
|
| 2217 |
+
.documents-section,
|
| 2218 |
+
.chat-history-section {
|
| 2219 |
+
max-height: none;
|
| 2220 |
+
flex: 0 0 auto;
|
| 2221 |
+
min-height: 0;
|
| 2222 |
+
}
|
| 2223 |
+
|
| 2224 |
+
.section-body {
|
| 2225 |
+
max-height: 80vh;
|
| 2226 |
+
/* Let long lists grow up to 80% of the viewport height on mobile */
|
| 2227 |
+
}
|
| 2228 |
+
|
| 2229 |
+
.sidebar-section {
|
| 2230 |
+
padding: 1rem;
|
| 2231 |
+
}
|
| 2232 |
+
|
| 2233 |
+
.sidebar-title {
|
| 2234 |
+
font-size: 0.8rem;
|
| 2235 |
+
}
|
| 2236 |
+
|
| 2237 |
+
.user-section {
|
| 2238 |
+
padding: 1rem !important;
|
| 2239 |
+
}
|
| 2240 |
+
|
| 2241 |
+
.user-avatar {
|
| 2242 |
+
width: 36px;
|
| 2243 |
+
height: 36px;
|
| 2244 |
+
font-size: 0.9rem;
|
| 2245 |
+
}
|
| 2246 |
+
|
| 2247 |
+
.user-details span:first-child {
|
| 2248 |
+
font-size: 0.95rem;
|
| 2249 |
+
}
|
| 2250 |
+
|
| 2251 |
+
.user-role {
|
| 2252 |
+
font-size: 0.75rem;
|
| 2253 |
+
}
|
| 2254 |
+
|
| 2255 |
+
.btn-logout {
|
| 2256 |
+
padding: 0.4rem 0.8rem;
|
| 2257 |
+
font-size: 0.8rem;
|
| 2258 |
+
}
|
| 2259 |
+
|
| 2260 |
+
/* ===== Upload Zone ===== */
|
| 2261 |
+
.upload-zone {
|
| 2262 |
+
padding: 1.25rem;
|
| 2263 |
+
}
|
| 2264 |
+
|
| 2265 |
+
.upload-icon {
|
| 2266 |
+
font-size: 1.75rem;
|
| 2267 |
+
}
|
| 2268 |
+
|
| 2269 |
+
.upload-title {
|
| 2270 |
+
font-size: 0.85rem;
|
| 2271 |
+
}
|
| 2272 |
+
|
| 2273 |
+
/* ===== Document & Chat History Items ===== */
|
| 2274 |
+
.document-item {
|
| 2275 |
+
padding: 0.75rem;
|
| 2276 |
+
}
|
| 2277 |
+
|
| 2278 |
+
.doc-icon {
|
| 2279 |
+
width: 36px;
|
| 2280 |
+
height: 36px;
|
| 2281 |
+
}
|
| 2282 |
+
|
| 2283 |
+
.doc-name {
|
| 2284 |
+
font-size: 0.85rem;
|
| 2285 |
+
}
|
| 2286 |
+
|
| 2287 |
+
.doc-view,
|
| 2288 |
+
.doc-delete {
|
| 2289 |
+
opacity: 1;
|
| 2290 |
+
/* Always visible on mobile */
|
| 2291 |
+
padding: 0.5rem;
|
| 2292 |
+
font-size: 0.9rem;
|
| 2293 |
+
}
|
| 2294 |
+
|
| 2295 |
+
.chat-history-item {
|
| 2296 |
+
padding: 0.75rem;
|
| 2297 |
+
}
|
| 2298 |
+
|
| 2299 |
+
.chat-history-delete {
|
| 2300 |
+
opacity: 1;
|
| 2301 |
+
/* Always visible on mobile */
|
| 2302 |
+
}
|
| 2303 |
+
|
| 2304 |
+
/* ===== Bucket Items ===== */
|
| 2305 |
+
.bucket-item {
|
| 2306 |
+
padding: 0.75rem;
|
| 2307 |
+
}
|
| 2308 |
+
|
| 2309 |
+
.bucket-name {
|
| 2310 |
+
font-size: 0.9rem;
|
| 2311 |
+
}
|
| 2312 |
+
|
| 2313 |
+
.bucket-delete {
|
| 2314 |
+
opacity: 1;
|
| 2315 |
+
/* Always visible on mobile */
|
| 2316 |
+
}
|
| 2317 |
+
|
| 2318 |
+
/* ===== Custom Dropdowns ===== */
|
| 2319 |
+
.select-trigger {
|
| 2320 |
+
padding: 0.75rem 1rem;
|
| 2321 |
+
}
|
| 2322 |
+
|
| 2323 |
+
.select-value {
|
| 2324 |
+
font-size: 0.9rem;
|
| 2325 |
+
}
|
| 2326 |
+
|
| 2327 |
+
.select-option {
|
| 2328 |
+
padding: 0.875rem 1rem;
|
| 2329 |
+
font-size: 0.9rem;
|
| 2330 |
+
}
|
| 2331 |
+
|
| 2332 |
+
/* ===== Toast Notifications ===== */
|
| 2333 |
+
.toast-container {
|
| 2334 |
+
bottom: 80px;
|
| 2335 |
+
/* Above mobile nav */
|
| 2336 |
+
left: 1rem;
|
| 2337 |
+
right: 1rem;
|
| 2338 |
+
}
|
| 2339 |
+
|
| 2340 |
+
.toast {
|
| 2341 |
+
width: 100%;
|
| 2342 |
+
}
|
| 2343 |
+
|
| 2344 |
+
/* ===== Summary Panel ===== */
|
| 2345 |
+
.summary-panel {
|
| 2346 |
+
padding: 1rem;
|
| 2347 |
+
margin-bottom: 0.75rem;
|
| 2348 |
+
}
|
| 2349 |
+
|
| 2350 |
+
.summary-title {
|
| 2351 |
+
font-size: 0.85rem;
|
| 2352 |
+
}
|
| 2353 |
+
|
| 2354 |
+
.summary-text {
|
| 2355 |
+
font-size: 0.85rem;
|
| 2356 |
+
}
|
| 2357 |
+
|
| 2358 |
+
/* ===== Tables in Messages ===== */
|
| 2359 |
+
.message-content .table-wrapper {
|
| 2360 |
+
margin: 1rem -0.5rem;
|
| 2361 |
+
border-radius: var(--radius-md);
|
| 2362 |
+
overflow-x: auto;
|
| 2363 |
+
}
|
| 2364 |
+
|
| 2365 |
+
.message-content table {
|
| 2366 |
+
font-size: 0.8rem;
|
| 2367 |
+
min-width: 400px;
|
| 2368 |
+
}
|
| 2369 |
+
|
| 2370 |
+
.message-content th,
|
| 2371 |
+
.message-content td {
|
| 2372 |
+
padding: 0.6rem 0.75rem;
|
| 2373 |
+
}
|
| 2374 |
+
|
| 2375 |
+
/* ===== Code Blocks ===== */
|
| 2376 |
+
.message-content .code-block {
|
| 2377 |
+
padding: 0.875rem 1rem;
|
| 2378 |
+
font-size: 0.8rem;
|
| 2379 |
+
margin: 0.75rem -0.25rem;
|
| 2380 |
+
}
|
| 2381 |
+
|
| 2382 |
+
/* ===== Lists ===== */
|
| 2383 |
+
.message-content .formatted-list li {
|
| 2384 |
+
padding: 0.5rem 0.5rem 0.5rem 2rem;
|
| 2385 |
+
}
|
| 2386 |
+
}
|
| 2387 |
+
|
| 2388 |
+
/* ==================== Small Mobile (<= 480px) ==================== */
|
| 2389 |
+
@media screen and (max-width: 480px) {
|
| 2390 |
+
.mobile-nav {
|
| 2391 |
+
height: 65px;
|
| 2392 |
+
padding: 0 0.5rem;
|
| 2393 |
+
}
|
| 2394 |
+
|
| 2395 |
+
.mobile-nav-btn {
|
| 2396 |
+
min-width: 60px;
|
| 2397 |
+
padding: 0.4rem 1rem;
|
| 2398 |
+
}
|
| 2399 |
+
|
| 2400 |
+
.mobile-nav-btn .nav-icon {
|
| 2401 |
+
font-size: 1.35rem;
|
| 2402 |
+
}
|
| 2403 |
+
|
| 2404 |
+
.mobile-nav-btn .nav-label {
|
| 2405 |
+
font-size: 0.6rem;
|
| 2406 |
+
}
|
| 2407 |
+
|
| 2408 |
+
.sidebar {
|
| 2409 |
+
max-width: 100%;
|
| 2410 |
+
width: 100%;
|
| 2411 |
+
}
|
| 2412 |
+
|
| 2413 |
+
.chat-bucket-filter {
|
| 2414 |
+
padding: 0.6rem;
|
| 2415 |
+
}
|
| 2416 |
+
|
| 2417 |
+
.message {
|
| 2418 |
+
max-width: 95%;
|
| 2419 |
+
}
|
| 2420 |
+
|
| 2421 |
+
.message-content {
|
| 2422 |
+
padding: 0.75rem 0.875rem;
|
| 2423 |
+
font-size: 0.85rem;
|
| 2424 |
+
}
|
| 2425 |
+
|
| 2426 |
+
.message-avatar {
|
| 2427 |
+
width: 26px;
|
| 2428 |
+
height: 26px;
|
| 2429 |
+
}
|
| 2430 |
+
|
| 2431 |
+
.welcome-icon img {
|
| 2432 |
+
width: 140px !important;
|
| 2433 |
+
}
|
| 2434 |
+
|
| 2435 |
+
.welcome-title {
|
| 2436 |
+
font-size: 1.1rem;
|
| 2437 |
+
}
|
| 2438 |
+
|
| 2439 |
+
.welcome-subtitle {
|
| 2440 |
+
font-size: 0.8rem;
|
| 2441 |
+
}
|
| 2442 |
+
|
| 2443 |
+
.modal {
|
| 2444 |
+
padding: 1.25rem;
|
| 2445 |
+
margin: 0.5rem;
|
| 2446 |
+
}
|
| 2447 |
+
|
| 2448 |
+
.role-tabs,
|
| 2449 |
+
.auth-tabs {
|
| 2450 |
+
padding: 3px;
|
| 2451 |
+
}
|
| 2452 |
+
|
| 2453 |
+
.role-tab,
|
| 2454 |
+
.auth-tab {
|
| 2455 |
+
padding: 0.6rem 0.5rem;
|
| 2456 |
+
font-size: 0.8rem;
|
| 2457 |
+
}
|
| 2458 |
+
|
| 2459 |
+
.form-label {
|
| 2460 |
+
font-size: 0.75rem;
|
| 2461 |
+
}
|
| 2462 |
+
}
|
| 2463 |
+
|
| 2464 |
+
/* ==================== Landscape Mobile ==================== */
|
| 2465 |
+
@media screen and (max-width: 768px) and (orientation: landscape) {
|
| 2466 |
+
.mobile-nav {
|
| 2467 |
+
height: 55px;
|
| 2468 |
+
}
|
| 2469 |
+
|
| 2470 |
+
.main-content {
|
| 2471 |
+
padding-bottom: 55px;
|
| 2472 |
+
}
|
| 2473 |
+
|
| 2474 |
+
.sidebar {
|
| 2475 |
+
bottom: 55px;
|
| 2476 |
+
}
|
| 2477 |
+
|
| 2478 |
+
.mobile-nav-btn .nav-label {
|
| 2479 |
+
display: none;
|
| 2480 |
+
}
|
| 2481 |
+
|
| 2482 |
+
.mobile-nav-btn .nav-icon {
|
| 2483 |
+
font-size: 1.5rem;
|
| 2484 |
+
}
|
| 2485 |
+
|
| 2486 |
+
.welcome-screen {
|
| 2487 |
+
padding: 1rem;
|
| 2488 |
+
flex-direction: row;
|
| 2489 |
+
gap: 2rem;
|
| 2490 |
+
}
|
| 2491 |
+
|
| 2492 |
+
.welcome-icon,
|
| 2493 |
+
.welcome-title,
|
| 2494 |
+
.welcome-subtitle {
|
| 2495 |
+
margin: 0;
|
| 2496 |
+
}
|
| 2497 |
+
}
|
| 2498 |
+
|
| 2499 |
+
/* ==================== Touch Device Optimizations ==================== */
|
| 2500 |
+
@media (hover: none) and (pointer: coarse) {
|
| 2501 |
+
|
| 2502 |
+
/* Larger touch targets */
|
| 2503 |
+
.btn {
|
| 2504 |
+
min-height: 44px;
|
| 2505 |
+
}
|
| 2506 |
+
|
| 2507 |
+
.btn-ghost {
|
| 2508 |
+
min-width: 44px;
|
| 2509 |
+
min-height: 44px;
|
| 2510 |
+
}
|
| 2511 |
+
|
| 2512 |
+
.document-item,
|
| 2513 |
+
.bucket-item,
|
| 2514 |
+
.chat-history-item {
|
| 2515 |
+
min-height: 48px;
|
| 2516 |
+
}
|
| 2517 |
+
|
| 2518 |
+
/* Remove hover effects that don't work on touch */
|
| 2519 |
+
.upload-zone:hover {
|
| 2520 |
+
transform: none;
|
| 2521 |
+
}
|
| 2522 |
+
|
| 2523 |
+
.feature-card:hover {
|
| 2524 |
+
transform: none;
|
| 2525 |
+
}
|
| 2526 |
+
|
| 2527 |
+
/* Always show action buttons */
|
| 2528 |
+
.doc-view,
|
| 2529 |
+
.doc-delete,
|
| 2530 |
+
.bucket-delete,
|
| 2531 |
+
.chat-history-delete {
|
| 2532 |
+
opacity: 1;
|
| 2533 |
+
}
|
| 2534 |
+
}
|
| 2535 |
+
|
| 2536 |
+
/* ==================== Safe Area Support (iPhone X+) ==================== */
|
| 2537 |
+
@supports (padding-bottom: env(safe-area-inset-bottom)) {
|
| 2538 |
+
.mobile-nav {
|
| 2539 |
+
padding-bottom: env(safe-area-inset-bottom);
|
| 2540 |
+
height: calc(70px + env(safe-area-inset-bottom));
|
| 2541 |
+
}
|
| 2542 |
+
|
| 2543 |
+
@media screen and (max-width: 768px) {
|
| 2544 |
+
.main-content {
|
| 2545 |
+
padding-bottom: calc(70px + env(safe-area-inset-bottom));
|
| 2546 |
+
}
|
| 2547 |
+
|
| 2548 |
+
.sidebar {
|
| 2549 |
+
bottom: calc(70px + env(safe-area-inset-bottom));
|
| 2550 |
+
}
|
| 2551 |
+
}
|
| 2552 |
+
}
|
| 2553 |
+
|
| 2554 |
+
/* ==================== Reduced Motion ==================== */
|
| 2555 |
+
@media (prefers-reduced-motion: reduce) {
|
| 2556 |
+
|
| 2557 |
+
.sidebar,
|
| 2558 |
+
.mobile-backdrop,
|
| 2559 |
+
.modal,
|
| 2560 |
+
.message {
|
| 2561 |
+
transition: none;
|
| 2562 |
+
}
|
| 2563 |
+
|
| 2564 |
+
.typing-dot {
|
| 2565 |
+
animation: none;
|
| 2566 |
+
}
|
| 2567 |
+
}
|
static/images/WhatsApp Image 2025-12-23 at 5.10.00 PM.jpeg
ADDED
|
static/index.html
ADDED
|
@@ -0,0 +1,411 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
|
| 4 |
+
<head>
|
| 5 |
+
<meta charset="UTF-8">
|
| 6 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 7 |
+
<meta name="description" content="AI-powered document intelligence platform with bucket organization.">
|
| 8 |
+
<title>Iribl AI - Document Intelligence</title>
|
| 9 |
+
|
| 10 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 11 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 12 |
+
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap" rel="stylesheet">
|
| 13 |
+
<link rel="stylesheet" href="/css/styles.css">
|
| 14 |
+
<link rel="icon" type="image/svg+xml"
|
| 15 |
+
href="data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'><text y='.9em' font-size='90'>🧠</text></svg>">
|
| 16 |
+
</head>
|
| 17 |
+
|
| 18 |
+
<body>
|
| 19 |
+
<!-- Toast Container -->
|
| 20 |
+
<div class="toast-container" id="toastContainer"></div>
|
| 21 |
+
|
| 22 |
+
<!-- Mobile Backdrop Overlay -->
|
| 23 |
+
<div class="mobile-backdrop" id="mobileBackdrop"></div>
|
| 24 |
+
|
| 25 |
+
<!-- Mobile Bottom Navigation -->
|
| 26 |
+
<nav class="mobile-nav" id="mobileNav">
|
| 27 |
+
<button class="mobile-nav-btn" id="mobileLeftToggle" title="Menu">
|
| 28 |
+
<span class="nav-icon">☰</span>
|
| 29 |
+
<span class="nav-label">Menu</span>
|
| 30 |
+
</button>
|
| 31 |
+
<button class="mobile-nav-btn active" id="mobileChatToggle" title="Chat">
|
| 32 |
+
<span class="nav-icon">💬</span>
|
| 33 |
+
<span class="nav-label">Chat</span>
|
| 34 |
+
</button>
|
| 35 |
+
<button class="mobile-nav-btn" id="mobileRightToggle" title="Documents">
|
| 36 |
+
<span class="nav-icon">📚</span>
|
| 37 |
+
<span class="nav-label">Docs</span>
|
| 38 |
+
</button>
|
| 39 |
+
</nav>
|
| 40 |
+
|
| 41 |
+
<!-- Auth Modal -->
|
| 42 |
+
<div class="modal-overlay" id="authModal">
|
| 43 |
+
<div class="modal glass-panel">
|
| 44 |
+
<div class="modal-header">
|
| 45 |
+
<div class="modal-logo">🧠</div>
|
| 46 |
+
<h2 class="modal-title">Welcome to Iribl AI</h2>
|
| 47 |
+
<p class="modal-subtitle">Your intelligent document companion</p>
|
| 48 |
+
</div>
|
| 49 |
+
|
| 50 |
+
<div class="role-tabs">
|
| 51 |
+
<button class="role-tab active" data-role="admin">👔 Admin</button>
|
| 52 |
+
<button class="role-tab" data-role="employee">Employee</button>
|
| 53 |
+
</div>
|
| 54 |
+
|
| 55 |
+
<div class="auth-tabs" id="authTabs">
|
| 56 |
+
<button class="auth-tab active" data-tab="login">Sign In</button>
|
| 57 |
+
<button class="auth-tab" data-tab="register">Sign Up</button>
|
| 58 |
+
</div>
|
| 59 |
+
|
| 60 |
+
<form id="loginForm" class="auth-form">
|
| 61 |
+
<div class="form-group">
|
| 62 |
+
<label class="form-label">Username</label>
|
| 63 |
+
<input type="text" class="form-input" name="username" placeholder="Enter your username" required>
|
| 64 |
+
</div>
|
| 65 |
+
<div class="form-group">
|
| 66 |
+
<label class="form-label">Password</label>
|
| 67 |
+
<input type="password" class="form-input" name="password" placeholder="Enter your password"
|
| 68 |
+
required>
|
| 69 |
+
</div>
|
| 70 |
+
<div id="loginError" class="form-error hidden"></div>
|
| 71 |
+
<button type="submit" class="btn btn-primary auth-btn">
|
| 72 |
+
<span class="btn-text">Sign In</span>
|
| 73 |
+
<span class="btn-loader hidden">
|
| 74 |
+
<div class="loading-spinner"></div>
|
| 75 |
+
</span>
|
| 76 |
+
</button>
|
| 77 |
+
</form>
|
| 78 |
+
|
| 79 |
+
<form id="registerForm" class="auth-form hidden">
|
| 80 |
+
<div class="form-group">
|
| 81 |
+
<label class="form-label">Username</label>
|
| 82 |
+
<input type="text" class="form-input" name="username" placeholder="Choose a username" required
|
| 83 |
+
minlength="3">
|
| 84 |
+
</div>
|
| 85 |
+
<div class="form-group">
|
| 86 |
+
<label class="form-label">Email (optional)</label>
|
| 87 |
+
<input type="email" class="form-input" name="email" placeholder="your@email.com">
|
| 88 |
+
</div>
|
| 89 |
+
<div class="form-group">
|
| 90 |
+
<label class="form-label">Password</label>
|
| 91 |
+
<input type="password" class="form-input" name="password" placeholder="Create a password" required
|
| 92 |
+
minlength="6">
|
| 93 |
+
</div>
|
| 94 |
+
<div id="registerError" class="form-error hidden"></div>
|
| 95 |
+
<button type="submit" class="btn btn-primary auth-btn">
|
| 96 |
+
<span class="btn-text">Create Admin Account</span>
|
| 97 |
+
<span class="btn-loader hidden">
|
| 98 |
+
<div class="loading-spinner"></div>
|
| 99 |
+
</span>
|
| 100 |
+
</button>
|
| 101 |
+
</form>
|
| 102 |
+
|
| 103 |
+
<form id="employeeLoginForm" class="auth-form hidden">
|
| 104 |
+
<div class="form-group">
|
| 105 |
+
<label class="form-label">Email</label>
|
| 106 |
+
<input type="email" class="form-input" name="email" placeholder="Enter your work email" required>
|
| 107 |
+
</div>
|
| 108 |
+
<div class="form-group">
|
| 109 |
+
<label class="form-label">Password</label>
|
| 110 |
+
<input type="password" class="form-input" name="password" placeholder="Enter your password"
|
| 111 |
+
required>
|
| 112 |
+
</div>
|
| 113 |
+
<div id="employeeLoginError" class="form-error hidden"></div>
|
| 114 |
+
<button type="submit" class="btn btn-primary auth-btn">
|
| 115 |
+
<span class="btn-text">Sign In as Employee</span>
|
| 116 |
+
<span class="btn-loader hidden">
|
| 117 |
+
<div class="loading-spinner"></div>
|
| 118 |
+
</span>
|
| 119 |
+
</button>
|
| 120 |
+
</form>
|
| 121 |
+
</div>
|
| 122 |
+
</div>
|
| 123 |
+
|
| 124 |
+
<!-- Add Employee Modal -->
|
| 125 |
+
<div class="modal-overlay" id="addEmployeeModal">
|
| 126 |
+
<div class="modal glass-panel">
|
| 127 |
+
<div class="modal-header">
|
| 128 |
+
<h2 class="modal-title">➕ Add Employee</h2>
|
| 129 |
+
<p class="modal-subtitle">Create login credentials for a new employee</p>
|
| 130 |
+
</div>
|
| 131 |
+
<form id="addEmployeeForm" class="auth-form">
|
| 132 |
+
<div class="form-group">
|
| 133 |
+
<label class="form-label">Employee Email</label>
|
| 134 |
+
<input type="email" class="form-input" name="email" placeholder="employee@company.com" required>
|
| 135 |
+
</div>
|
| 136 |
+
<div class="form-group">
|
| 137 |
+
<label class="form-label">Password</label>
|
| 138 |
+
<input type="password" class="form-input" name="password" placeholder="Create a password" required
|
| 139 |
+
minlength="6">
|
| 140 |
+
</div>
|
| 141 |
+
<div id="addEmployeeError" class="form-error hidden"></div>
|
| 142 |
+
<div class="modal-actions">
|
| 143 |
+
<button type="button" class="btn btn-secondary" id="cancelAddEmployee">Cancel</button>
|
| 144 |
+
<button type="submit" class="btn btn-primary">
|
| 145 |
+
<span class="btn-text">Add Employee</span>
|
| 146 |
+
<span class="btn-loader hidden">
|
| 147 |
+
<div class="loading-spinner"></div>
|
| 148 |
+
</span>
|
| 149 |
+
</button>
|
| 150 |
+
</div>
|
| 151 |
+
</form>
|
| 152 |
+
</div>
|
| 153 |
+
</div>
|
| 154 |
+
|
| 155 |
+
<!-- Create Bucket Modal -->
|
| 156 |
+
<div class="modal-overlay" id="createBucketModal">
|
| 157 |
+
<div class="modal glass-panel">
|
| 158 |
+
<div class="modal-header">
|
| 159 |
+
<h2 class="modal-title">📁 Create Bucket</h2>
|
| 160 |
+
<p class="modal-subtitle">Organize your documents into buckets</p>
|
| 161 |
+
</div>
|
| 162 |
+
<form id="createBucketForm" class="auth-form">
|
| 163 |
+
<div class="form-group">
|
| 164 |
+
<label class="form-label">Bucket Name</label>
|
| 165 |
+
<input type="text" class="form-input" name="name" placeholder="e.g., Project Alpha" required>
|
| 166 |
+
</div>
|
| 167 |
+
<div class="form-group">
|
| 168 |
+
<label class="form-label">Description (optional)</label>
|
| 169 |
+
<input type="text" class="form-input" name="description" placeholder="Brief description...">
|
| 170 |
+
</div>
|
| 171 |
+
<div id="createBucketError" class="form-error hidden"></div>
|
| 172 |
+
<div class="modal-actions">
|
| 173 |
+
<button type="button" class="btn btn-secondary" id="cancelCreateBucket">Cancel</button>
|
| 174 |
+
<button type="submit" class="btn btn-primary">
|
| 175 |
+
<span class="btn-text">Create Bucket</span>
|
| 176 |
+
<span class="btn-loader hidden">
|
| 177 |
+
<div class="loading-spinner"></div>
|
| 178 |
+
</span>
|
| 179 |
+
</button>
|
| 180 |
+
</div>
|
| 181 |
+
</form>
|
| 182 |
+
</div>
|
| 183 |
+
</div>
|
| 184 |
+
|
| 185 |
+
<!-- Document Viewer Modal -->
|
| 186 |
+
<div class="modal-overlay" id="docViewerModal">
|
| 187 |
+
<div class="modal glass-panel doc-viewer-modal">
|
| 188 |
+
<div class="doc-viewer-header">
|
| 189 |
+
<h3 id="docViewerTitle">Document</h3>
|
| 190 |
+
<button class="btn btn-ghost" id="closeDocViewer">✕</button>
|
| 191 |
+
</div>
|
| 192 |
+
<div class="doc-viewer-content" id="docViewerContent">
|
| 193 |
+
<div class="loading-spinner"></div>
|
| 194 |
+
</div>
|
| 195 |
+
</div>
|
| 196 |
+
</div>
|
| 197 |
+
|
| 198 |
+
<!-- Main App Container -->
|
| 199 |
+
<div class="app-container" id="appContainer">
|
| 200 |
+
<main class="main-content">
|
| 201 |
+
<!-- LEFT SIDEBAR -->
|
| 202 |
+
<aside class="sidebar sidebar-left" id="leftSidebar">
|
| 203 |
+
<div class="sidebar-toggle" id="leftToggle" title="Toggle sidebar">
|
| 204 |
+
<span class="toggle-icon">◀</span>
|
| 205 |
+
</div>
|
| 206 |
+
|
| 207 |
+
<div class="sidebar-content">
|
| 208 |
+
<!-- User Info -->
|
| 209 |
+
<section class="sidebar-section glass-panel user-section">
|
| 210 |
+
<div class="user-info-row">
|
| 211 |
+
<div class="user-badge">
|
| 212 |
+
<div class="user-avatar" id="userAvatar">U</div>
|
| 213 |
+
<div class="user-details">
|
| 214 |
+
<span id="userName">User</span>
|
| 215 |
+
<span class="user-role" id="userRole">Admin</span>
|
| 216 |
+
</div>
|
| 217 |
+
</div>
|
| 218 |
+
<button class="btn btn-logout" id="logoutBtn" title="Sign Out">logout</button>
|
| 219 |
+
</div>
|
| 220 |
+
</section>
|
| 221 |
+
|
| 222 |
+
<!-- Admin: Employees -->
|
| 223 |
+
<section class="sidebar-section glass-panel collapsible hidden" id="adminSection">
|
| 224 |
+
<div class="section-header" data-target="employeesList">
|
| 225 |
+
<h3 class="sidebar-title"><span></span> Employees</h3>
|
| 226 |
+
<div class="section-actions">
|
| 227 |
+
<button class="btn btn-ghost" id="addEmployeeBtn" title="Add">➕</button>
|
| 228 |
+
<span class="collapse-icon">▼</span>
|
| 229 |
+
</div>
|
| 230 |
+
</div>
|
| 231 |
+
<div class="section-body" id="employeesList">
|
| 232 |
+
<div class="empty-state small">
|
| 233 |
+
<div class="empty-text">No employees</div>
|
| 234 |
+
</div>
|
| 235 |
+
</div>
|
| 236 |
+
</section>
|
| 237 |
+
|
| 238 |
+
<!-- Buckets -->
|
| 239 |
+
<section class="sidebar-section glass-panel collapsible">
|
| 240 |
+
<div class="section-header" data-target="bucketsBody">
|
| 241 |
+
<h3 class="sidebar-title"><span>📁</span> Buckets</h3>
|
| 242 |
+
<div class="section-actions">
|
| 243 |
+
<button class="btn btn-ghost" id="createBucketBtn" title="Create">➕</button>
|
| 244 |
+
<span class="collapse-icon">▼</span>
|
| 245 |
+
</div>
|
| 246 |
+
</div>
|
| 247 |
+
<div class="section-body" id="bucketsBody">
|
| 248 |
+
<div class="buckets-list" id="bucketsList">
|
| 249 |
+
<div class="bucket-item active" data-id="">
|
| 250 |
+
<span class="bucket-name">📂 All Documents</span>
|
| 251 |
+
</div>
|
| 252 |
+
</div>
|
| 253 |
+
</div>
|
| 254 |
+
</section>
|
| 255 |
+
|
| 256 |
+
<!-- Upload -->
|
| 257 |
+
<section class="sidebar-section glass-panel collapsible">
|
| 258 |
+
<div class="section-header" data-target="uploadBody">
|
| 259 |
+
<h3 class="sidebar-title"><span></span> Upload</h3>
|
| 260 |
+
<span class="collapse-icon">▼</span>
|
| 261 |
+
</div>
|
| 262 |
+
<div class="section-body" id="uploadBody">
|
| 263 |
+
<div class="custom-select" id="uploadBucketWrapper">
|
| 264 |
+
<div class="select-trigger" id="uploadBucketTrigger">
|
| 265 |
+
<span class="select-value">No Bucket (General)</span>
|
| 266 |
+
<span class="select-arrow">▼</span>
|
| 267 |
+
</div>
|
| 268 |
+
<div class="select-options" id="uploadBucketOptions"></div>
|
| 269 |
+
<input type="hidden" id="uploadBucketSelect" value="">
|
| 270 |
+
</div>
|
| 271 |
+
<div class="upload-zone" id="uploadZone">
|
| 272 |
+
<input type="file" id="fileInput" hidden multiple
|
| 273 |
+
accept=".pdf,.doc,.docx,.ppt,.pptx,.xls,.xlsx,.txt,.md,.png,.jpg,.jpeg,.gif,.webp">
|
| 274 |
+
<div class="upload-icon">📁</div>
|
| 275 |
+
<div class="upload-title">Drop files here</div>
|
| 276 |
+
<div class="upload-subtitle">or click to browse</div>
|
| 277 |
+
</div>
|
| 278 |
+
<div id="uploadProgress" class="hidden">
|
| 279 |
+
<div class="progress-info">
|
| 280 |
+
<div class="loading-spinner"></div>
|
| 281 |
+
<span id="uploadStatus">Uploading...</span>
|
| 282 |
+
</div>
|
| 283 |
+
<div class="progress-bar">
|
| 284 |
+
<div class="progress-fill" id="progressFill"></div>
|
| 285 |
+
</div>
|
| 286 |
+
<button class="btn btn-cancel-upload" id="cancelUploadBtn" title="Cancel Upload">✕
|
| 287 |
+
Cancel</button>
|
| 288 |
+
</div>
|
| 289 |
+
</div>
|
| 290 |
+
</section>
|
| 291 |
+
</div>
|
| 292 |
+
</aside>
|
| 293 |
+
|
| 294 |
+
<!-- CHAT AREA (CENTER) -->
|
| 295 |
+
<section class="chat-container glass-panel">
|
| 296 |
+
<!-- Bucket Filter -->
|
| 297 |
+
<div class="chat-bucket-filter">
|
| 298 |
+
<span class="filter-label">🔍 Querying:</span>
|
| 299 |
+
<div class="custom-select compact" id="chatBucketWrapper">
|
| 300 |
+
<div class="select-trigger" id="chatBucketTrigger">
|
| 301 |
+
<span class="select-value">All Documents</span>
|
| 302 |
+
<span class="select-arrow">▼</span>
|
| 303 |
+
</div>
|
| 304 |
+
<div class="select-options" id="chatBucketOptions"></div>
|
| 305 |
+
<input type="hidden" id="chatBucketSelect" value="">
|
| 306 |
+
</div>
|
| 307 |
+
<button class="btn btn-new-chat" id="newChatBtn" title="Start New Chat">➕ New Chat</button>
|
| 308 |
+
<button class="btn btn-clear-chat" id="clearChatBtnTop" title="Clear Current Chat">Clear</button>
|
| 309 |
+
</div>
|
| 310 |
+
|
| 311 |
+
<!-- Messages -->
|
| 312 |
+
<div class="chat-messages" id="chatMessages">
|
| 313 |
+
<!-- Document Summary Panel -->
|
| 314 |
+
<div class="summary-panel hidden" id="summaryPanel">
|
| 315 |
+
<div class="summary-header">
|
| 316 |
+
<span class="summary-icon">📄</span>
|
| 317 |
+
<span class="summary-title" id="summaryTitle">Document Summary</span>
|
| 318 |
+
</div>
|
| 319 |
+
<div class="summary-content" id="summaryContent">
|
| 320 |
+
<div class="summary-text" id="summaryText"></div>
|
| 321 |
+
</div>
|
| 322 |
+
<button class="summary-close" id="summaryClose" title="Close summary">✕</button>
|
| 323 |
+
</div>
|
| 324 |
+
|
| 325 |
+
<div class="welcome-screen" id="welcomeScreen">
|
| 326 |
+
<div class="welcome-icon"><img src="/images/WhatsApp Image 2025-12-23 at 5.10.00 PM.jpeg"
|
| 327 |
+
alt="Logo"
|
| 328 |
+
style="width: 220px; height: auto; filter: invert(1); mix-blend-mode: lighten;"></div>
|
| 329 |
+
<h2 class="welcome-title">Welcome to Iribl AI</h2>
|
| 330 |
+
<p class="welcome-subtitle">
|
| 331 |
+
Upload documents, organize into buckets, and ask questions.
|
| 332 |
+
</p>
|
| 333 |
+
</div>
|
| 334 |
+
</div>
|
| 335 |
+
|
| 336 |
+
<!-- Typing Indicator -->
|
| 337 |
+
<div class="typing-indicator hidden" id="typingIndicator">
|
| 338 |
+
<div class="message-avatar">🧠</div>
|
| 339 |
+
<div class="typing-dots">
|
| 340 |
+
<div class="typing-dot"></div>
|
| 341 |
+
<div class="typing-dot"></div>
|
| 342 |
+
<div class="typing-dot"></div>
|
| 343 |
+
</div>
|
| 344 |
+
</div>
|
| 345 |
+
|
| 346 |
+
<!-- Chat Input -->
|
| 347 |
+
<div class="chat-input-container">
|
| 348 |
+
<div class="chat-input-wrapper">
|
| 349 |
+
<textarea class="chat-input" id="chatInput" placeholder="Ask anything about your documents..."
|
| 350 |
+
rows="1"></textarea>
|
| 351 |
+
<button class="send-btn" id="sendBtn" disabled title="Send">➤</button>
|
| 352 |
+
<button class="stop-btn hidden" id="stopBtn" title="Stop generating">■</button>
|
| 353 |
+
</div>
|
| 354 |
+
</div>
|
| 355 |
+
</section>
|
| 356 |
+
|
| 357 |
+
<!-- RIGHT SIDEBAR -->
|
| 358 |
+
<aside class="sidebar sidebar-right" id="rightSidebar">
|
| 359 |
+
<div class="sidebar-toggle" id="rightToggle" title="Toggle sidebar">
|
| 360 |
+
<span class="toggle-icon">▶</span>
|
| 361 |
+
</div>
|
| 362 |
+
|
| 363 |
+
<div class="sidebar-content">
|
| 364 |
+
<!-- Documents -->
|
| 365 |
+
<section class="sidebar-section glass-panel documents-section collapsible">
|
| 366 |
+
<div class="section-header" data-target="documentsBody">
|
| 367 |
+
<h3 class="sidebar-title">
|
| 368 |
+
<span>📚</span> Documents
|
| 369 |
+
<span id="docCount" class="doc-count">(0)</span>
|
| 370 |
+
</h3>
|
| 371 |
+
<span class="collapse-icon">▼</span>
|
| 372 |
+
</div>
|
| 373 |
+
<div class="section-body documents-body" id="documentsBody">
|
| 374 |
+
<div class="documents-list" id="documentsList">
|
| 375 |
+
<div class="empty-state">
|
| 376 |
+
<div class="empty-icon">📭</div>
|
| 377 |
+
<div class="empty-text">No documents yet</div>
|
| 378 |
+
</div>
|
| 379 |
+
</div>
|
| 380 |
+
</div>
|
| 381 |
+
</section>
|
| 382 |
+
|
| 383 |
+
<!-- Chat History -->
|
| 384 |
+
<section class="sidebar-section glass-panel chat-history-section collapsible">
|
| 385 |
+
<div class="section-header" data-target="chatHistoryBody">
|
| 386 |
+
<h3 class="sidebar-title">
|
| 387 |
+
<span>💬</span> Chat History
|
| 388 |
+
<span id="chatHistoryCount" class="doc-count">(0)</span>
|
| 389 |
+
</h3>
|
| 390 |
+
<div class="section-actions">
|
| 391 |
+
<button class="btn btn-ghost" id="clearChatBtn" title="Clear current chat">🗑️</button>
|
| 392 |
+
<span class="collapse-icon">▼</span>
|
| 393 |
+
</div>
|
| 394 |
+
</div>
|
| 395 |
+
<div class="section-body chat-history-body" id="chatHistoryBody">
|
| 396 |
+
<div class="chat-history-list" id="chatHistoryList">
|
| 397 |
+
<div class="empty-state small">
|
| 398 |
+
<div class="empty-text">No chats yet</div>
|
| 399 |
+
</div>
|
| 400 |
+
</div>
|
| 401 |
+
</div>
|
| 402 |
+
</section>
|
| 403 |
+
</div>
|
| 404 |
+
</aside>
|
| 405 |
+
</main>
|
| 406 |
+
</div>
|
| 407 |
+
|
| 408 |
+
<script src="/js/app.js"></script>
|
| 409 |
+
</body>
|
| 410 |
+
|
| 411 |
+
</html>
|
static/js/app.js
ADDED
|
@@ -0,0 +1,1798 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Iribl AI - Document Intelligence Application
|
| 3 |
+
* With Dual Sidebars, Collapsible Sections, and Animated Dropdowns
|
| 4 |
+
*/
|
| 5 |
+
|
| 6 |
+
// ==================== App State ====================
|
| 7 |
+
/**
 * Read and JSON-decode a localStorage entry without letting bad data abort
 * script start-up.
 *
 * @param {string} key - localStorage key to read.
 * @param {*} fallback - Value returned when the entry is missing, unreadable,
 *     not valid JSON, or rejected by `isValid`.
 * @param {(value: *) => boolean} isValid - Shape check for the decoded value.
 * @returns {*} The decoded value, or `fallback`.
 */
function readStoredJson(key, fallback, isValid) {
    try {
        const raw = localStorage.getItem(key);
        if (!raw) return fallback;
        const parsed = JSON.parse(raw);
        return isValid(parsed) ? parsed : fallback;
    } catch (err) {
        // Corrupted JSON (or blocked storage) must not take the whole app down.
        console.warn(`Ignoring unreadable localStorage entry "${key}"`, err);
        return fallback;
    }
}

// Single mutable store for everything the UI needs to remember at runtime.
const state = {
    token: localStorage.getItem('Iribl AI_token'),
    // Persisted user record; null when nobody is logged in or the entry is unusable.
    user: readStoredJson(
        'Iribl AI_user',
        null,
        (value) => value !== null && typeof value === 'object'
    ),
    documents: [],
    buckets: [],
    employees: [],
    messages: [],
    summaries: {},  // doc_id -> summary text cache
    selectedDocument: null,  // Currently selected document for summary display
    selectedBucket: '',
    chatBucket: '',
    isLoading: false,
    currentRole: 'admin',
    // Chat History (always an array, even if the stored entry is malformed)
    chatHistory: readStoredJson('Iribl AI_chat_history', [], Array.isArray),
    currentChatId: null,
    // Upload cancellation
    uploadCancelled: false,
    currentUploadAbortController: null,
    // Stream abort controller for stopping generation
    streamAbortController: null
};
|
| 29 |
+
|
| 30 |
+
// ==================== DOM Elements ====================
|
| 31 |
+
// Cache of DOM nodes used throughout the app. Every property name is the same
// as the element's id, so the map is generated from a list of ids. An id that
// is absent from the page yields null, exactly as document.getElementById does.
const elements = (() => {
    const ids = [
        // Auth
        'authModal', 'loginForm', 'registerForm', 'employeeLoginForm',
        'authTabs', 'loginError', 'registerError', 'employeeLoginError',

        // Modals
        'addEmployeeModal', 'addEmployeeForm', 'addEmployeeError',
        'addEmployeeBtn', 'cancelAddEmployee',
        'createBucketModal', 'createBucketForm', 'createBucketError',
        'createBucketBtn', 'cancelCreateBucket',
        'docViewerModal', 'docViewerTitle', 'docViewerContent', 'closeDocViewer',

        // Sidebars
        'leftSidebar', 'rightSidebar', 'leftToggle', 'rightToggle',

        // App
        'appContainer', 'userName', 'userAvatar', 'userRole', 'logoutBtn',

        // Admin
        'adminSection', 'employeesList',

        // Buckets
        'bucketsList',

        // Custom Dropdowns
        'uploadBucketWrapper', 'uploadBucketTrigger', 'uploadBucketOptions',
        'uploadBucketSelect',
        'chatBucketWrapper', 'chatBucketTrigger', 'chatBucketOptions',
        'chatBucketSelect',

        // Upload
        'uploadZone', 'fileInput', 'uploadProgress', 'uploadStatus',
        'progressFill', 'cancelUploadBtn',

        // Documents
        'documentsList', 'docCount',

        // Chat
        'chatMessages', 'welcomeScreen', 'chatInput', 'sendBtn', 'stopBtn',
        'typingIndicator', 'toastContainer',

        // Summary Panel
        'summaryPanel', 'summaryTitle', 'summaryText', 'summaryClose',

        // Chat History
        'newChatBtn', 'clearChatBtn', 'clearChatBtnTop', 'chatHistoryList',
        'chatHistoryCount',

        // Mobile Navigation
        'mobileNav', 'mobileBackdrop', 'mobileLeftToggle', 'mobileChatToggle',
        'mobileRightToggle'
    ];

    const lookup = {};
    for (const id of ids) {
        lookup[id] = document.getElementById(id);
    }
    return lookup;
})();
|
| 129 |
+
|
| 130 |
+
// ==================== Toast ====================
|
| 131 |
+
/**
 * Show a transient notification in the toast container.
 *
 * The toast is built from DOM nodes and the message is assigned as text, so
 * markup characters in `message` are displayed literally rather than being
 * parsed as HTML.
 *
 * @param {string} message - Text to display.
 * @param {string} [type='info'] - One of 'success', 'error' or 'info'. It is
 *     also added as a CSS class; unknown types fall back to the info icon.
 */
function showToast(message, type = 'info') {
    const icons = { success: '✅', error: '❌', info: 'ℹ️' };

    const toast = document.createElement('div');
    toast.className = `toast ${type}`;

    const iconEl = document.createElement('span');
    iconEl.className = 'toast-icon';
    iconEl.textContent = icons[type] || icons.info;

    const messageEl = document.createElement('span');
    messageEl.className = 'toast-message';
    messageEl.textContent = String(message);

    const closeBtn = document.createElement('button');
    closeBtn.className = 'toast-close';
    closeBtn.textContent = '✕';
    closeBtn.addEventListener('click', () => toast.remove());

    toast.appendChild(iconEl);
    toast.appendChild(messageEl);
    toast.appendChild(closeBtn);
    elements.toastContainer.appendChild(toast);

    // Auto-dismiss after 4s; remove() is a no-op if the user already closed it.
    setTimeout(() => toast.remove(), 4000);
}
|
| 140 |
+
|
| 141 |
+
// ==================== Sidebar Toggle ====================
|
| 142 |
+
/**
 * Wire the desktop collapse/expand handles of both sidebars.
 *
 * Clicking a handle toggles the `collapsed` class on its sidebar and flips the
 * arrow glyph inside the handle's `.toggle-icon` element.
 */
function initSidebars() {
    const wireToggle = (handle, sidebar, collapsedGlyph, expandedGlyph) => {
        handle.addEventListener('click', () => {
            // toggle() reports whether the class is present after the call.
            const isCollapsed = sidebar.classList.toggle('collapsed');
            const arrow = handle.querySelector('.toggle-icon');
            arrow.textContent = isCollapsed ? collapsedGlyph : expandedGlyph;
        });
    };

    // The arrows are mirrored: each one points the way the panel will move.
    wireToggle(elements.leftToggle, elements.leftSidebar, '▶', '◀');
    wireToggle(elements.rightToggle, elements.rightSidebar, '◀', '▶');
}
|
| 155 |
+
|
| 156 |
+
// ==================== Mobile Navigation ====================
|
| 157 |
+
/**
 * Set up the mobile navigation: bottom-bar buttons, backdrop, resize handling,
 * the `window.startNewChat` wrapper and edge-swipe gestures.
 *
 * On mobile the sidebars are overlays marked with the `mobile-open` class; at
 * most one is open at a time, and the chat button is the active one whenever
 * both are closed.
 */
function initMobileNavigation() {
    const MOBILE_MAX_WIDTH = 768;   // viewport width (px) treated as mobile
    const EDGE_ZONE = 30;           // px from a screen edge where an opening swipe must start
    const MIN_SWIPE_DISTANCE = 80;  // px of horizontal travel that counts as a swipe

    const isMobile = () => window.innerWidth <= MOBILE_MAX_WIDTH;

    // Return to the "chat only" layout: no overlay, page scroll restored.
    const closeMobileSidebars = () => {
        [elements.leftSidebar, elements.rightSidebar].forEach((sidebar) => {
            sidebar.classList.remove('mobile-open');
        });
        elements.mobileBackdrop.classList.remove('active');
        document.body.style.overflow = '';

        [elements.mobileLeftToggle, elements.mobileRightToggle].forEach((button) => {
            button.classList.remove('active');
        });
        elements.mobileChatToggle.classList.add('active');
    };

    // Show one sidebar as an overlay and highlight its nav button.
    const openSidebar = (sidebar, navButton) => {
        closeMobileSidebars();
        sidebar.classList.add('mobile-open');
        elements.mobileBackdrop.classList.add('active');
        document.body.style.overflow = 'hidden';  // lock page scroll behind the overlay

        navButton.classList.add('active');
        elements.mobileChatToggle.classList.remove('active');
    };

    const openLeftSidebar = () => openSidebar(elements.leftSidebar, elements.mobileLeftToggle);   // Menu
    const openRightSidebar = () => openSidebar(elements.rightSidebar, elements.mobileRightToggle); // Docs

    const isOpen = (sidebar) => sidebar.classList.contains('mobile-open');

    // Nav buttons: the side buttons toggle their sidebar, the chat button closes everything.
    elements.mobileLeftToggle.addEventListener('click', () => {
        isOpen(elements.leftSidebar) ? closeMobileSidebars() : openLeftSidebar();
    });
    elements.mobileChatToggle.addEventListener('click', () => {
        closeMobileSidebars();
    });
    elements.mobileRightToggle.addEventListener('click', () => {
        isOpen(elements.rightSidebar) ? closeMobileSidebars() : openRightSidebar();
    });

    // Tapping the dimmed backdrop dismisses whichever sidebar is open.
    elements.mobileBackdrop.addEventListener('click', closeMobileSidebars);

    // Growing past the mobile breakpoint drops all mobile overlay state.
    window.addEventListener('resize', () => {
        if (isMobile()) return;
        closeMobileSidebars();
    });

    // If a global startNewChat exists, wrap it so a new chat also closes the
    // sidebars on mobile.
    const originalStartNewChat = window.startNewChat;
    if (typeof originalStartNewChat === 'function') {
        window.startNewChat = function () {
            if (isMobile()) closeMobileSidebars();
            return originalStartNewChat.apply(this, arguments);
        };
    }

    // Swipe gestures: remember where the touch began, decide when it ends.
    let touchStartX = 0;

    document.addEventListener('touchstart', (event) => {
        touchStartX = event.changedTouches[0].screenX;
    }, { passive: true });

    document.addEventListener('touchend', (event) => {
        if (!isMobile()) return;

        const swipeDistance = event.changedTouches[0].screenX - touchStartX;
        const swipedRight = swipeDistance > MIN_SWIPE_DISTANCE;
        const swipedLeft = swipeDistance < -MIN_SWIPE_DISTANCE;

        // Swipe right from the left edge opens the left sidebar.
        if (touchStartX < EDGE_ZONE && swipedRight) {
            openLeftSidebar();
        }

        // Swipe left from the right edge opens the right sidebar.
        if (touchStartX > window.innerWidth - EDGE_ZONE && swipedLeft) {
            openRightSidebar();
        }

        // Swiping an open sidebar back toward its own edge closes it.
        if (isOpen(elements.leftSidebar) && swipedLeft) {
            closeMobileSidebars();
        }
        if (isOpen(elements.rightSidebar) && swipedRight) {
            closeMobileSidebars();
        }
    }, { passive: true });
}
|
| 274 |
+
|
| 275 |
+
// ==================== Collapsible Sections ====================
|
| 276 |
+
/**
 * Make every `.collapsible` section fold and unfold when its header is clicked.
 *
 * Clicks that land on a `.btn` inside the header (e.g. the section's action
 * buttons) are ignored so those buttons keep their own behaviour.
 */
function initCollapsible() {
    const headers = document.querySelectorAll('.collapsible .section-header');

    for (const header of headers) {
        header.addEventListener('click', (event) => {
            const clickedButton = event.target.closest('.btn');
            if (clickedButton) return;

            header.closest('.collapsible').classList.toggle('collapsed');
        });
    }
}
|
| 287 |
+
|
| 288 |
+
// ==================== Custom Dropdowns ====================
|
| 289 |
+
/**
 * Wire the open/close behaviour of the two custom bucket dropdowns.
 *
 * A click outside an open dropdown closes it. A click on a dropdown's trigger
 * toggles that dropdown and closes the other one.
 */
function initCustomDropdowns() {
    // Any click that lands outside an open dropdown closes it.
    document.addEventListener('click', (event) => {
        const openDropdowns = document.querySelectorAll('.custom-select.open');
        for (const dropdown of openDropdowns) {
            if (dropdown.contains(event.target)) continue;
            dropdown.classList.remove('open');
        }
    });

    const wireTrigger = (trigger, ownWrapper, otherWrapper) => {
        trigger.addEventListener('click', (event) => {
            // Keep the document-level click handler out of this click.
            event.stopPropagation();
            ownWrapper.classList.toggle('open');
            otherWrapper.classList.remove('open');
        });
    };

    // Upload bucket dropdown
    wireTrigger(elements.uploadBucketTrigger, elements.uploadBucketWrapper, elements.chatBucketWrapper);
    // Chat bucket dropdown
    wireTrigger(elements.chatBucketTrigger, elements.chatBucketWrapper, elements.uploadBucketWrapper);
}
|
| 313 |
+
|
| 314 |
+
/**
 * Rebuild the option lists of the upload and chat bucket dropdowns from
 * `state.buckets`.
 *
 * Options are created as DOM nodes and bucket names are inserted as text, so a
 * name containing `<`, `&` or quotes is shown literally instead of being
 * parsed as HTML. The option matching the dropdown's current hidden-input
 * value is marked active; if no option matches, the default (empty-value)
 * option is marked active.
 *
 * Choosing an option stores its value in the hidden input, updates the trigger
 * label, moves the `active` class and closes the dropdown. The chat dropdown
 * additionally writes the value to `state.chatBucket`.
 */
function updateDropdownOptions() {
    // Build one <div class="select-option"> with an icon span and a text label.
    const buildOption = (value, icon, label, isActive) => {
        const option = document.createElement('div');
        option.className = isActive ? 'select-option active' : 'select-option';
        option.dataset.value = value;

        const iconEl = document.createElement('span');
        iconEl.className = 'option-icon';
        iconEl.textContent = icon;

        option.appendChild(iconEl);
        option.appendChild(document.createTextNode(` ${label}`));
        return option;
    };

    // Render one dropdown and attach the selection handlers to its options.
    const renderDropdown = ({ wrapper, trigger, optionsEl, input, defaultLabel, onSelect }) => {
        const entries = [
            { value: '', icon: '📂', label: defaultLabel },
            ...state.buckets.map((b) => ({ value: String(b.bucket_id), icon: '📁', label: b.name }))
        ];

        // Keep the highlighted option in step with the value already selected;
        // fall back to the default entry when that value is no longer listed.
        const current = input.value;
        const activeValue = entries.some((entry) => entry.value === current) ? current : '';

        const options = entries.map((entry) =>
            buildOption(entry.value, entry.icon, entry.label, entry.value === activeValue)
        );

        const fragment = document.createDocumentFragment();
        options.forEach((option) => fragment.appendChild(option));
        optionsEl.innerHTML = '';
        optionsEl.appendChild(fragment);

        options.forEach((option) => {
            option.addEventListener('click', () => {
                const value = option.dataset.value;
                input.value = value;
                if (onSelect) onSelect(value);
                trigger.querySelector('.select-value').textContent = option.textContent.trim();
                options.forEach((other) => other.classList.remove('active'));
                option.classList.add('active');
                wrapper.classList.remove('open');
            });
        });
    };

    // Upload dropdown options
    renderDropdown({
        wrapper: elements.uploadBucketWrapper,
        trigger: elements.uploadBucketTrigger,
        optionsEl: elements.uploadBucketOptions,
        input: elements.uploadBucketSelect,
        defaultLabel: 'No Bucket (General)'
    });

    // Chat dropdown options (the selection also drives which bucket is queried)
    renderDropdown({
        wrapper: elements.chatBucketWrapper,
        trigger: elements.chatBucketTrigger,
        optionsEl: elements.chatBucketOptions,
        input: elements.chatBucketSelect,
        defaultLabel: 'All Documents',
        onSelect: (value) => { state.chatBucket = value; }
    });
}
|
| 353 |
+
|
| 354 |
+
// ==================== Auth ====================
|
| 355 |
+
/** Show the authentication modal and blur the application container. */
function showAuthModal() {
    const { authModal, appContainer } = elements;
    authModal.classList.add('active');
    appContainer.style.filter = 'blur(5px)';
}
|
| 359 |
+
|
| 360 |
+
/** Hide the authentication modal and clear the blur filter on the application container. */
function hideAuthModal() {
    const { authModal, appContainer } = elements;
    authModal.classList.remove('active');
    appContainer.style.filter = '';
}
|
| 364 |
+
|
| 365 |
+
/**
 * Sync the header and admin section with `state.user`.
 *
 * With no user the auth modal is shown. With a user, the name, avatar initial
 * and role label are filled in, the admin section is shown only for admins
 * (whose employee list is then loaded), and the auth modal is hidden.
 */
function updateAuthUI() {
    const user = state.user;
    if (!user) {
        showAuthModal();
        return;
    }

    const isAdmin = user.role === 'admin';
    elements.userName.textContent = user.username;
    elements.userAvatar.textContent = user.username.charAt(0).toUpperCase();
    elements.userRole.textContent = isAdmin ? 'Admin' : 'Employee';

    elements.adminSection.classList.toggle('hidden', !isAdmin);
    if (isAdmin) {
        loadEmployees();
    }

    hideAuthModal();
}
|
| 382 |
+
|
| 383 |
+
// Role tabs
|
| 384 |
+
// Role switcher: records the chosen role and shows the matching form.
document.querySelectorAll('.role-tab').forEach((roleTab) => {
    roleTab.addEventListener('click', () => {
        for (const other of document.querySelectorAll('.role-tab')) {
            other.classList.remove('active');
        }
        roleTab.classList.add('active');
        state.currentRole = roleTab.dataset.role;

        // Admin: tab strip and login form visible. Any other role: only the employee login form.
        const adminSelected = state.currentRole === 'admin';
        elements.authTabs.classList.toggle('hidden', !adminSelected);
        elements.loginForm.classList.toggle('hidden', !adminSelected);
        elements.registerForm.classList.add('hidden');
        elements.employeeLoginForm.classList.toggle('hidden', adminSelected);
    });
});

// Auth tabs: switch between the admin login and register forms.
document.querySelectorAll('.auth-tab').forEach((authTab) => {
    authTab.addEventListener('click', () => {
        for (const other of document.querySelectorAll('.auth-tab')) {
            other.classList.remove('active');
        }
        authTab.classList.add('active');

        const selected = authTab.dataset.tab;
        elements.loginForm.classList.toggle('hidden', selected !== 'login');
        elements.registerForm.classList.toggle('hidden', selected !== 'register');
    });
});
|
| 414 |
+
|
| 415 |
+
// Admin Login
|
| 416 |
+
/**
 * Shared submit routine for the admin login, admin register and employee
 * login forms.
 *
 * Shows the button spinner, posts the JSON payload to `url`, and on success
 * stores the session in `state` and localStorage, refreshes the UI, and starts
 * loading buckets, documents and chat history. On failure the message is
 * written into `errorEl`.
 *
 * @param {HTMLFormElement} form - The submitted form (contains the `.auth-btn`).
 * @param {Object} options
 * @param {string} options.url - Endpoint to POST to.
 * @param {function(FormData): Object} options.buildPayload - Builds the JSON body.
 * @param {HTMLElement} options.errorEl - Element that displays the error text.
 * @param {string} options.successToast - Toast message shown on success.
 */
async function _submitAuthForm(form, { url, buildPayload, errorEl, successToast }) {
    const formData = new FormData(form);
    const btn = form.querySelector('.auth-btn');
    const btnText = btn.querySelector('.btn-text');
    const btnLoader = btn.querySelector('.btn-loader');

    btnText.classList.add('hidden');
    btnLoader.classList.remove('hidden');
    errorEl.classList.add('hidden');

    try {
        const response = await fetch(url, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(buildPayload(formData))
        });
        const data = await response.json();
        if (response.ok) {
            state.token = data.token;
            state.user = { user_id: data.user_id, username: data.username, role: data.role };
            localStorage.setItem('Iribl AI_token', state.token);
            localStorage.setItem('Iribl AI_user', JSON.stringify(state.user));
            updateAuthUI();
            loadBuckets();
            loadDocuments();
            loadChatHistoryFromServer();
            showToast(successToast, 'success');
        } else {
            // Fall back to a generic message so the error box is never shown empty.
            errorEl.textContent = data.error || 'Request failed';
            errorEl.classList.remove('hidden');
        }
    } catch (error) {
        // Reached on network failure, a non-JSON reply, or an exception in the success path.
        errorEl.textContent = 'Connection error';
        errorEl.classList.remove('hidden');
    } finally {
        btnText.classList.remove('hidden');
        btnLoader.classList.add('hidden');
    }
}

elements.loginForm.addEventListener('submit', (e) => {
    e.preventDefault();
    _submitAuthForm(e.target, {
        url: '/api/auth/login',
        buildPayload: (fd) => ({ username: fd.get('username'), password: fd.get('password'), role: 'admin' }),
        errorEl: elements.loginError,
        successToast: 'Welcome back!'
    });
});

// Admin Register
elements.registerForm.addEventListener('submit', (e) => {
    e.preventDefault();
    _submitAuthForm(e.target, {
        url: '/api/auth/register/admin',
        buildPayload: (fd) => ({ username: fd.get('username'), email: fd.get('email'), password: fd.get('password') }),
        errorEl: elements.registerError,
        successToast: 'Account created!'
    });
});

// Employee Login (the form's `email` field is sent as `username`)
elements.employeeLoginForm.addEventListener('submit', (e) => {
    e.preventDefault();
    _submitAuthForm(e.target, {
        url: '/api/auth/login',
        buildPayload: (fd) => ({ username: fd.get('email'), password: fd.get('password'), role: 'employee' }),
        errorEl: elements.employeeLoginError,
        successToast: 'Welcome!'
    });
});
|
| 528 |
+
|
| 529 |
+
// Logout
|
| 530 |
+
// Logout: drop the session and every piece of per-user state, then reset the UI.
elements.logoutBtn.addEventListener('click', () => {
    state.token = null;
    state.user = null;
    state.documents = [];
    state.buckets = [];
    state.messages = [];
    // Also clear cached per-user data so it cannot be shown to the next account
    // that signs in on this page.
    state.employees = [];
    state.summaries = {};
    state.selectedDocument = null;
    localStorage.removeItem('Iribl AI_token');
    localStorage.removeItem('Iribl AI_user');

    elements.summaryPanel.classList.add('hidden');
    updateAuthUI();
    renderDocuments();
    renderMessages();
    // Reset the bucket filter (sidebar and both dropdowns) so the next login does
    // not request documents for a bucket id left over from the previous session.
    selectBucket('');
    showToast('Logged out', 'info');
});
|
| 543 |
+
|
| 544 |
+
// ==================== Employees ====================
|
| 545 |
+
/**
 * Fetch the employee list from `/api/admin/employees` into `state.employees`
 * and re-render it. Does nothing unless a token is present and the current
 * user's role is `admin`. Non-OK responses are ignored; exceptions are logged.
 */
async function loadEmployees() {
    const isAdmin = state.user?.role === 'admin';
    if (!state.token || !isAdmin) return;

    try {
        const response = await fetch('/api/admin/employees', {
            headers: { 'Authorization': `Bearer ${state.token}` }
        });
        if (!response.ok) return;

        const { employees } = await response.json();
        state.employees = employees;
        renderEmployees();
    } catch (error) {
        console.error('Failed to load employees:', error);
    }
}
|
| 556 |
+
|
| 557 |
+
/**
 * Render `state.employees` into the employees list, or an empty-state
 * placeholder when there are none.
 *
 * The displayed email/username is HTML-escaped. The id passed to the inline
 * `deleteEmployee(...)` handler is emitted as an escaped JSON string literal,
 * so quotes or markup in either value cannot break out of the generated HTML.
 */
function renderEmployees() {
    if (state.employees.length === 0) {
        elements.employeesList.innerHTML = `<div class="empty-state small"><div class="empty-text">No employees</div></div>`;
        return;
    }

    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => (
        { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[ch]
    ));
    // JSON-encode first (valid JS string literal), then escape for the HTML attribute.
    // String() keeps the handler argument a string, as the original quoted literal did.
    const jsArg = (value) => escapeHtml(JSON.stringify(String(value)));

    elements.employeesList.innerHTML = state.employees.map((emp) => `
        <div class="employee-item">
            <span class="employee-email">${escapeHtml(emp.email || emp.username)}</span>
            <button class="btn btn-ghost" onclick="deleteEmployee(${jsArg(emp.user_id)})" title="Remove">🗑️</button>
        </div>
    `).join('');
}
|
| 569 |
+
|
| 570 |
+
// Open the "add employee" modal with a cleared form and the error hidden.
elements.addEmployeeBtn.addEventListener('click', (event) => {
    event.stopPropagation();
    elements.addEmployeeModal.classList.add('active');
    elements.addEmployeeError.classList.add('hidden');
    elements.addEmployeeForm.reset();
});

// Cancel simply closes the modal.
elements.cancelAddEmployee.addEventListener('click', () => {
    elements.addEmployeeModal.classList.remove('active');
});

// Submit: POST the new employee, then refresh the list or show the error.
elements.addEmployeeForm.addEventListener('submit', async (event) => {
    event.preventDefault();

    const fields = new FormData(event.target);
    const submitBtn = event.target.querySelector('.btn-primary');
    const label = submitBtn.querySelector('.btn-text');
    const spinner = submitBtn.querySelector('.btn-loader');
    const showError = (message) => {
        elements.addEmployeeError.textContent = message;
        elements.addEmployeeError.classList.remove('hidden');
    };

    label.classList.add('hidden');
    spinner.classList.remove('hidden');

    try {
        const response = await fetch('/api/admin/employees', {
            method: 'POST',
            headers: { 'Authorization': `Bearer ${state.token}`, 'Content-Type': 'application/json' },
            body: JSON.stringify({ email: fields.get('email'), password: fields.get('password') })
        });
        const data = await response.json();

        if (!response.ok) {
            showError(data.error);
        } else {
            elements.addEmployeeModal.classList.remove('active');
            loadEmployees();
            showToast('Employee added!', 'success');
        }
    } catch (error) {
        showError('Connection error');
    }

    // Restore the button whether the request succeeded or failed.
    label.classList.remove('hidden');
    spinner.classList.add('hidden');
});
|
| 608 |
+
|
| 609 |
+
/**
 * Delete an employee on the server and, on success, remove it from
 * `state.employees` and re-render the list.
 *
 * A non-OK response or a thrown error both produce an error toast; the
 * original only reported thrown errors and stayed silent on HTTP failures.
 *
 * @param {string} employeeId - `user_id` of the employee to remove.
 */
async function deleteEmployee(employeeId) {
    try {
        const response = await fetch(`/api/admin/employees/${encodeURIComponent(employeeId)}`, {
            method: 'DELETE',
            headers: { 'Authorization': `Bearer ${state.token}` }
        });
        if (!response.ok) {
            showToast('Failed to remove employee', 'error');
            return;
        }
        state.employees = state.employees.filter((e) => e.user_id !== employeeId);
        renderEmployees();
        showToast('Employee removed', 'success');
    } catch (error) {
        showToast('Failed to remove employee', 'error');
    }
}
|
| 619 |
+
|
| 620 |
+
// ==================== Buckets ====================
|
| 621 |
+
/**
 * Fetch the bucket list from `/api/buckets` into `state.buckets`, then
 * re-render the sidebar list and rebuild both dropdowns. Does nothing without
 * a token. Non-OK responses are ignored; exceptions are logged.
 */
async function loadBuckets() {
    if (!state.token) return;

    try {
        const response = await fetch('/api/buckets', {
            headers: { 'Authorization': `Bearer ${state.token}` }
        });
        if (!response.ok) return;

        const { buckets } = await response.json();
        state.buckets = buckets;
        renderBuckets();
        updateDropdownOptions();
    } catch (error) {
        console.error('Failed to load buckets:', error);
    }
}
|
| 633 |
+
|
| 634 |
+
/**
 * Render the sidebar bucket list: a fixed "All Documents" entry followed by
 * one row per bucket in `state.buckets`. The row matching
 * `state.selectedBucket` gets the `active` class.
 *
 * Bucket names and ids are HTML-escaped. Ids passed to inline handlers are
 * emitted as escaped JSON string literals, so quotes or markup in bucket data
 * cannot inject HTML or break the handler.
 */
function renderBuckets() {
    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => (
        { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[ch]
    ));
    // JSON-encode first (valid JS string literal), then escape for the HTML attribute.
    // String() keeps the handler argument a string, as the original quoted literal did.
    const jsArg = (value) => escapeHtml(JSON.stringify(String(value)));

    let html = `<div class="bucket-item ${state.selectedBucket === '' ? 'active' : ''}" onclick="selectBucket('')">
        <span class="bucket-name">📂 All Documents</span>
    </div>`;

    html += state.buckets.map((b) => `
        <div class="bucket-item ${state.selectedBucket === b.bucket_id ? 'active' : ''}" data-id="${escapeHtml(b.bucket_id)}">
            <span class="bucket-name" onclick="selectBucket(${jsArg(b.bucket_id)})">📁 ${escapeHtml(b.name)}</span>
            <span class="bucket-count">${escapeHtml(b.doc_count)}</span>
            <button class="btn btn-ghost bucket-delete" onclick="event.stopPropagation(); deleteBucket(${jsArg(b.bucket_id)})">🗑️</button>
        </div>
    `).join('');

    elements.bucketsList.innerHTML = html;
}
|
| 649 |
+
|
| 650 |
+
/**
 * Make `bucketId` the active bucket everywhere: sidebar selection, chat
 * filter, and both dropdowns (value, trigger label, active option). Then
 * re-render the bucket list, reload documents and re-render the chat history.
 *
 * @param {string} bucketId - Bucket id, or '' for "all documents / no bucket".
 */
function selectBucket(bucketId) {
    state.selectedBucket = bucketId;
    state.chatBucket = bucketId; // keep the chat filter in sync with the sidebar

    // Labels: the bucket's name when one is selected, otherwise each dropdown's default text.
    let chatLabel = 'All Documents';
    let uploadLabel = 'No Bucket (General)';
    if (bucketId) {
        const match = state.buckets.find((b) => b.bucket_id === bucketId);
        const name = match?.name || 'Selected Bucket';
        chatLabel = name;
        uploadLabel = name;
    }

    const syncDropdown = (selectEl, trigger, optionsEl, label) => {
        selectEl.value = bucketId;
        trigger.querySelector('.select-value').textContent = label;
        for (const opt of optionsEl.querySelectorAll('.select-option')) {
            opt.classList.toggle('active', opt.dataset.value === bucketId);
        }
    };

    syncDropdown(elements.uploadBucketSelect, elements.uploadBucketTrigger, elements.uploadBucketOptions, uploadLabel);
    syncDropdown(elements.chatBucketSelect, elements.chatBucketTrigger, elements.chatBucketOptions, chatLabel);

    // Refresh everything that depends on the selected bucket.
    renderBuckets();
    loadDocuments();
    renderChatHistory();
}
|
| 680 |
+
|
| 681 |
+
// Open the "create bucket" modal with a cleared form and the error hidden.
elements.createBucketBtn.addEventListener('click', (event) => {
    event.stopPropagation();
    elements.createBucketModal.classList.add('active');
    elements.createBucketError.classList.add('hidden');
    elements.createBucketForm.reset();
});

// Cancel simply closes the modal.
elements.cancelCreateBucket.addEventListener('click', () => {
    elements.createBucketModal.classList.remove('active');
});

// Submit: POST the new bucket, then reload the bucket list or show the error.
elements.createBucketForm.addEventListener('submit', async (event) => {
    event.preventDefault();

    const fields = new FormData(event.target);
    const submitBtn = event.target.querySelector('.btn-primary');
    const label = submitBtn.querySelector('.btn-text');
    const spinner = submitBtn.querySelector('.btn-loader');
    const showError = (message) => {
        elements.createBucketError.textContent = message;
        elements.createBucketError.classList.remove('hidden');
    };

    label.classList.add('hidden');
    spinner.classList.remove('hidden');

    try {
        const response = await fetch('/api/buckets', {
            method: 'POST',
            headers: { 'Authorization': `Bearer ${state.token}`, 'Content-Type': 'application/json' },
            body: JSON.stringify({ name: fields.get('name'), description: fields.get('description') })
        });
        const data = await response.json();

        if (!response.ok) {
            showError(data.error);
        } else {
            elements.createBucketModal.classList.remove('active');
            loadBuckets();
            showToast('Bucket created!', 'success');
        }
    } catch (error) {
        showError('Connection error');
    }

    // Restore the button whether the request succeeded or failed.
    label.classList.remove('hidden');
    spinner.classList.add('hidden');
});
|
| 719 |
+
|
| 720 |
+
/**
 * Delete a bucket on the server, then refresh the bucket and document lists.
 *
 * If the deleted bucket was the selected one, the selection is reset through
 * `selectBucket('')`. That also resets the chat filter and both dropdowns,
 * which the original left pointing at the deleted bucket. A non-OK response
 * or a thrown error both produce an error toast.
 *
 * @param {string} bucketId - Id of the bucket to delete.
 */
async function deleteBucket(bucketId) {
    try {
        const response = await fetch(`/api/buckets/${encodeURIComponent(bucketId)}`, {
            method: 'DELETE',
            headers: { 'Authorization': `Bearer ${state.token}` }
        });
        if (!response.ok) {
            showToast('Failed to delete bucket', 'error');
            return;
        }

        if (state.selectedBucket === bucketId) {
            // selectBucket('') reloads documents itself.
            selectBucket('');
        } else {
            loadDocuments();
        }
        loadBuckets();
        showToast('Bucket deleted', 'success');
    } catch (error) {
        showToast('Failed to delete bucket', 'error');
    }
}
|
| 731 |
+
|
| 732 |
+
// ==================== Documents ====================
|
| 733 |
+
/**
 * Fetch the document list into `state.documents` and re-render it. The list
 * is filtered by `state.selectedBucket` when one is set.
 *
 * The bucket id is URL-encoded so ids containing reserved characters
 * (`&`, `#`, spaces, ...) cannot corrupt the query string. Does nothing
 * without a token. Non-OK responses are ignored; exceptions are logged.
 */
async function loadDocuments() {
    if (!state.token) return;

    try {
        let url = '/api/documents';
        if (state.selectedBucket) {
            url += `?bucket_id=${encodeURIComponent(state.selectedBucket)}`;
        }

        const response = await fetch(url, { headers: { 'Authorization': `Bearer ${state.token}` } });
        if (response.ok) {
            const data = await response.json();
            state.documents = data.documents;
            renderDocuments();
        }
    } catch (error) {
        console.error('Failed to load documents:', error);
    }
}
|
| 747 |
+
|
| 748 |
+
/**
 * Render `state.documents` into the documents list and update the counter.
 * Shows an empty-state placeholder when there are no documents. The row
 * matching `state.selectedDocument` gets the `selected` class.
 *
 * Filenames and ids are HTML-escaped. Arguments to the inline handlers are
 * emitted as escaped JSON string literals, so a filename containing quotes
 * or markup (e.g. `it's.pdf`) no longer breaks the View button or injects HTML.
 */
function renderDocuments() {
    elements.docCount.textContent = `(${state.documents.length})`;

    if (state.documents.length === 0) {
        elements.documentsList.innerHTML = `<div class="empty-state"><div class="empty-icon">📭</div><div class="empty-text">No documents yet</div></div>`;
        return;
    }

    const icons = { pdf: '📕', word: '📘', powerpoint: '📙', excel: '📗', image: '🖼️', text: '📄' };

    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => (
        { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[ch]
    ));
    // JSON-encode first (valid JS string literal), then escape for the HTML attribute.
    // String() keeps the handler arguments strings, as the original quoted literals did.
    const jsArg = (value) => escapeHtml(JSON.stringify(String(value)));

    elements.documentsList.innerHTML = state.documents.map((doc) => `
        <div class="document-item ${state.selectedDocument === doc.doc_id ? 'selected' : ''}" data-id="${escapeHtml(doc.doc_id)}" onclick="selectDocument(${jsArg(doc.doc_id)})">
            <div class="doc-icon">${icons[doc.doc_type] || '📄'}</div>
            <div class="doc-info">
                <div class="doc-name">${escapeHtml(doc.filename)}</div>
                <div class="doc-meta">${escapeHtml(formatDate(doc.created_at))}</div>
            </div>
            <button class="btn btn-ghost doc-view" onclick="event.stopPropagation(); viewDocument(${jsArg(doc.doc_id)}, ${jsArg(doc.filename)})" title="View">👁️</button>
            <button class="btn btn-ghost doc-delete" onclick="event.stopPropagation(); deleteDocument(${jsArg(doc.doc_id)})" title="Delete">🗑️</button>
        </div>
    `).join('');
}
|
| 770 |
+
|
| 771 |
+
/**
 * Format a timestamp relative to the current time.
 *
 * @param {number} timestamp - Timestamp in seconds (it is multiplied by 1000 before use).
 * @returns {string} 'Just now' under a minute, 'Nm ago' under an hour,
 *   'Nh ago' under a day, otherwise the locale date string.
 */
function formatDate(timestamp) {
    const MINUTE_MS = 60000;
    const HOUR_MS = 3600000;
    const DAY_MS = 86400000;

    const when = new Date(timestamp * 1000);
    const elapsedMs = Date.now() - when.getTime();

    if (elapsedMs < MINUTE_MS) return 'Just now';

    // [upper bound, unit size, suffix], checked in ascending order.
    const buckets = [
        [HOUR_MS, MINUTE_MS, 'm'],
        [DAY_MS, HOUR_MS, 'h']
    ];
    for (const [limit, unit, suffix] of buckets) {
        if (elapsedMs < limit) {
            return `${Math.floor(elapsedMs / unit)}${suffix} ago`;
        }
    }
    return when.toLocaleDateString();
}
|
| 780 |
+
|
| 781 |
+
/**
 * Delete a document on the server. On success it is removed from
 * `state.documents` and from the summary cache, the summary panel is closed
 * if it was showing that document, and the document list and buckets are
 * refreshed.
 *
 * A non-OK response or a thrown error both produce an error toast; the
 * original only reported thrown errors and stayed silent on HTTP failures.
 *
 * @param {string} docId - Id of the document to delete.
 */
async function deleteDocument(docId) {
    try {
        const response = await fetch(`/api/documents/${encodeURIComponent(docId)}`, {
            method: 'DELETE',
            headers: { 'Authorization': `Bearer ${state.token}` }
        });
        if (!response.ok) {
            showToast('Failed to delete', 'error');
            return;
        }

        state.documents = state.documents.filter((d) => d.doc_id !== docId);
        // Clear the selection if the deleted document was selected.
        if (state.selectedDocument === docId) {
            state.selectedDocument = null;
            hideSummary();
        }
        // Drop the cached summary, if any.
        delete state.summaries[docId];
        renderDocuments();
        loadBuckets(); // bucket rows display a document count
        showToast('Document deleted', 'success');
    } catch (error) {
        showToast('Failed to delete', 'error');
    }
}
|
| 799 |
+
|
| 800 |
+
// ==================== Document Summary ====================
|
| 801 |
+
/**
 * Mark a document as selected, re-render the list so the row is highlighted,
 * and show its summary.
 *
 * @param {string} docId - Id of the document to select.
 */
function selectDocument(docId) {
    state.selectedDocument = docId;
    renderDocuments();
    displaySummary(docId);
}
|
| 806 |
+
|
| 807 |
+
/**
 * Show the summary panel for a document in `state.documents`.
 *
 * A cached summary is shown immediately. Otherwise a placeholder is shown
 * while the summary is fetched from the server. Unknown ids are ignored.
 *
 * @param {string} docId - Id of the document whose summary to display.
 */
async function displaySummary(docId) {
    const doc = state.documents.find((d) => d.doc_id === docId);
    if (!doc) return;

    const cached = state.summaries[docId];
    if (cached) {
        showSummaryPanel(doc.filename, cached.summary);
        return;
    }

    // Nothing cached: show a loading message, then let fetchSummary replace it.
    showSummaryPanel(doc.filename, 'Generating summary...');
    await fetchSummary(docId);
}
|
| 821 |
+
|
| 822 |
+
/**
 * Fetch a document summary from `/api/documents/<id>/summary`.
 *
 * A successful response with a summary is cached in `state.summaries`. The
 * panel is only updated (with the summary or an error message) if the
 * document is still the selected one when the request finishes.
 *
 * @param {string} docId - Id of the document to summarise.
 */
async function fetchSummary(docId) {
    // The user may have selected another document while the request was pending.
    const stillSelected = () => state.selectedDocument === docId;

    try {
        const response = await fetch(`/api/documents/${docId}/summary`, {
            headers: { 'Authorization': `Bearer ${state.token}` }
        });
        const data = await response.json();
        const succeeded = response.ok && data.summary;

        if (succeeded) {
            state.summaries[docId] = { summary: data.summary, filename: data.filename };
        }
        if (!stillSelected()) return;

        if (succeeded) {
            showSummaryPanel(data.filename, data.summary);
        } else {
            showSummaryPanel(data.filename || 'Document', 'Unable to generate summary.');
        }
    } catch (error) {
        console.error('Failed to fetch summary:', error);
        if (!stillSelected()) return;

        const doc = state.documents.find((d) => d.doc_id === docId);
        showSummaryPanel(doc?.filename || 'Document', 'Failed to load summary.');
    }
}
|
| 853 |
+
|
| 854 |
+
/**
 * Reveal the summary panel and fill in its title and body as plain text.
 *
 * @param {string} filename - Text for the panel title.
 * @param {string} summaryText - Text for the panel body.
 */
function showSummaryPanel(filename, summaryText) {
    const { summaryPanel, summaryTitle, summaryText: summaryBody } = elements;
    summaryPanel.classList.remove('hidden');
    summaryTitle.textContent = filename;
    summaryBody.textContent = summaryText;
}
|
| 859 |
+
|
| 860 |
+
/** Hide the summary panel, clear the selected document and re-render the document list. */
function hideSummary() {
    const { summaryPanel } = elements;
    summaryPanel.classList.add('hidden');
    state.selectedDocument = null;
    renderDocuments();
}
|
| 865 |
+
|
| 866 |
+
/** Bind the summary panel's close button to `hideSummary`. */
function initSummaryPanel() {
    const closeButton = elements.summaryClose;
    closeButton.addEventListener('click', hideSummary);
}
|
| 869 |
+
|
| 870 |
+
// ==================== Document Viewer ====================
|
| 871 |
+
/**
 * Open a document in a new browser tab.
 *
 * The file is fetched with the bearer token (a plain link could not send the
 * Authorization header), wrapped in a blob URL and opened with `window.open`.
 * The blob URL is revoked after a delay so the blob's memory is released; the
 * original never revoked it. A blocked popup is reported to the user.
 *
 * @param {string} docId - Id of the document to open.
 * @param {string} filename - Unused; kept so existing callers stay valid.
 */
async function viewDocument(docId, filename) {
    // Delay before releasing the blob URL; gives the new tab time to load it.
    const REVOKE_DELAY_MS = 60000;

    try {
        const response = await fetch(`/api/documents/${docId}/view`, {
            headers: { 'Authorization': `Bearer ${state.token}` }
        });

        if (!response.ok) {
            showToast('Failed to load document', 'error');
            return;
        }

        const blob = await response.blob();
        const blobUrl = URL.createObjectURL(blob);

        const opened = window.open(blobUrl, '_blank');
        if (!opened) {
            // window.open returns null when the browser blocks the popup.
            URL.revokeObjectURL(blobUrl);
            showToast('Failed to open document', 'error');
            return;
        }
        setTimeout(() => URL.revokeObjectURL(blobUrl), REVOKE_DELAY_MS);
    } catch (error) {
        console.error('Failed to view document:', error);
        showToast('Failed to open document', 'error');
    }
}
|
| 894 |
+
|
| 895 |
+
|
| 896 |
+
// Close button of the document viewer modal.
elements.closeDocViewer.addEventListener('click', () => {
    elements.docViewerModal.classList.remove('active');
});

// ==================== Upload ====================
// Id of the active status-polling interval (null when none); kept at file scope so it can be cancelled.
let currentPollInterval = null;
|
| 900 |
+
|
| 901 |
+
/**
 * Wire up the upload zone: click-to-browse, file-input change, drag-and-drop
 * (with a `dragover` highlight class) and the cancel button.
 */
function initUpload() {
    const { uploadZone, fileInput, cancelUploadBtn } = elements;

    // Start an upload only when the FileList actually contains files.
    const uploadIfAny = (fileList) => {
        if (fileList.length > 0) {
            uploadFiles(Array.from(fileList));
        }
    };

    uploadZone.addEventListener('click', () => fileInput.click());
    fileInput.addEventListener('change', (event) => uploadIfAny(event.target.files));

    uploadZone.addEventListener('dragover', (event) => {
        event.preventDefault();
        uploadZone.classList.add('dragover');
    });
    uploadZone.addEventListener('dragleave', () => {
        uploadZone.classList.remove('dragover');
    });
    uploadZone.addEventListener('drop', (event) => {
        event.preventDefault();
        uploadZone.classList.remove('dragover');
        uploadIfAny(event.dataTransfer.files);
    });

    cancelUploadBtn.addEventListener('click', cancelUpload);
}
|
| 917 |
+
|
| 918 |
+
/**
 * Cancel the current upload: set the cancellation flag, abort the in-flight
 * upload request, stop status polling, reset the upload UI and show a toast.
 */
function cancelUpload() {
    state.uploadCancelled = true;

    // Abort the pending upload request, if there is one.
    const controller = state.currentUploadAbortController;
    if (controller) {
        controller.abort();
        state.currentUploadAbortController = null;
    }

    // Stop status polling, if it is running.
    if (currentPollInterval) {
        clearInterval(currentPollInterval);
        currentPollInterval = null;
    }

    // Return the upload widgets to their idle state.
    const { uploadProgress, uploadZone, fileInput, progressFill } = elements;
    uploadProgress.classList.add('hidden');
    uploadZone.style.pointerEvents = '';
    fileInput.value = '';
    progressFill.style.width = '0%';

    showToast('Upload cancelled', 'info');
}
|
| 941 |
+
|
| 942 |
+
/**
 * Upload files one after another to `/api/documents/upload`, into the bucket
 * currently chosen in the upload dropdown.
 *
 * Per file: HTTP 202 means the server processes it asynchronously and the
 * status endpoint is polled until done; any other OK status is treated as an
 * immediate result; anything else shows an error toast. The loop stops early
 * when the upload is cancelled. Unless cancelled, the upload UI is reset and
 * the document and bucket lists are reloaded at the end.
 *
 * @param {File[]} files - Files to upload.
 */
async function uploadFiles(files) {
    state.uploadCancelled = false;

    elements.uploadProgress.classList.remove('hidden');
    elements.uploadZone.style.pointerEvents = 'none';

    const targetBucket = elements.uploadBucketSelect.value;
    let completed = 0;

    for (const file of files) {
        // Stop before starting another file once the user has cancelled.
        if (state.uploadCancelled) break;

        elements.uploadStatus.textContent = `Uploading ${file.name}...`;
        elements.progressFill.style.width = '10%'; // initial progress

        const payload = new FormData();
        payload.append('file', file);
        payload.append('bucket_id', targetBucket);

        // A fresh controller per request; stored on state so cancelUpload() can abort it.
        const controller = new AbortController();
        state.currentUploadAbortController = controller;

        try {
            const response = await fetch('/api/documents/upload', {
                method: 'POST',
                headers: { 'Authorization': `Bearer ${state.token}` },
                body: payload,
                signal: controller.signal
            });
            // Every branch below needs the parsed JSON body.
            const body = await response.json();

            if (response.status === 202) {
                // Server accepted the file and processes it in the background.
                await pollUploadStatus(body.doc_id, file.name);
                if (!state.uploadCancelled) {
                    completed += 1;
                }
            } else if (response.ok) {
                // Result was returned directly.
                handleUploadSuccess(body);
                completed += 1;
            } else {
                showToast(`Failed: ${file.name} - ${body.error}`, 'error');
            }
        } catch (err) {
            // AbortError means the user cancelled; skip the remaining files.
            if (err.name === 'AbortError') break;
            console.error(err);
            showToast(`Failed to upload ${file.name}`, 'error');
        }
    }

    state.currentUploadAbortController = null;

    // On cancel, cancelUpload() has already reset the UI.
    if (state.uploadCancelled) return;

    elements.uploadProgress.classList.add('hidden');
    elements.uploadZone.style.pointerEvents = '';
    elements.fileInput.value = '';
    elements.progressFill.style.width = '0%';

    await loadDocuments();
    loadBuckets();
}
|
| 1020 |
+
|
| 1021 |
+
/**
 * Poll `/api/documents/<id>/status` every 2 seconds until processing is
 * `completed` or `failed`, updating the progress UI on each response.
 *
 * The returned promise resolves (never rejects) when processing completes,
 * fails, or a tick observes `state.uploadCancelled`. A failed or non-OK
 * status request is ignored and polling continues.
 *
 * Changes from the original:
 * - A tick is skipped while the previous request is still pending, so slow
 *   responses cannot overlap and handle completion twice.
 * - The cancellation flag is re-checked after the request returns, so a
 *   response arriving after cancel no longer touches the UI.
 *
 * NOTE: cancelUpload() clears this interval directly. If it does so while no
 * request is in flight, no further tick runs and the promise stays pending.
 *
 * @param {string} docId - Id returned by the upload endpoint.
 * @param {string} filename - File name shown in status and error messages.
 * @returns {Promise<void>}
 */
async function pollUploadStatus(docId, filename) {
    return new Promise((resolve) => {
        let requestInFlight = false;

        // Stop polling and settle the promise. Only clears the shared handle if it is still ours.
        const finish = () => {
            clearInterval(intervalId);
            if (currentPollInterval === intervalId) {
                currentPollInterval = null;
            }
            resolve();
        };

        const intervalId = setInterval(async () => {
            if (state.uploadCancelled) {
                finish();
                return;
            }
            if (requestInFlight) return; // previous status request has not returned yet
            requestInFlight = true;

            try {
                const response = await fetch(`/api/documents/${docId}/status`, {
                    headers: { 'Authorization': `Bearer ${state.token}` }
                });

                // Non-OK status check: may be a transient failure, retry on the next tick.
                if (!response.ok) return;

                const statusData = await response.json();

                // The user may have cancelled while the request was pending.
                if (state.uploadCancelled) {
                    finish();
                    return;
                }

                elements.uploadStatus.textContent = `Processing ${filename}: ${statusData.message || '...'}`;
                // Never show less than the 10% set when the upload started.
                if (statusData.progress) {
                    elements.progressFill.style.width = `${Math.max(10, statusData.progress)}%`;
                }

                if (statusData.status === 'completed') {
                    if (statusData.result) {
                        handleUploadSuccess(statusData.result);
                    }
                    finish();
                } else if (statusData.status === 'failed') {
                    showToast(`Processing failed: ${filename} - ${statusData.error}`, 'error');
                    finish(); // resolve anyway so the caller continues with the next file
                }
            } catch (e) {
                console.error("Polling error", e);
                // Keep polling despite the error.
            } finally {
                requestInFlight = false;
            }
        }, 2000); // check every 2 seconds

        currentPollInterval = intervalId;
    });
}
|
| 1070 |
+
|
| 1071 |
+
/**
 * React to a document that finished processing: show a "Ready" toast, cache
 * its summary (if any) in `state.summaries`, mark it as the selected
 * document, and schedule the summary panel to open.
 *
 * @param {Object} data - Upload result; `filename`, `doc_id` and the optional
 *     `summary` fields are read.
 */
function handleUploadSuccess(data) {
    const hasSummary = Boolean(data.summary);

    showToast(`Ready: ${data.filename}`, 'success');

    // Remember the summary so it can be shown again later
    if (hasSummary) {
        const cached = { summary: data.summary, filename: data.filename };
        state.summaries[data.doc_id] = cached;
    }

    // Auto-select this document; the document list is re-rendered by the caller
    state.selectedDocument = data.doc_id;

    if (!hasSummary) return;

    // Small delay so the DOM has settled before the panel is shown
    const openPanel = () => showSummaryPanel(data.filename, data.summary);
    setTimeout(openPanel, 500);
}
|
| 1093 |
+
|
| 1094 |
+
// ==================== Chat ====================
|
| 1095 |
+
/**
 * Wire up the chat composer: auto-growing textarea, Enter-to-send, and the
 * send / stop buttons.
 */
function initChat() {
    // Grow the textarea with its content (capped at 150px) and enable the
    // send button only when there is non-whitespace text.
    const resizeAndToggleSend = () => {
        const input = elements.chatInput;
        input.style.height = 'auto';
        input.style.height = `${Math.min(input.scrollHeight, 150)}px`;
        elements.sendBtn.disabled = !input.value.trim();
    };

    // Enter sends the message; Shift+Enter keeps the default newline.
    const submitOnEnter = (e) => {
        if (e.key !== 'Enter' || e.shiftKey) return;
        e.preventDefault();
        sendMessage();
    };

    elements.chatInput.addEventListener('input', resizeAndToggleSend);
    elements.chatInput.addEventListener('keydown', submitOnEnter);
    elements.sendBtn.addEventListener('click', sendMessage);

    // Stop generation button
    elements.stopBtn.addEventListener('click', stopGeneration);
}
|
| 1109 |
+
|
| 1110 |
+
/**
 * Abort the in-flight streaming request (if any), restore the send button,
 * and mark the partially generated assistant message as stopped.
 */
function stopGeneration() {
    const controller = state.streamAbortController;
    if (controller) {
        controller.abort();
        state.streamAbortController = null;
    }

    // Swap the stop button back to the send button and clear loading state
    elements.stopBtn.classList.add('hidden');
    elements.sendBtn.classList.remove('hidden');
    elements.typingIndicator.classList.add('hidden');
    state.isLoading = false;

    // Annotate the last assistant message, but only if it already has content
    const lastMsg = state.messages[state.messages.length - 1];
    const hasPartialReply = state.messages.length > 0
        && lastMsg.role === 'assistant'
        && lastMsg.content;
    if (hasPartialReply) {
        lastMsg.content += '\n\n*[Generation stopped]*';
        renderMessages();
        saveCurrentChat();
    }

    showToast('Generation stopped', 'info');
}
|
| 1134 |
+
|
| 1135 |
+
/**
 * Send the composer text to `/api/chat/stream` and render the streamed reply.
 *
 * The response body is read as a stream of newline-delimited `data: {json}`
 * events with a `type` of `sources`, `chunk`/`content`, `done` or `error`.
 * Content events are appended to a placeholder assistant message; DOM updates
 * are throttled to one per 50 ms. The request can be aborted through
 * `state.streamAbortController` (see `stopGeneration`).
 *
 * Does nothing when the input is empty or another request is in progress.
 *
 * @returns {Promise<void>}
 */
async function sendMessage() {
    const message = elements.chatInput.value.trim();
    if (!message || state.isLoading) return;

    elements.chatInput.value = '';
    elements.chatInput.style.height = 'auto';
    elements.sendBtn.disabled = true;
    elements.welcomeScreen.classList.add('hidden');

    // Create a chat ID if this is the first message
    if (state.messages.length === 0 && !state.currentChatId) {
        state.currentChatId = Date.now().toString();
    }

    const targetChatId = state.currentChatId;
    addMessage('user', message);
    elements.typingIndicator.classList.remove('hidden');
    state.isLoading = true;
    scrollToBottom();

    // Show stop button, hide send button
    elements.sendBtn.classList.add('hidden');
    elements.stopBtn.classList.remove('hidden');

    // Create abort controller for this request
    state.streamAbortController = new AbortController();

    try {
        // Use streaming endpoint for instant response
        const response = await fetch('/api/chat/stream', {
            method: 'POST',
            headers: {
                'Authorization': `Bearer ${state.token}`,
                'Content-Type': 'application/json'
            },
            body: JSON.stringify({
                message: message,
                bucket_id: state.chatBucket || null,
                chat_id: state.currentChatId
            }),
            signal: state.streamAbortController.signal
        });

        if (!response.ok) {
            throw new Error('Stream request failed');
        }

        elements.typingIndicator.classList.add('hidden');

        let streamingContent = '';
        let sources = [];

        // Keep a direct reference to the placeholder message. Writing through
        // `state.messages[length - 1]` would hit the wrong message (or none at
        // all) if `state.messages` is replaced while the stream is running.
        const assistantMessage = { role: 'assistant', content: '', sources: [] };
        state.messages.push(assistantMessage);
        renderMessages();
        scrollToBottom();

        // Get direct reference to the streaming message element for fast updates
        const messageElements = elements.chatMessages.querySelectorAll('.message.assistant .message-content');
        const streamingElement = messageElements[messageElements.length - 1];

        // Throttle DOM updates for smooth rendering (update every 50ms max)
        const UPDATE_INTERVAL = 50; // ms
        let lastUpdateTime = 0;
        let pendingUpdate = false;
        let finalized = false;

        const paintStreamingContent = () => {
            if (streamingElement) {
                streamingElement.innerHTML = formatContent(streamingContent);
            }
            pendingUpdate = false;
        };

        // Flush any throttled update, then do the final render and save.
        const finalizeResponse = () => {
            finalized = true;
            if (pendingUpdate) {
                paintStreamingContent();
            }
            renderMessages();
            saveCurrentChat();
            // No auto-scroll - user stays at current position
        };

        const handleLine = (line) => {
            if (!line.startsWith('data: ')) return;

            let data;
            try {
                data = JSON.parse(line.slice(6));
            } catch (parseError) {
                return; // Skip malformed JSON
            }
            if (data === null || typeof data !== 'object') return;

            if (data.type === 'sources') {
                sources = data.sources || [];
            } else if (data.type === 'chunk' || data.type === 'content') {
                // Support both 'chunk' (legacy) and 'content' (specialized queries)
                streamingContent += data.content;
                // Update state for saving later
                assistantMessage.content = streamingContent;
                assistantMessage.sources = sources;

                const now = Date.now();
                if (now - lastUpdateTime >= UPDATE_INTERVAL) {
                    paintStreamingContent();
                    lastUpdateTime = now;
                } else {
                    pendingUpdate = true;
                }
                // No auto-scroll during streaming - stay at current position
            } else if (data.type === 'done') {
                finalizeResponse();
            } else if (data.type === 'error') {
                assistantMessage.content = data.content || 'Error generating response';
                finalized = true;
                renderMessages();
            }
        };

        const reader = response.body.getReader();
        const decoder = new TextDecoder();
        // Network chunks do not respect event boundaries: carry the trailing
        // partial line over to the next read instead of parsing (and losing) it.
        let buffered = '';

        while (true) {
            const { done, value } = await reader.read();
            if (done) break;

            // stream:true keeps multi-byte characters split across chunks intact
            buffered += decoder.decode(value, { stream: true });
            const lines = buffered.split('\n');
            buffered = lines.pop();

            for (const line of lines) {
                handleLine(line);
            }
        }

        // Flush whatever the decoder and line buffer still hold
        buffered += decoder.decode();
        if (buffered) {
            handleLine(buffered);
        }

        // The stream ended without a 'done'/'error' event: still render and
        // save what was received.
        if (!finalized && streamingContent) {
            finalizeResponse();
        }
    } catch (err) {
        elements.typingIndicator.classList.add('hidden');
        // Only show error if not aborted by user
        if (err.name !== 'AbortError') {
            addMessageToChat(targetChatId, 'assistant', 'Connection error. Please try again.');
        }
    } finally {
        // Cleanup: hide stop button, show send button
        elements.stopBtn.classList.add('hidden');
        elements.sendBtn.classList.remove('hidden');
        state.streamAbortController = null;
        state.isLoading = false;
        // No auto-scroll - user stays at current position
    }
}
|
| 1272 |
+
|
| 1273 |
+
/**
 * Append a message to the active chat and re-render the conversation.
 *
 * @param {string} role - Message author; `'assistant'` triggers an auto-save.
 * @param {string} content - Message text.
 * @param {Array} [sources=[]] - Sources stored alongside the message.
 */
function addMessage(role, content, sources = []) {
    // The very first message of an unsaved conversation gets a fresh chat ID
    const isFirstMessage = state.messages.length === 0;
    if (isFirstMessage && !state.currentChatId) {
        state.currentChatId = Date.now().toString();
    }

    const entry = { role, content, sources };
    state.messages.push(entry);
    renderMessages();

    // Only a completed exchange (assistant reply) is persisted automatically
    if (role !== 'assistant') return;
    saveCurrentChat();
}
|
| 1287 |
+
|
| 1288 |
+
// Add message to a specific chat (handles case where user switched chats during loading)
|
| 1289 |
+
/**
 * Append a message to the chat identified by `chatId`, which may no longer be
 * the chat on screen.
 *
 * If `chatId` is the active chat the message is added, rendered and saved as
 * usual. Otherwise it is appended to the matching entry in
 * `state.chatHistory`, persisted, synced to the server, and the user is
 * notified with a toast. Unknown chat IDs are ignored.
 *
 * @param chatId - ID of the chat the message belongs to.
 * @param {string} role - Message author.
 * @param {string} content - Message text.
 * @param {Array} [sources=[]] - Sources stored alongside the message.
 */
function addMessageToChat(chatId, role, content, sources = []) {
    const entry = { role, content, sources };

    // Fast path: the target chat is the one currently displayed
    if (chatId === state.currentChatId) {
        state.messages.push(entry);
        renderMessages();
        saveCurrentChat();
        return;
    }

    // Otherwise update the stored copy of that chat
    const targetChat = state.chatHistory.find(c => c.id === chatId);
    if (!targetChat) return;

    targetChat.messages.push(entry);
    saveChatHistory();
    syncChatToServer(targetChat);
    renderChatHistory();
    showToast('Response added to previous chat', 'info');
}
|
| 1307 |
+
|
| 1308 |
+
/**
 * Rebuild the chat message list from `state.messages`.
 *
 * With no messages the container is emptied and the welcome screen is shown.
 * Otherwise the container's HTML is regenerated from scratch: summary panel,
 * one bubble per message, and a hidden copy of the welcome screen. Because
 * the summary panel is re-created, its element references in `elements` and
 * its close handler are re-bound at the end.
 *
 * The summary title/text and the per-message role/avatar are HTML-escaped
 * before being interpolated; message bodies go through `formatContent`.
 */
function renderMessages() {
    // Values below are plain text and must not be interpreted as markup when
    // they are written back through innerHTML.
    const escapeHtml = (value) => String(value)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');

    // Preserve summary panel state before re-rendering
    const summaryVisible = !elements.summaryPanel.classList.contains('hidden');
    const summaryTitle = elements.summaryTitle.textContent;
    const summaryText = elements.summaryText.textContent;

    if (state.messages.length === 0) {
        // Clear chat messages and show welcome screen
        elements.chatMessages.innerHTML = '';
        elements.welcomeScreen.classList.remove('hidden');
        elements.chatMessages.appendChild(elements.welcomeScreen);
        // Re-show summary if it was visible
        if (summaryVisible) {
            elements.summaryPanel.classList.remove('hidden');
        }
        return;
    }
    elements.welcomeScreen.classList.add('hidden');

    const html = state.messages.map((msg) => {
        const avatar = msg.role === 'user' ? (state.user?.username?.charAt(0).toUpperCase() || 'U') : '🧠';
        return `<div class="message ${escapeHtml(msg.role)}"><div class="message-avatar">${escapeHtml(avatar)}</div><div class="message-content">${formatContent(msg.content)}</div></div>`;
    }).join('');

    // Build full content with summary panel and welcome screen
    const summaryPanelHTML = `
        <div class="summary-panel ${summaryVisible ? '' : 'hidden'}" id="summaryPanel">
            <div class="summary-header">
                <span class="summary-icon">📄</span>
                <span class="summary-title" id="summaryTitle">${escapeHtml(summaryTitle)}</span>
            </div>
            <div class="summary-content" id="summaryContent">
                <div class="summary-text" id="summaryText">${escapeHtml(summaryText)}</div>
            </div>
            <button class="summary-close" id="summaryClose" title="Close summary">✕</button>
        </div>
    `;

    elements.chatMessages.innerHTML = summaryPanelHTML + html + elements.welcomeScreen.outerHTML;
    document.getElementById('welcomeScreen')?.classList.add('hidden');

    // Re-bind summary panel elements and event listener
    elements.summaryPanel = document.getElementById('summaryPanel');
    elements.summaryTitle = document.getElementById('summaryTitle');
    elements.summaryText = document.getElementById('summaryText');
    elements.summaryClose = document.getElementById('summaryClose');
    elements.summaryClose.addEventListener('click', hideSummary);
}
|
| 1356 |
+
|
| 1357 |
+
/**
 * Convert a markdown-like message into an HTML string.
 *
 * Supported constructs: fenced and inline code, pipe tables, `#`-`####`
 * headers, bold-only lines as headers, bold, italic, horizontal rules,
 * numbered lists, bullets and indented sub-bullets, blockquotes, paragraph
 * and line breaks.
 *
 * Code spans are HTML-escaped and set aside behind placeholders while the
 * other rules run, so their content is shown literally and is not mangled by
 * the header/list/emphasis/newline rules. Text outside code is NOT escaped:
 * raw HTML in the message passes through unchanged.
 *
 * @param {string} content - Raw message text; `null`/`undefined` is treated
 *     as an empty string.
 * @returns {string} HTML markup for the message body.
 */
function formatContent(content) {
    const escapeHtml = (text) => text
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;');

    // Finished code markup is parked here and swapped back in after the
    // newline conversion. Placeholders use private-use characters that the
    // markdown rules below never match.
    const protectedMarkup = [];
    const protect = (markup) => `\uE000${protectedMarkup.push(markup) - 1}\uE001`;

    let html = content == null ? '' : String(content);

    // Code blocks: ```code```
    html = html.replace(/```(\w*)\n?([\s\S]*?)```/g, (match, lang, code) => {
        return protect(`<pre class="code-block${lang ? ' lang-' + lang : ''}"><code>${escapeHtml(code.trim())}</code></pre>`);
    });

    // Inline code: `code`
    html = html.replace(/`([^`]+)`/g, (match, code) => {
        return protect(`<code class="inline-code">${escapeHtml(code)}</code>`);
    });

    // Tables: | Header | Header |
    html = html.replace(/(?:^|\n)(\|.+\|)\n(\|[-:\s|]+\|)\n((?:\|.+\|\n?)+)/gm, (match, headerRow, sepRow, bodyRows) => {
        const headers = headerRow.split('|').filter(cell => cell.trim()).map(cell =>
            `<th>${cell.trim()}</th>`
        ).join('');

        const rows = bodyRows.trim().split('\n').map(row => {
            const cells = row.split('|').filter(cell => cell.trim()).map(cell =>
                `<td>${cell.trim()}</td>`
            ).join('');
            return `<tr>${cells}</tr>`;
        }).join('');

        return `<div class="table-wrapper"><table><thead><tr>${headers}</tr></thead><tbody>${rows}</tbody></table></div>`;
    });

    // Headers: ### Header, ## Header, # Header
    html = html.replace(/^#### (.+)$/gm, '<h4>$1</h4>');
    html = html.replace(/^### (.+)$/gm, '<h3>$1</h3>');
    html = html.replace(/^## (.+)$/gm, '<h2>$1</h2>');
    html = html.replace(/^# (.+)$/gm, '<h1>$1</h1>');

    // Bold headers at start of line (NotebookLM style)
    html = html.replace(/^(\*\*[^*]+\*\*):?\s*$/gm, '<h4>$1</h4>');

    // Bold text: **text**
    html = html.replace(/\*\*([^*]+)\*\*/g, '<strong>$1</strong>');

    // Italic text: *text*
    // The emphasised text must not start or end with whitespace and must stay
    // on one line; otherwise consecutive "* item" bullets would be paired up
    // and turned into <em> before the bullet rule below can see them.
    html = html.replace(/(?<!\*)\*(?!\s)([^*\n]+?)(?<!\s)\*(?!\*)/g, '<em>$1</em>');

    // Horizontal rule: --- or ***
    html = html.replace(/^[-*]{3,}$/gm, '<hr class="divider">');

    // Numbered lists: 1. Item, 2. Item, etc.
    html = html.replace(/^(\d+)\.\s+(.+)$/gm, '<li class="numbered"><span class="list-num">$1.</span> $2</li>');

    // Bullet points: • Item or - Item or * Item at start of line
    html = html.replace(/^[\•\-\*]\s+(.+)$/gm, '<li class="bullet">$1</li>');

    // Sub-bullets with indentation (2+ spaces before bullet)
    html = html.replace(/^[\s]{2,}[\•\-\*]\s+(.+)$/gm, '<li class="sub-bullet">$1</li>');

    // Wrap consecutive numbered list items
    html = html.replace(/(<li class="numbered">[\s\S]*?<\/li>\n?)+/g, '<ol class="formatted-list">$&</ol>');

    // Wrap consecutive bullet items
    html = html.replace(/(<li class="bullet">[\s\S]*?<\/li>\n?)+/g, '<ul class="formatted-list">$&</ul>');

    // Wrap consecutive sub-bullet items
    html = html.replace(/(<li class="sub-bullet">[\s\S]*?<\/li>\n?)+/g, '<ul class="formatted-list sub-list">$&</ul>');

    // Blockquotes: > text
    html = html.replace(/^>\s+(.+)$/gm, '<blockquote>$1</blockquote>');
    // Merge consecutive blockquotes
    html = html.replace(/<\/blockquote>\n<blockquote>/g, '<br>');

    // Double newlines become paragraph breaks
    html = html.replace(/\n\n+/g, '</p><p>');

    // Single newlines become line breaks
    html = html.replace(/\n/g, '<br>');

    // Put the protected code markup back (its own newlines stay untouched)
    html = html.replace(/\uE000(\d+)\uE001/g, (match, index) => protectedMarkup[Number(index)] ?? match);

    // Clean up br tags in lists, headers, tables
    html = html.replace(/<br><li/g, '<li');
    html = html.replace(/<\/li><br>/g, '</li>');
    html = html.replace(/<br><h/g, '<h');
    html = html.replace(/<\/h(\d)><br>/g, '</h$1>');
    html = html.replace(/<br><ul/g, '<ul');
    html = html.replace(/<br><ol/g, '<ol');
    html = html.replace(/<\/ul><br>/g, '</ul>');
    html = html.replace(/<\/ol><br>/g, '</ol>');
    html = html.replace(/<br><table/g, '<table');
    html = html.replace(/<\/table><br>/g, '</table>');
    html = html.replace(/<br><div class="table/g, '<div class="table');
    html = html.replace(/<\/div><br>/g, '</div>');
    html = html.replace(/<br><pre/g, '<pre');
    html = html.replace(/<\/pre><br>/g, '</pre>');
    html = html.replace(/<br><hr/g, '<hr');
    html = html.replace(/<hr[^>]*><br>/g, '<hr class="divider">');
    html = html.replace(/<br><blockquote/g, '<blockquote');
    html = html.replace(/<\/blockquote><br>/g, '</blockquote>');

    // Wrap in paragraph
    html = '<p>' + html + '</p>';

    // Clean up empty paragraphs
    html = html.replace(/<p><\/p>/g, '');
    html = html.replace(/<p>(\s|<br>)*<\/p>/g, '');
    html = html.replace(/<p><(h\d|ul|ol|table|div|pre|hr|blockquote)/g, '<$1');
    html = html.replace(/<\/(h\d|ul|ol|table|div|pre|blockquote)><\/p>/g, '</$1>');
    html = html.replace(/<p><hr/g, '<hr');

    return html;
}
|
| 1470 |
+
|
| 1471 |
+
/** Scroll the chat message container to its very end. */
function scrollToBottom() {
    const pane = elements.chatMessages;
    pane.scrollTop = pane.scrollHeight;
}
|
| 1474 |
+
|
| 1475 |
+
// ==================== Token Verification ====================
|
| 1476 |
+
/**
 * Validate the stored auth token against `/api/auth/verify`.
 *
 * On success the returned user is stored in `state.user` and localStorage,
 * and buckets, documents and chat history are loaded. On a non-OK response
 * the stored credentials are cleared. Whenever verification cannot succeed
 * (no token, rejected token, or any thrown error) the auth modal is shown.
 *
 * @returns {Promise<void>}
 */
async function verifyToken() {
    if (!state.token) {
        showAuthModal();
        return;
    }

    try {
        const response = await fetch('/api/auth/verify', {
            headers: { 'Authorization': `Bearer ${state.token}` }
        });

        if (!response.ok) {
            // Token rejected: forget the credentials and ask the user to log in
            state.token = null;
            state.user = null;
            localStorage.removeItem('Iribl AI_token');
            localStorage.removeItem('Iribl AI_user');
            showAuthModal();
            return;
        }

        state.user = await response.json();
        localStorage.setItem('Iribl AI_user', JSON.stringify(state.user));
        updateAuthUI();
        loadBuckets();
        loadDocuments();
        // Load chat history from server database
        loadChatHistoryFromServer();
    } catch {
        showAuthModal();
    }
}
|
| 1498 |
+
|
| 1499 |
+
// ==================== Chat History ====================
|
| 1500 |
+
/**
 * Derive a short chat title from the first user message.
 *
 * @param {Array<{role: string, content: string}>} messages - Chat messages.
 * @returns {string} The first 40 characters of the first user message
 *     (followed by `...` when it was longer), or `'New Conversation'` when
 *     there is no user message.
 */
function generateChatTopic(messages) {
    const TOPIC_LENGTH = 40;

    for (const entry of messages) {
        if (entry.role !== 'user') continue;

        const text = entry.content;
        const head = text.substring(0, TOPIC_LENGTH);
        return text.length > TOPIC_LENGTH ? head + '...' : head;
    }

    return 'New Conversation';
}
|
| 1511 |
+
|
| 1512 |
+
/**
 * Persist `state.chatHistory` to localStorage under `Iribl AI_chat_history`.
 *
 * Writing is best effort: `localStorage.setItem` throws when the quota is
 * exceeded or storage is unavailable, and callers continue with rendering and
 * server sync right after this call. Failures are therefore logged instead
 * of propagated.
 */
function saveChatHistory() {
    try {
        localStorage.setItem('Iribl AI_chat_history', JSON.stringify(state.chatHistory));
    } catch (error) {
        console.error('Failed to save chat history to local storage:', error);
    }
}
|
| 1515 |
+
|
| 1516 |
+
// Sync chat to server
|
| 1517 |
+
/**
 * POST a chat to `/api/chats`. Skipped when no auth token is set; failures
 * are logged and never thrown.
 *
 * @param {Object} chatData - Chat record sent as the JSON request body.
 * @returns {Promise<void>}
 */
async function syncChatToServer(chatData) {
    const token = state.token;
    if (!token) return;

    try {
        const request = {
            method: 'POST',
            headers: {
                'Authorization': `Bearer ${state.token}`,
                'Content-Type': 'application/json'
            },
            body: JSON.stringify(chatData)
        };
        await fetch('/api/chats', request);
    } catch (error) {
        console.error('Failed to sync chat to server:', error);
    }
}
|
| 1533 |
+
|
| 1534 |
+
// Load chat history from server
|
| 1535 |
+
/**
 * Fetch the chat list from `/api/chats` and, when the server returns at
 * least one chat, replace `state.chatHistory` with it, mirror it to
 * localStorage and re-render the history list.
 *
 * Skipped when no auth token is set. Non-OK responses and empty lists leave
 * the local history untouched; errors are logged and never thrown.
 *
 * @returns {Promise<void>}
 */
async function loadChatHistoryFromServer() {
    if (!state.token) return;

    try {
        const response = await fetch('/api/chats', {
            headers: { 'Authorization': `Bearer ${state.token}` }
        });
        if (!response.ok) return;

        const data = await response.json();
        const serverHasChats = data.chats?.length > 0;
        if (!serverHasChats) return;

        // The server copy wins over whatever is stored locally
        state.chatHistory = data.chats;
        saveChatHistory(); // Update local storage
        renderChatHistory();
    } catch (error) {
        console.error('Failed to load chats from server:', error);
    }
}
|
| 1556 |
+
|
| 1557 |
+
// Delete chat from server
|
| 1558 |
+
/**
 * Send `DELETE /api/chats/{chatId}`. Skipped when no auth token is set;
 * failures are logged and never thrown.
 *
 * @param chatId - ID interpolated into the request URL.
 * @returns {Promise<void>}
 */
async function deleteChatFromServer(chatId) {
    const authenticated = Boolean(state.token);
    if (!authenticated) return;

    try {
        const request = {
            method: 'DELETE',
            headers: { 'Authorization': `Bearer ${state.token}` }
        };
        await fetch(`/api/chats/${chatId}`, request);
    } catch (error) {
        console.error('Failed to delete chat from server:', error);
    }
}
|
| 1570 |
+
|
| 1571 |
+
/**
 * Snapshot the active conversation into `state.chatHistory`.
 *
 * An existing entry with the same ID is replaced; otherwise the snapshot is
 * inserted at the front. The history is then written to localStorage,
 * re-rendered, and the snapshot is synced to the server.
 *
 * @returns {?string} The ID the chat was saved under, or `null` when there
 *     are no messages to save.
 */
function saveCurrentChat() {
    // Nothing to persist for an empty conversation
    if (state.messages.length === 0) return null;

    const chatId = state.currentChatId || Date.now().toString();
    const snapshot = {
        id: chatId,
        topic: generateChatTopic(state.messages),
        messages: state.messages.slice(),
        timestamp: Date.now(),
        bucket: state.chatBucket
    };

    const position = state.chatHistory.findIndex(c => c.id === chatId);
    if (position === -1) {
        // New conversation: newest chats are listed first
        state.chatHistory.unshift(snapshot);
    } else {
        state.chatHistory[position] = snapshot;
    }

    saveChatHistory();
    renderChatHistory();

    // Sync to server
    syncChatToServer(snapshot);

    return chatId;
}
|
| 1605 |
+
|
| 1606 |
+
/**
 * Save the active conversation (if it has messages) and switch to a blank,
 * unsaved chat. Shows a notice first when a response is still streaming.
 */
function startNewChat() {
    if (state.isLoading) {
        showToast('AI is still responding - response will go to current chat', 'info');
    }

    // Keep the conversation we are leaving
    const hasMessages = state.messages.length > 0;
    if (hasMessages) {
        saveCurrentChat();
    }

    // Start from a clean slate
    Object.assign(state, { messages: [], currentChatId: null });

    // Reset UI
    renderMessages();
    elements.welcomeScreen.classList.remove('hidden');
    hideSummary();
    renderChatHistory();

    showToast('Started new chat', 'info');
}
|
| 1629 |
+
|
| 1630 |
+
/**
 * Make the stored chat with the given ID the active conversation.
 *
 * The conversation being left is saved first (when it has messages and is a
 * different chat). The stored messages, ID and bucket are copied into
 * `state`, the bucket dropdown is updated, and the message list and history
 * list are re-rendered. Unknown IDs are ignored after that save step.
 *
 * @param chatId - ID of the chat in `state.chatHistory` to open.
 */
function loadChatFromHistory(chatId) {
    if (state.isLoading) {
        showToast('AI is still responding - response will go to current chat', 'info');
    }

    // Save the conversation we are leaving, unless it is the one being opened
    const switchingChats = state.currentChatId !== chatId;
    if (state.messages.length > 0 && switchingChats) {
        saveCurrentChat();
    }

    const chat = state.chatHistory.find(c => c.id === chatId);
    if (!chat) return;

    // Load the chat
    state.messages = [...chat.messages];
    state.currentChatId = chat.id;
    state.chatBucket = chat.bucket || '';

    // Update bucket dropdown
    if (elements.chatBucketSelect) {
        elements.chatBucketSelect.value = state.chatBucket;

        let bucketName = 'All Documents';
        if (state.chatBucket) {
            const bucket = state.buckets.find(b => b.bucket_id === state.chatBucket);
            bucketName = bucket?.name || 'Selected Bucket';
        }
        elements.chatBucketTrigger.querySelector('.select-value').textContent = bucketName;
    }

    renderMessages();

    // The welcome screen is visible only for an empty chat
    elements.welcomeScreen.classList.toggle('hidden', state.messages.length !== 0);

    renderChatHistory();
    scrollToBottom();
}
|
| 1671 |
+
|
| 1672 |
+
/**
 * Remove a chat from the local history and from the server.
 *
 * If the deleted chat is the active one, the message list is cleared and the
 * welcome screen is shown.
 *
 * @param chatId - ID of the chat to delete.
 * @param {Event} [evt=window.event] - Click event that triggered the
 *     deletion. Its propagation is stopped so the click does not also reach
 *     the enclosing history item. Optional: the function may be called
 *     without any event.
 */
function deleteChatFromHistory(chatId, evt = window.event) {
    // The implicit global `event` is not guaranteed to exist (e.g. when this
    // is called programmatically), so only use an event when one is present.
    evt?.stopPropagation?.();

    state.chatHistory = state.chatHistory.filter(c => c.id !== chatId);

    // If deleting current chat, clear it
    if (state.currentChatId === chatId) {
        state.messages = [];
        state.currentChatId = null;
        renderMessages();
        elements.welcomeScreen.classList.remove('hidden');
    }

    saveChatHistory();
    renderChatHistory();

    // Delete from server
    deleteChatFromServer(chatId);

    showToast('Chat deleted', 'success');
}
|
| 1693 |
+
|
| 1694 |
+
/**
 * Render the chat history list and its counter.
 *
 * When `state.selectedBucket` is set, only chats saved with that bucket are
 * listed and the counter reads `(shown/total)` if the two differ. Each item
 * opens its chat on click and carries a delete button.
 *
 * Chat topics and IDs are escaped before being placed into the markup and
 * the inline click handlers.
 */
function renderChatHistory() {
    const escapeHtml = (value) => String(value)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');

    // Filter chats by selected bucket
    let filteredChats = state.chatHistory;
    if (state.selectedBucket) {
        filteredChats = state.chatHistory.filter(chat => chat.bucket === state.selectedBucket);
    }

    const count = filteredChats.length;
    const totalCount = state.chatHistory.length;

    // Show filtered count vs total if filtering is active
    elements.chatHistoryCount.textContent = state.selectedBucket && count !== totalCount ?
        `(${count}/${totalCount})` : `(${totalCount})`;

    if (count === 0) {
        elements.chatHistoryList.innerHTML = state.selectedBucket ?
            `<div class="empty-state small"><div class="empty-text">No chats in this bucket</div></div>` :
            `<div class="empty-state small"><div class="empty-text">No chats yet</div></div>`;
        return;
    }

    elements.chatHistoryList.innerHTML = filteredChats.map(chat => {
        const isActive = state.currentChatId === chat.id;
        const date = formatDate(chat.timestamp / 1000);
        // JSON.stringify yields a safe JS string literal; escapeHtml then makes
        // that literal safe inside the double-quoted onclick attribute.
        const idLiteral = escapeHtml(JSON.stringify(String(chat.id)));
        return `
            <div class="chat-history-item ${isActive ? 'active' : ''}" onclick="loadChatFromHistory(${idLiteral})">
                <span class="chat-history-icon">💬</span>
                <div class="chat-history-info">
                    <div class="chat-history-topic">${escapeHtml(chat.topic)}</div>
                    <div class="chat-history-date">${date}</div>
                </div>
                <button class="btn btn-ghost chat-history-delete" onclick="deleteChatFromHistory(${idLiteral}, event)" title="Delete">🗑️</button>
            </div>
        `;
    }).join('');
}
|
| 1734 |
+
|
| 1735 |
+
/**
 * Remove all messages from the active conversation while keeping its entry
 * in the history (the emptied chat is saved locally and synced to the
 * server). Shows a notice first when a response is still streaming.
 */
function clearCurrentChat() {
    if (state.isLoading) {
        showToast('AI is still responding - response will go to current chat', 'info');
    }

    // Empty the stored copy of the active chat, if there is one
    const storedChat = state.currentChatId
        ? state.chatHistory.find(c => c.id === state.currentChatId)
        : undefined;
    if (storedChat) {
        storedChat.messages = [];
        saveChatHistory();
        // Sync cleared chat to server
        syncChatToServer(storedChat);
    }

    // Empty the on-screen conversation
    state.messages = [];

    // Reset UI
    renderMessages();
    elements.welcomeScreen.classList.remove('hidden');
    hideSummary();
    renderChatHistory();

    showToast('Chat cleared', 'info');
}
|
| 1764 |
+
|
| 1765 |
+
/**
 * Attach the "new chat" and both "clear chat" button handlers, then render
 * the chat history list.
 */
function initChatHistory() {
    // The sidebar clear button stops propagation before clearing
    const clearFromSidebar = (e) => {
        e.stopPropagation();
        clearCurrentChat();
    };

    elements.newChatBtn.addEventListener('click', startNewChat);
    elements.clearChatBtn.addEventListener('click', clearFromSidebar);
    elements.clearChatBtnTop.addEventListener('click', clearCurrentChat);

    // Show whatever history is already in state
    renderChatHistory();
}
|
| 1784 |
+
|
| 1785 |
+
// ==================== Init ====================
|
| 1786 |
+
/**
 * Application entry point: run every UI initializer in order, then verify
 * the stored auth token.
 */
function init() {
    const initializers = [
        initSidebars,
        initMobileNavigation,
        initCollapsible,
        initCustomDropdowns,
        initUpload,
        initChat,
        initSummaryPanel,
        initChatHistory,
    ];
    for (const initialize of initializers) {
        initialize();
    }

    verifyToken();
}
|
| 1797 |
+
|
| 1798 |
+
// Start the app via init() when the document's DOMContentLoaded event fires.
document.addEventListener('DOMContentLoaded', init);
|
test_chroma.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Smoke check: open a ChromaDB Cloud connection and list its collections
import os

import chromadb
from dotenv import load_dotenv

# Read environment variables (CHROMA_API_KEY is used below) from a .env file
load_dotenv()

# CloudClient is the client class for connecting to ChromaDB Cloud
client = chromadb.CloudClient(
    api_key=os.environ.get("CHROMA_API_KEY"),
    tenant="jash_doshi_211294",
    database="visionextract",
)

print("Connected successfully!")
print("Collections:", client.list_collections())
|