jashdoshi77 commited on
Commit
64deb3c
·
0 Parent(s):

feat: Add AI-powered query understanding with DeepSeek parsing

Browse files
.agent/workflows/push-to-huggingface.md ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ description: Push changes to Hugging Face Spaces
3
+ ---
4
+
5
+ # Push to Hugging Face
6
+
7
+ // turbo-all
8
+
9
+ 1. Stage all changes:
10
+ ```bash
11
+ git add .
12
+ ```
13
+
14
+ 2. Commit with a message:
15
+ ```bash
16
+ git commit -m "Your commit message here"
17
+ ```
18
+
19
+ 3. Push to Hugging Face:
20
+ ```bash
21
+ git push https://jashdoshi77:YOUR_HF_TOKEN@huggingface.co/spaces/jashdoshi77/notebooklm-fast master:main
22
+ ```
23
+
24
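+ **Note**: Replace `YOUR_HF_TOKEN` with your Hugging Face access token. Never commit the real token to the repository, and remember that a token typed into the push URL is stored in your shell history. The Space will automatically rebuild after pushing.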
.env.example ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Groq API (Ultra-fast inference) - Get key from https://console.groq.com
2
+ GROQ_API_KEY=your_groq_api_key_here
3
+
4
+ # Mistral AI API (for OCR) - Get key from https://console.mistral.ai/
5
+ MISTRAL_API_KEY=your_mistral_api_key_here
6
+
7
+ # OpenRouter API (fallback)
8
+ OPENROUTER_API_KEY=your_openrouter_api_key_here
9
+
10
+ # ChromaDB Cloud Configuration
11
+ # Get your API key from https://www.trychroma.com/
12
+ CHROMA_API_KEY=your_chromadb_api_key_here
13
+ CHROMA_TENANT=your_tenant_id
14
+ CHROMA_DATABASE=your_database_name
15
+
16
+ # JWT Secret (change in production)
17
+ JWT_SECRET=Iribl AI-secret-key-change-me-in-production
18
+
19
+ # App Configuration
20
+ FLASK_ENV=development
21
+ FLASK_DEBUG=True
.gitignore ADDED
Binary file (258 Bytes). View file
 
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

WORKDIR /app

# Install system dependencies needed to build Python packages (e.g. PyMuPDF)
RUN apt-get update && apt-get install -y \
    build-essential \
    libffi-dev \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first so the dependency layer is cached between builds
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Create necessary directories
RUN mkdir -p uploads chroma_data

# Hugging Face Spaces uses port 7860
EXPOSE 7860

# Set environment variables
ENV FLASK_ENV=production
ENV PYTHONUNBUFFERED=1

# Run with gunicorn for production.
# A single worker process is used on purpose: app.py keeps upload progress in
# an in-memory dict (processing_status), so with several worker processes a
# status poll could land on a process that never saw the upload and get a 404.
# Concurrency comes from threads instead.
CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--worker-class", "gthread", "--threads", "8", "--workers", "1", "--timeout", "1200", "--access-logfile", "-", "--error-logfile", "-", "app:app"]
README.md ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: NotebookLM Fast
3
+ emoji: 📚
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: docker
7
+ pinned: false
8
+ ---
9
+
10
+ # NotebookLM Fast
11
+
12
+ AI-powered document intelligence platform with RAG (Retrieval Augmented Generation).
13
+
14
+ ## Features
15
+
16
+ - 📄 Upload PDFs, Word docs, Excel files, PowerPoints, and images
17
+ - 💬 Chat with your documents using AI
18
+ - 🗂️ Organize documents in buckets
19
+ - 👥 Admin/Employee role management
20
+ - 🔒 Secure authentication
21
+
22
+ ## Environment Variables
23
+
24
+ Set these as secrets in your Hugging Face Space settings:
25
+
26
+ - `DEEPSEEK_API_KEY` - Your DeepSeek API key (primary LLM)
+ - `OPENROUTER_API_KEY` - Your OpenRouter API key (fallback LLM)
27
+ - `GROQ_API_KEY` - Your Groq API key
28
+ - `CHROMA_API_KEY` - Your ChromaDB Cloud API key
29
+ - `CHROMA_TENANT` - Your ChromaDB tenant ID
30
+ - `CHROMA_DATABASE` - Your ChromaDB database name
31
+ - `JWT_SECRET` - Secret key for JWT tokens
app.py ADDED
@@ -0,0 +1,732 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ NotebookLM Clone - Main Flask Application
3
+ AI-powered document intelligence platform with RAG
4
+ Supports Admin/Employee roles and Bucket organization
5
+ """
6
+
7
+ import os
8
+ import uuid
9
+ from functools import wraps
10
+ from flask import Flask, request, jsonify, send_from_directory, send_file, Response
11
+ from flask_cors import CORS
12
+ from werkzeug.utils import secure_filename
13
+
14
+ from config import Config
15
+ from services.auth_service import auth_service
16
+ from services.document_processor import document_processor
17
+ from services.chroma_service import chroma_service
18
+ from services.rag_service import rag_service
19
+ from services.metadata_extractor import metadata_extractor
20
+
21
+ # Initialize Flask app
22
# Front-end assets are served from the ./static directory
app = Flask(__name__, static_folder='static')
# Cap request bodies at the configured upload limit
app.config.update(MAX_CONTENT_LENGTH=Config.MAX_CONTENT_LENGTH)
# Enable CORS using flask-cors defaults
CORS(app)

# Make sure the upload root is present before any request needs it
os.makedirs(Config.UPLOAD_FOLDER, exist_ok=True)
28
+
29
+
30
+ # ==================== Auth Decorators ====================
31
+
32
def require_auth(f):
    """Wrap a view so it only runs for requests carrying a valid bearer token.

    On success the resolved user is attached as ``request.current_user``
    before the view is called; otherwise a 401 JSON error is returned and
    the view is never invoked.
    """
    @wraps(f)
    def wrapper(*args, **kwargs):
        header_value = request.headers.get('Authorization')
        has_bearer = bool(header_value) and header_value.startswith('Bearer ')
        if not has_bearer:
            return jsonify({"error": "Missing or invalid authorization header"}), 401

        # The token is the second space-separated field of the header
        bearer_token = header_value.split(' ')[1]
        user = auth_service.get_current_user(bearer_token)
        if not user:
            return jsonify({"error": "Invalid or expired token"}), 401

        request.current_user = user
        return f(*args, **kwargs)

    return wrapper
51
+
52
+
53
def require_admin(f):
    """Wrap a view so it only runs for authenticated users whose role is 'admin'.

    Returns 401 when the bearer token is missing or invalid, 403 when the
    user is authenticated but not an admin. On success the user is attached
    as ``request.current_user`` before the view is called.
    """
    @wraps(f)
    def wrapper(*args, **kwargs):
        header_value = request.headers.get('Authorization')
        has_bearer = bool(header_value) and header_value.startswith('Bearer ')
        if not has_bearer:
            return jsonify({"error": "Missing or invalid authorization header"}), 401

        # The token is the second space-separated field of the header
        bearer_token = header_value.split(' ')[1]
        user = auth_service.get_current_user(bearer_token)
        if not user:
            return jsonify({"error": "Invalid or expired token"}), 401

        is_admin = user.get('role') == 'admin'
        if not is_admin:
            return jsonify({"error": "Admin access required"}), 403

        request.current_user = user
        return f(*args, **kwargs)

    return wrapper
75
+
76
+
77
+ # ==================== Static Routes ====================
78
+
79
@app.route('/')
def index():
    """Serve the front-end entry page from the static folder."""
    landing_page = 'index.html'
    return send_from_directory(app.static_folder, landing_page)
82
+
83
@app.route('/<path:path>')
def serve_static(path):
    """Serve any other requested path as a file from the static folder."""
    static_root = app.static_folder
    return send_from_directory(static_root, path)
86
+
87
+
88
+ # ==================== Auth Routes ====================
89
+
90
@app.route('/api/auth/register/admin', methods=['POST'])
def register_admin():
    """Register a new admin account from a JSON body.

    Expects ``username``, ``password`` and ``email`` keys. Returns the token
    and identity fields produced by ``auth_service.register_admin`` on
    success, or a 400 JSON error otherwise.
    """
    data = request.get_json()

    # Reject empty bodies and JSON that is not an object (e.g. a list)
    if not data or not isinstance(data, dict):
        return jsonify({"error": "No data provided"}), 400

    # `or ''` guards against explicit JSON nulls, which would otherwise
    # raise AttributeError on .strip() and surface as a 500
    username = str(data.get('username') or '').strip()
    password = data.get('password', '')
    email = str(data.get('email') or '').strip()

    result = auth_service.register_admin(username, password, email)

    if result['success']:
        return jsonify({
            "token": result['token'],
            "user_id": result['user_id'],
            "username": result['username'],
            "role": result['role']
        })
    else:
        return jsonify({"error": result['error']}), 400
112
+
113
+
114
@app.route('/api/auth/login', methods=['POST'])
def login():
    """Authenticate a user from a JSON body.

    Expects ``username`` and ``password``; ``role`` defaults to 'admin'.
    Returns the token and identity fields produced by ``auth_service.login``
    on success, or a 401 JSON error when the service rejects the login.
    """
    data = request.get_json()

    # Reject empty bodies and JSON that is not an object (e.g. a list)
    if not data or not isinstance(data, dict):
        return jsonify({"error": "No data provided"}), 400

    # `or ''` guards against an explicit JSON null, which would otherwise
    # raise AttributeError on .strip() and surface as a 500
    username = str(data.get('username') or '').strip()
    password = data.get('password', '')
    role = data.get('role', 'admin')

    result = auth_service.login(username, password, role)

    if result['success']:
        return jsonify({
            "token": result['token'],
            "user_id": result['user_id'],
            "username": result['username'],
            "role": result['role']
        })
    else:
        return jsonify({"error": result['error']}), 401
136
+
137
+
138
@app.route('/api/auth/verify', methods=['GET'])
@require_auth
def verify_token():
    """Echo back the identity that ``require_auth`` attached to the request."""
    current = request.current_user
    identity = {
        "user_id": current['user_id'],
        "username": current['username'],
        # Users without an explicit role are reported as 'admin'
        "role": current.get('role', 'admin')
    }
    return jsonify(identity)
146
+
147
+
148
+ # ==================== Admin Employee Management ====================
149
+
150
@app.route('/api/admin/employees', methods=['GET'])
@require_admin
def list_employees():
    """Return the employees registered under the calling admin."""
    admin_id = request.current_user['user_id']
    return jsonify({"employees": auth_service.get_admin_employees(admin_id)})
155
+
156
+
157
@app.route('/api/admin/employees', methods=['POST'])
@require_admin
def add_employee():
    """Create an employee account under the calling admin.

    Expects a JSON body with ``email`` and ``password``. Returns the new
    employee's ``user_id`` and ``email`` on success, or a 400 JSON error.
    """
    data = request.get_json()

    # Reject empty bodies and JSON that is not an object (e.g. a list)
    if not data or not isinstance(data, dict):
        return jsonify({"error": "No data provided"}), 400

    # `or ''` guards against an explicit JSON null, which would otherwise
    # raise AttributeError on .strip() and surface as a 500
    email = str(data.get('email') or '').strip()
    password = data.get('password', '')

    result = auth_service.register_employee(
        admin_user_id=request.current_user['user_id'],
        email=email,
        password=password
    )

    if result['success']:
        return jsonify({"user_id": result['user_id'], "email": result['email']})
    else:
        return jsonify({"error": result['error']}), 400
178
+
179
+
180
@app.route('/api/admin/employees/<employee_id>', methods=['DELETE'])
@require_admin
def delete_employee(employee_id):
    """Delete one of the calling admin's employees; 404 if the service refuses."""
    removed = auth_service.delete_employee(
        admin_user_id=request.current_user['user_id'],
        employee_id=employee_id
    )

    if not removed:
        return jsonify({"error": "Employee not found or access denied"}), 404
    return jsonify({"success": True})
192
+
193
+
194
+ # ==================== Bucket Routes ====================
195
+
196
@app.route('/api/buckets', methods=['GET'])
@require_auth
def list_buckets():
    """Return every bucket belonging to the authenticated user."""
    owner_id = request.current_user['user_id']
    return jsonify({"buckets": chroma_service.get_user_buckets(owner_id)})
202
+
203
+
204
@app.route('/api/buckets', methods=['POST'])
@require_auth
def create_bucket():
    """Create a new bucket for the authenticated user.

    Expects a JSON body with a non-blank ``name`` and an optional
    ``description``. Returns whatever ``chroma_service.create_bucket``
    returns, or a 400 JSON error when the name is missing or blank.
    """
    data = request.get_json()

    if not isinstance(data, dict):
        return jsonify({"error": "Bucket name is required"}), 400

    # Strip BEFORE validating so a whitespace-only name is rejected instead
    # of creating a bucket with an empty name; `or ''` tolerates JSON nulls
    name = str(data.get('name') or '').strip()
    if not name:
        return jsonify({"error": "Bucket name is required"}), 400

    description = str(data.get('description') or '').strip()

    result = chroma_service.create_bucket(
        user_id=request.current_user['user_id'],
        name=name,
        description=description
    )

    return jsonify(result)
223
+
224
+
225
@app.route('/api/buckets/<bucket_id>', methods=['DELETE'])
@require_auth
def delete_bucket(bucket_id):
    """Delete one of the authenticated user's buckets; 404 if the service refuses."""
    removed = chroma_service.delete_bucket(
        bucket_id=bucket_id,
        user_id=request.current_user['user_id']
    )

    if not removed:
        return jsonify({"error": "Bucket not found or access denied"}), 404
    return jsonify({"success": True})
238
+
239
+
240
+ # ==================== Document Routes ====================
241
+
242
+ # ==================== Async Processing ====================
243
+
244
+ # Global status store: doc_id -> {status, progress, message, result, error}
245
+ processing_status = {}
246
+
247
def process_document_background(doc_id, user_id, file_path, filename, bucket_id):
    """Process an uploaded document outside the request thread.

    Steps: text extraction -> document storage -> chunking/embeddings ->
    summary generation -> metadata extraction. Progress is published in the
    module-level ``processing_status`` dict under ``doc_id``; the final entry
    has status "completed" (with a ``result`` payload) or "failed" (with an
    ``error`` string). On failure the uploaded file is removed.

    Args:
        doc_id: Identifier assigned to the document at upload time.
        user_id: ID of the user who uploaded the document.
        file_path: Path of the saved upload on disk.
        filename: Sanitized file name of the upload.
        bucket_id: Bucket the document belongs to ('' when none was given).
    """
    def _remove_upload():
        """Best-effort removal of the uploaded file after a failure."""
        try:
            if os.path.exists(file_path):
                os.remove(file_path)
        except OSError as cleanup_error:
            # Cleanup problems must not mask the real processing error
            print(f"[BACKGROUND] Could not remove {file_path}: {cleanup_error}")

    try:
        processing_status[doc_id] = {
            "status": "processing",
            "progress": 10,
            "message": "Starting processing..."
        }

        print(f"[BACKGROUND] Processing file: {filename}")

        # Step 1: Text Extraction (OCR)
        processing_status[doc_id]["message"] = "Extracting text (OCR)..."
        processing_status[doc_id]["progress"] = 20

        result = document_processor.process(file_path, filename)

        if not result['success']:
            processing_status[doc_id] = {
                "status": "failed",
                "error": result['error']
            }
            _remove_upload()
            return

        processing_status[doc_id]["progress"] = 50
        processing_status[doc_id]["message"] = "Storing document..."

        # Step 2: Store Metadata
        doc_type = document_processor.get_file_type(filename)
        chroma_service.store_document(
            user_id=user_id,
            doc_id=doc_id,
            filename=filename,
            doc_type=doc_type,
            content=result['text'],
            bucket_id=bucket_id
        )

        processing_status[doc_id]["progress"] = 70
        processing_status[doc_id]["message"] = "generating embeddings..."

        # Step 3: Chunking & Embeddings
        chunk_count = rag_service.process_document(
            user_id=user_id,
            doc_id=doc_id,
            content=result['text'],
            bucket_id=bucket_id
        )

        processing_status[doc_id]["progress"] = 90
        processing_status[doc_id]["message"] = "Generating summary..."

        # Step 4: Summary Generation (falls back to a placeholder summary)
        summary_result = rag_service.generate_summary(result['text'], filename)
        summary = summary_result.get('summary', f'Document: {filename}')

        # Step 5: Extract and store metadata for aggregate queries
        processing_status[doc_id]["progress"] = 95
        processing_status[doc_id]["message"] = "Extracting metadata..."

        try:
            # Extract structured metadata from document
            metadata = metadata_extractor.extract_metadata(result['text'], filename)

            # Store metadata for aggregate queries
            chroma_service.store_document_metadata(
                doc_id=doc_id,
                user_id=user_id,
                bucket_id=bucket_id,
                metadata=metadata
            )

            # Store summary chunk for aggregate queries
            chroma_service.store_summary_chunk(
                doc_id=doc_id,
                user_id=user_id,
                summary=summary,
                bucket_id=bucket_id,
                filename=filename
            )
            print(f"[METADATA] Extracted and stored metadata for {filename}")
        except Exception as e:
            # Non-fatal: the document is still usable without aggregate metadata
            print(f"[METADATA] Warning: Failed to extract metadata for {filename}: {e}")

        # Complete
        processing_status[doc_id] = {
            "status": "completed",
            "progress": 100,
            "message": "Complete",
            "result": {
                "doc_id": doc_id,
                "filename": filename,
                "doc_type": doc_type,
                "bucket_id": bucket_id,
                "chunk_count": chunk_count,
                "summary": summary
            }
        }
        print(f"[BACKGROUND] Completed {filename}")

    except Exception as e:
        import traceback
        print(f"[BACKGROUND ERROR] {str(e)}")
        print(traceback.format_exc())
        processing_status[doc_id] = {
            "status": "failed",
            "error": str(e)
        }
        _remove_upload()
365
+
366
@app.route('/api/documents/upload', methods=['POST'])
@require_auth
def upload_document():
    """Accept a file upload and process it asynchronously.

    Expects multipart form data with a ``file`` part and an optional
    ``bucket_id`` field. The file is saved under the user's upload folder as
    ``<doc_id>_<filename>`` and processing starts in a background thread.
    Responds with 202 and the new ``doc_id``; 400 on invalid input.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file provided"}), 400

    file = request.files['file']
    bucket_id = request.form.get('bucket_id', '')

    if file.filename == '':
        return jsonify({"error": "No file selected"}), 400

    if not document_processor.is_supported(file.filename):
        return jsonify({"error": "Unsupported file type"}), 400

    doc_id = str(uuid.uuid4())
    filename = secure_filename(file.filename)

    # secure_filename can return '' or drop the dot before the extension
    # (e.g. for names made only of non-ASCII characters). Everything after
    # this point uses the sanitized name, so it must still be acceptable.
    if not filename or not document_processor.is_supported(filename):
        return jsonify({"error": "Invalid file name"}), 400

    user_folder = os.path.join(Config.UPLOAD_FOLDER, request.current_user['user_id'])
    os.makedirs(user_folder, exist_ok=True)

    file_path = os.path.join(user_folder, f"{doc_id}_{filename}")
    file.save(file_path)

    # Initialize status so a poll right after the response finds an entry
    processing_status[doc_id] = {
        "status": "queued",
        "progress": 0,
        "message": "Queued for processing..."
    }

    # Start background thread; user_id is read here because the request
    # object is not available inside the worker thread
    import threading
    thread = threading.Thread(
        target=process_document_background,
        args=(doc_id, request.current_user['user_id'], file_path, filename, bucket_id),
        daemon=True
    )
    thread.start()

    # Return immediately
    return jsonify({
        "status": "queued",
        "doc_id": doc_id,
        "filename": filename,
        "message": "Upload accepted, processing in background"
    }), 202
414
+
415
@app.route('/api/documents/<doc_id>/status', methods=['GET'])
@require_auth
def get_document_status(doc_id):
    """Return the in-memory processing status recorded for ``doc_id``.

    Only the ``processing_status`` dict is consulted; when it has no
    (truthy) entry for the document, 404 with status "unknown" is returned.
    """
    entry = processing_status.get(doc_id)

    if entry:
        return jsonify(entry)

    # No fallback lookup in the database is performed for now
    return jsonify({"status": "unknown"}), 404
428
+
429
+
430
@app.route('/api/documents/<doc_id>/summary', methods=['GET'])
@require_auth
def get_document_summary(doc_id):
    """Generate a summary for one of the user's documents.

    The summary is built from the document's stored ``content_preview``,
    not from the full file. Returns 404 when the document is not found.
    """
    doc = chroma_service.get_document(doc_id, request.current_user['user_id'])
    if not doc:
        return jsonify({"error": "Document not found"}), 404

    # Only the stored preview is available here; a fuller summary would
    # require re-reading the original document
    preview_text = doc.get('content_preview', '')
    summary_result = rag_service.generate_summary(preview_text, doc['filename'])

    payload = {
        "doc_id": doc_id,
        "filename": doc['filename'],
        "summary": summary_result.get('summary', f'Document: {doc["filename"]}'),
        "success": summary_result.get('success', False)
    }
    return jsonify(payload)
452
+
453
+
454
@app.route('/api/documents', methods=['GET'])
@require_auth
def list_documents():
    """List the user's documents, optionally limited by ``?bucket_id=``."""
    requested_bucket = request.args.get('bucket_id')
    # A missing or empty bucket_id means "no bucket filter"
    documents = chroma_service.get_user_documents(
        request.current_user['user_id'],
        bucket_id=requested_bucket or None
    )
    return jsonify({"documents": documents})
464
+
465
+
466
@app.route('/api/documents/<doc_id>', methods=['GET'])
@require_auth
def get_document(doc_id):
    """Return the stored record for one of the user's documents, or 404."""
    record = chroma_service.get_document(doc_id, request.current_user['user_id'])

    if not record:
        return jsonify({"error": "Document not found"}), 404
    return jsonify(record)
476
+
477
+
478
@app.route('/api/documents/<doc_id>/view', methods=['GET'])
@require_auth
def view_document(doc_id):
    """Send the original uploaded file for one of the user's documents.

    The file is looked up in the user's upload folder by its ``doc_id``
    name prefix and sent inline (not as an attachment). Returns 404 when the
    document record or the file on disk is missing.
    """
    doc = chroma_service.get_document(doc_id, request.current_user['user_id'])

    if not doc:
        return jsonify({"error": "Document not found"}), 404

    user_folder = os.path.join(Config.UPLOAD_FOLDER, request.current_user['user_id'])

    # The folder may not exist (e.g. uploads were wiped); without this guard
    # os.listdir raises FileNotFoundError and the client gets a 500
    if not os.path.isdir(user_folder):
        return jsonify({"error": "File not found on server"}), 404

    # Find the file: uploads are stored with the doc_id as name prefix
    for f in os.listdir(user_folder):
        if f.startswith(doc_id):
            file_path = os.path.join(user_folder, f)
            return send_file(file_path, as_attachment=False)

    return jsonify({"error": "File not found on server"}), 404
496
+
497
+
498
@app.route('/api/documents/<doc_id>/bucket', methods=['PUT'])
@require_auth
def update_document_bucket(doc_id):
    """Move a document to the bucket named in the JSON body.

    A missing body or missing ``bucket_id`` key is passed on as ''.
    Returns 404 when the service reports failure.
    """
    payload = request.get_json()
    target_bucket = ''
    if payload:
        target_bucket = payload.get('bucket_id', '')

    moved = chroma_service.update_document_bucket(
        doc_id=doc_id,
        user_id=request.current_user['user_id'],
        bucket_id=target_bucket
    )

    if not moved:
        return jsonify({"error": "Document not found or access denied"}), 404
    return jsonify({"success": True})
515
+
516
+
517
@app.route('/api/documents/<doc_id>', methods=['DELETE'])
@require_auth
def delete_document(doc_id):
    """Delete a document record and, best-effort, its uploaded file.

    Returns 404 when the service reports the document was not deleted.
    Failure to remove the file from disk is logged but does not fail the
    request, since the record itself is already gone.
    """
    success = chroma_service.delete_document(
        doc_id=doc_id,
        user_id=request.current_user['user_id']
    )

    if not success:
        return jsonify({"error": "Document not found or access denied"}), 404

    user_folder = os.path.join(Config.UPLOAD_FOLDER, request.current_user['user_id'])
    try:
        # Uploads are stored with the doc_id as name prefix
        for f in os.listdir(user_folder):
            if f.startswith(doc_id):
                os.remove(os.path.join(user_folder, f))
                break
    except OSError as e:
        # Only filesystem errors are tolerated here (missing folder,
        # permissions, ...); anything else should surface normally
        print(f"[DELETE] Could not remove uploaded file for {doc_id}: {e}")

    return jsonify({"success": True})
539
+
540
+
541
+ # ==================== Chat/RAG Routes ====================
542
+
543
@app.route('/api/chat', methods=['POST'])
@require_auth
def chat():
    """Answer a chat message using RAG, optionally restricted to a bucket.

    Expects a JSON body with a non-blank string ``message`` and optional
    ``doc_ids``, ``bucket_id`` and ``history``. Returns the response plus
    model/source details on success, 400 for a missing message, or 500 with
    the service's error message.
    """
    data = request.get_json()

    # Validate AFTER stripping so whitespace-only messages are rejected, and
    # require a string so .strip() cannot raise on numbers or nulls
    raw_message = data.get('message') if isinstance(data, dict) else None
    if not isinstance(raw_message, str) or not raw_message.strip():
        return jsonify({"error": "No message provided"}), 400

    message = raw_message.strip()
    doc_ids = data.get('doc_ids')
    bucket_id = data.get('bucket_id')  # Optional bucket filter
    conversation_history = data.get('history', [])

    result = rag_service.query(
        user_id=request.current_user['user_id'],
        query=message,
        doc_ids=doc_ids,
        bucket_id=bucket_id,
        conversation_history=conversation_history
    )

    if result['success']:
        return jsonify({
            "response": result['response'],
            "model": result.get('model', 'unknown'),
            "sources": result.get('sources', []),
            "source_files": result.get('source_files', []),
            "chunks_used": result.get('chunks_used', 0),
            "chunks_filtered": result.get('chunks_filtered', 0)
        })
    else:
        return jsonify({"error": result['error']}), 500
576
+
577
+
578
@app.route('/api/chat/stream', methods=['POST'])
@require_auth
def chat_stream():
    """Streaming chat endpoint: relays response chunks as server-sent events.

    Expects a JSON body with a non-blank string ``message`` and optional
    ``bucket_id`` and ``chat_id``. Each item yielded by
    ``rag_service.query_stream`` is sent as one ``data: <json>`` event,
    preceded by a ``{"type": "start"}`` event.
    """
    import json
    import time

    start_time = time.time()
    print("[STREAM] Endpoint called")

    data = request.get_json()

    # Validate AFTER stripping so whitespace-only messages are rejected, and
    # require a string so .strip() cannot raise on numbers or nulls
    raw_message = data.get('message') if isinstance(data, dict) else None
    if not isinstance(raw_message, str) or not raw_message.strip():
        return jsonify({"error": "No message provided"}), 400

    message = raw_message.strip()
    bucket_id = data.get('bucket_id')
    chat_id = data.get('chat_id', '')  # Get chat_id from request
    # Captured here rather than read from `request` inside the generator
    user_id = request.current_user['user_id']

    print(f"[STREAM] Request parsed in {time.time()-start_time:.2f}s")

    def generate():
        # Immediately yield to start the stream
        yield f"data: {json.dumps({'type': 'start'})}\n\n"

        sse_chunk_count = 0
        for chunk in rag_service.query_stream(
            user_id=user_id,
            query=message,
            bucket_id=bucket_id,
            chat_id=chat_id
        ):
            sse_chunk_count += 1
            # Log only the first few chunks to keep the output readable
            if sse_chunk_count <= 5:
                print(f"[SSE] Sending chunk {sse_chunk_count}: type={chunk.get('type', 'unknown')}")
            yield f"data: {json.dumps(chunk)}\n\n"

        print(f"[SSE] Stream complete, sent {sse_chunk_count} chunks total")

    return Response(
        generate(),
        mimetype='text/event-stream',
        headers={
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'X-Accel-Buffering': 'no'
        }
    )
627
+
628
+
629
@app.route('/api/chat/clear', methods=['POST'])
@require_auth
def clear_chat_memory():
    """Clear the current user's conversation memory, optionally per bucket."""
    payload = request.get_json() or {}

    cleared = rag_service.clear_memory(
        user_id=request.current_user['user_id'],
        bucket_id=payload.get('bucket_id')
    )

    if not cleared:
        return jsonify({"error": "Failed to clear memory"}), 500
    return jsonify({"success": True, "message": "Conversation memory cleared"})
645
+
646
+
647
@app.route('/api/cleanup/chunks', methods=['POST'])
@require_auth
def cleanup_user_chunks():
    """Delete ALL stored chunks for the current user (fix for stale data)."""
    owner_id = request.current_user['user_id']
    deleted_count = chroma_service.clear_all_user_chunks(user_id=owner_id)

    notice = f"Deleted {deleted_count} chunks. Please re-upload your documents."
    return jsonify({"success": True, "message": notice})
658
+
659
+
660
+
661
+ # ==================== Chat History Routes ====================
662
+
663
@app.route('/api/chats', methods=['GET'])
@require_auth
def list_chat_sessions():
    """Return every saved chat session of the authenticated user."""
    owner_id = request.current_user['user_id']
    return jsonify({"chats": chroma_service.get_user_chat_sessions(owner_id)})
669
+
670
+
671
@app.route('/api/chats', methods=['POST'])
@require_auth
def save_chat_session():
    """Save or update a chat session from a JSON body.

    Requires ``id``; ``topic`` defaults to 'Chat', ``messages`` to [] and
    ``bucket`` to ''. The service result is merged into the response.
    """
    payload = request.get_json()
    if not payload:
        return jsonify({"error": "No data provided"}), 400

    chat_id = payload.get('id')
    if not chat_id:
        return jsonify({"error": "Chat ID is required"}), 400

    result = chroma_service.save_chat_session(
        user_id=request.current_user['user_id'],
        chat_id=chat_id,
        topic=payload.get('topic', 'Chat'),
        messages=payload.get('messages', []),
        bucket_id=payload.get('bucket', '')
    )

    # Keys from the service result take precedence over "success"
    response_body = {"success": True, **result}
    return jsonify(response_body)
697
+
698
+
699
@app.route('/api/chats/<chat_id>', methods=['DELETE'])
@require_auth
def delete_chat_session(chat_id):
    """Delete one of the user's chat sessions; 404 if the service refuses."""
    removed = chroma_service.delete_chat_session(
        user_id=request.current_user['user_id'],
        chat_id=chat_id
    )

    if not removed:
        return jsonify({"error": "Chat not found or access denied"}), 404
    return jsonify({"success": True})
712
+
713
+
714
+ # ==================== Health Check ====================
715
+
716
@app.route('/api/health', methods=['GET'])
def health_check():
    """Unauthenticated liveness probe reporting a fixed status and version."""
    report = {"status": "healthy", "version": "1.1.0"}
    return jsonify(report)
719
+
720
+
721
+ # ==================== Main ====================
722
+
723
if __name__ == '__main__':
    # Local development entry point (the Docker image starts the app through
    # gunicorn instead and never reaches this branch).
    print("=" * 50)
    print("NotebookLM Clone - AI Document Intelligence")
    print("=" * 50)
    print(f"Upload folder: {Config.UPLOAD_FOLDER}")
    print(f"ChromaDB Cloud: {Config.CHROMA_TENANT}/{Config.CHROMA_DATABASE}")
    print("Starting server on http://localhost:5000")
    print("=" * 50)

    # The Werkzeug debugger allows arbitrary code execution, so it must not
    # be unconditionally enabled on a server bound to all interfaces. It is
    # now opt-in through FLASK_DEBUG (set to True in .env.example).
    debug_enabled = os.getenv('FLASK_DEBUG', 'False').strip().lower() in ('1', 'true', 'yes', 'on')

    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)
config.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+
4
+ load_dotenv()
5
+
6
class Config:
    """Central application settings; secrets are read from the environment."""

    # --- DeepSeek (primary LLM, OpenAI-compatible API) ---
    DEEPSEEK_API_KEY = os.environ.get('DEEPSEEK_API_KEY', '')
    DEEPSEEK_BASE_URL = 'https://api.deepseek.com/v1'
    DEEPSEEK_MODEL = 'deepseek-chat'  # DeepSeek V3 model

    # --- OpenRouter (OCR and fallback LLM) ---
    OPENROUTER_API_KEY = os.environ.get('OPENROUTER_API_KEY', '')
    OPENROUTER_BASE_URL = 'https://openrouter.ai/api/v1'

    # Short alias -> OpenRouter model identifier
    MODEL_MAP = {
        'gemma': 'google/gemma-3-4b-it:free',
        'mistral': 'mistralai/mistral-small-3.1-24b-instruct:free',
    }

    # DeepSeek is tried first; set to False to use OpenRouter instead
    USE_DEEPSEEK = True
    # Order in which the OpenRouter aliases are tried
    FALLBACK_ORDER = ['gemma', 'mistral']

    # --- ChromaDB Cloud ---
    CHROMA_API_KEY = os.environ.get('CHROMA_API_KEY', '')
    CHROMA_TENANT = os.environ.get('CHROMA_TENANT', 'default_tenant')
    CHROMA_DATABASE = os.environ.get('CHROMA_DATABASE', 'default_database')
    CHROMA_HOST = 'api.trychroma.com'  # ChromaDB Cloud endpoint

    # --- JWT ---
    # The fallback value is a placeholder; set JWT_SECRET in production
    JWT_SECRET = os.environ.get('JWT_SECRET', 'your-secret-key-change-in-production')
    JWT_EXPIRY_HOURS = 24

    # --- Uploads ---
    UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), 'uploads')
    MAX_CONTENT_LENGTH = 200 * 1024 ** 2  # 200MB max file size
    ALLOWED_EXTENSIONS = {
        'pdf', 'doc', 'docx', 'ppt', 'pptx', 'xls', 'xlsx',
        'txt', 'md',
        'png', 'jpg', 'jpeg', 'gif', 'webp',
    }

    # --- Retrieval / generation tuning ---
    CHUNK_SIZE = 500  # Smaller chunks for higher precision with many documents
    CHUNK_OVERLAP = 150  # Overlap so information at chunk boundaries is kept
    TOP_K_RESULTS = 100  # High: comprehensive retrieval across many documents
    AI_TEMPERATURE = 0.0  # Zero temperature for maximum determinism
    RELEVANCE_THRESHOLD = 3.0  # Higher threshold: include all potentially relevant
    MAX_CONVERSATION_HISTORY = 20  # Keep more turns for pronoun context
    AI_MAX_TOKENS = 4096  # Maximum tokens for detailed responses
    AI_TIMEOUT = 90  # More time for complex multi-document queries
find_buckets.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Helper script to find buckets for a user"""
2
import sys
sys.path.insert(0, '.')

from services.chroma_service import chroma_service

# Usage: python find_buckets.py [user_id ...]
# User IDs inspected when none are given on the command line
DEFAULT_USER_IDS = ['55c0893720ef38eb', '7ac2ed69d52d2010']
user_ids = sys.argv[1:] or DEFAULT_USER_IDS

for user_id in user_ids:
    print(f"\nUser: {user_id}")
    print("-" * 40)

    # Get documents
    docs = chroma_service.get_user_documents(user_id)
    print(f"Documents: {len(docs)}")

    # Get buckets
    buckets = chroma_service.get_user_buckets(user_id)
    if buckets:
        print("Buckets:")
        for b in buckets:
            print(f"  - {b['name']} (ID: {b['bucket_id']}, Docs: {b.get('doc_count', 0)})")
    else:
        print("No buckets found")
find_users.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Helper script to find user IDs"""
2
import sys
sys.path.insert(0, '.')

from services.chroma_service import chroma_service

print("Finding users in ChromaDB...")
# Fetch every record from the users collection
users = chroma_service.users_collection.get()
found_ids = users['ids']

if found_ids:
    print(f"Found {len(found_ids)} users:")
    for position, user_id in enumerate(found_ids):
        # Metadata entries are looked up by the same position as the ID
        details = users['metadatas'][position]
        username = details.get('username', 'N/A')
        role = details.get('role', 'N/A')
        print(f" - User ID: {user_id}")
        print(f" Username: {username}")
        print(f" Role: {role}")
        print()
else:
    print("No users found!")
migrate_metadata.py ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Migration Script - Extract Metadata for Existing Documents
3
+ Run this script once to populate document_metadata and document_summaries
4
+ collections for all existing documents.
5
+
6
+ Usage:
7
+ python migrate_metadata.py --user-id <user_id> [--bucket-id <bucket_id>]
8
+ """
9
+
10
+ import sys
11
+ import time
12
+ import argparse
13
+ from typing import List, Dict
14
+
15
+ # Add parent directory to path for imports
16
+ sys.path.insert(0, '.')
17
+
18
+ from services.chroma_service import chroma_service
19
+ from services.metadata_extractor import metadata_extractor
20
+ from services.rag_service import rag_service
21
+
22
+
23
def reconstruct_document_content(doc_id: str) -> str:
    """
    Rebuild a document's text by joining its stored chunks in chunk order.

    Returns an empty string when no chunks are stored for ``doc_id``.
    """
    stored = chroma_service.get_document_chunks(doc_id)
    if stored:
        # In-place sort, as before; chunks without an index sort as index 0.
        stored.sort(key=lambda piece: piece.get('chunk_index', 0))
        return ' '.join([piece.get('text', '') for piece in stored])
    return ""
36
+
37
+
38
def migrate_single_document(doc: Dict, user_id: str, skip_existing: bool = True) -> Dict:
    """
    Migrate a single document: extract metadata and create summary.

    Args:
        doc: Document dictionary with doc_id, filename, bucket_id
        user_id: User ID
        skip_existing: If True, skip documents that already have metadata and summary

    Returns:
        Dict with keys doc_id, filename, status ('success', 'skipped' or 'error'),
        metadata_extracted, summary_created and error. Summary generation is
        best-effort: if it fails, status stays 'success' and summary_created
        stays False.
    """
    doc_id = doc['doc_id']
    filename = doc.get('filename', '')
    bucket_id = doc.get('bucket_id', '')

    result = {
        'doc_id': doc_id,
        'filename': filename,
        'status': 'pending',
        'metadata_extracted': False,
        'summary_created': False,
        'error': None
    }

    try:
        # Look up what already exists exactly once; both answers are reused
        # below instead of querying the store a second time per document.
        has_metadata = False
        has_summary = False
        if skip_existing:
            has_metadata = bool(chroma_service.get_document_metadata(doc_id, user_id))
            summary_id = f"{doc_id}_summary"
            existing_summary = chroma_service.summary_chunks_collection.get(ids=[summary_id])
            has_summary = bool(existing_summary['ids'])

            # Check if already migrated (fast skip)
            if has_metadata and has_summary:
                result['status'] = 'skipped'
                result['metadata_extracted'] = True
                result['summary_created'] = True
                print(f" [SKIP] Already migrated: {filename}")
                return result

        # Step 1: Reconstruct content from chunks
        print(f" Reconstructing content for {filename}...")
        content = reconstruct_document_content(doc_id)

        if not content:
            result['status'] = 'skipped'
            result['error'] = 'No content found'
            return result

        # Step 2: Extract metadata (only if not exists or skip_existing is False)
        if has_metadata:
            result['metadata_extracted'] = True  # Already exists
        else:
            print(f" Extracting metadata...")
            metadata = metadata_extractor.extract_metadata(content, filename)

            # Store metadata
            chroma_service.store_document_metadata(
                doc_id=doc_id,
                user_id=user_id,
                bucket_id=bucket_id,
                metadata=metadata
            )
            result['metadata_extracted'] = True

        # Step 3: Generate and store summary (only if not exists or skip_existing is False)
        if has_summary:
            result['summary_created'] = True  # Already exists
        else:
            print(f" Generating summary...")
            try:
                summary_result = rag_service.generate_summary(content, filename)
                # Extract the summary string from the result dict
                if isinstance(summary_result, dict):
                    summary = summary_result.get('summary', f'Document: {filename}')
                else:
                    summary = str(summary_result) if summary_result else f'Document: {filename}'

                if summary:
                    chroma_service.store_summary_chunk(
                        doc_id=doc_id,
                        user_id=user_id,
                        summary=summary,
                        bucket_id=bucket_id,
                        filename=filename
                    )
                    result['summary_created'] = True
            except Exception as e:
                # Sanitize like the outer handler: printing a raw non-ASCII
                # message can raise on limited consoles and would otherwise
                # turn this best-effort step into a hard error.
                warning_msg = str(e).encode('ascii', 'replace').decode('ascii')
                print(f" Warning: Summary generation failed: {warning_msg}")

        result['status'] = 'success'
        print(f" [OK] Completed: {filename}")

    except Exception as e:
        result['status'] = 'error'
        # Sanitize error message for console encoding
        error_msg = str(e).encode('ascii', 'replace').decode('ascii')
        result['error'] = error_msg
        print(f" [ERROR] {filename} - {error_msg}")

    return result
140
+
141
+
142
def migrate_all_documents(user_id: str, bucket_id: str = None,
                          batch_size: int = 10, delay: float = 0.5, skip_existing: bool = True):
    """
    Migrate all documents for a user/bucket.

    Args:
        user_id: User ID to migrate documents for
        bucket_id: Optional bucket ID to filter by
        batch_size: Number of documents between progress reports; a value
            of 0 or less disables the periodic progress report
        delay: Seconds to wait between documents (rate limiting)
        skip_existing: If True, skip documents that already have metadata and summary

    Returns:
        Dict of counters (total, success, skipped, already_migrated, error,
        metadata_extracted, summaries_created), or None when the user has no
        documents. The metadata/summary counters include documents whose
        metadata or summary already existed.
    """
    print("=" * 60)
    print("Document Metadata Migration")
    print("=" * 60)
    print(f"User ID: {user_id}")
    print(f"Bucket ID: {bucket_id or 'All buckets'}")
    print(f"Skip existing: {skip_existing}")
    print()

    # Get all documents
    print("Fetching documents...")
    documents = chroma_service.get_user_documents(user_id, bucket_id)
    total_docs = len(documents)

    print(f"Found {total_docs} documents to process")
    print()

    if total_docs == 0:
        print("No documents found. Exiting.")
        return

    # Track results
    results = {
        'total': total_docs,
        'success': 0,
        'skipped': 0,
        'already_migrated': 0,
        'error': 0,
        'metadata_extracted': 0,
        'summaries_created': 0
    }

    start_time = time.time()

    # Process documents
    for i, doc in enumerate(documents, 1):
        print(f"\n[{i}/{total_docs}] Processing: {doc.get('filename', 'Unknown')}")

        result = migrate_single_document(doc, user_id, skip_existing=skip_existing)

        # Update results
        if result['status'] == 'success':
            results['success'] += 1
        elif result['status'] == 'skipped':
            if result.get('metadata_extracted') and result.get('summary_created'):
                results['already_migrated'] += 1
            else:
                results['skipped'] += 1
        else:
            results['error'] += 1

        if result['metadata_extracted']:
            results['metadata_extracted'] += 1
        if result['summary_created']:
            results['summaries_created'] += 1

        # Rate limiting: only pause when another document follows.
        if delay > 0 and i < total_docs:
            time.sleep(delay)

        # Progress update every batch_size documents. The batch_size > 0
        # guard avoids a ZeroDivisionError when 0 is passed on the CLI.
        if batch_size > 0 and i % batch_size == 0:
            elapsed = time.time() - start_time
            rate = i / elapsed if elapsed > 0 else 0
            remaining = (total_docs - i) / rate if rate > 0 else 0
            print(f"\n--- Progress: {i}/{total_docs} ({i/total_docs*100:.1f}%) ---")
            print(f" Elapsed: {elapsed:.1f}s | ETA: {remaining:.1f}s")
            print(f" Success: {results['success']} | Already migrated: {results['already_migrated']} | Errors: {results['error']}")

    # Final summary
    elapsed = time.time() - start_time
    print("\n" + "=" * 60)
    print("Migration Complete!")
    print("=" * 60)
    print(f"Total documents: {results['total']}")
    print(f" [OK] Success: {results['success']}")
    print(f" [SKIP] Already migrated: {results['already_migrated']}")
    print(f" [SKIP] Skipped (no content): {results['skipped']}")
    print(f" [ERR] Errors: {results['error']}")
    print()
    print(f"Metadata extracted: {results['metadata_extracted']}")
    print(f"Summaries created: {results['summaries_created']}")
    print()
    print(f"Total time: {elapsed:.1f} seconds")
    if total_docs > 0:
        print(f"Average: {elapsed/total_docs:.2f} seconds per document")

    return results
241
+
242
+
243
def main():
    """Parse command-line options and run either a dry-run listing or the real migration."""
    cli = argparse.ArgumentParser(description='Migrate existing documents to extract metadata')
    cli.add_argument('--user-id', required=True, help='User ID to migrate documents for')
    cli.add_argument('--bucket-id', help='Optional bucket ID to filter by')
    cli.add_argument('--batch-size', type=int, default=10, help='Batch size for progress updates')
    cli.add_argument('--delay', type=float, default=0.5, help='Delay between documents (seconds)')
    cli.add_argument('--dry-run', action='store_true', help='Show what would be done without making changes')

    opts = cli.parse_args()

    if not opts.dry_run:
        # Real run: always skips documents that are already migrated.
        migrate_all_documents(
            user_id=opts.user_id,
            bucket_id=opts.bucket_id,
            batch_size=opts.batch_size,
            delay=opts.delay,
            skip_existing=True
        )
        return

    # Dry run: list at most the first ten documents, then a remainder count.
    print("DRY RUN MODE - No changes will be made")
    pending = chroma_service.get_user_documents(opts.user_id, opts.bucket_id)
    print(f"Would process {len(pending)} documents:")
    preview_limit = 10
    for entry in pending[:preview_limit]:
        print(f" - {entry.get('filename', 'Unknown')}")
    if len(pending) > preview_limit:
        print(f" ... and {len(pending) - 10} more")


if __name__ == '__main__':
    main()
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ flask
2
+ flask-cors
3
+ chromadb
4
+ python-dotenv
5
+ python-docx
6
+ openpyxl
7
+ pandas
8
+ Pillow
9
+ requests
10
+ bcrypt
11
+ PyJWT
12
+ werkzeug
13
+ python-pptx
14
+ pymupdf
15
+ gunicorn
services/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Services package
services/auth_service.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Authentication Service with Role-Based Access
3
+ Handles user registration, login, and JWT token management
4
+ Supports Admin and Employee roles
5
+ Uses ChromaDB for user storage
6
+ """
7
+
8
+ import bcrypt
9
+ import jwt
10
+ import time
11
+ from datetime import datetime, timedelta
12
+ from config import Config
13
+ from services.chroma_service import chroma_service
14
+
15
+
16
+ class AuthService:
17
+ def __init__(self):
18
+ self.jwt_secret = Config.JWT_SECRET
19
+ self.jwt_expiry_hours = Config.JWT_EXPIRY_HOURS
20
+
21
+ def _hash_password(self, password: str) -> str:
22
+ """Hash password using bcrypt"""
23
+ salt = bcrypt.gensalt()
24
+ return bcrypt.hashpw(password.encode('utf-8'), salt).decode('utf-8')
25
+
26
+ def _verify_password(self, password: str, hashed: str) -> bool:
27
+ """Verify password against hash"""
28
+ return bcrypt.checkpw(
29
+ password.encode('utf-8'),
30
+ hashed.encode('utf-8')
31
+ )
32
+
33
+ def _generate_token(self, user_id: str, username: str, role: str) -> str:
34
+ """Generate JWT token with role"""
35
+ payload = {
36
+ "user_id": user_id,
37
+ "username": username,
38
+ "role": role,
39
+ "exp": datetime.utcnow() + timedelta(hours=self.jwt_expiry_hours),
40
+ "iat": datetime.utcnow()
41
+ }
42
+ return jwt.encode(payload, self.jwt_secret, algorithm="HS256")
43
+
44
+ def verify_token(self, token: str) -> dict | None:
45
+ """Verify and decode JWT token"""
46
+ try:
47
+ payload = jwt.decode(token, self.jwt_secret, algorithms=["HS256"])
48
+ return {
49
+ "user_id": payload['user_id'],
50
+ "username": payload['username'],
51
+ "role": payload.get('role', 'employee')
52
+ }
53
+ except jwt.ExpiredSignatureError:
54
+ return None
55
+ except jwt.InvalidTokenError:
56
+ return None
57
+
58
+ def register_admin(self, username: str, password: str, email: str = "") -> dict:
59
+ """
60
+ Register a new admin user
61
+ Returns: {"success": bool, "token": str, "user_id": str, "error": str}
62
+ """
63
+ # Validate input
64
+ if not username or len(username) < 3:
65
+ return {"success": False, "error": "Username must be at least 3 characters"}
66
+
67
+ if not password or len(password) < 6:
68
+ return {"success": False, "error": "Password must be at least 6 characters"}
69
+
70
+ # Check if user exists
71
+ existing = chroma_service.get_user(username)
72
+ if existing:
73
+ return {"success": False, "error": "Username already exists"}
74
+
75
+ # Hash password and create admin user
76
+ password_hash = self._hash_password(password)
77
+ result = chroma_service.create_user(username, password_hash, email, role="admin")
78
+
79
+ if "error" in result:
80
+ return {"success": False, "error": result['error']}
81
+
82
+ # Generate token
83
+ token = self._generate_token(result['user_id'], username, "admin")
84
+
85
+ return {
86
+ "success": True,
87
+ "token": token,
88
+ "user_id": result['user_id'],
89
+ "username": username,
90
+ "role": "admin"
91
+ }
92
+
93
+ def register_employee(self, admin_user_id: str, email: str, password: str) -> dict:
94
+ """
95
+ Admin registers an employee
96
+ Returns: {"success": bool, "user_id": str, "error": str}
97
+ """
98
+ # Validate input
99
+ if not email or "@" not in email:
100
+ return {"success": False, "error": "Valid email is required"}
101
+
102
+ if not password or len(password) < 6:
103
+ return {"success": False, "error": "Password must be at least 6 characters"}
104
+
105
+ # Check if employee email already exists
106
+ existing = chroma_service.get_user(email)
107
+ if existing:
108
+ return {"success": False, "error": "Employee with this email already exists"}
109
+
110
+ # Hash password and create employee user
111
+ password_hash = self._hash_password(password)
112
+ result = chroma_service.create_user(
113
+ username=email,
114
+ password_hash=password_hash,
115
+ email=email,
116
+ role="employee",
117
+ admin_id=admin_user_id
118
+ )
119
+
120
+ if "error" in result:
121
+ return {"success": False, "error": result['error']}
122
+
123
+ return {
124
+ "success": True,
125
+ "user_id": result['user_id'],
126
+ "email": email
127
+ }
128
+
129
+ def login(self, username: str, password: str, role: str = "admin") -> dict:
130
+ """
131
+ Login user with role check
132
+ Returns: {"success": bool, "token": str, "user_id": str, "error": str}
133
+ """
134
+ # Get user
135
+ user = chroma_service.get_user(username)
136
+
137
+ if not user:
138
+ return {"success": False, "error": "Invalid credentials"}
139
+
140
+ # Verify password
141
+ if not self._verify_password(password, user['password_hash']):
142
+ return {"success": False, "error": "Invalid credentials"}
143
+
144
+ # Verify role matches
145
+ user_role = user.get('role', 'admin')
146
+ if user_role != role:
147
+ if role == "admin":
148
+ return {"success": False, "error": "This account is not an admin account"}
149
+ else:
150
+ return {"success": False, "error": "This account is not an employee account"}
151
+
152
+ # Generate token
153
+ token = self._generate_token(user['user_id'], username, user_role)
154
+
155
+ return {
156
+ "success": True,
157
+ "token": token,
158
+ "user_id": user['user_id'],
159
+ "username": username,
160
+ "role": user_role
161
+ }
162
+
163
+ def get_admin_employees(self, admin_user_id: str) -> list:
164
+ """Get all employees created by an admin"""
165
+ return chroma_service.get_employees_by_admin(admin_user_id)
166
+
167
+ def delete_employee(self, admin_user_id: str, employee_id: str) -> bool:
168
+ """Admin deletes an employee"""
169
+ return chroma_service.delete_employee(admin_user_id, employee_id)
170
+
171
+ def get_current_user(self, token: str) -> dict | None:
172
+ """Get current user from token"""
173
+ return self.verify_token(token)
174
+
175
+
176
+ # Singleton instance
177
+ auth_service = AuthService()
services/chroma_service.py ADDED
@@ -0,0 +1,1009 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ChromaDB Cloud Service - Vector Storage & Retrieval
3
+ With Role-Based User Management and Bucket Organization
4
+ """
5
+
6
+ import chromadb
7
+ from chromadb.utils.embedding_functions import DefaultEmbeddingFunction
8
+ import hashlib
9
+ import time
10
+ from config import Config
11
+
12
+
13
+ class ChromaService:
14
+ _instance = None
15
+
16
+ def __new__(cls):
17
+ if cls._instance is None:
18
+ cls._instance = super().__new__(cls)
19
+ cls._instance._initialize()
20
+ return cls._instance
21
+
22
+ def _initialize(self):
23
+ """Initialize ChromaDB Cloud client"""
24
+ if not Config.CHROMA_API_KEY:
25
+ raise ValueError(
26
+ "CHROMA_API_KEY is required! Please set it in your .env file. "
27
+ "Get your API key from https://www.trychroma.com/"
28
+ )
29
+
30
+ print(f"Connecting to ChromaDB Cloud...")
31
+ print(f"Tenant: {Config.CHROMA_TENANT}")
32
+ print(f"Database: {Config.CHROMA_DATABASE}")
33
+
34
+ # Connect to ChromaDB Cloud using CloudClient
35
+ self.client = chromadb.CloudClient(
36
+ tenant=Config.CHROMA_TENANT,
37
+ database=Config.CHROMA_DATABASE,
38
+ api_key=Config.CHROMA_API_KEY
39
+ )
40
+
41
+ print("Connected to ChromaDB Cloud successfully!")
42
+
43
+ # Initialize collections
44
+ self._init_collections()
45
+
46
+ def _init_collections(self):
47
+ """Initialize required collections"""
48
+ # Users collection
49
+ self.users_collection = self.client.get_or_create_collection(
50
+ name="users",
51
+ metadata={"description": "User authentication data with roles"}
52
+ )
53
+
54
+ # Buckets collection for organizing documents
55
+ self.buckets_collection = self.client.get_or_create_collection(
56
+ name="buckets",
57
+ metadata={"description": "Document buckets for organization"}
58
+ )
59
+
60
+ # Documents collection
61
+ self.documents_collection = self.client.get_or_create_collection(
62
+ name="documents",
63
+ metadata={"description": "Document metadata and embeddings"}
64
+ )
65
+
66
+ # Chunks collection for RAG
67
+ self.chunks_collection = self.client.get_or_create_collection(
68
+ name="document_chunks",
69
+ metadata={"description": "Document chunks for RAG retrieval"}
70
+ )
71
+
72
+ # Conversation history collection for memory
73
+ self.conversations_collection = self.client.get_or_create_collection(
74
+ name="conversation_history",
75
+ metadata={"description": "Persistent conversation memory for context"}
76
+ )
77
+
78
+ # Chat sessions collection for complete chat history
79
+ self.chat_sessions_collection = self.client.get_or_create_collection(
80
+ name="chat_sessions",
81
+ metadata={"description": "Complete chat session storage"}
82
+ )
83
+
84
+ # Document metadata collection for structured data (aggregate queries)
85
+ self.metadata_collection = self.client.get_or_create_collection(
86
+ name="document_metadata",
87
+ metadata={"description": "Structured document metadata for aggregate queries"}
88
+ )
89
+
90
+ # Summary chunks collection for fast aggregate retrieval
91
+ self.summary_chunks_collection = self.client.get_or_create_collection(
92
+ name="document_summaries",
93
+ metadata={"description": "Document summaries for aggregate queries"}
94
+ )
95
+
96
+ print("Collections initialized: users, buckets, documents, document_chunks, conversation_history, chat_sessions, document_metadata, document_summaries")
97
+
98
+ # ==================== User Operations ====================
99
+
100
+ def create_user(self, username: str, password_hash: str, email: str = "",
101
+ role: str = "admin", admin_id: str = None) -> dict:
102
+ """Create a new user (admin or employee)"""
103
+ user_id = hashlib.sha256(username.encode()).hexdigest()[:16]
104
+
105
+ existing = self.users_collection.get(ids=[user_id])
106
+ if existing['ids']:
107
+ return {"error": "User already exists"}
108
+
109
+ metadata = {
110
+ "username": username,
111
+ "password_hash": password_hash,
112
+ "email": email,
113
+ "role": role,
114
+ "created_at": time.time()
115
+ }
116
+
117
+ if admin_id:
118
+ metadata["admin_id"] = admin_id
119
+
120
+ self.users_collection.add(
121
+ ids=[user_id],
122
+ documents=[username],
123
+ metadatas=[metadata]
124
+ )
125
+
126
+ return {"user_id": user_id, "username": username, "role": role}
127
+
128
+ def get_user(self, username: str) -> dict | None:
129
+ """Get user by username"""
130
+ user_id = hashlib.sha256(username.encode()).hexdigest()[:16]
131
+ result = self.users_collection.get(ids=[user_id])
132
+
133
+ if result['ids']:
134
+ return {
135
+ "user_id": result['ids'][0],
136
+ "username": result['metadatas'][0]['username'],
137
+ "password_hash": result['metadatas'][0]['password_hash'],
138
+ "email": result['metadatas'][0].get('email', ''),
139
+ "role": result['metadatas'][0].get('role', 'admin'),
140
+ "admin_id": result['metadatas'][0].get('admin_id')
141
+ }
142
+ return None
143
+
144
+ def get_employees_by_admin(self, admin_id: str) -> list:
145
+ """Get all employees created by a specific admin"""
146
+ results = self.users_collection.get(where={"admin_id": admin_id})
147
+
148
+ employees = []
149
+ for i, user_id in enumerate(results['ids']):
150
+ employees.append({
151
+ "user_id": user_id,
152
+ "email": results['metadatas'][i].get('email', ''),
153
+ "username": results['metadatas'][i].get('username', ''),
154
+ "created_at": results['metadatas'][i].get('created_at', 0)
155
+ })
156
+ return employees
157
+
158
+ def delete_employee(self, admin_id: str, employee_id: str) -> bool:
159
+ """Delete an employee (verify admin ownership)"""
160
+ result = self.users_collection.get(ids=[employee_id])
161
+
162
+ if not result['ids']:
163
+ return False
164
+
165
+ if result['metadatas'][0].get('admin_id') != admin_id:
166
+ return False
167
+
168
+ self.users_collection.delete(ids=[employee_id])
169
+ return True
170
+
171
+ # ==================== Bucket Operations ====================
172
+
173
+ def create_bucket(self, user_id: str, name: str, description: str = "") -> dict:
174
+ """Create a new bucket for organizing documents"""
175
+ bucket_id = hashlib.sha256(f"{user_id}_{name}_{time.time()}".encode()).hexdigest()[:16]
176
+
177
+ self.buckets_collection.add(
178
+ ids=[bucket_id],
179
+ documents=[name],
180
+ metadatas=[{
181
+ "user_id": user_id,
182
+ "name": name,
183
+ "description": description,
184
+ "created_at": time.time()
185
+ }]
186
+ )
187
+
188
+ return {"bucket_id": bucket_id, "name": name}
189
+
190
+ def get_user_buckets(self, user_id: str) -> list:
191
+ """Get all buckets for a user"""
192
+ results = self.buckets_collection.get(where={"user_id": user_id})
193
+
194
+ buckets = []
195
+ for i, bucket_id in enumerate(results['ids']):
196
+ # Count documents in this bucket
197
+ doc_count = len(self.documents_collection.get(
198
+ where={"bucket_id": bucket_id}
199
+ )['ids'])
200
+
201
+ buckets.append({
202
+ "bucket_id": bucket_id,
203
+ "name": results['metadatas'][i]['name'],
204
+ "description": results['metadatas'][i].get('description', ''),
205
+ "doc_count": doc_count,
206
+ "created_at": results['metadatas'][i]['created_at']
207
+ })
208
+ return buckets
209
+
210
+ def delete_bucket(self, bucket_id: str, user_id: str) -> bool:
211
+ """Delete a bucket and optionally its documents"""
212
+ bucket = self.buckets_collection.get(ids=[bucket_id])
213
+ if not bucket['ids'] or bucket['metadatas'][0]['user_id'] != user_id:
214
+ return False
215
+
216
+ # Delete bucket
217
+ self.buckets_collection.delete(ids=[bucket_id])
218
+
219
+ # Update documents to remove bucket_id
220
+ docs = self.documents_collection.get(where={"bucket_id": bucket_id})
221
+ for i, doc_id in enumerate(docs['ids']):
222
+ # Update metadata to remove bucket_id (set to empty)
223
+ meta = docs['metadatas'][i]
224
+ meta['bucket_id'] = ""
225
+ self.documents_collection.update(
226
+ ids=[doc_id],
227
+ metadatas=[meta]
228
+ )
229
+
230
+ return True
231
+
232
+ # ==================== Document Operations ====================
233
+
234
+ def store_document(self, user_id: str, doc_id: str, filename: str,
235
+ doc_type: str, content: str, bucket_id: str = "") -> dict:
236
+ """Store document metadata"""
237
+ self.documents_collection.add(
238
+ ids=[doc_id],
239
+ documents=[content[:1000]], # Store preview
240
+ metadatas=[{
241
+ "user_id": user_id,
242
+ "filename": filename,
243
+ "doc_type": doc_type,
244
+ "bucket_id": bucket_id,
245
+ "content_length": len(content),
246
+ "created_at": time.time()
247
+ }]
248
+ )
249
+ return {"doc_id": doc_id, "filename": filename}
250
+
251
+ def update_document_bucket(self, doc_id: str, user_id: str, bucket_id: str) -> bool:
252
+ """Move document to a different bucket"""
253
+ doc = self.documents_collection.get(ids=[doc_id])
254
+ if not doc['ids'] or doc['metadatas'][0]['user_id'] != user_id:
255
+ return False
256
+
257
+ meta = doc['metadatas'][0]
258
+ meta['bucket_id'] = bucket_id
259
+
260
+ self.documents_collection.update(
261
+ ids=[doc_id],
262
+ metadatas=[meta]
263
+ )
264
+ return True
265
+
266
+ def get_user_documents(self, user_id: str, bucket_id: str = None) -> list:
267
+ """Get all documents for a user, optionally filtered by bucket"""
268
+ if bucket_id:
269
+ results = self.documents_collection.get(
270
+ where={"$and": [{"user_id": user_id}, {"bucket_id": bucket_id}]}
271
+ )
272
+ else:
273
+ results = self.documents_collection.get(where={"user_id": user_id})
274
+
275
+ documents = []
276
+ for i, doc_id in enumerate(results['ids']):
277
+ documents.append({
278
+ "doc_id": doc_id,
279
+ "filename": results['metadatas'][i]['filename'],
280
+ "doc_type": results['metadatas'][i]['doc_type'],
281
+ "bucket_id": results['metadatas'][i].get('bucket_id', ''),
282
+ "created_at": results['metadatas'][i]['created_at']
283
+ })
284
+ return documents
285
+
286
+ def get_document(self, doc_id: str, user_id: str) -> dict | None:
287
+ """Get a single document by ID"""
288
+ doc = self.documents_collection.get(ids=[doc_id])
289
+ if not doc['ids'] or doc['metadatas'][0]['user_id'] != user_id:
290
+ return None
291
+
292
+ return {
293
+ "doc_id": doc_id,
294
+ "filename": doc['metadatas'][0]['filename'],
295
+ "doc_type": doc['metadatas'][0]['doc_type'],
296
+ "bucket_id": doc['metadatas'][0].get('bucket_id', ''),
297
+ "content_preview": doc['documents'][0],
298
+ "created_at": doc['metadatas'][0]['created_at']
299
+ }
300
+
301
+ def delete_document(self, doc_id: str, user_id: str) -> bool:
302
+ """Delete a document and ALL its chunks from the database"""
303
+ doc = self.documents_collection.get(ids=[doc_id])
304
+ if not doc['ids'] or doc['metadatas'][0]['user_id'] != user_id:
305
+ print(f"Document {doc_id} not found or access denied for user {user_id}")
306
+ return False
307
+
308
+ filename = doc['metadatas'][0].get('filename', 'unknown')
309
+ print(f"Deleting document: {filename} (ID: {doc_id})")
310
+
311
+ # First, delete all chunks for this document
312
+ try:
313
+ chunks = self.chunks_collection.get(where={"doc_id": doc_id})
314
+ chunk_count = len(chunks['ids']) if chunks['ids'] else 0
315
+
316
+ if chunk_count > 0:
317
+ print(f" Deleting {chunk_count} chunks for document {doc_id}...")
318
+ self.chunks_collection.delete(ids=chunks['ids'])
319
+ print(f" Successfully deleted {chunk_count} chunks")
320
+ else:
321
+ print(f" No chunks found for document {doc_id}")
322
+ except Exception as e:
323
+ print(f" Error deleting chunks: {e}")
324
+ # Continue to delete document even if chunk deletion fails
325
+
326
+ # Then delete the document metadata
327
+ try:
328
+ self.documents_collection.delete(ids=[doc_id])
329
+ print(f" Successfully deleted document metadata for {doc_id}")
330
+ except Exception as e:
331
+ print(f" Error deleting document metadata: {e}")
332
+ return False
333
+
334
+ return True
335
+
336
+ def clear_all_user_chunks(self, user_id: str) -> int:
337
+ """Clear ALL chunks for a user - useful for cleanup after stale data issues"""
338
+ chunks = self.chunks_collection.get(where={"user_id": user_id})
339
+ if chunks['ids']:
340
+ self.chunks_collection.delete(ids=chunks['ids'])
341
+ return len(chunks['ids'])
342
+ return 0
343
+
344
+ # ==================== Chunk Operations (RAG) ====================
345
+
346
def store_chunks(self, doc_id: str, user_id: str, chunks: list[dict], bucket_id: str = ""):
    """Store document chunks for RAG retrieval, writing them in batches.

    Chunks are added to ``self.chunks_collection`` in groups of ``BATCH_SIZE``.
    If a whole batch is rejected, its chunks are retried one at a time so a
    single bad chunk does not discard its neighbours. Failures are logged,
    never raised.

    Args:
        doc_id: Document the chunks belong to; used to build chunk ids
            (``"{doc_id}_chunk_{index}"``).
        user_id: Owner recorded in each chunk's metadata.
        chunks: Dicts with a required ``'text'`` key and optional
            ``'start'`` / ``'end'`` character offsets.
        bucket_id: Bucket recorded in each chunk's metadata.
    """
    if not chunks:
        return

    # Batch size for ChromaDB Cloud (max 100 per batch recommended)
    BATCH_SIZE = 100
    # Chunk text longer than this is cut before being stored.
    MAX_CHUNK_CHARS = 4000

    total_chunks = len(chunks)
    print(f"Storing {total_chunks} chunks for document...")

    for i in range(0, total_chunks, BATCH_SIZE):
        batch = chunks[i:i + BATCH_SIZE]
        batch_ids = []
        batch_documents = []
        batch_metadatas = []

        for j, chunk in enumerate(batch):
            # Index of the chunk within the whole document, not the batch.
            global_idx = i + j

            batch_ids.append(f"{doc_id}_chunk_{global_idx}")
            batch_documents.append(chunk['text'][:MAX_CHUNK_CHARS])
            batch_metadatas.append({
                "doc_id": doc_id,
                "user_id": user_id,
                "bucket_id": bucket_id,
                "chunk_index": global_idx,
                "start_char": chunk.get('start', 0),
                "end_char": chunk.get('end', 0)
            })

        try:
            self.chunks_collection.add(
                ids=batch_ids,
                documents=batch_documents,
                metadatas=batch_metadatas
            )
            print(f" Stored batch {i // BATCH_SIZE + 1} ({len(batch)} chunks)")
        except Exception as e:
            print(f" Error storing batch starting at index {i}: {str(e)[:100]}")
            # Fallback: try one by one for this failed batch
            print(" Retrying invalid batch one by one...")
            for k, (bid, doc, meta) in enumerate(zip(batch_ids, batch_documents, batch_metadatas)):
                try:
                    self.chunks_collection.add(ids=[bid], documents=[doc], metadatas=[meta])
                except Exception as inner_e:
                    print(f" Failed chunk {i+k}: {str(inner_e)[:50]}")
405
+
406
def search_chunks(self, user_id: str, query: str,
                  doc_ids: list[str] = None, bucket_id: str = None,
                  top_k: int = 5) -> list[dict]:
    """Run a similarity search over the user's chunks.

    Filter precedence: a truthy ``bucket_id`` restricts the search to that
    bucket (``doc_ids`` is then not used); otherwise a truthy ``doc_ids``
    restricts it to those documents; otherwise every chunk of the user is
    searched.

    Args:
        user_id: Owner of the chunks.
        query: Free-text query passed to the collection.
        doc_ids: Optional document ids to restrict the search to.
        bucket_id: Optional bucket to restrict the search to.
        top_k: Maximum number of hits requested.

    Returns:
        A list of dicts with ``chunk_id``, ``text``, ``doc_id``, ``bucket_id``
        and ``distance`` (0 when the backend returns no distances).
    """
    owner_filter = {"user_id": user_id}
    if bucket_id:
        where_clause = {"$and": [owner_filter, {"bucket_id": bucket_id}]}
        print(f"[CHROMA] Strict bucket isolation: searching only bucket '{bucket_id}'")
    elif doc_ids:
        where_clause = {"$and": [owner_filter, {"doc_id": {"$in": doc_ids}}]}
    else:
        where_clause = owner_filter

    results = self.chunks_collection.query(
        query_texts=[query],
        n_results=top_k,
        where=where_clause
    )

    hits = []
    if not (results['ids'] and results['ids'][0]):
        return hits

    for position, chunk_id in enumerate(results['ids'][0]):
        hit_meta = results['metadatas'][0][position]
        hit_bucket = hit_meta.get('bucket_id', '')

        # Defensive re-check of the bucket filter: drop and log anything
        # that slipped through from another bucket.
        if bucket_id and hit_bucket != bucket_id:
            print(f"[CHROMA WARNING] Bucket leak detected! Expected '{bucket_id}', got '{hit_bucket}'")
            continue  # Skip leaked chunks

        if results.get('distances'):
            distance = results['distances'][0][position]
        else:
            distance = 0

        hits.append({
            "chunk_id": chunk_id,
            "text": results['documents'][0][position],
            "doc_id": hit_meta['doc_id'],
            "bucket_id": hit_bucket,
            "distance": distance
        })

    return hits
458
+
459
def get_bucket_document_list(self, user_id: str, bucket_id: str) -> list[str]:
    """Return the filenames of all documents in a bucket.

    Used for cross-document queries. Documents without a ``filename`` are
    reported as ``'Unknown'``; a falsy ``bucket_id`` yields an empty list
    without querying.
    """
    filenames = []
    if bucket_id:
        for entry in self.get_user_documents(user_id, bucket_id):
            filenames.append(entry.get('filename', 'Unknown'))
    return filenames
466
+
467
def get_document_chunks(self, doc_id: str) -> list[dict]:
    """Fetch every stored chunk of one document, ordered by ``chunk_index``.

    Returns:
        A list of dicts with ``chunk_id``, ``text`` and ``chunk_index``.
    """
    results = self.chunks_collection.get(where={"doc_id": doc_id})

    unordered = [
        {
            "chunk_id": chunk_id,
            "text": results['documents'][position],
            "chunk_index": results['metadatas'][position]['chunk_index']
        }
        for position, chunk_id in enumerate(results['ids'])
    ]

    # sorted() is stable, same as the in-place list sort.
    return sorted(unordered, key=lambda item: item['chunk_index'])
481
+
482
+ # ==================== Conversation Memory Operations ====================
483
+
484
def store_conversation(self, user_id: str, role: str, content: str,
                       bucket_id: str = "", chat_id: str = "") -> dict:
    """Store a single conversation message for persistent memory.

    Args:
        user_id: Owner of the message.
        role: ``'user'`` or ``'assistant'``.
        content: Message text (stored as the document body).
        bucket_id: Bucket the conversation belongs to.
        chat_id: Chat session the message belongs to.

    Returns:
        ``{"msg_id": <id>}`` where the id is
        ``"{user_id}_{epoch_ms}_{random_hex}"``.
    """
    import time
    import uuid

    # Read the clock once so the id and the stored timestamp agree.
    now = time.time()
    # A millisecond timestamp alone is not unique: two messages written in
    # the same millisecond would share an id. The random suffix prevents that.
    msg_id = f"{user_id}_{int(now * 1000)}_{uuid.uuid4().hex[:8]}"

    self.conversations_collection.add(
        ids=[msg_id],
        documents=[content],
        metadatas=[{
            "user_id": user_id,
            "role": role,  # 'user' or 'assistant'
            "bucket_id": bucket_id,
            "chat_id": chat_id,
            "timestamp": now
        }]
    )
    return {"msg_id": msg_id}
502
+
503
def get_conversation_history(self, user_id: str, bucket_id: str = None,
                             limit: int = 20) -> list[dict]:
    """Retrieve a user's most recent conversation messages, oldest first.

    Args:
        user_id: Owner of the messages.
        bucket_id: When truthy, only messages from this bucket are returned.
        limit: Maximum number of most-recent messages to return. ``None``
            returns the full history; zero or a negative value returns an
            empty list.

    Returns:
        Message dicts (``msg_id``, ``role``, ``content``, ``timestamp``,
        ``bucket_id``, ``chat_id``) sorted by ascending timestamp.
    """
    if bucket_id:
        where_clause = {
            "$and": [
                {"user_id": user_id},
                {"bucket_id": bucket_id}
            ]
        }
    else:
        where_clause = {"user_id": user_id}

    results = self.conversations_collection.get(
        where=where_clause
    )

    messages = []
    for i, msg_id in enumerate(results['ids']):
        meta = results['metadatas'][i]
        messages.append({
            "msg_id": msg_id,
            "role": meta['role'],
            "content": results['documents'][i],
            "timestamp": meta['timestamp'],
            "bucket_id": meta.get('bucket_id', ''),
            "chat_id": meta.get('chat_id', '')
        })

    # Sort by timestamp (newest last) and limit
    messages.sort(key=lambda x: x['timestamp'])

    if limit is None:
        return messages
    # messages[-0:] is the whole list, so a zero limit must be handled
    # explicitly; negative limits are treated the same way.
    if limit <= 0:
        return []
    return messages[-limit:]
534
+
535
def clear_conversation(self, user_id: str, bucket_id: str = None) -> bool:
    """Delete a user's stored conversation messages.

    Args:
        user_id: Owner of the messages.
        bucket_id: When truthy, only messages from this bucket are removed.

    Returns:
        Always ``True``, including when nothing matched.
    """
    scope = {"user_id": user_id}
    if bucket_id:
        scope = {"$and": [{"user_id": user_id}, {"bucket_id": bucket_id}]}

    stale_ids = self.conversations_collection.get(where=scope)['ids']
    if stale_ids:
        self.conversations_collection.delete(ids=stale_ids)
    return True
551
+
552
+ # ==================== Chat Session Operations ====================
553
+
554
def save_chat_session(self, user_id: str, chat_id: str, topic: str,
                      messages: list, bucket_id: str = "") -> dict:
    """Store or update a complete chat session.

    The message list is stored as one JSON document. ChromaDB Cloud has a
    16KB document size limit, so oversized sessions are reduced in two
    steps: long message contents are truncated first, then the oldest
    messages are dropped until the document fits. The most recent message
    is always kept. If the write still fails, a one-message placeholder is
    stored instead.

    Args:
        user_id: Owner recorded in the session metadata.
        chat_id: Session id (also the document id).
        topic: Session title recorded in the metadata.
        messages: List of message dicts (typically ``role`` / ``content``).
        bucket_id: Bucket recorded in the session metadata.

    Returns:
        ``{"chat_id": chat_id, "topic": topic}``.
    """
    import json
    import time

    # ChromaDB Cloud has a 16KB (16384 bytes) document size limit
    MAX_DOC_SIZE = 14000  # Leave buffer for metadata overhead
    MAX_MESSAGE_LENGTH = 3000  # Max chars per message when truncating

    # First, try to serialize as-is. json.dumps escapes non-ASCII by
    # default, so len() of the result equals its size in bytes.
    messages_json = json.dumps(messages)

    # If too large, truncate individual message contents
    if len(messages_json) > MAX_DOC_SIZE:
        truncated_messages = []
        for msg in messages:
            truncated_msg = {**msg}
            content = msg.get('content', '')
            # Only string content can be sliced and suffixed safely.
            if isinstance(content, str) and len(content) > MAX_MESSAGE_LENGTH:
                truncated_msg['content'] = content[:MAX_MESSAGE_LENGTH] + '... [truncated for storage]'
            truncated_messages.append(truncated_msg)
        messages_json = json.dumps(truncated_messages)

        # If still too large, keep only the newest messages that fit.
        if len(messages_json) > MAX_DOC_SIZE:
            encoded = [json.dumps(m) for m in truncated_messages]
            kept = []
            size = 2  # the enclosing "[" and "]"
            for item in reversed(encoded):
                cost = len(item) + (2 if kept else 0)  # ", " separator
                # Always keep the newest message, even if it alone is over
                # budget; the fallback below covers that case.
                if kept and size + cost > MAX_DOC_SIZE:
                    break
                kept.append(item)
                size += cost
            kept.reverse()
            # Same text json.dumps() would produce for the kept suffix.
            messages_json = "[" + ", ".join(kept) + "]"

    metadata = {
        "user_id": user_id,
        "topic": topic,
        "bucket_id": bucket_id,
        "message_count": len(messages),
        "timestamp": time.time()
    }

    # Check if chat exists
    existing = self.chat_sessions_collection.get(ids=[chat_id])

    try:
        if existing['ids']:
            # Update existing chat
            self.chat_sessions_collection.update(
                ids=[chat_id],
                documents=[messages_json],
                metadatas=[metadata]
            )
        else:
            # Add new chat
            self.chat_sessions_collection.add(
                ids=[chat_id],
                documents=[messages_json],
                metadatas=[metadata]
            )
    except Exception as e:
        # If still failing, store minimal version
        print(f"[CHAT SAVE] Error saving full chat, storing minimal: {e}")
        minimal_messages = [{"role": "system", "content": f"Chat with {len(messages)} messages (too large to store)"}]
        self.chat_sessions_collection.upsert(
            ids=[chat_id],
            documents=[json.dumps(minimal_messages)],
            metadatas=[metadata]
        )

    return {"chat_id": chat_id, "topic": topic}
626
+
627
def get_user_chat_sessions(self, user_id: str) -> list:
    """Get all chat sessions for a user, newest first.

    Returns:
        Dicts with ``id``, ``topic`` (default ``'Chat'``), ``messages``
        (decoded from the stored JSON, ``[]`` when it cannot be decoded),
        ``bucket`` and ``timestamp`` (default 0).
    """
    import json

    results = self.chat_sessions_collection.get(where={"user_id": user_id})

    sessions = []
    for i, chat_id in enumerate(results['ids']):
        try:
            messages = json.loads(results['documents'][i])
        except Exception:
            # Unreadable session body: still list the session, just empty.
            # (Not a bare except, so KeyboardInterrupt/SystemExit propagate.)
            messages = []

        meta = results['metadatas'][i]
        sessions.append({
            "id": chat_id,
            "topic": meta.get('topic', 'Chat'),
            "messages": messages,
            "bucket": meta.get('bucket_id', ''),
            "timestamp": meta.get('timestamp', 0)
        })

    # Sort by timestamp (newest first)
    sessions.sort(key=lambda x: x['timestamp'], reverse=True)
    return sessions
651
+
652
+ def get_chat_session(self, user_id: str, chat_id: str) -> dict | None:
653
+ """Get a single chat session by ID"""
654
+ import json
655
+
656
+ result = self.chat_sessions_collection.get(ids=[chat_id])
657
+
658
+ if not result['ids']:
659
+ return None
660
+
661
+ # Verify ownership
662
+ if result['metadatas'][0].get('user_id') != user_id:
663
+ return None
664
+
665
+ try:
666
+ messages = json.loads(result['documents'][0])
667
+ except:
668
+ messages = []
669
+
670
+ return {
671
+ "id": chat_id,
672
+ "topic": result['metadatas'][0].get('topic', 'Chat'),
673
+ "messages": messages,
674
+ "bucket": result['metadatas'][0].get('bucket_id', ''),
675
+ "timestamp": result['metadatas'][0].get('timestamp', 0)
676
+ }
677
+
678
def delete_chat_session(self, user_id: str, chat_id: str) -> bool:
    """Remove a chat session together with its conversation-history entries.

    Returns:
        ``False`` when the session does not exist or belongs to another
        user; ``True`` once the session itself has been deleted. A failure
        while cleaning up the history is only logged.
    """
    session = self.chat_sessions_collection.get(ids=[chat_id])

    # Missing session, or one owned by someone else: refuse.
    if not session['ids'] or session['metadatas'][0].get('user_id') != user_id:
        return False

    self.chat_sessions_collection.delete(ids=[chat_id])

    # Best-effort cleanup of the per-message history for this chat.
    history_filter = {"$and": [{"user_id": user_id}, {"chat_id": chat_id}]}
    try:
        history = self.conversations_collection.get(where=history_filter)
        history_ids = history['ids']
        if history_ids:
            self.conversations_collection.delete(ids=history_ids)
            print(f"Deleted {len(history_ids)} conversation history entries for chat {chat_id}")
    except Exception as e:
        print(f"Warning: Could not delete conversation history for chat {chat_id}: {e}")

    return True
709
+
710
+ # ==================== Document Metadata Operations (Aggregate Queries) ====================
711
+
712
def store_document_metadata(self, doc_id: str, user_id: str, bucket_id: str,
                            metadata: dict) -> dict:
    """
    Store structured metadata for a document.
    Used for aggregate queries like 'list all manufacturing units'.

    The metadata is flattened to primitive values, list fields are stored
    as JSON strings, and a plain-text rendering of the key fields is stored
    as the document body. An existing record for ``doc_id`` is updated,
    otherwise a new one is added.

    Args:
        doc_id: Document ID
        user_id: User ID
        bucket_id: Bucket ID
        metadata: Structured metadata dict. Missing or ``None`` fields are
            stored as empty strings / 0 / ``False``.

    Returns:
        ``{"doc_id": doc_id, "status": "stored"}``.
    """
    import json
    import re
    import time

    metadata = metadata or {}

    # First number in a string: optional sign, digits, optional decimals.
    # A leading-dot number (".5") is accepted only when the dot does not
    # follow a letter, so the dot in "Rs." is not read as a decimal point.
    number_pattern = re.compile(r'-?(?:\d+(?:\.\d+)?|(?<![A-Za-z])\.\d+)')

    # Helper functions for safe type conversion
    def safe_float(value, default=0.0):
        if value is None:
            return default
        if isinstance(value, (int, float)):
            return float(value)
        if isinstance(value, str):
            # Drop thousands separators, then take the first number found
            # ("Rs. 5,00,000" -> 500000.0).
            found = number_pattern.search(value.replace(',', ''))
            if found:
                try:
                    return float(found.group())
                except ValueError:
                    return default
        return default

    def safe_int(value, default=0):
        if value is None:
            return default
        if isinstance(value, int):
            return value
        try:
            return int(safe_float(value, default))
        except (TypeError, ValueError, OverflowError):
            return default

    def safe_str(key):
        # A key present with value None must not be stored as "None".
        value = metadata.get(key)
        return "" if value is None else str(value)

    def safe_bool(value):
        # bool("false") is True, so spell out the accepted truthy strings.
        if isinstance(value, str):
            return value.strip().lower() in ("true", "yes", "y", "1")
        return bool(value)

    created_at = metadata.get("created_at")
    if created_at is None:
        created_at = time.time()

    # Flatten metadata for ChromaDB (which only supports primitive types in metadata)
    flat_metadata = {
        "doc_id": doc_id,
        "user_id": user_id,
        "bucket_id": bucket_id,
        "document_type": safe_str("document_type"),
        "document_title": safe_str("document_title"),
        "policy_number": safe_str("policy_number"),
        "insurer_name": safe_str("insurer_name"),
        "insured_name": safe_str("insured_name"),
        "broker_name": safe_str("broker_name"),
        "policy_type": safe_str("policy_type"),
        "industry": safe_str("industry"),
        "is_manufacturing": safe_bool(metadata.get("is_manufacturing", False)),
        "sum_insured": safe_float(metadata.get("sum_insured")),
        "premium_amount": safe_float(metadata.get("premium_amount")),
        "policy_start_date": safe_str("policy_start_date"),
        "policy_end_date": safe_str("policy_end_date"),
        "renewal_date": safe_str("renewal_date"),
        "renewal_year": safe_int(metadata.get("renewal_year")),
        "city": safe_str("city"),
        "state": safe_str("state"),
        "pincode": safe_str("pincode"),
        "property_address": safe_str("property_address")[:500],
        "created_at": created_at
    }

    # Store arrays as JSON strings
    coverage_types = metadata.get("coverage_type", [])
    flat_metadata["coverage_type_json"] = json.dumps(coverage_types if isinstance(coverage_types, list) else [])

    keywords = metadata.get("keywords", [])
    flat_metadata["keywords_json"] = json.dumps(keywords if isinstance(keywords, list) else [])

    # Create searchable text from metadata, one field per line.
    searchable_text = "\n".join([
        safe_str('document_title'),
        safe_str('insured_name'),
        safe_str('insurer_name'),
        safe_str('policy_type'),
        safe_str('industry'),
        f"{safe_str('city')} {safe_str('state')}",
        f"Policy Number: {safe_str('policy_number')}",
        f"Sum Insured: {safe_str('sum_insured')}",
    ]).strip()

    # Check if metadata already exists for this doc
    existing = self.metadata_collection.get(ids=[doc_id])

    if existing['ids']:
        self.metadata_collection.update(
            ids=[doc_id],
            documents=[searchable_text],
            metadatas=[flat_metadata]
        )
    else:
        self.metadata_collection.add(
            ids=[doc_id],
            documents=[searchable_text],
            metadatas=[flat_metadata]
        )

    return {"doc_id": doc_id, "status": "stored"}
818
+
819
+ def get_document_metadata(self, doc_id: str, user_id: str) -> dict | None:
820
+ """Get metadata for a specific document."""
821
+ result = self.metadata_collection.get(ids=[doc_id])
822
+
823
+ if not result['ids']:
824
+ return None
825
+
826
+ meta = result['metadatas'][0]
827
+ if meta.get('user_id') != user_id:
828
+ return None
829
+
830
+ return meta
831
+
832
def get_all_metadata(self, user_id: str, bucket_id: str = None) -> list[dict]:
    """
    Get ALL document metadata for a user/bucket.
    Used for aggregate queries - returns complete list, no top-K limit.

    Each returned dict is the stored metadata with two extra keys,
    ``coverage_type`` and ``keywords``, decoded from their ``*_json``
    fields (``[]`` when the stored value cannot be decoded).

    Args:
        user_id: Owner of the documents.
        bucket_id: When truthy, only records from this bucket are returned.
    """
    import json

    if bucket_id:
        where_clause = {
            "$and": [
                {"user_id": user_id},
                {"bucket_id": bucket_id}
            ]
        }
    else:
        where_clause = {"user_id": user_id}

    results = self.metadata_collection.get(where=where_clause)

    def decode_list(raw):
        # Not a bare except, so KeyboardInterrupt/SystemExit propagate.
        try:
            return json.loads(raw)
        except Exception:
            return []

    metadata_list = []
    for i, doc_id in enumerate(results['ids']):
        meta = results['metadatas'][i]

        # Parse JSON arrays back
        meta['coverage_type'] = decode_list(meta.get('coverage_type_json', '[]'))
        meta['keywords'] = decode_list(meta.get('keywords_json', '[]'))

        metadata_list.append(meta)

    return metadata_list
869
+
870
def search_metadata(self, user_id: str, bucket_id: str, filters: dict) -> list[dict]:
    """Fetch metadata records matching exact-value filters.

    Supports filtering by: policy_type, industry, is_manufacturing,
    renewal_year, city, state. Filter entries whose value is ``None`` or
    ``""`` are skipped; all remaining conditions are AND-ed with the user
    (and, when truthy, bucket) restriction.

    Returns:
        The matching metadata dicts as stored.
    """
    conditions = [{"user_id": user_id}]
    if bucket_id:
        conditions.append({"bucket_id": bucket_id})

    conditions.extend(
        {field: wanted}
        for field, wanted in filters.items()
        if wanted is not None and wanted != ""
    )

    # A single condition is passed bare; several are wrapped in "$and".
    where_clause = conditions[0] if len(conditions) == 1 else {"$and": conditions}

    results = self.metadata_collection.get(where=where_clause)

    return [results['metadatas'][idx] for idx, _ in enumerate(results['ids'])]
893
+
894
def delete_document_metadata(self, doc_id: str) -> bool:
    """Delete metadata for a document.

    Returns:
        ``True`` when the delete call completed, ``False`` when it raised.
    """
    try:
        self.metadata_collection.delete(ids=[doc_id])
        return True
    except Exception:
        # Not a bare except, so KeyboardInterrupt/SystemExit propagate.
        return False
901
+
902
+ # ==================== Summary Chunks Operations ====================
903
+
904
def store_summary_chunk(self, doc_id: str, user_id: str, summary: str,
                        bucket_id: str = "", filename: str = "") -> dict:
    """Save a document's summary as a dedicated chunk for aggregate queries.

    The summary is stored under the id ``"{doc_id}_summary"``; an existing
    entry with that id is updated, otherwise a new one is added.

    Returns:
        ``{"summary_id": <id>, "status": "stored"}``.
    """
    summary_id = f"{doc_id}_summary"
    collection = self.summary_chunks_collection

    metadata = {
        "doc_id": doc_id,
        "user_id": user_id,
        "bucket_id": bucket_id,
        "filename": filename,
        "chunk_type": "summary",
        "created_at": time.time()
    }

    # Pick the write operation based on whether the summary already exists.
    already_stored = collection.get(ids=[summary_id])['ids']
    write = collection.update if already_stored else collection.add
    write(
        ids=[summary_id],
        documents=[summary],
        metadatas=[metadata]
    )

    return {"summary_id": summary_id, "status": "stored"}
937
+
938
def get_all_summaries(self, user_id: str, bucket_id: str = None) -> list[dict]:
    """Return every stored document summary for a user (optionally one bucket).

    This is a plain fetch, not a similarity search, so there is no top-K cap.

    Returns:
        Dicts with ``doc_id``, ``filename``, ``summary`` and ``bucket_id``.
    """
    where_clause = {"user_id": user_id}
    if bucket_id:
        where_clause = {"$and": [{"user_id": user_id}, {"bucket_id": bucket_id}]}

    results = self.summary_chunks_collection.get(where=where_clause)

    return [
        {
            "doc_id": results['metadatas'][idx]['doc_id'],
            "filename": results['metadatas'][idx].get('filename', ''),
            "summary": results['documents'][idx],
            "bucket_id": results['metadatas'][idx].get('bucket_id', '')
        }
        for idx, _ in enumerate(results['ids'])
    ]
965
+
966
def search_summaries(self, user_id: str, query: str, bucket_id: str = None,
                     top_k: int = 50) -> list[dict]:
    """Rank stored document summaries by similarity to ``query``.

    Args:
        user_id: Owner of the summaries.
        query: Free-text query passed to the collection.
        bucket_id: When truthy, only summaries from this bucket are searched.
        top_k: Maximum number of hits requested.

    Returns:
        Dicts with ``doc_id``, ``filename``, ``summary`` and ``distance``
        (0 when the backend returns no distances).
    """
    where_clause = {"user_id": user_id}
    if bucket_id:
        where_clause = {"$and": [{"user_id": user_id}, {"bucket_id": bucket_id}]}

    results = self.summary_chunks_collection.query(
        query_texts=[query],
        n_results=top_k,
        where=where_clause
    )

    if not (results['ids'] and results['ids'][0]):
        return []

    hits = []
    for idx, _ in enumerate(results['ids'][0]):
        hit_meta = results['metadatas'][0][idx]
        hits.append({
            "doc_id": hit_meta['doc_id'],
            "filename": hit_meta.get('filename', ''),
            "summary": results['documents'][0][idx],
            "distance": results['distances'][0][idx] if results.get('distances') else 0
        })
    return hits
996
+
997
def delete_summary_chunk(self, doc_id: str) -> bool:
    """Delete summary chunk for a document.

    The summary is stored under the id ``"{doc_id}_summary"``.

    Returns:
        ``True`` when the delete call completed, ``False`` when it raised.
    """
    summary_id = f"{doc_id}_summary"
    try:
        self.summary_chunks_collection.delete(ids=[summary_id])
        return True
    except Exception:
        # Not a bare except, so KeyboardInterrupt/SystemExit propagate.
        return False
1005
+
1006
+
1007
+ # Singleton instance
1008
+ chroma_service = ChromaService()
1009
+
services/date_parser.py ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Date Parser Service
3
+ Handles parsing of various date formats commonly found in insurance documents.
4
+ Supports:
5
+ - 1-1-25, 01-01-2025, 1/1/25, 01/01/2025
6
+ - January 1, 2025, Jan 1, 2025, 1 January 2025
7
+ - 2025-01-01 (ISO format)
8
+ - Date ranges and period calculations
9
+ """
10
+
11
+ import re
12
+ from datetime import datetime, timedelta
13
+ from typing import Optional, List, Dict, Tuple
14
+
15
+
16
class DateParser:
    """Parse and normalize dates from various formats.

    Handles numeric day-first dates (``1-1-25``, ``01/01/2025``), ISO dates
    (``2025-01-01``) and month-name dates (``Jan 1, 2025``,
    ``1 January 2025``), and labels dates found in free text with the kind
    of date they appear to be (start / end / renewal / issue).
    """

    # Month name mappings
    MONTHS = {
        'january': 1, 'jan': 1,
        'february': 2, 'feb': 2,
        'march': 3, 'mar': 3,
        'april': 4, 'apr': 4,
        'may': 5,
        'june': 6, 'jun': 6,
        'july': 7, 'jul': 7,
        'august': 8, 'aug': 8,
        'september': 9, 'sep': 9, 'sept': 9,
        'october': 10, 'oct': 10,
        'november': 11, 'nov': 11,
        'december': 12, 'dec': 12
    }

    # Date context keywords for identifying date types
    DATE_CONTEXTS = {
        'start': ['start', 'commence', 'inception', 'effective', 'from', 'begins', 'starting'],
        'end': ['end', 'expiry', 'expire', 'expiration', 'until', 'to', 'ending', 'valid till', 'valid until'],
        'renewal': ['renewal', 'renew', 'next renewal', 'due for renewal'],
        'issue': ['issue', 'issued', 'date of issue', 'policy date']
    }

    # Number of characters before a date that are scanned for context keywords.
    CONTEXT_WINDOW = 100

    def __init__(self):
        self._compile_patterns()

    def _compile_patterns(self):
        """Compile regex patterns for date extraction and context detection."""
        # DD-MM-YY or DD-MM-YYYY (with - or /). The year is exactly 2 or 4
        # digits; a 3-digit "year" is not a date.
        self.pattern_dmy = re.compile(
            r'\b(\d{1,2})[-/](\d{1,2})[-/](\d{4}|\d{2})\b'
        )

        # YYYY-MM-DD (ISO format)
        self.pattern_iso = re.compile(
            r'\b(\d{4})[-/](\d{1,2})[-/](\d{1,2})\b'
        )

        # Month DD, YYYY or DD Month YYYY
        month_names = '|'.join(self.MONTHS.keys())
        self.pattern_month_name = re.compile(
            rf'\b(\d{{1,2}})\s*(?:st|nd|rd|th)?\s*({month_names})[,]?\s*(\d{{4}})\b|'
            rf'\b({month_names})\s*(\d{{1,2}})(?:st|nd|rd|th)?[,]?\s*(\d{{4}})\b',
            re.IGNORECASE
        )

        # One pattern per date type. Keywords must start at a word boundary.
        # Short keywords ("to", "end", "from") must match a whole word so
        # they do not fire inside "total" or "endorsement"; longer keywords
        # may carry a suffix ("expire" also matches "expires"/"expired").
        self._context_patterns = {}
        for dtype, keywords in self.DATE_CONTEXTS.items():
            alternatives = []
            for keyword in sorted(keywords, key=len, reverse=True):
                body = r'\s+'.join(re.escape(part) for part in keyword.split())
                tail = r'\b' if len(keyword) <= 4 else r'\w*'
                alternatives.append(body + tail)
            self._context_patterns[dtype] = re.compile(
                r'\b(?:' + '|'.join(alternatives) + ')', re.IGNORECASE
            )

    def _classify_context(self, context_text: str) -> str:
        """Return the date type whose keyword occurs closest to the end of ``context_text``.

        ``context_text`` is the text immediately preceding a date. Returns
        ``'unknown'`` when no keyword is present.
        """
        best_type = 'unknown'
        best_pos = -1
        for dtype, pattern in self._context_patterns.items():
            last = None
            for last in pattern.finditer(context_text):
                pass
            if last is not None and last.start() > best_pos:
                best_pos = last.start()
                best_type = dtype
        return best_type

    def parse_date(self, date_str: str) -> Optional[datetime]:
        """
        Parse a date string in various formats to datetime object.

        Formats are tried in order: ISO (YYYY-MM-DD), numeric day-first
        (DD-MM-YY / DD-MM-YYYY, falling back to month-first when day-first
        is not a valid date), then month-name forms. Two-digit years below
        50 map to 20xx, the rest to 19xx.

        Args:
            date_str: Date string to parse

        Returns:
            datetime object or None if parsing fails
        """
        if not date_str:
            return None

        date_str = str(date_str).strip()

        # Try ISO format first (YYYY-MM-DD)
        match = self.pattern_iso.search(date_str)
        if match:
            year, month, day = match.groups()
            try:
                return datetime(int(year), int(month), int(day))
            except ValueError:
                pass

        # Try DMY format (DD-MM-YY or DD-MM-YYYY)
        match = self.pattern_dmy.search(date_str)
        if match:
            day, month, year = match.groups()
            # Handle 2-digit years (decided on the digit count, so a
            # 4-digit "0025" is not silently promoted to 2025).
            if len(year) == 2:
                year = 2000 + int(year) if int(year) < 50 else 1900 + int(year)
            else:
                year = int(year)
            try:
                return datetime(year, int(month), int(day))
            except ValueError:
                # Try swapping day/month for US format
                try:
                    return datetime(year, int(day), int(month))
                except ValueError:
                    pass

        # Try month name format
        match = self.pattern_month_name.search(date_str)
        if match:
            groups = match.groups()
            if groups[0]:  # DD Month YYYY format
                day, month_name, year = groups[0], groups[1], groups[2]
            else:  # Month DD, YYYY format
                month_name, day, year = groups[3], groups[4], groups[5]

            month = self.MONTHS.get(month_name.lower())
            if month:
                try:
                    return datetime(int(year), month, int(day))
                except ValueError:
                    pass

        return None

    def extract_dates_from_text(self, text: str) -> List[Dict]:
        """
        Extract all dates from text with their context.

        The context of a date is decided by the keyword from
        ``DATE_CONTEXTS`` that appears closest before it, within
        ``CONTEXT_WINDOW`` characters. Results are de-duplicated by calendar
        date, keeping the first occurrence found; numeric day-first matches
        are collected first, then ISO, then month-name matches.

        Args:
            text: Text to search for dates

        Returns:
            List of dicts with date info:
            [{"date": datetime, "date_str": "2025-01-01",
              "context": "start/end/renewal/issue/unknown",
              "original": "01-01-2025", "position": 123}]
        """
        if not text:
            return []

        # Find all date matches
        all_matches = []
        for pattern in (self.pattern_dmy, self.pattern_iso, self.pattern_month_name):
            for match in pattern.finditer(text):
                parsed = self.parse_date(match.group())
                if parsed:
                    all_matches.append({
                        'date': parsed,
                        'original': match.group(),
                        'position': match.start()
                    })

        results = []
        seen_dates = set()
        for found in all_matches:
            date_key = found['date'].strftime('%Y-%m-%d')
            # Remove duplicates based on date
            if date_key in seen_dates:
                continue
            seen_dates.add(date_key)

            pos = found['position']
            # Slice the original text (matching is case-insensitive) so the
            # offsets stay aligned; lowercasing can change string length.
            context_text = text[max(0, pos - self.CONTEXT_WINDOW):pos]

            results.append({
                'date': found['date'],
                'date_str': date_key,
                'context': self._classify_context(context_text),
                'original': found['original'],
                'position': pos
            })

        return results

    def calculate_renewal_date(self, policy_start: datetime,
                               term_months: int = 12) -> datetime:
        """
        Calculate policy renewal date.

        Args:
            policy_start: Policy start date
            term_months: Policy term in months (default 12)

        Returns:
            Renewal date (policy_start + term_months). When the target month
            has no such day (e.g. Jan 31 + 1 month), the last day of the
            target month is returned.
        """
        # Add months
        new_month = policy_start.month + term_months
        new_year = policy_start.year + (new_month - 1) // 12
        new_month = ((new_month - 1) % 12) + 1

        # Handle day overflow
        try:
            return datetime(new_year, new_month, policy_start.day)
        except ValueError:
            # Last day of month for dates like Jan 31 + 1 month
            if new_month == 12:
                next_month = datetime(new_year + 1, 1, 1)
            else:
                next_month = datetime(new_year, new_month + 1, 1)
            return next_month - timedelta(days=1)

    def is_date_in_range(self, date: datetime,
                         year: int = None,
                         before: datetime = None,
                         after: datetime = None) -> bool:
        """
        Check if date matches filter criteria.

        Args:
            date: Date to check
            year: Match specific year
            before: Date must be strictly before this
            after: Date must be strictly after this

        Returns:
            True if date matches all criteria that were supplied; False for
            a missing date.
        """
        if not date:
            return False

        if year and date.year != year:
            return False

        if before and date >= before:
            return False

        if after and date <= after:
            return False

        return True

    def get_year_from_query(self, query: str) -> Optional[int]:
        """Extract year from query like 'policies renewing in 2026'.

        An explicit 20xx year wins; otherwise 'this year', 'next year' and
        'last year' are resolved against the current year. Returns None
        when the query is empty or names no year.
        """
        if not query:
            return None

        match = re.search(r'\b(20\d{2})\b', query)
        if match:
            return int(match.group(1))

        # Handle relative years
        current_year = datetime.now().year
        lowered = query.lower()
        if 'this year' in lowered:
            return current_year
        if 'next year' in lowered:
            return current_year + 1
        if 'last year' in lowered:
            return current_year - 1

        return None
282
+
283
+
284
+ # Singleton instance
285
+ date_parser = DateParser()
services/document_processor.py ADDED
@@ -0,0 +1,336 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Document Processor Service
3
+ Handles text extraction from various document types:
4
+ - PDF (always processed with vision OCR)
5
+ - DOCX (Word documents) and PPTX (PowerPoint presentations)
6
+ - Excel (XLS, XLSX)
7
+ - Images (via OCR)
8
+ - Plain text (TXT, MD)
9
+ """
10
+
11
+ import os
12
+ import io
13
+ from pathlib import Path
14
+ from typing import Optional
15
+ import fitz # PyMuPDF
16
+ from docx import Document
17
+ from pptx import Presentation
18
+ from pptx.util import Inches
19
+ import pandas as pd
20
+ from PIL import Image
21
+
22
+ from services.ocr_service import ocr_service
23
+ from config import Config
24
+
25
+
26
class DocumentProcessor:
    """Extracts plain text from documents of several formats.

    Each ``_process_*`` handler returns a result dict. On success it carries
    ``"success": True`` together with ``"text"`` and ``"method"`` (some
    handlers add a page/slide/sheet count). On failure it carries
    ``"success": False`` and an ``"error"`` message.
    """

    # Lower-case extension (without the dot) -> logical document type.
    _TYPE_BY_EXTENSION = {
        'pdf': 'pdf',
        'doc': 'word',
        'docx': 'word',
        'ppt': 'powerpoint',
        'pptx': 'powerpoint',
        'xls': 'excel',
        'xlsx': 'excel',
        'txt': 'text',
        'md': 'text',
        'png': 'image',
        'jpg': 'image',
        'jpeg': 'image',
        'gif': 'image',
        'webp': 'image'
    }

    # Tried in this order. cp1252 has to come before latin-1: latin-1 maps
    # every possible byte to a character and therefore never fails, so any
    # encoding listed after it could never be reached.
    _TEXT_ENCODINGS = ('utf-8', 'cp1252', 'latin-1')

    def __init__(self):
        # The set of allowed extensions comes from the application config.
        self.supported_extensions = Config.ALLOWED_EXTENSIONS

    @staticmethod
    def _extension(filename: str) -> str:
        """Return the lower-cased extension of ``filename`` without the dot."""
        return Path(filename).suffix.lower().lstrip('.')

    def get_file_type(self, filename: str) -> str:
        """Determine the logical file type from the extension.

        Returns one of 'pdf', 'word', 'powerpoint', 'excel', 'text', 'image',
        or 'unknown' for an extension that is not in the type map.
        """
        return self._TYPE_BY_EXTENSION.get(self._extension(filename), 'unknown')

    def is_supported(self, filename: str) -> bool:
        """Check whether the file's extension is in ``supported_extensions``."""
        return self._extension(filename) in self.supported_extensions

    def process(self, file_path: str, filename: str) -> dict:
        """
        Process a document and extract its text.

        Args:
            file_path: Location of the file on disk
            filename: Name used to pick the handler (by extension)

        Returns:
            {"success": True, "text": str, "method": str, ...} on success,
            {"success": False, "error": str} on failure. Exceptions raised by
            a handler are converted into the failure form.
        """
        file_type = self.get_file_type(filename)
        handlers = {
            'pdf': self._process_pdf,
            'word': self._process_word,
            'powerpoint': self._process_pptx,
            'excel': self._process_excel,
            'image': self._process_image,
            'text': self._process_text,
        }
        handler = handlers.get(file_type)
        if handler is None:
            return {
                "success": False,
                "error": f"Unsupported file type: {file_type}"
            }

        try:
            return handler(file_path)
        except Exception as e:
            return {"success": False, "error": str(e)}

    def _process_pdf(self, file_path: str) -> dict:
        """
        Process a PDF by sending the whole file through the OCR service.

        The file is opened with PyMuPDF only to count its pages; the text
        itself comes from ``ocr_service.extract_text_from_pdf``.
        """
        try:
            # The context manager closes the document even if len() raises.
            with fitz.open(file_path) as doc:
                total_pages = len(doc)

            print(f"Processing {total_pages} page PDF with OpenRouter vision OCR...")

            ocr_result = ocr_service.extract_text_from_pdf(file_path)
            if not ocr_result['success']:
                return {
                    "success": False,
                    "error": f"OCR failed: {ocr_result['error']}"
                }

            print("PDF OCR successful")
            return {
                "success": True,
                "text": ocr_result['text'],
                "method": ocr_result.get('model', 'OpenRouter Vision OCR'),
                "page_count": total_pages
            }

        except Exception as e:
            return {"success": False, "error": f"PDF processing error: {str(e)}"}

    def _process_pdf_hybrid(self, file_path: str, text_pages: list, ocr_needed_pages: list) -> dict:
        """
        Hybrid PDF processing: keep already-extracted text and OCR the rest.

        Args:
            file_path: Location of the PDF on disk
            text_pages: Iterable of ``(page_index, text)`` pairs already extracted
            ocr_needed_pages: Zero-based indices of pages to render and OCR

        Returns:
            Result dict whose text lists the pages in ascending page order.
            A page whose OCR fails is kept as an "[OCR failed: ...]" marker
            instead of aborting the whole document.
        """
        try:
            all_pages = {
                page_num: f"--- Page {page_num + 1} ---\n{text}"
                for page_num, text in text_pages
            }

            print(f"OCR processing {len(ocr_needed_pages)} scanned pages...")

            with fitz.open(file_path) as doc:
                total_pages = len(doc)
                zoom = fitz.Matrix(2, 2)  # 2x zoom for better OCR

                for i, page_num in enumerate(ocr_needed_pages):
                    temp_path = f"{file_path}_page_{page_num}.png"
                    try:
                        # Render the page to a temporary image for the OCR service.
                        doc[page_num].get_pixmap(matrix=zoom).save(temp_path)
                        ocr_result = ocr_service.extract_text(temp_path)
                    finally:
                        # Remove the temp image even when rendering or OCR raises.
                        if os.path.exists(temp_path):
                            os.remove(temp_path)

                    if ocr_result['success']:
                        all_pages[page_num] = f"--- Page {page_num + 1} (OCR) ---\n{ocr_result['text']}"
                    else:
                        all_pages[page_num] = f"--- Page {page_num + 1} ---\n[OCR failed: {ocr_result['error']}]"

                    # Progress logging every 10 pages
                    if (i + 1) % 10 == 0:
                        print(f"OCR progress: {i + 1}/{len(ocr_needed_pages)} pages")

            text_parts = [all_pages[i] for i in sorted(all_pages)]

            return {
                "success": True,
                "text": "\n\n".join(text_parts),
                "method": "hybrid (text + OCR)",
                "page_count": total_pages
            }

        except Exception as e:
            return {"success": False, "error": f"Hybrid PDF processing error: {str(e)}"}

    def _process_word(self, file_path: str) -> dict:
        """Process a Word document: all non-blank paragraphs, then all tables."""
        try:
            doc = Document(file_path)

            text_parts = [para.text for para in doc.paragraphs if para.text.strip()]

            for table in doc.tables:
                rows = [
                    " | ".join(cell.text.strip() for cell in row.cells)
                    for row in table.rows
                ]
                if rows:
                    text_parts.append("\n[Table]\n" + "\n".join(rows))

            return {
                "success": True,
                "text": "\n\n".join(text_parts),
                "method": "docx extraction"
            }

        except Exception as e:
            return {"success": False, "error": f"Word processing error: {str(e)}"}

    @staticmethod
    def _text_frame_lines(text_frame) -> list:
        """Return the stripped, non-empty paragraphs of a text frame.

        Each paragraph's text is the concatenation of its runs.
        """
        lines = []
        for paragraph in text_frame.paragraphs:
            line = "".join(run.text for run in paragraph.runs).strip()
            if line:
                lines.append(line)
        return lines

    def _process_pptx(self, file_path: str) -> dict:
        """Process a PowerPoint file: shape text, tables and speaker notes.

        Paragraphs inside one table cell are joined with a space and speaker
        note paragraphs with a newline, so that the last word of a paragraph
        is not fused with the first word of the next one.
        """
        try:
            prs = Presentation(file_path)
            text_parts = []
            slide_count = 0

            for slide_num, slide in enumerate(prs.slides, 1):
                slide_count = slide_num
                slide_text_parts = []

                for shape in slide.shapes:
                    # Text frames (text boxes, titles, etc.)
                    if shape.has_text_frame:
                        slide_text_parts.extend(self._text_frame_lines(shape.text_frame))

                    # Tables in slides
                    if shape.has_table:
                        table_rows = [
                            " | ".join(
                                " ".join(self._text_frame_lines(cell.text_frame))
                                for cell in row.cells
                            )
                            for row in shape.table.rows
                        ]
                        if table_rows:
                            slide_text_parts.append("[Table]\n" + "\n".join(table_rows))

                # Speaker notes
                if slide.has_notes_slide:
                    notes_frame = slide.notes_slide.notes_text_frame
                    if notes_frame:
                        notes_text = "\n".join(self._text_frame_lines(notes_frame))
                        if notes_text:
                            slide_text_parts.append(f"[Speaker Notes]\n{notes_text}")

                if slide_text_parts:
                    text_parts.append(f"--- Slide {slide_num} ---\n" + "\n".join(slide_text_parts))

            if not text_parts:
                return {
                    "success": False,
                    "error": "No text content found in PowerPoint file"
                }

            return {
                "success": True,
                "text": "\n\n".join(text_parts),
                "method": "pptx extraction",
                "slide_count": slide_count
            }

        except Exception as e:
            return {"success": False, "error": f"PowerPoint processing error: {str(e)}"}

    def _process_excel(self, file_path: str) -> dict:
        """Process an Excel workbook: one text section per non-empty sheet."""
        try:
            text_parts = []

            # The context manager releases the workbook's file handle.
            with pd.ExcelFile(file_path) as excel_file:
                sheet_names = list(excel_file.sheet_names)
                for sheet_name in sheet_names:
                    df = pd.read_excel(excel_file, sheet_name=sheet_name)
                    if df.empty:
                        continue
                    text_parts.append(
                        f"=== Sheet: {sheet_name} ===\n" + df.to_string(index=False)
                    )

            return {
                "success": True,
                "text": "\n\n".join(text_parts),
                "method": "excel extraction",
                "sheet_count": len(sheet_names)
            }

        except Exception as e:
            return {"success": False, "error": f"Excel processing error: {str(e)}"}

    def _process_image(self, file_path: str) -> dict:
        """Process an image by passing it to the OCR service."""
        result = ocr_service.extract_text(file_path)

        if not result['success']:
            return {"success": False, "error": result['error']}

        return {
            "success": True,
            "text": result['text'],
            "method": f"OCR ({result.get('model', 'unknown')})"
        }

    def _process_text(self, file_path: str) -> dict:
        """Read a plain-text file, trying utf-8, then cp1252, then latin-1."""
        try:
            for encoding in self._TEXT_ENCODINGS:
                try:
                    with open(file_path, 'r', encoding=encoding) as f:
                        text = f.read()
                except UnicodeDecodeError:
                    continue
                return {
                    "success": True,
                    "text": text,
                    "method": f"text read ({encoding})"
                }

            # Kept as a safety net; latin-1 accepts every byte sequence.
            return {"success": False, "error": "Could not decode text file"}

        except Exception as e:
            return {"success": False, "error": f"Text processing error: {str(e)}"}
333
+
334
+
335
+ # Singleton instance
336
+ document_processor = DocumentProcessor()
services/metadata_extractor.py ADDED
@@ -0,0 +1,446 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Metadata Extractor Service
3
+ Extracts structured metadata from insurance policy documents using AI.
4
+ Handles various document formats and naming conventions.
5
+ """
6
+
7
+ import re
8
+ import json
9
+ import requests
10
+ from typing import Optional, Dict, List
11
+ from config import Config
12
+ from services.date_parser import date_parser
13
+ from services.number_extractor import number_extractor
14
+
15
+
16
class MetadataExtractor:
    """Extract structured metadata from document content using AI and regex."""

    # Default metadata schema
    DEFAULT_METADATA = {
        # Identity
        "document_type": "",
        "document_title": "",
        "policy_number": "",
        "insurer_name": "",
        "issue_date": "",

        # Parties
        "insured_name": "",
        "broker_name": "",

        # Dates
        "policy_start_date": "",
        "policy_end_date": "",
        "renewal_date": "",
        "renewal_year": None,

        # Financial
        "sum_insured": None,
        "premium_amount": None,
        "tax_amount": None,
        "deductible": None,

        # Risk & Coverage
        "policy_type": "",
        "insured_property_type": "",
        "coverage_type": [],
        "exclusions_present": False,
        "add_on_covers": [],

        # Location & Asset
        "property_address": "",
        "city": "",
        "state": "",
        "pincode": "",
        "construction_type": "",

        # RAG helpers
        "section_name": "",
        "clause_reference": "",
        "page_number": "",
        "chunk_type": "full_document",

        # Search helpers
        "keywords": [],
        "industry": "",
        "is_manufacturing": False
    }

    # Field name variations commonly found in documents
    FIELD_VARIATIONS = {
        'insured_name': [
            'insured', 'name of insured', 'proposer', 'policyholder',
            'policy holder', 'insured party', 'insured name', 'name of the insured',
            'assured', 'name of assured', 'customer name', 'client name'
        ],
        'insurer_name': [
            'insurer', 'insurance company', 'underwriter', 'company name',
            'issued by', 'insuring company'
        ],
        'policy_number': [
            'policy no', 'policy number', 'policy #', 'certificate no',
            'certificate number', 'policy ref', 'reference number', 'ref no'
        ],
        'sum_insured': [
            'sum insured', 'total sum insured', 'tsi', 'si', 'insured value',
            'coverage amount', 'insured amount', 'sum assured', 'cover amount',
            'amount insured', 'value insured'
        ],
        'premium_amount': [
            'premium', 'total premium', 'net premium', 'gross premium',
            'annual premium', 'premium payable', 'premium amount'
        ],
        'policy_start_date': [
            'start date', 'commencement', 'inception date', 'effective from',
            'period from', 'from date', 'valid from', 'cover starts'
        ],
        'policy_end_date': [
            'end date', 'expiry date', 'expiry', 'valid until', 'valid till',
            'period to', 'to date', 'cover ends', 'expires on'
        ],
        'policy_type': [
            'type of policy', 'policy type', 'cover type', 'insurance type',
            'class of insurance', 'product name', 'product type', 'scheme name'
        ],
        'property_address': [
            'address', 'risk location', 'location of risk', 'property address',
            'insured location', 'premises address', 'site address'
        ]
    }

    # Policy type patterns
    POLICY_TYPES = {
        'fire': ['fire', 'fire & allied', 'fire insurance', 'sfsp'],
        'marine': ['marine', 'cargo', 'marine cargo', 'marine hull'],
        'motor': ['motor', 'vehicle', 'car', 'two wheeler', 'automobile'],
        'health': ['health', 'mediclaim', 'medical', 'hospitalization'],
        'life': ['life', 'term', 'endowment', 'ulip'],
        'property': ['property', 'building', 'structure', 'premises'],
        'liability': ['liability', 'professional indemnity', 'pi', 'directors'],
        'engineering': ['engineering', 'car', 'eai', 'cpm', 'boiler', 'machinery'],
        'personal_accident': ['personal accident', 'pa', 'accident'],
        'travel': ['travel', 'overseas', 'foreign travel'],
        'home': ['home', 'householder', 'household'],
        'group': ['group', 'employee', 'gpa', 'gmc']
    }

    # Industry classification patterns
    INDUSTRY_PATTERNS = {
        'manufacturing': ['manufacturing', 'factory', 'plant', 'production', 'industrial'],
        'chemical': ['chemical', 'petrochemical', 'pharmaceutical', 'fertilizer'],
        'automotive': ['automobile', 'automotive', 'tyre', 'tire', 'vehicle'],
        'food_processing': ['food', 'beverage', 'dairy', 'agro'],
        'textile': ['textile', 'garment', 'apparel', 'fabric'],
        'it_services': ['software', 'it services', 'technology', 'tech'],
        'banking': ['bank', 'finance', 'nbfc', 'financial services'],
        'hospitality': ['hotel', 'restaurant', 'hospitality', 'resort'],
        'healthcare': ['hospital', 'clinic', 'healthcare', 'medical'],
        'retail': ['retail', 'shop', 'store', 'mall', 'supermarket'],
        'real_estate': ['real estate', 'construction', 'builder', 'developer'],
        'education': ['school', 'college', 'university', 'education', 'institute']
    }

    # Fields that must stay numeric. The free-text label pass skips them so
    # that _extract_numbers can fill them with real numbers; storing the raw
    # captured line here would leave a string in a numeric field and stop
    # _extract_numbers from ever replacing it.
    _NUMERIC_FIELDS = frozenset({'sum_insured', 'premium_amount'})

    def __init__(self):
        # DeepSeek settings are read from Config, falling back to defaults
        # when an attribute is absent.
        self.deepseek_api_key = getattr(Config, 'DEEPSEEK_API_KEY', '')
        self.deepseek_base_url = getattr(Config, 'DEEPSEEK_BASE_URL', 'https://api.deepseek.com/v1')
        self.deepseek_model = getattr(Config, 'DEEPSEEK_MODEL', 'deepseek-chat')

    @classmethod
    def _fresh_metadata(cls) -> Dict:
        """Return a copy of DEFAULT_METADATA whose list values are new lists.

        A plain ``dict.copy()`` would hand the same list objects to every
        result, so appending to one document's list would alter the defaults.
        """
        return {
            key: (list(value) if isinstance(value, list) else value)
            for key, value in cls.DEFAULT_METADATA.items()
        }

    @staticmethod
    def _contains_term(text_lower: str, terms: List[str]) -> bool:
        """Check whether any term occurs in ``text_lower`` as a whole word/phrase.

        An optional trailing 's' is accepted so simple plurals still match.
        Whole-word matching keeps short keys such as 'car', 'pi' or 'pa' from
        matching inside 'care', 'expiry' or 'company'.
        """
        alternation = '|'.join(re.escape(term) for term in terms)
        return re.search(rf'(?<!\w)(?:{alternation})s?(?!\w)', text_lower) is not None

    def extract_metadata(self, content: str, filename: str = "") -> Dict:
        """
        Extract structured metadata from document content.
        Uses AI for complex extraction with regex fallback.

        Args:
            content: Document text content
            filename: Original filename for context

        Returns:
            Dictionary with extracted metadata
        """
        metadata = self._fresh_metadata()
        metadata['document_title'] = filename

        # Try AI extraction first; only truthy AI values override defaults.
        if self.deepseek_api_key and len(content) > 100:
            ai_metadata = self._extract_with_ai(content, filename)
            if ai_metadata:
                metadata.update({k: v for k, v in ai_metadata.items() if v})

        # Fill in missing fields with regex extraction
        metadata = self._extract_with_regex(content, metadata)

        # Extract dates using date_parser
        metadata = self._extract_dates(content, metadata)

        # Extract numbers using number_extractor
        metadata = self._extract_numbers(content, metadata)

        if not metadata.get('policy_type'):
            metadata['policy_type'] = self._detect_policy_type(content)

        if not metadata.get('industry'):
            metadata['industry'] = self._detect_industry(content)

        metadata['is_manufacturing'] = self._is_manufacturing(content, metadata)

        metadata['keywords'] = self._extract_keywords(content, filename)

        return metadata

    def _extract_with_ai(self, content: str, filename: str) -> Optional[Dict]:
        """Use DeepSeek AI to extract metadata.

        Returns:
            The parsed JSON object as a dict, or None when no API key is set,
            the request fails, the status is not 200, or the reply is not a
            JSON object.
        """
        if not self.deepseek_api_key:
            return None

        # Truncate content to avoid token limits
        max_content = content[:15000]

        prompt = f"""Extract the following metadata from this insurance document. Return ONLY a valid JSON object with no explanation.

Document filename: {filename}
Document content:
{max_content}

Extract these fields (use empty string if not found, use null for missing numbers):
{{
"document_type": "policy/endorsement/certificate/schedule/etc",
"policy_number": "",
"insurer_name": "name of insurance company",
"insured_name": "name of insured party/policyholder",
"broker_name": "",
"policy_type": "fire/motor/health/marine/property/liability/etc",
"sum_insured": null,
"premium_amount": null,
"deductible": null,
"policy_start_date": "YYYY-MM-DD format",
"policy_end_date": "YYYY-MM-DD format",
"property_address": "",
"city": "",
"state": "",
"pincode": "",
"construction_type": "",
"insured_property_type": "",
"coverage_type": [],
"add_on_covers": [],
"industry": ""
}}

Return ONLY the JSON object, no markdown, no explanation."""

        try:
            response = requests.post(
                f"{self.deepseek_base_url}/chat/completions",
                headers={
                    "Authorization": f"Bearer {self.deepseek_api_key}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": self.deepseek_model,
                    "messages": [{"role": "user", "content": prompt}],
                    "max_tokens": 1000,
                    "temperature": 0
                },
                timeout=30
            )

            if response.status_code != 200:
                print(f"[METADATA] AI extraction failed: HTTP {response.status_code}")
                return None

            data = response.json()
            ai_response = data['choices'][0]['message']['content'].strip()

            # Remove markdown code fences if the model added them anyway.
            if ai_response.startswith('```'):
                ai_response = re.sub(r'^```(?:json)?\n?', '', ai_response)
                ai_response = re.sub(r'\n?```$', '', ai_response)

            parsed = json.loads(ai_response)
            # The caller iterates .items(); anything but an object is unusable.
            if not isinstance(parsed, dict):
                print("[METADATA] AI extraction failed: response is not a JSON object")
                return None
            return parsed
        except Exception as e:
            print(f"[METADATA] AI extraction failed: {e}")

        return None

    def _extract_with_regex(self, content: str, metadata: Dict) -> Dict:
        """Fill still-empty text fields from "label: value" lines.

        Matching is case-insensitive but runs on the original text, so the
        captured value keeps its original letter case. Numeric fields are
        skipped here (see ``_NUMERIC_FIELDS``).
        """
        for field, variations in self.FIELD_VARIATIONS.items():
            if field in self._NUMERIC_FIELDS or metadata.get(field):
                continue

            for variation in variations:
                # "variation: value", "variation | value" or "variation - value";
                # the look-behind stops the label matching mid-word.
                pattern = rf'(?<![A-Za-z0-9]){re.escape(variation)}\s*[:|-]\s*([^\n]+)'
                match = re.search(pattern, content, re.IGNORECASE)
                if not match:
                    continue
                value = re.sub(r'\s+', ' ', match.group(1).strip())[:200]  # Limit length
                if value and len(value) > 2:
                    metadata[field] = value
                    break

        # Extract policy number (often in specific formats)
        if not metadata.get('policy_number'):
            patterns = [
                # The look-ahead demands at least one digit, so a heading such
                # as "Policy Schedule" is not taken for a policy number.
                r'policy\s*(?:no|number|#)?\s*[:.]?\s*((?=[A-Z0-9/-]*\d)[A-Z0-9/-]{5,30})',
                r'([A-Z]{2,5}[/-]?\d{6,15})',
                r'(\d{10,20})'
            ]
            for pattern in patterns:
                match = re.search(pattern, content, re.IGNORECASE)
                if match:
                    metadata['policy_number'] = match.group(1).strip()
                    break

        # Extract pincode: the first standalone six-digit number.
        if not metadata.get('pincode'):
            match = re.search(r'\b(\d{6})\b', content)
            if match:
                metadata['pincode'] = match.group(1)

        return metadata

    def _extract_dates(self, content: str, metadata: Dict) -> Dict:
        """Fill empty date fields from date_parser results, then derive renewal info."""
        dates = date_parser.extract_dates_from_text(content)

        # date_parser context label -> metadata field it may fill.
        field_for_context = {
            'start': 'policy_start_date',
            'end': 'policy_end_date',
            'renewal': 'renewal_date',
            'issue': 'issue_date',
        }

        for date_info in dates:
            context = date_info['context']
            date_str = date_info['date_str']

            field = field_for_context.get(context)
            if field and not metadata.get(field):
                metadata[field] = date_str

        # With no explicit renewal date, fall back to the policy end date.
        if not metadata.get('renewal_date') and metadata.get('policy_end_date'):
            end_date = date_parser.parse_date(metadata['policy_end_date'])
            if end_date:
                metadata['renewal_date'] = metadata['policy_end_date']
                metadata['renewal_year'] = end_date.year

        # Set renewal year
        if metadata.get('renewal_date') and not metadata.get('renewal_year'):
            renewal = date_parser.parse_date(metadata['renewal_date'])
            if renewal:
                metadata['renewal_year'] = renewal.year

        return metadata

    def _extract_numbers(self, content: str, metadata: Dict) -> Dict:
        """Fill empty numeric fields from number_extractor results."""
        numbers = number_extractor.extract_numbers(content)

        # number_extractor context label -> metadata field it may fill.
        field_for_context = {
            'sum_insured': 'sum_insured',
            'premium': 'premium_amount',
            'tax': 'tax_amount',
            'deductible': 'deductible',
        }

        for num_info in numbers:
            context = num_info['context']
            value = num_info['value']

            field = field_for_context.get(context)
            if field and not metadata.get(field):
                metadata[field] = value

        # Last resort for the sum insured: the extractor's dedicated helper.
        if not metadata.get('sum_insured'):
            sum_insured = number_extractor.extract_sum_insured(content)
            if sum_insured:
                metadata['sum_insured'] = sum_insured

        return metadata

    def _detect_policy_type(self, content: str) -> str:
        """Return the first POLICY_TYPES key with a keyword in the content, else 'general'."""
        content_lower = content.lower()

        for policy_type, keywords in self.POLICY_TYPES.items():
            if self._contains_term(content_lower, keywords):
                return policy_type

        return "general"

    def _detect_industry(self, content: str) -> str:
        """Return the first INDUSTRY_PATTERNS key with a keyword in the content, else ''."""
        content_lower = content.lower()

        for industry, keywords in self.INDUSTRY_PATTERNS.items():
            if self._contains_term(content_lower, keywords):
                return industry

        return ""

    def _is_manufacturing(self, content: str, metadata: Dict) -> bool:
        """Check if this is a manufacturing-related policy."""
        if metadata.get('industry') == 'manufacturing':
            return True

        manufacturing_keywords = [
            'manufacturing', 'factory', 'plant', 'production', 'industrial',
            'machinery', 'equipment', 'boiler', 'pressure vessel'
        ]
        return self._contains_term(content.lower(), manufacturing_keywords)

    def _extract_keywords(self, content: str, filename: str) -> List[str]:
        """Extract up to 30 lower-cased keywords for search enhancement.

        Filename words come first, followed by capitalised phrases from the
        first 5000 characters of the content. Order of first appearance is
        kept, so the result (and which entries survive the cut at 30) is the
        same on every run.
        """
        candidates = [w.lower() for w in re.findall(r'[A-Za-z]{3,}', filename)]

        # Capitalised words/phrases are likely proper nouns or company names.
        proper_nouns = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b', content[:5000])
        candidates.extend(n.lower() for n in proper_nouns[:20])

        stop_words = {'the', 'and', 'for', 'with', 'this', 'that', 'from', 'are', 'was', 'were'}
        # dict.fromkeys removes duplicates while preserving insertion order.
        keywords = [
            kw for kw in dict.fromkeys(candidates)
            if kw not in stop_words and len(kw) > 2
        ]

        return keywords[:30]  # Limit to 30 keywords

    def extract_metadata_batch(self, documents: List[Dict]) -> List[Dict]:
        """
        Extract metadata for multiple documents.

        Args:
            documents: List of dicts with 'content' and 'filename' keys
                (and optionally 'doc_id')

        Returns:
            List of metadata dicts, one per input document and in the same
            order. A document whose extraction raises gets the default
            metadata plus its doc_id.
        """
        results = []
        for doc in documents:
            try:
                metadata = self.extract_metadata(
                    doc.get('content', ''),
                    doc.get('filename', '')
                )
                metadata['doc_id'] = doc.get('doc_id', '')
                results.append(metadata)
            except Exception as e:
                print(f"[METADATA] Error extracting from {doc.get('filename')}: {e}")
                results.append({**self._fresh_metadata(), 'doc_id': doc.get('doc_id', '')})

        return results
443
+
444
+
445
+ # Singleton instance
446
+ metadata_extractor = MetadataExtractor()
services/number_extractor.py ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Number Extractor Service
3
+ Handles extraction and normalization of numerical values from insurance documents.
4
+ Supports:
5
+ - Indian number formats (lakhs, crores)
6
+ - Currency symbols (₹, Rs., INR, USD)
7
+ - Comma-separated numbers
8
+ - Word numbers (One Hundred Million)
9
+ - Percentage values
10
+ """
11
+
12
+ import re
13
+ from typing import Optional, List, Dict, Tuple
14
+ from decimal import Decimal, InvalidOperation
15
+
16
+
17
+ class NumberExtractor:
18
+ """Extract and normalize numerical values from text."""
19
+
20
+ # Indian number words
21
+ WORD_TO_NUMBER = {
22
+ 'zero': 0, 'one': 1, 'two': 2, 'three': 3, 'four': 4,
23
+ 'five': 5, 'six': 6, 'seven': 7, 'eight': 8, 'nine': 9,
24
+ 'ten': 10, 'eleven': 11, 'twelve': 12, 'thirteen': 13,
25
+ 'fourteen': 14, 'fifteen': 15, 'sixteen': 16, 'seventeen': 17,
26
+ 'eighteen': 18, 'nineteen': 19, 'twenty': 20, 'thirty': 30,
27
+ 'forty': 40, 'fifty': 50, 'sixty': 60, 'seventy': 70,
28
+ 'eighty': 80, 'ninety': 90
29
+ }
30
+
31
+ MAGNITUDE_WORDS = {
32
+ 'hundred': 100,
33
+ 'thousand': 1000,
34
+ 'lakh': 100000,
35
+ 'lac': 100000,
36
+ 'lakhs': 100000,
37
+ 'lacs': 100000,
38
+ 'million': 1000000,
39
+ 'crore': 10000000,
40
+ 'crores': 10000000,
41
+ 'billion': 1000000000
42
+ }
43
+
44
+ # Currency patterns
45
+ CURRENCY_PATTERNS = {
46
+ 'INR': [r'₹', r'Rs\.?', r'INR', r'Rupees?'],
47
+ 'USD': [r'\$', r'USD', r'Dollars?'],
48
+ 'EUR': [r'€', r'EUR', r'Euros?']
49
+ }
50
+
51
+ # Context keywords for identifying number types
52
+ NUMBER_CONTEXTS = {
53
+ 'sum_insured': ['sum insured', 'total sum insured', 'tsi', 'si', 'insured value',
54
+ 'coverage amount', 'insured amount', 'sum assured'],
55
+ 'premium': ['premium', 'premium amount', 'total premium', 'net premium',
56
+ 'gross premium', 'annual premium'],
57
+ 'tax': ['tax', 'gst', 'cgst', 'sgst', 'igst', 'service tax'],
58
+ 'deductible': ['deductible', 'excess', 'franchise']
59
+ }
60
+
61
+ def __init__(self):
62
+ self._compile_patterns()
63
+
64
+ def _compile_patterns(self):
65
+ """Compile regex patterns for number extraction."""
66
+ # Currency amount: ₹1,00,000 or Rs. 1,00,000.00 or INR 100000
67
+ currency_symbols = '|'.join(
68
+ p for patterns in self.CURRENCY_PATTERNS.values() for p in patterns
69
+ )
70
+ self.pattern_currency = re.compile(
71
+ rf'({currency_symbols})\s*([\d,]+(?:\.\d{{1,2}})?)',
72
+ re.IGNORECASE
73
+ )
74
+
75
+ # Plain number with commas: 1,00,00,000 or 100,000,000
76
+ self.pattern_number = re.compile(
77
+ r'\b([\d,]+(?:\.\d+)?)\b'
78
+ )
79
+
80
+ # Number with magnitude words: 10 crore, 5.5 lakhs
81
+ magnitude_words = '|'.join(self.MAGNITUDE_WORDS.keys())
82
+ self.pattern_magnitude = re.compile(
83
+ rf'\b([\d,.]+)\s*({magnitude_words})\b',
84
+ re.IGNORECASE
85
+ )
86
+
87
+ # Percentage: 10%, 10.5 percent
88
+ self.pattern_percent = re.compile(
89
+ r'\b([\d.]+)\s*(?:%|percent|percentage)\b',
90
+ re.IGNORECASE
91
+ )
92
+
93
def parse_number(self, num_str: str) -> Optional[float]:
    """
    Turn a number string into a float, ignoring digit-group commas.

    Works for both Indian and Western grouping, since commas and spaces are
    simply removed before conversion.

    Args:
        num_str: Number string (e.g., "1,00,000" or "100,000.50")

    Returns:
        The float value, or None for falsy or unparseable input
    """
    if not num_str:
        return None

    # Drop every comma and space, then trim what whitespace remains.
    cleaned = ''.join(ch for ch in str(num_str) if ch not in ', ').strip()

    try:
        value = float(cleaned)
    except ValueError:
        return None
    return value
113
+
114
+ def parse_indian_number(self, text: str) -> Optional[float]:
115
+ """
116
+ Parse Indian number format (lakhs, crores).
117
+
118
+ Args:
119
+ text: Text like "10 crore" or "5.5 lakhs"
120
+
121
+ Returns:
122
+ Float value or None
123
+ """
124
+ text = text.lower().strip()
125
+
126
+ match = self.pattern_magnitude.search(text)
127
+ if match:
128
+ num_part = self.parse_number(match.group(1))
129
+ magnitude = self.MAGNITUDE_WORDS.get(match.group(2).lower(), 1)
130
+ if num_part is not None:
131
+ return num_part * magnitude
132
+
133
+ return None
134
+
135
+ def word_to_number(self, text: str) -> Optional[int]:
136
+ """
137
+ Convert word numbers to integers.
138
+
139
+ Args:
140
+ text: Text like "One Hundred Million"
141
+
142
+ Returns:
143
+ Integer value or None
144
+ """
145
+ text = text.lower().strip()
146
+ words = text.split()
147
+
148
+ if not words:
149
+ return None
150
+
151
+ result = 0
152
+ current = 0
153
+
154
+ for word in words:
155
+ word = word.strip(',').strip()
156
+
157
+ if word in self.WORD_TO_NUMBER:
158
+ current += self.WORD_TO_NUMBER[word]
159
+ elif word in self.MAGNITUDE_WORDS:
160
+ magnitude = self.MAGNITUDE_WORDS[word]
161
+ if magnitude >= 1000:
162
+ current = (current or 1) * magnitude
163
+ result += current
164
+ current = 0
165
+ else:
166
+ current *= magnitude
167
+ elif word == 'and':
168
+ continue
169
+ else:
170
+ # Unknown word, try to parse as number
171
+ try:
172
+ current += int(word)
173
+ except ValueError:
174
+ pass
175
+
176
+ result += current
177
+ return result if result > 0 else None
178
+
179
+ def extract_numbers(self, text: str) -> List[Dict]:
180
+ """
181
+ Extract all numerical values from text with context.
182
+
183
+ Args:
184
+ text: Text to search for numbers
185
+
186
+ Returns:
187
+ List of dicts with number info:
188
+ [{"value": 101000000, "context": "sum_insured", "currency": "INR",
189
+ "original": "₹10,10,00,000"}]
190
+ """
191
+ if not text:
192
+ return []
193
+
194
+ results = []
195
+ text_lower = text.lower()
196
+
197
+ # Extract currency amounts
198
+ for match in self.pattern_currency.finditer(text):
199
+ currency_symbol = match.group(1)
200
+ num_str = match.group(2)
201
+ value = self.parse_number(num_str)
202
+
203
+ if value is not None and value > 0:
204
+ # Determine currency
205
+ currency = 'INR' # Default
206
+ for curr, patterns in self.CURRENCY_PATTERNS.items():
207
+ if any(re.match(p, currency_symbol, re.IGNORECASE) for p in patterns):
208
+ currency = curr
209
+ break
210
+
211
+ # Determine context
212
+ context = self._determine_number_context(text_lower, match.start())
213
+
214
+ results.append({
215
+ 'value': value,
216
+ 'context': context,
217
+ 'currency': currency,
218
+ 'original': match.group(),
219
+ 'position': match.start()
220
+ })
221
+
222
+ # Extract numbers with magnitude words (10 crore, 5 lakhs)
223
+ for match in self.pattern_magnitude.finditer(text):
224
+ value = self.parse_indian_number(match.group())
225
+ if value is not None and value > 0:
226
+ context = self._determine_number_context(text_lower, match.start())
227
+ results.append({
228
+ 'value': value,
229
+ 'context': context,
230
+ 'currency': 'INR', # Lakhs/crores are typically INR
231
+ 'original': match.group(),
232
+ 'position': match.start()
233
+ })
234
+
235
+ # Remove duplicates based on position (currency matches often overlap with magnitude)
236
+ seen_positions = set()
237
+ unique_results = []
238
+ for r in sorted(results, key=lambda x: -x['value']): # Prefer larger values
239
+ # Check if any existing result overlaps with this one
240
+ overlaps = False
241
+ for pos in seen_positions:
242
+ if abs(r['position'] - pos) < 20: # Within 20 chars
243
+ overlaps = True
244
+ break
245
+
246
+ if not overlaps:
247
+ seen_positions.add(r['position'])
248
+ unique_results.append(r)
249
+
250
+ return unique_results
251
+
252
+ def _determine_number_context(self, text: str, position: int) -> str:
253
+ """Determine what type of number this is based on surrounding text."""
254
+ # Look at 100 chars before the number
255
+ context_start = max(0, position - 100)
256
+ context_text = text[context_start:position]
257
+
258
+ for num_type, keywords in self.NUMBER_CONTEXTS.items():
259
+ if any(kw in context_text for kw in keywords):
260
+ return num_type
261
+
262
+ return 'unknown'
263
+
264
+ def extract_sum_insured(self, text: str) -> Optional[float]:
265
+ """Extract the sum insured value from text."""
266
+ numbers = self.extract_numbers(text)
267
+
268
+ # First, look for explicitly labeled sum insured
269
+ for num in numbers:
270
+ if num['context'] == 'sum_insured':
271
+ return num['value']
272
+
273
+ # Otherwise, return the largest number (likely to be sum insured)
274
+ if numbers:
275
+ return max(num['value'] for num in numbers)
276
+
277
+ return None
278
+
279
+ def extract_premium(self, text: str) -> Optional[float]:
280
+ """Extract the premium amount from text."""
281
+ numbers = self.extract_numbers(text)
282
+
283
+ for num in numbers:
284
+ if num['context'] == 'premium':
285
+ return num['value']
286
+
287
+ return None
288
+
289
+ def calculate_sum(self, values: List[float]) -> float:
290
+ """Calculate sum of values."""
291
+ return sum(v for v in values if v is not None)
292
+
293
+ def calculate_average(self, values: List[float]) -> Optional[float]:
294
+ """Calculate average of values."""
295
+ valid_values = [v for v in values if v is not None]
296
+ if valid_values:
297
+ return sum(valid_values) / len(valid_values)
298
+ return None
299
+
300
+
301
+ # Singleton instance
302
+ number_extractor = NumberExtractor()
services/ocr_service.py ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ OCR Service - Document Text Extraction via OpenRouter Vision Models
3
+ Handles OCR for images and scanned documents using vision-capable models with fallback
4
+ """
5
+
6
+ import requests
7
+ import base64
8
+ from pathlib import Path
9
+ from config import Config
10
+
11
+
12
class OCRService:
    """Text extraction from images and PDFs via OpenRouter vision models.

    Every public method returns a dict. On success it is
    ``{"success": True, "text": ..., "model": ...}``; on failure it is
    ``{"success": False, "error": ...}``. Models in ``ocr_models`` are tried
    in order until one returns text.
    """

    # Instruction sent with every image. The first OUTPUT FORMAT bullet must
    # say "no explanations"; without the "no" the model is asked to add them.
    _OCR_PROMPT = """You are a precise OCR system. Extract EVERY SINGLE piece of text from this image/document with 100% accuracy.

CRITICAL INSTRUCTIONS:
1. Extract ALL text - do not skip or miss ANY section, heading, paragraph, or text block
2. Include ALL sections (e.g., Education, Experience, Skills, Contact, Summary, Projects, etc.)
3. Preserve the exact structure and hierarchy of the document
4. Include all names, dates, numbers, addresses, phone numbers, emails, URLs
5. Include text from headers, footers, sidebars, and any text boxes
6. For tables, use markdown table format with all rows and columns
7. For bullet points and lists, preserve the list structure
8. Include any small text, footnotes, or captions

OUTPUT FORMAT:
- Return ONLY the extracted text, no explanations
- Maintain the original reading order (top to bottom, left to right)
- Use markdown formatting for structure (headers, lists, tables)
- Separate sections clearly with line breaks

IMPORTANT: Do not summarize or paraphrase. Extract the EXACT text as it appears."""

    def __init__(self):
        self.api_key = Config.OPENROUTER_API_KEY
        self.base_url = Config.OPENROUTER_BASE_URL

        # Vision-capable models for OCR with fallback order
        # Only models that support image/vision input can be used for OCR
        self.ocr_models = [
            "google/gemma-3-27b-it:free",   # Primary - Largest Gemma 3
            "google/gemma-3-12b-it:free",   # Fallback 1
            "google/gemma-3-4b-it:free",    # Fallback 2
            "google/gemma-3n-e4b-it:free",  # Fallback 3
            "google/gemma-3n-e2b-it:free",  # Fallback 4 - Smallest
        ]

    def _encode_image(self, image_path: str) -> str:
        """Read a file and return its contents as a base64 string.

        Raises:
            OSError: if the file cannot be opened or read.
        """
        with open(image_path, "rb") as f:
            return base64.b64encode(f.read()).decode('utf-8')

    def _get_mime_type(self, file_path: str) -> str:
        """Map a file extension to a MIME type (``image/png`` if unknown)."""
        ext = Path(file_path).suffix.lower()
        mime_types = {
            '.png': 'image/png',
            '.jpg': 'image/jpeg',
            '.jpeg': 'image/jpeg',
            '.gif': 'image/gif',
            '.webp': 'image/webp',
            '.pdf': 'application/pdf'
        }
        return mime_types.get(ext, 'image/png')

    def _call_ocr_model(self, image_data: str, mime_type: str, model: str = None) -> dict:
        """Send one base64 image to one vision model and return its text.

        Args:
            image_data: Base64-encoded image bytes.
            mime_type: MIME type used in the data URL.
            model: Model id; defaults to the first entry of ``ocr_models``.

        Returns:
            Result dict as described on the class. Network and API errors
            are reported in the dict, never raised.
        """
        if not self.api_key:
            return {"success": False, "error": "OpenRouter API key not configured"}

        chosen_model = model or self.ocr_models[0]

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://notebooklm-fast.hf.space",
            "X-Title": "NotebookLM Fast OCR"
        }

        payload = {
            "model": chosen_model,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mime_type};base64,{image_data}"
                            }
                        },
                        {
                            "type": "text",
                            "text": self._OCR_PROMPT
                        }
                    ]
                }
            ],
            "max_tokens": 4096,
            "temperature": 0.1  # Low temperature for accurate extraction
        }

        try:
            response = requests.post(
                f"{self.base_url}/chat/completions",
                headers=headers,
                json=payload,
                timeout=120  # Longer timeout for OCR
            )

            if response.status_code != 200:
                return {
                    "success": False,
                    "error": f"OpenRouter API error: {response.status_code} - {response.text}"
                }

            data = response.json()
            # An empty or null "choices"/"message"/"content" is treated as
            # "no text" instead of surfacing as an opaque IndexError.
            choices = data.get('choices') or [{}]
            message = choices[0].get('message') or {}
            text = message.get('content') or ''

            if text:
                return {"success": True, "text": text, "model": chosen_model}
            return {"success": False, "error": "No text extracted from response"}
        except requests.exceptions.Timeout:
            return {"success": False, "error": "Request timed out. Please try again."}
        except Exception as e:
            return {"success": False, "error": str(e)}

    def _call_ocr_with_fallback(self, image_data: str, mime_type: str) -> dict:
        """Try each model in ``ocr_models`` in order; return the first success."""
        last_error = None

        for model in self.ocr_models:
            print(f"Attempting OCR with {model}...")
            result = self._call_ocr_model(image_data, mime_type, model)

            if result['success']:
                print(f"OCR successful with {model}")
                return result

            last_error = result.get('error', 'Unknown error')
            print(f"OCR failed with {model}: {last_error}")

        return {"success": False, "error": f"All OCR models failed. Last error: {last_error}"}

    def extract_text_from_pdf(self, pdf_path: str) -> dict:
        """
        Extract text from an entire PDF using OpenRouter vision models.

        Each page is rendered to a PNG at 2x zoom and sent through OCR. Pages
        that fail are kept as an ``[Error extracting text: ...]`` placeholder
        so page numbering stays intact. If no page could be read, the call is
        reported as a failure.
        """
        import fitz  # PyMuPDF

        doc = None
        try:
            doc = fitz.open(pdf_path)
            total_pages = len(doc)

            print(f"Processing {total_pages} page PDF with OpenRouter vision OCR...")

            all_text = []
            pages_ok = 0
            last_error = None

            for page_num in range(total_pages):
                print(f"Processing page {page_num + 1}/{total_pages}...")

                page = doc.load_page(page_num)

                # Render page to image at good resolution for OCR
                mat = fitz.Matrix(2, 2)  # 2x zoom for better quality
                pix = page.get_pixmap(matrix=mat)
                img_data = pix.tobytes("png")

                # Encode to base64
                image_base64 = base64.b64encode(img_data).decode('utf-8')

                # OCR the page with fallback
                result = self._call_ocr_with_fallback(image_base64, 'image/png')

                if result['success']:
                    pages_ok += 1
                    all_text.append(f"--- Page {page_num + 1} ---\n{result['text']}")
                else:
                    last_error = result['error']
                    all_text.append(f"--- Page {page_num + 1} ---\n[Error extracting text: {result['error']}]")

            if total_pages and not pages_ok:
                # Nothing was read: returning the placeholders as "text"
                # would pass error messages off as document content.
                return {
                    "success": False,
                    "error": f"OCR failed for all {total_pages} page(s). Last error: {last_error}"
                }

            combined_text = "\n\n".join(all_text)

            return {
                "success": True,
                "text": combined_text,
                "model": "OpenRouter Vision OCR"
            }

        except Exception as e:
            return {"success": False, "error": f"Error processing PDF: {str(e)}"}
        finally:
            # Close the document even when a page raised mid-way.
            if doc is not None:
                doc.close()

    def _process_pdf_in_batches(self, pdf_path: str, total_pages: int) -> dict:
        """Split PDF into chunks and process sequentially - kept for compatibility"""
        return self.extract_text_from_pdf(pdf_path)

    def _send_pdf_to_api(self, pdf_path: str) -> dict:
        """Process PDF by converting to images - OpenRouter doesn't have native PDF support"""
        return self.extract_text_from_pdf(pdf_path)

    def extract_text(self, image_path: str) -> dict:
        """
        Extract text from an image file using the vision models with fallback.

        Raises:
            OSError: if ``image_path`` cannot be read.
        """
        image_data = self._encode_image(image_path)
        mime_type = self._get_mime_type(image_path)

        print("Attempting OCR with OpenRouter vision models...")
        result = self._call_ocr_with_fallback(image_data, mime_type)

        if result['success']:
            print(f"OCR successful with {result.get('model', 'OpenRouter')}")
        else:
            print(f"OCR failed: {result['error']}")

        return result

    def extract_text_from_pdf_page(self, page_image_data: bytes,
                                   page_num: int) -> dict:
        """Extract text from one rendered PDF page, passed as PNG bytes.

        ``page_num`` is used only in the progress message.
        """
        image_data = base64.b64encode(page_image_data).decode('utf-8')

        print(f"Extracting text from PDF page {page_num} with OpenRouter vision OCR...")
        result = self._call_ocr_with_fallback(image_data, 'image/png')
        return result
228
+
229
+
230
+ # Singleton instance
231
+ ocr_service = OCRService()
services/rag_service.py ADDED
@@ -0,0 +1,1870 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RAG Service - Retrieval Augmented Generation
3
+ Handles:
4
+ - Text chunking with overlap
5
+ - GraphRAG-based context assembly
6
+ - Query processing with AI response generation
7
+ - Aggregate queries across all documents
8
+ - Date-based filtering and calculations
9
+ """
10
+
11
+ import requests
12
+ import re
13
+ from typing import Optional, List, Dict
14
+ from config import Config
15
+ from services.chroma_service import chroma_service
16
+ from services.date_parser import date_parser
17
+ from services.number_extractor import number_extractor
18
+
19
+
20
+ class RAGService:
21
+ def __init__(self):
22
+ # DeepSeek API (primary - highly capable)
23
+ self.deepseek_api_key = getattr(Config, 'DEEPSEEK_API_KEY', '')
24
+ self.deepseek_base_url = getattr(Config, 'DEEPSEEK_BASE_URL', 'https://api.deepseek.com/v1')
25
+ self.deepseek_model = getattr(Config, 'DEEPSEEK_MODEL', 'deepseek-chat')
26
+ self.use_deepseek = getattr(Config, 'USE_DEEPSEEK', True) and self.deepseek_api_key
27
+
28
+ # OpenRouter API (fallback)
29
+ self.api_key = Config.OPENROUTER_API_KEY
30
+ self.base_url = Config.OPENROUTER_BASE_URL
31
+ self.model_map = Config.MODEL_MAP
32
+ self.fallback_order = Config.FALLBACK_ORDER
33
+
34
+ # RAG settings
35
+ self.chunk_size = Config.CHUNK_SIZE
36
+ self.chunk_overlap = Config.CHUNK_OVERLAP
37
+ self.top_k = Config.TOP_K_RESULTS
38
+ self.temperature = Config.AI_TEMPERATURE
39
+ self.relevance_threshold = Config.RELEVANCE_THRESHOLD
40
+ self.max_history = Config.MAX_CONVERSATION_HISTORY
41
+ self.max_tokens = getattr(Config, 'AI_MAX_TOKENS', 1024)
42
+ self.timeout = getattr(Config, 'AI_TIMEOUT', 15)
43
+
44
+ def chunk_text(self, text: str) -> list[dict]:
45
+ """
46
+ Split text into overlapping chunks for better retrieval
47
+ Uses sentence-aware chunking for coherence
48
+ """
49
+ # Clean and normalize text
50
+ text = re.sub(r'\n{3,}', '\n\n', text)
51
+ text = text.strip()
52
+
53
+ if len(text) <= self.chunk_size:
54
+ return [{"text": text, "start": 0, "end": len(text)}]
55
+
56
+ chunks = []
57
+ sentences = self._split_into_sentences(text)
58
+
59
+ current_chunk = ""
60
+ current_start = 0
61
+ char_pos = 0
62
+
63
+ for sentence in sentences:
64
+ sentence_len = len(sentence)
65
+
66
+ if len(current_chunk) + sentence_len <= self.chunk_size:
67
+ current_chunk += sentence
68
+ else:
69
+ if current_chunk:
70
+ chunks.append({
71
+ "text": current_chunk.strip(),
72
+ "start": current_start,
73
+ "end": char_pos
74
+ })
75
+
76
+ # Start new chunk with overlap
77
+ overlap_start = max(0, len(current_chunk) - self.chunk_overlap)
78
+ current_chunk = current_chunk[overlap_start:] + sentence
79
+ current_start = char_pos - (len(current_chunk) - sentence_len)
80
+
81
+ char_pos += sentence_len
82
+
83
+ # Add final chunk
84
+ if current_chunk.strip():
85
+ chunks.append({
86
+ "text": current_chunk.strip(),
87
+ "start": current_start,
88
+ "end": char_pos
89
+ })
90
+
91
+ return chunks
92
+
93
+ def _split_into_sentences(self, text: str) -> list[str]:
94
+ """Split text into sentences while preserving delimiters"""
95
+ # Simple sentence splitting
96
+ pattern = r'(?<=[.!?])\s+(?=[A-Z])'
97
+ sentences = re.split(pattern, text)
98
+ return [s + ' ' for s in sentences]
99
+
100
+ def process_document(self, user_id: str, doc_id: str, content: str, bucket_id: str = ""):
101
+ """
102
+ Process document for RAG:
103
+ 1. Chunk the text
104
+ 2. Store chunks in ChromaDB
105
+ """
106
+ chunks = self.chunk_text(content)
107
+ chroma_service.store_chunks(doc_id, user_id, chunks, bucket_id)
108
+ return len(chunks)
109
+
110
+ def _expand_query(self, query: str) -> list[str]:
111
+ """
112
+ Generate query variations for better retrieval.
113
+ Extracts key terms and creates multiple search angles.
114
+ """
115
+ import re
116
+ queries = [query]
117
+ query_lower = query.lower()
118
+
119
+ # Map numbers to words for module/section matching
120
+ word_map = {
121
+ '1': 'one', '2': 'two', '3': 'three', '4': 'four',
122
+ '5': 'five', '6': 'six', '7': 'seven', '8': 'eight',
123
+ '9': 'nine', '10': 'ten', '11': 'eleven', '12': 'twelve'
124
+ }
125
+
126
+ # Extract key terms (nouns, proper nouns) - words that are likely searchable
127
+ # Remove common question words and stop words
128
+ stop_words = {'what', 'who', 'where', 'when', 'why', 'how', 'is', 'are', 'was', 'were',
129
+ 'the', 'a', 'an', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by',
130
+ 'from', 'about', 'tell', 'me', 'can', 'you', 'please', 'give', 'show',
131
+ 'list', 'all', 'find', 'get', 'does', 'do', 'did', 'has', 'have', 'had',
132
+ 'this', 'that', 'these', 'those', 'and', 'or', 'but', 'if', 'then',
133
+ 'there', 'their', 'they', 'them', 'it', 'its', 'be', 'been', 'being',
134
+ 'any', 'some', 'my', 'your', 'our', 'his', 'her', 'which', 'each'}
135
+
136
+ # Extract potential key terms (2+ chars, not stop words)
137
+ words = re.findall(r'\b[a-zA-Z]{2,}\b', query_lower)
138
+ key_terms = [w for w in words if w not in stop_words]
139
+
140
+ # Add each key term as a separate query for direct matching
141
+ for term in key_terms[:5]: # Top 5 key terms
142
+ if len(term) > 3: # Only meaningful terms
143
+ queries.append(term)
144
+
145
+ # Add combinations of key terms
146
+ if len(key_terms) >= 2:
147
+ queries.append(' '.join(key_terms[:3])) # First 3 key terms
148
+
149
+ # Find module/section references and create variations
150
+ patterns = [
151
+ (r'module\s*(\d+)', 'module'),
152
+ (r'section\s*(\d+)', 'section'),
153
+ (r'chapter\s*(\d+)', 'chapter'),
154
+ (r'part\s*(\d+)', 'part'),
155
+ ]
156
+
157
+ for pattern, prefix in patterns:
158
+ match = re.search(pattern, query_lower)
159
+ if match:
160
+ num = match.group(1)
161
+ # Add number word version: "module five"
162
+ if num in word_map:
163
+ queries.append(query_lower.replace(f'{prefix} {num}', f'{prefix} {word_map[num]}'))
164
+ # Add just the module reference: "module 5"
165
+ queries.append(f'{prefix} {num}')
166
+ # Add numbered list format: "5." or "5)"
167
+ queries.append(f'{num}.')
168
+ queries.append(f'{num})')
169
+ break
170
+
171
+ # Add question without question words for direct info retrieval
172
+ simplified = ' '.join(key_terms)
173
+ if simplified and simplified != query_lower:
174
+ queries.append(simplified)
175
+
176
+ # Deduplicate and limit
177
+ seen = set()
178
+ unique_queries = []
179
+ for q in queries:
180
+ q_clean = q.lower().strip()
181
+ if q_clean and q_clean not in seen and len(q_clean) > 1:
182
+ seen.add(q_clean)
183
+ unique_queries.append(q)
184
+
185
+ return unique_queries[:8] # Increased to 8 variations for better coverage
186
+
187
+ def _detect_document_reference(self, query: str, available_docs: list[dict]) -> list[str]:
188
+ """
189
+ Detect if user is asking about a specific document by name.
190
+ Returns list of matching doc_ids to prioritize in search.
191
+ """
192
+ query_lower = query.lower()
193
+ matching_doc_ids = []
194
+
195
+ for doc in available_docs:
196
+ filename = doc.get('filename', '')
197
+ if not filename:
198
+ continue
199
+
200
+ # Remove extension and normalize
201
+ name_parts = filename.lower().replace('.pdf', '').replace('.docx', '').replace('.xlsx', '').replace('.pptx', '').replace('.txt', '').replace('.md', '')
202
+
203
+ # Check if document name appears in query
204
+ # Handle common patterns like "the ABC document", "from XYZ file", "in document ABC"
205
+ if name_parts in query_lower or any(part in query_lower for part in name_parts.split('_') if len(part) > 3):
206
+ matching_doc_ids.append(doc.get('doc_id'))
207
+
208
+ return matching_doc_ids
209
+
210
+ def _parse_query_with_ai(self, query: str) -> dict:
211
+ """
212
+ Use DeepSeek AI to understand query intent and extract structured parameters.
213
+ This replaces hardcoded pattern matching with intelligent query understanding.
214
+
215
+ Returns dict with:
216
+ - intent: list|count|rank|calculate|compare|specific|summarize
217
+ - needs_metadata: True if needs aggregate data across all documents
218
+ - filters: dict of field->value filters
219
+ - sort_by: field to sort by (or None)
220
+ - sort_order: 'desc' or 'asc'
221
+ - limit: number of results (or None for all)
222
+ - calculation: sum|average|max|min (or None)
223
+ - calculation_field: field for calculation
224
+ """
225
+ import json
226
+
227
+ system_prompt = """You are a query parser for an insurance document system.
228
+ Analyze the user's question and extract structured parameters to help retrieve the right data.
229
+
230
+ Available fields for filtering:
231
+ - is_manufacturing (boolean): True if asking about manufacturing industry/sector
232
+ - policy_type (string): fire, marine, motor, health, liability, property, engineering, etc.
233
+ - industry (string): manufacturing, retail, IT, healthcare, construction, food, textile, etc.
234
+ - insurer_name (string): insurance company name
235
+ - insured_name (string): policyholder/company name
236
+ - broker_name (string): broker or agent name
237
+ - city (string): city name
238
+ - state (string): state name
239
+ - renewal_year (integer): 2024, 2025, 2026, etc.
240
+
241
+ Available fields for sorting:
242
+ - premium_amount: net premium, gross premium, premium
243
+ - sum_insured: coverage amount, insured value
244
+ - renewal_date: renewal date, expiry date
245
+ - policy_start_date: inception date, start date
246
+
247
+ Return ONLY valid JSON (no markdown, no explanation):
248
+ {
249
+ "intent": "list|count|rank|calculate|compare|specific|summarize",
250
+ "needs_metadata": true or false,
251
+ "filters": {"field_name": "value"},
252
+ "sort_by": "field_name" or null,
253
+ "sort_order": "desc" or "asc",
254
+ "limit": number or null,
255
+ "calculation": "sum|average|max|min|count" or null,
256
+ "calculation_field": "premium_amount|sum_insured" or null
257
+ }
258
+
259
+ Examples:
260
+ Query: "top 5 manufacturing policies by premium"
261
+ {"intent":"rank","needs_metadata":true,"filters":{"is_manufacturing":true},"sort_by":"premium_amount","sort_order":"desc","limit":5,"calculation":null,"calculation_field":null}
262
+
263
+ Query: "total sum insured for all fire policies"
264
+ {"intent":"calculate","needs_metadata":true,"filters":{"policy_type":"fire"},"sort_by":null,"sort_order":"desc","limit":null,"calculation":"sum","calculation_field":"sum_insured"}
265
+
266
+ Query: "what is covered in the ABC policy document?"
267
+ {"intent":"specific","needs_metadata":false,"filters":{},"sort_by":null,"sort_order":"desc","limit":null,"calculation":null,"calculation_field":null}
268
+
269
+ Query: "list all policies renewing in 2026"
270
+ {"intent":"list","needs_metadata":true,"filters":{"renewal_year":2026},"sort_by":"renewal_date","sort_order":"asc","limit":null,"calculation":null,"calculation_field":null}
271
+
272
+ Query: "how many manufacturing companies do we have?"
273
+ {"intent":"count","needs_metadata":true,"filters":{"is_manufacturing":true},"sort_by":null,"sort_order":"desc","limit":null,"calculation":"count","calculation_field":null}
274
+
275
+ Query: "compare policy A and policy B"
276
+ {"intent":"compare","needs_metadata":false,"filters":{},"sort_by":null,"sort_order":"desc","limit":null,"calculation":null,"calculation_field":null}"""
277
+
278
+ messages = [
279
+ {"role": "system", "content": system_prompt},
280
+ {"role": "user", "content": f"Parse this query: {query}"}
281
+ ]
282
+
283
+ try:
284
+ # Use non-streaming call for quick parsing
285
+ response = self._call_deepseek_sync(messages, max_tokens=300)
286
+
287
+ # Parse JSON response
288
+ parsed = json.loads(response.strip())
289
+ print(f"[AI QUERY PARSER] Parsed: {json.dumps(parsed, indent=2)}")
290
+ return parsed
291
+
292
+ except Exception as e:
293
+ print(f"[AI QUERY PARSER] Error: {e}, falling back to pattern matching")
294
+ # Fallback to basic detection
295
+ return {
296
+ "intent": "specific",
297
+ "needs_metadata": False,
298
+ "filters": {},
299
+ "sort_by": None,
300
+ "sort_order": "desc",
301
+ "limit": None,
302
+ "calculation": None,
303
+ "calculation_field": None
304
+ }
305
+
306
+ def _call_deepseek_sync(self, messages: list, max_tokens: int = 500) -> str:
307
+ """Synchronous call to DeepSeek for quick operations like query parsing."""
308
+ import requests
309
+
310
+ if not self.deepseek_api_key:
311
+ raise Exception("DeepSeek API key not configured")
312
+
313
+ response = requests.post(
314
+ "https://api.deepseek.com/v1/chat/completions",
315
+ headers={
316
+ "Authorization": f"Bearer {self.deepseek_api_key}",
317
+ "Content-Type": "application/json"
318
+ },
319
+ json={
320
+ "model": "deepseek-chat",
321
+ "messages": messages,
322
+ "max_tokens": max_tokens,
323
+ "temperature": 0.1 # Low temperature for consistent parsing
324
+ },
325
+ timeout=15
326
+ )
327
+
328
+ if response.status_code == 200:
329
+ return response.json()['choices'][0]['message']['content']
330
+ else:
331
+ raise Exception(f"DeepSeek API error: {response.status_code}")
332
+
333
+ def _detect_query_type(self, query: str, history: list[dict] = None) -> str:
334
+ """
335
+ Detect the type of query to optimize retrieval and response.
336
+ Returns: 'specific', 'aggregate', 'calculation', 'date_filter',
337
+ 'cross_document', 'followup', 'comparison', 'general'
338
+
339
+ NEW TYPES:
340
+ - 'aggregate': List all, count all, common across all documents
341
+ - 'calculation': Math operations (sum, average, total of numbers)
342
+ - 'date_filter': Date-based filtering (policies renewing in 2026)
343
+ """
344
+ query_lower = query.lower().strip()
345
+
346
+ # AGGREGATE patterns - queries that need to scan ALL documents
347
+ aggregate_patterns = [
348
+ 'list all', 'give me all', 'show all', 'all policies', 'all documents',
349
+ 'every policy', 'every document', 'all the policies', 'all the documents',
350
+ 'how many policies', 'how many documents', 'count all', 'total number of',
351
+ 'all manufacturing', 'all companies', 'all insured', 'all insurers',
352
+ 'common', 'across all', 'in all documents', 'throughout all',
353
+ 'summarize all', 'overview of all', 'complete list', 'full list',
354
+ 'what are the', 'what policies', 'which companies', 'which policies'
355
+ ]
356
+
357
+ # CALCULATION patterns - queries needing math operations
358
+ calculation_patterns = [
359
+ 'total sum', 'sum of', 'add up', 'combined', 'aggregate',
360
+ 'total insured', 'total premium', 'total value', 'total amount',
361
+ 'calculate', 'average', 'mean', 'maximum', 'minimum', 'highest', 'lowest',
362
+ 'what is the total', 'how much total', 'sum insured across',
363
+ 'cumulative', 'grand total'
364
+ ]
365
+
366
+ # DATE FILTER patterns - queries filtering by dates
367
+ date_patterns = [
368
+ 'renew in', 'renewal in', 'expiring in', 'expire in', 'expiry in',
369
+ 'renewing in 2024', 'renewing in 2025', 'renewing in 2026', 'renewing in 2027',
370
+ 'expiring in 2024', 'expiring in 2025', 'expiring in 2026', 'expiring in 2027',
371
+ 'policies in 2024', 'policies in 2025', 'policies in 2026', 'policies in 2027',
372
+ 'before 2025', 'after 2025', 'before 2026', 'after 2026',
373
+ 'next year', 'this year', 'last year', 'next month',
374
+ 'valid until', 'valid till', 'due for renewal'
375
+ ]
376
+
377
+ # Followup indicators - pronouns and references to previous context
378
+ followup_patterns = [
379
+ 'it', 'this', 'that', 'these', 'those', 'the same', 'same one',
380
+ 'mentioned', 'above', 'earlier', 'previous', 'last one',
381
+ 'for it', 'about it', 'of it', 'its ', "it's", 'for this', 'for that'
382
+ ]
383
+
384
+ # Cross-document patterns (legacy - now mostly covered by aggregate)
385
+ cross_doc_patterns = [
386
+ 'other documents', 'other policies', 'other files',
387
+ 'which documents', 'which files',
388
+ 'similar to', 'related to', 'like this one'
389
+ ]
390
+
391
+ # Comparison patterns
392
+ comparison_patterns = [
393
+ 'compare', 'difference between', 'versus', ' vs ', 'differ',
394
+ 'same as', 'similar to', 'contrast', 'both', 'either'
395
+ ]
396
+
397
+ # Check patterns in priority order
398
+ # 1. Aggregate queries (highest priority for "list all" type queries)
399
+ for pattern in aggregate_patterns:
400
+ if pattern in query_lower:
401
+ print(f"[QUERY TYPE] Detected AGGREGATE: matched '{pattern}'")
402
+ return 'aggregate'
403
+
404
+ # 2. Calculation queries
405
+ for pattern in calculation_patterns:
406
+ if pattern in query_lower:
407
+ print(f"[QUERY TYPE] Detected CALCULATION: matched '{pattern}'")
408
+ return 'calculation'
409
+
410
+ # 3. Date filter queries
411
+ for pattern in date_patterns:
412
+ if pattern in query_lower:
413
+ print(f"[QUERY TYPE] Detected DATE_FILTER: matched '{pattern}'")
414
+ return 'date_filter'
415
+
416
+ # 4. Followup queries (short queries with pronouns)
417
+ for pattern in followup_patterns:
418
+ if pattern in query_lower and len(query) < 100:
419
+ return 'followup'
420
+
421
+ # 5. Cross-document queries
422
+ for pattern in cross_doc_patterns:
423
+ if pattern in query_lower:
424
+ return 'cross_document'
425
+
426
+ # 6. Comparison queries
427
+ for pattern in comparison_patterns:
428
+ if pattern in query_lower:
429
+ return 'comparison'
430
+
431
+ # If there's recent history and query is short, likely a followup
432
+ if history and len(history) > 0 and len(query) < 50:
433
+ words = query_lower.split()
434
+ if words and words[0] in ['what', 'who', 'when', 'where', 'why', 'how', 'is', 'are', 'does', 'do', 'can']:
435
+ return 'followup'
436
+
437
+ return 'general'
438
+
439
def _handle_aggregate_query(self, user_id: str, bucket_id: str, query: str) -> dict:
    """
    Build an LLM context covering every document in a bucket.

    Serves 'list all' / 'how many' style questions: instead of top-K chunk
    retrieval, it pulls all metadata records (and summaries) from
    ``chroma_service`` and renders one entry per document.

    More than 50 documents are rendered in a compact one-line-per-document
    format; smaller sets get a detailed multi-line entry including up to
    300 characters of the matching summary.

    Returns:
        dict with keys 'context' (rendered text), 'metadata' (all records),
        'total_documents' and 'sources' (doc_id -> document_title).
    """
    print(f"[AGGREGATE] Handling aggregate query: {query[:50]}...")

    # Fetch everything stored for this bucket: metadata first, then summaries.
    all_metadata = chroma_service.get_all_metadata(user_id, bucket_id)
    all_summaries = chroma_service.get_all_summaries(user_id, bucket_id)
    doc_total = len(all_metadata)

    print(f"[AGGREGATE] Retrieved {doc_total} metadata records, {len(all_summaries)} summaries")

    if doc_total > 50:
        # Compact rendering keeps large buckets within token limits.
        print(f"[AGGREGATE] Large dataset ({doc_total} docs) - using compact format")
        rendered = [
            f"{position}. {record.get('document_title', 'Unknown')}"
            f" | Insured: {record.get('insured_name', 'N/A')}"
            f" | Type: {record.get('policy_type', 'N/A')}"
            f" | Industry: {record.get('industry', 'N/A')}"
            f" | Sum: {record.get('sum_insured', 0)}"
            f" | Mfg: {record.get('is_manufacturing', False)}"
            for position, record in enumerate(all_metadata, 1)
        ]
    else:
        rendered = []
        for position, record in enumerate(all_metadata, 1):
            record_id = record.get('doc_id', '')
            title = record.get('document_title', 'Unknown Document')

            # First summary whose doc_id matches this record, else empty.
            summary_text = next(
                (item.get('summary', '') for item in all_summaries
                 if item.get('doc_id') == record_id),
                ""
            )
            summary_line = summary_text[:300] if summary_text else 'No summary available'

            block = f"""
=== Document {position}: {title} ===
- Policy Number: {record.get('policy_number', 'N/A')}
- Insured: {record.get('insured_name', 'N/A')}
- Insurer: {record.get('insurer_name', 'N/A')}
- Policy Type: {record.get('policy_type', 'N/A')}
- Industry: {record.get('industry', 'N/A')}
- Sum Insured: {record.get('sum_insured', 'N/A')}
- Premium: {record.get('premium_amount', 'N/A')}
- Start Date: {record.get('policy_start_date', 'N/A')}
- End Date: {record.get('policy_end_date', 'N/A')}
- Renewal Date: {record.get('renewal_date', 'N/A')}
- Location: {record.get('city', '')}, {record.get('state', '')}
- Is Manufacturing: {record.get('is_manufacturing', False)}
Summary: {summary_line}
"""
            rendered.append(block.strip())

    context = '\n'.join(rendered)
    print(f"[AGGREGATE] Context length: {len(context)} characters")

    source_map = {record.get('doc_id'): record.get('document_title') for record in all_metadata}
    return {
        'context': context,
        'metadata': all_metadata,
        'total_documents': doc_total,
        'sources': source_map
    }
507
+
508
+ def _handle_calculation_query(self, user_id: str, bucket_id: str, query: str) -> dict:
509
+ """
510
+ Handle calculation queries by getting all metadata and performing math.
511
+ Used for 'total sum insured', 'average premium', etc.
512
+ """
513
+ print(f"[CALCULATION] Handling calculation query: {query[:50]}...")
514
+
515
+ query_lower = query.lower()
516
+
517
+ # Get all metadata
518
+ all_metadata = chroma_service.get_all_metadata(user_id, bucket_id)
519
+
520
+ # Determine what to calculate
521
+ calc_results = {}
522
+
523
+ # Sum insured calculations
524
+ if 'sum insured' in query_lower or 'insured' in query_lower:
525
+ values = [m.get('sum_insured', 0) for m in all_metadata if m.get('sum_insured')]
526
+ calc_results['sum_insured'] = {
527
+ 'total': sum(values),
528
+ 'count': len(values),
529
+ 'average': sum(values) / len(values) if values else 0,
530
+ 'max': max(values) if values else 0,
531
+ 'min': min(values) if values else 0
532
+ }
533
+
534
+ # Premium calculations
535
+ if 'premium' in query_lower:
536
+ values = [m.get('premium_amount', 0) for m in all_metadata if m.get('premium_amount')]
537
+ calc_results['premium'] = {
538
+ 'total': sum(values),
539
+ 'count': len(values),
540
+ 'average': sum(values) / len(values) if values else 0,
541
+ 'max': max(values) if values else 0,
542
+ 'min': min(values) if values else 0
543
+ }
544
+
545
+ # Policy count by type
546
+ if 'type' in query_lower or 'policies' in query_lower:
547
+ type_counts = {}
548
+ for m in all_metadata:
549
+ pt = m.get('policy_type', 'unknown')
550
+ type_counts[pt] = type_counts.get(pt, 0) + 1
551
+ calc_results['policy_types'] = type_counts
552
+
553
+ # Build context with calculation results
554
+ context = f"""
555
+ === CALCULATION RESULTS FOR {len(all_metadata)} DOCUMENTS ===
556
+
557
+ """
558
+
559
+ if 'sum_insured' in calc_results:
560
+ si = calc_results['sum_insured']
561
+ context += f"""
562
+ ## Sum Insured Analysis
563
+ - **Total Sum Insured**: ₹{si['total']:,.2f}
564
+ - **Number of policies with sum insured**: {si['count']}
565
+ - **Average Sum Insured**: ₹{si['average']:,.2f}
566
+ - **Maximum Sum Insured**: ₹{si['max']:,.2f}
567
+ - **Minimum Sum Insured**: ₹{si['min']:,.2f}
568
+ """
569
+
570
+ if 'premium' in calc_results:
571
+ pm = calc_results['premium']
572
+ context += f"""
573
+ ## Premium Analysis
574
+ - **Total Premium**: ₹{pm['total']:,.2f}
575
+ - **Number of policies with premium**: {pm['count']}
576
+ - **Average Premium**: ₹{pm['average']:,.2f}
577
+ - **Maximum Premium**: ₹{pm['max']:,.2f}
578
+ - **Minimum Premium**: ₹{pm['min']:,.2f}
579
+ """
580
+
581
+ if 'policy_types' in calc_results:
582
+ context += "\n## Policy Types Breakdown\n"
583
+ for pt, count in sorted(calc_results['policy_types'].items(), key=lambda x: -x[1]):
584
+ context += f"- **{pt.title()}**: {count} policies\n"
585
+
586
+ return {
587
+ 'context': context,
588
+ 'calculations': calc_results,
589
+ 'total_documents': len(all_metadata),
590
+ 'sources': {m.get('doc_id'): m.get('document_title') for m in all_metadata}
591
+ }
592
+
593
def _handle_date_filter_query(self, user_id: str, bucket_id: str, query: str) -> dict:
    """
    Select documents whose renewal/expiry/start year equals the query's year.

    Used for 'policies renewing in 2026', 'expiring this year', etc. The
    year is extracted with ``date_parser.get_year_from_query``. The date
    field compared depends on the first keyword found in the query, in this
    priority: 'renew' (renewal_year, falling back to the policy end date),
    'expir' (policy end date), 'start' (policy start date).

    NOTE(review): when no year is extracted, or none of the three keywords
    is present, no document matches. Only exact-year equality is tested.

    Returns:
        dict with keys 'context', 'matching_documents', 'target_year',
        'total_matches' and 'sources' (doc_id -> document_title).
    """
    print(f"[DATE FILTER] Handling date query: {query[:50]}...")

    # Extract year from query, then load every metadata record.
    target_year = date_parser.get_year_from_query(query)
    all_metadata = chroma_service.get_all_metadata(user_id, bucket_id)

    query_lower = query.lower()

    # Resolve once which date criterion applies; None means nothing matches.
    criterion = None
    if target_year:
        for keyword in ('renew', 'expir', 'start'):
            if keyword in query_lower:
                criterion = keyword
                break

    matching_docs = []
    for record in all_metadata:
        is_match = False

        if criterion == 'renew':
            year_value = record.get('renewal_year', 0)
            # No stored renewal year: derive it from the policy end date.
            if not year_value and record.get('policy_end_date'):
                parsed_end = date_parser.parse_date(record.get('policy_end_date'))
                if parsed_end:
                    year_value = parsed_end.year
            is_match = year_value == target_year

        elif criterion == 'expir':
            raw_end = record.get('policy_end_date', '')
            if raw_end:
                parsed_end = date_parser.parse_date(raw_end)
                if parsed_end and parsed_end.year == target_year:
                    is_match = True

        elif criterion == 'start':
            raw_start = record.get('policy_start_date', '')
            if raw_start:
                parsed_start = date_parser.parse_date(raw_start)
                if parsed_start and parsed_start.year == target_year:
                    is_match = True

        if is_match:
            matching_docs.append(record)

    match_total = len(matching_docs)
    print(f"[DATE FILTER] Found {match_total} documents matching year {target_year}")

    # Header lines followed by one entry per matching document.
    context_parts = [
        f"=== POLICIES MATCHING DATE CRITERIA (Year: {target_year}) ===\n",
        f"Found {match_total} policies:\n",
    ]
    for position, record in enumerate(matching_docs, 1):
        context_parts.append(f"""
{position}. **{record.get('document_title', 'Unknown')}**
- Insured: {record.get('insured_name', 'N/A')}
- Policy Type: {record.get('policy_type', 'N/A')}
- Start: {record.get('policy_start_date', 'N/A')}
- End: {record.get('policy_end_date', 'N/A')}
- Renewal: {record.get('renewal_date', 'N/A')}
- Sum Insured: {record.get('sum_insured', 'N/A')}
""")

    source_map = {record.get('doc_id'): record.get('document_title') for record in matching_docs}
    return {
        'context': '\n'.join(context_parts),
        'matching_documents': matching_docs,
        'target_year': target_year,
        'total_matches': match_total,
        'sources': source_map
    }
668
+
669
+ def _handle_metadata_query(self, user_id: str, bucket_id: str,
670
+ query: str, parsed: dict) -> dict:
671
+ """
672
+ Handle queries using AI-parsed parameters for intelligent filtering and sorting.
673
+ This is the new AI-powered approach that replaces pattern-based routing.
674
+
675
+ Args:
676
+ user_id: User ID
677
+ bucket_id: Bucket ID
678
+ query: Original query text
679
+ parsed: AI-parsed parameters with filters, sort, limit, etc.
680
+ """
681
+ print(f"[METADATA QUERY] Using AI-parsed parameters: {parsed}")
682
+
683
+ # Get ALL metadata for this bucket
684
+ all_metadata = chroma_service.get_all_metadata(user_id, bucket_id)
685
+ total_before_filter = len(all_metadata)
686
+
687
+ print(f"[METADATA QUERY] Starting with {total_before_filter} documents")
688
+
689
+ # Apply AI-extracted filters
690
+ filters = parsed.get('filters', {})
691
+ for field, value in filters.items():
692
+ if value is None or value == '':
693
+ continue
694
+
695
+ if field == 'is_manufacturing' and value:
696
+ all_metadata = [m for m in all_metadata if m.get('is_manufacturing', False)]
697
+ print(f"[METADATA QUERY] Filtered by manufacturing: {len(all_metadata)} remaining")
698
+
699
+ elif field == 'industry':
700
+ all_metadata = [m for m in all_metadata
701
+ if str(value).lower() in str(m.get('industry', '')).lower()]
702
+ print(f"[METADATA QUERY] Filtered by industry '{value}': {len(all_metadata)} remaining")
703
+
704
+ elif field == 'policy_type':
705
+ all_metadata = [m for m in all_metadata
706
+ if str(value).lower() in str(m.get('policy_type', '')).lower()]
707
+ print(f"[METADATA QUERY] Filtered by policy_type '{value}': {len(all_metadata)} remaining")
708
+
709
+ elif field in ['city', 'state', 'insurer_name', 'insured_name', 'broker_name']:
710
+ all_metadata = [m for m in all_metadata
711
+ if str(value).lower() in str(m.get(field, '')).lower()]
712
+ print(f"[METADATA QUERY] Filtered by {field} '{value}': {len(all_metadata)} remaining")
713
+
714
+ elif field == 'renewal_year':
715
+ target_year = int(value) if isinstance(value, (int, str)) else None
716
+ if target_year:
717
+ all_metadata = [m for m in all_metadata if m.get('renewal_year') == target_year]
718
+ print(f"[METADATA QUERY] Filtered by renewal_year {target_year}: {len(all_metadata)} remaining")
719
+
720
+ # Apply AI-extracted sorting
721
+ sort_by = parsed.get('sort_by')
722
+ if sort_by and sort_by in ['premium_amount', 'sum_insured', 'renewal_date', 'policy_start_date']:
723
+ reverse = parsed.get('sort_order', 'desc') == 'desc'
724
+ all_metadata.sort(key=lambda x: x.get(sort_by, 0) or 0, reverse=reverse)
725
+ print(f"[METADATA QUERY] Sorted by {sort_by} {'desc' if reverse else 'asc'}")
726
+
727
+ # Apply AI-extracted limit
728
+ limit = parsed.get('limit')
729
+ if limit and isinstance(limit, int) and limit > 0:
730
+ all_metadata = all_metadata[:limit]
731
+ print(f"[METADATA QUERY] Limited to top {limit}")
732
+
733
+ # Handle calculations
734
+ calc_result = None
735
+ if parsed.get('intent') == 'calculate' or parsed.get('calculation'):
736
+ calc_type = parsed.get('calculation', 'sum')
737
+ calc_field = parsed.get('calculation_field', 'premium_amount')
738
+ values = [m.get(calc_field, 0) or 0 for m in all_metadata]
739
+
740
+ if calc_type == 'sum':
741
+ calc_result = {'type': 'sum', 'field': calc_field, 'value': sum(values)}
742
+ elif calc_type == 'average' and values:
743
+ calc_result = {'type': 'average', 'field': calc_field, 'value': sum(values) / len(values)}
744
+ elif calc_type == 'max' and values:
745
+ calc_result = {'type': 'max', 'field': calc_field, 'value': max(values)}
746
+ elif calc_type == 'min' and values:
747
+ calc_result = {'type': 'min', 'field': calc_field, 'value': min(values)}
748
+ elif calc_type == 'count':
749
+ calc_result = {'type': 'count', 'field': 'documents', 'value': len(all_metadata)}
750
+
751
+ # Handle count intent
752
+ if parsed.get('intent') == 'count' and not calc_result:
753
+ calc_result = {'type': 'count', 'field': 'documents', 'value': len(all_metadata)}
754
+
755
+ # Build context
756
+ context_parts = []
757
+
758
+ # Add calculation result if any
759
+ if calc_result:
760
+ if calc_result['type'] == 'count':
761
+ context_parts.append(f"**Total Count: {calc_result['value']} documents**\n")
762
+ else:
763
+ context_parts.append(f"**{calc_result['type'].title()} of {calc_result['field']}: ₹{calc_result['value']:,.2f}**\n")
764
+
765
+ # Add filtered results summary
766
+ filter_desc = ', '.join(f"{k}={v}" for k, v in filters.items() if v)
767
+ if filter_desc:
768
+ context_parts.append(f"Filtered by: {filter_desc}")
769
+ context_parts.append(f"Showing {len(all_metadata)} of {total_before_filter} total documents\n")
770
+
771
+ # Build document list
772
+ if len(all_metadata) > 0:
773
+ context_parts.append("---\n**Matching Documents:**\n")
774
+
775
+ for i, meta in enumerate(all_metadata, 1):
776
+ # Use rich format for smaller sets, compact for larger
777
+ if len(all_metadata) <= 20:
778
+ entry = f"""
779
+ **{i}. {meta.get('document_title', 'Unknown')}**
780
+ - Insured: {meta.get('insured_name', 'N/A')}
781
+ - Insurer: {meta.get('insurer_name', 'N/A')}
782
+ - Policy Type: {meta.get('policy_type', 'N/A')}
783
+ - Industry: {meta.get('industry', 'N/A')}
784
+ - Sum Insured: ₹{meta.get('sum_insured', 0):,.2f}
785
+ - Premium: ₹{meta.get('premium_amount', 0):,.2f}
786
+ - Renewal: {meta.get('renewal_date', 'N/A')}
787
+ - Location: {meta.get('city', '')}, {meta.get('state', '')}
788
+ """
789
+ else:
790
+ # Compact format for large sets
791
+ entry = f"{i}. {meta.get('document_title', 'Unknown')} | {meta.get('insured_name', 'N/A')} | ₹{meta.get('premium_amount', 0):,.0f} | {meta.get('policy_type', 'N/A')}"
792
+
793
+ context_parts.append(entry)
794
+
795
+ context = '\n'.join(context_parts)
796
+ print(f"[METADATA QUERY] Final context: {len(context)} chars, {len(all_metadata)} docs")
797
+
798
+ return {
799
+ 'context': context,
800
+ 'metadata': all_metadata,
801
+ 'total_documents': len(all_metadata),
802
+ 'total_before_filter': total_before_filter,
803
+ 'calculation': calc_result,
804
+ 'parsed': parsed,
805
+ 'sources': {m.get('doc_id'): m.get('document_title') for m in all_metadata}
806
+ }
807
+
808
+ def _stream_metadata_query(self, user_id: str, bucket_id: str,
809
+ query: str, parsed: dict, chat_id: str = ""):
810
+ """
811
+ Stream responses for AI-parsed metadata queries.
812
+ Uses intelligent filtering, sorting, and calculations based on AI-extracted parameters.
813
+
814
+ This is the new AI-powered streaming handler that replaces pattern-based routing.
815
+
816
+ Args:
817
+ user_id: User ID
818
+ bucket_id: Bucket ID
819
+ query: Original query text
820
+ parsed: AI-parsed parameters with intent, filters, sort, limit, etc.
821
+ chat_id: Chat session ID for conversation storage
822
+ """
823
+ print(f"[METADATA STREAM] Handling AI-parsed query: intent={parsed.get('intent')}")
824
+
825
+ # Step 1: Get filtered, sorted, and calculated metadata using AI-parsed parameters
826
+ result = self._handle_metadata_query(user_id, bucket_id, query, parsed)
827
+
828
+ context = result.get('context', '')
829
+ sources = result.get('sources', {})
830
+ total_docs = result.get('total_documents', 0)
831
+ total_before = result.get('total_before_filter', 0)
832
+ calculation = result.get('calculation')
833
+
834
+ # Check if we have any data
835
+ if not context or total_docs == 0:
836
+ yield {
837
+ "type": "error",
838
+ "content": "No document metadata found. Please run the migration script to extract metadata from your documents."
839
+ }
840
+ return
841
+
842
+ # Send sources first
843
+ yield {
844
+ "type": "sources",
845
+ "sources": list(sources.keys()),
846
+ "source_files": list(sources.values())
847
+ }
848
+
849
+ # Step 2: Build AI prompt based on parsed intent
850
+ intent = parsed.get('intent', 'list')
851
+
852
+ if intent == 'count':
853
+ system_prompt = f"""You are Iribl AI, a document analysis assistant answering a COUNT query.
854
+
855
+ CRITICAL INSTRUCTIONS:
856
+ 1. The count has been computed: {total_docs} documents match the criteria.
857
+ 2. State the count clearly and directly.
858
+ 3. If filters were applied, mention what was filtered.
859
+ 4. Brief context about what was counted is helpful."""
860
+
861
+ elif intent == 'calculate':
862
+ calc_info = ""
863
+ if calculation:
864
+ calc_info = f"\nPre-computed: {calculation.get('type').title()} of {calculation.get('field')} = ₹{calculation.get('value', 0):,.2f}"
865
+ system_prompt = f"""You are Iribl AI, a document analysis assistant performing CALCULATIONS across {total_docs} documents.
866
+
867
+ CRITICAL INSTRUCTIONS:
868
+ 1. The calculation results have been computed from {total_docs} documents.{calc_info}
869
+ 2. Present the numbers clearly with proper formatting (₹ for currency, commas for thousands).
870
+ 3. Explain what the numbers mean in business context.
871
+ 4. Include document counts to show the calculation scope.
872
+
873
+ Present the data accurately - these are pre-computed from actual document metadata."""
874
+
875
+ elif intent == 'rank':
876
+ limit = parsed.get('limit', total_docs)
877
+ sort_by = parsed.get('sort_by', 'premium_amount')
878
+ sort_order = parsed.get('sort_order', 'desc')
879
+ system_prompt = f"""You are Iribl AI, a document analysis assistant answering a RANKING query.
880
+
881
+ CRITICAL INSTRUCTIONS:
882
+ 1. You have been given the top {limit} documents sorted by {sort_by} ({sort_order}).
883
+ 2. Present them as a clear ranked list with the ranking number.
884
+ 3. Highlight the key metric ({sort_by}) for each item.
885
+ 4. Format nicely with headers, bold for values, and bullet points.
886
+ 5. Include all {limit} items - do not truncate."""
887
+
888
+ elif intent == 'compare':
889
+ system_prompt = f"""You are Iribl AI, a document analysis assistant answering a COMPARISON query.
890
+
891
+ CRITICAL INSTRUCTIONS:
892
+ 1. You have metadata for {total_docs} relevant documents.
893
+ 2. Create a clear comparison highlighting differences and similarities.
894
+ 3. Use tables or side-by-side format where helpful.
895
+ 4. Focus on the key metrics mentioned in the query.
896
+ 5. Be thorough but organized."""
897
+
898
+ else: # list, summarize, or other
899
+ system_prompt = f"""You are Iribl AI, a document analysis assistant. You are answering a query that requires information from {total_docs} documents.
900
+
901
+ CRITICAL INSTRUCTIONS:
902
+ 1. You have been given metadata for {total_docs} documents (from {total_before} total).
903
+ 2. Your answer must be COMPREHENSIVE - include ALL relevant items from the data provided.
904
+ 3. Format your response clearly with headers, bullet points, and bold text.
905
+ 4. For "list" queries, actually list ALL matching items with key details.
906
+ 5. Organize information logically (by type, by company, by date, etc.).
907
+ 6. For "summarize" queries, provide a concise overview with key statistics.
908
+
909
+ Do NOT say information is missing - you have the filtered list. Do NOT ask for more documents."""
910
+
911
+ # Step 3: Build messages
912
+ messages = [{"role": "system", "content": system_prompt}]
913
+
914
+ user_message = f"""Based on the following document metadata and any calculations, answer my question.
915
+
916
+ DOCUMENT DATA:
917
+ {context}
918
+
919
+ QUESTION: {query}
920
+
921
+ Instructions: Provide a complete, well-formatted answer based on ALL the data above."""
922
+
923
+ messages.append({"role": "user", "content": user_message})
924
+
925
+ # Step 4: Stream response using DeepSeek or fallback
926
+ full_response = ""
927
+ chunk_count = 0
928
+
929
+ if self.use_deepseek:
930
+ print("[METADATA STREAM] Using DeepSeek for response")
931
+ for chunk in self._call_deepseek_streaming(messages):
932
+ if "error" in chunk:
933
+ print(f"[METADATA STREAM] DeepSeek failed, falling back...")
934
+ break
935
+ if "chunk" in chunk:
936
+ full_response += chunk["chunk"]
937
+ chunk_count += 1
938
+ if chunk_count <= 3:
939
+ print(f"[METADATA YIELD] Chunk {chunk_count}: {chunk['chunk'][:30]}...")
940
+ yield {"type": "content", "content": chunk["chunk"]}
941
+
942
+ print(f"[METADATA STREAM] DeepSeek streaming done, yielded {chunk_count} chunks")
943
+
944
+ # Fallback to OpenRouter if DeepSeek failed or not available
945
+ if not full_response:
946
+ print("[METADATA STREAM] Falling back to OpenRouter")
947
+ for model_key in self.fallback_order:
948
+ try:
949
+ for chunk in self._call_ai_model_streaming(model_key, messages):
950
+ if "error" in chunk:
951
+ continue
952
+ if "chunk" in chunk:
953
+ full_response += chunk["chunk"]
954
+ chunk_count += 1
955
+ yield {"type": "content", "content": chunk["chunk"]}
956
+ if full_response:
957
+ break
958
+ except Exception as e:
959
+ print(f"[METADATA STREAM] Model {model_key} failed: {e}")
960
+ continue
961
+
962
+ # Step 5: Store conversation
963
+ if full_response and chat_id:
964
+ try:
965
+ chroma_service.store_conversation(
966
+ user_id=user_id,
967
+ role="user",
968
+ content=query,
969
+ bucket_id=bucket_id or "",
970
+ chat_id=chat_id
971
+ )
972
+ chroma_service.store_conversation(
973
+ user_id=user_id,
974
+ role="assistant",
975
+ content=full_response,
976
+ bucket_id=bucket_id or "",
977
+ chat_id=chat_id
978
+ )
979
+ except Exception as e:
980
+ print(f"[METADATA STREAM] Failed to store conversation: {e}")
981
+
982
+ # Send done signal with metadata about the query
983
+ yield {
984
+ "type": "done",
985
+ "query_type": "metadata",
986
+ "intent": parsed.get('intent'),
987
+ "total_documents": total_docs,
988
+ "total_before_filter": total_before
989
+ }
990
+
991
+ def _stream_specialized_query(self, user_id: str, bucket_id: str,
992
+ query: str, query_type: str, chat_id: str = ""):
993
+ """
994
+ Stream responses for specialized queries (aggregate, calculation, date_filter).
995
+ Uses metadata/summaries instead of top-K chunk retrieval.
996
+
997
+ This preserves the existing flow for specific/comparison/general queries.
998
+ """
999
+ import time
1000
+ print(f"[SPECIALIZED QUERY] Handling {query_type} query")
1001
+
1002
+ # Step 1: Get context from appropriate handler
1003
+ if query_type == 'aggregate':
1004
+ result = self._handle_aggregate_query(user_id, bucket_id, query)
1005
+ elif query_type == 'calculation':
1006
+ result = self._handle_calculation_query(user_id, bucket_id, query)
1007
+ elif query_type == 'date_filter':
1008
+ result = self._handle_date_filter_query(user_id, bucket_id, query)
1009
+ else:
1010
+ yield {"type": "error", "content": f"Unknown query type: {query_type}"}
1011
+ return
1012
+
1013
+ context = result.get('context', '')
1014
+ sources = result.get('sources', {})
1015
+ total_docs = result.get('total_documents', result.get('total_matches', 0))
1016
+
1017
+ # Check if we have any data
1018
+ if not context or total_docs == 0:
1019
+ yield {
1020
+ "type": "error",
1021
+ "content": "No document metadata found. Please run the migration script to extract metadata from your documents."
1022
+ }
1023
+ return
1024
+
1025
+ # Send sources first
1026
+ yield {
1027
+ "type": "sources",
1028
+ "sources": list(sources.keys()),
1029
+ "source_files": list(sources.values())
1030
+ }
1031
+
1032
+ # Step 2: Build AI prompt for specialized query
1033
+ if query_type == 'aggregate':
1034
+ system_prompt = f"""You are Iribl AI, a document analysis assistant. You are answering an AGGREGATE query that requires information from ALL {total_docs} documents.
1035
+
1036
+ CRITICAL INSTRUCTIONS:
1037
+ 1. You have been given metadata and summaries for ALL {total_docs} documents in the bucket.
1038
+ 2. Your answer must be COMPREHENSIVE - include ALL relevant items from the data provided.
1039
+ 3. Format your response clearly with headers, bullet points, and bold text.
1040
+ 4. For "list all" queries, actually list ALL matching items.
1041
+ 5. For "how many" queries, give exact counts.
1042
+ 6. Organize information logically (by type, by company, by date, etc.).
1043
+
1044
+ Do NOT say information is missing - you have the full list. Do NOT ask for more documents."""
1045
+
1046
+ elif query_type == 'calculation':
1047
+ system_prompt = f"""You are Iribl AI, a document analysis assistant performing CALCULATIONS across {total_docs} documents.
1048
+
1049
+ CRITICAL INSTRUCTIONS:
1050
+ 1. The calculation results have already been computed from all documents.
1051
+ 2. Present the numbers clearly with proper formatting (₹ for currency, commas for thousands).
1052
+ 3. Explain what the numbers mean in business context.
1053
+ 4. If asked for totals, provide grand totals.
1054
+ 5. If asked for averages, provide averages with context.
1055
+ 6. Include document counts to show the calculation scope.
1056
+
1057
+ Present the data accurately - these are pre-computed from actual document metadata."""
1058
+
1059
+ elif query_type == 'date_filter':
1060
+ total_matches = result.get('total_matches', 0)
1061
+ target_year = result.get('target_year', 'N/A')
1062
+ system_prompt = f"""You are Iribl AI, a document analysis assistant answering a DATE-BASED query.
1063
+
1064
+ CRITICAL INSTRUCTIONS:
1065
+ 1. You have been given {total_matches} policies matching the date criteria (year {target_year}).
1066
+ 2. List ALL matching policies with their relevant dates.
1067
+ 3. Format the response as a clear list with key details.
1068
+ 4. If no matches found, say so explicitly.
1069
+ 5. Include date-relevant details: start date, end date, renewal date.
1070
+
1071
+ Present ALL matching documents - do not truncate the list."""
1072
+
1073
+ # Step 3: Build messages
1074
+ messages = [{"role": "system", "content": system_prompt}]
1075
+
1076
+ # Add context and query
1077
+ user_message = f"""Based on the following document metadata and calculations, answer my question.
1078
+
1079
+ DOCUMENT DATA:
1080
+ {context}
1081
+
1082
+ QUESTION: {query}
1083
+
1084
+ Instructions: Provide a complete, well-formatted answer based on ALL the data above."""
1085
+
1086
+ messages.append({"role": "user", "content": user_message})
1087
+
1088
+ # Step 4: Stream response using DeepSeek or fallback
1089
+ full_response = ""
1090
+ chunk_count = 0
1091
+
1092
+ if self.use_deepseek:
1093
+ print("[SPECIALIZED QUERY] Using DeepSeek for response")
1094
+ for chunk in self._call_deepseek_streaming(messages):
1095
+ if "error" in chunk:
1096
+ # Fallback to OpenRouter
1097
+ print(f"[SPECIALIZED QUERY] DeepSeek failed, falling back...")
1098
+ break
1099
+ if "chunk" in chunk:
1100
+ full_response += chunk["chunk"]
1101
+ chunk_count += 1
1102
+ if chunk_count <= 3:
1103
+ print(f"[SPECIALIZED YIELD] Chunk {chunk_count}: {chunk['chunk'][:30]}...")
1104
+ yield {"type": "content", "content": chunk["chunk"]}
1105
+
1106
+ print(f"[SPECIALIZED QUERY] DeepSeek streaming done, yielded {chunk_count} chunks")
1107
+
1108
+ # Fallback to OpenRouter if DeepSeek failed or not available
1109
+ if not full_response:
1110
+ print("[SPECIALIZED QUERY] Falling back to OpenRouter")
1111
+ for model_key in self.fallback_order:
1112
+ try:
1113
+ for chunk in self._call_ai_model_streaming(model_key, messages):
1114
+ if "error" in chunk:
1115
+ continue
1116
+ if "chunk" in chunk:
1117
+ full_response += chunk["chunk"]
1118
+ chunk_count += 1
1119
+ yield {"type": "content", "content": chunk["chunk"]}
1120
+ if full_response:
1121
+ break
1122
+ except Exception as e:
1123
+ print(f"[SPECIALIZED QUERY] Model {model_key} failed: {e}")
1124
+ continue
1125
+
1126
+ # Step 5: Store conversation
1127
+ if full_response and chat_id:
1128
+ try:
1129
+ chroma_service.store_conversation(
1130
+ user_id=user_id,
1131
+ role="user",
1132
+ content=query,
1133
+ bucket_id=bucket_id or "",
1134
+ chat_id=chat_id
1135
+ )
1136
+ chroma_service.store_conversation(
1137
+ user_id=user_id,
1138
+ role="assistant",
1139
+ content=full_response,
1140
+ bucket_id=bucket_id or "",
1141
+ chat_id=chat_id
1142
+ )
1143
+ except Exception as e:
1144
+ print(f"[SPECIALIZED QUERY] Failed to store conversation: {e}")
1145
+
1146
+ # Send done signal
1147
+ yield {"type": "done", "query_type": query_type, "total_documents": total_docs}
1148
+
1149
+ def _build_conversation_context(self, history: list[dict], query: str) -> str:
1150
+ """
1151
+ Build a context summary from conversation history for pronoun resolution.
1152
+ Extracts key entities and topics from recent messages.
1153
+ """
1154
+ if not history:
1155
+ return ""
1156
+
1157
+ # Get last 4 messages (2 Q&A pairs)
1158
+ recent = history[-4:] if len(history) >= 4 else history
1159
+
1160
+ context_parts = []
1161
+ for msg in recent:
1162
+ role = msg.get('role', 'user')
1163
+ content = msg.get('content', '')[:500] # Truncate long messages
1164
+
1165
+ if role == 'user':
1166
+ context_parts.append(f"User asked: {content}")
1167
+ else:
1168
+ # Extract key info from assistant response (first 300 chars)
1169
+ context_parts.append(f"Assistant answered about: {content[:300]}...")
1170
+
1171
+ if context_parts:
1172
+ return "\n".join(context_parts)
1173
+ return ""
1174
+
1175
+ def _build_graph_context(self, chunks: list[dict],
1176
+ user_id: str) -> list[dict]:
1177
+ """
1178
+ Build graph-based context from retrieved chunks
1179
+ Expands context by including related chunks and document metadata
1180
+ """
1181
+ enhanced_chunks = []
1182
+ seen_docs = set()
1183
+
1184
+ for chunk in chunks:
1185
+ doc_id = chunk['doc_id']
1186
+
1187
+ # Get document metadata if not seen
1188
+ if doc_id not in seen_docs:
1189
+ seen_docs.add(doc_id)
1190
+
1191
+ # Get adjacent chunks for context
1192
+ all_doc_chunks = chroma_service.get_document_chunks(doc_id)
1193
+
1194
+ # Find current chunk index
1195
+ chunk_id = chunk['chunk_id']
1196
+ current_idx = None
1197
+
1198
+ for i, c in enumerate(all_doc_chunks):
1199
+ if c['chunk_id'] == chunk_id:
1200
+ current_idx = i
1201
+ break
1202
+
1203
+ # Include surrounding chunks for graph context
1204
+ if current_idx is not None:
1205
+ start_idx = max(0, current_idx - 1)
1206
+ end_idx = min(len(all_doc_chunks), current_idx + 2)
1207
+
1208
+ for i in range(start_idx, end_idx):
1209
+ if all_doc_chunks[i]['chunk_id'] != chunk_id:
1210
+ enhanced_chunks.append({
1211
+ **all_doc_chunks[i],
1212
+ 'doc_id': doc_id,
1213
+ 'is_context': True
1214
+ })
1215
+
1216
+ enhanced_chunks.append({**chunk, 'is_context': False})
1217
+
1218
+ return enhanced_chunks
1219
+
1220
+ def _call_ai_model(self, model_key: str, messages: list[dict]) -> dict:
1221
+ """Call AI model via OpenRouter"""
1222
+ model_id = self.model_map.get(model_key)
1223
+ if not model_id:
1224
+ return {"success": False, "error": f"Unknown model: {model_key}"}
1225
+
1226
+ headers = {
1227
+ "Authorization": f"Bearer {self.api_key}",
1228
+ "Content-Type": "application/json",
1229
+ "HTTP-Referer": "http://localhost:5000",
1230
+ "X-Title": "NotebookLM Clone"
1231
+ }
1232
+
1233
+ payload = {
1234
+ "model": model_id,
1235
+ "messages": messages,
1236
+ "max_tokens": self.max_tokens,
1237
+ "temperature": self.temperature
1238
+ }
1239
+
1240
+ try:
1241
+ response = requests.post(
1242
+ f"{self.base_url}/chat/completions",
1243
+ headers=headers,
1244
+ json=payload,
1245
+ timeout=self.timeout
1246
+ )
1247
+
1248
+ if response.status_code == 200:
1249
+ data = response.json()
1250
+ text = data['choices'][0]['message']['content']
1251
+ return {"success": True, "response": text, "model": model_key}
1252
+ else:
1253
+ return {
1254
+ "success": False,
1255
+ "error": f"API error: {response.status_code}"
1256
+ }
1257
+ except Exception as e:
1258
+ return {"success": False, "error": str(e)}
1259
+
1260
+ def _call_ai_model_streaming(self, model_key: str, messages: list[dict]):
1261
+ """Call AI model with streaming - yields text chunks as they arrive"""
1262
+ model_id = self.model_map.get(model_key)
1263
+ if not model_id:
1264
+ yield {"error": f"Unknown model: {model_key}"}
1265
+ return
1266
+
1267
+ headers = {
1268
+ "Authorization": f"Bearer {self.api_key}",
1269
+ "Content-Type": "application/json",
1270
+ "HTTP-Referer": "http://localhost:5000",
1271
+ "X-Title": "NotebookLM Clone"
1272
+ }
1273
+
1274
+ payload = {
1275
+ "model": model_id,
1276
+ "messages": messages,
1277
+ "max_tokens": self.max_tokens,
1278
+ "temperature": self.temperature,
1279
+ "stream": True
1280
+ }
1281
+
1282
+ try:
1283
+ response = requests.post(
1284
+ f"{self.base_url}/chat/completions",
1285
+ headers=headers,
1286
+ json=payload,
1287
+ timeout=self.timeout,
1288
+ stream=True
1289
+ )
1290
+
1291
+ if response.status_code == 200:
1292
+ for line in response.iter_lines():
1293
+ if line:
1294
+ line_text = line.decode('utf-8')
1295
+ if line_text.startswith('data: '):
1296
+ data_str = line_text[6:]
1297
+ if data_str.strip() == '[DONE]':
1298
+ break
1299
+ try:
1300
+ import json
1301
+ data = json.loads(data_str)
1302
+ delta = data.get('choices', [{}])[0].get('delta', {})
1303
+ content = delta.get('content', '')
1304
+ if content:
1305
+ yield {"chunk": content, "model": model_key}
1306
+ except:
1307
+ pass
1308
+ else:
1309
+ yield {"error": f"API error: {response.status_code}"}
1310
+ except Exception as e:
1311
+ yield {"error": str(e)}
1312
+
1313
+ def _call_deepseek_streaming(self, messages: list[dict]):
1314
+ """Call DeepSeek API with streaming - highly capable model"""
1315
+ if not self.deepseek_api_key:
1316
+ print("[DEEPSEEK] No API key configured")
1317
+ yield {"error": "DeepSeek API key not configured"}
1318
+ return
1319
+
1320
+ print(f"[DEEPSEEK] Calling model: {self.deepseek_model}")
1321
+
1322
+ headers = {
1323
+ "Authorization": f"Bearer {self.deepseek_api_key}",
1324
+ "Content-Type": "application/json"
1325
+ }
1326
+
1327
+ payload = {
1328
+ "model": self.deepseek_model,
1329
+ "messages": messages,
1330
+ "max_tokens": self.max_tokens,
1331
+ "temperature": self.temperature,
1332
+ "stream": True
1333
+ }
1334
+
1335
+ try:
1336
+ import time
1337
+ start = time.time()
1338
+ response = requests.post(
1339
+ f"{self.deepseek_base_url}/chat/completions",
1340
+ headers=headers,
1341
+ json=payload,
1342
+ timeout=60, # DeepSeek may need more time for complex queries
1343
+ stream=True
1344
+ )
1345
+
1346
+ print(f"[DEEPSEEK] Response status: {response.status_code} in {time.time()-start:.2f}s")
1347
+
1348
+ if response.status_code == 200:
1349
+ chunk_count = 0
1350
+ for line in response.iter_lines():
1351
+ if line:
1352
+ line_text = line.decode('utf-8')
1353
+ if line_text.startswith('data: '):
1354
+ data_str = line_text[6:]
1355
+ if data_str.strip() == '[DONE]':
1356
+ print(f"[DEEPSEEK] Streaming complete, yielded {chunk_count} chunks")
1357
+ break
1358
+ try:
1359
+ import json
1360
+ data = json.loads(data_str)
1361
+ delta = data.get('choices', [{}])[0].get('delta', {})
1362
+ content = delta.get('content', '')
1363
+ if content:
1364
+ chunk_count += 1
1365
+ if chunk_count <= 3:
1366
+ print(f"[DEEPSEEK] Chunk {chunk_count}: {content[:50]}...")
1367
+ yield {"chunk": content, "model": "deepseek"}
1368
+ except Exception as parse_error:
1369
+ print(f"[DEEPSEEK] Parse error: {parse_error}")
1370
+ pass
1371
+ if chunk_count == 0:
1372
+ print(f"[DEEPSEEK] WARNING: No chunks received from stream")
1373
+ else:
1374
+ print(f"[DEEPSEEK] Error: {response.text[:200]}")
1375
+ yield {"error": f"DeepSeek API error: {response.status_code}"}
1376
+ except Exception as e:
1377
+ print(f"[DEEPSEEK] Exception: {e}")
1378
+ yield {"error": str(e)}
1379
+
1380
def query(self, user_id: str, query: str,
          doc_ids: list[str] = None,
          bucket_id: str = None,
          conversation_history: list[dict] = None) -> dict:
    """
    Answer a question from the user's documents (non-streaming RAG path).

    Steps:
        1. Search for relevant chunks (optionally filtered by doc_ids/bucket).
        2. Keep chunks whose distance is below self.relevance_threshold; if
           that removes everything, fall back to the first five hits.
        3. Build a source-labelled context block (graph expansion is skipped).
        4. Add stored conversation history (last 4 messages) and any
           session history passed in (last 6, skipping exact duplicates).
        5. Try each model in self.fallback_order until one succeeds.
        6. Store the question and answer in conversation memory (best
           effort: a storage failure is logged and does not discard the
           answer).

    Args:
        user_id: Owner of the documents and conversation memory.
        query: The user's question.
        doc_ids: Optional document ids forwarded to the chunk search.
        bucket_id: Optional bucket forwarded to search and memory calls.
        conversation_history: Optional session messages to append.

    Returns:
        On success a dict with "success", "response", "sources" and, when a
        model answered, "model", "source_files", "chunks_used" and
        "chunks_filtered". If every model fails:
        {"success": False, "error": ...}.
    """
    def _remember(answer: str) -> None:
        # Persist the exchange. Failures are logged rather than raised so an
        # answer that was already produced still reaches the caller.
        try:
            chroma_service.store_conversation(
                user_id=user_id,
                role="user",
                content=query,
                bucket_id=bucket_id or ""
            )
            chroma_service.store_conversation(
                user_id=user_id,
                role="assistant",
                content=answer,
                bucket_id=bucket_id or ""
            )
        except Exception as e:
            print(f"[QUERY] Failed to store conversation: {e}")

    # Step 1: Retrieve relevant chunks
    chunks = chroma_service.search_chunks(
        user_id=user_id,
        query=query,
        doc_ids=doc_ids,
        bucket_id=bucket_id,
        top_k=self.top_k
    )

    # Step 2: Filter chunks by relevance threshold (lower distance = more relevant)
    relevant_chunks = [
        chunk for chunk in chunks
        if chunk.get('distance', 0) < self.relevance_threshold
    ]

    # Fallback: if the threshold is too strict, use the leading chunks anyway
    # (presumably the search returns best matches first; verify in chroma_service).
    if not relevant_chunks and chunks:
        relevant_chunks = chunks[:5]

    if not relevant_chunks:
        # Record the exchange even though there is nothing to answer from.
        no_info_response = "I don't have any relevant information in your documents to answer this question. Please upload some documents first or ask about a topic covered in your uploaded documents."
        _remember(no_info_response)
        return {
            "success": True,
            "response": no_info_response,
            "sources": []
        }

    # Step 3: Skip graph expansion for speed - use chunks directly
    enhanced_chunks = [{'doc_id': c['doc_id'], 'text': c['text'], 'is_context': False} for c in relevant_chunks]

    # Step 4: Prepare context for AI with document sources
    context_parts = []
    sources = {}  # doc_id -> filename mapping

    for chunk in enhanced_chunks:
        doc_id = chunk['doc_id']
        # Look each document up once; fall back to the id when it is unknown.
        if doc_id not in sources:
            doc_info = chroma_service.get_document(doc_id, user_id)
            sources[doc_id] = doc_info['filename'] if doc_info else doc_id

        # Include source in context for better attribution
        source_label = f"[Source: {sources[doc_id]}]"
        context_parts.append(f"{source_label}\n{chunk['text']}")

    context = "\n\n---\n\n".join(context_parts)

    # Step 5: Build messages with cross-document intelligence prompt
    system_prompt = """You are Iribl AI, a document analysis assistant. You MUST follow these rules strictly:

**CROSS-DOCUMENT INTELLIGENCE (CRITICAL):**
1. SYNTHESIZE information from ALL relevant document sections
2. If documents have CONFLICTING information, state both clearly
3. Never confuse or mix up information between different documents

**ACCURACY RULES:**
1. ONLY answer using information from the DOCUMENT CONTEXT provided below
2. NEVER use external knowledge, training data, or make assumptions
3. If the answer is NOT in the documents, say: "This information is not found in your documents."

**FORMATTING:**
- Use **bold** for key terms and important values
- Use headers (##) for multi-topic answers
- Use bullet points with each item on its own line
- For tables, use proper markdown: | col | col | with |---| separator

**RESPONSE LENGTH:**
- Simple questions: 2-4 sentences
- Lists: Complete list from ALL documents
- Analysis: Structured response with headers

**IMPORTANT: Do NOT list document names or sources at the end of your response.**

You are answering questions about the user's uploaded documents ONLY."""

    messages = [{"role": "system", "content": system_prompt}]

    # Step 6: Load minimal conversation history for speed
    stored_history = chroma_service.get_conversation_history(
        user_id=user_id,
        bucket_id=bucket_id,
        limit=self.max_history
    )

    # Add only last 4 messages for context (speed optimized)
    for msg in stored_history[-4:]:
        messages.append({
            "role": msg['role'],
            "content": msg['content']
        })

    # Also add any session-based conversation history if provided
    if conversation_history:
        for msg in conversation_history[-6:]:
            # Avoid exact duplicates of messages already queued
            if msg not in messages:
                messages.append(msg)

    # Add current query with context
    user_message = f"""Based on the following document sections, answer my question accurately.

DOCUMENT SECTIONS:
{context}

QUESTION: {query}

Instructions: Synthesize from multiple documents if relevant. Be detailed but concise. Do NOT mention document names or sources at the end."""

    messages.append({"role": "user", "content": user_message})

    # Step 7: Generate response with fallback
    for model_key in self.fallback_order:
        result = self._call_ai_model(model_key, messages)

        if result['success']:
            # Step 8: Store conversation in persistent memory (best effort)
            _remember(result['response'])

            return {
                "success": True,
                "response": result['response'],
                "model": result['model'],
                "sources": list(sources.keys()),
                "source_files": list(sources.values()),
                "chunks_used": len(enhanced_chunks),
                "chunks_filtered": len(chunks) - len(relevant_chunks)
            }

    return {
        "success": False,
        "error": "All AI models failed to generate a response"
    }
1550
+
1551
def query_stream(self, user_id: str, query: str,
                 doc_ids: list[str] = None,
                 bucket_id: str = None,
                 chat_id: str = ""):
    """
    Streaming version of query - yields response events as they arrive.
    Returns a generator intended for SSE streaming.

    The query is first parsed with self._parse_query_with_ai. When the parse
    result has needs_metadata set, the request is delegated to
    self._stream_metadata_query; otherwise chunk retrieval is used.

    Args:
        user_id: Owner of the documents and conversation memory.
        query: The user's question.
        doc_ids: Optional document ids to search. When None and the query
            references documents by name, retrieval is limited to those.
        bucket_id: Optional bucket forwarded to search and memory calls.
        chat_id: Chat whose history is loaded and to which the exchange is
            stored. History is only loaded when this is non-empty.

    Yields:
        {"type": "sources", "sources": [...], "source_files": [...]} once,
        {"type": "chunk", "content": <text>} per streamed piece, then
        {"type": "done", "model": <name>}; or {"type": "error", "content":
        <message>} when nothing was retrieved or no model produced output.
        A failure to store the conversation is logged and does not prevent
        the final "done" event.
    """
    import time

    # Step 0: AI-powered query parsing - understand intent and extract structured parameters
    parsed = self._parse_query_with_ai(query)
    print(f"[QUERY ROUTING] AI-parsed query: {parsed}")

    # needs_metadata = True means query requires aggregate data across all documents
    if parsed.get('needs_metadata', False):
        yield from self._stream_metadata_query(user_id, bucket_id, query, parsed, chat_id)
        return

    # All other query types continue with top-K chunk retrieval.

    # Step 1: Expand query for better retrieval (handles "module 5" -> "module five", etc.)
    expanded_queries = self._expand_query(query)
    print(f"[DEBUG] Query expansion: {expanded_queries}")

    # Step 1.5: Detect if user is asking about a specific document by name
    user_docs = chroma_service.get_user_documents(user_id, bucket_id)
    referenced_doc_ids = self._detect_document_reference(query, user_docs)
    if referenced_doc_ids:
        print(f"[DEBUG] Detected document reference in query: {referenced_doc_ids}")
        # Restrict retrieval to the mentioned documents unless the caller
        # already supplied an explicit doc_ids filter.
        if doc_ids is None:
            doc_ids = referenced_doc_ids

    # Step 2: Retrieve chunks using all query variations and merge unique results
    t1 = time.time()
    all_chunks = []
    seen_chunk_ids = set()

    for q in expanded_queries:
        chunks = chroma_service.search_chunks(
            user_id=user_id,
            query=q,
            doc_ids=doc_ids,
            bucket_id=bucket_id,
            top_k=self.top_k
        )
        for chunk in chunks:
            # Fall back to a text prefix as identity when no chunk_id is present.
            chunk_id = chunk.get('chunk_id', chunk['text'][:50])
            if chunk_id not in seen_chunk_ids:
                seen_chunk_ids.add(chunk_id)
                all_chunks.append(chunk)

    # Sort by relevance (distance) and limit
    all_chunks.sort(key=lambda x: x.get('distance', 0))
    chunks = all_chunks[:self.top_k]

    print(f"[TIMING] ChromaDB search with expansion: {time.time()-t1:.2f}s")

    # Debug: Show what chunks we're getting
    print(f"[DEBUG] Retrieved {len(chunks)} unique chunks from {len(expanded_queries)} queries:")
    for i, c in enumerate(chunks[:5]):  # Show first 5
        print(f" Chunk {i+1} (dist={c.get('distance', 0):.3f}): {c['text'][:100]}...")

    # Step 3: Use all retrieved chunks; only filter when there are very many.
    relevant_chunks = chunks

    if len(relevant_chunks) > 100:
        # Keep only chunks with reasonable similarity
        relevant_chunks = [c for c in chunks if c.get('distance', 0) < self.relevance_threshold]
        if not relevant_chunks:
            relevant_chunks = chunks[:80]  # Fallback to top 80

    if not relevant_chunks:
        yield {"type": "error", "content": "No relevant documents found. Please upload documents first."}
        return

    # Step 4: Build context with prominent document source labels
    t2 = time.time()
    context_parts = []
    sources = {}
    doc_lookup = {}  # doc_id -> get_document result, so each document is fetched once

    for i, chunk in enumerate(relevant_chunks, 1):
        doc_id = chunk['doc_id']
        filename = chunk.get('filename', 'Document')

        # Get filename from chroma if not in chunk
        if filename == 'Document':
            if doc_id not in doc_lookup:
                doc_lookup[doc_id] = chroma_service.get_document(doc_id, user_id)
            doc_info = doc_lookup[doc_id]
            if doc_info:
                filename = doc_info.get('filename', 'Document')

        sources[doc_id] = filename
        # Label every section with its document and running section number
        section = f"=== DOCUMENT: {filename} (Section {i}) ===\n{chunk['text']}"
        context_parts.append(section)

    context = "\n\n" + "\n\n".join(context_parts)
    print(f"[TIMING] Context build: {time.time()-t2:.2f}s")
    print(f"[DEBUG] Context length: {len(context)} chars, chunks: {len(relevant_chunks)}")

    # Send sources first
    yield {"type": "sources", "sources": list(sources.keys()), "source_files": list(sources.values())}

    # Step 5: Load conversation history for this chat
    stored_history = []
    if chat_id:
        try:
            all_history = chroma_service.get_conversation_history(
                user_id=user_id,
                bucket_id=bucket_id,
                limit=50  # Get more, filter by chat_id
            )
            # Keep this chat's messages, plus messages without a chat_id
            # that belong to the same bucket.
            stored_history = [msg for msg in all_history
                              if msg.get('chat_id', '') == chat_id or
                              (not msg.get('chat_id') and msg.get('bucket_id', '') == (bucket_id or ''))]
            stored_history = stored_history[-self.max_history:]
            print(f"[DEBUG] Loaded {len(stored_history)} history messages for chat {chat_id}")
        except Exception as e:
            print(f"[DEBUG] Failed to load history: {e}")

    # Step 6: Detect query type and build conversation context
    query_type = self._detect_query_type(query, stored_history)
    conversation_context = self._build_conversation_context(stored_history, query)
    print(f"[DEBUG] Query type: {query_type}, has conversation context: {bool(conversation_context)}")

    # List the source documents for cross-document queries
    doc_list = ""
    if query_type in ['cross_document', 'comparison']:
        doc_names = list(sources.values())
        if doc_names:
            doc_list = f"\n\nDOCUMENTS IN THIS BUCKET: {', '.join(set(doc_names))}"

    # Step 7: Build messages with the conversational prompt
    system_prompt = """You are Iribl AI, a smart document assistant. Be conversational, precise, and THOROUGH.

## FINDING INFORMATION (CRITICAL)
1. Search EVERY document section before saying something isn't there
2. Look for ALL types of values: per-item amounts, TOTALS, AGGREGATES, counts, numbers of people/items
3. Information may be phrased differently - "total sum insured", "aggregate SI", "Sum Insured" could all refer to different values
4. When asked about "total" - look for aggregate/overall amounts, not per-unit amounts
5. When asked "how many" - look for counts, numbers, quantities in the documents
6. NEVER say "not mentioned" unless you've checked every single section and truly cannot find it

## RESPONSE QUALITY
1. NEVER start with preambles like "Based on a thorough review..." - just answer directly
2. If user says "it", "this", "that" - refer to previous conversation for context
3. Provide COMPLETE answers - include ALL relevant details, numbers, and figures
4. When numbers exist - mention BOTH per-unit AND total/aggregate if available
5. Format responses clearly with bold, bullets, and structure

## ACCURACY RULES
1. Only answer from the documents provided - never use external knowledge
2. When asked about Person A, only give Person A's info - never mix up entities
3. If documents conflict, state both versions

## FORMATTING
- **Bold** for names, numbers, key terms
- Bullet points for lists (comprehensive, include all items)
- Tables for comparisons
- No document source lists at the end

When asked about numbers/totals/counts - SEARCH THOROUGHLY and provide ALL relevant figures found in the documents."""

    messages = [{"role": "system", "content": system_prompt}]

    # Add conversation history for memory (needed for pronoun resolution)
    for msg in stored_history:
        messages.append({
            "role": msg['role'],
            "content": msg['content']
        })

    # Build user message with context injection for pronouns
    context_injection = ""
    if query_type == 'followup' and conversation_context:
        context_injection = f"""
CONVERSATION CONTEXT (use this to understand pronouns like "it", "this", "that"):
{conversation_context}

"""

    user_message = f"""{context_injection}DOCUMENT SECTIONS (search ALL of these thoroughly):
{context}{doc_list}

QUESTION: {query}

INSTRUCTIONS:
- Answer directly and completely
- Include ALL relevant numbers, totals, counts, and details from the documents
- If this is a follow-up, use conversation history to understand what I'm referring to
- For number questions: look for per-unit values, totals, aggregates, and counts - include all that are relevant"""

    messages.append({"role": "user", "content": user_message})

    # Step 8: Stream the response - try DeepSeek first, then the fallback models
    full_response = ""
    model_used = None

    if self.use_deepseek:
        for chunk_data in self._call_deepseek_streaming(messages):
            if "error" in chunk_data:
                break  # Fall through to the fallback models
            if "chunk" in chunk_data:
                full_response += chunk_data["chunk"]
                model_used = chunk_data["model"]
                yield {"type": "chunk", "content": chunk_data["chunk"]}

    # Fall back to the OpenRouter models if DeepSeek produced nothing
    if not full_response:
        for model_key in self.fallback_order:
            had_response = False
            for chunk_data in self._call_ai_model_streaming(model_key, messages):
                if "error" in chunk_data:
                    break
                if "chunk" in chunk_data:
                    had_response = True
                    full_response += chunk_data["chunk"]
                    model_used = chunk_data["model"]
                    yield {"type": "chunk", "content": chunk_data["chunk"]}

            if had_response:
                break

    if full_response:
        # Step 9: Store conversation with chat_id for proper linking. The
        # answer has already been streamed, so a storage failure is logged
        # and the stream still finishes with a "done" event.
        try:
            chroma_service.store_conversation(user_id, "user", query, bucket_id or "", chat_id)
            chroma_service.store_conversation(user_id, "assistant", full_response, bucket_id or "", chat_id)
        except Exception as e:
            print(f"[QUERY STREAM] Failed to store conversation: {e}")
        yield {"type": "done", "model": model_used}
    else:
        yield {"type": "error", "content": "Failed to generate response"}
1792
+
1793
def clear_memory(self, user_id: str, bucket_id: str = None) -> bool:
    """
    Clear stored conversation memory for a user.

    Delegates to chroma_service.clear_conversation with the given user and
    optional bucket, and returns whatever that call returns.
    """
    outcome = chroma_service.clear_conversation(user_id, bucket_id)
    return outcome
1796
+
1797
def generate_summary(self, content: str, filename: str = "") -> dict:
    """
    Generate a short summary (2-3 sentences) of the document content.

    DeepSeek is tried first when self.use_deepseek is set; the models in
    self.fallback_order are then tried in order through self._call_ai_model.
    A model that reports success but returns empty or null text is treated
    as a failure, so the next model is tried.

    Args:
        content: Document text; only the first 4000 characters are sent.
        filename: Document name included in the prompt and in the fallback
            summary.

    Returns:
        {"success": True, "summary": <text>, "model": <name>} on success,
        otherwise {"success": False, "error": ..., "summary":
        "Document: <filename>"}.
    """
    # Slicing already returns the whole string when it is shorter than the cap.
    truncated_content = content[:4000]

    summary_prompt = f"""Please provide a concise 2-3 sentence summary of the following document.
Focus on the main topic, key points, and purpose of the document.
Do not include any preamble like "This document..." - just state the summary directly.

Document: {filename}
Content:
{truncated_content}

Summary:"""

    messages = [
        {"role": "system", "content": "You are a document summarization assistant. Provide brief, accurate summaries in 2-3 sentences."},
        {"role": "user", "content": summary_prompt}
    ]

    # Try DeepSeek first if available
    if self.use_deepseek:
        try:
            import requests
            headers = {
                "Authorization": f"Bearer {self.deepseek_api_key}",
                "Content-Type": "application/json"
            }
            payload = {
                "model": self.deepseek_model,
                "messages": messages,
                "max_tokens": 200,
                "temperature": 0.3
            }
            response = requests.post(
                f"{self.deepseek_base_url}/chat/completions",
                headers=headers,
                json=payload,
                timeout=30
            )
            if response.status_code == 200:
                text = response.json()['choices'][0]['message']['content']
                if text and text.strip():
                    return {
                        "success": True,
                        "summary": text.strip(),
                        "model": "deepseek"
                    }
                print("[DEEPSEEK SUMMARY] Empty response, falling back")
            else:
                # Make the reason for falling back visible in the logs.
                print(f"[DEEPSEEK SUMMARY] API error: {response.status_code}")
        except Exception as e:
            print(f"[DEEPSEEK SUMMARY] Error: {e}")

    # Fallback to OpenRouter models
    for model_key in self.fallback_order:
        result = self._call_ai_model(model_key, messages)
        if not result['success']:
            continue
        text = result.get('response')
        # A provider may report success with null/blank content; calling
        # .strip() on None would raise, so move on to the next model.
        if text and text.strip():
            return {
                "success": True,
                "summary": text.strip(),
                "model": result['model']
            }

    return {
        "success": False,
        "error": "Failed to generate summary with all models",
        "summary": f"Document: {filename}"  # Fallback summary
    }
1866
+
1867
+
1868
+ # Singleton instance
1869
+ rag_service = RAGService()
1870
+
static/css/styles.css ADDED
@@ -0,0 +1,2567 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* ==================== CSS Variables & Root Styles ==================== */
2
+ :root {
3
+ /* Dark Mode Color Palette */
4
+ --bg-darkest: #0a0a0a;
5
+ --bg-dark: #121212;
6
+ --bg-medium: #1a1a1a;
7
+ --bg-light: #242424;
8
+ --bg-lighter: #2d2d2d;
9
+ --bg-hover: #363636;
10
+
11
+ /* Accent Colors */
12
+ --accent-primary: #ffffff;
13
+ --accent-secondary: #e0e0e0;
14
+ --accent-muted: #888888;
15
+
16
+ /* Glass Effect */
17
+ --glass-bg: rgba(255, 255, 255, 0.03);
18
+ --glass-border: rgba(255, 255, 255, 0.08);
19
+ --glass-shadow: rgba(0, 0, 0, 0.5);
20
+
21
+ /* Text Colors */
22
+ --text-primary: #ffffff;
23
+ --text-secondary: rgba(255, 255, 255, 0.7);
24
+ --text-muted: rgba(255, 255, 255, 0.4);
25
+
26
+ /* Status Colors */
27
+ --success: #4ade80;
28
+ --error: #f87171;
29
+ --info: #60a5fa;
30
+
31
+ /* Border Radius */
32
+ --radius-sm: 6px;
33
+ --radius-md: 10px;
34
+ --radius-lg: 16px;
35
+ --radius-xl: 24px;
36
+
37
+ /* Transitions */
38
+ --transition-fast: 0.15s ease;
39
+ --transition-smooth: 0.3s ease;
40
+ --transition-bounce: 0.3s cubic-bezier(0.68, -0.55, 0.265, 1.55);
41
+
42
+ /* Sidebar Width */
43
+ --sidebar-width: 300px;
44
+ --sidebar-collapsed: 50px;
45
+ }
46
+
47
+ /* ==================== Global Styles ==================== */
48
+ * {
49
+ margin: 0;
50
+ padding: 0;
51
+ box-sizing: border-box;
52
+ }
53
+
54
+ html {
55
+ font-size: 16px;
56
+ scroll-behavior: smooth;
57
+ height: 100vh;
58
+ overflow: hidden;
59
+ }
60
+
61
+ body {
62
+ font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
63
+ background: var(--bg-darkest);
64
+ color: var(--text-primary);
65
+ height: 100vh;
66
+ overflow: hidden;
67
+ }
68
+
69
+ /* ==================== Glass Panels ==================== */
70
+ .glass-panel {
71
+ background: var(--bg-dark);
72
+ border: 1px solid var(--glass-border);
73
+ border-radius: var(--radius-lg);
74
+ box-shadow: 0 4px 20px var(--glass-shadow);
75
+ }
76
+
77
+ /* ==================== App Container ==================== */
78
+ .app-container {
79
+ position: relative;
80
+ z-index: 10;
81
+ height: 100vh;
82
+ display: flex;
83
+ flex-direction: column;
84
+ overflow: hidden;
85
+ }
86
+
87
+ /* ==================== Main Layout ==================== */
88
+ .main-content {
89
+ flex: 1;
90
+ display: flex;
91
+ padding: 1rem;
92
+ gap: 1rem;
93
+ height: 100vh;
94
+ /* Full viewport height (the previous calc(100vh - 0px) subtracted nothing) */
95
+ min-height: 0;
96
+ /* Let this flex item shrink below its content height so inner panels scroll instead of overflowing */
97
+ overflow: hidden;
98
+ }
99
+
100
+ /* ==================== Dual Sidebars ==================== */
101
+ .sidebar {
102
+ width: var(--sidebar-width);
103
+ height: 100%;
104
+ /* Fill available height */
105
+ display: flex;
106
+ flex-direction: column;
107
+ flex-shrink: 0;
108
+ position: relative;
109
+ transition: width var(--transition-smooth), opacity var(--transition-smooth);
110
+ }
111
+
112
+ .sidebar.collapsed {
113
+ width: var(--sidebar-collapsed);
114
+ }
115
+
116
+ .sidebar.collapsed .sidebar-content {
117
+ opacity: 0;
118
+ pointer-events: none;
119
+ }
120
+
121
+ .sidebar.collapsed .sidebar-toggle .toggle-icon {
122
+ transform: rotate(180deg);
123
+ }
124
+
125
+ .sidebar-content {
126
+ flex: 1;
127
+ display: flex;
128
+ flex-direction: column;
129
+ gap: 0.75rem;
130
+ overflow-y: auto;
131
+ overflow-x: hidden;
132
+ transition: opacity var(--transition-smooth);
133
+ }
134
+
135
+ .sidebar-content::-webkit-scrollbar {
136
+ width: 4px;
137
+ }
138
+
139
+ .sidebar-content::-webkit-scrollbar-thumb {
140
+ background: var(--bg-hover);
141
+ border-radius: 2px;
142
+ }
143
+
144
+ /* Sidebar Toggle Button */
145
+ .sidebar-toggle {
146
+ position: absolute;
147
+ top: 50%;
148
+ transform: translateY(-50%);
149
+ width: 24px;
150
+ height: 48px;
151
+ background: var(--bg-light);
152
+ border: 1px solid var(--glass-border);
153
+ display: flex;
154
+ align-items: center;
155
+ justify-content: center;
156
+ cursor: pointer;
157
+ z-index: 10;
158
+ transition: all var(--transition-fast);
159
+ }
160
+
161
+ .sidebar-toggle:hover {
162
+ background: var(--bg-hover);
163
+ }
164
+
165
+ .toggle-icon {
166
+ font-size: 0.7rem;
167
+ color: var(--text-muted);
168
+ transition: transform var(--transition-smooth);
169
+ }
170
+
171
+ .sidebar-left .sidebar-toggle {
172
+ right: -12px;
173
+ border-radius: 0 var(--radius-sm) var(--radius-sm) 0;
174
+ }
175
+
176
+ .sidebar-right .sidebar-toggle {
177
+ left: -12px;
178
+ border-radius: var(--radius-sm) 0 0 var(--radius-sm);
179
+ }
180
+
181
+ /* ==================== Sidebar Sections ==================== */
182
+ .sidebar-section {
183
+ padding: 1rem;
184
+ }
185
+
186
+ .section-header {
187
+ display: flex;
188
+ align-items: center;
189
+ justify-content: space-between;
190
+ cursor: pointer;
191
+ user-select: none;
192
+ }
193
+
194
+ .section-header:hover .collapse-icon {
195
+ color: var(--text-primary);
196
+ }
197
+
198
+ .sidebar-title {
199
+ font-size: 0.75rem;
200
+ font-weight: 600;
201
+ text-transform: uppercase;
202
+ letter-spacing: 0.5px;
203
+ color: var(--text-muted);
204
+ display: flex;
205
+ align-items: center;
206
+ gap: 0.5rem;
207
+ margin: 0;
208
+ }
209
+
210
+ .section-actions {
211
+ display: flex;
212
+ align-items: center;
213
+ gap: 0.25rem;
214
+ }
215
+
216
+ .collapse-icon {
217
+ font-size: 0.6rem;
218
+ color: var(--text-muted);
219
+ transition: transform var(--transition-smooth), color var(--transition-fast);
220
+ }
221
+
222
+ .collapsible.collapsed .collapse-icon {
223
+ transform: rotate(-90deg);
224
+ }
225
+
226
+ .section-body {
227
+ margin-top: 0.75rem;
228
+ max-height: 500px; /* fixed cap so the collapse to max-height: 0 can be transitioned; content taller than 500px is clipped by overflow: hidden */
229
+ overflow: hidden;
230
+ transition: max-height var(--transition-smooth), opacity var(--transition-smooth), margin var(--transition-smooth);
231
+ }
232
+
233
+ .collapsible.collapsed .section-body {
234
+ max-height: 0;
235
+ opacity: 0;
236
+ margin-top: 0;
237
+ }
238
+
239
+ /* ==================== User Section ==================== */
240
+ .user-section {
241
+ padding: 0.75rem 1rem !important;
242
+ }
243
+
244
+ .user-info-row {
245
+ display: flex;
246
+ align-items: center;
247
+ justify-content: space-between;
248
+ }
249
+
250
+ .user-badge {
251
+ display: flex;
252
+ align-items: center;
253
+ gap: 0.5rem;
254
+ }
255
+
256
+ .user-avatar {
257
+ width: 32px;
258
+ height: 32px;
259
+ background: var(--bg-hover);
260
+ border-radius: 50%;
261
+ display: flex;
262
+ align-items: center;
263
+ justify-content: center;
264
+ font-weight: 600;
265
+ font-size: 0.85rem;
266
+ }
267
+
268
+ .user-details {
269
+ display: flex;
270
+ flex-direction: column;
271
+ gap: 0.1rem;
272
+ }
273
+
274
+ .user-details span:first-child {
275
+ font-size: 0.9rem;
276
+ font-weight: 500;
277
+ }
278
+
279
+ .user-role {
280
+ font-size: 0.7rem;
281
+ color: var(--text-muted);
282
+ }
283
+
284
+ /* ==================== Custom Animated Dropdown ==================== */
285
+ .custom-select {
286
+ position: relative;
287
+ width: 100%;
288
+ margin-bottom: 0.75rem;
289
+ }
290
+
291
+ .custom-select.compact {
292
+ margin-bottom: 0;
293
+ width: auto;
294
+ min-width: 180px;
295
+ }
296
+
297
+ .select-trigger {
298
+ display: flex;
299
+ align-items: center;
300
+ justify-content: space-between;
301
+ padding: 0.65rem 1rem;
302
+ background: var(--bg-medium);
303
+ border: 1px solid var(--glass-border);
304
+ border-radius: var(--radius-md);
305
+ cursor: pointer;
306
+ transition: all var(--transition-fast);
307
+ }
308
+
309
+ .select-trigger:hover {
310
+ background: var(--bg-light);
311
+ border-color: rgba(255, 255, 255, 0.15);
312
+ }
313
+
314
+ .custom-select.open .select-trigger {
315
+ border-color: rgba(255, 255, 255, 0.2);
316
+ border-radius: var(--radius-md) var(--radius-md) 0 0;
317
+ }
318
+
319
+ .select-value {
320
+ font-size: 0.85rem;
321
+ color: var(--text-secondary);
322
+ white-space: nowrap;
323
+ overflow: hidden;
324
+ text-overflow: ellipsis;
325
+ }
326
+
327
+ .select-arrow {
328
+ font-size: 0.6rem;
329
+ color: var(--text-muted);
330
+ transition: transform var(--transition-smooth);
331
+ margin-left: 0.5rem;
332
+ }
333
+
334
+ .custom-select.open .select-arrow {
335
+ transform: rotate(180deg);
336
+ }
337
+
338
+ .select-options {
339
+ position: absolute;
340
+ top: 100%;
341
+ left: 0;
342
+ right: 0;
343
+ background: var(--bg-medium);
344
+ border: 1px solid var(--glass-border);
345
+ border-top: none;
346
+ border-radius: 0 0 var(--radius-md) var(--radius-md);
347
+ max-height: 0;
348
+ overflow: hidden;
349
+ opacity: 0;
350
+ z-index: 100;
351
+ transition: max-height var(--transition-smooth), opacity var(--transition-fast);
352
+ box-shadow: 0 8px 20px rgba(0, 0, 0, 0.4);
353
+ }
354
+
355
+ .custom-select.open .select-options {
356
+ max-height: 200px;
357
+ opacity: 1;
358
+ overflow-y: auto;
359
+ }
360
+
361
+ .select-options::-webkit-scrollbar {
362
+ width: 4px;
363
+ }
364
+
365
+ .select-options::-webkit-scrollbar-thumb {
366
+ background: var(--bg-hover);
367
+ border-radius: 2px;
368
+ }
369
+
370
+ .select-option {
371
+ padding: 0.6rem 1rem;
372
+ font-size: 0.85rem;
373
+ color: var(--text-secondary);
374
+ cursor: pointer;
375
+ transition: all var(--transition-fast);
376
+ display: flex;
377
+ align-items: center;
378
+ gap: 0.5rem;
379
+ }
380
+
381
+ .select-option:hover {
382
+ background: var(--bg-light);
383
+ color: var(--text-primary);
384
+ }
385
+
386
+ .select-option.active {
387
+ background: var(--bg-hover);
388
+ color: var(--text-primary);
389
+ }
390
+
391
+ .select-option .option-icon {
392
+ font-size: 1rem;
393
+ }
394
+
395
+ /* ==================== Buckets List ==================== */
396
+ .buckets-list {
397
+ display: flex;
398
+ flex-direction: column;
399
+ gap: 0.25rem;
400
+ max-height: 180px;
401
+ overflow-y: auto;
402
+ }
403
+
404
+ .bucket-item {
405
+ display: flex;
406
+ align-items: center;
407
+ gap: 0.5rem;
408
+ padding: 0.5rem 0.75rem;
409
+ border-radius: var(--radius-sm);
410
+ cursor: pointer;
411
+ transition: all var(--transition-fast);
412
+ }
413
+
414
+ .bucket-item:hover {
415
+ background: var(--bg-light);
416
+ }
417
+
418
+ .bucket-item.active {
419
+ background: var(--bg-light);
420
+ border-left: 2px solid var(--accent-primary);
421
+ }
422
+
423
+ .bucket-name {
424
+ flex: 1;
425
+ font-size: 0.85rem;
426
+ }
427
+
428
+ .bucket-count {
429
+ font-size: 0.7rem;
430
+ color: var(--text-muted);
431
+ background: var(--bg-hover);
432
+ padding: 0.1rem 0.4rem;
433
+ border-radius: var(--radius-sm);
434
+ }
435
+
436
+ .bucket-delete {
437
+ opacity: 0;
438
+ padding: 0.2rem;
439
+ font-size: 0.7rem;
440
+ transition: opacity var(--transition-fast);
441
+ }
442
+
443
+ .bucket-item:hover .bucket-delete, .bucket-item:focus-within .bucket-delete {
444
+ opacity: 1; /* reveal on pointer hover and also while anything inside the row holds keyboard focus */
445
+ }
446
+
447
+ /* ==================== Upload Zone ==================== */
448
+ .upload-zone {
449
+ padding: 1.5rem;
450
+ border: 1px dashed rgba(255, 255, 255, 0.15);
451
+ border-radius: var(--radius-md);
452
+ text-align: center;
453
+ cursor: pointer;
454
+ transition: all var(--transition-smooth);
455
+ background: var(--bg-medium);
456
+ }
457
+
458
+ .upload-zone:hover,
459
+ .upload-zone.dragover {
460
+ border-color: rgba(255, 255, 255, 0.3);
461
+ background: var(--bg-light);
462
+ transform: scale(1.02);
463
+ }
464
+
465
+ .upload-icon {
466
+ font-size: 2rem;
467
+ margin-bottom: 0.5rem;
468
+ }
469
+
470
+ .upload-title {
471
+ font-size: 0.9rem;
472
+ font-weight: 600;
473
+ margin-bottom: 0.25rem;
474
+ }
475
+
476
+ .upload-subtitle {
477
+ font-size: 0.75rem;
478
+ color: var(--text-muted);
479
+ }
480
+
481
+ .progress-info {
482
+ display: flex;
483
+ align-items: center;
484
+ gap: 0.5rem;
485
+ margin-bottom: 0.5rem;
486
+ }
487
+
488
+ .progress-bar {
489
+ height: 4px;
490
+ background: var(--bg-hover);
491
+ border-radius: 2px;
492
+ overflow: hidden;
493
+ }
494
+
495
+ .progress-fill {
496
+ height: 100%;
497
+ background: var(--accent-primary);
498
+ width: 0%;
499
+ transition: width var(--transition-smooth);
500
+ }
501
+
502
+ /* Cancel Upload Button */
503
+ .btn-cancel-upload {
504
+ margin-top: 0.75rem;
505
+ width: 100%;
506
+ padding: 0.5rem 1rem;
507
+ background: rgba(248, 113, 113, 0.15);
508
+ color: var(--error);
509
+ border: 1px solid rgba(248, 113, 113, 0.3);
510
+ border-radius: var(--radius-md);
511
+ font-size: 0.8rem;
512
+ font-weight: 500;
513
+ cursor: pointer;
514
+ transition: all var(--transition-fast);
515
+ }
516
+
517
+ .btn-cancel-upload:hover {
518
+ background: rgba(248, 113, 113, 0.25);
519
+ border-color: rgba(248, 113, 113, 0.5);
520
+ transform: translateY(-1px);
521
+ }
522
+
523
+ /* ==================== Documents Section (Right Sidebar) ==================== */
524
+ .documents-section {
525
+ flex: 1;
526
+ display: flex;
527
+ flex-direction: column;
528
+ min-height: 0;
529
+ max-height: 50%;
530
+ transition: all var(--transition-smooth);
531
+ }
532
+
533
+ /* ==================== Chat History Section (Right Sidebar) ==================== */
534
+ .chat-history-section {
535
+ flex: 1;
536
+ display: flex;
537
+ flex-direction: column;
538
+ min-height: 0;
539
+ max-height: 50%;
540
+ transition: all var(--transition-smooth);
541
+ }
542
+
543
+ .chat-history-section.collapsed {
544
+ flex: 0 0 auto;
545
+ min-height: auto;
546
+ }
547
+
548
+ .chat-history-section.collapsed .section-header {
549
+ padding-bottom: 0;
550
+ }
551
+
552
+ .chat-history-section .section-header {
553
+ padding-bottom: 0.5rem;
554
+ }
555
+
556
+ .documents-section.collapsed {
557
+ flex: 0 0 auto;
558
+ min-height: auto;
559
+ }
560
+
561
+ .documents-section.collapsed .section-header {
562
+ padding-bottom: 0;
563
+ }
564
+
565
+ .documents-section .section-header {
566
+ padding-bottom: 0.5rem;
567
+ }
568
+
569
+ .documents-body {
570
+ flex: 1;
571
+ overflow: hidden;
572
+ }
573
+
574
+ .chat-history-body {
575
+ flex: 1;
576
+ overflow: hidden;
577
+ }
578
+
579
+ .documents-list {
580
+ height: 100%;
581
+ overflow-y: auto;
582
+ display: flex;
583
+ flex-direction: column;
584
+ gap: 0.4rem;
585
+ padding-right: 0.25rem;
586
+ }
587
+
588
+ .doc-count {
589
+ margin-left: auto;
590
+ font-size: 0.7rem;
591
+ opacity: 0.6;
592
+ }
593
+
594
+ .document-item {
595
+ display: flex;
596
+ align-items: center;
597
+ gap: 0.5rem;
598
+ padding: 0.6rem 0.75rem;
599
+ background: transparent;
600
+ border: 1px solid transparent;
601
+ border-radius: var(--radius-md);
602
+ cursor: pointer;
603
+ transition: all var(--transition-fast);
604
+ position: relative;
605
+ }
606
+
607
+ .document-item:hover {
608
+ background: var(--bg-light);
609
+ border-color: var(--glass-border);
610
+ }
611
+
612
+ .doc-icon {
613
+ width: 32px;
614
+ height: 32px;
615
+ border-radius: var(--radius-sm);
616
+ display: flex;
617
+ align-items: center;
618
+ justify-content: center;
619
+ font-size: 1rem;
620
+ background: var(--bg-hover);
621
+ }
622
+
623
+ .doc-info {
624
+ flex: 1;
625
+ min-width: 0;
626
+ }
627
+
628
+ .doc-name {
629
+ font-size: 0.8rem;
630
+ font-weight: 500;
631
+ white-space: nowrap;
632
+ overflow: hidden;
633
+ text-overflow: ellipsis;
634
+ }
635
+
636
+ .doc-meta {
637
+ font-size: 0.65rem;
638
+ color: var(--text-muted);
639
+ margin-top: 0.1rem;
640
+ }
641
+
642
+ .doc-view,
643
+ .doc-delete {
644
+ opacity: 0;
645
+ padding: 0.25rem;
646
+ font-size: 0.8rem;
647
+ transition: opacity var(--transition-fast);
648
+ }
649
+
650
+ .document-item:hover .doc-view, .document-item:focus-within .doc-view,
650
+ .document-item:hover .doc-delete, .document-item:focus-within .doc-delete {
651
+ opacity: 1; /* reveal on pointer hover and also while anything inside the row holds keyboard focus */
652
+ }
654
+
655
+ .doc-view:hover {
656
+ color: var(--info);
657
+ }
658
+
659
+ .doc-delete:hover {
660
+ color: var(--error);
661
+ }
662
+
663
+ /* ==================== Chat Container ==================== */
664
+ .chat-container {
665
+ flex: 1;
666
+ display: flex;
667
+ flex-direction: column;
668
+ min-width: 0;
669
+ min-height: 0;
670
+ /* Critical: allows flex child to shrink */
671
+ overflow: hidden;
672
+ height: 100%;
673
+ /* Ensure it takes full height */
674
+ }
675
+
676
+ /* ==================== Chat Bucket Filter ==================== */
677
+ .chat-bucket-filter {
678
+ display: flex;
679
+ align-items: center;
680
+ gap: 0.75rem;
681
+ padding: 0.75rem 1.25rem;
682
+ border-bottom: 1px solid var(--glass-border);
683
+ background: var(--bg-dark);
684
+ flex-shrink: 0;
685
+ /* Prevent filter bar from shrinking */
686
+ }
687
+
688
+ .filter-label {
689
+ font-size: 0.8rem;
690
+ color: var(--text-muted);
691
+ }
692
+
693
+ /* New Chat Button */
694
+ .btn-new-chat {
695
+ margin-left: auto;
696
+ background: var(--accent-primary);
697
+ color: var(--bg-darkest);
698
+ padding: 0.4rem 0.75rem;
699
+ font-size: 0.75rem;
700
+ font-weight: 600;
701
+ border-radius: var(--radius-md);
702
+ white-space: nowrap;
703
+ transition: all var(--transition-fast);
704
+ }
705
+
706
+ .btn-new-chat:hover {
707
+ background: var(--accent-secondary);
708
+ transform: translateY(-1px);
709
+ }
710
+
711
+ /* Clear Chat Button */
712
+ .btn-clear-chat {
713
+ background: var(--bg-light);
714
+ color: var(--text-secondary);
715
+ padding: 0.4rem 0.75rem;
716
+ font-size: 0.75rem;
717
+ font-weight: 600;
718
+ border-radius: var(--radius-md);
719
+ border: 1px solid var(--glass-border);
720
+ white-space: nowrap;
721
+ transition: all var(--transition-fast);
722
+ }
723
+
724
+ .btn-clear-chat:hover {
725
+ background: var(--bg-hover);
726
+ color: var(--text-primary);
727
+ transform: translateY(-1px);
728
+ }
729
+
730
+ /* Chat History List */
731
+ .chat-history-list {
732
+ display: flex;
733
+ flex-direction: column;
734
+ gap: 0.35rem;
735
+ }
736
+
737
+ .chat-history-item {
738
+ display: flex;
739
+ align-items: center;
740
+ gap: 0.5rem;
741
+ padding: 0.5rem 0.6rem;
742
+ background: var(--bg-medium);
743
+ border: 1px solid transparent;
744
+ border-radius: var(--radius-md);
745
+ cursor: pointer;
746
+ transition: all var(--transition-fast);
747
+ }
748
+
749
+ .chat-history-item:hover {
750
+ background: var(--bg-light);
751
+ border-color: var(--glass-border);
752
+ }
753
+
754
+ .chat-history-item.active {
755
+ background: var(--bg-light);
756
+ border-color: var(--accent-muted);
757
+ }
758
+
759
+ .chat-history-icon {
760
+ font-size: 0.9rem;
761
+ flex-shrink: 0;
762
+ }
763
+
764
+ .chat-history-info {
765
+ flex: 1;
766
+ min-width: 0;
767
+ }
768
+
769
+ .chat-history-topic {
770
+ font-size: 0.8rem;
771
+ font-weight: 500;
772
+ white-space: nowrap;
773
+ overflow: hidden;
774
+ text-overflow: ellipsis;
775
+ }
776
+
777
+ .chat-history-date {
778
+ font-size: 0.65rem;
779
+ color: var(--text-muted);
780
+ margin-top: 0.1rem;
781
+ }
782
+
783
+ .chat-history-delete {
784
+ opacity: 0;
785
+ padding: 0.2rem;
786
+ font-size: 0.75rem;
787
+ transition: opacity var(--transition-fast);
788
+ }
789
+
790
+ .chat-history-item:hover .chat-history-delete, .chat-history-item:focus-within .chat-history-delete {
791
+ opacity: 1; /* reveal on pointer hover and also while anything inside the row holds keyboard focus */
792
+ }
793
+
794
+ .chat-history-delete:hover {
795
+ color: var(--error);
796
+ }
797
+
798
+
799
+ /* ==================== Chat Messages ==================== */
800
+ .chat-messages {
801
+ flex: 1;
802
+ overflow-y: auto;
803
+ overflow-x: hidden;
804
+ padding: 1rem;
805
+ display: flex;
806
+ flex-direction: column;
807
+ gap: 1rem;
808
+ min-height: 0;
809
+ /* Critical: allows scrolling to work */
810
+ }
811
+
812
+ /* Custom scrollbar for chat messages */
813
+ .chat-messages::-webkit-scrollbar {
814
+ width: 6px;
815
+ }
816
+
817
+ .chat-messages::-webkit-scrollbar-track {
818
+ background: transparent;
819
+ }
820
+
821
+ .chat-messages::-webkit-scrollbar-thumb {
822
+ background: var(--bg-hover);
823
+ border-radius: 3px;
824
+ }
825
+
826
+ .chat-messages::-webkit-scrollbar-thumb:hover {
827
+ background: var(--bg-lighter);
828
+ }
829
+
830
+ .message {
831
+ display: flex;
832
+ gap: 0.75rem;
833
+ max-width: 85%;
834
+ animation: messageSlide 0.3s ease-out;
835
+ }
836
+
837
+ @keyframes messageSlide {
838
+ from {
839
+ opacity: 0;
840
+ transform: translateY(10px);
841
+ }
842
+
843
+ to {
844
+ opacity: 1;
845
+ transform: translateY(0);
846
+ }
847
+ }
848
+
849
+ .message.user {
850
+ align-self: flex-end;
851
+ flex-direction: row-reverse;
852
+ }
853
+
854
+ .message-avatar {
855
+ width: 32px;
856
+ height: 32px;
857
+ border-radius: 50%;
858
+ display: flex;
859
+ align-items: center;
860
+ justify-content: center;
861
+ flex-shrink: 0;
862
+ font-size: 0.9rem;
863
+ background: var(--bg-light);
864
+ border: 1px solid var(--glass-border);
865
+ }
866
+
867
+ .message-content {
868
+ padding: 1rem 1.25rem;
869
+ border-radius: var(--radius-lg);
870
+ font-size: 0.9rem;
871
+ line-height: 1.6;
872
+ }
873
+
874
+ .message.user .message-content {
875
+ background: var(--accent-primary);
876
+ color: var(--bg-darkest);
877
+ border-bottom-right-radius: 4px;
878
+ }
879
+
880
+ .message.assistant .message-content {
881
+ background: linear-gradient(135deg, var(--bg-light) 0%, var(--bg-medium) 100%);
882
+ border: 1px solid var(--glass-border);
883
+ border-bottom-left-radius: 4px;
884
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.15);
885
+ }
886
+
887
+ .message-sources {
888
+ margin-top: 0.5rem;
889
+ padding-top: 0.5rem;
890
+ border-top: 1px solid rgba(255, 255, 255, 0.1);
891
+ font-size: 0.75rem;
892
+ color: var(--text-muted);
893
+ }
894
+
895
+ .source-tag {
896
+ display: inline-block;
897
+ padding: 0.1rem 0.4rem;
898
+ background: var(--bg-hover);
899
+ border-radius: var(--radius-sm);
900
+ margin-left: 0.25rem;
901
+ }
902
+
903
+ /* ==================== Markdown Styling in Messages ==================== */
904
+ .message-content h1,
905
+ .message-content h2,
906
+ .message-content h3,
907
+ .message-content h4,
908
+ .message-content .msg-header {
909
+ font-weight: 600;
910
+ color: var(--text-primary);
911
+ margin: 1.25rem 0 0.6rem 0;
912
+ line-height: 1.4;
913
+ }
914
+
915
+ .message-content h1 {
916
+ font-size: 1.25rem;
917
+ background: linear-gradient(90deg, var(--accent-primary), var(--accent-secondary));
918
+ -webkit-background-clip: text;
919
+ -webkit-text-fill-color: transparent;
920
+ background-clip: text;
921
+ padding-bottom: 0.5rem;
922
+ border-bottom: 2px solid rgba(168, 85, 247, 0.3);
923
+ }
924
+
925
+ .message-content h2 {
926
+ font-size: 1.1rem;
927
+ color: var(--accent-secondary);
928
+ border-bottom: 1px solid rgba(168, 85, 247, 0.2);
929
+ padding-bottom: 0.4rem;
930
+ }
931
+
932
+ .message-content h3 {
933
+ font-size: 1rem;
934
+ color: var(--info);
935
+ }
936
+
937
+ .message-content h4 {
938
+ font-size: 0.95rem;
939
+ font-weight: 600;
940
+ color: var(--text-secondary);
941
+ margin: 0.9rem 0 0.4rem 0;
942
+ }
943
+
944
+ .message-content h1:first-child,
945
+ .message-content h2:first-child,
946
+ .message-content h3:first-child,
947
+ .message-content h4:first-child,
948
+ .message-content .msg-header:first-child {
949
+ margin-top: 0;
950
+ }
951
+
952
+ .message-content p,
953
+ .message-content .msg-para {
954
+ margin: 0.75rem 0;
955
+ line-height: 1.75;
956
+ }
957
+
958
+ .message-content p:first-child,
959
+ .message-content .msg-para:first-child {
960
+ margin-top: 0;
961
+ }
962
+
963
+ /* ==================== Enhanced Lists ==================== */
964
+ .message-content .formatted-list {
965
+ margin: 1rem 0;
966
+ padding-left: 0;
967
+ list-style: none;
968
+ }
969
+
970
+ .message-content ol.formatted-list {
971
+ counter-reset: item;
972
+ }
973
+
974
+ .message-content .formatted-list li {
975
+ position: relative;
976
+ padding: 0.5rem 0.75rem 0.5rem 2.25rem;
977
+ margin: 0.35rem 0;
978
+ background: rgba(255, 255, 255, 0.02);
979
+ border-radius: var(--radius-md);
980
+ border-left: 3px solid transparent;
981
+ line-height: 1.65;
982
+ transition: all 0.2s ease;
983
+ }
984
+
985
+ .message-content .formatted-list li:hover {
986
+ background: rgba(255, 255, 255, 0.04);
987
+ }
988
+
989
+ .message-content .formatted-list li.numbered {
990
+ border-left-color: var(--accent-primary);
991
+ }
992
+
993
+ .message-content .formatted-list li.bullet {
994
+ border-left-color: var(--info);
995
+ }
996
+
997
+ .message-content .formatted-list li.numbered .list-num {
998
+ position: absolute;
999
+ left: 0.6rem;
1000
+ font-weight: 700;
1001
+ color: var(--accent-primary);
1002
+ font-size: 0.9rem;
1003
+ }
1004
+
1005
+ .message-content .formatted-list li.bullet::before {
1006
+ content: "▸";
1007
+ position: absolute;
1008
+ left: 0.75rem;
1009
+ color: var(--info);
1010
+ font-size: 0.85em;
1011
+ font-weight: 600;
1012
+ }
1013
+
1014
+ .message-content .formatted-list.sub-list {
1015
+ margin: 0.5rem 0 0.5rem 1.5rem;
1016
+ }
1017
+
1018
+ .message-content .formatted-list.sub-list li {
1019
+ padding-left: 1.75rem;
1020
+ background: transparent;
1021
+ border-left: 2px solid rgba(168, 85, 247, 0.3);
1022
+ }
1023
+
1024
+ .message-content .formatted-list.sub-list li::before {
1025
+ content: "○";
1026
+ position: absolute;
1027
+ left: 0.5rem;
1028
+ color: var(--accent-muted);
1029
+ font-size: 0.7em;
1030
+ }
1031
+
1032
+ /* Legacy list support */
1033
+ .message-content ul,
1034
+ .message-content ol {
1035
+ margin: 0.75rem 0;
1036
+ padding-left: 1.5rem;
1037
+ }
1038
+
1039
+ .message-content li {
1040
+ margin: 0.4rem 0;
1041
+ padding-left: 0.5rem;
1042
+ line-height: 1.6;
1043
+ }
1044
+
1045
+ .message-content ul li::marker {
1046
+ color: var(--info);
1047
+ }
1048
+
1049
+ .message-content ol li::marker {
1050
+ color: var(--accent-secondary);
1051
+ font-weight: 600;
1052
+ }
1053
+
1054
+ /* Nested lists */
1055
+ .message-content ul ul,
1056
+ .message-content ol ol,
1057
+ .message-content ul ol,
1058
+ .message-content ol ul {
1059
+ margin: 0.25rem 0 0.25rem 1rem;
1060
+ }
1061
+
1062
+ /* ==================== Premium Tables ==================== */
1063
+ .message-content .table-wrapper {
1064
+ margin: 1rem 0;
1065
+ border-radius: var(--radius-md);
1066
+ overflow-x: auto;
1067
+ overflow-y: hidden;
1068
+ max-width: 100%;
1069
+ box-shadow: 0 2px 12px rgba(0, 0, 0, 0.2);
1070
+ border: 1px solid rgba(255, 255, 255, 0.08);
1071
+ }
1072
+
1073
+ .message-content table {
1074
+ width: 100%;
1075
+ border-collapse: collapse;
1076
+ font-size: 0.8rem;
1077
+ background: rgba(0, 0, 0, 0.2);
1078
+ table-layout: auto;
1079
+ }
1080
+
1081
+ .message-content thead {
1082
+ background: linear-gradient(135deg, rgba(168, 85, 247, 0.2) 0%, rgba(96, 165, 250, 0.15) 100%);
1083
+ }
1084
+
1085
+ .message-content th {
1086
+ padding: 0.6rem 0.75rem;
1087
+ font-weight: 600;
1088
+ color: var(--text-primary);
1089
+ text-align: left;
1090
+ border-bottom: 2px solid rgba(168, 85, 247, 0.3);
1091
+ text-transform: uppercase;
1092
+ font-size: 0.7rem;
1093
+ letter-spacing: 0.3px;
1094
+ white-space: nowrap;
1095
+ }
1096
+
1097
+ .message-content td {
1098
+ padding: 0.5rem 0.75rem;
1099
+ border-bottom: 1px solid rgba(255, 255, 255, 0.05);
1100
+ color: var(--text-secondary);
1101
+ word-break: break-word;
1102
+ max-width: 200px;
1103
+ }
1104
+
1105
+ .message-content tbody tr {
1106
+ transition: background 0.2s ease;
1107
+ }
1108
+
1109
+ .message-content tbody tr:hover {
1110
+ background: rgba(168, 85, 247, 0.08);
1111
+ }
1112
+
1113
+ .message-content tbody tr:nth-child(even) {
1114
+ background: rgba(255, 255, 255, 0.02);
1115
+ }
1116
+
1117
+ .message-content tbody tr:nth-child(even):hover {
1118
+ background: rgba(168, 85, 247, 0.08);
1119
+ }
1120
+
1121
+ .message-content tbody tr:last-child td {
1122
+ border-bottom: none;
1123
+ }
1124
+
1125
+ /* ==================== Code Blocks ==================== */
1126
+ .message-content .code-block {
1127
+ margin: 1rem 0;
1128
+ padding: 1rem 1.25rem;
1129
+ background: linear-gradient(135deg, rgba(0, 0, 0, 0.4) 0%, rgba(0, 0, 0, 0.3) 100%);
1130
+ border: 1px solid rgba(255, 255, 255, 0.08);
1131
+ border-radius: var(--radius-lg);
1132
+ overflow-x: auto;
1133
+ font-family: 'Consolas', 'Monaco', 'Fira Code', monospace;
1134
+ font-size: 0.85rem;
1135
+ line-height: 1.6;
1136
+ }
1137
+
1138
+ .message-content .code-block code {
1139
+ background: none;
1140
+ padding: 0;
1141
+ border: none;
1142
+ color: var(--info); font-size: inherit; /* undo the generic code rule's 0.85em so it does not compound with the block's 0.85rem */
1143
+ }
1144
+
1145
+ /* Inline code */
1146
+ .message-content code.inline-code,
1147
+ .message-content code {
1148
+ background: rgba(96, 165, 250, 0.12);
1149
+ padding: 0.2rem 0.5rem;
1150
+ border-radius: var(--radius-sm);
1151
+ font-family: 'Consolas', 'Monaco', monospace;
1152
+ font-size: 0.85em;
1153
+ color: var(--info);
1154
+ border: 1px solid rgba(96, 165, 250, 0.2);
1155
+ }
1156
+
1157
+ /* Bold and emphasis */
1158
+ .message-content strong,
1159
+ .message-content b {
1160
+ font-weight: 700;
1161
+ color: var(--text-primary);
1162
+ }
1163
+
1164
+ .message-content em,
1165
+ .message-content i {
1166
+ font-style: italic;
1167
+ color: var(--text-secondary);
1168
+ }
1169
+
1170
+ /* ==================== Dividers ==================== */
1171
+ .message-content hr.divider {
1172
+ border: none;
1173
+ height: 1px;
1174
+ background: linear-gradient(90deg, transparent, rgba(168, 85, 247, 0.4), transparent);
1175
+ margin: 1.5rem 0;
1176
+ }
1177
+
1178
+ /* ==================== Blockquotes ==================== */
1179
+ .message-content blockquote {
1180
+ border-left: 4px solid var(--accent-primary);
1181
+ margin: 1rem 0;
1182
+ padding: 0.75rem 1.25rem;
1183
+ background: linear-gradient(135deg, rgba(168, 85, 247, 0.08) 0%, rgba(96, 165, 250, 0.05) 100%);
1184
+ border-radius: 0 var(--radius-md) var(--radius-md) 0;
1185
+ font-style: italic;
1186
+ color: var(--text-secondary);
1187
+ }
1188
+
1189
+ /* ==================== Typing Indicator ==================== */
1190
+ .typing-indicator {
1191
+ display: flex;
1192
+ gap: 0.75rem;
1193
+ padding: 1rem;
1194
+ }
1195
+
1196
+ .typing-dots {
1197
+ display: flex;
1198
+ gap: 4px;
1199
+ padding: 0.75rem 1rem;
1200
+ background: var(--bg-light);
1201
+ border: 1px solid var(--glass-border);
1202
+ border-radius: var(--radius-lg);
1203
+ }
1204
+
1205
+ .typing-dot {
1206
+ width: 6px;
1207
+ height: 6px;
1208
+ background: var(--text-muted);
1209
+ border-radius: 50%;
1210
+ animation: typingBounce 1.4s infinite ease-in-out;
1211
+ }
1212
+
1213
+ .typing-dot:nth-child(1) {
1214
+ animation-delay: 0s;
1215
+ }
1216
+
1217
+ .typing-dot:nth-child(2) {
1218
+ animation-delay: 0.2s;
1219
+ }
1220
+
1221
+ .typing-dot:nth-child(3) {
1222
+ animation-delay: 0.4s;
1223
+ }
1224
+
1225
+ @keyframes typingBounce {
1226
+
1227
+ 0%,
1228
+ 80%,
1229
+ 100% {
1230
+ transform: scale(0.6);
1231
+ opacity: 0.4;
1232
+ }
1233
+
1234
+ 40% {
1235
+ transform: scale(1);
1236
+ opacity: 1;
1237
+ }
1238
+ }
1239
+
1240
+ /* ==================== Chat Input ==================== */
1241
+ .chat-input-container {
1242
+ padding: 1rem;
1243
+ background: var(--bg-dark);
1244
+ border-top: 1px solid var(--glass-border);
1245
+ }
1246
+
1247
+ .chat-input-wrapper {
1248
+ display: flex;
1249
+ gap: 0.75rem;
1250
+ align-items: flex-end;
1251
+ }
1252
+
1253
+ .chat-input {
1254
+ flex: 1;
1255
+ padding: 0.75rem 1rem;
1256
+ background: var(--bg-medium);
1257
+ border: 1px solid var(--glass-border);
1258
+ border-radius: var(--radius-lg);
1259
+ color: var(--text-primary);
1260
+ font-size: 0.9rem;
1261
+ resize: none;
1262
+ max-height: 150px;
1263
+ font-family: inherit;
1264
+ transition: all var(--transition-fast);
1265
+ }
1266
+
1267
+ .chat-input:focus {
1268
+ outline: none;
1269
+ border-color: rgba(255, 255, 255, 0.2);
1270
+ background: var(--bg-light);
1271
+ }
1272
+
1273
+ .chat-input::placeholder {
1274
+ color: var(--text-muted);
1275
+ }
1276
+
1277
+ .send-btn {
1278
+ width: 44px;
1279
+ height: 44px;
1280
+ border-radius: 50%;
1281
+ background: var(--accent-primary);
1282
+ border: none;
1283
+ color: var(--bg-darkest);
1284
+ font-size: 1.1rem;
1285
+ cursor: pointer;
1286
+ display: flex;
1287
+ align-items: center;
1288
+ justify-content: center;
1289
+ transition: all var(--transition-fast);
1290
+ }
1291
+
1292
+ .send-btn:hover {
1293
+ transform: scale(1.05);
1294
+ }
1295
+
1296
+ .send-btn:disabled {
1297
+ opacity: 0.3;
1298
+ cursor: not-allowed;
1299
+ transform: none;
1300
+ }
1301
+
1302
+ /* Stop Generation Button */
1303
+ .stop-btn {
1304
+ width: 44px;
1305
+ height: 44px;
1306
+ border-radius: 50%;
1307
+ background: var(--error);
1308
+ border: none;
1309
+ color: white;
1310
+ font-size: 1rem;
1311
+ cursor: pointer;
1312
+ display: flex;
1313
+ align-items: center;
1314
+ justify-content: center;
1315
+ transition: all var(--transition-fast);
1316
+ animation: pulse-stop 1.5s ease-in-out infinite;
1317
+ }
1318
+
1319
+ .stop-btn:hover {
1320
+ transform: scale(1.1);
1321
+ background: #dc2626;
1322
+ }
1323
+
1324
+ /* Ring pulse for .stop-btn: a red halo grows from 0 to 8px while fading to
+    transparent at the midpoint, then snaps back for the next cycle. */
+ @keyframes pulse-stop {
+     50% {
+         box-shadow: 0 0 0 8px rgba(239, 68, 68, 0);
+     }
+
+     0%,
+     100% {
+         box-shadow: 0 0 0 0 rgba(239, 68, 68, 0.4);
+     }
+ }
1332
+
1333
+ /* ==================== Welcome Screen ==================== */
1334
+ .welcome-screen {
1335
+ flex: 1;
1336
+ display: flex;
1337
+ flex-direction: column;
1338
+ align-items: center;
1339
+ justify-content: center;
1340
+ text-align: center;
1341
+ padding: 2rem;
1342
+ }
1343
+
1344
+ .welcome-icon {
1345
+ display: flex;
1346
+ align-items: center;
1347
+ justify-content: center;
1348
+ margin-bottom: 1rem;
1349
+ }
1350
+
1351
+ .welcome-title {
1352
+ font-size: 1.5rem;
1353
+ font-weight: 700;
1354
+ margin-bottom: 0.5rem;
1355
+ }
1356
+
1357
+ .welcome-subtitle {
1358
+ font-size: 0.9rem;
1359
+ color: var(--text-secondary);
1360
+ max-width: 400px;
1361
+ }
1362
+
1363
+ .welcome-features {
1364
+ display: flex;
1365
+ gap: 0.75rem;
1366
+ margin-top: 1.5rem;
1367
+ }
1368
+
1369
+ .feature-card {
1370
+ padding: 1rem;
1371
+ background: var(--bg-medium);
1372
+ border: 1px solid var(--glass-border);
1373
+ border-radius: var(--radius-lg);
1374
+ width: 100px;
1375
+ text-align: center;
1376
+ transition: all var(--transition-smooth);
1377
+ }
1378
+
1379
+ .feature-card:hover {
1380
+ transform: translateY(-3px);
1381
+ background: var(--bg-light);
1382
+ }
1383
+
1384
+ .feature-icon {
1385
+ font-size: 1.5rem;
1386
+ margin-bottom: 0.25rem;
1387
+ }
1388
+
1389
+ .feature-title {
1390
+ font-size: 0.75rem;
1391
+ font-weight: 600;
1392
+ }
1393
+
1394
+ /* ==================== Modal ==================== */
1395
+ .modal-overlay {
1396
+ position: fixed;
1397
+ top: 0;
1398
+ left: 0;
1399
+ right: 0;
1400
+ bottom: 0;
1401
+ background: rgba(0, 0, 0, 0.8);
1402
+ display: flex;
1403
+ align-items: center;
1404
+ justify-content: center;
1405
+ z-index: 1000;
1406
+ opacity: 0;
1407
+ visibility: hidden;
1408
+ transition: all var(--transition-smooth);
1409
+ }
1410
+
1411
+ .modal-overlay.active {
1412
+ opacity: 1;
1413
+ visibility: visible;
1414
+ }
1415
+
1416
+ .modal {
1417
+ background: var(--bg-dark);
1418
+ border: 1px solid var(--glass-border);
1419
+ border-radius: var(--radius-xl);
1420
+ padding: 2rem;
1421
+ width: 100%;
1422
+ max-width: 400px;
1423
+ transform: scale(0.95) translateY(20px);
1424
+ transition: transform var(--transition-smooth);
1425
+ }
1426
+
1427
+ .modal-overlay.active .modal {
1428
+ transform: scale(1) translateY(0);
1429
+ }
1430
+
1431
+ .modal-header {
1432
+ text-align: center;
1433
+ margin-bottom: 1.5rem;
1434
+ }
1435
+
1436
+ .modal-logo {
1437
+ width: 50px;
1438
+ height: 50px;
1439
+ background: var(--bg-light);
1440
+ border: 1px solid var(--glass-border);
1441
+ border-radius: var(--radius-lg);
1442
+ display: flex;
1443
+ align-items: center;
1444
+ justify-content: center;
1445
+ font-size: 1.5rem;
1446
+ margin: 0 auto 1rem;
1447
+ }
1448
+
1449
+ .modal-title {
1450
+ font-size: 1.25rem;
1451
+ font-weight: 700;
1452
+ }
1453
+
1454
+ .modal-subtitle {
1455
+ font-size: 0.8rem;
1456
+ color: var(--text-muted);
1457
+ margin-top: 0.25rem;
1458
+ }
1459
+
1460
+ /* ==================== Auth Tabs ==================== */
1461
+ .role-tabs,
1462
+ .auth-tabs {
1463
+ display: flex;
1464
+ background: var(--bg-medium);
1465
+ border-radius: var(--radius-md);
1466
+ padding: 4px;
1467
+ margin-bottom: 1rem;
1468
+ }
1469
+
1470
+ .role-tab,
1471
+ .auth-tab {
1472
+ flex: 1;
1473
+ padding: 0.65rem;
1474
+ background: transparent;
1475
+ border: none;
1476
+ border-radius: var(--radius-sm);
1477
+ color: var(--text-muted);
1478
+ font-weight: 600;
1479
+ font-size: 0.85rem;
1480
+ cursor: pointer;
1481
+ transition: all var(--transition-fast);
1482
+ }
1483
+
1484
+ .role-tab.active,
1485
+ .auth-tab.active {
1486
+ background: var(--bg-light);
1487
+ color: var(--text-primary);
1488
+ }
1489
+
1490
+ .auth-tab.active {
1491
+ background: var(--accent-primary);
1492
+ color: var(--bg-darkest);
1493
+ }
1494
+
1495
+ /* ==================== Form Styles ==================== */
1496
+ .form-group {
1497
+ margin-bottom: 1rem;
1498
+ }
1499
+
1500
+ .form-label {
1501
+ display: block;
1502
+ font-size: 0.8rem;
1503
+ font-weight: 500;
1504
+ margin-bottom: 0.4rem;
1505
+ color: var(--text-secondary);
1506
+ }
1507
+
1508
+ .form-input {
1509
+ width: 100%;
1510
+ padding: 0.7rem 1rem;
1511
+ background: var(--bg-medium);
1512
+ border: 1px solid var(--glass-border);
1513
+ border-radius: var(--radius-md);
1514
+ color: var(--text-primary);
1515
+ font-size: 0.9rem;
1516
+ transition: all var(--transition-fast);
1517
+ }
1518
+
1519
+ .form-input:focus {
1520
+ outline: none;
1521
+ border-color: rgba(255, 255, 255, 0.2);
1522
+ background: var(--bg-light);
1523
+ }
1524
+
1525
+ .form-input::placeholder {
1526
+ color: var(--text-muted);
1527
+ }
1528
+
1529
+ .form-error {
1530
+ font-size: 0.8rem;
1531
+ color: var(--error);
1532
+ margin-top: 0.4rem;
1533
+ }
1534
+
1535
+ .auth-btn {
1536
+ width: 100%;
1537
+ padding: 0.8rem;
1538
+ margin-top: 0.5rem;
1539
+ }
1540
+
1541
+ .modal-actions {
1542
+ display: flex;
1543
+ gap: 0.75rem;
1544
+ margin-top: 1rem;
1545
+ }
1546
+
1547
+ .modal-actions .btn {
1548
+ flex: 1;
1549
+ }
1550
+
1551
+ /* ==================== Buttons ==================== */
1552
+ .btn {
1553
+ position: relative;
1554
+ padding: 0.6rem 1.2rem;
1555
+ border: none;
1556
+ border-radius: var(--radius-md);
1557
+ font-size: 0.85rem;
1558
+ font-weight: 600;
1559
+ cursor: pointer;
1560
+ transition: all var(--transition-fast);
1561
+ }
1562
+
1563
+ .btn-primary {
1564
+ background: var(--accent-primary);
1565
+ color: var(--bg-darkest);
1566
+ }
1567
+
1568
+ .btn-primary:hover {
1569
+ background: var(--accent-secondary);
1570
+ transform: translateY(-1px);
1571
+ }
1572
+
1573
+ .btn-secondary {
1574
+ background: var(--bg-light);
1575
+ border: 1px solid var(--glass-border);
1576
+ color: var(--text-primary);
1577
+ }
1578
+
1579
+ .btn-secondary:hover {
1580
+ background: var(--bg-hover);
1581
+ }
1582
+
1583
+ .btn-ghost {
1584
+ background: transparent;
1585
+ color: var(--text-muted);
1586
+ padding: 0.4rem;
1587
+ }
1588
+
1589
+ .btn-ghost:hover {
1590
+ color: var(--text-primary);
1591
+ background: var(--bg-light);
1592
+ }
1593
+
1594
+ .btn-logout {
1595
+ background: rgba(248, 113, 113, 0.15);
1596
+ color: #f87171;
1597
+ padding: 0.35rem 0.75rem;
1598
+ font-size: 0.75rem;
1599
+ border: 1px solid rgba(248, 113, 113, 0.3);
1600
+ }
1601
+
1602
+ .btn-logout:hover {
1603
+ background: rgba(248, 113, 113, 0.25);
1604
+ border-color: rgba(248, 113, 113, 0.5);
1605
+ }
1606
+
1607
+ /* ==================== Document Viewer Modal ==================== */
1608
+ .doc-viewer-modal {
1609
+ width: 90%;
1610
+ max-width: 900px;
1611
+ height: 80vh;
1612
+ padding: 0;
1613
+ display: flex;
1614
+ flex-direction: column;
1615
+ }
1616
+
1617
+ .doc-viewer-header {
1618
+ display: flex;
1619
+ justify-content: space-between;
1620
+ align-items: center;
1621
+ padding: 1rem 1.5rem;
1622
+ border-bottom: 1px solid var(--glass-border);
1623
+ }
1624
+
1625
+ .doc-viewer-header h3 {
1626
+ font-size: 1rem;
1627
+ font-weight: 600;
1628
+ white-space: nowrap;
1629
+ overflow: hidden;
1630
+ text-overflow: ellipsis;
1631
+ }
1632
+
1633
+ .doc-viewer-content {
1634
+ flex: 1;
1635
+ overflow: auto;
1636
+ display: flex;
1637
+ align-items: center;
1638
+ justify-content: center;
1639
+ padding: 1rem;
1640
+ background: var(--bg-medium);
1641
+ }
1642
+
1643
+ .doc-viewer-content iframe,
1644
+ .doc-viewer-content img {
1645
+ max-width: 100%;
1646
+ max-height: 100%;
1647
+ }
1648
+
1649
+ .doc-text-preview {
1650
+ width: 100%;
1651
+ height: 100%;
1652
+ overflow: auto;
1653
+ padding: 1rem;
1654
+ }
1655
+
1656
+ .doc-text-preview pre {
1657
+ white-space: pre-wrap;
1658
+ word-wrap: break-word;
1659
+ font-size: 0.85rem;
1660
+ line-height: 1.6;
1661
+ color: var(--text-secondary);
1662
+ }
1663
+
1664
+ /* ==================== Empty State ==================== */
1665
+ .empty-state {
1666
+ text-align: center;
1667
+ padding: 2rem;
1668
+ color: var(--text-muted);
1669
+ }
1670
+
1671
+ .empty-state.small {
1672
+ padding: 0.75rem;
1673
+ }
1674
+
1675
+ .empty-icon {
1676
+ font-size: 2rem;
1677
+ margin-bottom: 0.5rem;
1678
+ }
1679
+
1680
+ .empty-text {
1681
+ font-size: 0.8rem;
1682
+ }
1683
+
1684
+ .empty-state.small .empty-text {
1685
+ font-size: 0.75rem;
1686
+ }
1687
+
1688
+ /* ==================== Loading ==================== */
1689
+ .loading-spinner {
1690
+ width: 18px;
1691
+ height: 18px;
1692
+ border: 2px solid rgba(255, 255, 255, 0.2);
1693
+ border-top-color: var(--accent-primary);
1694
+ border-radius: 50%;
1695
+ animation: spin 0.7s linear infinite;
1696
+ }
1697
+
1698
+ /* One full clockwise turn; .loading-spinner loops it linearly. */
+ @keyframes spin {
+     100% {
+         transform: rotate(360deg);
+     }
+ }
1703
+
1704
+ /* ==================== Toast ==================== */
1705
+ .toast-container {
1706
+ position: fixed;
1707
+ bottom: 1.5rem;
1708
+ right: 1.5rem;
1709
+ z-index: 2000;
1710
+ display: flex;
1711
+ flex-direction: column;
1712
+ gap: 0.5rem;
1713
+ }
1714
+
1715
+ .toast {
1716
+ display: flex;
1717
+ align-items: center;
1718
+ gap: 0.75rem;
1719
+ padding: 0.75rem 1rem;
1720
+ background: var(--bg-dark);
1721
+ border: 1px solid var(--glass-border);
1722
+ border-radius: var(--radius-md);
1723
+ animation: toastSlide 0.3s ease-out;
1724
+ box-shadow: 0 4px 15px rgba(0, 0, 0, 0.4);
1725
+ }
1726
+
1727
+ /* Entrance for .toast: fades in while sliding 50px leftwards into place. */
+ @keyframes toastSlide {
+     0% {
+         transform: translateX(50px);
+         opacity: 0;
+     }
+
+     100% {
+         transform: translateX(0);
+         opacity: 1;
+     }
+ }
1738
+
1739
+ .toast-message {
1740
+ font-size: 0.85rem;
1741
+ }
1742
+
1743
+ .toast-close {
1744
+ background: none;
1745
+ border: none;
1746
+ color: var(--text-muted);
1747
+ cursor: pointer;
1748
+ padding: 0.25rem;
1749
+ }
1750
+
1751
+ /* ==================== Utility Classes ==================== */
1752
+ .hidden {
1753
+ display: none !important;
1754
+ }
1755
+
1756
+ .flex {
1757
+ display: flex;
1758
+ }
1759
+
1760
+ .items-center {
1761
+ align-items: center;
1762
+ }
1763
+
1764
+ .gap-2 {
1765
+ gap: 0.5rem;
1766
+ }
1767
+
1768
+ .mt-3 {
1769
+ margin-top: 0.75rem;
1770
+ }
1771
+
1772
+ /* ==================== Document Summary Panel ==================== */
1773
+ .summary-panel {
1774
+ position: relative;
1775
+ background: linear-gradient(135deg, var(--bg-medium), var(--bg-light));
1776
+ border: 1px solid var(--glass-border);
1777
+ border-radius: var(--radius-lg);
1778
+ padding: 1.25rem;
1779
+ margin-bottom: 1rem;
1780
+ animation: summarySlideIn 0.3s ease-out;
1781
+ box-shadow: 0 4px 20px rgba(0, 0, 0, 0.3);
1782
+ }
1783
+
1784
+ /* Entrance for .summary-panel: fades in while dropping 10px into place. */
+ @keyframes summarySlideIn {
+     0% {
+         transform: translateY(-10px);
+         opacity: 0;
+     }
+
+     100% {
+         transform: translateY(0);
+         opacity: 1;
+     }
+ }
1795
+
1796
+ .summary-header {
1797
+ display: flex;
1798
+ align-items: center;
1799
+ gap: 0.5rem;
1800
+ margin-bottom: 0.75rem;
1801
+ }
1802
+
1803
+ .summary-icon {
1804
+ font-size: 1.25rem;
1805
+ }
1806
+
1807
+ .summary-title {
1808
+ font-size: 0.9rem;
1809
+ font-weight: 600;
1810
+ color: var(--text-primary);
1811
+ flex: 1;
1812
+ white-space: nowrap;
1813
+ overflow: hidden;
1814
+ text-overflow: ellipsis;
1815
+ }
1816
+
1817
+ .summary-content {
1818
+ padding-right: 1.5rem;
1819
+ }
1820
+
1821
+ .summary-text {
1822
+ font-size: 0.9rem;
1823
+ line-height: 1.6;
1824
+ color: var(--text-secondary);
1825
+ }
1826
+
1827
+ .summary-close {
1828
+ position: absolute;
1829
+ top: 0.75rem;
1830
+ right: 0.75rem;
1831
+ background: none;
1832
+ border: none;
1833
+ color: var(--text-muted);
1834
+ cursor: pointer;
1835
+ padding: 0.25rem;
1836
+ font-size: 0.9rem;
1837
+ transition: color var(--transition-fast);
1838
+ opacity: 0.6;
1839
+ }
1840
+
1841
+ .summary-close:hover {
1842
+ color: var(--text-primary);
1843
+ opacity: 1;
1844
+ }
1845
+
1846
+ /* ==================== Selected Document State ==================== */
1847
+ .document-item.selected {
1848
+ background: var(--bg-light);
1849
+ border-color: var(--accent-primary);
1850
+ box-shadow: 0 0 0 1px rgba(255, 255, 255, 0.15);
1851
+ }
1852
+
1853
+ .document-item.selected::before {
1854
+ content: '';
1855
+ position: absolute;
1856
+ left: 0;
1857
+ top: 0;
1858
+ bottom: 0;
1859
+ width: 3px;
1860
+ background: var(--accent-primary);
1861
+ border-radius: var(--radius-sm) 0 0 var(--radius-sm);
1862
+ }
1863
+
1864
+ .document-item.selected .doc-name {
1865
+ color: var(--text-primary);
1866
+ font-weight: 600;
1867
+ }
1868
+
1869
+ /* ==================== Mobile Navigation Bar ==================== */
1870
+ .mobile-nav {
1871
+ display: none;
1872
+ position: fixed;
1873
+ bottom: 0;
1874
+ left: 0;
1875
+ right: 0;
1876
+ height: 70px;
1877
+ background: var(--bg-dark);
1878
+ border-top: 1px solid var(--glass-border);
1879
+ z-index: 1000;
1880
+ justify-content: space-around;
1881
+ align-items: center;
1882
+ padding: 0 1rem;
1883
+ padding-bottom: env(safe-area-inset-bottom, 0);
1884
+ box-shadow: 0 -4px 20px rgba(0, 0, 0, 0.3);
1885
+ }
1886
+
1887
+ .mobile-nav-btn {
1888
+ display: flex;
1889
+ flex-direction: column;
1890
+ align-items: center;
1891
+ justify-content: center;
1892
+ gap: 0.25rem;
1893
+ background: transparent;
1894
+ border: none;
1895
+ color: var(--text-muted);
1896
+ padding: 0.5rem 1.5rem;
1897
+ border-radius: var(--radius-md);
1898
+ cursor: pointer;
1899
+ transition: all var(--transition-fast);
1900
+ min-width: 70px;
1901
+ }
1902
+
1903
+ .mobile-nav-btn:active {
1904
+ transform: scale(0.95);
1905
+ }
1906
+
1907
+ .mobile-nav-btn.active {
1908
+ color: var(--accent-primary);
1909
+ }
1910
+
1911
+ .mobile-nav-btn .nav-icon {
1912
+ font-size: 1.5rem;
1913
+ line-height: 1;
1914
+ }
1915
+
1916
+ .mobile-nav-btn .nav-label {
1917
+ font-size: 0.65rem;
1918
+ font-weight: 600;
1919
+ text-transform: uppercase;
1920
+ letter-spacing: 0.5px;
1921
+ }
1922
+
1923
+ /* ==================== Mobile Backdrop ==================== */
1924
+ .mobile-backdrop {
1925
+ display: none;
1926
+ position: fixed;
1927
+ top: 0;
1928
+ left: 0;
1929
+ right: 0;
1930
+ bottom: 0;
1931
+ background: rgba(0, 0, 0, 0.7);
1932
+ z-index: 500;
1933
+ opacity: 0;
1934
+ visibility: hidden;
1935
+ transition: opacity var(--transition-smooth), visibility var(--transition-smooth);
1936
+ }
1937
+
1938
+ .mobile-backdrop.active {
1939
+ opacity: 1;
1940
+ visibility: visible;
1941
+ }
1942
+
1943
+ /* ==================== Tablet Breakpoint (<= 1024px) ==================== */
1944
+ @media screen and (max-width: 1024px) {
1945
+ :root {
1946
+ --sidebar-width: 260px;
1947
+ }
1948
+
1949
+ .main-content {
1950
+ padding: 0.75rem;
1951
+ gap: 0.75rem;
1952
+ }
1953
+
1954
+ .sidebar-section {
1955
+ padding: 0.75rem;
1956
+ }
1957
+
1958
+ .chat-bucket-filter {
1959
+ padding: 0.6rem 1rem;
1960
+ gap: 0.5rem;
1961
+ }
1962
+
1963
+ .welcome-title {
1964
+ font-size: 1.25rem;
1965
+ }
1966
+
1967
+ .welcome-subtitle {
1968
+ font-size: 0.85rem;
1969
+ }
1970
+ }
1971
+
1972
+ /* ==================== Mobile Breakpoint (<= 768px) ==================== */
1973
+ @media screen and (max-width: 768px) {
1974
+ :root {
1975
+ --sidebar-width: 85vw;
1976
+ --sidebar-collapsed: 0px;
1977
+ }
1978
+
1979
+ /* Show mobile navigation */
1980
+ .mobile-nav {
1981
+ display: flex;
1982
+ }
1983
+
1984
+ .mobile-backdrop {
1985
+ display: block;
1986
+ }
1987
+
1988
+ /* Ensure app container is above backdrop */
1989
+ .app-container {
1990
+ z-index: 600;
1991
+ }
1992
+
1993
+ /* Adjust main layout for mobile */
1994
+ .main-content {
+     padding: 0;
+     gap: 0;
+     flex-direction: column;
+     /* 100vh is the large viewport on mobile browsers, so it extends under
+        collapsing toolbars. Keep it as the fallback and let browsers that
+        understand dynamic viewport units size to the visible area instead. */
+     height: 100vh;
+     height: 100dvh;
+     overflow: hidden;
+ }
2001
+
2002
+ /* ===== Off-Canvas Sidebars ===== */
2003
+ .sidebar {
2004
+ position: fixed;
2005
+ top: 0;
2006
+ bottom: 70px;
2007
+ /* Above mobile nav */
2008
+ width: var(--sidebar-width);
2009
+ max-width: 320px;
2010
+ z-index: 900;
2011
+ transition: transform var(--transition-smooth);
2012
+ border-radius: 0;
2013
+ background: var(--bg-dark);
2014
+ /* Solid background to prevent blur */
2015
+ box-shadow: 0 0 30px rgba(0, 0, 0, 0.5);
2016
+ }
2017
+
2018
+ .sidebar .sidebar-content {
2019
+ opacity: 1;
2020
+ pointer-events: auto;
2021
+ padding: 1rem;
2022
+ padding-bottom: 2rem;
2023
+ }
2024
+
2025
+ .sidebar-left {
2026
+ left: 0;
2027
+ transform: translateX(-100%);
2028
+ border-right: 1px solid var(--glass-border);
2029
+ }
2030
+
2031
+ .sidebar-left.mobile-open {
2032
+ transform: translateX(0);
2033
+ }
2034
+
2035
+ .sidebar-right {
2036
+ right: 0;
2037
+ transform: translateX(100%);
2038
+ border-left: 1px solid var(--glass-border);
2039
+ }
2040
+
2041
+ .sidebar-right.mobile-open {
2042
+ transform: translateX(0);
2043
+ }
2044
+
2045
+ /* Hide desktop sidebar toggles on mobile */
2046
+ .sidebar-toggle {
2047
+ display: none;
2048
+ }
2049
+
2050
+ /* ===== Chat Container Full Width ===== */
2051
+ .chat-container {
+     border-radius: 0;
+     border: none;
+     /* Full height minus the 70px mobile nav. The vh line is the fallback;
+        the dvh line tracks the visible viewport so the bottom of the chat is
+        not pushed under a collapsing browser toolbar. */
+     height: calc(100vh - 70px);
+     height: calc(100dvh - 70px);
+     display: flex;
+     flex-direction: column;
+ }
2059
+
2060
+ /* ===== Simplified Chat Header ===== */
2061
+ .chat-bucket-filter {
2062
+ padding: 0.75rem;
2063
+ gap: 0.5rem;
2064
+ flex-wrap: wrap;
2065
+ }
2066
+
2067
+ .filter-label {
2068
+ display: none;
2069
+ }
2070
+
2071
+ .chat-bucket-filter .custom-select.compact {
2072
+ flex: 1;
2073
+ min-width: 120px;
2074
+ }
2075
+
2076
+ .btn-new-chat,
2077
+ .btn-clear-chat {
2078
+ padding: 0.5rem 0.6rem;
2079
+ font-size: 0.7rem;
2080
+ }
2081
+
2082
+ .btn-new-chat {
2083
+ margin-left: 0;
2084
+ }
2085
+
2086
+ /* ===== Chat Messages ===== */
2087
+ .chat-messages {
2088
+ padding: 0.75rem;
2089
+ gap: 0.75rem;
2090
+ flex: 1;
2091
+ min-height: 0;
2092
+ }
2093
+
2094
+ .message {
2095
+ max-width: 92%;
2096
+ }
2097
+
2098
+ .message-content {
2099
+ padding: 0.875rem 1rem;
2100
+ font-size: 0.875rem;
2101
+ }
2102
+
2103
+ .message-avatar {
2104
+ width: 28px;
2105
+ height: 28px;
2106
+ font-size: 0.8rem;
2107
+ }
2108
+
2109
+ /* ===== Chat Input ===== */
2110
+ .chat-input-container {
2111
+ padding: 0.75rem;
2112
+ margin-bottom: 70px;
2113
+ /* Space for mobile nav */
2114
+ background: var(--bg-dark);
2115
+ border-top: 1px solid var(--glass-border);
2116
+ flex-shrink: 0;
2117
+ }
2118
+
2119
+ .chat-input {
2120
+ font-size: 16px;
2121
+ /* Prevents iOS zoom on focus */
2122
+ padding: 0.875rem 1rem;
2123
+ }
2124
+
2125
+ /* Enlarge both round action buttons together so the input row does not
+    change size between them.
+    NOTE(review): assumes .stop-btn occupies the send button's slot while a
+    response is generating - confirm against the markup/JS. */
+ .send-btn,
+ .stop-btn {
+     width: 48px;
+     height: 48px;
+ }
+
+ .send-btn {
+     font-size: 1.2rem;
+ }
2130
+
2131
+ /* ===== Welcome Screen ===== */
2132
+ .welcome-screen {
2133
+ padding: 1.5rem 1rem;
2134
+ }
2135
+
2136
+ .welcome-icon img {
2137
+ width: 160px !important;
2138
+ }
2139
+
2140
+ .welcome-title {
2141
+ font-size: 1.2rem;
2142
+ }
2143
+
2144
+ .welcome-subtitle {
2145
+ font-size: 0.85rem;
2146
+ max-width: 300px;
2147
+ }
2148
+
2149
+ .welcome-features {
2150
+ flex-wrap: wrap;
2151
+ justify-content: center;
2152
+ }
2153
+
2154
+ .feature-card {
2155
+ width: 85px;
2156
+ padding: 0.75rem;
2157
+ }
2158
+
2159
+ .feature-icon {
2160
+ font-size: 1.25rem;
2161
+ }
2162
+
2163
+ .feature-title {
2164
+ font-size: 0.7rem;
2165
+ }
2166
+
2167
+ /* ===== Modal Responsiveness ===== */
2168
+ .modal {
2169
+ width: 95%;
2170
+ max-width: none;
2171
+ margin: 1rem;
2172
+ padding: 1.5rem;
2173
+ max-height: 90vh;
2174
+ overflow-y: auto;
2175
+ }
2176
+
2177
+ .modal-title {
2178
+ font-size: 1.1rem;
2179
+ }
2180
+
2181
+ .modal-subtitle {
2182
+ font-size: 0.75rem;
2183
+ }
2184
+
2185
+ .form-input {
2186
+ font-size: 16px;
2187
+ /* Prevents iOS zoom */
2188
+ padding: 0.875rem 1rem;
2189
+ }
2190
+
2191
+ .auth-btn {
2192
+ padding: 1rem;
2193
+ font-size: 0.9rem;
2194
+ }
2195
+
2196
+ .modal-actions {
2197
+ flex-direction: column;
2198
+ }
2199
+
2200
+ /* ===== Document Viewer Modal ===== */
2201
+ /* Full-screen document viewer on mobile.
+    The mobile `.modal` rule adds padding, margin and max-height after the
+    base `.doc-viewer-modal { padding: 0 }`, and the <=480px `.modal` rule
+    does so again later, so those are reset here. The compound selector
+    outranks `.modal` regardless of source order.
+    NOTE(review): presumes the viewer element carries both `modal` and
+    `doc-viewer-modal` classes; the plain selector covers the other case. */
+ .doc-viewer-modal,
+ .modal.doc-viewer-modal {
+     width: 100%;
+     height: 100%;
+     max-width: 100%;
+     max-height: 100%;
+     margin: 0;
+     padding: 0;
+     border-radius: 0;
+ }
2207
+
2208
+ .doc-viewer-header {
2209
+ padding: 0.875rem 1rem;
2210
+ }
2211
+
2212
+ .doc-viewer-header h3 {
2213
+ font-size: 0.9rem;
2214
+ }
2215
+
2216
+ /* ===== Sidebar Content Adjustments ===== */
2217
+ .documents-section,
2218
+ .chat-history-section {
2219
+ max-height: none;
2220
+ flex: 0 0 auto;
2221
+ min-height: 0;
2222
+ }
2223
+
2224
+ .section-body {
2225
+ max-height: 80vh;
2226
+ /* Cap long lists at 80% of the viewport height on mobile */
2227
+ }
2228
+
2229
+ .sidebar-section {
2230
+ padding: 1rem;
2231
+ }
2232
+
2233
+ .sidebar-title {
2234
+ font-size: 0.8rem;
2235
+ }
2236
+
2237
+ .user-section {
2238
+ padding: 1rem !important;
2239
+ }
2240
+
2241
+ .user-avatar {
2242
+ width: 36px;
2243
+ height: 36px;
2244
+ font-size: 0.9rem;
2245
+ }
2246
+
2247
+ .user-details span:first-child {
2248
+ font-size: 0.95rem;
2249
+ }
2250
+
2251
+ .user-role {
2252
+ font-size: 0.75rem;
2253
+ }
2254
+
2255
+ .btn-logout {
2256
+ padding: 0.4rem 0.8rem;
2257
+ font-size: 0.8rem;
2258
+ }
2259
+
2260
+ /* ===== Upload Zone ===== */
2261
+ .upload-zone {
2262
+ padding: 1.25rem;
2263
+ }
2264
+
2265
+ .upload-icon {
2266
+ font-size: 1.75rem;
2267
+ }
2268
+
2269
+ .upload-title {
2270
+ font-size: 0.85rem;
2271
+ }
2272
+
2273
+ /* ===== Document & Chat History Items ===== */
2274
+ .document-item {
2275
+ padding: 0.75rem;
2276
+ }
2277
+
2278
+ .doc-icon {
2279
+ width: 36px;
2280
+ height: 36px;
2281
+ }
2282
+
2283
+ .doc-name {
2284
+ font-size: 0.85rem;
2285
+ }
2286
+
2287
+ .doc-view,
2288
+ .doc-delete {
2289
+ opacity: 1;
2290
+ /* Always visible on mobile */
2291
+ padding: 0.5rem;
2292
+ font-size: 0.9rem;
2293
+ }
2294
+
2295
+ .chat-history-item {
2296
+ padding: 0.75rem;
2297
+ }
2298
+
2299
+ .chat-history-delete {
2300
+ opacity: 1;
2301
+ /* Always visible on mobile */
2302
+ }
2303
+
2304
+ /* ===== Bucket Items ===== */
2305
+ .bucket-item {
2306
+ padding: 0.75rem;
2307
+ }
2308
+
2309
+ .bucket-name {
2310
+ font-size: 0.9rem;
2311
+ }
2312
+
2313
+ .bucket-delete {
2314
+ opacity: 1;
2315
+ /* Always visible on mobile */
2316
+ }
2317
+
2318
+ /* ===== Custom Dropdowns ===== */
2319
+ .select-trigger {
2320
+ padding: 0.75rem 1rem;
2321
+ }
2322
+
2323
+ .select-value {
2324
+ font-size: 0.9rem;
2325
+ }
2326
+
2327
+ .select-option {
2328
+ padding: 0.875rem 1rem;
2329
+ font-size: 0.9rem;
2330
+ }
2331
+
2332
+ /* ===== Toast Notifications ===== */
2333
+ .toast-container {
2334
+ bottom: 80px;
2335
+ /* Above mobile nav */
2336
+ left: 1rem;
2337
+ right: 1rem;
2338
+ }
2339
+
2340
+ .toast {
2341
+ width: 100%;
2342
+ }
2343
+
2344
+ /* ===== Summary Panel ===== */
2345
+ .summary-panel {
2346
+ padding: 1rem;
2347
+ margin-bottom: 0.75rem;
2348
+ }
2349
+
2350
+ .summary-title {
2351
+ font-size: 0.85rem;
2352
+ }
2353
+
2354
+ .summary-text {
2355
+ font-size: 0.85rem;
2356
+ }
2357
+
2358
+ /* ===== Tables in Messages ===== */
2359
+ .message-content .table-wrapper {
2360
+ margin: 1rem -0.5rem;
2361
+ border-radius: var(--radius-md);
2362
+ overflow-x: auto;
2363
+ }
2364
+
2365
+ .message-content table {
2366
+ font-size: 0.8rem;
2367
+ min-width: 400px;
2368
+ }
2369
+
2370
+ .message-content th,
2371
+ .message-content td {
2372
+ padding: 0.6rem 0.75rem;
2373
+ }
2374
+
2375
+ /* ===== Code Blocks ===== */
2376
+ .message-content .code-block {
2377
+ padding: 0.875rem 1rem;
2378
+ font-size: 0.8rem;
2379
+ margin: 0.75rem -0.25rem;
2380
+ }
2381
+
2382
+ /* ===== Lists ===== */
2383
+ .message-content .formatted-list li {
2384
+ padding: 0.5rem 0.5rem 0.5rem 2rem;
2385
+ }
2386
+ }
2387
+
2388
+ /* ==================== Small Mobile (<= 480px) ==================== */
2389
+ @media screen and (max-width: 480px) {
2390
+ .mobile-nav {
2391
+ height: 65px;
2392
+ padding: 0 0.5rem;
2393
+ }
2394
+
2395
+ .mobile-nav-btn {
2396
+ min-width: 60px;
2397
+ padding: 0.4rem 1rem;
2398
+ }
2399
+
2400
+ .mobile-nav-btn .nav-icon {
2401
+ font-size: 1.35rem;
2402
+ }
2403
+
2404
+ .mobile-nav-btn .nav-label {
2405
+ font-size: 0.6rem;
2406
+ }
2407
+
2408
+ .sidebar {
2409
+ max-width: 100%;
2410
+ width: 100%;
2411
+ }
2412
+
2413
+ .chat-bucket-filter {
2414
+ padding: 0.6rem;
2415
+ }
2416
+
2417
+ .message {
2418
+ max-width: 95%;
2419
+ }
2420
+
2421
+ .message-content {
2422
+ padding: 0.75rem 0.875rem;
2423
+ font-size: 0.85rem;
2424
+ }
2425
+
2426
+ .message-avatar {
2427
+ width: 26px;
2428
+ height: 26px;
2429
+ }
2430
+
2431
+ .welcome-icon img {
2432
+ width: 140px !important;
2433
+ }
2434
+
2435
+ .welcome-title {
2436
+ font-size: 1.1rem;
2437
+ }
2438
+
2439
+ .welcome-subtitle {
2440
+ font-size: 0.8rem;
2441
+ }
2442
+
2443
+ .modal {
2444
+ padding: 1.25rem;
2445
+ margin: 0.5rem;
2446
+ }
2447
+
2448
+ .role-tabs,
2449
+ .auth-tabs {
2450
+ padding: 3px;
2451
+ }
2452
+
2453
+ .role-tab,
2454
+ .auth-tab {
2455
+ padding: 0.6rem 0.5rem;
2456
+ font-size: 0.8rem;
2457
+ }
2458
+
2459
+ .form-label {
2460
+ font-size: 0.75rem;
2461
+ }
2462
+ }
2463
+
2464
+ /* ==================== Landscape Mobile ==================== */
2465
+ @media screen and (max-width: 768px) and (orientation: landscape) {
2466
+ .mobile-nav {
2467
+ height: 55px;
2468
+ }
2469
+
2470
+ .main-content {
2471
+ padding-bottom: 55px;
2472
+ }
2473
+
2474
+ .sidebar {
2475
+ bottom: 55px;
2476
+ }
2477
+
2478
+ .mobile-nav-btn .nav-label {
2479
+ display: none;
2480
+ }
2481
+
2482
+ .mobile-nav-btn .nav-icon {
2483
+ font-size: 1.5rem;
2484
+ }
2485
+
2486
+ .welcome-screen {
2487
+ padding: 1rem;
2488
+ flex-direction: row;
2489
+ gap: 2rem;
2490
+ }
2491
+
2492
+ .welcome-icon,
2493
+ .welcome-title,
2494
+ .welcome-subtitle {
2495
+ margin: 0;
2496
+ }
2497
+ }
2498
+
2499
+ /* ==================== Touch Device Optimizations ==================== */
2500
+ /* Applies only to devices whose primary input cannot hover and is coarse
+    (i.e. touch screens). */
+ @media (hover: none) and (pointer: coarse) {
+
+     /* Minimum 44px tap targets for buttons */
+     .btn,
+     .btn-ghost {
+         min-height: 44px;
+     }
+
+     .btn-ghost {
+         min-width: 44px;
+     }
+
+     /* List rows get a slightly taller 48px minimum */
+     .chat-history-item,
+     .bucket-item,
+     .document-item {
+         min-height: 48px;
+     }
+
+     /* Cancel hover transforms, which are not useful on touch */
+     .feature-card:hover,
+     .upload-zone:hover {
+         transform: none;
+     }
+
+     /* Row action buttons stay visible since there is no hover to reveal them */
+     .chat-history-delete,
+     .bucket-delete,
+     .doc-delete,
+     .doc-view {
+         opacity: 1;
+     }
+ }
2535
+
2536
+ /* ==================== Safe Area Support (iPhone X+) ==================== */
2537
+ /* Adds the bottom safe-area inset (home-indicator region) to the mobile nav
+    and to everything offset by the nav's height.
+    These rules come after the breakpoint blocks and have the same
+    specificity, so each breakpoint-specific nav height (65px at <=480px,
+    55px in landscape) is restated here with the inset added. Otherwise the
+    70px values below would override them in every browser that supports
+    env(). */
+ @supports (padding-bottom: env(safe-area-inset-bottom)) {
+     .mobile-nav {
+         padding-bottom: env(safe-area-inset-bottom);
+         height: calc(70px + env(safe-area-inset-bottom));
+     }
+
+     @media screen and (max-width: 768px) {
+         .main-content {
+             padding-bottom: calc(70px + env(safe-area-inset-bottom));
+         }
+
+         .sidebar {
+             bottom: calc(70px + env(safe-area-inset-bottom));
+         }
+     }
+
+     /* Small mobile: nav is 65px tall */
+     @media screen and (max-width: 480px) {
+         .mobile-nav {
+             height: calc(65px + env(safe-area-inset-bottom));
+         }
+     }
+
+     /* Landscape mobile: nav is 55px tall, and so are the offsets that clear it */
+     @media screen and (max-width: 768px) and (orientation: landscape) {
+         .mobile-nav {
+             height: calc(55px + env(safe-area-inset-bottom));
+         }
+
+         .main-content {
+             padding-bottom: calc(55px + env(safe-area-inset-bottom));
+         }
+
+         .sidebar {
+             bottom: calc(55px + env(safe-area-inset-bottom));
+         }
+     }
+ }
2553
+
2554
+ /* ==================== Reduced Motion ==================== */
2555
+ /* Honour the user's "reduce motion" preference: drop slide/fade transitions
+    and decorative keyframe animations. The loading spinner is deliberately
+    left running because it is the only progress cue it provides. */
+ @media (prefers-reduced-motion: reduce) {
+
+     .sidebar,
+     .mobile-backdrop,
+     .modal-overlay,
+     .modal,
+     .message {
+         transition: none;
+     }
+
+     /* Entrance slides and the infinite stop-button pulse */
+     .typing-dot,
+     .stop-btn,
+     .toast,
+     .summary-panel {
+         animation: none;
+     }
+ }
static/images/WhatsApp Image 2025-12-23 at 5.10.00 PM.jpeg ADDED
static/index.html ADDED
@@ -0,0 +1,411 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <meta name="description" content="AI-powered document intelligence platform with bucket organization.">
8
+ <title>Iribl AI - Document Intelligence</title>
9
+
10
+ <link rel="preconnect" href="https://fonts.googleapis.com">
11
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
12
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap" rel="stylesheet">
13
+ <link rel="stylesheet" href="/css/styles.css">
14
+ <link rel="icon" type="image/svg+xml"
15
+ href="data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'><text y='.9em' font-size='90'>🧠</text></svg>">
16
+ </head>
17
+
18
+ <body>
19
+ <!-- Toast Container -->
20
+ <div class="toast-container" id="toastContainer"></div>
21
+
22
+ <!-- Mobile Backdrop Overlay -->
23
+ <div class="mobile-backdrop" id="mobileBackdrop"></div>
24
+
25
+ <!-- Mobile Bottom Navigation -->
26
+ <nav class="mobile-nav" id="mobileNav">
27
+ <button class="mobile-nav-btn" id="mobileLeftToggle" title="Menu">
28
+ <span class="nav-icon">☰</span>
29
+ <span class="nav-label">Menu</span>
30
+ </button>
31
+ <button class="mobile-nav-btn active" id="mobileChatToggle" title="Chat">
32
+ <span class="nav-icon">💬</span>
33
+ <span class="nav-label">Chat</span>
34
+ </button>
35
+ <button class="mobile-nav-btn" id="mobileRightToggle" title="Documents">
36
+ <span class="nav-icon">📚</span>
37
+ <span class="nav-label">Docs</span>
38
+ </button>
39
+ </nav>
40
+
41
+ <!-- Auth Modal -->
42
+ <div class="modal-overlay" id="authModal">
43
+ <div class="modal glass-panel">
44
+ <div class="modal-header">
45
+ <div class="modal-logo">🧠</div>
46
+ <h2 class="modal-title">Welcome to Iribl AI</h2>
47
+ <p class="modal-subtitle">Your intelligent document companion</p>
48
+ </div>
49
+
50
+ <div class="role-tabs">
51
+ <button class="role-tab active" data-role="admin">👔 Admin</button>
52
+ <button class="role-tab" data-role="employee">Employee</button>
53
+ </div>
54
+
55
+ <div class="auth-tabs" id="authTabs">
56
+ <button class="auth-tab active" data-tab="login">Sign In</button>
57
+ <button class="auth-tab" data-tab="register">Sign Up</button>
58
+ </div>
59
+
60
+ <form id="loginForm" class="auth-form">
61
+ <div class="form-group">
62
+ <label class="form-label">Username</label>
63
+ <input type="text" class="form-input" name="username" placeholder="Enter your username" required>
64
+ </div>
65
+ <div class="form-group">
66
+ <label class="form-label">Password</label>
67
+ <input type="password" class="form-input" name="password" placeholder="Enter your password"
68
+ required>
69
+ </div>
70
+ <div id="loginError" class="form-error hidden"></div>
71
+ <button type="submit" class="btn btn-primary auth-btn">
72
+ <span class="btn-text">Sign In</span>
73
+ <span class="btn-loader hidden">
74
+ <div class="loading-spinner"></div>
75
+ </span>
76
+ </button>
77
+ </form>
78
+
79
+ <form id="registerForm" class="auth-form hidden">
80
+ <div class="form-group">
81
+ <label class="form-label">Username</label>
82
+ <input type="text" class="form-input" name="username" placeholder="Choose a username" required
83
+ minlength="3">
84
+ </div>
85
+ <div class="form-group">
86
+ <label class="form-label">Email (optional)</label>
87
+ <input type="email" class="form-input" name="email" placeholder="your@email.com">
88
+ </div>
89
+ <div class="form-group">
90
+ <label class="form-label">Password</label>
91
+ <input type="password" class="form-input" name="password" placeholder="Create a password" required
92
+ minlength="6">
93
+ </div>
94
+ <div id="registerError" class="form-error hidden"></div>
95
+ <button type="submit" class="btn btn-primary auth-btn">
96
+ <span class="btn-text">Create Admin Account</span>
97
+ <span class="btn-loader hidden">
98
+ <div class="loading-spinner"></div>
99
+ </span>
100
+ </button>
101
+ </form>
102
+
103
+ <form id="employeeLoginForm" class="auth-form hidden">
104
+ <div class="form-group">
105
+ <label class="form-label">Email</label>
106
+ <input type="email" class="form-input" name="email" placeholder="Enter your work email" required>
107
+ </div>
108
+ <div class="form-group">
109
+ <label class="form-label">Password</label>
110
+ <input type="password" class="form-input" name="password" placeholder="Enter your password"
111
+ required>
112
+ </div>
113
+ <div id="employeeLoginError" class="form-error hidden"></div>
114
+ <button type="submit" class="btn btn-primary auth-btn">
115
+ <span class="btn-text">Sign In as Employee</span>
116
+ <span class="btn-loader hidden">
117
+ <div class="loading-spinner"></div>
118
+ </span>
119
+ </button>
120
+ </form>
121
+ </div>
122
+ </div>
123
+
124
+ <!-- Add Employee Modal -->
125
+ <div class="modal-overlay" id="addEmployeeModal">
126
+ <div class="modal glass-panel">
127
+ <div class="modal-header">
128
+ <h2 class="modal-title">➕ Add Employee</h2>
129
+ <p class="modal-subtitle">Create login credentials for a new employee</p>
130
+ </div>
131
+ <form id="addEmployeeForm" class="auth-form">
132
+ <div class="form-group">
133
+ <label class="form-label">Employee Email</label>
134
+ <input type="email" class="form-input" name="email" placeholder="employee@company.com" required>
135
+ </div>
136
+ <div class="form-group">
137
+ <label class="form-label">Password</label>
138
+ <input type="password" class="form-input" name="password" placeholder="Create a password" required
139
+ minlength="6">
140
+ </div>
141
+ <div id="addEmployeeError" class="form-error hidden"></div>
142
+ <div class="modal-actions">
143
+ <button type="button" class="btn btn-secondary" id="cancelAddEmployee">Cancel</button>
144
+ <button type="submit" class="btn btn-primary">
145
+ <span class="btn-text">Add Employee</span>
146
+ <span class="btn-loader hidden">
147
+ <div class="loading-spinner"></div>
148
+ </span>
149
+ </button>
150
+ </div>
151
+ </form>
152
+ </div>
153
+ </div>
154
+
155
+ <!-- Create Bucket Modal -->
156
+ <div class="modal-overlay" id="createBucketModal">
157
+ <div class="modal glass-panel">
158
+ <div class="modal-header">
159
+ <h2 class="modal-title">📁 Create Bucket</h2>
160
+ <p class="modal-subtitle">Organize your documents into buckets</p>
161
+ </div>
162
+ <form id="createBucketForm" class="auth-form">
163
+ <div class="form-group">
164
+ <label class="form-label">Bucket Name</label>
165
+ <input type="text" class="form-input" name="name" placeholder="e.g., Project Alpha" required>
166
+ </div>
167
+ <div class="form-group">
168
+ <label class="form-label">Description (optional)</label>
169
+ <input type="text" class="form-input" name="description" placeholder="Brief description...">
170
+ </div>
171
+ <div id="createBucketError" class="form-error hidden"></div>
172
+ <div class="modal-actions">
173
+ <button type="button" class="btn btn-secondary" id="cancelCreateBucket">Cancel</button>
174
+ <button type="submit" class="btn btn-primary">
175
+ <span class="btn-text">Create Bucket</span>
176
+ <span class="btn-loader hidden">
177
+ <div class="loading-spinner"></div>
178
+ </span>
179
+ </button>
180
+ </div>
181
+ </form>
182
+ </div>
183
+ </div>
184
+
185
+ <!-- Document Viewer Modal -->
186
+ <div class="modal-overlay" id="docViewerModal">
187
+ <div class="modal glass-panel doc-viewer-modal">
188
+ <div class="doc-viewer-header">
189
+ <h3 id="docViewerTitle">Document</h3>
190
+ <button class="btn btn-ghost" id="closeDocViewer">✕</button>
191
+ </div>
192
+ <div class="doc-viewer-content" id="docViewerContent">
193
+ <div class="loading-spinner"></div>
194
+ </div>
195
+ </div>
196
+ </div>
197
+
198
+ <!-- Main App Container -->
199
+ <div class="app-container" id="appContainer">
200
+ <main class="main-content">
201
+ <!-- LEFT SIDEBAR -->
202
+ <aside class="sidebar sidebar-left" id="leftSidebar">
203
+ <div class="sidebar-toggle" id="leftToggle" title="Toggle sidebar">
204
+ <span class="toggle-icon">◀</span>
205
+ </div>
206
+
207
+ <div class="sidebar-content">
208
+ <!-- User Info -->
209
+ <section class="sidebar-section glass-panel user-section">
210
+ <div class="user-info-row">
211
+ <div class="user-badge">
212
+ <div class="user-avatar" id="userAvatar">U</div>
213
+ <div class="user-details">
214
+ <span id="userName">User</span>
215
+ <span class="user-role" id="userRole">Admin</span>
216
+ </div>
217
+ </div>
218
+ <button class="btn btn-logout" id="logoutBtn" title="Sign Out">logout</button>
219
+ </div>
220
+ </section>
221
+
222
+ <!-- Admin: Employees -->
223
+ <section class="sidebar-section glass-panel collapsible hidden" id="adminSection">
224
+ <div class="section-header" data-target="employeesList">
225
+ <h3 class="sidebar-title"><span></span> Employees</h3>
226
+ <div class="section-actions">
227
+ <button class="btn btn-ghost" id="addEmployeeBtn" title="Add">➕</button>
228
+ <span class="collapse-icon">▼</span>
229
+ </div>
230
+ </div>
231
+ <div class="section-body" id="employeesList">
232
+ <div class="empty-state small">
233
+ <div class="empty-text">No employees</div>
234
+ </div>
235
+ </div>
236
+ </section>
237
+
238
+ <!-- Buckets -->
239
+ <section class="sidebar-section glass-panel collapsible">
240
+ <div class="section-header" data-target="bucketsBody">
241
+ <h3 class="sidebar-title"><span>📁</span> Buckets</h3>
242
+ <div class="section-actions">
243
+ <button class="btn btn-ghost" id="createBucketBtn" title="Create">➕</button>
244
+ <span class="collapse-icon">▼</span>
245
+ </div>
246
+ </div>
247
+ <div class="section-body" id="bucketsBody">
248
+ <div class="buckets-list" id="bucketsList">
249
+ <div class="bucket-item active" data-id="">
250
+ <span class="bucket-name">📂 All Documents</span>
251
+ </div>
252
+ </div>
253
+ </div>
254
+ </section>
255
+
256
+ <!-- Upload -->
257
+ <section class="sidebar-section glass-panel collapsible">
258
+ <div class="section-header" data-target="uploadBody">
259
+ <h3 class="sidebar-title"><span></span> Upload</h3>
260
+ <span class="collapse-icon">▼</span>
261
+ </div>
262
+ <div class="section-body" id="uploadBody">
263
+ <div class="custom-select" id="uploadBucketWrapper">
264
+ <div class="select-trigger" id="uploadBucketTrigger">
265
+ <span class="select-value">No Bucket (General)</span>
266
+ <span class="select-arrow">▼</span>
267
+ </div>
268
+ <div class="select-options" id="uploadBucketOptions"></div>
269
+ <input type="hidden" id="uploadBucketSelect" value="">
270
+ </div>
271
+ <div class="upload-zone" id="uploadZone">
272
+ <input type="file" id="fileInput" hidden multiple
273
+ accept=".pdf,.doc,.docx,.ppt,.pptx,.xls,.xlsx,.txt,.md,.png,.jpg,.jpeg,.gif,.webp">
274
+ <div class="upload-icon">📁</div>
275
+ <div class="upload-title">Drop files here</div>
276
+ <div class="upload-subtitle">or click to browse</div>
277
+ </div>
278
+ <div id="uploadProgress" class="hidden">
279
+ <div class="progress-info">
280
+ <div class="loading-spinner"></div>
281
+ <span id="uploadStatus">Uploading...</span>
282
+ </div>
283
+ <div class="progress-bar">
284
+ <div class="progress-fill" id="progressFill"></div>
285
+ </div>
286
+ <button class="btn btn-cancel-upload" id="cancelUploadBtn" title="Cancel Upload">✕
287
+ Cancel</button>
288
+ </div>
289
+ </div>
290
+ </section>
291
+ </div>
292
+ </aside>
293
+
294
+ <!-- CHAT AREA (CENTER) -->
295
+ <section class="chat-container glass-panel">
296
+ <!-- Bucket Filter -->
297
+ <div class="chat-bucket-filter">
298
+ <span class="filter-label">🔍 Querying:</span>
299
+ <div class="custom-select compact" id="chatBucketWrapper">
300
+ <div class="select-trigger" id="chatBucketTrigger">
301
+ <span class="select-value">All Documents</span>
302
+ <span class="select-arrow">▼</span>
303
+ </div>
304
+ <div class="select-options" id="chatBucketOptions"></div>
305
+ <input type="hidden" id="chatBucketSelect" value="">
306
+ </div>
307
+ <button class="btn btn-new-chat" id="newChatBtn" title="Start New Chat">➕ New Chat</button>
308
+ <button class="btn btn-clear-chat" id="clearChatBtnTop" title="Clear Current Chat">Clear</button>
309
+ </div>
310
+
311
+ <!-- Messages -->
312
+ <div class="chat-messages" id="chatMessages">
313
+ <!-- Document Summary Panel -->
314
+ <div class="summary-panel hidden" id="summaryPanel">
315
+ <div class="summary-header">
316
+ <span class="summary-icon">📄</span>
317
+ <span class="summary-title" id="summaryTitle">Document Summary</span>
318
+ </div>
319
+ <div class="summary-content" id="summaryContent">
320
+ <div class="summary-text" id="summaryText"></div>
321
+ </div>
322
+ <button class="summary-close" id="summaryClose" title="Close summary">✕</button>
323
+ </div>
324
+
325
+ <div class="welcome-screen" id="welcomeScreen">
326
+ <div class="welcome-icon"><img src="/images/WhatsApp Image 2025-12-23 at 5.10.00 PM.jpeg"
327
+ alt="Logo"
328
+ style="width: 220px; height: auto; filter: invert(1); mix-blend-mode: lighten;"></div>
329
+ <h2 class="welcome-title">Welcome to Iribl AI</h2>
330
+ <p class="welcome-subtitle">
331
+ Upload documents, organize into buckets, and ask questions.
332
+ </p>
333
+ </div>
334
+ </div>
335
+
336
+ <!-- Typing Indicator -->
337
+ <div class="typing-indicator hidden" id="typingIndicator">
338
+ <div class="message-avatar">🧠</div>
339
+ <div class="typing-dots">
340
+ <div class="typing-dot"></div>
341
+ <div class="typing-dot"></div>
342
+ <div class="typing-dot"></div>
343
+ </div>
344
+ </div>
345
+
346
+ <!-- Chat Input -->
347
+ <div class="chat-input-container">
348
+ <div class="chat-input-wrapper">
349
+ <textarea class="chat-input" id="chatInput" placeholder="Ask anything about your documents..."
350
+ rows="1"></textarea>
351
+ <button class="send-btn" id="sendBtn" disabled title="Send">➤</button>
352
+ <button class="stop-btn hidden" id="stopBtn" title="Stop generating">■</button>
353
+ </div>
354
+ </div>
355
+ </section>
356
+
357
+ <!-- RIGHT SIDEBAR -->
358
+ <aside class="sidebar sidebar-right" id="rightSidebar">
359
+ <div class="sidebar-toggle" id="rightToggle" title="Toggle sidebar">
360
+ <span class="toggle-icon">▶</span>
361
+ </div>
362
+
363
+ <div class="sidebar-content">
364
+ <!-- Documents -->
365
+ <section class="sidebar-section glass-panel documents-section collapsible">
366
+ <div class="section-header" data-target="documentsBody">
367
+ <h3 class="sidebar-title">
368
+ <span>📚</span> Documents
369
+ <span id="docCount" class="doc-count">(0)</span>
370
+ </h3>
371
+ <span class="collapse-icon">▼</span>
372
+ </div>
373
+ <div class="section-body documents-body" id="documentsBody">
374
+ <div class="documents-list" id="documentsList">
375
+ <div class="empty-state">
376
+ <div class="empty-icon">📭</div>
377
+ <div class="empty-text">No documents yet</div>
378
+ </div>
379
+ </div>
380
+ </div>
381
+ </section>
382
+
383
+ <!-- Chat History -->
384
+ <section class="sidebar-section glass-panel chat-history-section collapsible">
385
+ <div class="section-header" data-target="chatHistoryBody">
386
+ <h3 class="sidebar-title">
387
+ <span>💬</span> Chat History
388
+ <span id="chatHistoryCount" class="doc-count">(0)</span>
389
+ </h3>
390
+ <div class="section-actions">
391
+ <button class="btn btn-ghost" id="clearChatBtn" title="Clear current chat">🗑️</button>
392
+ <span class="collapse-icon">▼</span>
393
+ </div>
394
+ </div>
395
+ <div class="section-body chat-history-body" id="chatHistoryBody">
396
+ <div class="chat-history-list" id="chatHistoryList">
397
+ <div class="empty-state small">
398
+ <div class="empty-text">No chats yet</div>
399
+ </div>
400
+ </div>
401
+ </div>
402
+ </section>
403
+ </div>
404
+ </aside>
405
+ </main>
406
+ </div>
407
+
408
+ <script src="/js/app.js"></script>
409
+ </body>
410
+
411
+ </html>
static/js/app.js ADDED
@@ -0,0 +1,1798 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Iribl AI - Document Intelligence Application
3
+ * With Dual Sidebars, Collapsible Sections, and Animated Dropdowns
4
+ */
5
+
6
+ // ==================== App State ====================
7
+ const state = {
8
+ token: localStorage.getItem('Iribl AI_token'),
9
+ user: JSON.parse(localStorage.getItem('Iribl AI_user') || 'null'),
10
+ documents: [],
11
+ buckets: [],
12
+ employees: [],
13
+ messages: [],
14
+ summaries: {}, // doc_id -> summary text cache
15
+ selectedDocument: null, // Currently selected document for summary display
16
+ selectedBucket: '',
17
+ chatBucket: '',
18
+ isLoading: false,
19
+ currentRole: 'admin',
20
+ // Chat History
21
+ chatHistory: JSON.parse(localStorage.getItem('Iribl AI_chat_history') || '[]'),
22
+ currentChatId: null,
23
+ // Upload cancellation
24
+ uploadCancelled: false,
25
+ currentUploadAbortController: null,
26
+ // Stream abort controller for stopping generation
27
+ streamAbortController: null
28
+ };
29
+
30
+ // ==================== DOM Elements ====================
31
+ const elements = { // DOM references looked up once by id when this script is evaluated; an id that is absent from the page yields null
32
+ // Auth
33
+ authModal: document.getElementById('authModal'),
34
+ loginForm: document.getElementById('loginForm'),
35
+ registerForm: document.getElementById('registerForm'),
36
+ employeeLoginForm: document.getElementById('employeeLoginForm'),
37
+ authTabs: document.getElementById('authTabs'),
38
+ loginError: document.getElementById('loginError'),
39
+ registerError: document.getElementById('registerError'),
40
+ employeeLoginError: document.getElementById('employeeLoginError'),
41
+
42
+ // Modals
43
+ addEmployeeModal: document.getElementById('addEmployeeModal'),
44
+ addEmployeeForm: document.getElementById('addEmployeeForm'),
45
+ addEmployeeError: document.getElementById('addEmployeeError'),
46
+ addEmployeeBtn: document.getElementById('addEmployeeBtn'),
47
+ cancelAddEmployee: document.getElementById('cancelAddEmployee'),
48
+ createBucketModal: document.getElementById('createBucketModal'),
49
+ createBucketForm: document.getElementById('createBucketForm'),
50
+ createBucketError: document.getElementById('createBucketError'),
51
+ createBucketBtn: document.getElementById('createBucketBtn'),
52
+ cancelCreateBucket: document.getElementById('cancelCreateBucket'),
53
+ docViewerModal: document.getElementById('docViewerModal'),
54
+ docViewerTitle: document.getElementById('docViewerTitle'),
55
+ docViewerContent: document.getElementById('docViewerContent'),
56
+ closeDocViewer: document.getElementById('closeDocViewer'),
57
+
58
+ // Sidebars
59
+ leftSidebar: document.getElementById('leftSidebar'),
60
+ rightSidebar: document.getElementById('rightSidebar'),
61
+ leftToggle: document.getElementById('leftToggle'),
62
+ rightToggle: document.getElementById('rightToggle'),
63
+
64
+ // App
65
+ appContainer: document.getElementById('appContainer'),
66
+ userName: document.getElementById('userName'),
67
+ userAvatar: document.getElementById('userAvatar'),
68
+ userRole: document.getElementById('userRole'),
69
+ logoutBtn: document.getElementById('logoutBtn'),
70
+
71
+ // Admin
72
+ adminSection: document.getElementById('adminSection'),
73
+ employeesList: document.getElementById('employeesList'),
74
+
75
+ // Buckets
76
+ bucketsList: document.getElementById('bucketsList'),
77
+
78
+ // Custom Dropdowns (wrapper / trigger / options container / hidden input holding the selected value)
79
+ uploadBucketWrapper: document.getElementById('uploadBucketWrapper'),
80
+ uploadBucketTrigger: document.getElementById('uploadBucketTrigger'),
81
+ uploadBucketOptions: document.getElementById('uploadBucketOptions'),
82
+ uploadBucketSelect: document.getElementById('uploadBucketSelect'),
83
+ chatBucketWrapper: document.getElementById('chatBucketWrapper'),
84
+ chatBucketTrigger: document.getElementById('chatBucketTrigger'),
85
+ chatBucketOptions: document.getElementById('chatBucketOptions'),
86
+ chatBucketSelect: document.getElementById('chatBucketSelect'),
87
+
88
+ // Upload
89
+ uploadZone: document.getElementById('uploadZone'),
90
+ fileInput: document.getElementById('fileInput'),
91
+ uploadProgress: document.getElementById('uploadProgress'),
92
+ uploadStatus: document.getElementById('uploadStatus'),
93
+ progressFill: document.getElementById('progressFill'),
94
+ cancelUploadBtn: document.getElementById('cancelUploadBtn'),
95
+
96
+ // Documents
97
+ documentsList: document.getElementById('documentsList'),
98
+ docCount: document.getElementById('docCount'),
99
+
100
+ // Chat
101
+ chatMessages: document.getElementById('chatMessages'),
102
+ welcomeScreen: document.getElementById('welcomeScreen'),
103
+ chatInput: document.getElementById('chatInput'),
104
+ sendBtn: document.getElementById('sendBtn'),
105
+ stopBtn: document.getElementById('stopBtn'),
106
+ typingIndicator: document.getElementById('typingIndicator'),
107
+ toastContainer: document.getElementById('toastContainer'),
108
+
109
+ // Summary Panel
110
+ summaryPanel: document.getElementById('summaryPanel'),
111
+ summaryTitle: document.getElementById('summaryTitle'),
112
+ summaryText: document.getElementById('summaryText'),
113
+ summaryClose: document.getElementById('summaryClose'),
114
+
115
+ // Chat History
116
+ newChatBtn: document.getElementById('newChatBtn'),
117
+ clearChatBtn: document.getElementById('clearChatBtn'),
118
+ clearChatBtnTop: document.getElementById('clearChatBtnTop'),
119
+ chatHistoryList: document.getElementById('chatHistoryList'),
120
+ chatHistoryCount: document.getElementById('chatHistoryCount'),
121
+
122
+ // Mobile Navigation
123
+ mobileNav: document.getElementById('mobileNav'),
124
+ mobileBackdrop: document.getElementById('mobileBackdrop'),
125
+ mobileLeftToggle: document.getElementById('mobileLeftToggle'),
126
+ mobileChatToggle: document.getElementById('mobileChatToggle'),
127
+ mobileRightToggle: document.getElementById('mobileRightToggle')
128
+ };
129
+
130
+ // ==================== Toast ====================
131
+ function showToast(message, type = 'info') { // Show a transient notification in the toast container; `message` is rendered as plain text, `type` ('success' | 'error' | 'info') selects the icon and CSS class
132
+ const icons = { success: '✅', error: '❌', info: 'ℹ️' };
133
+ const toast = Object.assign(document.createElement('div'), { className: `toast ${type}` });
134
+ toast.innerHTML = `<span class="toast-icon">${icons[type] || icons.info}</span><span class="toast-message"></span><button class="toast-close">✕</button>`; // static markup only; an unknown type falls back to the info icon instead of printing "undefined"
135
+ toast.querySelector('.toast-message').textContent = message; // textContent, not innerHTML: the message may carry server- or user-supplied text and must not be parsed as HTML
136
+ elements.toastContainer.appendChild(toast);
137
+ toast.querySelector('.toast-close').addEventListener('click', () => toast.remove());
138
+ setTimeout(() => { if (toast.parentElement) toast.remove(); }, 4000); // auto-dismiss after 4 s unless the user already closed it
139
+ }
140
+
141
+ // ==================== Sidebar Toggle ====================
142
+ function initSidebars() {
143
+ elements.leftToggle.addEventListener('click', () => {
144
+ elements.leftSidebar.classList.toggle('collapsed');
145
+ const icon = elements.leftToggle.querySelector('.toggle-icon');
146
+ icon.textContent = elements.leftSidebar.classList.contains('collapsed') ? '▶' : '◀';
147
+ });
148
+
149
+ elements.rightToggle.addEventListener('click', () => {
150
+ elements.rightSidebar.classList.toggle('collapsed');
151
+ const icon = elements.rightToggle.querySelector('.toggle-icon');
152
+ icon.textContent = elements.rightSidebar.classList.contains('collapsed') ? '◀' : '▶';
153
+ });
154
+ }
155
+
156
+ // ==================== Mobile Navigation ====================
157
+ function initMobileNavigation() {
158
+ // Check if we're on mobile
159
+ const isMobile = () => window.innerWidth <= 768;
160
+
161
+ // Close all sidebars on mobile
162
+ function closeMobileSidebars() {
163
+ elements.leftSidebar.classList.remove('mobile-open');
164
+ elements.rightSidebar.classList.remove('mobile-open');
165
+ elements.mobileBackdrop.classList.remove('active');
166
+ document.body.style.overflow = '';
167
+
168
+ // Reset nav button active states
169
+ elements.mobileLeftToggle.classList.remove('active');
170
+ elements.mobileRightToggle.classList.remove('active');
171
+ elements.mobileChatToggle.classList.add('active');
172
+ }
173
+
174
+ // Open left sidebar (Menu)
175
+ function openLeftSidebar() {
176
+ closeMobileSidebars();
177
+ elements.leftSidebar.classList.add('mobile-open');
178
+ elements.mobileBackdrop.classList.add('active');
179
+ document.body.style.overflow = 'hidden';
180
+
181
+ elements.mobileLeftToggle.classList.add('active');
182
+ elements.mobileChatToggle.classList.remove('active');
183
+ }
184
+
185
+ // Open right sidebar (Docs)
186
+ function openRightSidebar() {
187
+ closeMobileSidebars();
188
+ elements.rightSidebar.classList.add('mobile-open');
189
+ elements.mobileBackdrop.classList.add('active');
190
+ document.body.style.overflow = 'hidden';
191
+
192
+ elements.mobileRightToggle.classList.add('active');
193
+ elements.mobileChatToggle.classList.remove('active');
194
+ }
195
+
196
+ // Mobile nav button handlers
197
+ elements.mobileLeftToggle.addEventListener('click', () => {
198
+ if (elements.leftSidebar.classList.contains('mobile-open')) {
199
+ closeMobileSidebars();
200
+ } else {
201
+ openLeftSidebar();
202
+ }
203
+ });
204
+
205
+ elements.mobileChatToggle.addEventListener('click', () => {
206
+ closeMobileSidebars();
207
+ });
208
+
209
+ elements.mobileRightToggle.addEventListener('click', () => {
210
+ if (elements.rightSidebar.classList.contains('mobile-open')) {
211
+ closeMobileSidebars();
212
+ } else {
213
+ openRightSidebar();
214
+ }
215
+ });
216
+
217
+ // Close sidebar when backdrop is clicked
218
+ elements.mobileBackdrop.addEventListener('click', closeMobileSidebars);
219
+
220
+ // Close sidebar on window resize to desktop
221
+ window.addEventListener('resize', () => {
222
+ if (!isMobile()) {
223
+ closeMobileSidebars();
224
+ // Reset any mobile-specific classes
225
+ elements.leftSidebar.classList.remove('mobile-open');
226
+ elements.rightSidebar.classList.remove('mobile-open');
227
+ }
228
+ });
229
+
230
+ // Close sidebar when starting a new chat or after uploading (for better UX)
231
+ const originalStartNewChat = window.startNewChat;
232
+ if (typeof originalStartNewChat === 'function') {
233
+ window.startNewChat = function () {
234
+ if (isMobile()) closeMobileSidebars();
235
+ return originalStartNewChat.apply(this, arguments);
236
+ };
237
+ }
238
+
239
+ // Handle swipe gestures (optional enhancement)
240
+ let touchStartX = 0;
241
+ let touchEndX = 0;
242
+
243
+ document.addEventListener('touchstart', (e) => {
244
+ touchStartX = e.changedTouches[0].screenX;
245
+ }, { passive: true });
246
+
247
+ document.addEventListener('touchend', (e) => {
248
+ if (!isMobile()) return;
249
+
250
+ touchEndX = e.changedTouches[0].screenX;
251
+ const swipeDistance = touchEndX - touchStartX;
252
+ const minSwipeDistance = 80;
253
+
254
+ // Swipe right from left edge - open left sidebar
255
+ if (touchStartX < 30 && swipeDistance > minSwipeDistance) {
256
+ openLeftSidebar();
257
+ }
258
+
259
+ // Swipe left from right edge - open right sidebar
260
+ if (touchStartX > window.innerWidth - 30 && swipeDistance < -minSwipeDistance) {
261
+ openRightSidebar();
262
+ }
263
+
264
+ // Swipe to close sidebars
265
+ if (elements.leftSidebar.classList.contains('mobile-open') && swipeDistance < -minSwipeDistance) {
266
+ closeMobileSidebars();
267
+ }
268
+
269
+ if (elements.rightSidebar.classList.contains('mobile-open') && swipeDistance > minSwipeDistance) {
270
+ closeMobileSidebars();
271
+ }
272
+ }, { passive: true });
273
+ }
274
+
275
+ // ==================== Collapsible Sections ====================
276
/**
 * Attach a click listener to the header of every `.collapsible` section so
 * that clicking the header toggles the section's `collapsed` class.
 * Clicks that originate inside a `.btn` (header action buttons) are ignored.
 */
function initCollapsible() {
    const sectionHeaders = document.querySelectorAll('.collapsible .section-header');

    for (const sectionHeader of sectionHeaders) {
        sectionHeader.addEventListener('click', (event) => {
            const cameFromActionButton = event.target.closest('.btn');
            if (!cameFromActionButton) {
                sectionHeader.closest('.collapsible').classList.toggle('collapsed');
            }
        });
    }
}
287
+
288
+ // ==================== Custom Dropdowns ====================
289
/**
 * Install the open/close behaviour of the two custom bucket dropdowns.
 *
 * - A click anywhere in the document closes every open `.custom-select`
 *   that does not contain the click target.
 * - Clicking a dropdown trigger toggles its own wrapper and closes the other.
 */
function initCustomDropdowns() {
    // Outside-click dismissal.
    document.addEventListener('click', (event) => {
        for (const dropdown of document.querySelectorAll('.custom-select.open')) {
            if (dropdown.contains(event.target)) continue;
            dropdown.classList.remove('open');
        }
    });

    // Wrappers are looked up on `elements` at click time, as in the inline handlers.
    const wireTrigger = (triggerKey, ownWrapperKey, otherWrapperKey) => {
        elements[triggerKey].addEventListener('click', (event) => {
            event.stopPropagation(); // keep the document-level handler from seeing this click
            elements[ownWrapperKey].classList.toggle('open');
            elements[otherWrapperKey].classList.remove('open');
        });
    };

    wireTrigger('uploadBucketTrigger', 'uploadBucketWrapper', 'chatBucketWrapper'); // upload bucket dropdown
    wireTrigger('chatBucketTrigger', 'chatBucketWrapper', 'uploadBucketWrapper');   // chat bucket dropdown
}
313
+
314
/**
 * Rebuild the option lists of the upload and chat bucket dropdowns from
 * `state.buckets` and attach their click handlers.
 *
 * Options are created as DOM nodes (`textContent` / `dataset`) rather than by
 * interpolating bucket names and ids into an HTML string, so a bucket name
 * containing markup is displayed literally instead of being parsed as HTML.
 *
 * The default (empty-value) option is marked active after every rebuild.
 * Selecting an option writes its value to the matching select element, updates
 * the trigger label and closes the dropdown; the chat dropdown additionally
 * stores the value in `state.chatBucket`.
 */
function updateDropdownOptions() {
    // Build one `.select-option` row: <div data-value><span.option-icon>icon</span> label</div>
    const buildOption = (value, icon, label, isActive) => {
        const option = document.createElement('div');
        option.className = isActive ? 'select-option active' : 'select-option';
        option.dataset.value = value;

        const iconSpan = document.createElement('span');
        iconSpan.className = 'option-icon';
        iconSpan.textContent = icon;

        option.append(iconSpan, ` ${label}`);
        return option;
    };

    // Fill one dropdown and wire its options. `onPick` receives the chosen value.
    const populate = (keys, defaultLabel, onPick) => {
        const options = [
            buildOption('', '📂', defaultLabel, true),
            ...state.buckets.map(b => buildOption(b.bucket_id, '📁', b.name, false))
        ];
        elements[keys.options].replaceChildren(...options);

        options.forEach(option => {
            option.addEventListener('click', () => {
                const value = option.dataset.value;
                elements[keys.select].value = value;
                onPick(value);
                elements[keys.trigger].querySelector('.select-value').textContent = option.textContent.trim();
                options.forEach(other => other.classList.remove('active'));
                option.classList.add('active');
                elements[keys.wrapper].classList.remove('open');
            });
        });
    };

    // Upload dropdown options
    populate(
        { options: 'uploadBucketOptions', select: 'uploadBucketSelect', trigger: 'uploadBucketTrigger', wrapper: 'uploadBucketWrapper' },
        'No Bucket (General)',
        () => {}
    );

    // Chat dropdown options (also drives the chat bucket filter)
    populate(
        { options: 'chatBucketOptions', select: 'chatBucketSelect', trigger: 'chatBucketTrigger', wrapper: 'chatBucketWrapper' },
        'All Documents',
        (value) => { state.chatBucket = value; }
    );
}
353
+
354
+ // ==================== Auth ====================
355
/** Show the auth modal and blur the application container behind it. */
function showAuthModal() {
    const { authModal, appContainer } = elements;
    authModal.classList.add('active');
    appContainer.style.filter = 'blur(5px)';
}
359
+
360
/** Hide the auth modal and clear the blur filter on the application container. */
function hideAuthModal() {
    const { authModal, appContainer } = elements;
    authModal.classList.remove('active');
    appContainer.style.filter = '';
}
364
+
365
/**
 * Synchronise the header/user widgets with `state.user`.
 *
 * With no user the auth modal is shown. Otherwise the name, avatar initial and
 * role label are filled in, the admin section is shown only for admins (and the
 * employee list is loaded for them), and the auth modal is hidden.
 */
function updateAuthUI() {
    const currentUser = state.user;
    if (!currentUser) {
        showAuthModal();
        return;
    }

    const isAdmin = currentUser.role === 'admin';

    elements.userName.textContent = currentUser.username;
    elements.userAvatar.textContent = currentUser.username.charAt(0).toUpperCase();
    elements.userRole.textContent = isAdmin ? 'Admin' : 'Employee';

    // `hidden` is forced on for non-admins and off for admins.
    elements.adminSection.classList.toggle('hidden', !isAdmin);
    if (isAdmin) {
        loadEmployees();
    }

    hideAuthModal();
}
382
+
383
// Role tabs: switch between the admin forms (login/register) and the employee login form.
for (const roleTab of document.querySelectorAll('.role-tab')) {
    roleTab.addEventListener('click', () => {
        for (const other of document.querySelectorAll('.role-tab')) {
            other.classList.remove('active');
        }
        roleTab.classList.add('active');
        state.currentRole = roleTab.dataset.role;

        const adminChosen = state.currentRole === 'admin';
        // Admin: auth tabs + login form visible. Employee: only the employee login form.
        elements.authTabs.classList.toggle('hidden', !adminChosen);
        elements.loginForm.classList.toggle('hidden', !adminChosen);
        elements.registerForm.classList.add('hidden');
        elements.employeeLoginForm.classList.toggle('hidden', adminChosen);
    });
}

// Auth tabs: show the login or the register form depending on the tab's data-tab value.
for (const authTab of document.querySelectorAll('.auth-tab')) {
    authTab.addEventListener('click', () => {
        for (const other of document.querySelectorAll('.auth-tab')) {
            other.classList.remove('active');
        }
        authTab.classList.add('active');

        const chosenTab = authTab.dataset.tab;
        const hideLogin = chosenTab !== 'login';
        const hideRegister = chosenTab !== 'register';
        elements.loginForm.classList.toggle('hidden', hideLogin);
        elements.registerForm.classList.toggle('hidden', hideRegister);
    });
}
414
+
415
/**
 * Shared submit handler for the admin-login, admin-register and employee-login forms.
 *
 * Shows the button loader, POSTs a JSON payload built from the form fields, and on
 * success stores the session (in `state` and `localStorage`), refreshes the UI and
 * data lists, and shows a toast. On a non-OK response the server's `error` text is
 * shown in the form's error element; if the server sent none, a generic message is
 * shown instead of an empty box. Any thrown error (network failure, non-JSON body,
 * or an exception while refreshing the UI) is reported as "Connection error".
 * The button is always restored in `finally`.
 *
 * @param {SubmitEvent} event - the form's submit event
 * @param {Object} options
 * @param {string} options.url - endpoint to POST to
 * @param {function(FormData): Object} options.buildPayload - maps form fields to the JSON body
 * @param {string} options.errorKey - key on `elements` of the error display element
 * @param {string} options.successMessage - toast text shown on success
 */
async function submitAuthForm(event, { url, buildPayload, errorKey, successMessage }) {
    event.preventDefault();
    const form = event.target;
    const formData = new FormData(form);
    const btn = form.querySelector('.auth-btn');
    const errorElement = elements[errorKey];

    const showError = (text) => {
        errorElement.textContent = text;
        errorElement.classList.remove('hidden');
    };

    btn.querySelector('.btn-text').classList.add('hidden');
    btn.querySelector('.btn-loader').classList.remove('hidden');
    errorElement.classList.add('hidden');

    try {
        const response = await fetch(url, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(buildPayload(formData))
        });
        const data = await response.json();
        if (response.ok) {
            state.token = data.token;
            state.user = { user_id: data.user_id, username: data.username, role: data.role };
            localStorage.setItem('Iribl AI_token', state.token);
            localStorage.setItem('Iribl AI_user', JSON.stringify(state.user));
            updateAuthUI();
            loadBuckets();
            loadDocuments();
            loadChatHistoryFromServer();
            showToast(successMessage, 'success');
        } else {
            showError(data.error || 'Request failed');
        }
    } catch (error) {
        showError('Connection error');
    } finally {
        btn.querySelector('.btn-text').classList.remove('hidden');
        btn.querySelector('.btn-loader').classList.add('hidden');
    }
}

// Admin Login
elements.loginForm.addEventListener('submit', (e) => submitAuthForm(e, {
    url: '/api/auth/login',
    buildPayload: (formData) => ({ username: formData.get('username'), password: formData.get('password'), role: 'admin' }),
    errorKey: 'loginError',
    successMessage: 'Welcome back!'
}));

// Admin Register
elements.registerForm.addEventListener('submit', (e) => submitAuthForm(e, {
    url: '/api/auth/register/admin',
    buildPayload: (formData) => ({ username: formData.get('username'), email: formData.get('email'), password: formData.get('password') }),
    errorKey: 'registerError',
    successMessage: 'Account created!'
}));

// Employee Login (the form's `email` field is sent as `username`)
elements.employeeLoginForm.addEventListener('submit', (e) => submitAuthForm(e, {
    url: '/api/auth/login',
    buildPayload: (formData) => ({ username: formData.get('email'), password: formData.get('password'), role: 'employee' }),
    errorKey: 'employeeLoginError',
    successMessage: 'Welcome!'
}));
528
+
529
// Logout: drop the session and every piece of per-user data held in memory or
// rendered in the DOM, so nothing from the previous account is left behind
// (cached summaries, employee list, bucket list, selected document/bucket).
elements.logoutBtn.addEventListener('click', () => {
    state.token = null;
    state.user = null;
    state.documents = [];
    state.buckets = [];
    state.messages = [];
    state.employees = [];
    state.summaries = {};
    state.selectedDocument = null;
    state.selectedBucket = '';
    state.chatBucket = '';
    // NOTE(review): state.currentChatId and the stored chat history are managed
    // outside this handler's view — confirm they are reset for the next user.

    localStorage.removeItem('Iribl AI_token');
    localStorage.removeItem('Iribl AI_user');

    elements.summaryPanel.classList.add('hidden');
    updateAuthUI();
    renderBuckets();
    renderEmployees();
    renderDocuments();
    renderMessages();
    showToast('Logged out', 'info');
});
543
+
544
+ // ==================== Employees ====================
545
/**
 * Fetch the employee list for the signed-in admin and render it.
 * Does nothing without a token or when the current user is not an admin.
 * A non-OK response is ignored; thrown errors are logged to the console.
 */
async function loadEmployees() {
    const isAdmin = state.user?.role === 'admin';
    if (!(state.token && isAdmin)) return;

    try {
        const authHeaders = { 'Authorization': `Bearer ${state.token}` };
        const response = await fetch('/api/admin/employees', { headers: authHeaders });
        if (!response.ok) return;

        const payload = await response.json();
        state.employees = payload.employees;
        renderEmployees();
    } catch (error) {
        console.error('Failed to load employees:', error);
    }
}
556
+
557
/**
 * Render `state.employees` into the employees list.
 *
 * Rows are built as DOM nodes: the email/username is assigned via `textContent`
 * and the remove button uses `addEventListener`, so server-provided values are
 * never parsed as HTML or spliced into an inline `onclick` string.
 * A missing `state.employees` is treated as an empty list.
 */
function renderEmployees() {
    const employees = state.employees || [];

    if (employees.length === 0) {
        // Static markup only — no user data is interpolated here.
        elements.employeesList.innerHTML = `<div class="empty-state small"><div class="empty-text">No employees</div></div>`;
        return;
    }

    const rows = employees.map(emp => {
        const row = document.createElement('div');
        row.className = 'employee-item';

        const label = document.createElement('span');
        label.className = 'employee-email';
        label.textContent = emp.email || emp.username;

        const removeBtn = document.createElement('button');
        removeBtn.className = 'btn btn-ghost';
        removeBtn.title = 'Remove';
        removeBtn.textContent = '🗑️';
        removeBtn.addEventListener('click', () => deleteEmployee(emp.user_id));

        row.append(label, removeBtn);
        return row;
    });

    elements.employeesList.replaceChildren(...rows);
}
569
+
570
// Open the "add employee" modal with a clean form and no stale error.
elements.addEmployeeBtn.addEventListener('click', (event) => {
    event.stopPropagation();
    const { addEmployeeModal, addEmployeeError, addEmployeeForm } = elements;
    addEmployeeModal.classList.add('active');
    addEmployeeError.classList.add('hidden');
    addEmployeeForm.reset();
});

// Cancel simply closes the modal.
elements.cancelAddEmployee.addEventListener('click', () => {
    elements.addEmployeeModal.classList.remove('active');
});

// Submit: POST the new employee's email/password, then refresh the list or show the error.
elements.addEmployeeForm.addEventListener('submit', async (event) => {
    event.preventDefault();
    const fields = new FormData(event.target);
    const submitBtn = event.target.querySelector('.btn-primary');

    const showError = (text) => {
        elements.addEmployeeError.textContent = text;
        elements.addEmployeeError.classList.remove('hidden');
    };

    submitBtn.querySelector('.btn-text').classList.add('hidden');
    submitBtn.querySelector('.btn-loader').classList.remove('hidden');

    try {
        const payload = { email: fields.get('email'), password: fields.get('password') };
        const response = await fetch('/api/admin/employees', {
            method: 'POST',
            headers: { 'Authorization': `Bearer ${state.token}`, 'Content-Type': 'application/json' },
            body: JSON.stringify(payload)
        });
        const result = await response.json();

        if (!response.ok) {
            showError(result.error);
        } else {
            elements.addEmployeeModal.classList.remove('active');
            loadEmployees();
            showToast('Employee added!', 'success');
        }
    } catch (err) {
        showError('Connection error');
    }

    // Restore the button whether the request succeeded or not.
    submitBtn.querySelector('.btn-text').classList.remove('hidden');
    submitBtn.querySelector('.btn-loader').classList.add('hidden');
});
608
+
609
/**
 * Delete an employee on the server and, on success, remove it from
 * `state.employees` and re-render the list.
 *
 * A non-OK response now produces an error toast instead of being silently
 * ignored, and the id is URL-encoded before being placed in the request path.
 *
 * @param {string} employeeId - `user_id` of the employee to remove
 */
async function deleteEmployee(employeeId) {
    try {
        const response = await fetch(`/api/admin/employees/${encodeURIComponent(employeeId)}`, {
            method: 'DELETE',
            headers: { 'Authorization': `Bearer ${state.token}` }
        });
        if (response.ok) {
            state.employees = state.employees.filter(e => e.user_id !== employeeId);
            renderEmployees();
            showToast('Employee removed', 'success');
        } else {
            showToast('Failed to remove employee', 'error');
        }
    } catch (error) {
        showToast('Failed to remove employee', 'error');
    }
}
619
+
620
+ // ==================== Buckets ====================
621
/**
 * Fetch the bucket list for the signed-in user, then re-render the bucket
 * sidebar and rebuild both bucket dropdowns. No-op without a token.
 * A non-OK response is ignored; thrown errors are logged to the console.
 */
async function loadBuckets() {
    if (!state.token) return;

    try {
        const authHeaders = { 'Authorization': `Bearer ${state.token}` };
        const response = await fetch('/api/buckets', { headers: authHeaders });
        if (!response.ok) return;

        const payload = await response.json();
        state.buckets = payload.buckets;
        renderBuckets();
        updateDropdownOptions();
    } catch (error) {
        console.error('Failed to load buckets:', error);
    }
}
633
+
634
/**
 * Render the bucket sidebar: an "All Documents" entry followed by one row per
 * bucket in `state.buckets` (name, document count, delete button). The row
 * matching `state.selectedBucket` gets the `active` class.
 *
 * Rows are built as DOM nodes with `textContent` and `addEventListener`, so a
 * bucket name or id coming from the server is never parsed as HTML or spliced
 * into an inline `onclick` string.
 */
function renderBuckets() {
    const makeItem = (isActive) => {
        const item = document.createElement('div');
        item.className = isActive ? 'bucket-item active' : 'bucket-item';
        return item;
    };
    const makeName = (text) => {
        const name = document.createElement('span');
        name.className = 'bucket-name';
        name.textContent = text;
        return name;
    };

    // "All Documents": the whole row is clickable.
    const allItem = makeItem(state.selectedBucket === '');
    allItem.addEventListener('click', () => selectBucket(''));
    allItem.append(makeName('📂 All Documents'));

    const bucketItems = state.buckets.map(b => {
        // The inline handlers passed the id as a string literal; keep that.
        const id = String(b.bucket_id);

        const item = makeItem(state.selectedBucket === b.bucket_id);
        item.dataset.id = id;

        const name = makeName(`📁 ${b.name}`);
        name.addEventListener('click', () => selectBucket(id));

        const count = document.createElement('span');
        count.className = 'bucket-count';
        count.textContent = `${b.doc_count}`;

        const deleteBtn = document.createElement('button');
        deleteBtn.className = 'btn btn-ghost bucket-delete';
        deleteBtn.textContent = '🗑️';
        deleteBtn.addEventListener('click', (event) => {
            event.stopPropagation();
            deleteBucket(id);
        });

        item.append(name, count, deleteBtn);
        return item;
    });

    elements.bucketsList.replaceChildren(allItem, ...bucketItems);
}
649
+
650
/**
 * Make `bucketId` the active bucket everywhere: sidebar selection, chat filter,
 * and both dropdowns (hidden value, trigger label, active option). Then
 * re-render buckets, reload documents and re-render the chat history.
 *
 * @param {string} bucketId - bucket id, or '' for "all documents / no bucket"
 */
function selectBucket(bucketId) {
    state.selectedBucket = bucketId;
    state.chatBucket = bucketId; // chat filter follows the sidebar selection

    // Labels: the bucket's name when one is selected, otherwise per-dropdown defaults.
    let chatLabel = 'All Documents';
    let uploadLabel = 'No Bucket (General)';
    if (bucketId) {
        const match = state.buckets.find(b => b.bucket_id === bucketId);
        const resolvedName = match?.name || 'Selected Bucket';
        chatLabel = resolvedName;
        uploadLabel = resolvedName;
    }

    const syncDropdown = (selectEl, triggerEl, optionsEl, label) => {
        selectEl.value = bucketId;
        triggerEl.querySelector('.select-value').textContent = label;
        for (const option of optionsEl.querySelectorAll('.select-option')) {
            option.classList.toggle('active', option.dataset.value === bucketId);
        }
    };

    syncDropdown(elements.uploadBucketSelect, elements.uploadBucketTrigger, elements.uploadBucketOptions, uploadLabel);
    syncDropdown(elements.chatBucketSelect, elements.chatBucketTrigger, elements.chatBucketOptions, chatLabel);

    // Refresh everything that is filtered by bucket.
    renderBuckets();
    loadDocuments();
    renderChatHistory();
}
680
+
681
// Open the "create bucket" modal with a clean form and no stale error.
elements.createBucketBtn.addEventListener('click', (event) => {
    event.stopPropagation();
    const { createBucketModal, createBucketError, createBucketForm } = elements;
    createBucketModal.classList.add('active');
    createBucketError.classList.add('hidden');
    createBucketForm.reset();
});

// Cancel simply closes the modal.
elements.cancelCreateBucket.addEventListener('click', () => {
    elements.createBucketModal.classList.remove('active');
});

// Submit: POST the bucket name/description, then refresh the bucket list or show the error.
elements.createBucketForm.addEventListener('submit', async (event) => {
    event.preventDefault();
    const fields = new FormData(event.target);
    const submitBtn = event.target.querySelector('.btn-primary');

    const showError = (text) => {
        elements.createBucketError.textContent = text;
        elements.createBucketError.classList.remove('hidden');
    };

    submitBtn.querySelector('.btn-text').classList.add('hidden');
    submitBtn.querySelector('.btn-loader').classList.remove('hidden');

    try {
        const payload = { name: fields.get('name'), description: fields.get('description') };
        const response = await fetch('/api/buckets', {
            method: 'POST',
            headers: { 'Authorization': `Bearer ${state.token}`, 'Content-Type': 'application/json' },
            body: JSON.stringify(payload)
        });
        const result = await response.json();

        if (!response.ok) {
            showError(result.error);
        } else {
            elements.createBucketModal.classList.remove('active');
            loadBuckets();
            showToast('Bucket created!', 'success');
        }
    } catch (err) {
        showError('Connection error');
    }

    // Restore the button whether the request succeeded or not.
    submitBtn.querySelector('.btn-text').classList.remove('hidden');
    submitBtn.querySelector('.btn-loader').classList.add('hidden');
});
719
+
720
/**
 * Delete a bucket on the server and refresh the bucket and document lists.
 *
 * If the deleted bucket was the sidebar selection or the chat filter, the
 * selection is reset through `selectBucket('')`, which also resets
 * `state.chatBucket` and both dropdowns — otherwise uploads and chat would keep
 * targeting an id that no longer exists. A non-OK response now produces an
 * error toast instead of being silently ignored.
 *
 * @param {string} bucketId - id of the bucket to delete
 */
async function deleteBucket(bucketId) {
    try {
        const response = await fetch(`/api/buckets/${encodeURIComponent(bucketId)}`, {
            method: 'DELETE',
            headers: { 'Authorization': `Bearer ${state.token}` }
        });
        if (!response.ok) {
            showToast('Failed to delete bucket', 'error');
            return;
        }

        const wasInUse = state.selectedBucket === bucketId || state.chatBucket === bucketId;
        if (wasInUse) {
            selectBucket(''); // resets filters + dropdown labels and reloads documents
        } else {
            loadDocuments();
        }
        loadBuckets();
        showToast('Bucket deleted', 'success');
    } catch (error) {
        showToast('Failed to delete bucket', 'error');
    }
}
731
+
732
+ // ==================== Documents ====================
733
/**
 * Fetch the document list (filtered by `state.selectedBucket` when one is set)
 * and render it. No-op without a token. A non-OK response is ignored; thrown
 * errors are logged to the console.
 */
async function loadDocuments() {
    if (!state.token) return;

    try {
        const bucketFilter = state.selectedBucket ? `?bucket_id=${state.selectedBucket}` : '';
        const endpoint = '/api/documents' + bucketFilter;
        const authHeaders = { 'Authorization': `Bearer ${state.token}` };

        const response = await fetch(endpoint, { headers: authHeaders });
        if (!response.ok) return;

        const payload = await response.json();
        state.documents = payload.documents;
        renderDocuments();
    } catch (error) {
        console.error('Failed to load documents:', error);
    }
}
747
+
748
/**
 * Render `state.documents` into the documents list and update the counter.
 *
 * Each row shows a type icon, the filename, a relative date, and view/delete
 * buttons; the row matching `state.selectedDocument` gets the `selected` class.
 * Rows are built as DOM nodes with `textContent` and `addEventListener`:
 * previously the filename was interpolated into an HTML string and into a
 * single-quoted inline `onclick` argument, so a filename containing `'`, `"`
 * or `<` broke the handler or injected markup.
 */
function renderDocuments() {
    elements.docCount.textContent = `(${state.documents.length})`;

    if (state.documents.length === 0) {
        // Static markup only — no user data is interpolated here.
        elements.documentsList.innerHTML = `<div class="empty-state"><div class="empty-icon">📭</div><div class="empty-text">No documents yet</div></div>`;
        return;
    }

    const icons = { pdf: '📕', word: '📘', powerpoint: '📙', excel: '📗', image: '🖼️', text: '📄' };

    const makeDiv = (className, text) => {
        const div = document.createElement('div');
        div.className = className;
        if (text !== undefined) div.textContent = text;
        return div;
    };
    const makeButton = (className, title, label, onClick) => {
        const button = document.createElement('button');
        button.className = className;
        button.title = title;
        button.textContent = label;
        button.addEventListener('click', (event) => {
            event.stopPropagation(); // don't also select the row
            onClick();
        });
        return button;
    };

    const rows = state.documents.map(doc => {
        const isSelected = state.selectedDocument === doc.doc_id;
        const row = makeDiv(isSelected ? 'document-item selected' : 'document-item');
        row.dataset.id = doc.doc_id;
        row.addEventListener('click', () => selectDocument(doc.doc_id));

        const info = makeDiv('doc-info');
        info.append(
            makeDiv('doc-name', doc.filename),
            makeDiv('doc-meta', formatDate(doc.created_at))
        );

        row.append(
            makeDiv('doc-icon', icons[doc.doc_type] || '📄'),
            info,
            makeButton('btn btn-ghost doc-view', 'View', '👁️', () => viewDocument(doc.doc_id, doc.filename)),
            makeButton('btn btn-ghost doc-delete', 'Delete', '🗑️', () => deleteDocument(doc.doc_id))
        );
        return row;
    });

    elements.documentsList.replaceChildren(...rows);
}
770
+
771
/**
 * Format a Unix timestamp (seconds) as a short relative/absolute label.
 *
 * @param {number} timestamp - seconds since the Unix epoch
 * @returns {string} 'Just now' (< 1 min), 'Nm ago' (< 1 h), 'Nh ago' (< 24 h),
 *   otherwise the locale date string. Returns '' when the timestamp is missing
 *   or not a valid number, instead of rendering "Invalid Date" (or a 1970 date
 *   for `null`).
 */
function formatDate(timestamp) {
    if (timestamp === null || timestamp === undefined) return '';

    const date = new Date(timestamp * 1000);
    if (Number.isNaN(date.getTime())) return '';

    const diff = new Date() - date; // elapsed milliseconds
    if (diff < 60000) return 'Just now';
    if (diff < 3600000) return `${Math.floor(diff / 60000)}m ago`;
    if (diff < 86400000) return `${Math.floor(diff / 3600000)}h ago`;
    return date.toLocaleDateString();
}
780
+
781
/**
 * Delete a document on the server. On success: remove it from
 * `state.documents`, close the summary panel if it was the selected document,
 * evict its cached summary, re-render the list and reload bucket counts.
 *
 * A non-OK response now produces an error toast instead of being silently
 * ignored, and the id is URL-encoded before being placed in the request path.
 *
 * @param {string} docId - `doc_id` of the document to delete
 */
async function deleteDocument(docId) {
    try {
        const response = await fetch(`/api/documents/${encodeURIComponent(docId)}`, {
            method: 'DELETE',
            headers: { 'Authorization': `Bearer ${state.token}` }
        });
        if (!response.ok) {
            showToast('Failed to delete', 'error');
            return;
        }

        state.documents = state.documents.filter(d => d.doc_id !== docId);
        // Clear selection if the deleted doc was selected
        if (state.selectedDocument === docId) {
            state.selectedDocument = null;
            hideSummary();
        }
        // Remove from summaries cache
        delete state.summaries[docId];
        renderDocuments();
        loadBuckets(); // refresh per-bucket document counts
        showToast('Document deleted', 'success');
    } catch (error) {
        showToast('Failed to delete', 'error');
    }
}
799
+
800
+ // ==================== Document Summary ====================
801
/**
 * Mark a document as selected, refresh the list highlight and show its summary.
 *
 * @param {string} docId - `doc_id` of the document to select
 */
function selectDocument(docId) {
    state.selectedDocument = docId;
    renderDocuments();   // updates the `selected` highlight
    displaySummary(docId);
}
806
+
807
/**
 * Show the summary panel for a document: immediately from `state.summaries`
 * when cached, otherwise with a loading message while the summary is fetched.
 * Does nothing if the document is not in `state.documents`.
 *
 * @param {string} docId - `doc_id` of the document to summarise
 */
async function displaySummary(docId) {
    const target = state.documents.find(d => d.doc_id === docId);
    if (!target) return;

    const cached = state.summaries[docId];
    if (cached) {
        showSummaryPanel(target.filename, cached.summary);
        return;
    }

    // Not cached: show a placeholder, then let fetchSummary fill the panel in.
    showSummaryPanel(target.filename, 'Generating summary...');
    await fetchSummary(docId);
}
821
+
822
/**
 * Fetch a document's summary from the server, cache it in `state.summaries`,
 * and update the summary panel — but only if that document is still the
 * selected one by the time the response arrives.
 *
 * @param {string} docId - `doc_id` of the document to summarise
 */
async function fetchSummary(docId) {
    const isStillSelected = () => state.selectedDocument === docId;

    try {
        const response = await fetch(`/api/documents/${docId}/summary`, {
            headers: { 'Authorization': `Bearer ${state.token}` }
        });
        const payload = await response.json();

        const gotSummary = response.ok && payload.summary;
        if (!gotSummary) {
            // Error state
            if (isStillSelected()) {
                showSummaryPanel(payload.filename || 'Document', 'Unable to generate summary.');
            }
            return;
        }

        // Cache, then display if the user has not moved on.
        state.summaries[docId] = { summary: payload.summary, filename: payload.filename };
        if (isStillSelected()) {
            showSummaryPanel(payload.filename, payload.summary);
        }
    } catch (error) {
        console.error('Failed to fetch summary:', error);
        if (!isStillSelected()) return;

        const knownDoc = state.documents.find(d => d.doc_id === docId);
        showSummaryPanel(knownDoc?.filename || 'Document', 'Failed to load summary.');
    }
}
853
+
854
/**
 * Reveal the summary panel and fill in its title and body text.
 *
 * @param {string} filename - shown as the panel title
 * @param {string} summaryText - shown as the panel body
 */
function showSummaryPanel(filename, summaryText) {
    const { summaryPanel, summaryTitle, summaryText: summaryBody } = elements;
    summaryPanel.classList.remove('hidden');
    summaryTitle.textContent = filename;
    summaryBody.textContent = summaryText;
}
859
+
860
/** Hide the summary panel, clear the document selection and refresh the list highlight. */
function hideSummary() {
    const { summaryPanel } = elements;
    summaryPanel.classList.add('hidden');
    state.selectedDocument = null;
    renderDocuments();
}
865
+
866
/** Wire the summary panel's close button to `hideSummary`. */
function initSummaryPanel() {
    const closeButton = elements.summaryClose;
    closeButton.addEventListener('click', hideSummary);
}
869
+
870
+ // ==================== Document Viewer ====================
871
/**
 * Download a document with the bearer token and open it in a new tab via a
 * blob URL.
 *
 * The blob URL is now released: immediately if the browser blocked the new
 * tab (with a toast telling the user), otherwise after a delay long enough
 * for the new tab to load it. Previously every view leaked one object URL for
 * the lifetime of the page.
 *
 * @param {string} docId - `doc_id` of the document to open
 * @param {string} filename - accepted for caller compatibility; not used here
 */
async function viewDocument(docId, filename) {
    const REVOKE_DELAY_MS = 60000;

    try {
        // Fetch the document with proper authorization
        const response = await fetch(`/api/documents/${docId}/view`, {
            headers: { 'Authorization': `Bearer ${state.token}` }
        });

        if (!response.ok) {
            showToast('Failed to load document', 'error');
            return;
        }

        // Get the blob and create a URL
        const blob = await response.blob();
        const blobUrl = URL.createObjectURL(blob);

        // Open in a new tab. `window.open` returns null when a popup blocker
        // intervenes (likely here, since the call happens after an await).
        const opened = window.open(blobUrl, '_blank');
        if (!opened) {
            URL.revokeObjectURL(blobUrl);
            showToast('Failed to open document', 'error');
            return;
        }

        setTimeout(() => URL.revokeObjectURL(blobUrl), REVOKE_DELAY_MS);
    } catch (error) {
        console.error('Failed to view document:', error);
        showToast('Failed to open document', 'error');
    }
}
894
+
895
+
896
+ elements.closeDocViewer.addEventListener('click', () => elements.docViewerModal.classList.remove('active'));
897
+
898
+ // ==================== Upload ====================
899
+ let currentPollInterval = null; // Track the current polling interval for cancellation
900
+
901
/**
 * Wire the upload zone: click-to-browse, file-input change, drag-and-drop
 * (with `dragover` highlighting) and the cancel button.
 */
function initUpload() {
    const zone = elements.uploadZone;
    const picker = elements.fileInput;

    // Hand a non-empty FileList to uploadFiles as a plain array.
    const startUploadIfAny = (fileList) => {
        if (fileList.length > 0) {
            uploadFiles(Array.from(fileList));
        }
    };

    zone.addEventListener('click', () => picker.click());
    picker.addEventListener('change', (event) => startUploadIfAny(event.target.files));

    zone.addEventListener('dragover', (event) => {
        event.preventDefault(); // required so the element accepts a drop
        zone.classList.add('dragover');
    });
    zone.addEventListener('dragleave', () => {
        zone.classList.remove('dragover');
    });
    zone.addEventListener('drop', (event) => {
        event.preventDefault();
        zone.classList.remove('dragover');
        startUploadIfAny(event.dataTransfer.files);
    });

    // Cancel upload button
    elements.cancelUploadBtn.addEventListener('click', cancelUpload);
}
917
+
918
/**
 * Cancel the upload in progress: flag cancellation, abort the in-flight upload
 * request, stop status polling, reset the upload UI and notify the user.
 */
function cancelUpload() {
    state.uploadCancelled = true;

    // Abort the ongoing fetch request, if any.
    const activeController = state.currentUploadAbortController;
    if (activeController) {
        activeController.abort();
        state.currentUploadAbortController = null;
    }

    // Stop the status-polling timer, if any.
    if (currentPollInterval) {
        clearInterval(currentPollInterval);
        currentPollInterval = null;
    }

    // Reset UI
    const { uploadProgress, uploadZone, fileInput, progressFill } = elements;
    uploadProgress.classList.add('hidden');
    uploadZone.style.pointerEvents = '';
    fileInput.value = '';
    progressFill.style.width = '0%';

    showToast('Upload cancelled', 'info');
}
941
+
942
/**
 * Upload files one at a time to `/api/documents/upload`, targeting the bucket
 * currently chosen in the upload dropdown.
 *
 * Per file: a 202 response means the server processes it in the background, so
 * the status endpoint is polled until done; any other OK response is treated
 * as an immediate result; a non-OK response shows an error toast (using the
 * server's `error` text when the body is JSON, otherwise the HTTP status).
 * Cancellation (`state.uploadCancelled` or an aborted request) stops the loop.
 * When not cancelled, the upload UI is reset and documents/buckets reloaded;
 * on cancel the UI reset is left to `cancelUpload`.
 *
 * @param {File[]} files - files to upload, in order
 */
async function uploadFiles(files) {
    // Reset cancellation state
    state.uploadCancelled = false;

    elements.uploadProgress.classList.remove('hidden');
    elements.uploadZone.style.pointerEvents = 'none'; // block new drops/clicks while busy

    const bucketId = elements.uploadBucketSelect.value;

    // Files go up sequentially; the server handles processing in the background.
    for (const file of files) {
        // Check if cancelled before processing each file
        if (state.uploadCancelled) {
            break;
        }

        elements.uploadStatus.textContent = `Uploading ${file.name}...`;
        elements.progressFill.style.width = '10%'; // Initial progress

        const formData = new FormData();
        formData.append('file', file);
        formData.append('bucket_id', bucketId);

        // Create abort controller for this request
        state.currentUploadAbortController = new AbortController();

        try {
            const response = await fetch('/api/documents/upload', {
                method: 'POST',
                headers: { 'Authorization': `Bearer ${state.token}` },
                body: formData,
                signal: state.currentUploadAbortController.signal
            });

            if (response.status === 202) {
                // Async processing started
                const data = await response.json();
                await pollUploadStatus(data.doc_id, file.name);
            } else if (response.ok) {
                // Instant completion (legacy or small file)
                handleUploadSuccess(await response.json());
            } else {
                // The error body may not be JSON (e.g. a proxy error page).
                const data = await response.json().catch(() => ({}));
                const reason = (data && data.error) || `HTTP ${response.status}`;
                showToast(`Failed: ${file.name} - ${reason}`, 'error');
            }
        } catch (e) {
            if (e.name === 'AbortError') {
                // Upload was cancelled by user
                break;
            }
            console.error(e);
            showToast(`Failed to upload ${file.name}`, 'error');
        }
    }

    // Clean up abort controller
    state.currentUploadAbortController = null;

    // Only update UI if not cancelled (cancelUpload already handles UI reset)
    if (!state.uploadCancelled) {
        elements.uploadProgress.classList.add('hidden');
        elements.uploadZone.style.pointerEvents = '';
        elements.fileInput.value = '';
        elements.progressFill.style.width = '0%';

        // Load documents first so the new selection can be highlighted
        await loadDocuments();
        loadBuckets();
    }
}
1020
+
1021
/**
 * Poll `/api/documents/{docId}/status` every 2 seconds until processing is
 * `completed` or `failed`, updating the upload status text and progress bar.
 *
 * Fixes over the previous version:
 * - Gives up (with an error toast) after a run of consecutive failed status
 *   checks instead of polling forever, which left the upload UI stuck.
 * - Skips a tick while the previous request is still in flight, so a slow
 *   response cannot complete the same upload twice.
 * - Tracks its own interval id, so a late tick from a cancelled upload cannot
 *   clear the timer of a newer upload through the shared `currentPollInterval`.
 *
 * The returned promise always resolves (never rejects) so the caller can move
 * on to the next file.
 *
 * @param {string} docId - id returned by the upload endpoint
 * @param {string} filename - used in status text and toasts
 * @returns {Promise<void>}
 */
async function pollUploadStatus(docId, filename) {
    const POLL_INTERVAL_MS = 2000;
    const MAX_CONSECUTIVE_FAILURES = 15; // ~30 s of continuous failure

    return new Promise((resolve) => {
        let requestInFlight = false;
        let consecutiveFailures = 0;
        let intervalId = null;

        const finish = () => {
            clearInterval(intervalId);
            if (currentPollInterval === intervalId) {
                currentPollInterval = null;
            }
            resolve();
        };

        // Record one failed check; give up once the run of failures is too long.
        const noteFailure = () => {
            consecutiveFailures += 1;
            if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
                showToast(`Processing failed: ${filename} - status unavailable`, 'error');
                finish();
            }
        };

        const tick = async () => {
            // Cancelled, or superseded by a newer poll loop
            if (state.uploadCancelled || currentPollInterval !== intervalId) {
                finish();
                return;
            }
            if (requestInFlight) return;
            requestInFlight = true;

            try {
                const response = await fetch(`/api/documents/${docId}/status`, {
                    headers: { 'Authorization': `Bearer ${state.token}` }
                });

                // State may have changed while the request was pending.
                if (state.uploadCancelled || currentPollInterval !== intervalId) {
                    finish();
                    return;
                }

                if (!response.ok) {
                    noteFailure();
                    return;
                }
                consecutiveFailures = 0;

                const statusData = await response.json();

                // Update UI
                elements.uploadStatus.textContent = `Processing ${filename}: ${statusData.message || '...'}`;
                // Map 0-100 progress to UI width (keeping 10% buffer)
                if (statusData.progress) {
                    elements.progressFill.style.width = `${Math.max(10, statusData.progress)}%`;
                }

                if (statusData.status === 'completed') {
                    if (statusData.result) {
                        handleUploadSuccess(statusData.result);
                    }
                    finish();
                } else if (statusData.status === 'failed') {
                    showToast(`Processing failed: ${filename} - ${statusData.error}`, 'error');
                    finish(); // Resolve anyway to continue with next file
                }
            } catch (e) {
                console.error("Polling error", e);
                noteFailure();
            } finally {
                requestInFlight = false;
            }
        };

        intervalId = setInterval(tick, POLL_INTERVAL_MS);
        currentPollInterval = intervalId; // lets cancelUpload stop this loop
    });
}
1070
+
1071
/**
 * Handle a finished upload: toast the filename, mark the document as selected,
 * and — when the server included a summary — cache it and show it in the
 * summary panel after a short delay.
 *
 * @param {Object} data - upload result; reads `doc_id`, `filename`, `summary`
 */
function handleUploadSuccess(data) {
    const { doc_id: docId, filename, summary } = data;

    showToast(`Ready: ${filename}`, 'success');

    // Cache the summary
    if (summary) {
        state.summaries[docId] = { summary, filename };
    }

    // Auto-select this document; the caller re-renders the list afterwards.
    state.selectedDocument = docId;

    if (!summary) return;

    // Deferred slightly so the panel update lands after the pending re-render.
    setTimeout(() => showSummaryPanel(data.filename, data.summary), 500);
}
1093
+
1094
+ // ==================== Chat ====================
1095
/**
 * Wire the chat composer: auto-growing textarea (capped at 150px), send-button
 * enablement, Enter-to-send (Shift+Enter inserts a newline), and the
 * send/stop buttons.
 */
function initChat() {
    const input = elements.chatInput;

    input.addEventListener('input', () => {
        // Collapse first so scrollHeight reflects the current content.
        input.style.height = 'auto';
        const cappedHeight = Math.min(input.scrollHeight, 150);
        input.style.height = cappedHeight + 'px';

        const hasText = Boolean(input.value.trim());
        elements.sendBtn.disabled = !hasText;
    });

    input.addEventListener('keydown', (event) => {
        const plainEnter = event.key === 'Enter' && !event.shiftKey;
        if (!plainEnter) return;
        event.preventDefault();
        sendMessage();
    });

    elements.sendBtn.addEventListener('click', sendMessage);

    // Stop generation button
    elements.stopBtn.addEventListener('click', stopGeneration);
}
1109
+
1110
/**
 * Abort the in-flight streaming request (if any), restore the composer
 * buttons, and tag a partially generated assistant reply as stopped.
 */
function stopGeneration() {
    const controller = state.streamAbortController;
    if (controller) {
        controller.abort();
        state.streamAbortController = null;
    }

    // Swap the stop button back for the send button.
    elements.stopBtn.classList.add('hidden');
    elements.sendBtn.classList.remove('hidden');
    elements.typingIndicator.classList.add('hidden');
    state.isLoading = false;

    if (state.messages.length > 0) {
        const lastMsg = state.messages[state.messages.length - 1];
        const hasPartialAnswer = lastMsg.role === 'assistant' && Boolean(lastMsg.content);
        if (hasPartialAnswer) {
            // Mark the reply so the truncated text is not mistaken for a full answer.
            lastMsg.content += '\n\n*[Generation stopped]*';
            renderMessages();
            saveCurrentChat();
        }
    }

    showToast('Generation stopped', 'info');
}
1134
+
1135
/**
 * Send the composer text to `/api/chat/stream` and render the streamed reply.
 *
 * The response body is read as a sequence of `data: {json}` lines. Events
 * handled: `sources`, `chunk`/`content` (text deltas), `done`, and `error`.
 *
 * Fixes over the previous version:
 *  - Bytes are decoded in streaming mode and incomplete trailing lines are
 *    buffered, so an event (or a multi-byte character) split across two
 *    network reads is no longer dropped or corrupted.
 *  - The assistant message is held by reference instead of being looked up
 *    as "last element of state.messages", so switching chats mid-stream no
 *    longer throws inside the event handler and loses the reply.
 *  - A stream that closes without a `done` event still gets persisted.
 *  - A delta without a `content` field no longer appends "undefined".
 *
 * @returns {Promise<void>}
 */
async function sendMessage() {
    const message = elements.chatInput.value.trim();
    if (!message || state.isLoading) return;

    elements.chatInput.value = '';
    elements.chatInput.style.height = 'auto';
    elements.sendBtn.disabled = true;
    elements.welcomeScreen.classList.add('hidden');

    // Create a chat ID if this is the first message
    if (state.messages.length === 0 && !state.currentChatId) {
        state.currentChatId = Date.now().toString();
    }

    // Remember which chat this request belongs to; the user may switch away.
    const targetChatId = state.currentChatId;
    const isTargetActive = () => state.currentChatId === targetChatId;

    addMessage('user', message);
    elements.typingIndicator.classList.remove('hidden');
    state.isLoading = true;
    scrollToBottom();

    // Show stop button, hide send button
    elements.sendBtn.classList.add('hidden');
    elements.stopBtn.classList.remove('hidden');

    // Create abort controller for this request
    state.streamAbortController = new AbortController();

    try {
        const response = await fetch('/api/chat/stream', {
            method: 'POST',
            headers: {
                'Authorization': `Bearer ${state.token}`,
                'Content-Type': 'application/json'
            },
            body: JSON.stringify({
                message: message,
                bucket_id: state.chatBucket || null,
                chat_id: state.currentChatId
            }),
            signal: state.streamAbortController.signal
        });

        if (!response.ok) {
            throw new Error('Stream request failed');
        }

        elements.typingIndicator.classList.add('hidden');

        // Placeholder message that the stream fills in; kept by reference.
        const assistantMsg = { role: 'assistant', content: '', sources: [] };
        let streamingElement = null;

        if (isTargetActive()) {
            state.messages.push(assistantMsg);
            renderMessages();
            scrollToBottom();
            // Direct handle on the new bubble for cheap incremental updates.
            const bubbles = elements.chatMessages.querySelectorAll('.message.assistant .message-content');
            streamingElement = bubbles[bubbles.length - 1] || null;
        } else {
            // User already switched chats: attach the reply to the stored chat.
            const parked = state.chatHistory.find(c => c.id === targetChatId);
            if (parked) parked.messages.push(assistantMsg);
        }

        let streamingContent = '';
        let sources = [];
        let settled = false; // true once a done/error event has been handled

        // Throttle DOM updates for smooth rendering (update every 50ms max)
        let lastUpdateTime = 0;
        const UPDATE_INTERVAL = 50; // ms

        // Final render + save, routed to whichever chat owns this reply.
        const persist = () => {
            if (isTargetActive()) {
                renderMessages();
                saveCurrentChat();
                return;
            }
            const parked = state.chatHistory.find(c => c.id === targetChatId);
            if (!parked) return;
            if (!parked.messages.includes(assistantMsg)) parked.messages.push(assistantMsg);
            saveChatHistory();
            syncChatToServer(parked);
        };

        // Handle one complete line of the event stream.
        const handleEvent = (line) => {
            if (!line.startsWith('data: ')) return;

            let data;
            try {
                data = JSON.parse(line.slice(6));
            } catch (e) {
                return; // Skip malformed JSON
            }
            if (!data || typeof data !== 'object') return;

            if (data.type === 'sources') {
                sources = data.sources || [];
            } else if (data.type === 'chunk' || data.type === 'content') {
                // Support both 'chunk' (legacy) and 'content' (specialized queries)
                streamingContent += data.content || '';
                assistantMsg.content = streamingContent;
                assistantMsg.sources = sources;

                const now = Date.now();
                if (streamingElement && now - lastUpdateTime >= UPDATE_INTERVAL) {
                    streamingElement.innerHTML = formatContent(streamingContent);
                    lastUpdateTime = now;
                }
                // No auto-scroll during streaming - stay at current position
            } else if (data.type === 'done') {
                settled = true;
                assistantMsg.sources = sources;
                // Full re-render also flushes any throttled, not-yet-shown text.
                persist();
            } else if (data.type === 'error') {
                settled = true;
                assistantMsg.content = data.content || 'Error generating response';
                if (isTargetActive()) renderMessages();
            }
        };

        const reader = response.body.getReader();
        const decoder = new TextDecoder();
        let buffer = ''; // holds an incomplete trailing line between reads

        while (true) {
            const { done, value } = await reader.read();
            if (done) break;

            // stream:true keeps a split multi-byte character pending instead
            // of emitting U+FFFD replacement characters.
            buffer += decoder.decode(value, { stream: true });
            const lines = buffer.split('\n');
            buffer = lines.pop(); // last piece may be an unfinished line
            lines.forEach(handleEvent);
        }

        // Flush whatever is left in the decoder and the line buffer.
        buffer += decoder.decode();
        if (buffer) handleEvent(buffer);

        // Server closed the stream without a terminal event: keep what we got.
        if (!settled && assistantMsg.content) persist();
    } catch (err) {
        elements.typingIndicator.classList.add('hidden');
        // Only show error if not aborted by user
        if (err.name !== 'AbortError') {
            addMessageToChat(targetChatId, 'assistant', 'Connection error. Please try again.');
        }
    } finally {
        // Cleanup: hide stop button, show send button
        elements.stopBtn.classList.add('hidden');
        elements.sendBtn.classList.remove('hidden');
        state.streamAbortController = null;
        state.isLoading = false;
        // No auto-scroll - user stays at current position
    }
}
1272
+
1273
/**
 * Append a message to the active chat and re-render the transcript.
 * Assigns a timestamp-based chat id on the very first message, and saves
 * the chat once an assistant message completes an exchange.
 *
 * @param {string} role - 'user' or 'assistant'.
 * @param {string} content - Message text.
 * @param {Array} [sources=[]] - Source references attached to the message.
 */
function addMessage(role, content, sources = []) {
    const isFirstMessage = state.messages.length === 0 && !state.currentChatId;
    if (isFirstMessage) {
        state.currentChatId = Date.now().toString();
    }

    state.messages.push({ role, content, sources });
    renderMessages();

    // User messages alone are not persisted; wait for the reply.
    if (role !== 'assistant') return;
    saveCurrentChat();
}
1287
+
1288
+ // Add message to a specific chat (handles case where user switched chats during loading)
1289
/**
 * Append a message to the chat identified by `chatId`.
 *
 * If that chat is the one on screen, the message is pushed, rendered and
 * saved. Otherwise it is appended to the matching entry in the stored
 * history (if one exists), which is then persisted and synced.
 *
 * @param {string} chatId - Id of the chat the message belongs to.
 * @param {string} role - 'user' or 'assistant'.
 * @param {string} content - Message text.
 * @param {Array} [sources=[]] - Source references attached to the message.
 */
function addMessageToChat(chatId, role, content, sources = []) {
    const entry = { role, content, sources };

    if (chatId === state.currentChatId) {
        state.messages.push(entry);
        renderMessages();
        saveCurrentChat();
        return;
    }

    // The user navigated away: update the chat where it lives in history.
    const target = state.chatHistory.find(c => c.id === chatId);
    if (!target) return;

    target.messages.push(entry);
    saveChatHistory();
    syncChatToServer(target);
    renderChatHistory();
    showToast('Response added to previous chat', 'info');
}
1307
+
1308
/**
 * Rebuild the chat transcript from `state.messages`.
 *
 * The summary panel lives inside the chat container and is therefore
 * destroyed on every rebuild; its visibility, title and text are captured
 * first and restored afterwards, and the cached `elements.summary*`
 * references are re-bound to the fresh nodes.
 *
 * Fixes over the previous version:
 *  - Summary title/text are restored with `textContent` instead of being
 *    interpolated into the HTML string, so characters such as `<` or `&`
 *    (e.g. in a filename) can no longer break the markup or inject HTML.
 *  - The avatar character is HTML-escaped.
 *  - In the empty-chat branch the summary panel is re-attached when it was
 *    a child of the chat container; previously `innerHTML = ''` detached it
 *    and the "re-show" step operated on an off-document node.
 */
function renderMessages() {
    const escapeHtml = (text) => String(text)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;');

    // Preserve summary panel state before re-rendering
    const previousPanel = elements.summaryPanel;
    const summaryVisible = !previousPanel.classList.contains('hidden');
    const summaryTitle = elements.summaryTitle.textContent;
    const summaryText = elements.summaryText.textContent;

    if (state.messages.length === 0) {
        // Only re-attach the panel if clearing the container is what removes it.
        const panelLivesInChat = previousPanel.parentNode === elements.chatMessages;

        // Clear chat messages and show welcome screen
        elements.chatMessages.innerHTML = '';
        if (panelLivesInChat) {
            elements.chatMessages.appendChild(previousPanel);
        }
        elements.welcomeScreen.classList.remove('hidden');
        elements.chatMessages.appendChild(elements.welcomeScreen);
        // Re-show summary if it was visible
        if (summaryVisible) {
            elements.summaryPanel.classList.remove('hidden');
        }
        return;
    }
    elements.welcomeScreen.classList.add('hidden');

    const html = state.messages.map((msg) => {
        const avatar = msg.role === 'user' ? (state.user?.username?.charAt(0).toUpperCase() || 'U') : '🧠';
        return `<div class="message ${msg.role}"><div class="message-avatar">${escapeHtml(avatar)}</div><div class="message-content">${formatContent(msg.content)}</div></div>`;
    }).join('');

    // Panel skeleton only; title and text are filled in via textContent below.
    const summaryPanelHTML = `
        <div class="summary-panel ${summaryVisible ? '' : 'hidden'}" id="summaryPanel">
            <div class="summary-header">
                <span class="summary-icon">📄</span>
                <span class="summary-title" id="summaryTitle"></span>
            </div>
            <div class="summary-content" id="summaryContent">
                <div class="summary-text" id="summaryText"></div>
            </div>
            <button class="summary-close" id="summaryClose" title="Close summary">✕</button>
        </div>
    `;

    elements.chatMessages.innerHTML = summaryPanelHTML + html + elements.welcomeScreen.outerHTML;
    document.getElementById('welcomeScreen')?.classList.add('hidden');

    // Re-bind summary panel elements and event listener
    elements.summaryPanel = document.getElementById('summaryPanel');
    elements.summaryTitle = document.getElementById('summaryTitle');
    elements.summaryText = document.getElementById('summaryText');
    elements.summaryClose = document.getElementById('summaryClose');

    // Restore the captured text safely (never parsed as HTML).
    elements.summaryTitle.textContent = summaryTitle;
    elements.summaryText.textContent = summaryText;

    elements.summaryClose.addEventListener('click', hideSummary);
}
1356
+
1357
/**
 * Convert a markdown-flavoured message into an HTML string.
 *
 * Supported: fenced code blocks, pipe tables, `#`..`####` headings, bold,
 * italic, inline code, horizontal rules, numbered / bulleted / indented
 * lists, blockquotes, paragraphs and line breaks.
 *
 * Fixes over the previous version:
 *  - HTML special characters are escaped before markdown is applied, which
 *    is what the original comment described but never did. Content that
 *    already contains `<table` or `<div` is treated as pre-rendered HTML and
 *    passed through unescaped, preserving the original heuristic.
 *    NOTE(review): that heuristic is a compatibility carve-out, not a
 *    security boundary - confirm whether the backend really sends HTML.
 *  - Fenced code is lifted out before any other rule runs and put back
 *    afterwards, so `# comments`, `*`, and blank lines inside code are no
 *    longer turned into headings, emphasis or paragraph breaks.
 *  - Italic no longer spans lines or starts before whitespace, so a list
 *    written with `* item` lines is rendered as bullets instead of being
 *    swallowed as one emphasised run.
 *  - `null` / `undefined` / non-string content yields output instead of a
 *    TypeError.
 *
 * @param {string} content - Raw message text.
 * @returns {string} HTML markup.
 */
function formatContent(content) {
    // NUL is stripped because it is used below as the placeholder delimiter.
    let html = (content == null ? '' : String(content)).replace(/\u0000/g, '');

    const escapeHtml = (text) => text
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;');

    // Content that looks like it is already HTML is not escaped.
    const preRendered = html.includes('<table') || html.includes('<div');

    // Code blocks: ```code``` - stash them so later rules cannot touch them.
    const codeBlocks = [];
    html = html.replace(/```(\w*)\n?([\s\S]*?)```/g, (match, lang, code) => {
        const body = preRendered ? code.trim() : escapeHtml(code.trim());
        codeBlocks.push(`<pre class="code-block${lang ? ' lang-' + lang : ''}"><code>${body}</code></pre>`);
        return `\u0000CODE${codeBlocks.length - 1}\u0000`;
    });

    if (!preRendered) {
        html = escapeHtml(html);
    }

    // Tables: | Header | Header |
    html = html.replace(/(?:^|\n)(\|.+\|)\n(\|[-:\s|]+\|)\n((?:\|.+\|\n?)+)/gm, (match, headerRow, sepRow, bodyRows) => {
        const headers = headerRow.split('|').filter(cell => cell.trim()).map(cell =>
            `<th>${cell.trim()}</th>`
        ).join('');

        const rows = bodyRows.trim().split('\n').map(row => {
            const cells = row.split('|').filter(cell => cell.trim()).map(cell =>
                `<td>${cell.trim()}</td>`
            ).join('');
            return `<tr>${cells}</tr>`;
        }).join('');

        return `<div class="table-wrapper"><table><thead><tr>${headers}</tr></thead><tbody>${rows}</tbody></table></div>`;
    });

    // Headers: ### Header, ## Header, # Header
    html = html.replace(/^#### (.+)$/gm, '<h4>$1</h4>');
    html = html.replace(/^### (.+)$/gm, '<h3>$1</h3>');
    html = html.replace(/^## (.+)$/gm, '<h2>$1</h2>');
    html = html.replace(/^# (.+)$/gm, '<h1>$1</h1>');

    // Bold headers at start of line (NotebookLM style)
    html = html.replace(/^(\*\*[^*]+\*\*):?\s*$/gm, '<h4>$1</h4>');

    // Bold text: **text**
    html = html.replace(/\*\*([^*]+)\*\*/g, '<strong>$1</strong>');

    // Italic text: *text* - must open on a non-space and stay on one line,
    // otherwise "* item" bullet markers pair up across lines.
    html = html.replace(/(?<!\*)\*(?!\s)([^*\n]+)\*(?!\*)/g, '<em>$1</em>');

    // Inline code: `code`
    html = html.replace(/`([^`]+)`/g, '<code class="inline-code">$1</code>');

    // Horizontal rule: --- or ***
    html = html.replace(/^[-*]{3,}$/gm, '<hr class="divider">');

    // Numbered lists: 1. Item, 2. Item, etc.
    html = html.replace(/^(\d+)\.\s+(.+)$/gm, '<li class="numbered"><span class="list-num">$1.</span> $2</li>');

    // Bullet points: • Item or - Item or * Item at start of line
    html = html.replace(/^[\•\-\*]\s+(.+)$/gm, '<li class="bullet">$1</li>');

    // Sub-bullets with indentation (2+ spaces before bullet)
    html = html.replace(/^[\s]{2,}[\•\-\*]\s+(.+)$/gm, '<li class="sub-bullet">$1</li>');

    // Wrap consecutive numbered list items
    html = html.replace(/(<li class="numbered">[\s\S]*?<\/li>\n?)+/g, '<ol class="formatted-list">$&</ol>');

    // Wrap consecutive bullet items
    html = html.replace(/(<li class="bullet">[\s\S]*?<\/li>\n?)+/g, '<ul class="formatted-list">$&</ul>');

    // Wrap consecutive sub-bullet items
    html = html.replace(/(<li class="sub-bullet">[\s\S]*?<\/li>\n?)+/g, '<ul class="formatted-list sub-list">$&</ul>');

    // Blockquotes: > text (the marker is "&gt;" once the text has been escaped)
    html = html.replace(/^(?:>|&gt;)\s+(.+)$/gm, '<blockquote>$1</blockquote>');
    // Merge consecutive blockquotes
    html = html.replace(/<\/blockquote>\n<blockquote>/g, '<br>');

    // Double newlines become paragraph breaks
    html = html.replace(/\n\n+/g, '</p><p>');

    // Single newlines become line breaks (but not inside lists)
    html = html.replace(/\n/g, '<br>');

    // Put the untouched code blocks back now that newline handling is done.
    html = html.replace(/\u0000CODE(\d+)\u0000/g, (match, index) => codeBlocks[Number(index)]);

    // Clean up br tags in lists, headers, tables
    html = html.replace(/<br><li/g, '<li');
    html = html.replace(/<\/li><br>/g, '</li>');
    html = html.replace(/<br><h/g, '<h');
    html = html.replace(/<\/h(\d)><br>/g, '</h$1>');
    html = html.replace(/<br><ul/g, '<ul');
    html = html.replace(/<br><ol/g, '<ol');
    html = html.replace(/<\/ul><br>/g, '</ul>');
    html = html.replace(/<\/ol><br>/g, '</ol>');
    html = html.replace(/<br><table/g, '<table');
    html = html.replace(/<\/table><br>/g, '</table>');
    html = html.replace(/<br><div class="table/g, '<div class="table');
    html = html.replace(/<\/div><br>/g, '</div>');
    html = html.replace(/<br><pre/g, '<pre');
    html = html.replace(/<\/pre><br>/g, '</pre>');
    html = html.replace(/<br><hr/g, '<hr');
    html = html.replace(/<hr[^>]*><br>/g, '<hr class="divider">');
    html = html.replace(/<br><blockquote/g, '<blockquote');
    html = html.replace(/<\/blockquote><br>/g, '</blockquote>');

    // Wrap in paragraph
    html = '<p>' + html + '</p>';

    // Clean up empty paragraphs
    html = html.replace(/<p><\/p>/g, '');
    html = html.replace(/<p>(\s|<br>)*<\/p>/g, '');
    html = html.replace(/<p><(h\d|ul|ol|table|div|pre|hr|blockquote)/g, '<$1');
    html = html.replace(/<\/(h\d|ul|ol|table|div|pre|blockquote)><\/p>/g, '</$1>');
    html = html.replace(/<p><hr/g, '<hr');

    return html;
}
1470
+
1471
/** Scroll the chat container so its last message is in view. */
function scrollToBottom() {
    const container = elements.chatMessages;
    container.scrollTop = container.scrollHeight;
}
1474
+
1475
+ // ==================== Token Verification ====================
1476
/**
 * Validate the stored auth token against `/api/auth/verify`.
 *
 * On success the returned user is stored and the initial data loads are
 * started. On a non-OK response the stored credentials are cleared. In
 * every failure case (no token, rejected token, any thrown error) the
 * auth modal is shown.
 *
 * @returns {Promise<void>}
 */
async function verifyToken() {
    if (!state.token) {
        showAuthModal();
        return;
    }

    try {
        const authHeaders = { 'Authorization': `Bearer ${state.token}` };
        const response = await fetch('/api/auth/verify', { headers: authHeaders });

        if (!response.ok) {
            // Token rejected: forget it both in memory and in storage.
            state.token = null;
            state.user = null;
            localStorage.removeItem('Iribl AI_token');
            localStorage.removeItem('Iribl AI_user');
            showAuthModal();
            return;
        }

        state.user = await response.json();
        localStorage.setItem('Iribl AI_user', JSON.stringify(state.user));
        updateAuthUI();
        loadBuckets();
        loadDocuments();
        // Load chat history from server database
        loadChatHistoryFromServer();
    } catch {
        showAuthModal();
    }
}
1498
+
1499
+ // ==================== Chat History ====================
1500
/**
 * Derive a short title for a chat from its first user message.
 *
 * @param {Array<{role: string, content: string}>} messages - Chat transcript.
 * @returns {string} The first 40 characters of the first user message
 *     (with '...' appended when it was longer), or 'New Conversation' when
 *     there is no user message.
 */
function generateChatTopic(messages) {
    const MAX_TOPIC_LENGTH = 40;

    for (const entry of messages) {
        if (entry.role !== 'user') continue;

        const text = entry.content;
        const head = text.substring(0, MAX_TOPIC_LENGTH);
        return text.length > MAX_TOPIC_LENGTH ? head + '...' : head;
    }

    return 'New Conversation';
}
1511
+
1512
/**
 * Persist `state.chatHistory` to localStorage as JSON.
 *
 * `localStorage.setItem` throws when the quota is exceeded or storage is
 * unavailable. The local copy is only a cache, so a failure is logged
 * rather than propagated; otherwise callers would abort before they get
 * to sync the chat to the server.
 */
function saveChatHistory() {
    try {
        localStorage.setItem('Iribl AI_chat_history', JSON.stringify(state.chatHistory));
    } catch (error) {
        console.error('Failed to save chat history locally:', error);
    }
}
1515
+
1516
+ // Sync chat to server
1517
/**
 * POST a chat object to `/api/chats`. Does nothing when no auth token is
 * set; failures are logged and otherwise ignored.
 *
 * @param {Object} chatData - Chat record to send as the JSON body.
 * @returns {Promise<void>}
 */
async function syncChatToServer(chatData) {
    if (!state.token) return;

    try {
        const request = {
            method: 'POST',
            headers: {
                'Authorization': `Bearer ${state.token}`,
                'Content-Type': 'application/json'
            },
            body: JSON.stringify(chatData)
        };
        await fetch('/api/chats', request);
    } catch (error) {
        console.error('Failed to sync chat to server:', error);
    }
}
1533
+
1534
+ // Load chat history from server
1535
/**
 * Fetch the chat list from `/api/chats` and, when the server returns at
 * least one chat, replace `state.chatHistory` with it, refresh the local
 * cache and re-render the history list. An empty or failed response leaves
 * the local history untouched.
 *
 * @returns {Promise<void>}
 */
async function loadChatHistoryFromServer() {
    if (!state.token) return;

    try {
        const response = await fetch('/api/chats', {
            headers: { 'Authorization': `Bearer ${state.token}` }
        });
        if (!response.ok) return;

        const data = await response.json();
        const serverChats = data.chats;
        if (!serverChats || !(serverChats.length > 0)) return;

        // Server copy wins over whatever is stored locally.
        state.chatHistory = serverChats;
        saveChatHistory(); // Update local storage
        renderChatHistory();
    } catch (error) {
        console.error('Failed to load chats from server:', error);
    }
}
1556
+
1557
+ // Delete chat from server
1558
/**
 * Issue `DELETE /api/chats/{chatId}`. Does nothing when no auth token is
 * set; failures are logged and otherwise ignored.
 *
 * The id is percent-encoded so that an id containing `/`, `?`, `#` or
 * similar characters cannot change which URL is requested.
 *
 * @param {string} chatId - Id of the chat to delete.
 * @returns {Promise<void>}
 */
async function deleteChatFromServer(chatId) {
    if (!state.token) return;

    try {
        await fetch(`/api/chats/${encodeURIComponent(chatId)}`, {
            method: 'DELETE',
            headers: { 'Authorization': `Bearer ${state.token}` }
        });
    } catch (error) {
        console.error('Failed to delete chat from server:', error);
    }
}
1570
+
1571
/**
 * Snapshot the on-screen chat into `state.chatHistory`, persist it locally
 * and push it to the server.
 *
 * @returns {string|null} The id the chat was saved under, or null when the
 *     chat has no messages and nothing was saved.
 */
function saveCurrentChat() {
    if (state.messages.length === 0) return null;

    const chatId = state.currentChatId || Date.now().toString();
    const topic = generateChatTopic(state.messages);
    const slot = state.chatHistory.findIndex(c => c.id === chatId);

    const chatData = {
        id: chatId,
        topic: topic,
        messages: Array.from(state.messages), // shallow copy of the transcript
        timestamp: Date.now(),
        bucket: state.chatBucket
    };

    if (slot < 0) {
        // Unknown chat: newest chats go to the front of the list.
        state.chatHistory.unshift(chatData);
    } else {
        state.chatHistory[slot] = chatData;
    }

    saveChatHistory();
    renderChatHistory();
    syncChatToServer(chatData);

    return chatId;
}
1605
+
1606
/**
 * Save the current chat (if it has messages) and switch to a blank one:
 * clears the transcript and chat id, shows the welcome screen and hides
 * the summary panel.
 */
function startNewChat() {
    const stillGenerating = state.isLoading;
    if (stillGenerating) {
        showToast('AI is still responding - response will go to current chat', 'info');
    }

    const hasMessages = state.messages.length > 0;
    if (hasMessages) {
        saveCurrentChat();
    }

    // Blank slate for the new conversation.
    Object.assign(state, { messages: [], currentChatId: null });

    renderMessages();
    elements.welcomeScreen.classList.remove('hidden');
    hideSummary();
    renderChatHistory();

    showToast('Started new chat', 'info');
}
1629
+
1630
/**
 * Open a stored chat: saves the current one first (when it is a different
 * chat with messages), then loads the stored transcript, chat id and
 * bucket, syncs the bucket selector, and re-renders.
 *
 * @param {string} chatId - Id of the history entry to open; unknown ids
 *     are ignored.
 */
function loadChatFromHistory(chatId) {
    if (state.isLoading) {
        showToast('AI is still responding - response will go to current chat', 'info');
    }

    const switchingAway = state.messages.length > 0 && state.currentChatId !== chatId;
    if (switchingAway) {
        saveCurrentChat();
    }

    const chat = state.chatHistory.find(c => c.id === chatId);
    if (!chat) return;

    state.messages = [...chat.messages];
    state.currentChatId = chat.id;
    state.chatBucket = chat.bucket || '';

    if (elements.chatBucketSelect) {
        elements.chatBucketSelect.value = state.chatBucket;

        let bucketName;
        if (state.chatBucket) {
            const match = state.buckets.find(b => b.bucket_id === state.chatBucket);
            bucketName = match?.name || 'Selected Bucket';
        } else {
            bucketName = 'All Documents';
        }
        elements.chatBucketTrigger.querySelector('.select-value').textContent = bucketName;
    }

    renderMessages();

    // Welcome screen is visible only for an empty transcript.
    const hasMessages = state.messages.length !== 0;
    elements.welcomeScreen.classList.toggle('hidden', hasMessages);

    renderChatHistory();
    scrollToBottom();
}
1671
+
1672
/**
 * Remove a chat from the local history and from the server. If it is the
 * chat currently on screen, the transcript is cleared as well.
 *
 * Invoked from an inline `onclick` on the delete button, which sits inside
 * the clickable history row; propagation is stopped so the row's own click
 * handler does not also fire. The implicit global `event` only exists
 * while an event is being dispatched, so it is now checked before use -
 * previously a programmatic call threw a TypeError.
 *
 * @param {string} chatId - Id of the chat to delete.
 */
function deleteChatFromHistory(chatId) {
    const activeEvent = typeof event !== 'undefined' ? event : null;
    if (activeEvent && typeof activeEvent.stopPropagation === 'function') {
        activeEvent.stopPropagation();
    }

    state.chatHistory = state.chatHistory.filter(c => c.id !== chatId);

    // If deleting current chat, clear it
    if (state.currentChatId === chatId) {
        state.messages = [];
        state.currentChatId = null;
        renderMessages();
        elements.welcomeScreen.classList.remove('hidden');
    }

    saveChatHistory();
    renderChatHistory();

    // Delete from server
    deleteChatFromServer(chatId);

    showToast('Chat deleted', 'success');
}
1693
+
1694
/**
 * Render the chat-history sidebar list and its counter, filtered by
 * `state.selectedBucket` when one is selected.
 *
 * Fixes over the previous version:
 *  - `chat.topic` (derived from user input or loaded from the server) is
 *    HTML-escaped before being placed in the markup.
 *  - `chat.id` is passed to the inline handlers as an escaped JSON string
 *    literal, so quotes or markup in an id cannot break out of the
 *    attribute or the call expression.
 *  - Dropped a filter clause that could never be true (it required
 *    `state.selectedBucket` to be falsy inside a branch where it is truthy).
 */
function renderChatHistory() {
    const escapeHtml = (value) => String(value)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');

    // Filter chats by selected bucket
    const filteredChats = state.selectedBucket
        ? state.chatHistory.filter(chat => chat.bucket === state.selectedBucket)
        : state.chatHistory;

    const count = filteredChats.length;
    const totalCount = state.chatHistory.length;

    // Show filtered count vs total if filtering is active
    elements.chatHistoryCount.textContent = state.selectedBucket && count !== totalCount ?
        `(${count}/${totalCount})` : `(${totalCount})`;

    if (count === 0) {
        elements.chatHistoryList.innerHTML = state.selectedBucket ?
            `<div class="empty-state small"><div class="empty-text">No chats in this bucket</div></div>` :
            `<div class="empty-state small"><div class="empty-text">No chats yet</div></div>`;
        return;
    }

    elements.chatHistoryList.innerHTML = filteredChats.map(chat => {
        const isActive = state.currentChatId === chat.id;
        const date = formatDate(chat.timestamp / 1000);
        // JSON gives a valid JS string literal; escaping makes it attribute-safe.
        const idArg = escapeHtml(JSON.stringify(String(chat.id)));
        return `
            <div class="chat-history-item ${isActive ? 'active' : ''}" onclick="loadChatFromHistory(${idArg})">
                <span class="chat-history-icon">💬</span>
                <div class="chat-history-info">
                    <div class="chat-history-topic">${escapeHtml(chat.topic)}</div>
                    <div class="chat-history-date">${date}</div>
                </div>
                <button class="btn btn-ghost chat-history-delete" onclick="deleteChatFromHistory(${idArg})" title="Delete">🗑️</button>
            </div>
        `;
    }).join('');
}
1734
+
1735
/**
 * Empty the current chat's transcript while keeping the chat itself (and
 * its id) in history. The emptied chat is persisted locally and synced to
 * the server, then the UI is reset to the welcome screen.
 */
function clearCurrentChat() {
    if (state.isLoading) {
        showToast('AI is still responding - response will go to current chat', 'info');
    }

    if (state.currentChatId) {
        const stored = state.chatHistory.find(c => c.id === state.currentChatId);
        if (stored) {
            // Keep the history entry, drop only its messages.
            stored.messages = [];
            saveChatHistory();
            syncChatToServer(stored);
        }
    }

    state.messages = [];

    renderMessages();
    elements.welcomeScreen.classList.remove('hidden');
    hideSummary();
    renderChatHistory();

    showToast('Chat cleared', 'info');
}
1764
+
1765
/**
 * Attach the New Chat / Clear Chat button handlers and draw the history
 * list for the first time.
 */
function initChatHistory() {
    // Sidebar variant stops propagation before clearing.
    const clearFromSidebar = (e) => {
        e.stopPropagation();
        clearCurrentChat();
    };

    elements.newChatBtn.addEventListener('click', startNewChat);
    elements.clearChatBtn.addEventListener('click', clearFromSidebar);
    elements.clearChatBtnTop.addEventListener('click', clearCurrentChat);

    renderChatHistory();
}
1784
+
1785
+ // ==================== Init ====================
1786
/**
 * Application entry point: runs every UI initializer in order, then
 * starts token verification.
 */
function init() {
    const bootSequence = [
        initSidebars,
        initMobileNavigation,
        initCollapsible,
        initCustomDropdowns,
        initUpload,
        initChat,
        initSummaryPanel,
        initChatHistory,
        verifyToken
    ];

    for (const step of bootSequence) {
        step();
    }
}

// Boot once the DOM has been parsed.
document.addEventListener('DOMContentLoaded', init);
test_chroma.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Test ChromaDB Cloud connection
#
# Connects with the credentials found in the environment (loaded from .env)
# and prints the collections visible to them.
import os

import chromadb
from dotenv import load_dotenv

load_dotenv()

# Tenant and database come from the same variables .env.example documents
# (CHROMA_TENANT / CHROMA_DATABASE); the previously hard-coded values are
# kept as fallbacks so existing setups behave exactly as before.
tenant = os.getenv("CHROMA_TENANT") or "jash_doshi_211294"
database = os.getenv("CHROMA_DATABASE") or "visionextract"

# Use CloudClient - the correct way to connect to ChromaDB Cloud
client = chromadb.CloudClient(
    tenant=tenant,
    database=database,
    api_key=os.getenv("CHROMA_API_KEY"),
)

print("Connected successfully!")
print("Collections:", client.list_collections())