cryogenic22 committed on
Commit
31e950a
·
verified ·
1 Parent(s): 7cbdc3a

Update utils/database.py

Browse files
Files changed (1) hide show
  1. utils/database.py +132 -1
utils/database.py CHANGED
@@ -170,7 +170,7 @@ def verify_vector_store(vector_store):
170
  return False
171
 
172
 
173
- # utils/database.py
174
 
175
  def handle_document_upload(uploaded_files, **kwargs):
176
  """
@@ -323,6 +323,137 @@ def handle_document_upload(uploaded_files, **kwargs):
323
  st.session_state.chat_ready = False
324
  return False
325
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
326
  def process_document(file_path):
327
  """
328
  Process a PDF document with proper chunking.
 
170
  return False
171
 
172
 
173
+
174
 
175
  def handle_document_upload(uploaded_files, **kwargs):
176
  """
 
323
  st.session_state.chat_ready = False
324
  return False
325
 
326
+
327
+
328
def get_all_documents(conn: sqlite3.Connection) -> List[Dict]:
    """
    Fetch every document together with the names of its collections.

    Args:
        conn (sqlite3.Connection): Database connection

    Returns:
        List[Dict]: One dict per document with the keys 'id', 'name',
            'content', 'upload_date' and 'collections', ordered by
            upload_date descending. 'collections' is the comma-split
            GROUP_CONCAT result, or an empty list when it is NULL/empty.
            An empty list is returned if a sqlite3.Error is raised
            (the error is reported through st.error).
    """
    query = '''
        SELECT
            d.id,
            d.name,
            d.content,
            d.upload_date,
            GROUP_CONCAT(c.name) as collections
        FROM documents d
        LEFT JOIN document_collections dc ON d.id = dc.document_id
        LEFT JOIN collections c ON dc.collection_id = c.id
        GROUP BY d.id
        ORDER BY d.upload_date DESC
    '''
    try:
        # The whole read (execute + fetch) happens while conn_lock is held.
        with conn_lock:
            cursor = conn.cursor()
            cursor.execute(query)
            return [
                {
                    'id': doc_id,
                    'name': doc_name,
                    'content': doc_content,
                    'upload_date': uploaded_at,
                    'collections': joined_names.split(',') if joined_names else [],
                }
                for doc_id, doc_name, doc_content, uploaded_at, joined_names
                in cursor.fetchall()
            ]
    except sqlite3.Error as e:
        st.error(f"Error retrieving documents: {e}")
        return []
369
+
370
def get_document_by_id(conn: sqlite3.Connection, document_id: int) -> Optional[Dict]:
    """
    Look up one document, including its collection names, by primary key.

    Args:
        conn (sqlite3.Connection): Database connection
        document_id (int): ID of the document to retrieve

    Returns:
        Optional[Dict]: Dict with the keys 'id', 'name', 'content',
            'upload_date' and 'collections' when a row matches; None when
            no row matches or a sqlite3.Error is raised (the error is
            reported through st.error).
    """
    query = '''
        SELECT
            d.id,
            d.name,
            d.content,
            d.upload_date,
            GROUP_CONCAT(c.name) as collections
        FROM documents d
        LEFT JOIN document_collections dc ON d.id = dc.document_id
        LEFT JOIN collections c ON dc.collection_id = c.id
        WHERE d.id = ?
        GROUP BY d.id
    '''
    try:
        # The whole read (execute + fetch) happens while conn_lock is held.
        with conn_lock:
            cursor = conn.cursor()
            cursor.execute(query, (document_id,))
            record = cursor.fetchone()

            # Guard clause: nothing matched the requested id.
            if not record:
                return None

            joined_names = record[4]
            if joined_names:
                collection_names = joined_names.split(',')
            else:
                collection_names = []

            return {
                'id': record[0],
                'name': record[1],
                'content': record[2],
                'upload_date': record[3],
                'collections': collection_names,
            }
    except sqlite3.Error as e:
        st.error(f"Error retrieving document: {e}")
        return None
412
+
413
def get_recent_documents(conn: sqlite3.Connection, limit: int = 5) -> List[Dict]:
    """
    Fetch the newest documents, capped at ``limit`` rows.

    Args:
        conn (sqlite3.Connection): Database connection
        limit (int): Maximum number of documents to return

    Returns:
        List[Dict]: One dict per document with the keys 'id', 'name',
            'content', 'upload_date' and 'collections', ordered by
            upload_date descending. 'collections' is the comma-split
            GROUP_CONCAT result, or an empty list when it is NULL/empty.
            An empty list is returned if a sqlite3.Error is raised
            (the error is reported through st.error).
    """
    query = '''
        SELECT
            d.id,
            d.name,
            d.content,
            d.upload_date,
            GROUP_CONCAT(c.name) as collections
        FROM documents d
        LEFT JOIN document_collections dc ON d.id = dc.document_id
        LEFT JOIN collections c ON dc.collection_id = c.id
        GROUP BY d.id
        ORDER BY d.upload_date DESC
        LIMIT ?
    '''
    try:
        # The whole read (execute + fetch) happens while conn_lock is held.
        with conn_lock:
            cursor = conn.cursor()
            cursor.execute(query, (limit,))
            return [
                {
                    'id': doc_id,
                    'name': doc_name,
                    'content': doc_content,
                    'upload_date': uploaded_at,
                    'collections': joined_names.split(',') if joined_names else [],
                }
                for doc_id, doc_name, doc_content, uploaded_at, joined_names
                in cursor.fetchall()
            ]
    except sqlite3.Error as e:
        st.error(f"Error retrieving recent documents: {e}")
        return []
456
+
457
  def process_document(file_path):
458
  """
459
  Process a PDF document with proper chunking.