cryogenic22 committed on
Commit
081071b
·
verified ·
1 Parent(s): 6d02fb7

Delete utils/document_search.py

Browse files
Files changed (1) hide show
  1. utils/document_search.py +0 -90
utils/document_search.py DELETED
@@ -1,90 +0,0 @@
1
-
2
- #utils/document_search.py
3
- from typing import Dict, List, Optional, Any
4
- import sqlite3
5
- from fuzzywuzzy import fuzz
6
- import streamlit as st
7
- from datetime import datetime
8
-
9
-
10
def search_documents(
    conn: sqlite3.Connection,
    query: str,
    collection_id: Optional[int] = None,
    filters: Optional[Dict] = None
) -> List[Dict]:
    """
    Search documents using fuzzy matching and optional filters.

    Candidate rows are selected with SQL (optionally restricted to one
    collection and to an upload-date range). Each row is then scored with
    ``fuzz.partial_ratio`` against the document name and against the first
    1000 characters of its content, case-insensitively. A row is kept when
    the better of the two scores is above 60.

    Args:
        conn: Database connection
        query: Search query
        collection_id: Optional collection filter. Falsy values (``None``
            and ``0``) disable the filter.
        filters: Optional dictionary of filters. Only the ``'date_range'``
            key is read: a ``(start, end)`` pair compared against
            ``upload_date`` with ``BETWEEN``. A missing or empty value
            disables the date filter.

    Returns:
        A list of dicts with keys ``id``, ``name``, ``content``,
        ``upload_date``, ``collections`` and ``match_score``, sorted by
        ``match_score`` in descending order. If the database raises
        ``sqlite3.Error`` the error is shown via ``st.error`` and an empty
        list is returned.
    """
    # Only the database work can raise sqlite3.Error, so only that part
    # lives inside the try block.
    try:
        cursor = conn.cursor()

        # Base query
        sql = """
            SELECT DISTINCT
                d.id,
                d.name,
                d.content,
                d.upload_date,
                GROUP_CONCAT(c.name) as collections
            FROM documents d
            LEFT JOIN document_collections dc ON d.id = dc.document_id
            LEFT JOIN collections c ON dc.collection_id = c.id
        """

        params = []
        where_clauses = []

        # Add collection filter if specified
        if collection_id:
            where_clauses.append("dc.collection_id = ?")
            params.append(collection_id)

        # Add date filter if specified. A present-but-empty value (e.g.
        # None) is treated as "no filter" instead of failing on unpacking.
        date_range = filters.get('date_range') if filters else None
        if date_range:
            start_date, end_date = date_range
            where_clauses.append("d.upload_date BETWEEN ? AND ?")
            params.extend([start_date, end_date])

        # Combine WHERE clauses
        if where_clauses:
            sql += " WHERE " + " AND ".join(where_clauses)

        sql += " GROUP BY d.id"

        cursor.execute(sql, params)
        rows = cursor.fetchall()

    except sqlite3.Error as e:
        st.error(f"Error searching documents: {e}")
        return []

    # Lowercase the query once rather than twice per row.
    needle = (query or "").lower()

    documents = []
    for doc_id, name, content, upload_date, collection_names in rows:
        # NULL columns come back as None; score them as empty text rather
        # than crashing on None.lower().
        name_score = fuzz.partial_ratio(needle, (name or "").lower())
        # Truncate before lowercasing so that only the scored prefix of a
        # large document is processed.
        content_score = fuzz.partial_ratio(needle, (content or "")[:1000].lower())

        # Use maximum score between name and content
        match_score = max(name_score, content_score)

        if match_score > 60:  # Threshold for matches
            documents.append({
                'id': doc_id,
                'name': name,
                'content': content,
                'upload_date': upload_date,
                # GROUP_CONCAT joins with ','; a collection name that itself
                # contains a comma will be split into several entries here.
                'collections': collection_names.split(',') if collection_names else [],
                'match_score': match_score
            })

    # Sort by match score, best first
    documents.sort(key=lambda doc: doc['match_score'], reverse=True)
    return documents