RFP_Analyzer_Agent_backup

Sleeping

App Files Files Community

cryogenic22 committed on Dec 2, 2024

Commit

dd96f4e

verified ·

1 Parent(s): 94dc3dd

Create utils/document_search.py

Browse files

Files changed (1) hide show

utils/document_search.py +89 -0

utils/document_search.py ADDED Viewed

	@@ -0,0 +1,89 @@

+#utils/document_search.py
+from typing import List, Dict, Optional
+import sqlite3
+from fuzzywuzzy import fuzz
+import streamlit as st
+from datetime import datetime
def search_documents(
    conn: sqlite3.Connection,
    query: str,
    collection_id: Optional[int] = None,
    filters: Optional[Dict] = None
) -> List[Dict]:
    """
    Search documents using fuzzy matching and optional filters.

    Candidate rows are selected in SQL (collection and upload-date filters),
    then scored in Python with ``fuzz.partial_ratio`` against the document
    name and the leading part of its content.

    Args:
        conn: Database connection
        query: Search query (compared case-insensitively)
        collection_id: Optional collection filter; ``None`` disables it
        filters: Optional dictionary of filters. Only the ``'date_range'``
            key is read here: a ``(start, end)`` pair applied to
            ``documents.upload_date`` with SQL ``BETWEEN`` (inclusive).

    Returns:
        A list of dicts with keys ``id``, ``name``, ``content``,
        ``upload_date``, ``collections`` (list of names) and ``match_score``,
        sorted by ``match_score`` descending. Only documents scoring above
        the threshold are included. An empty list is returned (and the error
        is shown via ``st.error``) if the database raises ``sqlite3.Error``.
    """
    match_threshold = 60        # scores must be strictly greater than this
    content_prefix_len = 1000   # only the start of the content is scored

    # GROUP BY d.id already yields one row per document, so DISTINCT is not
    # needed.
    # NOTE: when a collection filter is active, the WHERE clause removes the
    # other joined rows before grouping, so `collections` then lists only the
    # filtered collection.
    sql = """
        SELECT
            d.id,
            d.name,
            d.content,
            d.upload_date,
            GROUP_CONCAT(c.name) as collections
        FROM documents d
        LEFT JOIN document_collections dc ON d.id = dc.document_id
        LEFT JOIN collections c ON dc.collection_id = c.id
    """
    params = []
    where_clauses = []

    # Compare against None explicitly: an id of 0 is still a real filter and
    # must not silently fall back to "search everything".
    if collection_id is not None:
        where_clauses.append("dc.collection_id = ?")
        params.append(collection_id)

    # A missing or None 'date_range' simply means "no date filter".
    date_range = filters.get('date_range') if filters else None
    if date_range:
        start_date, end_date = date_range
        where_clauses.append("d.upload_date BETWEEN ? AND ?")
        params.extend([start_date, end_date])

    if where_clauses:
        sql += " WHERE " + " AND ".join(where_clauses)
    sql += " GROUP BY d.id"

    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql, params)
            rows = cursor.fetchall()
        finally:
            # Release the cursor even if execute/fetch fails.
            cursor.close()
    except sqlite3.Error as e:
        st.error(f"Error searching documents: {e}")
        return []

    # Loop-invariant: lower-case the query once instead of twice per row.
    needle = query.lower()

    documents = []
    for doc_id, name, content, upload_date, collections in rows:
        # NULL columns come back as None; score them as empty text rather
        # than crashing on .lower().
        name_text = (name or "").lower()
        # Slice before lower-casing so large documents are not fully copied.
        content_text = (content or "")[:content_prefix_len].lower()

        name_score = fuzz.partial_ratio(needle, name_text)
        content_score = fuzz.partial_ratio(needle, content_text)
        # A document matches if either its name or its content matches.
        match_score = max(name_score, content_score)

        if match_score > match_threshold:
            documents.append({
                'id': doc_id,
                'name': name,
                'content': content,
                'upload_date': upload_date,
                'collections': collections.split(',') if collections else [],
                'match_score': match_score
            })

    # Best matches first; list.sort is stable, so ties keep query order.
    documents.sort(key=lambda x: x['match_score'], reverse=True)
    return documents