minh-4T committed on
Commit
4095dea
·
1 Parent(s): f42dd10

one collection

Browse files
Files changed (1) hide show
  1. core/collection_utils.py +7 -4
core/collection_utils.py CHANGED
@@ -14,10 +14,13 @@ def normalize_folder_key(folder_key: str) -> str:
14
 
15
 
16
  def build_collection_name(folder_key: str, prefix: str = "rag") -> str:
17
- normalized = normalize_folder_key(folder_key)
18
- base = f"{prefix}_{normalized}"
19
- # Qdrant collection names should stay short and simple.
20
- return base[:63]
 
 
 
21
 
22
 
23
  def extract_year_tokens(value: str) -> Set[str]:
 
14
 
15
 
16
  def build_collection_name(folder_key: str, prefix: str = "rag") -> str:
17
+ """
18
+ OPTIMIZED: Always return single collection name regardless of folder_key.
19
+ This ensures all documents go into ONE collection for deduplication and efficient querying.
20
+ Folder structure is preserved in payload metadata (folder_key), not as separate collections.
21
+ """
22
+ # ✅ Force single collection: always return "rag_docs"
23
+ return f"{prefix}_docs"
24
 
25
 
26
  def extract_year_tokens(value: str) -> Set[str]: