akryldigital committed on
Commit
e53ce4e
·
verified ·
1 Parent(s): 27e8dcc

update filtering logging

Browse files
Files changed (2) hide show
  1. src/pipeline.py +20 -21
  2. src/utils.py +4 -2
src/pipeline.py CHANGED
@@ -10,11 +10,7 @@ try:
10
  from langchain.docstore.document import Document
11
  except ModuleNotFoundError as me:
12
  print(me.__str__())
13
- try:
14
- from langchain.schema import Document
15
- except:
16
- from langchain_core.documents import Document
17
- print('only "from langchain_core.documents import Document" worked !')
18
 
19
  from .logging import log_error
20
 
@@ -540,7 +536,24 @@ Answer:"""
540
  filters_applied = False
541
  qdrant_filter = None # Add this
542
 
543
- if auto_infer_filters and not any([reports, sources, subtype]):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
544
  print(f"🤖 AUTO-INFERRING FILTERS: No explicit filters provided, analyzing query...")
545
  try:
546
  # Get available metadata
@@ -556,7 +569,6 @@ Answer:"""
556
  if qdrant_filter:
557
  print(f"✅ QDRANT FILTER APPLIED: Using inferred Qdrant filter")
558
  filters_applied = True
559
- # Don't set sources/reports/subtype - use the Qdrant filter directly
560
  else:
561
  print(f"⚠️ NO QDRANT FILTER: Could not build Qdrant filter from query")
562
 
@@ -564,20 +576,7 @@ Answer:"""
564
  print(f"❌ AUTO-INFERENCE FAILED: {e}")
565
  qdrant_filter = None
566
  else:
567
- # Check if any explicit filters were provided
568
- filters_applied = any([reports, sources, subtype])
569
- if filters_applied:
570
- print(f"✅ EXPLICIT FILTERS: Using provided filters")
571
- else:
572
- print(f"⚠️ NO FILTERS: No explicit filters and auto-inference disabled")
573
-
574
- # Extract filter parameters from the filters parameter
575
- reports = filters.get('reports', []) if filters else []
576
- sources = filters.get('sources', []) if filters else []
577
- subtype = filters.get('subtype', []) if filters else []
578
- year = filters.get('year', []) if filters else []
579
- district = filters.get('district', []) if filters else []
580
- filenames = filters.get('filenames', []) if filters else [] # Support mutually exclusive filename filtering
581
 
582
  # Get vectorstore
583
  vectorstore = self.vectorstore_manager.get_vectorstore()
 
10
  from langchain.docstore.document import Document
11
  except ModuleNotFoundError as me:
12
  print(me.__str__())
13
+ from langchain.schema import Document
 
 
 
 
14
 
15
  from .logging import log_error
16
 
 
536
  filters_applied = False
537
  qdrant_filter = None # Add this
538
 
539
+ # ALWAYS extract filter parameters from the filters dict first
540
+ # These need to be defined before the conditional logic
541
+ year = filters.get('year', []) if filters else []
542
+ district = filters.get('district', []) if filters else []
543
+ filenames = filters.get('filenames', []) if filters else []
544
+ sources_from_filters = filters.get('sources', []) if filters else []
545
+
546
+ # Use sources from filters dict if not provided directly
547
+ if sources_from_filters and not sources:
548
+ sources = sources_from_filters
549
+
550
+ # Check if any explicit filters were provided
551
+ has_explicit_filters = any([reports, sources, subtype, year, district, filenames])
552
+
553
+ if has_explicit_filters:
554
+ print(f"✅ EXPLICIT FILTERS: year={year}, district={district}, sources={sources}, filenames={filenames}")
555
+ filters_applied = True
556
+ elif auto_infer_filters:
557
  print(f"🤖 AUTO-INFERRING FILTERS: No explicit filters provided, analyzing query...")
558
  try:
559
  # Get available metadata
 
569
  if qdrant_filter:
570
  print(f"✅ QDRANT FILTER APPLIED: Using inferred Qdrant filter")
571
  filters_applied = True
 
572
  else:
573
  print(f"⚠️ NO QDRANT FILTER: Could not build Qdrant filter from query")
574
 
 
576
  print(f"❌ AUTO-INFERENCE FAILED: {e}")
577
  qdrant_filter = None
578
  else:
579
+ print(f"⚠️ NO FILTERS: No explicit filters and auto-inference disabled")
 
 
 
 
 
 
 
 
 
 
 
 
 
580
 
581
  # Get vectorstore
582
  vectorstore = self.vectorstore_manager.get_vectorstore()
src/utils.py CHANGED
@@ -6,11 +6,13 @@ from datetime import datetime, date
6
 
7
 
8
  import configparser
9
- from torch import cuda
10
  from qdrant_client.http import models as rest
11
  from langchain_community.embeddings import HuggingFaceEmbeddings
12
  from langchain_community.cross_encoders import HuggingFaceCrossEncoder
13
 
 
 
 
14
 
15
  def get_config(fp):
16
  config = configparser.ConfigParser()
@@ -19,7 +21,7 @@ def get_config(fp):
19
 
20
 
21
  def get_embeddings_model(config):
22
- device = "cuda" if cuda.is_available() else "cpu"
23
 
24
  # Define embedding model
25
  model_name = config.get("retriever", "MODEL")
 
6
 
7
 
8
  import configparser
 
9
  from qdrant_client.http import models as rest
10
  from langchain_community.embeddings import HuggingFaceEmbeddings
11
  from langchain_community.cross_encoders import HuggingFaceCrossEncoder
12
 
13
+ # Import device detection utility
14
+ from src.utils.device import get_device_for_sentence_transformers
15
+
16
 
17
  def get_config(fp):
18
  config = configparser.ConfigParser()
 
21
 
22
 
23
  def get_embeddings_model(config):
24
+ device = get_device_for_sentence_transformers()
25
 
26
  # Define embedding model
27
  model_name = config.get("retriever", "MODEL")