Spaces:
Sleeping
Sleeping
update filtering logging
Browse files- src/pipeline.py +20 -21
- src/utils.py +4 -2
src/pipeline.py
CHANGED
|
@@ -10,11 +10,7 @@ try:
|
|
| 10 |
from langchain.docstore.document import Document
|
| 11 |
except ModuleNotFoundError as me:
|
| 12 |
print(me.__str__())
|
| 13 |
-
|
| 14 |
-
from langchain.schema import Document
|
| 15 |
-
except:
|
| 16 |
-
from langchain_core.documents import Document
|
| 17 |
-
print('only "from langchain_core.documents import Document" worked !')
|
| 18 |
|
| 19 |
from .logging import log_error
|
| 20 |
|
|
@@ -540,7 +536,24 @@ Answer:"""
|
|
| 540 |
filters_applied = False
|
| 541 |
qdrant_filter = None # Add this
|
| 542 |
|
| 543 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 544 |
print(f"🤖 AUTO-INFERRING FILTERS: No explicit filters provided, analyzing query...")
|
| 545 |
try:
|
| 546 |
# Get available metadata
|
|
@@ -556,7 +569,6 @@ Answer:"""
|
|
| 556 |
if qdrant_filter:
|
| 557 |
print(f"✅ QDRANT FILTER APPLIED: Using inferred Qdrant filter")
|
| 558 |
filters_applied = True
|
| 559 |
-
# Don't set sources/reports/subtype - use the Qdrant filter directly
|
| 560 |
else:
|
| 561 |
print(f"⚠️ NO QDRANT FILTER: Could not build Qdrant filter from query")
|
| 562 |
|
|
@@ -564,20 +576,7 @@ Answer:"""
|
|
| 564 |
print(f"❌ AUTO-INFERENCE FAILED: {e}")
|
| 565 |
qdrant_filter = None
|
| 566 |
else:
|
| 567 |
-
|
| 568 |
-
filters_applied = any([reports, sources, subtype])
|
| 569 |
-
if filters_applied:
|
| 570 |
-
print(f"✅ EXPLICIT FILTERS: Using provided filters")
|
| 571 |
-
else:
|
| 572 |
-
print(f"⚠️ NO FILTERS: No explicit filters and auto-inference disabled")
|
| 573 |
-
|
| 574 |
-
# Extract filter parameters from the filters parameter
|
| 575 |
-
reports = filters.get('reports', []) if filters else []
|
| 576 |
-
sources = filters.get('sources', []) if filters else []
|
| 577 |
-
subtype = filters.get('subtype', []) if filters else []
|
| 578 |
-
year = filters.get('year', []) if filters else []
|
| 579 |
-
district = filters.get('district', []) if filters else []
|
| 580 |
-
filenames = filters.get('filenames', []) if filters else [] # Support mutually exclusive filename filtering
|
| 581 |
|
| 582 |
# Get vectorstore
|
| 583 |
vectorstore = self.vectorstore_manager.get_vectorstore()
|
|
|
|
| 10 |
from langchain.docstore.document import Document
|
| 11 |
except ModuleNotFoundError as me:
|
| 12 |
print(me.__str__())
|
| 13 |
+
from langchain.schema import Document
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
from .logging import log_error
|
| 16 |
|
|
|
|
| 536 |
filters_applied = False
|
| 537 |
qdrant_filter = None # Add this
|
| 538 |
|
| 539 |
+
# ALWAYS extract filter parameters from the filters dict first
|
| 540 |
+
# These need to be defined before the conditional logic
|
| 541 |
+
year = filters.get('year', []) if filters else []
|
| 542 |
+
district = filters.get('district', []) if filters else []
|
| 543 |
+
filenames = filters.get('filenames', []) if filters else []
|
| 544 |
+
sources_from_filters = filters.get('sources', []) if filters else []
|
| 545 |
+
|
| 546 |
+
# Use sources from filters dict if not provided directly
|
| 547 |
+
if sources_from_filters and not sources:
|
| 548 |
+
sources = sources_from_filters
|
| 549 |
+
|
| 550 |
+
# Check if any explicit filters were provided
|
| 551 |
+
has_explicit_filters = any([reports, sources, subtype, year, district, filenames])
|
| 552 |
+
|
| 553 |
+
if has_explicit_filters:
|
| 554 |
+
print(f"✅ EXPLICIT FILTERS: year={year}, district={district}, sources={sources}, filenames={filenames}")
|
| 555 |
+
filters_applied = True
|
| 556 |
+
elif auto_infer_filters:
|
| 557 |
print(f"🤖 AUTO-INFERRING FILTERS: No explicit filters provided, analyzing query...")
|
| 558 |
try:
|
| 559 |
# Get available metadata
|
|
|
|
| 569 |
if qdrant_filter:
|
| 570 |
print(f"✅ QDRANT FILTER APPLIED: Using inferred Qdrant filter")
|
| 571 |
filters_applied = True
|
|
|
|
| 572 |
else:
|
| 573 |
print(f"⚠️ NO QDRANT FILTER: Could not build Qdrant filter from query")
|
| 574 |
|
|
|
|
| 576 |
print(f"❌ AUTO-INFERENCE FAILED: {e}")
|
| 577 |
qdrant_filter = None
|
| 578 |
else:
|
| 579 |
+
print(f"⚠️ NO FILTERS: No explicit filters and auto-inference disabled")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 580 |
|
| 581 |
# Get vectorstore
|
| 582 |
vectorstore = self.vectorstore_manager.get_vectorstore()
|
src/utils.py
CHANGED
|
@@ -6,11 +6,13 @@ from datetime import datetime, date
|
|
| 6 |
|
| 7 |
|
| 8 |
import configparser
|
| 9 |
-
from torch import cuda
|
| 10 |
from qdrant_client.http import models as rest
|
| 11 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 12 |
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
|
| 13 |
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
def get_config(fp):
|
| 16 |
config = configparser.ConfigParser()
|
|
@@ -19,7 +21,7 @@ def get_config(fp):
|
|
| 19 |
|
| 20 |
|
| 21 |
def get_embeddings_model(config):
|
| 22 |
-
device =
|
| 23 |
|
| 24 |
# Define embedding model
|
| 25 |
model_name = config.get("retriever", "MODEL")
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
import configparser
|
|
|
|
| 9 |
from qdrant_client.http import models as rest
|
| 10 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 11 |
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
|
| 12 |
|
| 13 |
+
# Import device detection utility
|
| 14 |
+
from src.utils.device import get_device_for_sentence_transformers
|
| 15 |
+
|
| 16 |
|
| 17 |
def get_config(fp):
|
| 18 |
config = configparser.ConfigParser()
|
|
|
|
| 21 |
|
| 22 |
|
| 23 |
def get_embeddings_model(config):
|
| 24 |
+
device = get_device_for_sentence_transformers()
|
| 25 |
|
| 26 |
# Define embedding model
|
| 27 |
model_name = config.get("retriever", "MODEL")
|