Spaces:
Sleeping
Sleeping
remove legacy imports
Browse files- src/pipeline.py +58 -8
src/pipeline.py
CHANGED
|
@@ -118,8 +118,23 @@ class PipelineManager:
|
|
| 118 |
try:
|
| 119 |
# Load config if not provided
|
| 120 |
if not self.config:
|
| 121 |
-
|
| 122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
|
| 124 |
# Auto-infer embedding model from collection name if not "docling"
|
| 125 |
collection_name = self.config.get('qdrant', {}).get('collection_name', 'docling')
|
|
@@ -138,7 +153,16 @@ class PipelineManager:
|
|
| 138 |
if 'vectorstore' in self.config:
|
| 139 |
self.config['vectorstore']['embedding_model'] = inferred_model
|
| 140 |
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
|
| 143 |
self.llm_manager = LLMRegistry()
|
| 144 |
|
|
@@ -151,7 +175,7 @@ class PipelineManager:
|
|
| 151 |
except Exception as e:
|
| 152 |
try:
|
| 153 |
# Try direct instantiation with config
|
| 154 |
-
from
|
| 155 |
self.llm_client = get_llm_client("openai", self.config)
|
| 156 |
print("✅ LLM CLIENT: Initialized using direct get_llm_client function with config")
|
| 157 |
except Exception as e2:
|
|
@@ -176,19 +200,28 @@ class PipelineManager:
|
|
| 176 |
self.llm_client = None
|
| 177 |
|
| 178 |
# Load system prompt
|
| 179 |
-
from
|
| 180 |
self.system_prompt = DEFAULT_AUDIT_SYSTEM_PROMPT
|
| 181 |
|
| 182 |
# Initialize report service
|
| 183 |
try:
|
| 184 |
-
|
|
|
|
|
|
|
|
|
|
| 185 |
self.report_service = ReportService()
|
| 186 |
except Exception as e:
|
| 187 |
print(f"Warning: Could not initialize report service: {e}")
|
| 188 |
self.report_service = None
|
| 189 |
|
| 190 |
except Exception as e:
|
| 191 |
-
print(f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
|
| 193 |
def test_retrieval(
|
| 194 |
self,
|
|
@@ -293,6 +326,21 @@ class PipelineManager:
|
|
| 293 |
Returns:
|
| 294 |
True if successful, False otherwise
|
| 295 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 296 |
try:
|
| 297 |
vectorstore = self.vectorstore_manager.connect_to_existing(force_recreate=force_recreate)
|
| 298 |
if vectorstore:
|
|
@@ -304,6 +352,8 @@ class PipelineManager:
|
|
| 304 |
except Exception as e:
|
| 305 |
print(f"❌ Error connecting to vector store: {e}")
|
| 306 |
log_error(e, {"component": "vectorstore_connection"})
|
|
|
|
|
|
|
| 307 |
|
| 308 |
# If it's a dimension mismatch error, try with force_recreate
|
| 309 |
if "dimensions" in str(e).lower() and not force_recreate:
|
|
@@ -492,7 +542,7 @@ Answer:"""
|
|
| 492 |
print(f"π€ AUTO-INFERRING FILTERS: No explicit filters provided, analyzing query...")
|
| 493 |
try:
|
| 494 |
# Import get_available_metadata here to avoid circular imports
|
| 495 |
-
from
|
| 496 |
|
| 497 |
# Get available metadata
|
| 498 |
available_metadata = get_available_metadata(self.vectorstore_manager.get_vectorstore())
|
|
|
|
| 118 |
try:
|
| 119 |
# Load config if not provided
|
| 120 |
if not self.config:
|
| 121 |
+
try:
|
| 122 |
+
from src.config.loader import load_config
|
| 123 |
+
self.config = load_config()
|
| 124 |
+
except ImportError:
|
| 125 |
+
# Try alternate import path
|
| 126 |
+
from src.config.loader import load_config
|
| 127 |
+
self.config = load_config()
|
| 128 |
+
|
| 129 |
+
# Validate config structure
|
| 130 |
+
if not isinstance(self.config, dict):
|
| 131 |
+
raise ValueError(f"Config must be a dict, got {type(self.config)}")
|
| 132 |
+
|
| 133 |
+
# Ensure retriever config exists
|
| 134 |
+
if 'retriever' not in self.config:
|
| 135 |
+
self.config['retriever'] = {}
|
| 136 |
+
if 'model' not in self.config['retriever']:
|
| 137 |
+
raise ValueError("Config must have 'retriever.model' specified")
|
| 138 |
|
| 139 |
# Auto-infer embedding model from collection name if not "docling"
|
| 140 |
collection_name = self.config.get('qdrant', {}).get('collection_name', 'docling')
|
|
|
|
| 153 |
if 'vectorstore' in self.config:
|
| 154 |
self.config['vectorstore']['embedding_model'] = inferred_model
|
| 155 |
|
| 156 |
+
# Initialize vectorstore manager - this might fail if model loading fails
|
| 157 |
+
try:
|
| 158 |
+
self.vectorstore_manager = VectorStoreManager(self.config)
|
| 159 |
+
print("✅ VectorStoreManager initialized successfully")
|
| 160 |
+
except Exception as vs_error:
|
| 161 |
+
print(f"❌ Error initializing VectorStoreManager: {vs_error}")
|
| 162 |
+
import traceback
|
| 163 |
+
traceback.print_exc()
|
| 164 |
+
self.vectorstore_manager = None
|
| 165 |
+
raise # Re-raise to be caught by outer try-except
|
| 166 |
|
| 167 |
self.llm_manager = LLMRegistry()
|
| 168 |
|
|
|
|
| 175 |
except Exception as e:
|
| 176 |
try:
|
| 177 |
# Try direct instantiation with config
|
| 178 |
+
from src.llm.adapters import get_llm_client
|
| 179 |
self.llm_client = get_llm_client("openai", self.config)
|
| 180 |
print("✅ LLM CLIENT: Initialized using direct get_llm_client function with config")
|
| 181 |
except Exception as e2:
|
|
|
|
| 200 |
self.llm_client = None
|
| 201 |
|
| 202 |
# Load system prompt
|
| 203 |
+
from src.llm.templates import DEFAULT_AUDIT_SYSTEM_PROMPT
|
| 204 |
self.system_prompt = DEFAULT_AUDIT_SYSTEM_PROMPT
|
| 205 |
|
| 206 |
# Initialize report service
|
| 207 |
try:
|
| 208 |
+
try:
|
| 209 |
+
from src.reporting.service import ReportService
|
| 210 |
+
except ImportError:
|
| 211 |
+
from src.reporting.service import ReportService
|
| 212 |
self.report_service = ReportService()
|
| 213 |
except Exception as e:
|
| 214 |
print(f"Warning: Could not initialize report service: {e}")
|
| 215 |
self.report_service = None
|
| 216 |
|
| 217 |
except Exception as e:
|
| 218 |
+
print(f"❌ Error initializing components: {e}")
|
| 219 |
+
import traceback
|
| 220 |
+
traceback.print_exc()
|
| 221 |
+
# Don't set vectorstore_manager to None if it was already set
|
| 222 |
+
if not hasattr(self, 'vectorstore_manager') or self.vectorstore_manager is None:
|
| 223 |
+
self.vectorstore_manager = None
|
| 224 |
+
raise # Re-raise to allow caller to handle
|
| 225 |
|
| 226 |
def test_retrieval(
|
| 227 |
self,
|
|
|
|
| 326 |
Returns:
|
| 327 |
True if successful, False otherwise
|
| 328 |
"""
|
| 329 |
+
# Check if vectorstore_manager is initialized
|
| 330 |
+
if self.vectorstore_manager is None:
|
| 331 |
+
print("❌ Vector store manager is not initialized")
|
| 332 |
+
print("π Attempting to initialize vector store manager...")
|
| 333 |
+
try:
|
| 334 |
+
self._initialize_components()
|
| 335 |
+
if self.vectorstore_manager is None:
|
| 336 |
+
print("❌ Failed to initialize vector store manager")
|
| 337 |
+
return False
|
| 338 |
+
except Exception as init_error:
|
| 339 |
+
print(f"❌ Error initializing vector store manager: {init_error}")
|
| 340 |
+
import traceback
|
| 341 |
+
traceback.print_exc()
|
| 342 |
+
return False
|
| 343 |
+
|
| 344 |
try:
|
| 345 |
vectorstore = self.vectorstore_manager.connect_to_existing(force_recreate=force_recreate)
|
| 346 |
if vectorstore:
|
|
|
|
| 352 |
except Exception as e:
|
| 353 |
print(f"❌ Error connecting to vector store: {e}")
|
| 354 |
log_error(e, {"component": "vectorstore_connection"})
|
| 355 |
+
import traceback
|
| 356 |
+
traceback.print_exc()
|
| 357 |
|
| 358 |
# If it's a dimension mismatch error, try with force_recreate
|
| 359 |
if "dimensions" in str(e).lower() and not force_recreate:
|
|
|
|
| 542 |
print(f"π€ AUTO-INFERRING FILTERS: No explicit filters provided, analyzing query...")
|
| 543 |
try:
|
| 544 |
# Import get_available_metadata here to avoid circular imports
|
| 545 |
+
from src.retrieval.filter import get_available_metadata, infer_filters_from_query
|
| 546 |
|
| 547 |
# Get available metadata
|
| 548 |
available_metadata = get_available_metadata(self.vectorstore_manager.get_vectorstore())
|