Ara Yeroyan committed on
Commit
72318ee
·
1 Parent(s): 1154c8d
Files changed (1) hide show
  1. src/pipeline.py +32 -37
src/pipeline.py CHANGED
@@ -1,5 +1,7 @@
1
  """Main pipeline orchestrator for the Audit QA system."""
 
2
  import time
 
3
  from pathlib import Path
4
  from dataclasses import dataclass
5
  from typing import Dict, Any, List, Optional
@@ -11,11 +13,21 @@ except ModuleNotFoundError as me:
11
  from langchain.schema import Document
12
 
13
  from .logging import log_error
14
- from .llm.adapters import LLMRegistry
15
  from .loader import chunks_to_documents
16
  from .vectorstore import VectorStoreManager
 
17
  from .retrieval.context import ContextRetriever
18
- from .config.loader import get_embedding_model_for_collection
 
 
 
 
 
 
 
 
 
19
 
20
 
21
 
@@ -41,12 +53,13 @@ class PipelineManager:
41
  """
42
  Initialize the pipeline manager.
43
  """
 
 
44
  self.config = config or {}
 
45
  self.vectorstore_manager = None
46
  self.context_retriever = None # Initialize as None
47
- self.llm_client = None
48
- self.report_service = None
49
- self.chunks = None
50
 
51
  # Initialize components
52
  self._initialize_components()
@@ -118,13 +131,7 @@ class PipelineManager:
118
  try:
119
  # Load config if not provided
120
  if not self.config:
121
- try:
122
- from src.config.loader import load_config
123
- self.config = load_config()
124
- except ImportError:
125
- # Try alternate import path
126
- from src.config.loader import load_config
127
- self.config = load_config()
128
 
129
  # Validate config structure
130
  if not isinstance(self.config, dict):
@@ -159,7 +166,6 @@ class PipelineManager:
159
  print("βœ… VectorStoreManager initialized successfully")
160
  except Exception as vs_error:
161
  print(f"❌ Error initializing VectorStoreManager: {vs_error}")
162
- import traceback
163
  traceback.print_exc()
164
  self.vectorstore_manager = None
165
  raise # Re-raise to be caught by outer try-except
@@ -175,40 +181,35 @@ class PipelineManager:
175
  except Exception as e:
176
  try:
177
  # Try direct instantiation with config
178
- from src.llm.adapters import get_llm_client
179
  self.llm_client = get_llm_client("openai", self.config)
180
  print("βœ… LLM CLIENT: Initialized using direct get_llm_client function with config")
181
  except Exception as e2:
182
  print(f"❌ LLM CLIENT: Registry methods failed - {e2}")
183
  # Try to create a simple LLM client directly
184
  try:
185
- from langchain_openai import ChatOpenAI
186
- import os
187
- api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY")
188
- if api_key:
189
- self.llm_client = ChatOpenAI(
190
- model="gpt-3.5-turbo",
191
- api_key=api_key,
192
- temperature=0.1,
193
- max_tokens=1000
194
- )
195
- print("βœ… LLM CLIENT: Initialized using direct ChatOpenAI")
 
196
  else:
197
- print("❌ LLM CLIENT: No API key available")
198
  except Exception as e3:
199
  print(f"❌ LLM CLIENT: Direct instantiation also failed - {e3}")
200
  self.llm_client = None
201
 
202
  # Load system prompt
203
- from src.llm.templates import DEFAULT_AUDIT_SYSTEM_PROMPT
204
  self.system_prompt = DEFAULT_AUDIT_SYSTEM_PROMPT
205
 
206
  # Initialize report service
207
  try:
208
- try:
209
- from src.reporting.service import ReportService
210
- except ImportError:
211
- from src.reporting.service import ReportService
212
  self.report_service = ReportService()
213
  except Exception as e:
214
  print(f"Warning: Could not initialize report service: {e}")
@@ -216,7 +217,6 @@ class PipelineManager:
216
 
217
  except Exception as e:
218
  print(f"❌ Error initializing components: {e}")
219
- import traceback
220
  traceback.print_exc()
221
  # Don't set vectorstore_manager to None if it was already set
222
  if not hasattr(self, 'vectorstore_manager') or self.vectorstore_manager is None:
@@ -337,7 +337,6 @@ class PipelineManager:
337
  return False
338
  except Exception as init_error:
339
  print(f"❌ Error initializing vector store manager: {init_error}")
340
- import traceback
341
  traceback.print_exc()
342
  return False
343
 
@@ -352,7 +351,6 @@ class PipelineManager:
352
  except Exception as e:
353
  print(f"❌ Error connecting to vector store: {e}")
354
  log_error(e, {"component": "vectorstore_connection"})
355
- import traceback
356
  traceback.print_exc()
357
 
358
  # If it's a dimension mismatch error, try with force_recreate
@@ -541,9 +539,6 @@ Answer:"""
541
  if auto_infer_filters and not any([reports, sources, subtype]):
542
  print(f"πŸ€– AUTO-INFERRING FILTERS: No explicit filters provided, analyzing query...")
543
  try:
544
- # Import get_available_metadata here to avoid circular imports
545
- from src.retrieval.filter import get_available_metadata, infer_filters_from_query
546
-
547
  # Get available metadata
548
  available_metadata = get_available_metadata(self.vectorstore_manager.get_vectorstore())
549
 
 
1
  """Main pipeline orchestrator for the Audit QA system."""
2
+ import os
3
  import time
4
+ import traceback
5
  from pathlib import Path
6
  from dataclasses import dataclass
7
  from typing import Dict, Any, List, Optional
 
13
  from langchain.schema import Document
14
 
15
  from .logging import log_error
16
+
17
  from .loader import chunks_to_documents
18
  from .vectorstore import VectorStoreManager
19
+ from .reporting.service import ReportService
20
  from .retrieval.context import ContextRetriever
21
+ from .llm.adapters import LLMRegistry, get_llm_client
22
+ from .llm.templates import DEFAULT_AUDIT_SYSTEM_PROMPT
23
+ from .config.loader import load_config, get_embedding_model_for_collection
24
+ from .retrieval.filter import get_available_metadata, infer_filters_from_query
25
+
26
+ try:
27
+ from langchain_openai import ChatOpenAI
28
+ LANGCHAIN_OPENAI_AVAILABLE = True
29
+ except ImportError:
30
+ LANGCHAIN_OPENAI_AVAILABLE = False
31
 
32
 
33
 
 
53
  """
54
  Initialize the pipeline manager.
55
  """
56
+ self.chunks = None
57
+ self.llm_client = None
58
  self.config = config or {}
59
+ self.report_service = None
60
  self.vectorstore_manager = None
61
  self.context_retriever = None # Initialize as None
62
+
 
 
63
 
64
  # Initialize components
65
  self._initialize_components()
 
131
  try:
132
  # Load config if not provided
133
  if not self.config:
134
+ self.config = load_config()
 
 
 
 
 
 
135
 
136
  # Validate config structure
137
  if not isinstance(self.config, dict):
 
166
  print("βœ… VectorStoreManager initialized successfully")
167
  except Exception as vs_error:
168
  print(f"❌ Error initializing VectorStoreManager: {vs_error}")
 
169
  traceback.print_exc()
170
  self.vectorstore_manager = None
171
  raise # Re-raise to be caught by outer try-except
 
181
  except Exception as e:
182
  try:
183
  # Try direct instantiation with config
 
184
  self.llm_client = get_llm_client("openai", self.config)
185
  print("βœ… LLM CLIENT: Initialized using direct get_llm_client function with config")
186
  except Exception as e2:
187
  print(f"❌ LLM CLIENT: Registry methods failed - {e2}")
188
  # Try to create a simple LLM client directly
189
  try:
190
+ if LANGCHAIN_OPENAI_AVAILABLE:
191
+ api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY")
192
+ if api_key:
193
+ self.llm_client = ChatOpenAI(
194
+ model="gpt-3.5-turbo",
195
+ api_key=api_key,
196
+ temperature=0.1,
197
+ max_tokens=1000
198
+ )
199
+ print("βœ… LLM CLIENT: Initialized using direct ChatOpenAI")
200
+ else:
201
+ print("❌ LLM CLIENT: No API key available")
202
  else:
203
+ print("❌ LLM CLIENT: langchain-openai not available")
204
  except Exception as e3:
205
  print(f"❌ LLM CLIENT: Direct instantiation also failed - {e3}")
206
  self.llm_client = None
207
 
208
  # Load system prompt
 
209
  self.system_prompt = DEFAULT_AUDIT_SYSTEM_PROMPT
210
 
211
  # Initialize report service
212
  try:
 
 
 
 
213
  self.report_service = ReportService()
214
  except Exception as e:
215
  print(f"Warning: Could not initialize report service: {e}")
 
217
 
218
  except Exception as e:
219
  print(f"❌ Error initializing components: {e}")
 
220
  traceback.print_exc()
221
  # Don't set vectorstore_manager to None if it was already set
222
  if not hasattr(self, 'vectorstore_manager') or self.vectorstore_manager is None:
 
337
  return False
338
  except Exception as init_error:
339
  print(f"❌ Error initializing vector store manager: {init_error}")
 
340
  traceback.print_exc()
341
  return False
342
 
 
351
  except Exception as e:
352
  print(f"❌ Error connecting to vector store: {e}")
353
  log_error(e, {"component": "vectorstore_connection"})
 
354
  traceback.print_exc()
355
 
356
  # If it's a dimension mismatch error, try with force_recreate
 
539
  if auto_infer_filters and not any([reports, sources, subtype]):
540
  print(f"πŸ€– AUTO-INFERRING FILTERS: No explicit filters provided, analyzing query...")
541
  try:
 
 
 
542
  # Get available metadata
543
  available_metadata = get_available_metadata(self.vectorstore_manager.get_vectorstore())
544