TilanB committed on
Commit
33bc5e4
·
verified ·
1 Parent(s): cc9e035
content_analyzer/document_parser.py CHANGED
@@ -140,11 +140,13 @@ class DocumentProcessor:
140
  )
141
  self.gemini_client = None
142
  self.genai_module = None # Store the module reference
143
- if parameters.ENABLE_CHART_EXTRACTION:
 
 
144
  self._init_gemini_vision()
145
  logger.debug(f"DocumentProcessor initialized with cache dir: {self.cache_dir}")
146
  logger.debug(f"Chunk size: {parameters.CHUNK_SIZE}, Chunk overlap: {parameters.CHUNK_OVERLAP}")
147
- logger.debug(f"Chart extraction: {'enabled' if parameters.ENABLE_CHART_EXTRACTION else 'disabled'}")
148
 
149
  def _init_gemini_vision(self):
150
  """Initialize Gemini Vision client for chart analysis."""
@@ -156,7 +158,7 @@ class DocumentProcessor:
156
  except ImportError as e:
157
  logger.warning(f"google-genai not installed: {e}")
158
  logger.info("Install with: pip install google-genai")
159
- parameters.ENABLE_CHART_EXTRACTION = False
160
  return
161
  self.genai_module = genai
162
  try:
@@ -165,7 +167,7 @@ class DocumentProcessor:
165
  logger.info(f"✅ Gemini Vision client initialized")
166
  except Exception as e:
167
  logger.error(f"❌ Failed to initialize Gemini Vision client: {e}")
168
- parameters.ENABLE_CHART_EXTRACTION = False
169
 
170
  def validate_files(self, files: List) -> bool:
171
  """
@@ -288,8 +290,8 @@ class DocumentProcessor:
288
  def run_pdfplumber():
289
  return self._load_pdf_with_pdfplumber(file.name)
290
  def run_charts():
291
- logger.info(f"ENABLE_CHART_EXTRACTION={parameters.ENABLE_CHART_EXTRACTION}, gemini_client={self.gemini_client is not None}")
292
- if parameters.ENABLE_CHART_EXTRACTION and self.gemini_client:
293
  return self._extract_charts_from_pdf(file.name)
294
  return []
295
  try:
@@ -313,7 +315,7 @@ class DocumentProcessor:
313
  except MemoryError as e:
314
  logger.error(f"Out of memory in parallel PDF processing: {e}. Falling back to sequential.")
315
  documents = self._load_pdf_with_pdfplumber(file.name)
316
- if parameters.ENABLE_CHART_EXTRACTION and self.gemini_client:
317
  chart_docs = self._extract_charts_from_pdf(file.name)
318
  if chart_docs:
319
  documents.extend(chart_docs)
 
140
  )
141
  self.gemini_client = None
142
  self.genai_module = None # Store the module reference
143
+ # Instance-level flag instead of modifying global parameters
144
+ self.chart_extraction_enabled = parameters.ENABLE_CHART_EXTRACTION
145
+ if self.chart_extraction_enabled:
146
  self._init_gemini_vision()
147
  logger.debug(f"DocumentProcessor initialized with cache dir: {self.cache_dir}")
148
  logger.debug(f"Chunk size: {parameters.CHUNK_SIZE}, Chunk overlap: {parameters.CHUNK_OVERLAP}")
149
+ logger.debug(f"Chart extraction: {'enabled' if self.chart_extraction_enabled else 'disabled'}")
150
 
151
  def _init_gemini_vision(self):
152
  """Initialize Gemini Vision client for chart analysis."""
 
158
  except ImportError as e:
159
  logger.warning(f"google-genai not installed: {e}")
160
  logger.info("Install with: pip install google-genai")
161
+ self.chart_extraction_enabled = False # Instance-level, not global
162
  return
163
  self.genai_module = genai
164
  try:
 
167
  logger.info(f"✅ Gemini Vision client initialized")
168
  except Exception as e:
169
  logger.error(f"❌ Failed to initialize Gemini Vision client: {e}")
170
+ self.chart_extraction_enabled = False # Instance-level, not global
171
 
172
  def validate_files(self, files: List) -> bool:
173
  """
 
290
  def run_pdfplumber():
291
  return self._load_pdf_with_pdfplumber(file.name)
292
  def run_charts():
293
+ logger.info(f"chart_extraction_enabled={self.chart_extraction_enabled}, gemini_client={self.gemini_client is not None}")
294
+ if self.chart_extraction_enabled and self.gemini_client:
295
  return self._extract_charts_from_pdf(file.name)
296
  return []
297
  try:
 
315
  except MemoryError as e:
316
  logger.error(f"Out of memory in parallel PDF processing: {e}. Falling back to sequential.")
317
  documents = self._load_pdf_with_pdfplumber(file.name)
318
+ if self.chart_extraction_enabled and self.gemini_client:
319
  chart_docs = self._extract_charts_from_pdf(file.name)
320
  if chart_docs:
321
  documents.extend(chart_docs)