Suhasdev committed on
Commit
c0b1bdf
·
1 Parent(s): 9a000b8

Remove hardcoded debug.log file writes - fix file not found errors

Browse files
src/gepa_optimizer/core/optimizer.py CHANGED
@@ -139,12 +139,7 @@ class GepaOptimizer:
139
  self.logger.info(f"Starting optimization session: {session_id}")
140
  self.logger.info(f"Using model: {self.config.model.model_name} (provider: {self.config.model.provider})")
141
 
142
- # #region agent log
143
- import json as _json_debug
144
- _debug_log_path = "/Users/suhas/Desktop/Projects/Prompt-Optimizer/.cursor/debug.log"
145
- with open(_debug_log_path, "a") as _f:
146
- _f.write(_json_debug.dumps({"hypothesisId": "E", "location": "optimizer.py:train_start", "message": "Optimization train() started", "data": {"session_id": session_id, "max_iterations": self.config.max_iterations}, "timestamp": int(time.time() * 1000), "sessionId": "debug-session"}) + "\n")
147
- # #endregion
148
 
149
  # 🔥 FIX E: Reset Pareto logger at start of each optimization run
150
  from ..utils.pareto_logger import reset_pareto_logger
@@ -906,12 +901,7 @@ Output the improved prompt directly and only the prompt."""
906
  if reflection_lm_passed:
907
  self.logger.debug(f"reflection_lm_callable passed to GEPA (may be ignored in adapter mode)")
908
 
909
- # #region agent log
910
- import json as _json_debug
911
- _debug_log_path = "/Users/suhas/Desktop/Projects/Prompt-Optimizer/.cursor/debug.log"
912
- with open(_debug_log_path, "a") as _f:
913
- _f.write(_json_debug.dumps({"hypothesisId": "A", "location": "optimizer.py:gepa_params", "message": "GEPA params construction", "data": {"max_iterations_from_config": max_iterations, "max_metric_calls": max_metric_calls, "batch_size": batch_size}, "timestamp": int(time.time() * 1000), "sessionId": "debug-session"}) + "\n")
914
- # #endregion
915
 
916
  gepa_params = {
917
  'adapter': adapter, # Use the adapter created above (with or without LLEGO)
@@ -958,10 +948,7 @@ Output the improved prompt directly and only the prompt."""
958
  elif key not in VALID_GEPA_PARAMS:
959
  self.logger.debug(f"⚠️ Filtering out invalid GEPA parameter: {key}")
960
 
961
- # #region agent log
962
- with open(_debug_log_path, "a") as _f:
963
- _f.write(_json_debug.dumps({"hypothesisId": "A", "location": "optimizer.py:gepa_params_final", "message": "Final GEPA params keys", "data": {"params_keys": list(gepa_params.keys()), "max_metric_calls": gepa_params.get('max_metric_calls', 'NOT_PASSED')}, "timestamp": int(time.time() * 1000), "sessionId": "debug-session"}) + "\n")
964
- # #endregion
965
 
966
  # 🎯 NEW: Capture GEPA's internal logging for pareto front information
967
  gepa_output = io.StringIO()
@@ -1060,12 +1047,7 @@ Output the improved prompt directly and only the prompt."""
1060
 
1061
  self.logger.info(f"GEPA Optimization Complete: {max_iteration} iterations")
1062
 
1063
- # #region agent log
1064
- import json as _json_debug
1065
- _debug_log_path = "/Users/suhas/Desktop/Projects/Prompt-Optimizer/.cursor/debug.log"
1066
- with open(_debug_log_path, "a") as _f:
1067
- _f.write(_json_debug.dumps({"hypothesisId": "F", "location": "optimizer.py:gepa_complete", "message": "GEPA optimization complete - iteration count", "data": {"max_iteration_from_logs": max_iteration, "expected_iterations": self.config.max_iterations, "off_by_one": max_iteration != self.config.max_iterations, "gepa_logs_length": len(gepa_logs)}, "timestamp": int(time.time() * 1000), "sessionId": "debug-session"}) + "\n")
1068
- # #endregion
1069
 
1070
  return max_iteration # Return the max iteration count
1071
 
 
139
  self.logger.info(f"Starting optimization session: {session_id}")
140
  self.logger.info(f"Using model: {self.config.model.model_name} (provider: {self.config.model.provider})")
141
 
142
+ # Debug logging removed - not needed in production
 
 
 
 
 
143
 
144
  # 🔥 FIX E: Reset Pareto logger at start of each optimization run
145
  from ..utils.pareto_logger import reset_pareto_logger
 
901
  if reflection_lm_passed:
902
  self.logger.debug(f"reflection_lm_callable passed to GEPA (may be ignored in adapter mode)")
903
 
904
+ # Debug logging removed - not needed in production
 
 
 
 
 
905
 
906
  gepa_params = {
907
  'adapter': adapter, # Use the adapter created above (with or without LLEGO)
 
948
  elif key not in VALID_GEPA_PARAMS:
949
  self.logger.debug(f"⚠️ Filtering out invalid GEPA parameter: {key}")
950
 
951
+ # Debug logging removed - not needed in production
 
 
 
952
 
953
  # 🎯 NEW: Capture GEPA's internal logging for pareto front information
954
  gepa_output = io.StringIO()
 
1047
 
1048
  self.logger.info(f"GEPA Optimization Complete: {max_iteration} iterations")
1049
 
1050
+ # Debug logging removed - not needed in production
 
 
 
 
 
1051
 
1052
  return max_iteration # Return the max iteration count
1053
 
src/gepa_optimizer/core/universal_adapter.py CHANGED
@@ -200,27 +200,11 @@ Output the improved prompt directly and only the prompt."""
200
  self.logger.info(f"📐 FORMAT DETECTED: {self._detected_format['format_type']}")
201
  self.logger.info(f" Spec: {self._detected_format['format_spec'][:100]}...")
202
  self.logger.info(f" Avg length: {self._detected_format['avg_length']} chars")
203
- # #region agent log
204
- import json as _json_debug
205
- import time as _time_debug
206
- import os as _os_debug
207
- _debug_log_path = "/Users/suhas/Desktop/Projects/Prompt-Optimizer/.cursor/debug.log"
208
- _os_debug.makedirs(_os_debug.path.dirname(_debug_log_path), exist_ok=True)
209
- with open(_debug_log_path, "a") as _f:
210
- _f.write(_json_debug.dumps({"hypothesisId": "FORMAT_DETECT", "location": "universal_adapter.py:format_detected", "message": "Format detection successful", "data": {"format_type": self._detected_format['format_type'], "num_outputs": len(expected_outputs), "avg_length": self._detected_format['avg_length'], "has_constraint": bool(self._detected_format.get('format_constraint'))}, "timestamp": int(_time_debug.time() * 1000), "sessionId": "debug-session"}) + "\n")
211
- # #endregion
212
  else:
213
  self.logger.warning("⚠️ No expected outputs found for format detection")
214
  self._detected_format = None
215
- # #region agent log
216
- import json as _json_debug
217
- import time as _time_debug
218
- import os as _os_debug
219
- _debug_log_path = "/Users/suhas/Desktop/Projects/Prompt-Optimizer/.cursor/debug.log"
220
- _os_debug.makedirs(_os_debug.path.dirname(_debug_log_path), exist_ok=True)
221
- with open(_debug_log_path, "a") as _f:
222
- _f.write(_json_debug.dumps({"hypothesisId": "FORMAT_DETECT", "location": "universal_adapter.py:format_detected", "message": "Format detection failed - no outputs", "data": {"batch_size": len(batch)}, "timestamp": int(_time_debug.time() * 1000), "sessionId": "debug-session"}) + "\n")
223
- # #endregion
224
 
225
  except Exception as e:
226
  self.logger.warning(f"⚠️ Format detection failed: {e}")
@@ -323,14 +307,7 @@ Output the improved prompt directly and only the prompt."""
323
  self.logger.debug("🌱 Detected seed prompt (S₀)")
324
  else:
325
  candidate_type = 'unknown' # Truly unknown
326
-
327
- # #region agent log
328
- import json as _json_debug
329
- import time as _time_debug
330
- _debug_log_path = "/Users/suhas/Desktop/Projects/Prompt-Optimizer/.cursor/debug.log"
331
- with open(_debug_log_path, "a") as _f:
332
- _f.write(_json_debug.dumps({"hypothesisId": "C", "location": "universal_adapter.py:candidate_type_detect", "message": "Candidate type detection", "data": {"candidate_type": candidate_type, "evaluation_count": self._evaluation_count, "from_candidate_dict": candidate.get('source'), "from_sources_mapping": self._candidate_sources.get(system_prompt), "from_current_type": self._current_evaluation_type}, "timestamp": int(_time_debug.time() * 1000), "sessionId": "debug-session"}) + "\n")
333
- # #endregion
334
 
335
  # Store source for future lookups (always update if we found a valid type)
336
  if candidate_type and candidate_type != 'unknown' and system_prompt not in self._candidate_sources:
@@ -338,19 +315,7 @@ Output the improved prompt directly and only the prompt."""
338
  self.logger.debug(f" 📝 Stored candidate type: {candidate_type} for prompt (length: {len(system_prompt)})")
339
 
340
  # Dataset type already determined above for cache check - reuse it
341
-
342
- # #region agent log
343
- try:
344
- import json as _json_debug
345
- import time as _time_debug
346
- import os as _os_debug
347
- _debug_log_path = "/Users/suhas/Desktop/Projects/Prompt-Optimizer/.cursor/debug.log"
348
- _os_debug.makedirs(_os_debug.path.dirname(_debug_log_path), exist_ok=True)
349
- with open(_debug_log_path, "a") as _f:
350
- _f.write(_json_debug.dumps({"hypothesisId": "H", "location": "universal_adapter.py:dataset_type_detect", "message": "Dataset type detection", "data": {"batch_size": len(batch), "valset_size": getattr(self, '_valset_size', None), "batch_size_threshold": batch_size_threshold, "detected_type": dataset_type, "evaluation_count": self._evaluation_count}, "timestamp": int(_time_debug.time() * 1000), "sessionId": "debug-session"}) + "\n")
351
- except Exception:
352
- pass
353
- # #endregion
354
 
355
  # Check if this is a new candidate (different from last one)
356
  if self._last_candidate != system_prompt:
@@ -383,14 +348,7 @@ Output the improved prompt directly and only the prompt."""
383
  )
384
 
385
  avg_score = sum(scores) / len(scores) if scores else 0.0
386
-
387
- # #region agent log
388
- import json as _json_debug
389
- import time as _time_debug
390
- _debug_log_path = "/Users/suhas/Desktop/Projects/Prompt-Optimizer/.cursor/debug.log"
391
- with open(_debug_log_path, "a") as _f:
392
- _f.write(_json_debug.dumps({"hypothesisId": "B,C", "location": "universal_adapter.py:baseline_check", "message": "Baseline check conditions", "data": {"baseline_score_is_none": self._baseline_score is None, "current_dataset_type": self._current_dataset_type, "current_evaluation_type": self._current_evaluation_type, "is_baseline_evaluation": self._is_baseline_evaluation, "batch_size": len(batch), "avg_score": avg_score}, "timestamp": int(_time_debug.time() * 1000), "sessionId": "debug-session"}) + "\n")
393
- # #endregion
394
 
395
  # 🔥 CRITICAL FIX: Baseline MUST be set from seed's first Dpareto evaluation ONLY
396
  # This ensures FAIR comparison: seed and candidates evaluated on SAME dataset (Dpareto) with SAME number of datapoints
@@ -416,10 +374,7 @@ Output the improved prompt directly and only the prompt."""
416
  pareto_log.set_baseline(avg_score)
417
  self.logger.info(f"📊 Baseline score (Dpareto, {len(batch)} samples): {avg_score:.4f}")
418
  self.logger.info(f" ✅ Baseline set from {'baseline evaluation' if is_baseline_eval else 'first Dpareto'} (type: {self._current_evaluation_type})")
419
- # #region agent log
420
- with open(_debug_log_path, "a") as _f:
421
- _f.write(_json_debug.dumps({"hypothesisId": "B", "location": "universal_adapter.py:baseline_set", "message": "Baseline score SET", "data": {"baseline_score": avg_score, "candidate_type": self._current_evaluation_type, "dataset_type": self._current_dataset_type, "is_baseline_eval": is_baseline_eval}, "timestamp": int(_time_debug.time() * 1000), "sessionId": "debug-session"}) + "\n")
422
- # #endregion
423
  # Note: Test set evaluations are ignored for baseline - baseline comes from Dpareto
424
  else:
425
  # 🔥 SAFETY CHECK: Ensure Pareto logger also has baseline if adapter has it
@@ -595,19 +550,7 @@ Output the improved prompt directly and only the prompt."""
595
 
596
  composite_score = evaluation_results.get("composite_score", 0.0)
597
  scores.append(composite_score)
598
-
599
- # #region agent log
600
- try:
601
- import json as _json_debug
602
- import time as _time_debug
603
- import os as _os_debug
604
- _debug_log_path = "/Users/suhas/Desktop/Projects/Prompt-Optimizer/.cursor/debug.log"
605
- _os_debug.makedirs(_os_debug.path.dirname(_debug_log_path), exist_ok=True)
606
- with open(_debug_log_path, "a") as _f:
607
- _f.write(_json_debug.dumps({"hypothesisId": "G", "location": "universal_adapter.py:evaluation_result", "message": "Individual evaluation result", "data": {"sample_idx": i, "composite_score": composite_score, "semantic_sim": evaluation_results.get("semantic_similarity", -1), "structural_sim": evaluation_results.get("structural_similarity", -1), "format_mismatch": evaluation_results.get("analysis", {}).get("format_mismatch", False), "predicted_len": len(predicted_output) if predicted_output else 0, "expected_len": len(standardized_item.get('output', ''))}, "timestamp": int(_time_debug.time() * 1000), "sessionId": "debug-session"}) + "\n")
608
- except Exception:
609
- pass
610
- # #endregion
611
 
612
  # Update performance tracking
613
  self._evaluation_count += 1
 
200
  self.logger.info(f"📐 FORMAT DETECTED: {self._detected_format['format_type']}")
201
  self.logger.info(f" Spec: {self._detected_format['format_spec'][:100]}...")
202
  self.logger.info(f" Avg length: {self._detected_format['avg_length']} chars")
203
+ # Debug logging removed - not needed in production
 
 
 
 
 
 
 
 
204
  else:
205
  self.logger.warning("⚠️ No expected outputs found for format detection")
206
  self._detected_format = None
207
+ # Debug logging removed - not needed in production
 
 
 
 
 
 
 
 
208
 
209
  except Exception as e:
210
  self.logger.warning(f"⚠️ Format detection failed: {e}")
 
307
  self.logger.debug("🌱 Detected seed prompt (S₀)")
308
  else:
309
  candidate_type = 'unknown' # Truly unknown
310
+ # Debug logging removed - not needed in production
 
 
 
 
 
 
 
311
 
312
  # Store source for future lookups (always update if we found a valid type)
313
  if candidate_type and candidate_type != 'unknown' and system_prompt not in self._candidate_sources:
 
315
  self.logger.debug(f" 📝 Stored candidate type: {candidate_type} for prompt (length: {len(system_prompt)})")
316
 
317
  # Dataset type already determined above for cache check - reuse it
318
+ # Debug logging removed - not needed in production
 
 
 
 
 
 
 
 
 
 
 
 
319
 
320
  # Check if this is a new candidate (different from last one)
321
  if self._last_candidate != system_prompt:
 
348
  )
349
 
350
  avg_score = sum(scores) / len(scores) if scores else 0.0
351
+ # Debug logging removed - not needed in production
 
 
 
 
 
 
 
352
 
353
  # 🔥 CRITICAL FIX: Baseline MUST be set from seed's first Dpareto evaluation ONLY
354
  # This ensures FAIR comparison: seed and candidates evaluated on SAME dataset (Dpareto) with SAME number of datapoints
 
374
  pareto_log.set_baseline(avg_score)
375
  self.logger.info(f"📊 Baseline score (Dpareto, {len(batch)} samples): {avg_score:.4f}")
376
  self.logger.info(f" ✅ Baseline set from {'baseline evaluation' if is_baseline_eval else 'first Dpareto'} (type: {self._current_evaluation_type})")
377
+ # Debug logging removed - not needed in production
 
 
 
378
  # Note: Test set evaluations are ignored for baseline - baseline comes from Dpareto
379
  else:
380
  # 🔥 SAFETY CHECK: Ensure Pareto logger also has baseline if adapter has it
 
550
 
551
  composite_score = evaluation_results.get("composite_score", 0.0)
552
  scores.append(composite_score)
553
+ # Debug logging removed - not needed in production
 
 
 
 
 
 
 
 
 
 
 
 
554
 
555
  # Update performance tracking
556
  self._evaluation_count += 1
src/gepa_optimizer/evaluation/universal_evaluator.py CHANGED
@@ -200,19 +200,7 @@ class UniversalSemanticEvaluator(BaseEvaluator):
200
  logger.debug(f"📊 Evaluation: composite={result['composite_score']:.3f}, "
201
  f"semantic={result['semantic_similarity']:.3f}, "
202
  f"structural={result['structural_similarity']:.3f}")
203
-
204
- # #region agent log
205
- try:
206
- import json as _json_debug
207
- import time as _time_debug
208
- import os as _os_debug
209
- _debug_log_path = "/Users/suhas/Desktop/Projects/Prompt-Optimizer/.cursor/debug.log"
210
- _os_debug.makedirs(_os_debug.path.dirname(_debug_log_path), exist_ok=True)
211
- with open(_debug_log_path, "a") as _f:
212
- _f.write(_json_debug.dumps({"hypothesisId": "G", "location": "universal_evaluator.py:final_score", "message": "Final evaluation score breakdown", "data": {"composite": result["composite_score"], "semantic": result["semantic_similarity"], "structural": result["structural_similarity"], "exact_match": result["exact_match"], "format_mismatch": result.get("analysis", {}).get("format_mismatch", False), "predicted_preview": predicted_str[:150] if predicted_str else "EMPTY", "expected_preview": expected_str[:150] if expected_str else "EMPTY"}, "timestamp": int(_time_debug.time() * 1000), "sessionId": "debug-session"}) + "\n")
213
- except Exception as _e:
214
- pass # Silent fail for instrumentation
215
- # #endregion
216
 
217
  return result
218
 
 
200
  logger.debug(f"📊 Evaluation: composite={result['composite_score']:.3f}, "
201
  f"semantic={result['semantic_similarity']:.3f}, "
202
  f"structural={result['structural_similarity']:.3f}")
203
+ # Debug logging removed - not needed in production
 
 
 
 
 
 
 
 
 
 
 
 
204
 
205
  return result
206
 
src/gepa_optimizer/llms/llego_enhanced_llm.py CHANGED
@@ -253,15 +253,7 @@ class LLEGOEnhancedLLMClient(BaseLLMClient):
253
  json_str = json.dumps(python_dict)
254
  data = json.loads(json_str)
255
  if 'variations' in data:
256
- # #region agent log
257
- import json as _json_debug
258
- import time as _time_debug
259
- import os as _os_debug
260
- _debug_log_path = "/Users/suhas/Desktop/Projects/Prompt-Optimizer/.cursor/debug.log"
261
- _os_debug.makedirs(_os_debug.path.dirname(_debug_log_path), exist_ok=True)
262
- with open(_debug_log_path, "a") as _f:
263
- _f.write(_json_debug.dumps({"hypothesisId": "JSON_FIX", "location": "llego_enhanced_llm.py:python_dict_parse", "message": "Successfully parsed Python dict syntax", "data": {"num_expected": num_expected, "parsed_variations": len(data.get('variations', []))}, "timestamp": int(_time_debug.time() * 1000), "sessionId": "debug-session"}) + "\n")
264
- # #endregion
265
  return self._extract_variations_from_json(data, num_expected)
266
  except (ValueError, SyntaxError, TypeError) as e:
267
  # If ast.literal_eval fails, try string replacement as fallback
@@ -271,15 +263,7 @@ class LLEGOEnhancedLLMClient(BaseLLMClient):
271
  converted = cleaned.replace("'", '"')
272
  data = json.loads(converted)
273
  if 'variations' in data:
274
- # #region agent log
275
- import json as _json_debug
276
- import time as _time_debug
277
- import os as _os_debug
278
- _debug_log_path = "/Users/suhas/Desktop/Projects/Prompt-Optimizer/.cursor/debug.log"
279
- _os_debug.makedirs(_os_debug.path.dirname(_debug_log_path), exist_ok=True)
280
- with open(_debug_log_path, "a") as _f:
281
- _f.write(_json_debug.dumps({"hypothesisId": "JSON_FIX", "location": "llego_enhanced_llm.py:python_dict_string_replace", "message": "Parsed Python dict via string replacement", "data": {"num_expected": num_expected, "parsed_variations": len(data.get('variations', []))}, "timestamp": int(_time_debug.time() * 1000), "sessionId": "debug-session"}) + "\n")
282
- # #endregion
283
  return self._extract_variations_from_json(data, num_expected)
284
  except json.JSONDecodeError:
285
  pass
@@ -360,14 +344,7 @@ class LLEGOEnhancedLLMClient(BaseLLMClient):
360
  return self._parse_numbered_section_variations(response_text, num_expected)
361
  except ValueError:
362
  pass
363
-
364
- # #region agent log
365
- import json as _json_debug
366
- import time as _time_debug
367
- _debug_log_path = "/Users/suhas/Desktop/Projects/Prompt-Optimizer/.cursor/debug.log"
368
- with open(_debug_log_path, "a") as _f:
369
- _f.write(_json_debug.dumps({"hypothesisId": "D", "location": "llego_enhanced_llm.py:json_parse_fail", "message": "JSON parsing failed completely", "data": {"num_expected": num_expected, "response_preview": response_text[:500] if response_text else "EMPTY", "response_length": len(response_text) if response_text else 0}, "timestamp": int(_time_debug.time() * 1000), "sessionId": "debug-session"}) + "\n")
370
- # #endregion
371
 
372
  raise ValueError(f"Could not parse {num_expected} variations from response")
373
 
@@ -610,19 +587,7 @@ class LLEGOEnhancedLLMClient(BaseLLMClient):
610
  logger.debug(f" user_prompt length: {len(user_prompt)} chars")
611
  logger.debug(f" in_reflection: {self.reflection_context['in_reflection']}")
612
  logger.debug(f" has_image: {bool(image_base64)}")
613
-
614
- # #region agent log
615
- try:
616
- import json as _json_debug
617
- import time as _time_debug
618
- import os as _os_debug
619
- _debug_log_path = "/Users/suhas/Desktop/Projects/Prompt-Optimizer/.cursor/debug.log"
620
- _os_debug.makedirs(_os_debug.path.dirname(_debug_log_path), exist_ok=True)
621
- with open(_debug_log_path, "a") as _f:
622
- _f.write(_json_debug.dumps({"hypothesisId": "INTERCEPTION", "location": "llego_enhanced_llm.py:generate", "message": "Generate called", "data": {"system_prompt_len": len(system_prompt), "user_prompt_len": len(user_prompt), "has_image": bool(image_base64), "has_candidates": len(getattr(self, '_adapter_generated_candidates', [])), "in_reflection": self.reflection_context.get('in_reflection', False)}, "timestamp": int(_time_debug.time() * 1000), "sessionId": "debug-session"}) + "\n")
623
- except Exception:
624
- pass
625
- # #endregion
626
 
627
  # 🔥 CRITICAL: Check if we have pre-generated candidates from adapter-level generation
628
  # This happens when GEPA calls adapter.llm_client to generate candidates
@@ -999,26 +964,10 @@ class LLEGOEnhancedLLMClient(BaseLLMClient):
999
  if self._detected_format and self._detected_format.get('format_constraint'):
1000
  format_constraint = self._detected_format['format_constraint']
1001
  logger.info(f"📐 Injecting format constraint into candidate generation")
1002
- # #region agent log
1003
- import json as _json_debug
1004
- import time as _time_debug
1005
- import os as _os_debug
1006
- _debug_log_path = "/Users/suhas/Desktop/Projects/Prompt-Optimizer/.cursor/debug.log"
1007
- _os_debug.makedirs(_os_debug.path.dirname(_debug_log_path), exist_ok=True)
1008
- with open(_debug_log_path, "a") as _f:
1009
- _f.write(_json_debug.dumps({"hypothesisId": "FORMAT_CONSTRAINT", "location": "llego_enhanced_llm.py:format_injection", "message": "Format constraint injected", "data": {"format_type": self._detected_format.get('format_type', 'unknown'), "constraint_length": len(format_constraint), "avg_length": self._detected_format.get('avg_length', 0)}, "timestamp": int(_time_debug.time() * 1000), "sessionId": "debug-session"}) + "\n")
1010
- # #endregion
1011
  else:
1012
  format_constraint = "No specific format detected - ensure output is CONCISE and matches expected examples."
1013
- # #region agent log
1014
- import json as _json_debug
1015
- import time as _time_debug
1016
- import os as _os_debug
1017
- _debug_log_path = "/Users/suhas/Desktop/Projects/Prompt-Optimizer/.cursor/debug.log"
1018
- _os_debug.makedirs(_os_debug.path.dirname(_debug_log_path), exist_ok=True)
1019
- with open(_debug_log_path, "a") as _f:
1020
- _f.write(_json_debug.dumps({"hypothesisId": "FORMAT_CONSTRAINT", "location": "llego_enhanced_llm.py:format_injection", "message": "No format constraint available", "data": {"has_detected_format": bool(self._detected_format)}, "timestamp": int(_time_debug.time() * 1000), "sessionId": "debug-session"}) + "\n")
1021
- # #endregion
1022
 
1023
  # 🔥 EVOLUTIONARY PROMPT ENGINEER: Forces radically different mutations
1024
  # Each variation MUST use a distinct genetic strategy to maximize search space
 
253
  json_str = json.dumps(python_dict)
254
  data = json.loads(json_str)
255
  if 'variations' in data:
256
+ # Debug logging removed - not needed in production
 
 
 
 
 
 
 
 
257
  return self._extract_variations_from_json(data, num_expected)
258
  except (ValueError, SyntaxError, TypeError) as e:
259
  # If ast.literal_eval fails, try string replacement as fallback
 
263
  converted = cleaned.replace("'", '"')
264
  data = json.loads(converted)
265
  if 'variations' in data:
266
+ # Debug logging removed - not needed in production
 
 
 
 
 
 
 
 
267
  return self._extract_variations_from_json(data, num_expected)
268
  except json.JSONDecodeError:
269
  pass
 
344
  return self._parse_numbered_section_variations(response_text, num_expected)
345
  except ValueError:
346
  pass
347
+ # Debug logging removed - not needed in production
 
 
 
 
 
 
 
348
 
349
  raise ValueError(f"Could not parse {num_expected} variations from response")
350
 
 
587
  logger.debug(f" user_prompt length: {len(user_prompt)} chars")
588
  logger.debug(f" in_reflection: {self.reflection_context['in_reflection']}")
589
  logger.debug(f" has_image: {bool(image_base64)}")
590
+ # Debug logging removed - not needed in production
 
 
 
 
 
 
 
 
 
 
 
 
591
 
592
  # 🔥 CRITICAL: Check if we have pre-generated candidates from adapter-level generation
593
  # This happens when GEPA calls adapter.llm_client to generate candidates
 
964
  if self._detected_format and self._detected_format.get('format_constraint'):
965
  format_constraint = self._detected_format['format_constraint']
966
  logger.info(f"📐 Injecting format constraint into candidate generation")
967
+ # Debug logging removed - not needed in production
 
 
 
 
 
 
 
 
968
  else:
969
  format_constraint = "No specific format detected - ensure output is CONCISE and matches expected examples."
970
+ # Debug logging removed - not needed in production
 
 
 
 
 
 
 
 
971
 
972
  # 🔥 EVOLUTIONARY PROMPT ENGINEER: Forces radically different mutations
973
  # Each variation MUST use a distinct genetic strategy to maximize search space
src/gepa_optimizer/llms/vision_llm.py CHANGED
@@ -395,31 +395,7 @@ class VisionLLMClient(BaseLLMClient):
395
  ]
396
 
397
  if image_base64:
398
- # #region agent log
399
- import json as _json_debug
400
- import time as _time_debug
401
- _debug_log_path = "/Users/suhas/Desktop/Projects/Prompt-Optimizer/.cursor/debug.log"
402
- try:
403
- with open(_debug_log_path, "a") as _f:
404
- _f.write(_json_debug.dumps({
405
- "id": f"log_{int(_time_debug.time() * 1000)}",
406
- "timestamp": int(_time_debug.time() * 1000),
407
- "location": "vision_llm.py:_generate_openai",
408
- "message": "Image base64 BEFORE processing",
409
- "data": {
410
- "image_base64_length": len(image_base64) if image_base64 else 0,
411
- "has_data_uri_prefix": image_base64.startswith("data:image") if image_base64 else False,
412
- "prefix": image_base64[:50] if image_base64 and len(image_base64) > 50 else image_base64,
413
- "is_none": image_base64 is None,
414
- "is_empty": image_base64 == "" if image_base64 else True
415
- },
416
- "sessionId": "debug-session",
417
- "runId": "run1",
418
- "hypothesisId": "A,C,D"
419
- }) + "\n")
420
- except Exception:
421
- pass
422
- # #endregion
423
 
424
  # Detect and extract image format
425
  detected_format = "jpeg" # Default fallback
@@ -473,29 +449,7 @@ class VisionLLMClient(BaseLLMClient):
473
  final_format = format_map.get(detected_format, "jpeg")
474
 
475
  final_url = f"data:image/{final_format};base64,{clean_base64}"
476
-
477
- # #region agent log
478
- try:
479
- with open(_debug_log_path, "a") as _f:
480
- _f.write(_json_debug.dumps({
481
- "id": f"log_{int(_time_debug.time() * 1000)}",
482
- "timestamp": int(_time_debug.time() * 1000),
483
- "location": "vision_llm.py:_generate_openai",
484
- "message": "Image URL AFTER processing",
485
- "data": {
486
- "detected_format": detected_format,
487
- "final_format": final_format,
488
- "clean_base64_length": len(clean_base64),
489
- "final_url_length": len(final_url),
490
- "final_url_prefix": final_url[:60]
491
- },
492
- "sessionId": "debug-session",
493
- "runId": "run1",
494
- "hypothesisId": "A,B"
495
- }) + "\n")
496
- except Exception:
497
- pass
498
- # #endregion
499
 
500
  messages[1]["content"].append({
501
  "type": "image_url",
@@ -675,31 +629,7 @@ class VisionLLMClient(BaseLLMClient):
675
 
676
  # Add image if provided
677
  if image_base64:
678
- # #region agent log
679
- import json as _json_debug
680
- import time as _time_debug
681
- _debug_log_path = "/Users/suhas/Desktop/Projects/Prompt-Optimizer/.cursor/debug.log"
682
- try:
683
- with open(_debug_log_path, "a") as _f:
684
- _f.write(_json_debug.dumps({
685
- "id": f"log_{int(_time_debug.time() * 1000)}",
686
- "timestamp": int(_time_debug.time() * 1000),
687
- "location": "vision_llm.py:_generate_google",
688
- "message": "Image base64 BEFORE processing (Google)",
689
- "data": {
690
- "image_base64_length": len(image_base64) if image_base64 else 0,
691
- "has_data_uri_prefix": image_base64.startswith("data:image") if image_base64 else False,
692
- "prefix": image_base64[:50] if image_base64 and len(image_base64) > 50 else image_base64,
693
- "is_none": image_base64 is None,
694
- "is_empty": image_base64 == "" if image_base64 else True
695
- },
696
- "sessionId": "debug-session",
697
- "runId": "run1",
698
- "hypothesisId": "A,C,D"
699
- }) + "\n")
700
- except Exception:
701
- pass
702
- # #endregion
703
 
704
  try:
705
  # Strip data URI prefix if present (hypothesis A fix)
 
395
  ]
396
 
397
  if image_base64:
398
+ # Debug logging removed - not needed in production
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
 
400
  # Detect and extract image format
401
  detected_format = "jpeg" # Default fallback
 
449
  final_format = format_map.get(detected_format, "jpeg")
450
 
451
  final_url = f"data:image/{final_format};base64,{clean_base64}"
452
+ # Debug logging removed - not needed in production
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
453
 
454
  messages[1]["content"].append({
455
  "type": "image_url",
 
629
 
630
  # Add image if provided
631
  if image_base64:
632
+ # Debug logging removed - not needed in production
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
633
 
634
  try:
635
  # Strip data URI prefix if present (hypothesis A fix)
src/gepa_optimizer/utils/pareto_logger.py CHANGED
@@ -106,13 +106,7 @@ class ParetoLogger:
106
  logger.error(f" Cannot evaluate {cand_notation} without baseline f(S₀)")
107
  logger.error(f" 💡 Seed prompt must be evaluated on Dpareto first")
108
  logger.error(f" 💡 Rejecting candidate to maintain correctness")
109
- # #region agent log
110
- import json as _json_debug
111
- import time as _time_debug
112
- _debug_log_path = "/Users/suhas/Desktop/Projects/Prompt-Optimizer/.cursor/debug.log"
113
- with open(_debug_log_path, "a") as _f:
114
- _f.write(_json_debug.dumps({"hypothesisId": "B", "location": "pareto_logger.py:baseline_not_set", "message": "CRITICAL: Baseline not set when checking Pareto", "data": {"candidate_type": candidate_type, "candidate_notation": cand_notation, "score": score}, "timestamp": int(_time_debug.time() * 1000), "sessionId": "debug-session"}) + "\n")
115
- # #endregion
116
  return # Reject candidate - baseline is required
117
 
118
  # Check if this candidate dominates any in current front
@@ -450,12 +444,6 @@ def reset_pareto_logger() -> ParetoLogger:
450
  """Reset global Pareto logger instance (for new runs)"""
451
  global _pareto_logger
452
  _pareto_logger = ParetoLogger()
453
- # #region agent log
454
- import json as _json_debug
455
- import time as _time_debug
456
- _debug_log_path = "/Users/suhas/Desktop/Projects/Prompt-Optimizer/.cursor/debug.log"
457
- with open(_debug_log_path, "a") as _f:
458
- _f.write(_json_debug.dumps({"hypothesisId": "E", "location": "pareto_logger.py:reset", "message": "Pareto logger reset", "data": {"baseline_score": _pareto_logger.baseline_score, "pareto_front_size": len(_pareto_logger.pareto_front)}, "timestamp": int(_time_debug.time() * 1000), "sessionId": "debug-session"}) + "\n")
459
- # #endregion
460
  return _pareto_logger
461
 
 
106
  logger.error(f" Cannot evaluate {cand_notation} without baseline f(S₀)")
107
  logger.error(f" 💡 Seed prompt must be evaluated on Dpareto first")
108
  logger.error(f" 💡 Rejecting candidate to maintain correctness")
109
+ # Debug logging removed - not needed in production
 
 
 
 
 
 
110
  return # Reject candidate - baseline is required
111
 
112
  # Check if this candidate dominates any in current front
 
444
  """Reset global Pareto logger instance (for new runs)"""
445
  global _pareto_logger
446
  _pareto_logger = ParetoLogger()
447
+ # Debug logging removed - not needed in production
 
 
 
 
 
 
448
  return _pareto_logger
449