Suhasdev committed on
Commit
3ba279d
·
1 Parent(s): a50ba2a

Fix improvement_data field mapping for real backend - use baseline_val_score, optimized_val_score, relative_improvement_percent

Browse files
Files changed (1) hide show
  1. app.py +50 -3
app.py CHANGED
@@ -351,6 +351,10 @@ def safe_optimize(seed_prompt, dataset, model, custom_model="", max_iterations=5
351
 
352
  # Run optimization
353
  try:
 
 
 
 
354
  result = quick_optimize_sync(
355
  seed_prompt=seed_prompt,
356
  dataset=dataset,
@@ -362,6 +366,22 @@ def safe_optimize(seed_prompt, dataset, model, custom_model="", max_iterations=5
362
  verbose=True,
363
  )
364
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
365
  # Validate result structure
366
  if not result:
367
  return False, "Optimization returned no result.", None
@@ -719,16 +739,43 @@ def run_optimization_flow(seed, dataset, model, custom_model, iter_count, call_c
719
  improvement_data = result.improvement_data if hasattr(result, 'improvement_data') else {}
720
 
721
  # Convert improvement_data to display format
 
722
  if isinstance(improvement_data, dict):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
723
  improvement_metrics = {
724
- "baseline_score": improvement_data.get("baseline_score", improvement_data.get("baseline_metrics", {}).get("composite_score", 0.0)),
725
- "final_score": improvement_data.get("final_score", improvement_data.get("final_metrics", {}).get("composite_score", 0.0)),
726
- "improvement": improvement_data.get("improvement_percent", "N/A"),
727
  "iterations_run": result.total_iterations if hasattr(result, 'total_iterations') else improvement_data.get("iterations", 0),
728
  "optimization_time": f"{result.optimization_time:.2f}s" if hasattr(result, 'optimization_time') else "N/A",
729
  }
 
 
 
 
730
  else:
731
  improvement_metrics = {}
 
732
 
733
  # Create iteration history from reflection_history if available
734
  iteration_history = []
 
351
 
352
  # Run optimization
353
  try:
354
+ logger.info(f"🚀 Starting optimization with model: {final_model}")
355
+ logger.info(f" Parameters: iterations={max_iterations}, metric_calls={max_metric_calls}, batch={batch_size}, llego={use_llego}")
356
+ logger.info(f" Dataset size: {len(dataset)} examples")
357
+
358
  result = quick_optimize_sync(
359
  seed_prompt=seed_prompt,
360
  dataset=dataset,
 
366
  verbose=True,
367
  )
368
 
369
+ # Log result details for debugging
370
+ logger.info(f"📊 Optimization result received:")
371
+ logger.info(f" Type: {type(result)}")
372
+ logger.info(f" Has prompt: {hasattr(result, 'prompt')}")
373
+ logger.info(f" Has optimized_prompt: {hasattr(result, 'optimized_prompt')}")
374
+ if hasattr(result, 'improvement_data'):
375
+ logger.info(f" improvement_data: {result.improvement_data}")
376
+ if hasattr(result, 'total_iterations'):
377
+ logger.info(f" total_iterations: {result.total_iterations}")
378
+ if hasattr(result, 'optimization_time'):
379
+ logger.info(f" optimization_time: {result.optimization_time}")
380
+ if hasattr(result, 'status'):
381
+ logger.info(f" status: {result.status}")
382
+ if hasattr(result, 'error_message') and result.error_message:
383
+ logger.error(f" error_message: {result.error_message}")
384
+
385
  # Validate result structure
386
  if not result:
387
  return False, "Optimization returned no result.", None
 
739
  improvement_data = result.improvement_data if hasattr(result, 'improvement_data') else {}
740
 
741
  # Convert improvement_data to display format
742
+ # Real backend uses: baseline_val_score, optimized_val_score, relative_improvement_percent
743
  if isinstance(improvement_data, dict):
744
+ # Try real backend field names first, then fall back to alternatives
745
+ baseline_score = (
746
+ improvement_data.get("baseline_val_score") or
747
+ improvement_data.get("baseline_score") or
748
+ improvement_data.get("baseline_metrics", {}).get("composite_score", 0.0)
749
+ )
750
+ final_score = (
751
+ improvement_data.get("optimized_val_score") or
752
+ improvement_data.get("final_score") or
753
+ improvement_data.get("final_metrics", {}).get("composite_score", 0.0)
754
+ )
755
+ improvement_percent = (
756
+ improvement_data.get("relative_improvement_percent") or
757
+ improvement_data.get("improvement_percent") or
758
+ "N/A"
759
+ )
760
+
761
+ # Format improvement percent
762
+ if isinstance(improvement_percent, (int, float)):
763
+ improvement_percent = f"+{improvement_percent:.1f}%" if improvement_percent > 0 else f"{improvement_percent:.1f}%"
764
+
765
  improvement_metrics = {
766
+ "baseline_score": round(baseline_score, 4) if isinstance(baseline_score, (int, float)) else baseline_score,
767
+ "final_score": round(final_score, 4) if isinstance(final_score, (int, float)) else final_score,
768
+ "improvement": improvement_percent,
769
  "iterations_run": result.total_iterations if hasattr(result, 'total_iterations') else improvement_data.get("iterations", 0),
770
  "optimization_time": f"{result.optimization_time:.2f}s" if hasattr(result, 'optimization_time') else "N/A",
771
  }
772
+
773
+ # Log the improvement data for debugging
774
+ logger.info(f"📊 Improvement data received: {improvement_data}")
775
+ logger.info(f"📊 Formatted metrics: {improvement_metrics}")
776
  else:
777
  improvement_metrics = {}
778
+ logger.warning(f"⚠️ improvement_data is not a dict: {type(improvement_data)}")
779
 
780
  # Create iteration history from reflection_history if available
781
  iteration_history = []