MedGRPO Team committed on
Commit
2bd924c
·
1 Parent(s): ebf8102

Filter DVC/VS/RC from tasks list when skip-llm-judge is set

Browse files
Files changed (1) hide show
  1. evaluation/evaluate_all_pai.py +8 -0
evaluation/evaluate_all_pai.py CHANGED
@@ -773,6 +773,14 @@ def run_evaluation(output_file, tasks=None, grouping="per-dataset", silent_eval=
773
  print(f"\nRunning evaluation for tasks: {tasks}", flush=True)
774
  print(f"Total tasks to evaluate: {len(tasks)}", flush=True)
775
 
 
 
 
 
 
 
 
 
776
  # Dictionary to store all evaluation results
777
  all_task_results = {}
778
 
 
773
  print(f"\nRunning evaluation for tasks: {tasks}", flush=True)
774
  print(f"Total tasks to evaluate: {len(tasks)}", flush=True)
775
 
776
+ # Filter out LLM judge tasks if skip flag is set
777
+ if skip_llm_judge:
778
+ original_tasks = tasks.copy()
779
+ tasks = [t for t in tasks if t not in ['dvc', 'vs', 'rc']]
780
+ if len(tasks) < len(original_tasks):
781
+ print(f"Skipping LLM judge tasks: {[t for t in original_tasks if t not in tasks]}", flush=True)
782
+ print(f"Evaluating {len(tasks)} tasks: {tasks}", flush=True)
783
+
784
  # Dictionary to store all evaluation results
785
  all_task_results = {}
786