amanoaki committed on
Commit
03afa52
·
verified ·
1 Parent(s): 23ee82e

Update dataset_construction/evaluate_on_eurus_2.py

Browse files
dataset_construction/evaluate_on_eurus_2.py CHANGED
@@ -12,12 +12,12 @@ def read_args():
12
  parser = argparse.ArgumentParser()
13
  parser.add_argument("--model", type=str, default="Qwen/Qwen2.5-7B", help="The model to use for evaluation")
14
  parser.add_argument("--batch_size", type=int, default=4, help="Batch size for evaluation")
15
- parser.add_argument("--k", type=int, default=5, help="Number of completions to generate for each prompt")
16
- parser.add_argument("--tensor_parallel_size", type=int, default=2, help="Number of parallel tensors to use for generation")
17
- parser.add_argument("--temperature", type=float, default=0.8, help="Temperature for sampling")
18
- parser.add_argument("--top_p", type=float, default=0.95, help="Top-p for sampling")
19
- parser.add_argument("--num_samples", type=int, default=100, help="Number of samples to evaluate; if -1, use the entire shard")
20
- parser.add_argument("--output_file", type=str, default="evaluation_results.json", help="Base file name to save evaluation results")
21
  # NOTE: token consuming
22
  parser.add_argument("--max_tokens", type=int, default=4096, help="Maximum number of tokens to generate.")
23
  parser.add_argument("--log_per_step", type=int, default=1000, help="Log results every N samples")
@@ -83,7 +83,9 @@ def main():
83
  num_samples = len(split_ds)
84
 
85
  result_file = f"evaluation_results_{split}_shard{args.shard_index}_of_{args.num_shards}.json"
 
86
  complete_result_file = f"complete_evaluation_results_{split}_shard{args.shard_index}_of_{args.num_shards}.json"
 
87
 
88
  if args.resume and os.path.exists(result_file):
89
  with open(result_file, "r", encoding="utf-8") as f:
 
12
  parser = argparse.ArgumentParser()
13
  parser.add_argument("--model", type=str, default="Qwen/Qwen2.5-7B", help="The model to use for evaluation")
14
  parser.add_argument("--batch_size", type=int, default=4, help="Batch size for evaluation")
15
+ parser.add_argument("--k", type=int, default=8, help="Number of completions to generate for each prompt")
16
+ parser.add_argument("--tensor_parallel_size", type=int, default=1, help="Number of parallel tensors to use for generation")
17
+ parser.add_argument("--temperature", type=float, default=1.0, help="Temperature for sampling")
18
+ parser.add_argument("--top_p", type=float, default=1.0, help="Top-p for sampling")
19
+ parser.add_argument("--num_samples", type=int, default=-1, help="Number of samples to evaluate; if -1, use the entire shard")
20
+ parser.add_argument("--output_folder", type=str, default=".", help="Base folder to save evaluation results")
21
  # NOTE: token consuming
22
  parser.add_argument("--max_tokens", type=int, default=4096, help="Maximum number of tokens to generate.")
23
  parser.add_argument("--log_per_step", type=int, default=1000, help="Log results every N samples")
 
83
  num_samples = len(split_ds)
84
 
85
  result_file = f"evaluation_results_{split}_shard{args.shard_index}_of_{args.num_shards}.json"
86
+ result_file = os.path.join(args.output_folder, result_file)
87
  complete_result_file = f"complete_evaluation_results_{split}_shard{args.shard_index}_of_{args.num_shards}.json"
88
+ complete_result_file = os.path.join(args.output_folder, complete_result_file)
89
 
90
  if args.resume and os.path.exists(result_file):
91
  with open(result_file, "r", encoding="utf-8") as f: