bensondccnqwc commited on
Commit
8f418cb
·
verified ·
1 Parent(s): e69a28a

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +34 -0
  2. eval_results/global_step_0/mmlu_stem/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +3 -0
  3. eval_results/plots/eval_results_acc_keywords.png +3 -0
  4. eval_results/plots/eval_results_acc_pass_acc.png +3 -0
  5. eval_results/plots/eval_results_acc_tokens.png +3 -0
  6. eval_results/plots/eval_results_avg_stop_tokens.png +3 -0
  7. eval_results/plots/eval_results_box_ratio_and_token_length.png +3 -0
  8. eval_results/plots/eval_results_clip_ratio.png +3 -0
  9. eval_results/plots/eval_results_correct_tokens.png +3 -0
  10. eval_results/plots/eval_results_repeat_ratio_and_token_length.png +3 -0
  11. eval_results/plots/eval_results_tokens_keywords.png +3 -0
  12. eval_results/plots/eval_results_wrong_tokens.png +3 -0
  13. eval_results_avg4/plots/eval_results_avg4_acc_keywords.png +3 -0
  14. eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png +3 -0
  15. eval_results_avg4/plots/eval_results_avg4_acc_tokens.png +3 -0
  16. eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png +3 -0
  17. eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png +3 -0
  18. eval_results_avg4/plots/eval_results_avg4_clip_ratio.png +3 -0
  19. eval_results_avg4/plots/eval_results_avg4_correct_tokens.png +3 -0
  20. eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png +3 -0
  21. eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png +3 -0
  22. eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png +3 -0
  23. eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_0_actor_huggingface/2025-08-18T16-21-32.723446/details_extended|ifeval|0_2025-08-18T16-21-32.723446.csv +3 -0
  24. eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_0_actor_huggingface/2025-08-18T16-21-32.723446/details_extended|ifeval|0_2025-08-18T16-21-32.723446.parquet +3 -0
  25. eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_0_actor_huggingface/2025-08-18T16-21-32.723446/details_lighteval|gpqa:diamond|0_2025-08-18T16-21-32.723446.parquet +3 -0
  26. eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_10_actor_huggingface/2025-08-18T16-10-44.289594/details_extended|ifeval|0_2025-08-18T16-10-44.289594.csv +3 -0
  27. eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_10_actor_huggingface/2025-08-18T16-10-44.289594/details_extended|ifeval|0_2025-08-18T16-10-44.289594.parquet +3 -0
  28. eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_10_actor_huggingface/2025-08-18T16-10-44.289594/details_lighteval|gpqa:diamond|0_2025-08-18T16-10-44.289594.parquet +3 -0
  29. eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_100_actor_huggingface/2025-08-18T16-41-52.767998/details_extended|ifeval|0_2025-08-18T16-41-52.767998.parquet +3 -0
  30. eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_100_actor_huggingface/2025-08-18T16-41-52.767998/details_lighteval|gpqa:diamond|0_2025-08-18T16-41-52.767998.parquet +3 -0
  31. eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_20_actor_huggingface/2025-08-18T15-50-59.925267/details_extended|ifeval|0_2025-08-18T15-50-59.925267.csv +3 -0
  32. eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_20_actor_huggingface/2025-08-18T15-50-59.925267/details_extended|ifeval|0_2025-08-18T15-50-59.925267.parquet +3 -0
  33. eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_20_actor_huggingface/2025-08-18T15-50-59.925267/details_lighteval|gpqa:diamond|0_2025-08-18T15-50-59.925267.parquet +3 -0
  34. eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_30_actor_huggingface/2025-08-18T15-54-45.139937/details_extended|ifeval|0_2025-08-18T15-54-45.139937.csv +3 -0
  35. eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_30_actor_huggingface/2025-08-18T15-54-45.139937/details_extended|ifeval|0_2025-08-18T15-54-45.139937.parquet +3 -0
  36. eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_30_actor_huggingface/2025-08-18T15-54-45.139937/details_lighteval|gpqa:diamond|0_2025-08-18T15-54-45.139937.parquet +3 -0
  37. eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_40_actor_huggingface/2025-08-18T15-47-59.222617/details_extended|ifeval|0_2025-08-18T15-47-59.222617.csv +3 -0
  38. eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_40_actor_huggingface/2025-08-18T15-47-59.222617/details_extended|ifeval|0_2025-08-18T15-47-59.222617.parquet +3 -0
  39. eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_40_actor_huggingface/2025-08-18T15-47-59.222617/details_lighteval|gpqa:diamond|0_2025-08-18T15-47-59.222617.parquet +3 -0
  40. eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_50_actor_huggingface/2025-08-18T15-45-31.395283/details_extended|ifeval|0_2025-08-18T15-45-31.395283.parquet +3 -0
  41. eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_50_actor_huggingface/2025-08-18T15-45-31.395283/details_lighteval|gpqa:diamond|0_2025-08-18T15-45-31.395283.parquet +3 -0
  42. eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_60_actor_huggingface/2025-08-18T15-39-35.865731/details_extended|ifeval|0_2025-08-18T15-39-35.865731.parquet +3 -0
  43. eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_60_actor_huggingface/2025-08-18T15-39-35.865731/details_lighteval|gpqa:diamond|0_2025-08-18T15-39-35.865731.parquet +3 -0
  44. eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_70_actor_huggingface/2025-08-18T15-45-24.707769/details_extended|ifeval|0_2025-08-18T15-45-24.707769.parquet +3 -0
  45. eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_70_actor_huggingface/2025-08-18T15-45-24.707769/details_lighteval|gpqa:diamond|0_2025-08-18T15-45-24.707769.parquet +3 -0
  46. eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_80_actor_huggingface/2025-08-18T16-44-03.891235/details_extended|ifeval|0_2025-08-18T16-44-03.891235.parquet +3 -0
  47. eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_80_actor_huggingface/2025-08-18T16-44-03.891235/details_lighteval|gpqa:diamond|0_2025-08-18T16-44-03.891235.parquet +3 -0
  48. eval_results_ood/global_step_90/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_90_actor_huggingface/2025-08-18T16-39-21.271295/details_extended|ifeval|0_2025-08-18T16-39-21.271295.parquet +3 -0
  49. eval_results_ood/global_step_90/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_90_actor_huggingface/2025-08-18T16-39-21.271295/details_lighteval|gpqa:diamond|0_2025-08-18T16-39-21.271295.parquet +3 -0
  50. global_step_10/actor/huggingface/tokenizer.json +3 -0
.gitattributes CHANGED
@@ -33,3 +33,37 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ eval_results/global_step_0/mmlu_stem/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl filter=lfs diff=lfs merge=lfs -text
37
+ eval_results/plots/eval_results_acc_keywords.png filter=lfs diff=lfs merge=lfs -text
38
+ eval_results/plots/eval_results_acc_pass_acc.png filter=lfs diff=lfs merge=lfs -text
39
+ eval_results/plots/eval_results_acc_tokens.png filter=lfs diff=lfs merge=lfs -text
40
+ eval_results/plots/eval_results_avg_stop_tokens.png filter=lfs diff=lfs merge=lfs -text
41
+ eval_results/plots/eval_results_box_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
42
+ eval_results/plots/eval_results_clip_ratio.png filter=lfs diff=lfs merge=lfs -text
43
+ eval_results/plots/eval_results_correct_tokens.png filter=lfs diff=lfs merge=lfs -text
44
+ eval_results/plots/eval_results_repeat_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
45
+ eval_results/plots/eval_results_wrong_tokens.png filter=lfs diff=lfs merge=lfs -text
46
+ eval_results/plots/eval_results_tokens_keywords.png filter=lfs diff=lfs merge=lfs -text
47
+ global_step_40/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
48
+ global_step_50/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
49
+ global_step_60/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
50
+ global_step_70/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
51
+ global_step_80/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
52
+ global_step_90/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
53
+ eval_results_avg4/plots/eval_results_avg4_acc_keywords.png filter=lfs diff=lfs merge=lfs -text
54
+ eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png filter=lfs diff=lfs merge=lfs -text
55
+ eval_results_avg4/plots/eval_results_avg4_acc_tokens.png filter=lfs diff=lfs merge=lfs -text
56
+ eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png filter=lfs diff=lfs merge=lfs -text
57
+ eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
58
+ eval_results_avg4/plots/eval_results_avg4_correct_tokens.png filter=lfs diff=lfs merge=lfs -text
59
+ eval_results_avg4/plots/eval_results_avg4_clip_ratio.png filter=lfs diff=lfs merge=lfs -text
60
+ eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
61
+ eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png filter=lfs diff=lfs merge=lfs -text
62
+ eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png filter=lfs diff=lfs merge=lfs -text
63
+ eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_20_actor_huggingface/2025-08-18T15-50-59.925267/details_extended|ifeval|0_2025-08-18T15-50-59.925267.csv filter=lfs diff=lfs merge=lfs -text
64
+ eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_30_actor_huggingface/2025-08-18T15-54-45.139937/details_extended|ifeval|0_2025-08-18T15-54-45.139937.csv filter=lfs diff=lfs merge=lfs -text
65
+ eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_40_actor_huggingface/2025-08-18T15-47-59.222617/details_extended|ifeval|0_2025-08-18T15-47-59.222617.csv filter=lfs diff=lfs merge=lfs -text
66
+ eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_10_actor_huggingface/2025-08-18T16-10-44.289594/details_extended|ifeval|0_2025-08-18T16-10-44.289594.csv filter=lfs diff=lfs merge=lfs -text
67
+ eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_0_actor_huggingface/2025-08-18T16-21-32.723446/details_extended|ifeval|0_2025-08-18T16-21-32.723446.csv filter=lfs diff=lfs merge=lfs -text
68
+ global_step_10/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
69
+ global_step_100/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
eval_results/global_step_0/mmlu_stem/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11982e5fd8fef307ea51ecf60643f5b5b15567f6fddc200ad110306b45e88753
3
+ size 12914359
eval_results/plots/eval_results_acc_keywords.png ADDED

Git LFS Details

  • SHA256: efefd243abedb274f832fe358f8910a4e2467969a93680f1a4fdefe5396794d5
  • Pointer size: 131 Bytes
  • Size of remote file: 420 kB
eval_results/plots/eval_results_acc_pass_acc.png ADDED

Git LFS Details

  • SHA256: 753d659427a1ad5aaf3bf5c328ddbdd8bd0b5cc5e283ce3161e2f1cf009e0a19
  • Pointer size: 131 Bytes
  • Size of remote file: 312 kB
eval_results/plots/eval_results_acc_tokens.png ADDED

Git LFS Details

  • SHA256: d8dfe0383ae0869fc4bcf32fd633c0e4ee8455140edb1eb52ec4488b2f3bfbf6
  • Pointer size: 131 Bytes
  • Size of remote file: 439 kB
eval_results/plots/eval_results_avg_stop_tokens.png ADDED

Git LFS Details

  • SHA256: 30a68959884682fd3a17ba26f80a180c1276938643877aa099fce75c3c03f06e
  • Pointer size: 131 Bytes
  • Size of remote file: 495 kB
eval_results/plots/eval_results_box_ratio_and_token_length.png ADDED

Git LFS Details

  • SHA256: d761d99d819a24b03156a33d278f9a052b8b93a273db779ff17db3a0e18d7e11
  • Pointer size: 131 Bytes
  • Size of remote file: 442 kB
eval_results/plots/eval_results_clip_ratio.png ADDED

Git LFS Details

  • SHA256: 365be7136faae863d1f411008cdea373e9c7bc852f5ee0b88a9971dc3881ed26
  • Pointer size: 131 Bytes
  • Size of remote file: 417 kB
eval_results/plots/eval_results_correct_tokens.png ADDED

Git LFS Details

  • SHA256: 2edd0e903f1e87ca3b2814f7d5281140d39789f69603291445c017f7a1fb514b
  • Pointer size: 131 Bytes
  • Size of remote file: 424 kB
eval_results/plots/eval_results_repeat_ratio_and_token_length.png ADDED

Git LFS Details

  • SHA256: 1e12ee021fbe484189723033eb5242ee03990cbad9a678ee7b5c04671119fdcb
  • Pointer size: 131 Bytes
  • Size of remote file: 485 kB
eval_results/plots/eval_results_tokens_keywords.png ADDED

Git LFS Details

  • SHA256: 7210e8f40c6da97fd5188e42f63f9b36da0ba9f1be734bcff4114f9b4fedc427
  • Pointer size: 131 Bytes
  • Size of remote file: 435 kB
eval_results/plots/eval_results_wrong_tokens.png ADDED

Git LFS Details

  • SHA256: eb064fb1f17f0d1cabba939f7012eed7fba55809360fc7ee6c4195a823b21c7e
  • Pointer size: 131 Bytes
  • Size of remote file: 442 kB
eval_results_avg4/plots/eval_results_avg4_acc_keywords.png ADDED

Git LFS Details

  • SHA256: 2e93cefb847f7069de65f1ae412e629f3befe23257aaea72f65220da5b9fc414
  • Pointer size: 131 Bytes
  • Size of remote file: 209 kB
eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png ADDED

Git LFS Details

  • SHA256: b4a1cf36dcc7228ada6042ca3ec2678722a5688f4f873ceecf2f9ba9648e55de
  • Pointer size: 131 Bytes
  • Size of remote file: 198 kB
eval_results_avg4/plots/eval_results_avg4_acc_tokens.png ADDED

Git LFS Details

  • SHA256: 18111453ff0c0ebf0e3b74b56610e33af7240e8dfd19f17e31ee0eecf5992f0d
  • Pointer size: 131 Bytes
  • Size of remote file: 230 kB
eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png ADDED

Git LFS Details

  • SHA256: bc0fe7fb9325dfbb59b77e1bac9e603fd8ad55359383f0568042ae1713e50457
  • Pointer size: 131 Bytes
  • Size of remote file: 249 kB
eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png ADDED

Git LFS Details

  • SHA256: 1880ddcdb3302592bcb7b6fe1f6b60ec72088d109c162a83a78dbf2309144355
  • Pointer size: 131 Bytes
  • Size of remote file: 245 kB
eval_results_avg4/plots/eval_results_avg4_clip_ratio.png ADDED

Git LFS Details

  • SHA256: d25c742be80f3b102211a617098d4291db8c9825de14323fc3ffe201eb353f8d
  • Pointer size: 131 Bytes
  • Size of remote file: 213 kB
eval_results_avg4/plots/eval_results_avg4_correct_tokens.png ADDED

Git LFS Details

  • SHA256: 001c35d09fe13cc38f907eccfebdc031695f558a84659b21b582858c80a14e6d
  • Pointer size: 131 Bytes
  • Size of remote file: 211 kB
eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png ADDED

Git LFS Details

  • SHA256: 7a717998b7df8f7d128271b000d71414ba29a15cb31f6f67a56701dddaf06a47
  • Pointer size: 131 Bytes
  • Size of remote file: 256 kB
eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png ADDED

Git LFS Details

  • SHA256: e6542704622e813b91da3a3dd9c9f9f973455819ad8dbb23026b94eb8595c94b
  • Pointer size: 131 Bytes
  • Size of remote file: 228 kB
eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png ADDED

Git LFS Details

  • SHA256: 18410a3b90e269778a63250a9839c278ce7f1455db68bc6006a613ccc17adaa8
  • Pointer size: 131 Bytes
  • Size of remote file: 232 kB
eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_0_actor_huggingface/2025-08-18T16-21-32.723446/details_extended|ifeval|0_2025-08-18T16-21-32.723446.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a68fabbf29c74825a3b25f47e1dd5d8b6ff9b365aadd3f7a6e958915eda8fc7
3
+ size 18334919
eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_0_actor_huggingface/2025-08-18T16-21-32.723446/details_extended|ifeval|0_2025-08-18T16-21-32.723446.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34bacdac47e83c635d89bb64fe8fef71de883ea95f51e3a31cebe32903a5121a
3
+ size 3010355
eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_0_actor_huggingface/2025-08-18T16-21-32.723446/details_lighteval|gpqa:diamond|0_2025-08-18T16-21-32.723446.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74cd417d66e222fd211285ab6d2f8bd72b026701512000e9724e100584212d26
3
+ size 570631
eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_10_actor_huggingface/2025-08-18T16-10-44.289594/details_extended|ifeval|0_2025-08-18T16-10-44.289594.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10ec661a65f876511a3a009602fb1a39599d85c6834d28419f68da773bf583c2
3
+ size 16965722
eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_10_actor_huggingface/2025-08-18T16-10-44.289594/details_extended|ifeval|0_2025-08-18T16-10-44.289594.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a3be47e2a30f7a41c34fd52b1c68ebba33b7824687f4e8b41dfde0d367030ac
3
+ size 2691645
eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_10_actor_huggingface/2025-08-18T16-10-44.289594/details_lighteval|gpqa:diamond|0_2025-08-18T16-10-44.289594.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d6ec882fea65fb93295b7d6e8dcbd57bc683f42fc287ff9fc93540f1ea89227
3
+ size 519547
eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_100_actor_huggingface/2025-08-18T16-41-52.767998/details_extended|ifeval|0_2025-08-18T16-41-52.767998.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e71386d57dead1b26499fd6672be698ca34d709def4d27d2f6db24db409c569
3
+ size 1498483
eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_100_actor_huggingface/2025-08-18T16-41-52.767998/details_lighteval|gpqa:diamond|0_2025-08-18T16-41-52.767998.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6303acde7adf9472a18ae4fe112758c0ada25807dcd43eedf12cc4a456acaa9
3
+ size 658714
eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_20_actor_huggingface/2025-08-18T15-50-59.925267/details_extended|ifeval|0_2025-08-18T15-50-59.925267.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0e7203de9136f0b1a47efa1c2f3ce858bc123da32bc3b0250e43b56bb9fffcd
3
+ size 11154595
eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_20_actor_huggingface/2025-08-18T15-50-59.925267/details_extended|ifeval|0_2025-08-18T15-50-59.925267.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a80038e98338b97c4ebf7558d5a4a83fb67092a8080bf6e3c193e0c4a89da30b
3
+ size 2083800
eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_20_actor_huggingface/2025-08-18T15-50-59.925267/details_lighteval|gpqa:diamond|0_2025-08-18T15-50-59.925267.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b37171406736aa94d9e04fbb2def7613bdaa00d122e874b8b9292d291c1ec629
3
+ size 574644
eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_30_actor_huggingface/2025-08-18T15-54-45.139937/details_extended|ifeval|0_2025-08-18T15-54-45.139937.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c29af90c8aaa76611bc094ac0cc8984147e52486ba3bf67a0cfe62834eccdaf
3
+ size 11387185
eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_30_actor_huggingface/2025-08-18T15-54-45.139937/details_extended|ifeval|0_2025-08-18T15-54-45.139937.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd33ede61db0fcdb173b8d4372d3dfb91e9278ca48135f90643daa04b31ed47d
3
+ size 2256722
eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_30_actor_huggingface/2025-08-18T15-54-45.139937/details_lighteval|gpqa:diamond|0_2025-08-18T15-54-45.139937.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47d60bc54edbacbe4c84c236580fdd65d3d1c215b458eb5f605785cedcf90a77
3
+ size 654486
eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_40_actor_huggingface/2025-08-18T15-47-59.222617/details_extended|ifeval|0_2025-08-18T15-47-59.222617.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24bcf48d7c49154b06cf56ced4567a07f47c9c4e4ecc28e08010c6396b6b45aa
3
+ size 11323716
eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_40_actor_huggingface/2025-08-18T15-47-59.222617/details_extended|ifeval|0_2025-08-18T15-47-59.222617.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dae9f46f0f349f1c173e059b38ca13c1c52fc15d92162a13567d2134f3926ec
3
+ size 1945986
eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_40_actor_huggingface/2025-08-18T15-47-59.222617/details_lighteval|gpqa:diamond|0_2025-08-18T15-47-59.222617.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0dd9c292fb8b29c3085e23e564321091d1196e06a4740f8d83ec8b3db215381
3
+ size 618770
eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_50_actor_huggingface/2025-08-18T15-45-31.395283/details_extended|ifeval|0_2025-08-18T15-45-31.395283.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:279affbcfe2ceb7e0c9f60d4e6ee3a4ba1f9a84687eef9ed57f1d304c461b60e
3
+ size 1925213
eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_50_actor_huggingface/2025-08-18T15-45-31.395283/details_lighteval|gpqa:diamond|0_2025-08-18T15-45-31.395283.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:927632d9deb938b57f4e558647f7f2b3de5bba51786fb40229663080dc094e56
3
+ size 608015
eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_60_actor_huggingface/2025-08-18T15-39-35.865731/details_extended|ifeval|0_2025-08-18T15-39-35.865731.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:286ce3e196a6ac3176c7001f9a722f2b80849fbf79a9984fccd86274d979b0f9
3
+ size 1623068
eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_60_actor_huggingface/2025-08-18T15-39-35.865731/details_lighteval|gpqa:diamond|0_2025-08-18T15-39-35.865731.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:337877be84c349305b4dde8a473bf6ea664a245f007bd57ea79cd3f2dd84d413
3
+ size 677607
eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_70_actor_huggingface/2025-08-18T15-45-24.707769/details_extended|ifeval|0_2025-08-18T15-45-24.707769.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ac30c9873cdcc8d5d95ff31cfdcf37c76bf78e1b7bb85c086f86daccc739b7f
3
+ size 1929554
eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_70_actor_huggingface/2025-08-18T15-45-24.707769/details_lighteval|gpqa:diamond|0_2025-08-18T15-45-24.707769.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28fa63b65deb15ee21e9ab6bd2235f4853494753d2c4194d11c419689287d81e
3
+ size 684473
eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_80_actor_huggingface/2025-08-18T16-44-03.891235/details_extended|ifeval|0_2025-08-18T16-44-03.891235.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8812184b17fe91f50bcb740642982fd8e9dc31281bead5bb951a5be7761e0a3c
3
+ size 1675223
eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_80_actor_huggingface/2025-08-18T16-44-03.891235/details_lighteval|gpqa:diamond|0_2025-08-18T16-44-03.891235.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67ec72c743a462249d9de35a86e01bfaba2c1f3b71fbfca71997583a80c42b7d
3
+ size 674531
eval_results_ood/global_step_90/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_90_actor_huggingface/2025-08-18T16-39-21.271295/details_extended|ifeval|0_2025-08-18T16-39-21.271295.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73b2c90673c7b839cccfce4d605bad54a7d4e01ebc5fb9e03b1eb8e7c777846d
3
+ size 1644351
eval_results_ood/global_step_90/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_90_actor_huggingface/2025-08-18T16-39-21.271295/details_lighteval|gpqa:diamond|0_2025-08-18T16-39-21.271295.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7e2440801cfffee42d282ea2fbfee5099158e7a34361a34a81df542acfc497d
3
+ size 633477
global_step_10/actor/huggingface/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654