bensondccnqwc commited on
Commit
3d54645
·
verified ·
1 Parent(s): 627137f

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +31 -0
  2. eval_results/global_step_0/mmlu_stem/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +3 -0
  3. eval_results/plots/eval_results_acc_keywords.png +3 -0
  4. eval_results/plots/eval_results_acc_pass_acc.png +3 -0
  5. eval_results/plots/eval_results_acc_tokens.png +3 -0
  6. eval_results/plots/eval_results_avg_stop_tokens.png +3 -0
  7. eval_results/plots/eval_results_box_ratio_and_token_length.png +3 -0
  8. eval_results/plots/eval_results_clip_ratio.png +3 -0
  9. eval_results/plots/eval_results_correct_tokens.png +3 -0
  10. eval_results/plots/eval_results_repeat_ratio_and_token_length.png +3 -0
  11. eval_results/plots/eval_results_tokens_keywords.png +3 -0
  12. eval_results/plots/eval_results_wrong_tokens.png +3 -0
  13. eval_results_avg4/plots/eval_results_avg4_acc_keywords.png +3 -0
  14. eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png +3 -0
  15. eval_results_avg4/plots/eval_results_avg4_acc_tokens.png +3 -0
  16. eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png +3 -0
  17. eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png +3 -0
  18. eval_results_avg4/plots/eval_results_avg4_clip_ratio.png +3 -0
  19. eval_results_avg4/plots/eval_results_avg4_correct_tokens.png +3 -0
  20. eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png +3 -0
  21. eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png +3 -0
  22. eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png +3 -0
  23. eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_0_actor_huggingface/2025-08-22T07-57-51.872019/details_extended|ifeval|0_2025-08-22T07-57-51.872019.csv +3 -0
  24. eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_0_actor_huggingface/2025-08-22T07-57-51.872019/details_extended|ifeval|0_2025-08-22T07-57-51.872019.parquet +3 -0
  25. eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_0_actor_huggingface/2025-08-22T07-57-51.872019/details_lighteval|gpqa:diamond|0_2025-08-22T07-57-51.872019.parquet +3 -0
  26. eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_10_actor_huggingface/2025-08-22T07-47-25.389710/details_extended|ifeval|0_2025-08-22T07-47-25.389710.csv +3 -0
  27. eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_10_actor_huggingface/2025-08-22T07-47-25.389710/details_extended|ifeval|0_2025-08-22T07-47-25.389710.parquet +3 -0
  28. eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_10_actor_huggingface/2025-08-22T07-47-25.389710/details_lighteval|gpqa:diamond|0_2025-08-22T07-47-25.389710.parquet +3 -0
  29. eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_100_actor_huggingface/2025-08-22T08-13-29.136616/details_extended|ifeval|0_2025-08-22T08-13-29.136616.parquet +3 -0
  30. eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_100_actor_huggingface/2025-08-22T08-13-29.136616/details_lighteval|gpqa:diamond|0_2025-08-22T08-13-29.136616.parquet +3 -0
  31. eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_20_actor_huggingface/2025-08-22T07-29-13.669606/details_extended|ifeval|0_2025-08-22T07-29-13.669606.parquet +3 -0
  32. eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_20_actor_huggingface/2025-08-22T07-29-13.669606/details_lighteval|gpqa:diamond|0_2025-08-22T07-29-13.669606.parquet +3 -0
  33. eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_30_actor_huggingface/2025-08-22T07-15-48.254808/details_extended|ifeval|0_2025-08-22T07-15-48.254808.parquet +3 -0
  34. eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_30_actor_huggingface/2025-08-22T07-15-48.254808/details_lighteval|gpqa:diamond|0_2025-08-22T07-15-48.254808.parquet +3 -0
  35. eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_40_actor_huggingface/2025-08-22T07-12-09.643109/details_extended|ifeval|0_2025-08-22T07-12-09.643109.parquet +3 -0
  36. eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_40_actor_huggingface/2025-08-22T07-12-09.643109/details_lighteval|gpqa:diamond|0_2025-08-22T07-12-09.643109.parquet +3 -0
  37. eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_50_actor_huggingface/2025-08-22T07-09-28.251052/details_extended|ifeval|0_2025-08-22T07-09-28.251052.parquet +3 -0
  38. eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_50_actor_huggingface/2025-08-22T07-09-28.251052/details_lighteval|gpqa:diamond|0_2025-08-22T07-09-28.251052.parquet +3 -0
  39. eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_60_actor_huggingface/2025-08-22T07-09-44.265871/details_extended|ifeval|0_2025-08-22T07-09-44.265871.parquet +3 -0
  40. eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_60_actor_huggingface/2025-08-22T07-09-44.265871/details_lighteval|gpqa:diamond|0_2025-08-22T07-09-44.265871.parquet +3 -0
  41. eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_70_actor_huggingface/2025-08-22T08-09-53.250731/details_extended|ifeval|0_2025-08-22T08-09-53.250731.parquet +3 -0
  42. eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_70_actor_huggingface/2025-08-22T08-09-53.250731/details_lighteval|gpqa:diamond|0_2025-08-22T08-09-53.250731.parquet +3 -0
  43. eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_80_actor_huggingface/2025-08-22T08-08-44.641132/details_extended|ifeval|0_2025-08-22T08-08-44.641132.parquet +3 -0
  44. eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_80_actor_huggingface/2025-08-22T08-08-44.641132/details_lighteval|gpqa:diamond|0_2025-08-22T08-08-44.641132.parquet +3 -0
  45. eval_results_ood/global_step_90/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_90_actor_huggingface/2025-08-22T08-09-51.132895/details_extended|ifeval|0_2025-08-22T08-09-51.132895.parquet +3 -0
  46. eval_results_ood/global_step_90/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_90_actor_huggingface/2025-08-22T08-09-51.132895/details_lighteval|gpqa:diamond|0_2025-08-22T08-09-51.132895.parquet +3 -0
  47. global_step_10/actor/huggingface/tokenizer.json +3 -0
  48. global_step_10/data.pt +3 -0
  49. global_step_100/actor/huggingface/tokenizer.json +3 -0
  50. global_step_100/data.pt +3 -0
.gitattributes CHANGED
@@ -33,3 +33,34 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ global_step_90/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ global_step_50/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
+ global_step_60/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
+ global_step_70/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
40
+ global_step_80/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
41
+ eval_results/global_step_0/mmlu_stem/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl filter=lfs diff=lfs merge=lfs -text
42
+ eval_results/plots/eval_results_acc_pass_acc.png filter=lfs diff=lfs merge=lfs -text
43
+ eval_results/plots/eval_results_acc_keywords.png filter=lfs diff=lfs merge=lfs -text
44
+ eval_results/plots/eval_results_acc_tokens.png filter=lfs diff=lfs merge=lfs -text
45
+ eval_results/plots/eval_results_avg_stop_tokens.png filter=lfs diff=lfs merge=lfs -text
46
+ eval_results/plots/eval_results_box_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
47
+ eval_results/plots/eval_results_clip_ratio.png filter=lfs diff=lfs merge=lfs -text
48
+ eval_results/plots/eval_results_correct_tokens.png filter=lfs diff=lfs merge=lfs -text
49
+ eval_results/plots/eval_results_tokens_keywords.png filter=lfs diff=lfs merge=lfs -text
50
+ eval_results/plots/eval_results_repeat_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
51
+ eval_results/plots/eval_results_wrong_tokens.png filter=lfs diff=lfs merge=lfs -text
52
+ eval_results_avg4/plots/eval_results_avg4_acc_keywords.png filter=lfs diff=lfs merge=lfs -text
53
+ eval_results_avg4/plots/eval_results_avg4_acc_tokens.png filter=lfs diff=lfs merge=lfs -text
54
+ eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png filter=lfs diff=lfs merge=lfs -text
55
+ eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
56
+ eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png filter=lfs diff=lfs merge=lfs -text
57
+ eval_results_avg4/plots/eval_results_avg4_clip_ratio.png filter=lfs diff=lfs merge=lfs -text
58
+ eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png filter=lfs diff=lfs merge=lfs -text
59
+ eval_results_avg4/plots/eval_results_avg4_correct_tokens.png filter=lfs diff=lfs merge=lfs -text
60
+ eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
61
+ eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png filter=lfs diff=lfs merge=lfs -text
62
+ eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_10_actor_huggingface/2025-08-22T07-47-25.389710/details_extended|ifeval|0_2025-08-22T07-47-25.389710.csv filter=lfs diff=lfs merge=lfs -text
63
+ eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_0_actor_huggingface/2025-08-22T07-57-51.872019/details_extended|ifeval|0_2025-08-22T07-57-51.872019.csv filter=lfs diff=lfs merge=lfs -text
64
+ global_step_10/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
65
+ global_step_100/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
66
+ global_step_20/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
eval_results/global_step_0/mmlu_stem/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e25369e899dd25ca782745e42fde9dc9526de1d22c7d6646308a664742ccd6d2
3
+ size 12120990
eval_results/plots/eval_results_acc_keywords.png ADDED

Git LFS Details

  • SHA256: da3269bfadf2540cf2872eea9a25e8cd30a1a28a90210610323d208dfdf67bf2
  • Pointer size: 131 Bytes
  • Size of remote file: 413 kB
eval_results/plots/eval_results_acc_pass_acc.png ADDED

Git LFS Details

  • SHA256: d4f9025e8e2127fd2718f0fcf924afb0ae3325c769c47f88f8742729d95bdebf
  • Pointer size: 131 Bytes
  • Size of remote file: 313 kB
eval_results/plots/eval_results_acc_tokens.png ADDED

Git LFS Details

  • SHA256: 32b8dc6f79c23e446f70ef55070fbbb3d10f2341a6195bc6b0b12a8549b22052
  • Pointer size: 131 Bytes
  • Size of remote file: 432 kB
eval_results/plots/eval_results_avg_stop_tokens.png ADDED

Git LFS Details

  • SHA256: 76acfbbc53edfe800dccae948f18c1efeeef67bd012772112e686dfdc08a5abe
  • Pointer size: 131 Bytes
  • Size of remote file: 470 kB
eval_results/plots/eval_results_box_ratio_and_token_length.png ADDED

Git LFS Details

  • SHA256: 73e203420d26aece576b6383a5b05aa2b72578f8a8e88bbc2d740e114c1575ff
  • Pointer size: 131 Bytes
  • Size of remote file: 422 kB
eval_results/plots/eval_results_clip_ratio.png ADDED

Git LFS Details

  • SHA256: c1e3cf1a1a4fed32c11b5211fb68842fc1db7b68bdd40f1885dc932b8392aa78
  • Pointer size: 131 Bytes
  • Size of remote file: 400 kB
eval_results/plots/eval_results_correct_tokens.png ADDED

Git LFS Details

  • SHA256: a032b6ed462f8fa6cd85b8272bd55151d73ab27e60b874dcacd82ec5e1360599
  • Pointer size: 131 Bytes
  • Size of remote file: 417 kB
eval_results/plots/eval_results_repeat_ratio_and_token_length.png ADDED

Git LFS Details

  • SHA256: dce39ab1d122b878f35c37169d23df63ca0d28d1afb5a63fb1285087a57dedb9
  • Pointer size: 131 Bytes
  • Size of remote file: 474 kB
eval_results/plots/eval_results_tokens_keywords.png ADDED

Git LFS Details

  • SHA256: 4c4b907bcabe216935655669bb7ff468d16f29a177c2c2b684c830ce28994513
  • Pointer size: 131 Bytes
  • Size of remote file: 427 kB
eval_results/plots/eval_results_wrong_tokens.png ADDED

Git LFS Details

  • SHA256: 971d008b3c91af385f3fee04a5833eade8511afa50ca8180fa7c2fb03b4ca958
  • Pointer size: 131 Bytes
  • Size of remote file: 433 kB
eval_results_avg4/plots/eval_results_avg4_acc_keywords.png ADDED

Git LFS Details

  • SHA256: 0674c9a8b123a4e1e5b8b408fad05103f19b39007a618da1969beffe926034af
  • Pointer size: 131 Bytes
  • Size of remote file: 205 kB
eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png ADDED

Git LFS Details

  • SHA256: 28cf2e0e0a990b352d513439db83e100313de37d6b094b0ae6e8354afebf0d35
  • Pointer size: 131 Bytes
  • Size of remote file: 220 kB
eval_results_avg4/plots/eval_results_avg4_acc_tokens.png ADDED

Git LFS Details

  • SHA256: 92e079f48931646632af648a385174f4c3ccaa4b85604746b1d114cd9788de2b
  • Pointer size: 131 Bytes
  • Size of remote file: 229 kB
eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png ADDED

Git LFS Details

  • SHA256: 7735ff599ccbf7fe60e12f89537db256e8aee852065b9ad04949b7175f2fb839
  • Pointer size: 131 Bytes
  • Size of remote file: 231 kB
eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png ADDED

Git LFS Details

  • SHA256: a126b33d44f3ecde076732b6099f62687a86fb00b7295ca7ea8eb390bd78ea4b
  • Pointer size: 131 Bytes
  • Size of remote file: 231 kB
eval_results_avg4/plots/eval_results_avg4_clip_ratio.png ADDED

Git LFS Details

  • SHA256: 6c880c2fbd31823bef5d23100aaa78c18256ca4e06d07bc279b8b6d2315653db
  • Pointer size: 131 Bytes
  • Size of remote file: 216 kB
eval_results_avg4/plots/eval_results_avg4_correct_tokens.png ADDED

Git LFS Details

  • SHA256: 45f87269f2b989f477260c7c4bb23a4aacb41c5f6bd782e747e220643181a3fe
  • Pointer size: 131 Bytes
  • Size of remote file: 223 kB
eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png ADDED

Git LFS Details

  • SHA256: 153587ee1c6ef97fe881909bcc133fb537a4184d9e1df783d873e30a6ba2f6ca
  • Pointer size: 131 Bytes
  • Size of remote file: 249 kB
eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png ADDED

Git LFS Details

  • SHA256: 8fc8f4ccdcf73b531c8a42be7a9a4c86433b3116b76ee5a5f4f6fd513ad401e9
  • Pointer size: 131 Bytes
  • Size of remote file: 212 kB
eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png ADDED

Git LFS Details

  • SHA256: 4319604aede547f4d186dad942304ef3c2db6d057ad33ffa6ca7bed92b48549f
  • Pointer size: 131 Bytes
  • Size of remote file: 233 kB
eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_0_actor_huggingface/2025-08-22T07-57-51.872019/details_extended|ifeval|0_2025-08-22T07-57-51.872019.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9da2b503d544d3adaf82d1bcdb74dd2338defd8f9934f88fce278f11d262b3c8
3
+ size 20285461
eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_0_actor_huggingface/2025-08-22T07-57-51.872019/details_extended|ifeval|0_2025-08-22T07-57-51.872019.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df24775c74045785e614b2ced0830d6d6aeebcc119b8a428f716cc9ae0797a3d
3
+ size 3168296
eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_0_actor_huggingface/2025-08-22T07-57-51.872019/details_lighteval|gpqa:diamond|0_2025-08-22T07-57-51.872019.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f635f006ad862d99fb9c834773b5214dbce21047eac6598d40ccd56cb3db143b
3
+ size 617970
eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_10_actor_huggingface/2025-08-22T07-47-25.389710/details_extended|ifeval|0_2025-08-22T07-47-25.389710.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8170a4bec78c145a20d4b17154dba882cb4e283744c06a80846441628c0cab53
3
+ size 15610177
eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_10_actor_huggingface/2025-08-22T07-47-25.389710/details_extended|ifeval|0_2025-08-22T07-47-25.389710.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4adb83e29ae5e2c6dface3cac1ef86ab82c826b1f9b5e83c8ed5248abe46a512
3
+ size 2843855
eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_10_actor_huggingface/2025-08-22T07-47-25.389710/details_lighteval|gpqa:diamond|0_2025-08-22T07-47-25.389710.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:007353af2bd4da05eac818c1bb9af3caecffbbe18a8aeb046292d46b4e0210a9
3
+ size 543738
eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_100_actor_huggingface/2025-08-22T08-13-29.136616/details_extended|ifeval|0_2025-08-22T08-13-29.136616.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:459b904b60dc662054534daeb60c655b68ed818b47b49a74aacb03cda7df0c0b
3
+ size 1306224
eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_100_actor_huggingface/2025-08-22T08-13-29.136616/details_lighteval|gpqa:diamond|0_2025-08-22T08-13-29.136616.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b9a3a319adc75f19f2904b9c4df6bc0c3e1b6edc44f0ca587ba86fc737762ff
3
+ size 710733
eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_20_actor_huggingface/2025-08-22T07-29-13.669606/details_extended|ifeval|0_2025-08-22T07-29-13.669606.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6af44d6bdc27afa78c75348219b7a97094c2fafbba367df73882639ee06ba71e
3
+ size 1852554
eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_20_actor_huggingface/2025-08-22T07-29-13.669606/details_lighteval|gpqa:diamond|0_2025-08-22T07-29-13.669606.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c323901a555f9e2d4071903687f56b04721f7931c55c48b5a028dd6679e1375c
3
+ size 644713
eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_30_actor_huggingface/2025-08-22T07-15-48.254808/details_extended|ifeval|0_2025-08-22T07-15-48.254808.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7614b496007aad888271cd710e3a5541eb9cc05a0e270d6d31f063f8a1294f2
3
+ size 1611319
eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_30_actor_huggingface/2025-08-22T07-15-48.254808/details_lighteval|gpqa:diamond|0_2025-08-22T07-15-48.254808.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69ed92f5d51449b3c8b3f434205c9763fa11eb3b4caf597384837347b88c0128
3
+ size 649559
eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_40_actor_huggingface/2025-08-22T07-12-09.643109/details_extended|ifeval|0_2025-08-22T07-12-09.643109.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a06b81ab020f2135899ed6a3721e4fdfa71ad9ae340e83ab482770405d70c01
3
+ size 1522928
eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_40_actor_huggingface/2025-08-22T07-12-09.643109/details_lighteval|gpqa:diamond|0_2025-08-22T07-12-09.643109.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bbcb1345665e9f96896ee1503803f848b34af147250248c67572fc9b21a5923
3
+ size 680229
eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_50_actor_huggingface/2025-08-22T07-09-28.251052/details_extended|ifeval|0_2025-08-22T07-09-28.251052.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e0c7e77a6adae7bf6ee41b947f353c2355f38fe524511ff3564af6f2876056f
3
+ size 1431840
eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_50_actor_huggingface/2025-08-22T07-09-28.251052/details_lighteval|gpqa:diamond|0_2025-08-22T07-09-28.251052.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d872f2c877cd51909dcb2d623e9b7ffdbb965b3376675d2ab7bfe21f9ce7c972
3
+ size 669400
eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_60_actor_huggingface/2025-08-22T07-09-44.265871/details_extended|ifeval|0_2025-08-22T07-09-44.265871.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d58b1de3823dcb594c613507928e6079866644c8f7fce2e7a90a97ba8b1ba09e
3
+ size 1416768
eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_60_actor_huggingface/2025-08-22T07-09-44.265871/details_lighteval|gpqa:diamond|0_2025-08-22T07-09-44.265871.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9057e644306261ac22c2883c32cf3e4c042f319c9fd5ccae2cde6515ad117dbd
3
+ size 690886
eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_70_actor_huggingface/2025-08-22T08-09-53.250731/details_extended|ifeval|0_2025-08-22T08-09-53.250731.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a159d4da06c7a15f52cafd6413fb71d1590b961b2ce077bdc938ac09c396f21
3
+ size 1284283
eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_70_actor_huggingface/2025-08-22T08-09-53.250731/details_lighteval|gpqa:diamond|0_2025-08-22T08-09-53.250731.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8cb4519f5ccd8217e0276e1d7456bc3908ecb29e241628ae6a1ca1a934caef1
3
+ size 673612
eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_80_actor_huggingface/2025-08-22T08-08-44.641132/details_extended|ifeval|0_2025-08-22T08-08-44.641132.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08e01921ee052d37f655863cea8cebe19e34632aa273de1bdb7cdb13a851f8fc
3
+ size 1292453
eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_80_actor_huggingface/2025-08-22T08-08-44.641132/details_lighteval|gpqa:diamond|0_2025-08-22T08-08-44.641132.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09631eb71eaf3d81ca1b15b56e3eb565fc8d6e5b203d3ed6d6b0ccf3c21e1cd8
3
+ size 684884
eval_results_ood/global_step_90/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_90_actor_huggingface/2025-08-22T08-09-51.132895/details_extended|ifeval|0_2025-08-22T08-09-51.132895.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4f943a5855b2736779ef6b695e2d751107a67885b91201d58ff35b7c02fc783
3
+ size 1374059
eval_results_ood/global_step_90/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_90_actor_huggingface/2025-08-22T08-09-51.132895/details_lighteval|gpqa:diamond|0_2025-08-22T08-09-51.132895.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e4e06978ada198f9b4857089229e95c92433097332cf31f159cc3515411d17e
3
+ size 683583
global_step_10/actor/huggingface/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
global_step_10/data.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:361b3a2db9de44ab0f855170ee87f31505491e0a949c4376e2e334e64b147592
3
+ size 1947
global_step_100/actor/huggingface/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
global_step_100/data.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ace1ba6509c69d2d98913c7d0a01abe0da6c0896b8d6629782cbce5ffef4bc99
3
+ size 1947