bensondccnqwc committed on
Commit
8b2e2e7
·
verified ·
1 Parent(s): 01ab4e1

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitattributes +29 -0
  2. eval_results/plots/eval_results_acc_keywords.png +3 -0
  3. eval_results/plots/eval_results_acc_pass_acc.png +3 -0
  4. eval_results/plots/eval_results_acc_tokens.png +3 -0
  5. eval_results/plots/eval_results_avg_stop_tokens.png +3 -0
  6. eval_results/plots/eval_results_box_ratio_and_token_length.png +3 -0
  7. eval_results/plots/eval_results_clip_ratio.png +3 -0
  8. eval_results/plots/eval_results_correct_tokens.png +3 -0
  9. eval_results/plots/eval_results_repeat_ratio_and_token_length.png +3 -0
  10. eval_results/plots/eval_results_tokens_keywords.png +3 -0
  11. eval_results/plots/eval_results_wrong_tokens.png +3 -0
  12. eval_results_avg4/plots/eval_results_avg4_acc_keywords.png +3 -0
  13. eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png +3 -0
  14. eval_results_avg4/plots/eval_results_avg4_acc_tokens.png +3 -0
  15. eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png +3 -0
  16. eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png +3 -0
  17. eval_results_avg4/plots/eval_results_avg4_clip_ratio.png +3 -0
  18. eval_results_avg4/plots/eval_results_avg4_correct_tokens.png +3 -0
  19. eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png +3 -0
  20. eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png +3 -0
  21. eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png +3 -0
  22. eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_0_actor_huggingface/2025-08-15T11-09-39.306198/details_extended|ifeval|0_2025-08-15T11-09-39.306198.csv +3 -0
  23. eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_0_actor_huggingface/2025-08-15T11-09-39.306198/details_extended|ifeval|0_2025-08-15T11-09-39.306198.parquet +3 -0
  24. eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_0_actor_huggingface/2025-08-15T11-09-39.306198/details_lighteval|gpqa:diamond|0_2025-08-15T11-09-39.306198.parquet +3 -0
  25. eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_10_actor_huggingface/2025-08-15T11-00-48.004731/details_extended|ifeval|0_2025-08-15T11-00-48.004731.csv +3 -0
  26. eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_10_actor_huggingface/2025-08-15T11-00-48.004731/details_extended|ifeval|0_2025-08-15T11-00-48.004731.parquet +3 -0
  27. eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_10_actor_huggingface/2025-08-15T11-00-48.004731/details_lighteval|gpqa:diamond|0_2025-08-15T11-00-48.004731.parquet +3 -0
  28. eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_100_actor_huggingface/2025-08-22T15-43-34.436852/details_extended|ifeval|0_2025-08-22T15-43-34.436852.csv +3 -0
  29. eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_100_actor_huggingface/2025-08-22T15-43-34.436852/details_extended|ifeval|0_2025-08-22T15-43-34.436852.parquet +3 -0
  30. eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_100_actor_huggingface/2025-08-22T15-43-34.436852/details_lighteval|gpqa:diamond|0_2025-08-22T15-43-34.436852.parquet +3 -0
  31. eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_20_actor_huggingface/2025-08-15T11-00-50.060940/details_extended|ifeval|0_2025-08-15T11-00-50.060940.parquet +3 -0
  32. eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_20_actor_huggingface/2025-08-15T11-00-50.060940/details_lighteval|gpqa:diamond|0_2025-08-15T11-00-50.060940.parquet +3 -0
  33. eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_30_actor_huggingface/2025-08-15T11-04-02.688065/details_extended|ifeval|0_2025-08-15T11-04-02.688065.parquet +3 -0
  34. eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_30_actor_huggingface/2025-08-15T11-04-02.688065/details_lighteval|gpqa:diamond|0_2025-08-15T11-04-02.688065.parquet +3 -0
  35. eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_40_actor_huggingface/2025-08-15T11-58-04.589248/details_extended|ifeval|0_2025-08-15T11-58-04.589248.parquet +3 -0
  36. eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_40_actor_huggingface/2025-08-15T11-58-04.589248/details_lighteval|gpqa:diamond|0_2025-08-15T11-58-04.589248.parquet +3 -0
  37. eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_50_actor_huggingface/2025-08-15T11-57-24.288945/details_extended|ifeval|0_2025-08-15T11-57-24.288945.parquet +3 -0
  38. eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_50_actor_huggingface/2025-08-15T11-57-24.288945/details_lighteval|gpqa:diamond|0_2025-08-15T11-57-24.288945.parquet +3 -0
  39. eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_60_actor_huggingface/2025-08-15T11-53-18.171067/details_extended|ifeval|0_2025-08-15T11-53-18.171067.parquet +3 -0
  40. eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_60_actor_huggingface/2025-08-15T11-53-18.171067/details_lighteval|gpqa:diamond|0_2025-08-15T11-53-18.171067.parquet +3 -0
  41. eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_70_actor_huggingface/2025-08-15T11-51-22.040232/details_extended|ifeval|0_2025-08-15T11-51-22.040232.parquet +3 -0
  42. eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_70_actor_huggingface/2025-08-15T11-51-22.040232/details_lighteval|gpqa:diamond|0_2025-08-15T11-51-22.040232.parquet +3 -0
  43. eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_80_actor_huggingface/2025-08-15T12-32-10.307029/details_lighteval|gpqa:diamond|0_2025-08-15T12-32-10.307029.parquet +3 -0
  44. global_step_30/actor/huggingface/vocab.json +0 -0
  45. global_step_40/actor/fsdp_config.json +4 -0
  46. global_step_40/actor/huggingface/added_tokens.json +28 -0
  47. global_step_40/actor/huggingface/chat_template.jinja +85 -0
  48. global_step_40/actor/huggingface/config.json +60 -0
  49. global_step_40/actor/huggingface/generation_config.json +6 -0
  50. global_step_40/actor/huggingface/merges.txt +0 -0
.gitattributes CHANGED
@@ -33,3 +33,32 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ eval_results/plots/eval_results_acc_pass_acc.png filter=lfs diff=lfs merge=lfs -text
37
+ eval_results/plots/eval_results_acc_keywords.png filter=lfs diff=lfs merge=lfs -text
38
+ eval_results/plots/eval_results_acc_tokens.png filter=lfs diff=lfs merge=lfs -text
39
+ eval_results/plots/eval_results_clip_ratio.png filter=lfs diff=lfs merge=lfs -text
40
+ eval_results/plots/eval_results_avg_stop_tokens.png filter=lfs diff=lfs merge=lfs -text
41
+ eval_results/plots/eval_results_box_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
42
+ eval_results/plots/eval_results_correct_tokens.png filter=lfs diff=lfs merge=lfs -text
43
+ eval_results/plots/eval_results_repeat_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
44
+ eval_results/plots/eval_results_tokens_keywords.png filter=lfs diff=lfs merge=lfs -text
45
+ eval_results/plots/eval_results_wrong_tokens.png filter=lfs diff=lfs merge=lfs -text
46
+ global_step_40/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
47
+ global_step_50/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
48
+ global_step_60/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
49
+ global_step_70/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
50
+ global_step_80/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
51
+ global_step_90/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
52
+ eval_results_avg4/plots/eval_results_avg4_acc_keywords.png filter=lfs diff=lfs merge=lfs -text
53
+ eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png filter=lfs diff=lfs merge=lfs -text
54
+ eval_results_avg4/plots/eval_results_avg4_acc_tokens.png filter=lfs diff=lfs merge=lfs -text
55
+ eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png filter=lfs diff=lfs merge=lfs -text
56
+ eval_results_avg4/plots/eval_results_avg4_correct_tokens.png filter=lfs diff=lfs merge=lfs -text
57
+ eval_results_avg4/plots/eval_results_avg4_clip_ratio.png filter=lfs diff=lfs merge=lfs -text
58
+ eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
59
+ eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
60
+ eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png filter=lfs diff=lfs merge=lfs -text
61
+ eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png filter=lfs diff=lfs merge=lfs -text
62
+ eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_100_actor_huggingface/2025-08-22T15-43-34.436852/details_extended|ifeval|0_2025-08-22T15-43-34.436852.csv filter=lfs diff=lfs merge=lfs -text
63
+ eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_10_actor_huggingface/2025-08-15T11-00-48.004731/details_extended|ifeval|0_2025-08-15T11-00-48.004731.csv filter=lfs diff=lfs merge=lfs -text
64
+ eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_0_actor_huggingface/2025-08-15T11-09-39.306198/details_extended|ifeval|0_2025-08-15T11-09-39.306198.csv filter=lfs diff=lfs merge=lfs -text
eval_results/plots/eval_results_acc_keywords.png ADDED

Git LFS Details

  • SHA256: 433e14d30bcd0744f01fa3c7b881826f338603c5b164905b88e4167bb3f92918
  • Pointer size: 131 Bytes
  • Size of remote file: 406 kB
eval_results/plots/eval_results_acc_pass_acc.png ADDED

Git LFS Details

  • SHA256: c25f726868ca5f362a536b5a44cb266e9e7f2e0a10398f99cdb8f526cb5000be
  • Pointer size: 131 Bytes
  • Size of remote file: 308 kB
eval_results/plots/eval_results_acc_tokens.png ADDED

Git LFS Details

  • SHA256: 0e31adc2f8a8da0cfb5aa639d81f1d3c1e034e94d8360c820353dbbcfbcfb2c1
  • Pointer size: 131 Bytes
  • Size of remote file: 448 kB
eval_results/plots/eval_results_avg_stop_tokens.png ADDED

Git LFS Details

  • SHA256: 5821211bbc7207e95856255ef832dfd342b58c22a4bba34b9414efeb95084a4b
  • Pointer size: 131 Bytes
  • Size of remote file: 475 kB
eval_results/plots/eval_results_box_ratio_and_token_length.png ADDED

Git LFS Details

  • SHA256: b69f17f956c3dff8439c8540829f3996812538871002221db1284dd356f387eb
  • Pointer size: 131 Bytes
  • Size of remote file: 449 kB
eval_results/plots/eval_results_clip_ratio.png ADDED

Git LFS Details

  • SHA256: 0978badd8559a0ca95613f1b1f896d713b3981e1a80f4162a66b34dedd82e770
  • Pointer size: 131 Bytes
  • Size of remote file: 433 kB
eval_results/plots/eval_results_correct_tokens.png ADDED

Git LFS Details

  • SHA256: 322c49b9778cedb1521b583c9c847e2181031edbb0d9f83a8e3c2e3ed15e52ac
  • Pointer size: 131 Bytes
  • Size of remote file: 416 kB
eval_results/plots/eval_results_repeat_ratio_and_token_length.png ADDED

Git LFS Details

  • SHA256: 841d96d0bf18b0c5adc4510780575614cd5b77a81e6a91490738a35ff53fa062
  • Pointer size: 131 Bytes
  • Size of remote file: 497 kB
eval_results/plots/eval_results_tokens_keywords.png ADDED

Git LFS Details

  • SHA256: 560da136488d59dce2a626444abeaf9e3cb7a0b41ea02632a5e66a548befbda8
  • Pointer size: 131 Bytes
  • Size of remote file: 442 kB
eval_results/plots/eval_results_wrong_tokens.png ADDED

Git LFS Details

  • SHA256: 2d2a67a44e137121c7300f580dee58a262b0de1ead69408e4be869b8c44e52b8
  • Pointer size: 131 Bytes
  • Size of remote file: 455 kB
eval_results_avg4/plots/eval_results_avg4_acc_keywords.png ADDED

Git LFS Details

  • SHA256: c228abc23633c0a942eb5d253d3ff222fff825a1f742b59138fd89d7612309ea
  • Pointer size: 131 Bytes
  • Size of remote file: 220 kB
eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png ADDED

Git LFS Details

  • SHA256: 1ab4d5f5d775b944156043fdc2217fa20bc4cd85339bb46e40027d57345f1d48
  • Pointer size: 131 Bytes
  • Size of remote file: 211 kB
eval_results_avg4/plots/eval_results_avg4_acc_tokens.png ADDED

Git LFS Details

  • SHA256: d77c67623d041f6f294c2e472624168569973185aec980ce08f30882c75afc9e
  • Pointer size: 131 Bytes
  • Size of remote file: 227 kB
eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png ADDED

Git LFS Details

  • SHA256: 76e7820215ce9dc2055a7efb79ebe33378cefeb2e2ea68f2eb4431128578ed73
  • Pointer size: 131 Bytes
  • Size of remote file: 245 kB
eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png ADDED

Git LFS Details

  • SHA256: 3a0820094787112cc6fd61beeaed8e8e188487e275f304b4a1df728989183211
  • Pointer size: 131 Bytes
  • Size of remote file: 245 kB
eval_results_avg4/plots/eval_results_avg4_clip_ratio.png ADDED

Git LFS Details

  • SHA256: 9b22aa24f487994c2aff39654728b6e6cf6ea8252905c7dc9bdab07e8a3ac0b4
  • Pointer size: 131 Bytes
  • Size of remote file: 218 kB
eval_results_avg4/plots/eval_results_avg4_correct_tokens.png ADDED

Git LFS Details

  • SHA256: b6826bae9b6a4dff1bcca2f7929bb0d9d27f497501fc89fbcd89e5c1c391fd3b
  • Pointer size: 131 Bytes
  • Size of remote file: 222 kB
eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png ADDED

Git LFS Details

  • SHA256: b868e65cb1b6b323df987fe2cabbc1e8e7183446c138f7b136de129cf2291bf2
  • Pointer size: 131 Bytes
  • Size of remote file: 249 kB
eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png ADDED

Git LFS Details

  • SHA256: 19904c444ca7517fbd81e8e5caa10ba46526448651c5018227d84eadb0ef67e2
  • Pointer size: 131 Bytes
  • Size of remote file: 228 kB
eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png ADDED

Git LFS Details

  • SHA256: 93ee1f529ea009731a6b2f65054adff7053b0307769bc5028688148be4c4ad49
  • Pointer size: 131 Bytes
  • Size of remote file: 226 kB
eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_0_actor_huggingface/2025-08-15T11-09-39.306198/details_extended|ifeval|0_2025-08-15T11-09-39.306198.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba25009d3195ef4f302969d187cd0ead55f801794761df1cfdc17d383ce7272d
3
+ size 17982625
eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_0_actor_huggingface/2025-08-15T11-09-39.306198/details_extended|ifeval|0_2025-08-15T11-09-39.306198.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d327d6b95f90194cb6994e322a34e8a413c2c056ca7d298df9bd2cb7ebca0062
3
+ size 2587594
eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_0_actor_huggingface/2025-08-15T11-09-39.306198/details_lighteval|gpqa:diamond|0_2025-08-15T11-09-39.306198.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b01da6d648a8cd33c37eb2d88ddbed0b5edd62dba41d3bff486a2f153cdb04d
3
+ size 637219
eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_10_actor_huggingface/2025-08-15T11-00-48.004731/details_extended|ifeval|0_2025-08-15T11-00-48.004731.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9aac5b259acb2bb11fa6f6269922167d22ac502f65faaacfb24ca596250ed29a
3
+ size 15512248
eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_10_actor_huggingface/2025-08-15T11-00-48.004731/details_extended|ifeval|0_2025-08-15T11-00-48.004731.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3de1830cbd609be851ff3a65b0750535e8b0e522193d693e0d12b446bfe457d
3
+ size 2309106
eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_10_actor_huggingface/2025-08-15T11-00-48.004731/details_lighteval|gpqa:diamond|0_2025-08-15T11-00-48.004731.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd4304adb3095d24edec3618aa55c5d2ffa61661c514a364099959619baa6857
3
+ size 645401
eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_100_actor_huggingface/2025-08-22T15-43-34.436852/details_extended|ifeval|0_2025-08-22T15-43-34.436852.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce109454ca9db002045c7388fa2cafb6a67e4cc30552fe381c9d38b08187d452
3
+ size 10843482
eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_100_actor_huggingface/2025-08-22T15-43-34.436852/details_extended|ifeval|0_2025-08-22T15-43-34.436852.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9323c665c7e398841fad4d0a11bf8ff30fa5f76425368558a20153e74ba81375
3
+ size 1918374
eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_100_actor_huggingface/2025-08-22T15-43-34.436852/details_lighteval|gpqa:diamond|0_2025-08-22T15-43-34.436852.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6a9ffb05513a70c5e4c7da1b0ad394b7a54d0ce5e7df7bca15a7ed47614e46e
3
+ size 554787
eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_20_actor_huggingface/2025-08-15T11-00-50.060940/details_extended|ifeval|0_2025-08-15T11-00-50.060940.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ae4f8c5b78dd375a1908468f8de282240f0a8edf4a5dcb22f0e4bd5d7b063e5
3
+ size 2409406
eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_20_actor_huggingface/2025-08-15T11-00-50.060940/details_lighteval|gpqa:diamond|0_2025-08-15T11-00-50.060940.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:073db43b6709125161c191ecd64cde7b3e1c43700532c9eea62488093a69070d
3
+ size 513644
eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_30_actor_huggingface/2025-08-15T11-04-02.688065/details_extended|ifeval|0_2025-08-15T11-04-02.688065.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0df6fdf6652de534c9c7f0989303f8c826ddc3109ddf7b086e8b686d3455333f
3
+ size 2627692
eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_30_actor_huggingface/2025-08-15T11-04-02.688065/details_lighteval|gpqa:diamond|0_2025-08-15T11-04-02.688065.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c4619672b936e5646db5ee8e1d02aae6ce0d1c56ed1f9b0be91536d3d25e49c
3
+ size 509697
eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_40_actor_huggingface/2025-08-15T11-58-04.589248/details_extended|ifeval|0_2025-08-15T11-58-04.589248.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed83d1ee041e4d1e634fa434e8c78ca03c511fe37568085a6ef0a31df10bcaf7
3
+ size 2215370
eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_40_actor_huggingface/2025-08-15T11-58-04.589248/details_lighteval|gpqa:diamond|0_2025-08-15T11-58-04.589248.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdcb51742700745022425679785b56c157e9233f74a393e0fe38ade962679f8b
3
+ size 586448
eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_50_actor_huggingface/2025-08-15T11-57-24.288945/details_extended|ifeval|0_2025-08-15T11-57-24.288945.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ede96e5baf52d142ce38e94fb4c8f15c1bc99dc2e5030e8c92a14d70babbe64a
3
+ size 2195726
eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_50_actor_huggingface/2025-08-15T11-57-24.288945/details_lighteval|gpqa:diamond|0_2025-08-15T11-57-24.288945.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2372b64e22b49d927325bf0bcb1fbeb7e199cbd48efc7f9674f55476066a3324
3
+ size 509600
eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_60_actor_huggingface/2025-08-15T11-53-18.171067/details_extended|ifeval|0_2025-08-15T11-53-18.171067.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a3af218d2b633adb30b9a563890873a6ac278a88e3bb06474a5ef8d383a4c4c
3
+ size 2085242
eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_60_actor_huggingface/2025-08-15T11-53-18.171067/details_lighteval|gpqa:diamond|0_2025-08-15T11-53-18.171067.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35951f3340502f296b7798e95c42d212ded4002d17cfdb3343ee1ced89f6b464
3
+ size 518608
eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_70_actor_huggingface/2025-08-15T11-51-22.040232/details_extended|ifeval|0_2025-08-15T11-51-22.040232.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9091284ac521d787bc5119a5b027f59483782220da8ba2979b81ae5bcb098cfe
3
+ size 2198576
eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_70_actor_huggingface/2025-08-15T11-51-22.040232/details_lighteval|gpqa:diamond|0_2025-08-15T11-51-22.040232.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61818eb49695d3bec63e328738fba94274f2fd823d9da2ae5029f7b7a1c5ee85
3
+ size 621605
eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_80_actor_huggingface/2025-08-15T12-32-10.307029/details_lighteval|gpqa:diamond|0_2025-08-15T12-32-10.307029.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81f31fa417c47f421e890fd349bf505e93734031a524ab6e0056782853d63b6d
3
+ size 605959
global_step_30/actor/huggingface/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
global_step_40/actor/fsdp_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "FSDP_version": 1,
3
+ "world_size": 8
4
+ }
global_step_40/actor/huggingface/added_tokens.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</think>": 151668,
3
+ "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
+ "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
+ "<|box_end|>": 151649,
9
+ "<|box_start|>": 151648,
10
+ "<|endoftext|>": 151643,
11
+ "<|file_sep|>": 151664,
12
+ "<|fim_middle|>": 151660,
13
+ "<|fim_pad|>": 151662,
14
+ "<|fim_prefix|>": 151659,
15
+ "<|fim_suffix|>": 151661,
16
+ "<|im_end|>": 151645,
17
+ "<|im_start|>": 151644,
18
+ "<|image_pad|>": 151655,
19
+ "<|object_ref_end|>": 151647,
20
+ "<|object_ref_start|>": 151646,
21
+ "<|quad_end|>": 151651,
22
+ "<|quad_start|>": 151650,
23
+ "<|repo_name|>": 151663,
24
+ "<|video_pad|>": 151656,
25
+ "<|vision_end|>": 151653,
26
+ "<|vision_pad|>": 151654,
27
+ "<|vision_start|>": 151652
28
+ }
global_step_40/actor/huggingface/chat_template.jinja ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0].role == 'system' %}
4
+ {{- messages[0].content + '\n\n' }}
5
+ {%- endif %}
6
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
+ {%- for tool in tools %}
8
+ {{- "\n" }}
9
+ {{- tool | tojson }}
10
+ {%- endfor %}
11
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
+ {%- else %}
13
+ {%- if messages[0].role == 'system' %}
14
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
+ {%- endif %}
16
+ {%- endif %}
17
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
+ {%- for message in messages[::-1] %}
19
+ {%- set index = (messages|length - 1) - loop.index0 %}
20
+ {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
21
+ {%- set ns.multi_step_tool = false %}
22
+ {%- set ns.last_query_index = index %}
23
+ {%- endif %}
24
+ {%- endfor %}
25
+ {%- for message in messages %}
26
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
27
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
28
+ {%- elif message.role == "assistant" %}
29
+ {%- set content = message.content %}
30
+ {%- set reasoning_content = '' %}
31
+ {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
32
+ {%- set reasoning_content = message.reasoning_content %}
33
+ {%- else %}
34
+ {%- if '</think>' in message.content %}
35
+ {%- set content = message.content.split('</think>')[-1].lstrip('\n') %}
36
+ {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
37
+ {%- endif %}
38
+ {%- endif %}
39
+ {%- if loop.index0 > ns.last_query_index %}
40
+ {%- if loop.last or (not loop.last and reasoning_content) %}
41
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
42
+ {%- else %}
43
+ {{- '<|im_start|>' + message.role + '\n' + content }}
44
+ {%- endif %}
45
+ {%- else %}
46
+ {{- '<|im_start|>' + message.role + '\n' + content }}
47
+ {%- endif %}
48
+ {%- if message.tool_calls %}
49
+ {%- for tool_call in message.tool_calls %}
50
+ {%- if (loop.first and content) or (not loop.first) %}
51
+ {{- '\n' }}
52
+ {%- endif %}
53
+ {%- if tool_call.function %}
54
+ {%- set tool_call = tool_call.function %}
55
+ {%- endif %}
56
+ {{- '<tool_call>\n{"name": "' }}
57
+ {{- tool_call.name }}
58
+ {{- '", "arguments": ' }}
59
+ {%- if tool_call.arguments is string %}
60
+ {{- tool_call.arguments }}
61
+ {%- else %}
62
+ {{- tool_call.arguments | tojson }}
63
+ {%- endif %}
64
+ {{- '}\n</tool_call>' }}
65
+ {%- endfor %}
66
+ {%- endif %}
67
+ {{- '<|im_end|>\n' }}
68
+ {%- elif message.role == "tool" %}
69
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
70
+ {{- '<|im_start|>user' }}
71
+ {%- endif %}
72
+ {{- '\n<tool_response>\n' }}
73
+ {{- message.content }}
74
+ {{- '\n</tool_response>' }}
75
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
76
+ {{- '<|im_end|>\n' }}
77
+ {%- endif %}
78
+ {%- endif %}
79
+ {%- endfor %}
80
+ {%- if add_generation_prompt %}
81
+ {{- '<|im_start|>assistant\n' }}
82
+ {%- if enable_thinking is defined and enable_thinking is false %}
83
+ {{- '<think>\n\n</think>\n\n' }}
84
+ {%- endif %}
85
+ {%- endif %}
global_step_40/actor/huggingface/config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "eos_token_id": 151643,
8
+ "head_dim": 128,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 2048,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 6144,
13
+ "layer_types": [
14
+ "full_attention",
15
+ "full_attention",
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention"
42
+ ],
43
+ "max_position_embeddings": 32768,
44
+ "max_window_layers": 28,
45
+ "model_type": "qwen3",
46
+ "num_attention_heads": 16,
47
+ "num_hidden_layers": 28,
48
+ "num_key_value_heads": 8,
49
+ "pad_token_id": 151643,
50
+ "rms_norm_eps": 1e-06,
51
+ "rope_scaling": null,
52
+ "rope_theta": 1000000,
53
+ "sliding_window": null,
54
+ "tie_word_embeddings": true,
55
+ "torch_dtype": "bfloat16",
56
+ "transformers_version": "4.53.2",
57
+ "use_cache": true,
58
+ "use_sliding_window": false,
59
+ "vocab_size": 151936
60
+ }
global_step_40/actor/huggingface/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "eos_token_id": 151643,
4
+ "max_new_tokens": 2048,
5
+ "transformers_version": "4.53.2"
6
+ }
global_step_40/actor/huggingface/merges.txt ADDED
The diff for this file is too large to render. See raw diff