bensondccnqwc committed on
Commit
e743db2
·
verified ·
1 Parent(s): 8bede4f

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +34 -0
  2. eval_results/global_step_0/mmlu_stem/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +3 -0
  3. eval_results/plots/eval_results_acc_keywords.png +3 -0
  4. eval_results/plots/eval_results_acc_pass_acc.png +3 -0
  5. eval_results/plots/eval_results_acc_tokens.png +3 -0
  6. eval_results/plots/eval_results_avg_stop_tokens.png +3 -0
  7. eval_results/plots/eval_results_box_ratio_and_token_length.png +3 -0
  8. eval_results/plots/eval_results_clip_ratio.png +3 -0
  9. eval_results/plots/eval_results_correct_tokens.png +3 -0
  10. eval_results/plots/eval_results_repeat_ratio_and_token_length.png +3 -0
  11. eval_results/plots/eval_results_tokens_keywords.png +3 -0
  12. eval_results/plots/eval_results_wrong_tokens.png +3 -0
  13. eval_results_avg4/plots/eval_results_avg4_acc_keywords.png +3 -0
  14. eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png +3 -0
  15. eval_results_avg4/plots/eval_results_avg4_acc_tokens.png +3 -0
  16. eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png +3 -0
  17. eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png +3 -0
  18. eval_results_avg4/plots/eval_results_avg4_clip_ratio.png +3 -0
  19. eval_results_avg4/plots/eval_results_avg4_correct_tokens.png +3 -0
  20. eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png +3 -0
  21. eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png +3 -0
  22. eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png +3 -0
  23. eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_0_actor_huggingface/2025-08-19T03-03-31.558565/details_extended|ifeval|0_2025-08-19T03-03-31.558565.csv +3 -0
  24. eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_0_actor_huggingface/2025-08-19T03-03-31.558565/details_extended|ifeval|0_2025-08-19T03-03-31.558565.parquet +3 -0
  25. eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_0_actor_huggingface/2025-08-19T03-03-31.558565/details_lighteval|gpqa:diamond|0_2025-08-19T03-03-31.558565.parquet +3 -0
  26. eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_10_actor_huggingface/2025-08-19T02-59-44.015414/details_extended|ifeval|0_2025-08-19T02-59-44.015414.csv +3 -0
  27. eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_10_actor_huggingface/2025-08-19T02-59-44.015414/details_extended|ifeval|0_2025-08-19T02-59-44.015414.parquet +3 -0
  28. eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_10_actor_huggingface/2025-08-19T02-59-44.015414/details_lighteval|gpqa:diamond|0_2025-08-19T02-59-44.015414.parquet +3 -0
  29. eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_100_actor_huggingface/2025-08-19T03-23-25.775217/details_extended|ifeval|0_2025-08-19T03-23-25.775217.parquet +3 -0
  30. eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_100_actor_huggingface/2025-08-19T03-23-25.775217/details_lighteval|gpqa:diamond|0_2025-08-19T03-23-25.775217.parquet +3 -0
  31. eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_20_actor_huggingface/2025-08-19T02-45-28.477295/details_extended|ifeval|0_2025-08-19T02-45-28.477295.csv +3 -0
  32. eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_20_actor_huggingface/2025-08-19T02-45-28.477295/details_extended|ifeval|0_2025-08-19T02-45-28.477295.parquet +3 -0
  33. eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_20_actor_huggingface/2025-08-19T02-45-28.477295/details_lighteval|gpqa:diamond|0_2025-08-19T02-45-28.477295.parquet +3 -0
  34. eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_30_actor_huggingface/2025-08-19T02-31-43.587146/details_extended|ifeval|0_2025-08-19T02-31-43.587146.parquet +3 -0
  35. eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_30_actor_huggingface/2025-08-19T02-31-43.587146/details_lighteval|gpqa:diamond|0_2025-08-19T02-31-43.587146.parquet +3 -0
  36. eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_40_actor_huggingface/2025-08-19T02-32-20.502116/details_extended|ifeval|0_2025-08-19T02-32-20.502116.parquet +3 -0
  37. eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_40_actor_huggingface/2025-08-19T02-32-20.502116/details_lighteval|gpqa:diamond|0_2025-08-19T02-32-20.502116.parquet +3 -0
  38. eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_50_actor_huggingface/2025-08-19T02-27-37.276111/details_extended|ifeval|0_2025-08-19T02-27-37.276111.parquet +3 -0
  39. eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_50_actor_huggingface/2025-08-19T02-27-37.276111/details_lighteval|gpqa:diamond|0_2025-08-19T02-27-37.276111.parquet +3 -0
  40. eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_60_actor_huggingface/2025-08-19T02-24-48.573923/details_extended|ifeval|0_2025-08-19T02-24-48.573923.parquet +3 -0
  41. eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_60_actor_huggingface/2025-08-19T02-24-48.573923/details_lighteval|gpqa:diamond|0_2025-08-19T02-24-48.573923.parquet +3 -0
  42. eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_70_actor_huggingface/2025-08-19T02-23-41.225302/details_extended|ifeval|0_2025-08-19T02-23-41.225302.parquet +3 -0
  43. eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_70_actor_huggingface/2025-08-19T02-23-41.225302/details_lighteval|gpqa:diamond|0_2025-08-19T02-23-41.225302.parquet +3 -0
  44. eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_80_actor_huggingface/2025-08-19T03-22-27.759640/details_extended|ifeval|0_2025-08-19T03-22-27.759640.parquet +3 -0
  45. eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_80_actor_huggingface/2025-08-19T03-22-27.759640/details_lighteval|gpqa:diamond|0_2025-08-19T03-22-27.759640.parquet +3 -0
  46. eval_results_ood/global_step_90/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_90_actor_huggingface/2025-08-19T03-23-41.379521/details_extended|ifeval|0_2025-08-19T03-23-41.379521.parquet +3 -0
  47. eval_results_ood/global_step_90/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_90_actor_huggingface/2025-08-19T03-23-41.379521/details_lighteval|gpqa:diamond|0_2025-08-19T03-23-41.379521.parquet +3 -0
  48. global_step_0/actor/huggingface/model.safetensors +3 -0
  49. global_step_10/actor/huggingface/tokenizer.json +3 -0
  50. global_step_10/data.pt +3 -0
.gitattributes CHANGED
@@ -33,3 +33,37 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ eval_results/global_step_0/mmlu_stem/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl filter=lfs diff=lfs merge=lfs -text
37
+ eval_results/plots/eval_results_acc_pass_acc.png filter=lfs diff=lfs merge=lfs -text
38
+ eval_results/plots/eval_results_acc_keywords.png filter=lfs diff=lfs merge=lfs -text
39
+ eval_results/plots/eval_results_acc_tokens.png filter=lfs diff=lfs merge=lfs -text
40
+ eval_results/plots/eval_results_avg_stop_tokens.png filter=lfs diff=lfs merge=lfs -text
41
+ eval_results/plots/eval_results_box_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
42
+ eval_results/plots/eval_results_clip_ratio.png filter=lfs diff=lfs merge=lfs -text
43
+ eval_results/plots/eval_results_repeat_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
44
+ eval_results/plots/eval_results_correct_tokens.png filter=lfs diff=lfs merge=lfs -text
45
+ eval_results/plots/eval_results_tokens_keywords.png filter=lfs diff=lfs merge=lfs -text
46
+ eval_results/plots/eval_results_wrong_tokens.png filter=lfs diff=lfs merge=lfs -text
47
+ global_step_40/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
48
+ global_step_50/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
49
+ global_step_60/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
50
+ global_step_70/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
51
+ global_step_80/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
52
+ global_step_90/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
53
+ eval_results_avg4/plots/eval_results_avg4_acc_keywords.png filter=lfs diff=lfs merge=lfs -text
54
+ eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png filter=lfs diff=lfs merge=lfs -text
55
+ eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png filter=lfs diff=lfs merge=lfs -text
56
+ eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
57
+ eval_results_avg4/plots/eval_results_avg4_acc_tokens.png filter=lfs diff=lfs merge=lfs -text
58
+ eval_results_avg4/plots/eval_results_avg4_correct_tokens.png filter=lfs diff=lfs merge=lfs -text
59
+ eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png filter=lfs diff=lfs merge=lfs -text
60
+ eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
61
+ eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png filter=lfs diff=lfs merge=lfs -text
62
+ eval_results_avg4/plots/eval_results_avg4_clip_ratio.png filter=lfs diff=lfs merge=lfs -text
63
+ eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_0_actor_huggingface/2025-08-19T03-03-31.558565/details_extended|ifeval|0_2025-08-19T03-03-31.558565.csv filter=lfs diff=lfs merge=lfs -text
64
+ eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_10_actor_huggingface/2025-08-19T02-59-44.015414/details_extended|ifeval|0_2025-08-19T02-59-44.015414.csv filter=lfs diff=lfs merge=lfs -text
65
+ eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_20_actor_huggingface/2025-08-19T02-45-28.477295/details_extended|ifeval|0_2025-08-19T02-45-28.477295.csv filter=lfs diff=lfs merge=lfs -text
66
+ global_step_10/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
67
+ global_step_100/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
68
+ global_step_20/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
69
+ global_step_30/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
eval_results/global_step_0/mmlu_stem/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c128f44b9bd5065604edd55ea62d32b25dfd015eac122620303e55215bed515
3
+ size 12239207
eval_results/plots/eval_results_acc_keywords.png ADDED

Git LFS Details

  • SHA256: 0a7735963ac1d1819be8e0ccf12b0a860b1dadd610da44df9b1fe6a008d1211d
  • Pointer size: 131 Bytes
  • Size of remote file: 412 kB
eval_results/plots/eval_results_acc_pass_acc.png ADDED

Git LFS Details

  • SHA256: 5686e6fe961c95482e75da13c5fb8589d9b40963c2f250e6d7b93244baa61ff0
  • Pointer size: 131 Bytes
  • Size of remote file: 294 kB
eval_results/plots/eval_results_acc_tokens.png ADDED

Git LFS Details

  • SHA256: ac029a95516f36e8a1a3d6616fadbbed54acadab17458af725ce04a54928a603
  • Pointer size: 131 Bytes
  • Size of remote file: 434 kB
eval_results/plots/eval_results_avg_stop_tokens.png ADDED

Git LFS Details

  • SHA256: 483459577c1cfa27576e707e93111e7eabdd3273456d20873ba7d497ec5c77b3
  • Pointer size: 131 Bytes
  • Size of remote file: 463 kB
eval_results/plots/eval_results_box_ratio_and_token_length.png ADDED

Git LFS Details

  • SHA256: 61d9a7d0b2ba39886da32aa54a78b63954ab0e0d848ccb91dfc503c5787f0eef
  • Pointer size: 131 Bytes
  • Size of remote file: 450 kB
eval_results/plots/eval_results_clip_ratio.png ADDED

Git LFS Details

  • SHA256: 105c493d2d1e50e10da5aa7c57362a41bc175769f9cadd40bcb20932585a10c4
  • Pointer size: 131 Bytes
  • Size of remote file: 425 kB
eval_results/plots/eval_results_correct_tokens.png ADDED

Git LFS Details

  • SHA256: f403b72f70124d2ce96a9cc10bfd3803b3d8810ea201c3e06067ad1b15c949fc
  • Pointer size: 131 Bytes
  • Size of remote file: 417 kB
eval_results/plots/eval_results_repeat_ratio_and_token_length.png ADDED

Git LFS Details

  • SHA256: 898aaa3c9431205dc4db3f6394ac0e6916e9805cd14b523d20cbeee995eb7145
  • Pointer size: 131 Bytes
  • Size of remote file: 492 kB
eval_results/plots/eval_results_tokens_keywords.png ADDED

Git LFS Details

  • SHA256: 9939d9b80cf151065318ef64248f012d284bfb1ebdd47b0a0dc6cab87e3a79b3
  • Pointer size: 131 Bytes
  • Size of remote file: 448 kB
eval_results/plots/eval_results_wrong_tokens.png ADDED

Git LFS Details

  • SHA256: 0d2456fde41f6b378fd38cd7a28c43a011954b566f0f44fc712399e0b9be4223
  • Pointer size: 131 Bytes
  • Size of remote file: 447 kB
eval_results_avg4/plots/eval_results_avg4_acc_keywords.png ADDED

Git LFS Details

  • SHA256: 555a99c6db47c025ee251071983953a04ccaba6d95c5c225dbc4a3cf7b1b4cea
  • Pointer size: 131 Bytes
  • Size of remote file: 208 kB
eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png ADDED

Git LFS Details

  • SHA256: dce1b170b2b95a226495a755030d32b358112c3e286d9980330f1d0891bb1b71
  • Pointer size: 131 Bytes
  • Size of remote file: 215 kB
eval_results_avg4/plots/eval_results_avg4_acc_tokens.png ADDED

Git LFS Details

  • SHA256: b4429a5052e5a6500cf006ac7b3457cc33152206b2f7f4d7de56794845a3744f
  • Pointer size: 131 Bytes
  • Size of remote file: 233 kB
eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png ADDED

Git LFS Details

  • SHA256: 7ab8f5c7c1c91adb9d84e7e1940140949b6d6796748869b1446a9196ef331320
  • Pointer size: 131 Bytes
  • Size of remote file: 235 kB
eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png ADDED

Git LFS Details

  • SHA256: 12f68f633894e50b52cc03681201aef8fd01fee273125f25b1dd02c8492b344e
  • Pointer size: 131 Bytes
  • Size of remote file: 250 kB
eval_results_avg4/plots/eval_results_avg4_clip_ratio.png ADDED

Git LFS Details

  • SHA256: 75789f97825ea1dbe62141f730b192261e49f140f0d2cd9d8f6130c914cca3b9
  • Pointer size: 131 Bytes
  • Size of remote file: 211 kB
eval_results_avg4/plots/eval_results_avg4_correct_tokens.png ADDED

Git LFS Details

  • SHA256: 1fd3d9f1ebee829a11ac25441f0f51eeaeab97c97990de8945113a9d627e9dc8
  • Pointer size: 131 Bytes
  • Size of remote file: 211 kB
eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png ADDED

Git LFS Details

  • SHA256: 5059c1ae54ba581b1c791a0a371b78a785a1e463b55f26a7bd570bff1e4fb080
  • Pointer size: 131 Bytes
  • Size of remote file: 252 kB
eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png ADDED

Git LFS Details

  • SHA256: 9a642bab9c4fc6f23e519071a1e0717679b4b5d8f4f684d8b2e06a6a4fcd11a8
  • Pointer size: 131 Bytes
  • Size of remote file: 231 kB
eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png ADDED

Git LFS Details

  • SHA256: c1bb7b0c5a5d99ba648ad01e9a69a78356ca9c53ef31c5e9c233488d2d97f793
  • Pointer size: 131 Bytes
  • Size of remote file: 229 kB
eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_0_actor_huggingface/2025-08-19T03-03-31.558565/details_extended|ifeval|0_2025-08-19T03-03-31.558565.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77f4cbd1d57baaa65840d41c4e3fdf408aa0a5d078c3ab233d451de97e99b727
3
+ size 18592855
eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_0_actor_huggingface/2025-08-19T03-03-31.558565/details_extended|ifeval|0_2025-08-19T03-03-31.558565.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d611194575c562264b41400d3d9288f43e8c6320cb0e6d912a6489b1d85682f1
3
+ size 2789061
eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_0_actor_huggingface/2025-08-19T03-03-31.558565/details_lighteval|gpqa:diamond|0_2025-08-19T03-03-31.558565.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4073c119302a1e9cd694f357f710227a8957303f7178eb8702a4bd2c42b5a7b
3
+ size 566842
eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_10_actor_huggingface/2025-08-19T02-59-44.015414/details_extended|ifeval|0_2025-08-19T02-59-44.015414.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50f4fa9aca026027eac08f0ed0ddc2b00d926e18b7f88e330fc4293c392a73d0
3
+ size 17748213
eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_10_actor_huggingface/2025-08-19T02-59-44.015414/details_extended|ifeval|0_2025-08-19T02-59-44.015414.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cea5449f92cc18f780f33cf1ab76f3769bac4d58eb2e93547daf41df8842abeb
3
+ size 3026523
eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_10_actor_huggingface/2025-08-19T02-59-44.015414/details_lighteval|gpqa:diamond|0_2025-08-19T02-59-44.015414.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f310b31e7299ff4348cd8d9655f8334f9b76a8990f85a17be0445fc50aafe21c
3
+ size 611170
eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_100_actor_huggingface/2025-08-19T03-23-25.775217/details_extended|ifeval|0_2025-08-19T03-23-25.775217.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81c4fdf2936ca8d92b10ccc2b75ceb72f551b3b3be588ede7604967537743b29
3
+ size 1526557
eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_100_actor_huggingface/2025-08-19T03-23-25.775217/details_lighteval|gpqa:diamond|0_2025-08-19T03-23-25.775217.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a6c707d324d9afd6a1cfd752d177e675674484b761e2760fe7ce65b56324180
3
+ size 762665
eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_20_actor_huggingface/2025-08-19T02-45-28.477295/details_extended|ifeval|0_2025-08-19T02-45-28.477295.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0309fa76751880316815296f3f5d9ea697b4bcf47aab8ff480b5b00cb1728aa9
3
+ size 17747706
eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_20_actor_huggingface/2025-08-19T02-45-28.477295/details_extended|ifeval|0_2025-08-19T02-45-28.477295.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d101c8fa4c97ffd37e3554371ec26e987a62ad1b7d8cd2551e0cecae28d65d5
3
+ size 2445648
eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_20_actor_huggingface/2025-08-19T02-45-28.477295/details_lighteval|gpqa:diamond|0_2025-08-19T02-45-28.477295.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dffd5da7c2e6e280733a688198e964c5b843c36ac5e1187658f3a1da0467b32
3
+ size 604606
eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_30_actor_huggingface/2025-08-19T02-31-43.587146/details_extended|ifeval|0_2025-08-19T02-31-43.587146.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e06532f2a4ca0d6fc447b2c01eaa81246f747d6e82cd3a415179fd94e01ab5c9
3
+ size 1997084
eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_30_actor_huggingface/2025-08-19T02-31-43.587146/details_lighteval|gpqa:diamond|0_2025-08-19T02-31-43.587146.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49be0512c751e57b2bdc48244427efe32f3db3411839a30cc75027c2a0bff29a
3
+ size 659079
eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_40_actor_huggingface/2025-08-19T02-32-20.502116/details_extended|ifeval|0_2025-08-19T02-32-20.502116.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccbccd663da32f237c308c7ce146a10b1403cd60ade911b955a8466c59aa4315
3
+ size 1933203
eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_40_actor_huggingface/2025-08-19T02-32-20.502116/details_lighteval|gpqa:diamond|0_2025-08-19T02-32-20.502116.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72fe64ee7be245e4906a6f79dea95073c247311a70f7469a2e86d7ca3d721dc1
3
+ size 729678
eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_50_actor_huggingface/2025-08-19T02-27-37.276111/details_extended|ifeval|0_2025-08-19T02-27-37.276111.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7d7fc6fb913cabb9c3bb4ab6faf1d413a69f88cb882fbd63f23a0ce21d85ad8
3
+ size 1887216
eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_50_actor_huggingface/2025-08-19T02-27-37.276111/details_lighteval|gpqa:diamond|0_2025-08-19T02-27-37.276111.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4626ac4a1695080f6379b1712918e3c90c9c9b2d6bb41ce561ffc262554982e1
3
+ size 710992
eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_60_actor_huggingface/2025-08-19T02-24-48.573923/details_extended|ifeval|0_2025-08-19T02-24-48.573923.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35ce730b988707e09af26e33ee0b669dc0b0eac8a56c63d1446260dd37bc6d1c
3
+ size 1633920
eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_60_actor_huggingface/2025-08-19T02-24-48.573923/details_lighteval|gpqa:diamond|0_2025-08-19T02-24-48.573923.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b3eafdc4a5dc80faae8416428e649a5ea69b3720ab02eb6945c3455ccfd8379
3
+ size 717748
eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_70_actor_huggingface/2025-08-19T02-23-41.225302/details_extended|ifeval|0_2025-08-19T02-23-41.225302.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4ff941a9a8eb14c97cd6a36795beb4e5cddcf45ac9977f605eb78a892b75e1b
3
+ size 1554610
eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_70_actor_huggingface/2025-08-19T02-23-41.225302/details_lighteval|gpqa:diamond|0_2025-08-19T02-23-41.225302.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:896ae09b2e2167fe2443b2586ec718a9a92a7eb5a521b891fdf654b8b0a7f06a
3
+ size 711115
eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_80_actor_huggingface/2025-08-19T03-22-27.759640/details_extended|ifeval|0_2025-08-19T03-22-27.759640.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d7564a8979cd22c865885d2905e4d6999907884b7c4139c2e8582799de3f041
3
+ size 1863422
eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_80_actor_huggingface/2025-08-19T03-22-27.759640/details_lighteval|gpqa:diamond|0_2025-08-19T03-22-27.759640.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e3309afb1ba407363c2582463ed55d238c47b3f50af967b601c0a0bd54572b0
3
+ size 731554
eval_results_ood/global_step_90/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_90_actor_huggingface/2025-08-19T03-23-41.379521/details_extended|ifeval|0_2025-08-19T03-23-41.379521.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bdc3b985034af8c1a96535639333cd8fec96b045a5cd0147f1ccc7f09931fab
3
+ size 1712739
eval_results_ood/global_step_90/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_90_actor_huggingface/2025-08-19T03-23-41.379521/details_lighteval|gpqa:diamond|0_2025-08-19T03-23-41.379521.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a22384154eb28672a4c38af7f8b186cbaecdf854de6d2a9596b0bd3293aa4e5
3
+ size 761415
global_step_0/actor/huggingface/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6df85b39330e5a425ee36253d0f894e4387e4f0a15b9c53cb467d668e6b3a841
3
+ size 3441185608
global_step_10/actor/huggingface/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
global_step_10/data.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5d7cb574e77c186989ac15b0b93bfabe487df954f0d0061aeabf2d618cedd25
3
+ size 1947