bensondccnqwc commited on
Commit
c9c128f
·
verified ·
1 Parent(s): 6a444ef

Add files using upload-large-folder tool

Browse files
Files changed (36) hide show
  1. .gitattributes +5 -0
  2. eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png +3 -0
  3. eval_results_ood/global_step_0/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_0_actor_huggingface/2025-08-21T21-29-36.921510/details_extended|ifeval|0_2025-08-21T21-29-36.921510.csv +3 -0
  4. eval_results_ood/global_step_0/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_0_actor_huggingface/2025-08-21T21-29-36.921510/details_extended|ifeval|0_2025-08-21T21-29-36.921510.parquet +3 -0
  5. eval_results_ood/global_step_0/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_0_actor_huggingface/2025-08-21T21-29-36.921510/details_lighteval|gpqa:diamond|0_2025-08-21T21-29-36.921510.parquet +3 -0
  6. eval_results_ood/global_step_10/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_10_actor_huggingface/2025-08-21T21-23-51.437133/details_extended|ifeval|0_2025-08-21T21-23-51.437133.parquet +3 -0
  7. eval_results_ood/global_step_10/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_10_actor_huggingface/2025-08-21T21-23-51.437133/details_lighteval|gpqa:diamond|0_2025-08-21T21-23-51.437133.parquet +3 -0
  8. eval_results_ood/global_step_100/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_100_actor_huggingface/2025-08-21T22-04-05.839743/details_extended|ifeval|0_2025-08-21T22-04-05.839743.parquet +3 -0
  9. eval_results_ood/global_step_100/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_100_actor_huggingface/2025-08-21T22-04-05.839743/details_lighteval|gpqa:diamond|0_2025-08-21T22-04-05.839743.parquet +3 -0
  10. eval_results_ood/global_step_20/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_20_actor_huggingface/2025-08-21T21-13-14.882470/details_extended|ifeval|0_2025-08-21T21-13-14.882470.csv +3 -0
  11. eval_results_ood/global_step_20/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_20_actor_huggingface/2025-08-21T21-13-14.882470/details_extended|ifeval|0_2025-08-21T21-13-14.882470.parquet +3 -0
  12. eval_results_ood/global_step_20/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_20_actor_huggingface/2025-08-21T21-13-14.882470/details_lighteval|gpqa:diamond|0_2025-08-21T21-13-14.882470.parquet +3 -0
  13. eval_results_ood/global_step_30/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_30_actor_huggingface/2025-08-21T21-12-33.887360/details_extended|ifeval|0_2025-08-21T21-12-33.887360.csv +3 -0
  14. eval_results_ood/global_step_30/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_30_actor_huggingface/2025-08-21T21-12-33.887360/details_extended|ifeval|0_2025-08-21T21-12-33.887360.parquet +3 -0
  15. eval_results_ood/global_step_30/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_30_actor_huggingface/2025-08-21T21-12-33.887360/details_lighteval|gpqa:diamond|0_2025-08-21T21-12-33.887360.parquet +3 -0
  16. eval_results_ood/global_step_40/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_40_actor_huggingface/2025-08-21T21-05-41.573261/details_extended|ifeval|0_2025-08-21T21-05-41.573261.csv +3 -0
  17. eval_results_ood/global_step_40/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_40_actor_huggingface/2025-08-21T21-05-41.573261/details_extended|ifeval|0_2025-08-21T21-05-41.573261.parquet +3 -0
  18. eval_results_ood/global_step_40/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_40_actor_huggingface/2025-08-21T21-05-41.573261/details_lighteval|gpqa:diamond|0_2025-08-21T21-05-41.573261.parquet +3 -0
  19. eval_results_ood/global_step_50/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_50_actor_huggingface/2025-08-21T21-07-16.342773/details_extended|ifeval|0_2025-08-21T21-07-16.342773.parquet +3 -0
  20. eval_results_ood/global_step_50/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_50_actor_huggingface/2025-08-21T21-07-16.342773/details_lighteval|gpqa:diamond|0_2025-08-21T21-07-16.342773.parquet +3 -0
  21. eval_results_ood/global_step_60/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_60_actor_huggingface/2025-08-21T21-02-28.834434/details_extended|ifeval|0_2025-08-21T21-02-28.834434.parquet +3 -0
  22. eval_results_ood/global_step_60/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_60_actor_huggingface/2025-08-21T21-02-28.834434/details_lighteval|gpqa:diamond|0_2025-08-21T21-02-28.834434.parquet +3 -0
  23. eval_results_ood/global_step_70/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_70_actor_huggingface/2025-08-21T22-07-45.235827/details_extended|ifeval|0_2025-08-21T22-07-45.235827.parquet +3 -0
  24. eval_results_ood/global_step_70/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_70_actor_huggingface/2025-08-21T22-07-45.235827/details_lighteval|gpqa:diamond|0_2025-08-21T22-07-45.235827.parquet +3 -0
  25. eval_results_ood/global_step_80/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_80_actor_huggingface/2025-08-21T22-09-19.047501/details_extended|ifeval|0_2025-08-21T22-09-19.047501.parquet +3 -0
  26. eval_results_ood/global_step_80/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_80_actor_huggingface/2025-08-21T22-09-19.047501/details_lighteval|gpqa:diamond|0_2025-08-21T22-09-19.047501.parquet +3 -0
  27. global_step_40/actor/huggingface/tokenizer.json +3 -0
  28. global_step_50/actor/huggingface/tokenizer.json +3 -0
  29. global_step_60/actor/huggingface/tokenizer.json +3 -0
  30. global_step_60/data.pt +3 -0
  31. global_step_70/actor/huggingface/tokenizer.json +3 -0
  32. global_step_70/data.pt +3 -0
  33. global_step_80/actor/huggingface/tokenizer.json +3 -0
  34. global_step_80/data.pt +3 -0
  35. global_step_90/actor/huggingface/tokenizer.json +3 -0
  36. global_step_90/data.pt +3 -0
.gitattributes CHANGED
@@ -76,3 +76,8 @@ eval_results_avg4/plots/eval_results_avg4_clip_ratio.png filter=lfs diff=lfs mer
76
  eval_results_avg4/plots/eval_results_avg4_correct_tokens.png filter=lfs diff=lfs merge=lfs -text
77
  eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
78
  eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
76
  eval_results_avg4/plots/eval_results_avg4_correct_tokens.png filter=lfs diff=lfs merge=lfs -text
77
  eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
78
  eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png filter=lfs diff=lfs merge=lfs -text
79
+ eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png filter=lfs diff=lfs merge=lfs -text
80
+ eval_results_ood/global_step_20/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_20_actor_huggingface/2025-08-21T21-13-14.882470/details_extended|ifeval|0_2025-08-21T21-13-14.882470.csv filter=lfs diff=lfs merge=lfs -text
81
+ eval_results_ood/global_step_40/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_40_actor_huggingface/2025-08-21T21-05-41.573261/details_extended|ifeval|0_2025-08-21T21-05-41.573261.csv filter=lfs diff=lfs merge=lfs -text
82
+ eval_results_ood/global_step_30/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_30_actor_huggingface/2025-08-21T21-12-33.887360/details_extended|ifeval|0_2025-08-21T21-12-33.887360.csv filter=lfs diff=lfs merge=lfs -text
83
+ eval_results_ood/global_step_0/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_0_actor_huggingface/2025-08-21T21-29-36.921510/details_extended|ifeval|0_2025-08-21T21-29-36.921510.csv filter=lfs diff=lfs merge=lfs -text
eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png ADDED

Git LFS Details

  • SHA256: 25685aeb0a33b39fb3916302421d288a747fe545d9707da468c0720e0360d881
  • Pointer size: 131 Bytes
  • Size of remote file: 220 kB
eval_results_ood/global_step_0/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_0_actor_huggingface/2025-08-21T21-29-36.921510/details_extended|ifeval|0_2025-08-21T21-29-36.921510.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03c483e125698454a9a391e620eb9eba146b61e519eab8cc4dc168ecdf6a857a
3
+ size 21177731
eval_results_ood/global_step_0/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_0_actor_huggingface/2025-08-21T21-29-36.921510/details_extended|ifeval|0_2025-08-21T21-29-36.921510.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09aa4cacf1f1182770a87a0e141ba6c34346d8162f9c2eb72c13899cdf8d52c0
3
+ size 2974780
eval_results_ood/global_step_0/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_0_actor_huggingface/2025-08-21T21-29-36.921510/details_lighteval|gpqa:diamond|0_2025-08-21T21-29-36.921510.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26398883f98c3686e5a8a7bf3d440028c67abb9fb17b5a19d82ac36ea27d1945
3
+ size 570699
eval_results_ood/global_step_10/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_10_actor_huggingface/2025-08-21T21-23-51.437133/details_extended|ifeval|0_2025-08-21T21-23-51.437133.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c8d0935d990d685202b1e0f8ea65726f4eb4e131f83293f0f44f2f74f99c62b
3
+ size 2633515
eval_results_ood/global_step_10/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_10_actor_huggingface/2025-08-21T21-23-51.437133/details_lighteval|gpqa:diamond|0_2025-08-21T21-23-51.437133.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a054ed9632828b0c800c4ee2df15be5d8df9ced603fe46a961613b48cda46f7e
3
+ size 501352
eval_results_ood/global_step_100/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_100_actor_huggingface/2025-08-21T22-04-05.839743/details_extended|ifeval|0_2025-08-21T22-04-05.839743.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6f10f74e918f12db81b2bdd07dfa779ca03dd5c1382c56db580973f0d4587fd
3
+ size 1674677
eval_results_ood/global_step_100/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_100_actor_huggingface/2025-08-21T22-04-05.839743/details_lighteval|gpqa:diamond|0_2025-08-21T22-04-05.839743.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f74ede7f799de14ff30cb8b3d839f95fcb447e3c0b0f1bbe736e21ef8bbddb4
3
+ size 530549
eval_results_ood/global_step_20/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_20_actor_huggingface/2025-08-21T21-13-14.882470/details_extended|ifeval|0_2025-08-21T21-13-14.882470.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15069292637859f420a2de75a90b527193487a21d42e5b4ccf977174045b83e4
3
+ size 14367367
eval_results_ood/global_step_20/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_20_actor_huggingface/2025-08-21T21-13-14.882470/details_extended|ifeval|0_2025-08-21T21-13-14.882470.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:985393e910b5c94576ad983cac56240fd86e20ab6e0e9c2c91d841abc2b4957d
3
+ size 2422975
eval_results_ood/global_step_20/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_20_actor_huggingface/2025-08-21T21-13-14.882470/details_lighteval|gpqa:diamond|0_2025-08-21T21-13-14.882470.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16b528e157d42f4f0c7fc24e268339cf1863f80cc382c0af9640ae0bf06ad20d
3
+ size 527451
eval_results_ood/global_step_30/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_30_actor_huggingface/2025-08-21T21-12-33.887360/details_extended|ifeval|0_2025-08-21T21-12-33.887360.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b944dd332f178b56dc8e04514c0bfb08450ca48d59fe492b87ae8595baa7e559
3
+ size 14435151
eval_results_ood/global_step_30/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_30_actor_huggingface/2025-08-21T21-12-33.887360/details_extended|ifeval|0_2025-08-21T21-12-33.887360.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7fe3ba9c0151e6decce4e56617b5e332fc85e3a93638873a66e8544d576f0cd
3
+ size 2307178
eval_results_ood/global_step_30/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_30_actor_huggingface/2025-08-21T21-12-33.887360/details_lighteval|gpqa:diamond|0_2025-08-21T21-12-33.887360.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a7e3b4242c6cf72eb1d3545ccb43b759650356ca4d7c6f9a8e60fb5b8e49dd6
3
+ size 521438
eval_results_ood/global_step_40/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_40_actor_huggingface/2025-08-21T21-05-41.573261/details_extended|ifeval|0_2025-08-21T21-05-41.573261.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbc236ca64bb6158864e0f723564b6739b258fbb80995328af60845cbab1533e
3
+ size 12905963
eval_results_ood/global_step_40/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_40_actor_huggingface/2025-08-21T21-05-41.573261/details_extended|ifeval|0_2025-08-21T21-05-41.573261.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77a26a92fde69e7133e9fdbc8a18d1d155d0003fdf2a1a8fd63e22f94e46bc7b
3
+ size 1873896
eval_results_ood/global_step_40/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_40_actor_huggingface/2025-08-21T21-05-41.573261/details_lighteval|gpqa:diamond|0_2025-08-21T21-05-41.573261.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7edb5c611f5397039f0d95229ec85d87d604217b61e7a70d3a82986439cec4e4
3
+ size 510379
eval_results_ood/global_step_50/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_50_actor_huggingface/2025-08-21T21-07-16.342773/details_extended|ifeval|0_2025-08-21T21-07-16.342773.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80c184031e314b57245972a08112910b1deffa84680353ae4f0117079e48b8d6
3
+ size 2188770
eval_results_ood/global_step_50/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_50_actor_huggingface/2025-08-21T21-07-16.342773/details_lighteval|gpqa:diamond|0_2025-08-21T21-07-16.342773.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d0e6cae26b10a70e00192d86192cf6e2d63290b55b54a202b6dc334828676ee
3
+ size 483095
eval_results_ood/global_step_60/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_60_actor_huggingface/2025-08-21T21-02-28.834434/details_extended|ifeval|0_2025-08-21T21-02-28.834434.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57a63e5ebd4c1b622afc5a5b4608d273043d9ee889bfbeba9ca201d679cc7151
3
+ size 1820524
eval_results_ood/global_step_60/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_60_actor_huggingface/2025-08-21T21-02-28.834434/details_lighteval|gpqa:diamond|0_2025-08-21T21-02-28.834434.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6edd1c75fc0370c7ba85385582838df701de257fca081a69544dbe7acd75f2e
3
+ size 506049
eval_results_ood/global_step_70/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_70_actor_huggingface/2025-08-21T22-07-45.235827/details_extended|ifeval|0_2025-08-21T22-07-45.235827.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4eff6ff333731df25f53dd222bdc571406129f25bb057a0f8370f889b856e7df
3
+ size 1833126
eval_results_ood/global_step_70/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_70_actor_huggingface/2025-08-21T22-07-45.235827/details_lighteval|gpqa:diamond|0_2025-08-21T22-07-45.235827.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc08a150d1c6dcd1227ea2a5259cf7c448417956c49f30517edfb2292e91ca83
3
+ size 518906
eval_results_ood/global_step_80/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_80_actor_huggingface/2025-08-21T22-09-19.047501/details_extended|ifeval|0_2025-08-21T22-09-19.047501.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09409244ab01782d6a324b6b7ba6ddc2c74885017f612b353da34353a7333ba2
3
+ size 1805502
eval_results_ood/global_step_80/details/_home_work_minzijun_rl_output_checkpoints_ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_global_step_80_actor_huggingface/2025-08-21T22-09-19.047501/details_lighteval|gpqa:diamond|0_2025-08-21T22-09-19.047501.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebe2cdab407de930d504353f31ec7028562081759b8a4caa86bce00b5f3c478f
3
+ size 516656
global_step_40/actor/huggingface/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
global_step_50/actor/huggingface/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
global_step_60/actor/huggingface/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
global_step_60/data.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d3bd5d8feff3608d95cb8be3cbfa7854b6f415805bfeb6dd9fff04532de97cc
3
+ size 1947
global_step_70/actor/huggingface/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
global_step_70/data.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05becedff2a436a3fe52cc49654d4cb45b1afa8004a0bc2b5d8ad3bea4eacd7f
3
+ size 1947
global_step_80/actor/huggingface/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
global_step_80/data.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80af52da7482cb06da7a95d8b961db7948c0f99266e5b1351ea69261a8b4e3dd
3
+ size 1947
global_step_90/actor/huggingface/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
global_step_90/data.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:286885cc5233ead32cb72b4bea6d94110c16f3a596f9378a390939879cbc2ae5
3
+ size 1947