johndoe123345 committed on
Commit 45bb50b · verified · 1 Parent(s): 5dcd207

Add files using upload-large-folder tool
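For reference, uploads like this are typically produced with the upload_large_folder helper in the huggingface_hub Python library, which pushes a local checkpoint directory in resumable chunks. A minimal sketch, assuming huggingface_hub >= 0.24 is installed and that the repo name below is a placeholder:

    # Minimal sketch of the upload-large-folder flow (repo name is hypothetical).
    from huggingface_hub import HfApi

    api = HfApi()  # picks up the token from `huggingface-cli login`
    api.upload_large_folder(
        repo_id="johndoe123345/example-repo",  # placeholder repo id
        repo_type="model",
        folder_path="./checkpoints",  # local dir holding global_step_*/ and eval_results_0622/
    )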

This view is limited to 50 files because it contains too many changes. See raw diff.

Files changed (50)
  1. .gitattributes +5 -0
  2. eval_results_0622/eval_results.csv +25 -0
  3. eval_results_0622/global_step_10/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  4. eval_results_0622/global_step_10/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +13 -0
  5. eval_results_0622/global_step_10/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  6. eval_results_0622/global_step_10/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +13 -0
  7. eval_results_0622/global_step_10/gsm8k/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  8. eval_results_0622/global_step_10/gsm8k/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +13 -0
  9. eval_results_0622/global_step_10/math500/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  10. eval_results_0622/global_step_10/math500/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +13 -0
  11. eval_results_0622/global_step_10/minerva_math/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  12. eval_results_0622/global_step_10/minerva_math/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +24 -0
  13. eval_results_0622/global_step_10/olympiadbench/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  14. eval_results_0622/global_step_10/olympiadbench/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +13 -0
  15. eval_results_0622/global_step_60/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  16. eval_results_0622/global_step_60/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +13 -0
  17. eval_results_0622/global_step_60/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  18. eval_results_0622/global_step_60/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +13 -0
  19. eval_results_0622/global_step_60/gsm8k/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  20. eval_results_0622/global_step_60/gsm8k/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +13 -0
  21. eval_results_0622/global_step_60/math500/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  22. eval_results_0622/global_step_60/math500/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +13 -0
  23. eval_results_0622/global_step_60/minerva_math/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  24. eval_results_0622/global_step_60/minerva_math/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +24 -0
  25. eval_results_0622/global_step_60/olympiadbench/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  26. eval_results_0622/global_step_60/olympiadbench/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +13 -0
  27. eval_results_0622/global_step_85/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  28. eval_results_0622/global_step_85/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +13 -0
  29. eval_results_0622/global_step_85/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  30. eval_results_0622/global_step_85/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +13 -0
  31. eval_results_0622/global_step_85/gsm8k/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +13 -0
  32. eval_results_0622/global_step_85/minerva_math/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  33. eval_results_0622/global_step_85/minerva_math/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +24 -0
  34. global_step_100/actor/huggingface/added_tokens.json +24 -0
  35. global_step_100/actor/huggingface/config.json +30 -0
  36. global_step_100/actor/huggingface/generation_config.json +6 -0
  37. global_step_100/actor/huggingface/merges.txt +0 -0
  38. global_step_100/actor/huggingface/model.safetensors.index.json +346 -0
  39. global_step_100/actor/huggingface/special_tokens_map.json +31 -0
  40. global_step_100/actor/huggingface/tokenizer_config.json +208 -0
  41. global_step_100/actor/huggingface/vocab.json +0 -0
  42. global_step_115/actor/huggingface/model-00001-of-00007.safetensors +3 -0
  43. global_step_115/actor/huggingface/model-00002-of-00007.safetensors +3 -0
  44. global_step_115/actor/huggingface/model-00003-of-00007.safetensors +3 -0
  45. global_step_115/actor/huggingface/model-00004-of-00007.safetensors +3 -0
  46. global_step_115/actor/huggingface/model-00005-of-00007.safetensors +3 -0
  47. global_step_115/actor/huggingface/model-00006-of-00007.safetensors +3 -0
  48. global_step_115/actor/huggingface/model-00007-of-00007.safetensors +3 -0
  49. global_step_115/actor/model_world_size_8_rank_6.pt +3 -0
  50. global_step_115/actor/optim_world_size_8_rank_0.pt +3 -0
.gitattributes CHANGED
@@ -44,3 +44,8 @@ global_step_45/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
  global_step_105/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
  global_step_5/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
  global_step_115/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ global_step_75/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ global_step_55/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ global_step_15/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ global_step_30/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ global_step_95/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
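Each added line registers one checkpoint's tokenizer.json with Git LFS (filter=lfs diff=lfs merge=lfs -text), so only a pointer file lives in the repository history. A small sketch that lists the LFS-tracked patterns from .gitattributes:

    # Sketch: list the path patterns that .gitattributes routes through Git LFS.
    from pathlib import Path

    lfs_patterns = [
        line.split()[0]
        for line in Path(".gitattributes").read_text().splitlines()
        if "filter=lfs" in line
    ]
    print(lfs_patterns)  # e.g. ['global_step_115/actor/huggingface/tokenizer.json', ...]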
eval_results_0622/eval_results.csv ADDED
@@ -0,0 +1,25 @@
+ model,minerva_math_acc,minerva_math_pass_acc,minerva_math_tokens,minerva_math_keywords,minerva_math_correct_tokens,minerva_math_wrong_tokens,minerva_math_clip_ratio,minerva_math_stop_tokens,minerva_math_stop_ratio,minerva_math_box_ratio,minerva_math_repeat_ratio,amc23_acc,amc23_pass_acc,amc23_tokens,amc23_keywords,amc23_correct_tokens,amc23_wrong_tokens,amc23_clip_ratio,amc23_stop_tokens,amc23_stop_ratio,amc23_box_ratio,amc23_repeat_ratio,aime24_acc,aime24_pass_acc,aime24_tokens,aime24_keywords,aime24_correct_tokens,aime24_wrong_tokens,aime24_clip_ratio,aime24_stop_tokens,aime24_stop_ratio,aime24_box_ratio,aime24_repeat_ratio,gsm8k_acc,gsm8k_pass_acc,gsm8k_tokens,gsm8k_keywords,gsm8k_correct_tokens,gsm8k_wrong_tokens,gsm8k_clip_ratio,gsm8k_stop_tokens,gsm8k_stop_ratio,gsm8k_box_ratio,gsm8k_repeat_ratio,math500_acc,math500_pass_acc,math500_tokens,math500_keywords,math500_correct_tokens,math500_wrong_tokens,math500_clip_ratio,math500_stop_tokens,math500_stop_ratio,math500_box_ratio,math500_repeat_ratio,olympiadbench_acc,olympiadbench_pass_acc,olympiadbench_tokens,olympiadbench_keywords,olympiadbench_correct_tokens,olympiadbench_wrong_tokens,olympiadbench_clip_ratio,olympiadbench_stop_tokens,olympiadbench_stop_ratio,olympiadbench_box_ratio,olympiadbench_repeat_ratio,avg_acc,avg_pass_acc,avg_tokens,avg_keywords,avg_correct_tokens,avg_wrong_tokens,avg_clip_ratio,avg_stop_tokens,avg_stop_ratio,avg_box_ratio,avg_repeat_ratio
+ eval_results-global_step_0,19.9,19.9,634.0625,0.15441176470588236,507.7037037037037,665.3623853211009,0.0,634.0625,1.0,0.7610294117647058,0.44485294117647056,35.0,35.0,867.125,0.25,1288.857142857143,640.0384615384615,0.0,867.125,1.0,0.85,0.625,3.3,3.3,1002.0,0.13333333333333333,713.0,1011.9655172413793,0.0,1002.0,1.0,0.7333333333333333,0.7666666666666667,74.8,74.8,341.27445034116755,0.0356330553449583,286.6764705882353,502.93693693693695,0.0,341.27445034116755,1.0,0.8377558756633814,0.2623199393479909,52.0,52.0,631.896,0.166,482.8076923076923,793.4083333333333,0.0,631.896,1.0,0.828,0.514,17.8,17.8,822.5377777777778,0.23851851851851852,655.625,858.627027027027,0.0,822.5377777777778,1.0,0.7822222222222223,0.6251851851851852,33.800000000000004,33.800000000000004,716.4826213531575,0.16298277865044875,655.7783349094624,745.3897768997064,0.0,716.4826213531575,1.0,0.798723473830607,0.5396707887293856
+ eval_results-global_step_5,31.6,31.6,655.7022058823529,0.15073529411764705,496.3720930232558,729.3709677419355,0.0,655.7022058823529,1.0,0.9926470588235294,0.5330882352941176,37.5,37.5,788.35,0.35,620.4,889.12,0.0,788.35,1.0,1.0,0.7,10.0,10.0,1205.8,0.5333333333333333,816.3333333333334,1249.0740740740741,0.0,1205.8,1.0,0.9666666666666667,0.9,88.4,88.4,303.60121304018196,0.03184230477634572,291.0317324185249,399.3921568627451,0.0,303.60121304018196,1.0,0.9992418498862775,0.17513267626990145,71.8,71.8,614.86,0.108,505.3175487465181,893.7659574468086,0.0,614.86,1.0,0.994,0.514,31.4,31.4,930.4696296296296,0.28888888888888886,655.9811320754717,1056.1533477321814,0.0,930.4696296296296,1.0,0.9777777777777777,0.6696296296296296,45.11666666666667,45.11666666666667,749.797174758694,0.24379997018603583,564.2393065995174,869.479417309624,0.0,749.797174758694,1.0,0.9883888921923752,0.5819750901989414
+ eval_results-global_step_10,34.2,34.2,683.1066176470588,0.16176470588235295,556.7956989247311,748.731843575419,0.0,683.1066176470588,1.0,0.9926470588235294,0.4264705882352941,47.5,47.5,1004.575,0.175,692.4736842105264,1286.952380952381,0.0,1004.575,1.0,0.975,0.6,13.3,13.3,1129.4666666666667,0.4666666666666667,876.5,1168.3846153846155,0.0,1129.4666666666667,1.0,0.9666666666666667,0.7666666666666667,89.5,89.5,320.00833965125094,0.024260803639120546,311.93220338983053,388.568345323741,0.0,320.00833965125094,1.0,0.9992418498862775,0.1599696739954511,72.8,72.8,648.294,0.138,540.2692307692307,937.4191176470588,0.0,648.294,1.0,0.998,0.49,33.6,33.6,932.2133333333334,0.2562962962962963,745.9559471365639,1026.5892857142858,0.0,932.2133333333334,1.0,0.9837037037037037,0.6874074074074074,48.48333333333334,48.48333333333334,786.277326216385,0.20366474541407276,620.6544607384805,926.1075980995835,0.0,786.277326216385,1.0,0.9858765465133629,0.5217523893841365
+ eval_results-global_step_15,37.5,37.5,678.7426470588235,0.09558823529411764,572.8627450980392,742.2705882352941,0.0,678.7426470588235,1.0,1.0,0.4522058823529412,42.5,42.5,1082.625,0.275,745.5882352941177,1331.7391304347825,0.0,1082.625,1.0,0.975,0.65,10.0,10.0,1149.2666666666667,0.4666666666666667,879.0,1179.2962962962963,0.0,1149.2666666666667,1.0,1.0,0.8,90.2,90.2,328.26762699014404,0.02047005307050796,317.7336134453781,425.4418604651163,0.0,328.26762699014404,1.0,0.9992418498862775,0.155420773313116,73.6,73.6,694.716,0.128,559.3695652173913,1072.0454545454545,0.0,694.716,1.0,0.99,0.48,34.5,34.5,942.0681481481481,0.2696296296296296,745.7982832618026,1045.5316742081448,0.0,942.0681481481481,1.0,0.9851851851851852,0.6711111111111111,48.04999999999999,48.04999999999999,812.6143481439636,0.20922576411015367,636.7254070527882,966.0541673641816,0.0,812.6143481439636,1.0,0.9915711725119104,0.5347896277961948
+ eval_results-global_step_20,36.8,36.8,699.4154411764706,0.1213235294117647,581.24,768.1220930232558,0.0,699.4154411764706,1.0,0.9963235294117647,0.47794117647058826,45.0,45.0,946.625,0.125,792.6111111111111,1072.6363636363637,0.0,946.625,1.0,1.0,0.725,13.3,13.3,1163.3333333333333,0.4,1187.5,1159.6153846153845,0.0,1163.3333333333333,1.0,0.9666666666666667,0.7666666666666667,90.4,90.4,328.5253980288097,0.01819560272934041,321.2975691533948,396.96031746031747,0.0,328.5253980288097,1.0,0.9992418498862775,0.14480667172100076,75.2,75.2,664.806,0.126,554.5398936170212,999.1612903225806,0.0,664.806,1.0,0.992,0.472,37.6,37.6,946.085925925926,0.25925925925925924,747.7716535433071,1065.7339667458432,0.0,946.085925925926,1.0,0.9807407407407407,0.6444444444444445,49.71666666666667,49.71666666666667,791.4651830774234,0.17496306523339408,697.4933712374724,910.3715693006243,0.0,791.4651830774234,1.0,0.9891621311175749,0.5384764932171167
+ eval_results-global_step_25,37.9,37.9,699.7977941176471,0.125,595.0194174757281,763.6568047337278,0.0,699.7977941176471,1.0,0.9926470588235294,0.4742647058823529,47.5,47.5,1156.9,0.225,789.3157894736842,1489.4761904761904,0.0,1156.9,1.0,0.975,0.675,16.7,16.7,1119.7666666666667,0.4,1035.4,1136.64,0.0,1119.7666666666667,1.0,0.9666666666666667,0.7333333333333333,92.0,92.0,327.84988627748294,0.014404852160727824,321.6153212520593,399.93333333333334,0.0,327.84988627748294,1.0,0.9992418498862775,0.14783927217589082,74.2,74.2,655.184,0.2,541.7385444743935,981.4496124031008,0.0,655.184,1.0,0.996,0.46,35.1,35.1,1000.8251851851852,0.2637037037037037,767.506329113924,1127.0730593607307,0.0,1000.8251851851852,1.0,0.9748148148148148,0.6637037037037037,50.56666666666667,50.56666666666667,826.7205887078304,0.2046847593107386,675.0992336316316,983.0381667178472,0.0,826.7205887078304,1.0,0.9840617316985482,0.5256901691825469
+ eval_results-global_step_30,40.4,40.4,683.4227941176471,0.11764705882352941,551.9727272727273,772.679012345679,0.0,683.4227941176471,1.0,0.9963235294117647,0.4375,47.5,47.5,952.4,0.2,655.4736842105264,1221.047619047619,0.0,952.4,1.0,1.0,0.6,6.7,6.7,1360.6333333333334,0.6333333333333333,914.0,1392.5357142857142,0.0,1360.6333333333334,1.0,0.9,0.8333333333333334,91.6,91.6,327.2941622441243,0.03335860500379075,318.2756622516556,425.44144144144144,0.0,327.2941622441243,1.0,0.9984836997725549,0.1463229719484458,73.8,73.8,642.038,0.14,539.7886178861788,930.0534351145038,0.0,642.038,1.0,0.996,0.43,37.3,37.3,960.6666666666666,0.3333333333333333,722.2420634920635,1102.7068557919622,0.0,960.6666666666666,1.0,0.9807407407407407,0.6414814814814814,49.550000000000004,49.550000000000004,821.0758260602953,0.24294538841566446,616.9587925188586,974.0773463378199,0.0,821.0758260602953,1.0,0.9785913283208435,0.5147729644605435
+ eval_results-global_step_35,39.0,39.0,685.2573529411765,0.18382352941176472,569.2075471698113,759.3614457831326,0.0,685.2573529411765,1.0,0.9926470588235294,0.41911764705882354,55.0,55.0,939.525,0.175,749.1363636363636,1172.2222222222222,0.0,939.525,1.0,1.0,0.625,13.3,13.3,1198.2,0.4666666666666667,1001.25,1228.5,0.0,1198.2,1.0,0.9666666666666667,0.6,91.2,91.2,331.1614859742229,0.026535253980288095,316.8279301745636,479.8103448275862,0.0,331.1614859742229,1.0,0.9977255496588324,0.15390447308567096,75.0,75.0,609.056,0.184,511.58666666666664,901.464,0.0,609.056,1.0,0.998,0.428,37.2,37.2,981.0014814814815,0.30518518518518517,719.792828685259,1135.632075471698,0.0,981.0014814814815,1.0,0.9644444444444444,0.6444444444444445,51.78333333333333,51.78333333333333,790.7002200661468,0.2235351058739841,644.633556055444,946.1650147174399,0.0,790.7002200661468,1.0,0.9865806199322456,0.47841109409815646
+ eval_results-global_step_40,43.0,43.0,694.1286764705883,0.1801470588235294,584.3675213675214,776.9806451612903,0.0,694.1286764705883,1.0,0.9963235294117647,0.4852941176470588,52.5,52.5,1009.225,0.225,786.8571428571429,1255.0,0.0,1009.225,1.0,0.975,0.625,10.0,10.0,1235.1,0.6,930.0,1269.0,0.0,1235.1,1.0,0.9666666666666667,0.8,90.8,90.8,327.78241091736163,0.03335860500379075,320.07178631051755,404.12396694214874,0.0,327.78241091736163,1.0,0.9992418498862775,0.15466262319939347,74.6,74.6,656.17,0.172,535.8096514745308,1009.6692913385826,0.0,656.17,1.0,0.99,0.426,36.1,36.1,964.2696296296297,0.3540740740740741,705.0,1111.048723897912,0.0,964.2696296296297,1.0,0.9674074074074074,0.6548148148148148,51.166666666666664,51.166666666666664,814.4459528362632,0.2607632896502324,643.684350334952,970.9704378899891,0.0,814.4459528362632,1.0,0.9824399088953526,0.5242952592768778
+ eval_results-global_step_45,41.2,41.2,703.2389705882352,0.1875,560.3660714285714,803.25,0.0,703.2389705882352,1.0,0.9963235294117647,0.4632352941176471,52.5,52.5,1061.975,0.25,704.1904761904761,1457.421052631579,0.0,1061.975,1.0,0.975,0.625,16.7,16.7,1529.9,0.43333333333333335,1025.6,1630.76,0.0,1529.9,1.0,0.9,0.5666666666666667,91.2,91.2,346.3912054586808,0.030326004548900682,333.7098919368246,477.9051724137931,0.0,346.3912054586808,1.0,0.9992418498862775,0.16224412433661864,76.0,76.0,665.828,0.26,547.2842105263157,1041.2166666666667,0.0,665.828,1.0,0.992,0.456,38.1,38.1,1013.0059259259259,0.4266666666666667,715.4241245136187,1195.968899521531,0.0,1013.0059259259259,1.0,0.9733333333333334,0.6370370370370371,52.616666666666674,52.616666666666674,886.7231836621403,0.2646376674248168,647.7624624326344,1101.086965205595,0.0,886.7231836621403,1.0,0.9726497854385626,0.48503052035966165
+ eval_results-global_step_50,41.9,41.9,747.9852941176471,0.17647058823529413,628.5526315789474,834.1582278481013,0.0,747.9852941176471,1.0,0.9926470588235294,0.5073529411764706,52.5,52.5,942.05,0.375,751.3333333333334,1152.842105263158,0.0,942.05,1.0,1.0,0.625,16.7,16.7,1360.2333333333333,0.5333333333333333,935.6,1445.16,0.0,1360.2333333333333,1.0,0.9333333333333333,0.7,90.7,90.7,358.65579984836995,0.04852160727824109,341.5242474916388,525.2357723577236,0.0,358.65579984836995,1.0,0.9984836997725549,0.17513267626990145,74.8,74.8,654.414,0.202,536.9812834224599,1002.984126984127,0.0,654.414,1.0,0.998,0.466,37.6,37.6,1034.0,0.4074074074074074,732.1732283464567,1216.0997624703089,0.0,1034.0,1.0,0.9688888888888889,0.6444444444444445,52.366666666666674,52.366666666666674,849.5564045498918,0.2904554893757127,654.3607873621394,1029.4133324872364,0.0,849.5564045498918,1.0,0.9818921634697176,0.5196550103151361
+ eval_results-global_step_55,39.7,39.7,740.0588235294117,0.20955882352941177,633.5277777777778,810.2134146341464,0.0,740.0588235294117,1.0,1.0,0.48161764705882354,60.0,60.0,1049.425,0.3,938.8333333333334,1215.3125,0.0,1049.425,1.0,1.0,0.7,16.7,16.7,1838.8333333333333,0.9333333333333333,961.2,2014.36,0.0,1838.8333333333333,1.0,0.9,0.7666666666666667,90.5,90.5,365.8385140257771,0.03866565579984837,339.5427135678392,617.016,0.0,365.8385140257771,1.0,0.9984836997725549,0.14556482183472327,75.8,75.8,708.026,0.188,560.1688654353562,1171.1487603305786,0.0,708.026,1.0,0.988,0.464,39.6,39.6,1052.1925925925925,0.3718518518518519,816.6966292134831,1206.3039215686274,0.0,1052.1925925925925,1.0,0.9688888888888889,0.6548148148148148,53.71666666666667,53.71666666666667,959.0623772468524,0.34023494408574084,708.328219887965,1172.3924327555587,0.0,959.0623772468524,1.0,0.975895431443574,0.5354439917291713
+ eval_results-global_step_60,39.3,39.3,714.2647058823529,0.20220588235294118,613.3831775700935,779.6848484848484,0.0,714.2647058823529,1.0,0.9926470588235294,0.4742647058823529,50.0,50.0,917.825,0.4,715.05,1120.6,0.0,917.825,1.0,1.0,0.65,20.0,20.0,1479.0666666666666,0.8333333333333334,1132.5,1565.7083333333333,0.0,1479.0666666666666,1.0,0.9333333333333333,0.8666666666666667,90.4,90.4,387.07202426080363,0.04094010614101592,356.78942953020135,671.2992125984252,0.0,387.07202426080363,1.0,0.9992418498862775,0.177407126611069,76.0,76.0,709.612,0.2,557.8078947368421,1190.325,0.0,709.612,1.0,0.99,0.464,40.1,40.1,1060.3437037037038,0.41333333333333333,783.4391143911439,1246.0891089108911,0.0,1060.3437037037038,1.0,0.9688888888888889,0.674074074074074,52.63333333333333,52.63333333333333,878.0306834189211,0.3483021091934373,693.1616027047135,1095.617750554583,0.0,878.0306834189211,1.0,0.9806851884886715,0.5510687622056938
+ eval_results-global_step_65,39.0,39.0,723.2463235294117,0.21691176470588236,583.4905660377359,812.4879518072289,0.0,723.2463235294117,1.0,1.0,0.4632352941176471,52.5,52.5,985.125,0.425,849.0,1135.578947368421,0.0,985.125,1.0,1.0,0.7,16.7,16.7,1149.4333333333334,0.9,951.4,1189.04,0.0,1149.4333333333334,1.0,0.9666666666666667,0.8333333333333334,89.8,89.8,407.357846853677,0.05155420773313116,353.884388185654,880.2388059701492,0.0,407.357846853677,1.0,0.9984836997725549,0.18802122820318423,77.8,77.8,669.208,0.212,554.8560411311054,1069.954954954955,0.0,669.208,1.0,0.992,0.476,39.3,39.3,1054.8874074074074,0.4014814814814815,790.9018867924528,1225.5121951219512,0.0,1054.8874074074074,1.0,0.9718518518518519,0.6355555555555555,52.51666666666667,52.51666666666667,831.542985187305,0.3678245756534158,680.5888136911581,1052.135475870451,0.0,831.542985187305,1.0,0.9881670363818457,0.5493575685349533
+ eval_results-global_step_70,39.7,39.7,793.9926470588235,0.20220588235294118,579.0648148148148,935.530487804878,0.0,793.9926470588235,1.0,0.9926470588235294,0.48161764705882354,50.0,50.0,950.125,0.225,714.85,1185.4,0.0,950.125,1.0,1.0,0.7,13.3,13.3,1461.9,1.0,1171.75,1506.5384615384614,0.0,1461.9,1.0,0.9333333333333333,0.6,88.5,88.5,447.41167551175135,0.05458680818802123,366.4335904027421,1069.1315789473683,0.0,447.41167551175135,1.0,0.9984836997725549,0.19636087945413191,75.0,75.0,705.668,0.25,561.2346666666666,1138.968,0.0,705.668,1.0,0.994,0.466,40.0,40.0,1085.362962962963,0.40444444444444444,820.0481481481481,1262.2395061728396,0.0,1085.362962962963,1.0,0.9748148148148148,0.6622222222222223,51.083333333333336,51.083333333333336,907.410047588923,0.3560395224975678,702.2302033387286,1182.9680057439246,0.0,907.410047588923,1.0,0.9822131511240387,0.5177001247891962
+ eval_results-global_step_75,39.3,39.3,741.7022058823529,0.1875,579.1588785046729,847.1090909090909,0.0,741.7022058823529,1.0,0.9926470588235294,0.5073529411764706,55.0,55.0,1440.75,0.275,722.9090909090909,2318.1111111111113,0.0,1440.75,1.0,0.925,0.625,16.7,16.7,1550.2666666666667,1.1333333333333333,1031.8,1653.96,0.0,1550.2666666666667,1.0,0.9333333333333333,0.7666666666666667,86.6,86.6,474.4495830174375,0.06520090978013647,375.492119089317,1112.9209039548023,0.0,474.4495830174375,1.0,0.9992418498862775,0.19863532979529946,75.6,75.6,702.708,0.256,594.1216931216932,1039.1475409836066,0.0,702.708,1.0,0.99,0.46,40.0,40.0,1080.6414814814814,0.6874074074074074,765.3296296296296,1290.8493827160494,0.0,1080.6414814814814,1.0,0.9688888888888889,0.674074074074074,52.199999999999996,52.199999999999996,998.4196561746563,0.4340736084201462,678.1352352090672,1377.01633827911,0.0,998.4196561746563,1.0,0.9681851884886715,0.5386215019520851
+ eval_results-global_step_80,40.4,40.4,750.0698529411765,0.22794117647058823,600.2545454545455,851.7962962962963,0.0,750.0698529411765,1.0,0.9963235294117647,0.4227941176470588,47.5,47.5,1132.975,0.325,722.2631578947369,1504.5714285714287,0.0,1132.975,1.0,1.0,0.7,16.7,16.7,1875.9,1.1,1095.8,2031.92,0.0,1875.9,1.0,0.8666666666666667,0.7,88.6,88.6,489.7210007581501,0.0576194086429113,413.6595380667237,1082.4933333333333,0.0,489.7210007581501,1.0,0.9984836997725549,0.21986353297952996,76.2,76.2,702.188,0.318,567.9685039370079,1131.9159663865546,0.002,671.5270541082165,0.998,0.99,0.464,40.9,40.9,1076.6696296296295,0.6281481481481481,771.3586956521739,1287.8621553884711,0.0,1076.6696296296295,1.0,0.9703703703703703,0.6666666666666666,51.71666666666666,51.71666666666666,1004.5872472214927,0.44278478887694134,695.2174068341978,1315.0931966626808,0.0003333333333333333,999.477089572862,0.9996666666666667,0.9703073777035595,0.5288873862155425
+ eval_results-global_step_85,40.4,40.4,760.4080882352941,1.1544117647058822,586.8818181818182,878.2345679012345,0.0,760.4080882352941,1.0,0.9963235294117647,0.5257352941176471,57.5,57.5,1000.55,0.25,901.3913043478261,1134.7058823529412,0.0,1000.55,1.0,1.0,0.625,13.3,13.3,1397.0,0.9666666666666667,1086.75,1444.7307692307693,0.0,1397.0,1.0,0.9666666666666667,0.6666666666666666,88.6,88.6,476.5481425322214,0.08188021228203184,414.2457191780822,958.4635761589404,0.001516300227445034,452.9703872437358,0.9984836997725549,0.9992418498862775,0.22517058377558757,75.2,75.2,725.412,0.246,559.0984042553191,1229.717741935484,0.002,694.7975951903808,0.998,0.992,0.502,39.1,39.1,1132.634074074074,0.5244444444444445,804.6363636363636,1343.3187347931873,0.0,1132.634074074074,1.0,0.9674074074074074,0.6696296296296296,52.35,52.35,915.425384140265,0.5372338480165042,725.5006015999015,1164.8618787287594,0.0005860500379075057,906.3933574572475,0.9994139499620925,0.9869399088953527,0.5357003623649219
+ eval_results-global_step_90,39.7,39.7,782.4558823529412,0.2536764705882353,652.5833333333334,867.9817073170732,0.0,782.4558823529412,1.0,0.9963235294117647,0.49264705882352944,52.5,52.5,978.2,0.4,788.2380952380952,1188.157894736842,0.0,978.2,1.0,0.975,0.7,13.3,13.3,1396.6333333333334,0.8,1078.25,1445.6153846153845,0.0,1396.6333333333334,1.0,0.9666666666666667,0.8,89.1,89.1,478.39954510993175,0.05989385898407885,377.2672340425532,1303.611111111111,0.0,478.39954510993175,1.0,0.9969673995451099,0.20849128127369218,77.8,77.8,761.876,0.242,603.6041131105399,1316.5405405405406,0.004,700.6726907630522,0.996,0.986,0.486,38.7,38.7,1061.754074074074,0.5037037037037037,793.3141762452108,1230.987922705314,0.0,1061.754074074074,1.0,0.9762962962962963,0.6725925925925926,51.849999999999994,51.849999999999994,909.8864724783801,0.37654567221266966,715.5428253282888,1225.482426837711,0.0006666666666666666,899.6859209388887,0.9993333333333334,0.9828756486533062,0.5599551554483023
+ eval_results-global_step_95,38.2,38.2,789.9117647058823,0.2426470588235294,605.1538461538462,904.2857142857143,0.0,789.9117647058823,1.0,0.9963235294117647,0.5,55.0,55.0,1106.6,0.375,878.2272727272727,1385.7222222222222,0.0,1106.6,1.0,0.975,0.775,16.7,16.7,1185.9,0.9666666666666667,1118.6,1199.36,0.0,1185.9,1.0,0.9666666666666667,0.7333333333333333,88.9,88.9,534.4450341167551,0.05913570887035633,462.6962457337884,1106.4829931972788,0.000758150113722517,510.7723823975721,0.9992418498862775,0.9992418498862775,0.2259287338893101,73.6,73.6,792.604,0.744,571.5733695652174,1408.810606060606,0.0,792.604,1.0,0.984,0.5,37.9,37.9,1115.5733333333333,0.482962962962963,806.3984375,1304.472553699284,0.0,1115.5733333333333,1.0,0.9733333333333334,0.6622222222222223,51.71666666666666,51.71666666666666,920.8390220259953,0.4784020662205859,740.4415286133541,1218.1890149108508,0.00012635835228708617,916.893580072798,0.9998736416477129,0.9824275632163405,0.5660807149074776
+ eval_results-global_step_100,39.0,39.0,841.4485294117648,0.19117647058823528,680.8490566037735,944.0,0.0,841.4485294117648,1.0,0.9963235294117647,0.5183823529411765,50.0,50.0,1164.55,0.55,745.5,1583.6,0.0,1164.55,1.0,1.0,0.75,20.0,20.0,1889.7,0.9666666666666667,981.8333333333334,2116.6666666666665,0.0,1889.7,1.0,0.8666666666666667,0.8,87.0,87.0,602.1000758150113,0.09552691432903715,478.80296425457715,1424.3197674418604,0.000758150113722517,590.4157814871016,0.9992418498862775,0.9977255496588324,0.25246398786959817,75.6,75.6,762.322,0.308,586.1111111111111,1308.2868852459017,0.0,762.322,1.0,0.992,0.496,40.1,40.1,1179.0592592592593,0.5422222222222223,811.3247232472324,1425.7326732673268,0.0,1179.0592592592593,1.0,0.9703703703703703,0.6755555555555556,51.95000000000001,51.95000000000001,1073.1966440810058,0.4422653789676936,714.0701980916712,1467.1009987702926,0.00012635835228708617,1071.249261693021,0.9998736416477129,0.9705143526846056,0.5820669827277217
+ eval_results-global_step_105,40.4,40.4,855.7242647058823,0.21691176470588236,591.1272727272727,1035.388888888889,0.0,855.7242647058823,1.0,0.9889705882352942,0.5220588235294118,57.5,57.5,1227.775,0.9,1150.1739130434783,1332.764705882353,0.0,1227.775,1.0,1.0,0.725,16.7,16.7,1724.2333333333333,0.9,1033.8,1862.32,0.0,1724.2333333333333,1.0,0.9333333333333333,0.7333333333333333,87.8,87.8,680.8218347232752,0.06595905989385899,553.2124352331606,1598.6583850931677,0.0,680.8218347232752,1.0,0.9992418498862775,0.24564063684609552,72.6,72.6,868.526,0.372,630.4573002754821,1499.3211678832117,0.0,868.526,1.0,0.992,0.502,39.3,39.3,1186.2977777777778,0.6518518518518519,814.0490566037736,1426.8975609756098,0.0,1186.2977777777778,1.0,0.9718518518518519,0.6844444444444444,52.38333333333333,52.38333333333333,1090.5630350900449,0.5177871127419322,795.4699963138611,1459.2251181205386,0.0,1090.5630350900449,1.0,0.9808996038844594,0.5687462063588808
+ eval_results-global_step_110,36.0,36.0,977.5367647058823,0.3014705882352941,778.0408163265306,1089.896551724138,0.0,977.5367647058823,1.0,0.9889705882352942,0.5147058823529411,55.0,55.0,1829.7,0.525,1231.0,2561.4444444444443,0.0,1829.7,1.0,0.925,0.775,16.7,16.7,1951.5666666666666,0.7666666666666667,2760.0,1789.88,0.0,1951.5666666666666,1.0,0.9666666666666667,0.8333333333333334,85.1,85.1,852.5640636846096,0.16982562547384383,689.9715048975958,1784.1530612244899,0.001516300227445034,817.3659832953682,0.9984836997725549,0.9992418498862775,0.28203184230477635,74.6,74.6,999.538,0.526,748.2815013404826,1737.4803149606298,0.0,999.538,1.0,0.988,0.516,38.7,38.7,1346.0074074074073,0.4874074074074074,1031.11877394636,1544.524154589372,0.0014814814814814814,1324.2299703264096,0.9985185185185185,0.9703703703703703,0.7007407407407408,51.01666666666666,51.01666666666666,1326.1521504107611,0.46272838129720206,1206.402099418495,1751.2297544905123,0.0004996302848210859,1316.6562308323878,0.9995003697151789,0.9730415791931014,0.603635299788632
+ eval_results-global_step_115,37.9,37.9,912.125,0.21691176470588236,587.1844660194175,1110.1656804733727,0.0,912.125,1.0,0.9889705882352942,0.5257352941176471,50.0,50.0,1149.125,0.425,945.0,1353.25,0.0,1149.125,1.0,1.0,0.675,16.7,16.7,2277.866666666667,1.1,944.0,2544.64,0.0,2277.866666666667,1.0,0.8666666666666667,0.9333333333333333,88.3,88.3,559.9886277482941,0.1508718726307809,469.8291845493562,1242.0389610389611,0.0,559.9886277482941,1.0,0.9992418498862775,0.2494313874147081,74.8,74.8,822.55,0.242,634.187165775401,1381.6587301587301,0.002,792.1362725450902,0.998,0.988,0.5,38.7,38.7,1189.5748148148148,0.4237037037037037,816.0613026819923,1425.0507246376812,0.0,1189.5748148148148,1.0,0.9792592592592593,0.6874074074074074,51.06666666666666,51.06666666666666,1151.8716848716292,0.4264145568400612,732.7103531710278,1509.467349384791,0.0003333333333333333,1146.802730295811,0.9996666666666667,0.9703563940079163,0.595151237045516
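The CSV above holds one row per evaluated checkpoint, with per-benchmark columns (acc, pass_acc, token statistics, clip/stop/box/repeat ratios) plus avg_* aggregates across the six benchmarks. A minimal sketch for plotting average accuracy against the training step, assuming pandas and matplotlib are available:

    # Sketch: plot avg_acc over training steps from eval_results.csv.
    import pandas as pd
    import matplotlib.pyplot as plt

    df = pd.read_csv("eval_results_0622/eval_results.csv")
    # Row names look like "eval_results-global_step_<N>"; recover N for sorting.
    df["step"] = df["model"].str.extract(r"global_step_(\d+)", expand=False).astype(int)
    df = df.sort_values("step")

    plt.plot(df["step"], df["avg_acc"], marker="o")
    plt.xlabel("global step")
    plt.ylabel("avg_acc (%)")
    plt.show()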
eval_results_0622/global_step_10/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
eval_results_0622/global_step_10/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json ADDED
@@ -0,0 +1,13 @@
+ {
+ "num_samples": 30,
+ "num_scores": 30,
+ "timeout_samples": 0,
+ "empty_samples": 0,
+ "acc": 13.3,
+ "pass_acc": 13.3,
+ "pass@k": {
+ "1": 13.3
+ },
+ "time_use_in_second": 18.95798373222351,
+ "time_use_in_minite": "0:18"
+ }
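Every *_metrics.json in this commit follows the same small schema: sample counts, accuracy, pass@k, and wall-clock timing (the key "time_use_in_minite" is spelled that way in the data itself). A sketch that gathers per-benchmark accuracy for one checkpoint by globbing the layout shown here:

    # Sketch: collect per-benchmark accuracy for one checkpoint.
    import json
    from glob import glob

    accs = {}
    for path in glob("eval_results_0622/global_step_10/*/*_metrics.json"):
        bench = path.split("/")[-2]  # e.g. "aime24"
        with open(path) as f:
            accs[bench] = json.load(f)["acc"]
    print(accs)  # {'aime24': 13.3, 'amc23': 47.5, 'gsm8k': 89.5, ...}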
eval_results_0622/global_step_10/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
eval_results_0622/global_step_10/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json ADDED
@@ -0,0 +1,13 @@
+ {
+ "num_samples": 40,
+ "num_scores": 40,
+ "timeout_samples": 0,
+ "empty_samples": 0,
+ "acc": 47.5,
+ "pass_acc": 47.5,
+ "pass@k": {
+ "1": 47.5
+ },
+ "time_use_in_second": 23.865264892578125,
+ "time_use_in_minite": "0:23"
+ }
eval_results_0622/global_step_10/gsm8k/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
eval_results_0622/global_step_10/gsm8k/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json ADDED
@@ -0,0 +1,13 @@
+ {
+ "num_samples": 1319,
+ "num_scores": 1319,
+ "timeout_samples": 0,
+ "empty_samples": 1,
+ "acc": 89.5,
+ "pass_acc": 89.5,
+ "pass@k": {
+ "1": 89.5
+ },
+ "time_use_in_second": 59.92423129081726,
+ "time_use_in_minite": "0:59"
+ }
eval_results_0622/global_step_10/math500/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
eval_results_0622/global_step_10/math500/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json ADDED
@@ -0,0 +1,13 @@
+ {
+ "num_samples": 500,
+ "num_scores": 500,
+ "timeout_samples": 0,
+ "empty_samples": 1,
+ "acc": 72.8,
+ "pass_acc": 72.8,
+ "pass@k": {
+ "1": 72.8
+ },
+ "time_use_in_second": 64.16467952728271,
+ "time_use_in_minite": "1:04"
+ }
eval_results_0622/global_step_10/minerva_math/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
eval_results_0622/global_step_10/minerva_math/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json ADDED
@@ -0,0 +1,24 @@
+ {
+ "num_samples": 272,
+ "num_scores": 272,
+ "timeout_samples": 3,
+ "empty_samples": 0,
+ "acc": 34.2,
+ "pass_acc": 34.2,
+ "pass@k": {
+ "1": 34.2
+ },
+ "type_acc": {
+ "Differential Equations (18.03 Spring 2010)": 58.3,
+ "Dynamics and Control (2.003 Spring 2005)": 61.5,
+ "Ecology I (1.018J Fall 2009)": 20.0,
+ "Information and Entropy (6.050J Spring 2008)": 33.3,
+ "Introduction to Astronomy (8.282J Spring 2006)": 18.9,
+ "Introduction to Solid State Chemistry (3.091 Fall 2010)": 23.7,
+ "Physical Chemistry (5.61 Fall 2017)": 9.1,
+ "Principles of Microeconomics (14.01 Fall 2011)": 50.0,
+ "Relativity (8.033 Fall 2006)": 36.4
+ },
+ "time_use_in_second": 27.58268928527832,
+ "time_use_in_minite": "0:27"
+ }
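Unlike the other benchmarks, minerva_math also reports type_acc, a per-course breakdown over MIT OCW subject areas. The headline acc appears to be computed over all 272 samples (a micro average), so it need not equal the unweighted mean of type_acc (a macro average). A sketch of the comparison:

    # Sketch: micro vs. macro accuracy for minerva_math.
    import json

    path = ("eval_results_0622/global_step_10/minerva_math/"
            "test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json")
    with open(path) as f:
        m = json.load(f)
    macro = sum(m["type_acc"].values()) / len(m["type_acc"])
    print(m["acc"], round(macro, 1))  # micro-averaged vs. macro-averaged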
eval_results_0622/global_step_10/olympiadbench/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
eval_results_0622/global_step_10/olympiadbench/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json ADDED
@@ -0,0 +1,13 @@
+ {
+ "num_samples": 675,
+ "num_scores": 675,
+ "timeout_samples": 0,
+ "empty_samples": 0,
+ "acc": 33.6,
+ "pass_acc": 33.6,
+ "pass@k": {
+ "1": 33.6
+ },
+ "time_use_in_second": 104.6881103515625,
+ "time_use_in_minite": "1:44"
+ }
eval_results_0622/global_step_60/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
eval_results_0622/global_step_60/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json ADDED
@@ -0,0 +1,13 @@
+ {
+ "num_samples": 30,
+ "num_scores": 30,
+ "timeout_samples": 0,
+ "empty_samples": 0,
+ "acc": 20.0,
+ "pass_acc": 20.0,
+ "pass@k": {
+ "1": 20.0
+ },
+ "time_use_in_second": 41.93203568458557,
+ "time_use_in_minite": "0:41"
+ }
eval_results_0622/global_step_60/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
eval_results_0622/global_step_60/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json ADDED
@@ -0,0 +1,13 @@
+ {
+ "num_samples": 40,
+ "num_scores": 40,
+ "timeout_samples": 0,
+ "empty_samples": 0,
+ "acc": 50.0,
+ "pass_acc": 50.0,
+ "pass@k": {
+ "1": 50.0
+ },
+ "time_use_in_second": 13.810226917266846,
+ "time_use_in_minite": "0:13"
+ }
eval_results_0622/global_step_60/gsm8k/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
eval_results_0622/global_step_60/gsm8k/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json ADDED
@@ -0,0 +1,13 @@
+ {
+ "num_samples": 1319,
+ "num_scores": 1319,
+ "timeout_samples": 0,
+ "empty_samples": 2,
+ "acc": 90.4,
+ "pass_acc": 90.4,
+ "pass@k": {
+ "1": 90.4
+ },
+ "time_use_in_second": 105.65820074081421,
+ "time_use_in_minite": "1:45"
+ }
eval_results_0622/global_step_60/math500/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
eval_results_0622/global_step_60/math500/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json ADDED
@@ -0,0 +1,13 @@
+ {
+ "num_samples": 500,
+ "num_scores": 500,
+ "timeout_samples": 0,
+ "empty_samples": 1,
+ "acc": 76.0,
+ "pass_acc": 76.0,
+ "pass@k": {
+ "1": 76.0
+ },
+ "time_use_in_second": 78.9439492225647,
+ "time_use_in_minite": "1:18"
+ }
eval_results_0622/global_step_60/minerva_math/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
eval_results_0622/global_step_60/minerva_math/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json ADDED
@@ -0,0 +1,24 @@
+ {
+ "num_samples": 272,
+ "num_scores": 272,
+ "timeout_samples": 2,
+ "empty_samples": 0,
+ "acc": 39.3,
+ "pass_acc": 39.3,
+ "pass@k": {
+ "1": 39.3
+ },
+ "type_acc": {
+ "Differential Equations (18.03 Spring 2010)": 70.8,
+ "Dynamics and Control (2.003 Spring 2005)": 53.8,
+ "Ecology I (1.018J Fall 2009)": 60.0,
+ "Information and Entropy (6.050J Spring 2008)": 33.3,
+ "Introduction to Astronomy (8.282J Spring 2006)": 26.4,
+ "Introduction to Solid State Chemistry (3.091 Fall 2010)": 28.9,
+ "Physical Chemistry (5.61 Fall 2017)": 9.1,
+ "Principles of Microeconomics (14.01 Fall 2011)": 50.0,
+ "Relativity (8.033 Fall 2006)": 27.3
+ },
+ "time_use_in_second": 28.255818367004395,
+ "time_use_in_minite": "0:28"
+ }
eval_results_0622/global_step_60/olympiadbench/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
eval_results_0622/global_step_60/olympiadbench/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json ADDED
@@ -0,0 +1,13 @@
+ {
+ "num_samples": 675,
+ "num_scores": 675,
+ "timeout_samples": 0,
+ "empty_samples": 0,
+ "acc": 40.1,
+ "pass_acc": 40.1,
+ "pass@k": {
+ "1": 40.1
+ },
+ "time_use_in_second": 118.97032117843628,
+ "time_use_in_minite": "1:58"
+ }
eval_results_0622/global_step_85/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
eval_results_0622/global_step_85/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json ADDED
@@ -0,0 +1,13 @@
+ {
+ "num_samples": 30,
+ "num_scores": 30,
+ "timeout_samples": 0,
+ "empty_samples": 0,
+ "acc": 13.3,
+ "pass_acc": 13.3,
+ "pass@k": {
+ "1": 13.3
+ },
+ "time_use_in_second": 31.84497904777527,
+ "time_use_in_minite": "0:31"
+ }
eval_results_0622/global_step_85/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
eval_results_0622/global_step_85/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json ADDED
@@ -0,0 +1,13 @@
+ {
+ "num_samples": 40,
+ "num_scores": 40,
+ "timeout_samples": 0,
+ "empty_samples": 0,
+ "acc": 57.5,
+ "pass_acc": 57.5,
+ "pass@k": {
+ "1": 57.5
+ },
+ "time_use_in_second": 13.160411357879639,
+ "time_use_in_minite": "0:13"
+ }
eval_results_0622/global_step_85/gsm8k/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json ADDED
@@ -0,0 +1,13 @@
+ {
+ "num_samples": 1319,
+ "num_scores": 1319,
+ "timeout_samples": 0,
+ "empty_samples": 1,
+ "acc": 88.6,
+ "pass_acc": 88.6,
+ "pass@k": {
+ "1": 88.6
+ },
+ "time_use_in_second": 151.9373815059662,
+ "time_use_in_minite": "2:31"
+ }
eval_results_0622/global_step_85/minerva_math/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
eval_results_0622/global_step_85/minerva_math/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json ADDED
@@ -0,0 +1,24 @@
+ {
+ "num_samples": 272,
+ "num_scores": 272,
+ "timeout_samples": 2,
+ "empty_samples": 1,
+ "acc": 40.4,
+ "pass_acc": 40.4,
+ "pass@k": {
+ "1": 40.4
+ },
+ "type_acc": {
+ "Differential Equations (18.03 Spring 2010)": 68.8,
+ "Dynamics and Control (2.003 Spring 2005)": 53.8,
+ "Ecology I (1.018J Fall 2009)": 40.0,
+ "Information and Entropy (6.050J Spring 2008)": 33.3,
+ "Introduction to Astronomy (8.282J Spring 2006)": 24.5,
+ "Introduction to Solid State Chemistry (3.091 Fall 2010)": 29.9,
+ "Physical Chemistry (5.61 Fall 2017)": 18.2,
+ "Principles of Microeconomics (14.01 Fall 2011)": 66.7,
+ "Relativity (8.033 Fall 2006)": 36.4
+ },
+ "time_use_in_second": 58.61312198638916,
+ "time_use_in_minite": "0:58"
+ }
global_step_100/actor/huggingface/added_tokens.json ADDED
@@ -0,0 +1,24 @@
+ {
+ "</tool_call>": 151658,
+ "<tool_call>": 151657,
+ "<|box_end|>": 151649,
+ "<|box_start|>": 151648,
+ "<|endoftext|>": 151643,
+ "<|file_sep|>": 151664,
+ "<|fim_middle|>": 151660,
+ "<|fim_pad|>": 151662,
+ "<|fim_prefix|>": 151659,
+ "<|fim_suffix|>": 151661,
+ "<|im_end|>": 151645,
+ "<|im_start|>": 151644,
+ "<|image_pad|>": 151655,
+ "<|object_ref_end|>": 151647,
+ "<|object_ref_start|>": 151646,
+ "<|quad_end|>": 151651,
+ "<|quad_start|>": 151650,
+ "<|repo_name|>": 151663,
+ "<|video_pad|>": 151656,
+ "<|vision_end|>": 151653,
+ "<|vision_pad|>": 151654,
+ "<|vision_start|>": 151652
+ }
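added_tokens.json pins Qwen2.5's special tokens (chat markers, FIM tokens, vision placeholders, tool-call tags) to fixed vocabulary IDs. A quick check through the tokenizer, assuming transformers is installed and the checkpoint folder is available locally:

    # Sketch: confirm special-token IDs against added_tokens.json.
    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("global_step_100/actor/huggingface")
    for t in ["<|endoftext|>", "<|im_start|>", "<|im_end|>"]:
        print(t, tok.convert_tokens_to_ids(t))
    # Expected: 151643, 151644, 151645, matching the JSON above.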
global_step_100/actor/huggingface/config.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "_name_or_path": "/home/work/minzijun_rl/models/Qwen2.5-7B",
+ "architectures": [
+ "Qwen2ForCausalLM"
+ ],
+ "attention_dropout": 0.0,
+ "eos_token_id": 151643,
+ "hidden_act": "silu",
+ "hidden_size": 3584,
+ "initializer_range": 0.02,
+ "intermediate_size": 18944,
+ "max_position_embeddings": 131072,
+ "max_window_layers": 28,
+ "model_type": "qwen2",
+ "num_attention_heads": 28,
+ "num_hidden_layers": 28,
+ "num_key_value_heads": 4,
+ "pad_token_id": 151643,
+ "rms_norm_eps": 1e-06,
+ "rope_scaling": null,
+ "rope_theta": 1000000.0,
+ "sliding_window": null,
+ "tie_word_embeddings": false,
+ "torch_dtype": "float32",
+ "transformers_version": "4.47.1",
+ "use_cache": true,
+ "use_mrope": false,
+ "use_sliding_window": false,
+ "vocab_size": 152064
+ }
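The config describes a stock Qwen2.5-7B: 28 layers, hidden size 3584, grouped-query attention with 4 KV heads, SwiGLU MLPs of width 18944, and untied embeddings over a 152064-token vocabulary, saved here in float32. A rough size check from the config fields alone (a back-of-envelope estimate that ignores biases and norms, not an exact count):

    # Sketch: back-of-envelope parameter/size estimate from config.json.
    from transformers import AutoConfig

    cfg = AutoConfig.from_pretrained("global_step_100/actor/huggingface")
    h, ffn = cfg.hidden_size, cfg.intermediate_size
    kv = cfg.num_key_value_heads * (h // cfg.num_attention_heads)

    per_layer = 2 * h * h + 2 * h * kv + 3 * h * ffn      # attention + SwiGLU weights
    params = cfg.num_hidden_layers * per_layer + 2 * cfg.vocab_size * h
    print(f"~{params / 1e9:.2f}B params, ~{4 * params / 2**30:.1f} GiB in fp32")
    # ~7.6B params and ~28.4 GiB, consistent with total_size in the safetensors index.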
global_step_100/actor/huggingface/generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+ "bos_token_id": 151643,
+ "eos_token_id": 151643,
+ "max_new_tokens": 2048,
+ "transformers_version": "4.47.1"
+ }
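generation_config.json sets both BOS and EOS to <|endoftext|> (151643) and a default max_new_tokens of 2048; generate() picks these defaults up automatically when nothing is overridden. A minimal usage sketch, assuming transformers and torch are installed:

    # Sketch: the defaults above apply unless overridden at call time.
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    path = "global_step_100/actor/huggingface"
    tok = AutoTokenizer.from_pretrained(path)
    model = AutoModelForCausalLM.from_pretrained(path, torch_dtype=torch.bfloat16)

    inputs = tok("1 + 1 =", return_tensors="pt")
    out = model.generate(**inputs, max_new_tokens=32)  # overrides the 2048 default
    print(tok.decode(out[0], skip_special_tokens=True))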
global_step_100/actor/huggingface/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
global_step_100/actor/huggingface/model.safetensors.index.json ADDED
@@ -0,0 +1,346 @@
+ {
+ "metadata": {
+ "total_size": 30462466048
+ },
+ "weight_map": {
+ "lm_head.weight": "model-00007-of-00007.safetensors",
+ "model.embed_tokens.weight": "model-00001-of-00007.safetensors",
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00007.safetensors",
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00007.safetensors",
+ "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00007.safetensors",
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00007.safetensors",
+ "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00007.safetensors",
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
+ "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00007.safetensors",
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00007.safetensors",
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00007.safetensors",
+ "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00007.safetensors",
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00007.safetensors",
+ "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00007.safetensors",
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
+ "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00007.safetensors",
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
+ "model.layers.10.input_layernorm.weight": "model-00003-of-00007.safetensors",
+ "model.layers.10.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
+ "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
+ "model.layers.10.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
+ "model.layers.10.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
+ "model.layers.10.self_attn.k_proj.bias": "model-00003-of-00007.safetensors",
+ "model.layers.10.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
+ "model.layers.10.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
+ "model.layers.10.self_attn.q_proj.bias": "model-00003-of-00007.safetensors",
+ "model.layers.10.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
+ "model.layers.10.self_attn.v_proj.bias": "model-00003-of-00007.safetensors",
+ "model.layers.10.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
+ "model.layers.11.input_layernorm.weight": "model-00003-of-00007.safetensors",
+ "model.layers.11.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
+ "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
+ "model.layers.11.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
+ "model.layers.11.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
+ "model.layers.11.self_attn.k_proj.bias": "model-00003-of-00007.safetensors",
+ "model.layers.11.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
+ "model.layers.11.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
+ "model.layers.11.self_attn.q_proj.bias": "model-00003-of-00007.safetensors",
+ "model.layers.11.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
+ "model.layers.11.self_attn.v_proj.bias": "model-00003-of-00007.safetensors",
+ "model.layers.11.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
+ "model.layers.12.input_layernorm.weight": "model-00003-of-00007.safetensors",
+ "model.layers.12.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
+ "model.layers.12.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
+ "model.layers.12.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
+ "model.layers.12.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
+ "model.layers.12.self_attn.k_proj.bias": "model-00003-of-00007.safetensors",
+ "model.layers.12.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
+ "model.layers.12.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
+ "model.layers.12.self_attn.q_proj.bias": "model-00003-of-00007.safetensors",
+ "model.layers.12.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
+ "model.layers.12.self_attn.v_proj.bias": "model-00003-of-00007.safetensors",
+ "model.layers.12.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
+ "model.layers.13.input_layernorm.weight": "model-00004-of-00007.safetensors",
+ "model.layers.13.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.13.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
+ "model.layers.13.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.13.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
+ "model.layers.13.self_attn.k_proj.bias": "model-00003-of-00007.safetensors",
+ "model.layers.13.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
+ "model.layers.13.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
+ "model.layers.13.self_attn.q_proj.bias": "model-00003-of-00007.safetensors",
+ "model.layers.13.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
+ "model.layers.13.self_attn.v_proj.bias": "model-00003-of-00007.safetensors",
+ "model.layers.13.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
+ "model.layers.14.input_layernorm.weight": "model-00004-of-00007.safetensors",
+ "model.layers.14.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.14.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.14.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.14.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
+ "model.layers.14.self_attn.k_proj.bias": "model-00004-of-00007.safetensors",
+ "model.layers.14.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.14.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.14.self_attn.q_proj.bias": "model-00004-of-00007.safetensors",
+ "model.layers.14.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.14.self_attn.v_proj.bias": "model-00004-of-00007.safetensors",
+ "model.layers.14.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.15.input_layernorm.weight": "model-00004-of-00007.safetensors",
+ "model.layers.15.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.15.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.15.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.15.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
+ "model.layers.15.self_attn.k_proj.bias": "model-00004-of-00007.safetensors",
+ "model.layers.15.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.15.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.15.self_attn.q_proj.bias": "model-00004-of-00007.safetensors",
+ "model.layers.15.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.15.self_attn.v_proj.bias": "model-00004-of-00007.safetensors",
+ "model.layers.15.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.16.input_layernorm.weight": "model-00004-of-00007.safetensors",
+ "model.layers.16.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.16.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.16.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.16.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
+ "model.layers.16.self_attn.k_proj.bias": "model-00004-of-00007.safetensors",
+ "model.layers.16.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.16.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.16.self_attn.q_proj.bias": "model-00004-of-00007.safetensors",
+ "model.layers.16.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.16.self_attn.v_proj.bias": "model-00004-of-00007.safetensors",
+ "model.layers.16.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.17.input_layernorm.weight": "model-00004-of-00007.safetensors",
+ "model.layers.17.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.17.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.17.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.17.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
+ "model.layers.17.self_attn.k_proj.bias": "model-00004-of-00007.safetensors",
+ "model.layers.17.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.17.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.17.self_attn.q_proj.bias": "model-00004-of-00007.safetensors",
+ "model.layers.17.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.17.self_attn.v_proj.bias": "model-00004-of-00007.safetensors",
+ "model.layers.17.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.18.input_layernorm.weight": "model-00005-of-00007.safetensors",
+ "model.layers.18.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.18.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.18.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.18.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
+ "model.layers.18.self_attn.k_proj.bias": "model-00004-of-00007.safetensors",
+ "model.layers.18.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.18.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.18.self_attn.q_proj.bias": "model-00004-of-00007.safetensors",
+ "model.layers.18.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.18.self_attn.v_proj.bias": "model-00004-of-00007.safetensors",
+ "model.layers.18.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
+ "model.layers.19.input_layernorm.weight": "model-00005-of-00007.safetensors",
+ "model.layers.19.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.19.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.19.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.19.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
+ "model.layers.19.self_attn.k_proj.bias": "model-00005-of-00007.safetensors",
+ "model.layers.19.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.19.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.19.self_attn.q_proj.bias": "model-00005-of-00007.safetensors",
+ "model.layers.19.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.19.self_attn.v_proj.bias": "model-00005-of-00007.safetensors",
+ "model.layers.19.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00007.safetensors",
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00007.safetensors",
+ "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00007.safetensors",
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00007.safetensors",
+ "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00007.safetensors",
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
+ "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00007.safetensors",
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
+ "model.layers.20.input_layernorm.weight": "model-00005-of-00007.safetensors",
+ "model.layers.20.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.20.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.20.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.20.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
+ "model.layers.20.self_attn.k_proj.bias": "model-00005-of-00007.safetensors",
+ "model.layers.20.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.20.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.20.self_attn.q_proj.bias": "model-00005-of-00007.safetensors",
+ "model.layers.20.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.20.self_attn.v_proj.bias": "model-00005-of-00007.safetensors",
+ "model.layers.20.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.21.input_layernorm.weight": "model-00005-of-00007.safetensors",
+ "model.layers.21.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.21.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.21.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.21.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
+ "model.layers.21.self_attn.k_proj.bias": "model-00005-of-00007.safetensors",
+ "model.layers.21.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.21.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.21.self_attn.q_proj.bias": "model-00005-of-00007.safetensors",
+ "model.layers.21.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.21.self_attn.v_proj.bias": "model-00005-of-00007.safetensors",
+ "model.layers.21.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.22.input_layernorm.weight": "model-00005-of-00007.safetensors",
+ "model.layers.22.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.22.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.22.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.22.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
+ "model.layers.22.self_attn.k_proj.bias": "model-00005-of-00007.safetensors",
+ "model.layers.22.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.22.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.22.self_attn.q_proj.bias": "model-00005-of-00007.safetensors",
+ "model.layers.22.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.22.self_attn.v_proj.bias": "model-00005-of-00007.safetensors",
+ "model.layers.22.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.23.input_layernorm.weight": "model-00005-of-00007.safetensors",
+ "model.layers.23.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.23.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.23.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.23.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
+ "model.layers.23.self_attn.k_proj.bias": "model-00005-of-00007.safetensors",
+ "model.layers.23.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.23.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.23.self_attn.q_proj.bias": "model-00005-of-00007.safetensors",
+ "model.layers.23.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.23.self_attn.v_proj.bias": "model-00005-of-00007.safetensors",
+ "model.layers.23.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
+ "model.layers.24.input_layernorm.weight": "model-00006-of-00007.safetensors",
213
+ "model.layers.24.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
214
+ "model.layers.24.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
215
+ "model.layers.24.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
216
+ "model.layers.24.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
217
+ "model.layers.24.self_attn.k_proj.bias": "model-00005-of-00007.safetensors",
218
+ "model.layers.24.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
219
+ "model.layers.24.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
220
+ "model.layers.24.self_attn.q_proj.bias": "model-00005-of-00007.safetensors",
221
+ "model.layers.24.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
222
+ "model.layers.24.self_attn.v_proj.bias": "model-00005-of-00007.safetensors",
223
+ "model.layers.24.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
224
+ "model.layers.25.input_layernorm.weight": "model-00006-of-00007.safetensors",
225
+ "model.layers.25.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
226
+ "model.layers.25.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
227
+ "model.layers.25.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
228
+ "model.layers.25.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
229
+ "model.layers.25.self_attn.k_proj.bias": "model-00006-of-00007.safetensors",
230
+ "model.layers.25.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
231
+ "model.layers.25.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
232
+ "model.layers.25.self_attn.q_proj.bias": "model-00006-of-00007.safetensors",
233
+ "model.layers.25.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
234
+ "model.layers.25.self_attn.v_proj.bias": "model-00006-of-00007.safetensors",
235
+ "model.layers.25.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
236
+ "model.layers.26.input_layernorm.weight": "model-00006-of-00007.safetensors",
237
+ "model.layers.26.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
238
+ "model.layers.26.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
239
+ "model.layers.26.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
240
+ "model.layers.26.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
241
+ "model.layers.26.self_attn.k_proj.bias": "model-00006-of-00007.safetensors",
242
+ "model.layers.26.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
243
+ "model.layers.26.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
244
+ "model.layers.26.self_attn.q_proj.bias": "model-00006-of-00007.safetensors",
245
+ "model.layers.26.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
246
+ "model.layers.26.self_attn.v_proj.bias": "model-00006-of-00007.safetensors",
247
+ "model.layers.26.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
248
+ "model.layers.27.input_layernorm.weight": "model-00006-of-00007.safetensors",
249
+ "model.layers.27.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
250
+ "model.layers.27.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
251
+ "model.layers.27.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
252
+ "model.layers.27.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
253
+ "model.layers.27.self_attn.k_proj.bias": "model-00006-of-00007.safetensors",
254
+ "model.layers.27.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
255
+ "model.layers.27.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
256
+ "model.layers.27.self_attn.q_proj.bias": "model-00006-of-00007.safetensors",
257
+ "model.layers.27.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
258
+ "model.layers.27.self_attn.v_proj.bias": "model-00006-of-00007.safetensors",
259
+ "model.layers.27.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
260
+ "model.layers.3.input_layernorm.weight": "model-00002-of-00007.safetensors",
261
+ "model.layers.3.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
262
+ "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
263
+ "model.layers.3.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
264
+ "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
265
+ "model.layers.3.self_attn.k_proj.bias": "model-00002-of-00007.safetensors",
266
+ "model.layers.3.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
267
+ "model.layers.3.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
268
+ "model.layers.3.self_attn.q_proj.bias": "model-00002-of-00007.safetensors",
269
+ "model.layers.3.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
270
+ "model.layers.3.self_attn.v_proj.bias": "model-00002-of-00007.safetensors",
271
+ "model.layers.3.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
272
+ "model.layers.4.input_layernorm.weight": "model-00002-of-00007.safetensors",
273
+ "model.layers.4.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
274
+ "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
275
+ "model.layers.4.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
276
+ "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
277
+ "model.layers.4.self_attn.k_proj.bias": "model-00002-of-00007.safetensors",
278
+ "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
279
+ "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
280
+ "model.layers.4.self_attn.q_proj.bias": "model-00002-of-00007.safetensors",
281
+ "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
282
+ "model.layers.4.self_attn.v_proj.bias": "model-00002-of-00007.safetensors",
283
+ "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
284
+ "model.layers.5.input_layernorm.weight": "model-00002-of-00007.safetensors",
285
+ "model.layers.5.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
286
+ "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
287
+ "model.layers.5.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
288
+ "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
289
+ "model.layers.5.self_attn.k_proj.bias": "model-00002-of-00007.safetensors",
290
+ "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
291
+ "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
292
+ "model.layers.5.self_attn.q_proj.bias": "model-00002-of-00007.safetensors",
293
+ "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
294
+ "model.layers.5.self_attn.v_proj.bias": "model-00002-of-00007.safetensors",
295
+ "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
296
+ "model.layers.6.input_layernorm.weight": "model-00002-of-00007.safetensors",
297
+ "model.layers.6.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
298
+ "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
299
+ "model.layers.6.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
300
+ "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
301
+ "model.layers.6.self_attn.k_proj.bias": "model-00002-of-00007.safetensors",
302
+ "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
303
+ "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
304
+ "model.layers.6.self_attn.q_proj.bias": "model-00002-of-00007.safetensors",
305
+ "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
306
+ "model.layers.6.self_attn.v_proj.bias": "model-00002-of-00007.safetensors",
307
+ "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
308
+ "model.layers.7.input_layernorm.weight": "model-00002-of-00007.safetensors",
309
+ "model.layers.7.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
310
+ "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
311
+ "model.layers.7.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
312
+ "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
313
+ "model.layers.7.self_attn.k_proj.bias": "model-00002-of-00007.safetensors",
314
+ "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
315
+ "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
316
+ "model.layers.7.self_attn.q_proj.bias": "model-00002-of-00007.safetensors",
317
+ "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
318
+ "model.layers.7.self_attn.v_proj.bias": "model-00002-of-00007.safetensors",
319
+ "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
320
+ "model.layers.8.input_layernorm.weight": "model-00003-of-00007.safetensors",
321
+ "model.layers.8.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
322
+ "model.layers.8.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
323
+ "model.layers.8.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
324
+ "model.layers.8.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
325
+ "model.layers.8.self_attn.k_proj.bias": "model-00002-of-00007.safetensors",
326
+ "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
327
+ "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
328
+ "model.layers.8.self_attn.q_proj.bias": "model-00002-of-00007.safetensors",
329
+ "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
330
+ "model.layers.8.self_attn.v_proj.bias": "model-00002-of-00007.safetensors",
331
+ "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
332
+ "model.layers.9.input_layernorm.weight": "model-00003-of-00007.safetensors",
333
+ "model.layers.9.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
334
+ "model.layers.9.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
335
+ "model.layers.9.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
336
+ "model.layers.9.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
337
+ "model.layers.9.self_attn.k_proj.bias": "model-00003-of-00007.safetensors",
338
+ "model.layers.9.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
339
+ "model.layers.9.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
340
+ "model.layers.9.self_attn.q_proj.bias": "model-00003-of-00007.safetensors",
341
+ "model.layers.9.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
342
+ "model.layers.9.self_attn.v_proj.bias": "model-00003-of-00007.safetensors",
343
+ "model.layers.9.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
344
+ "model.norm.weight": "model-00006-of-00007.safetensors"
345
+ }
346
+ }
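Note: the `weight_map` above follows the standard sharded-safetensors index format, mapping each parameter name to the shard file that stores it. `AutoModel.from_pretrained` resolves shards through this index automatically; the sketch below (paths mirror this commit's layout, and the manual lookup is only needed for surgical access to single tensors) shows how one entry resolves.

```python
# Minimal sketch: resolve one tensor through the weight map above.
import json
from safetensors import safe_open

ckpt = "global_step_100/actor/huggingface"  # local copy of this directory
with open(f"{ckpt}/model.safetensors.index.json") as f:
    weight_map = json.load(f)["weight_map"]

name = "model.layers.18.mlp.down_proj.weight"
shard = weight_map[name]  # "model-00005-of-00007.safetensors" per the index
with safe_open(f"{ckpt}/{shard}", framework="pt") as shard_file:
    tensor = shard_file.get_tensor(name)
print(tensor.shape)
```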
global_step_100/actor/huggingface/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
+ {
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>",
+ "<|object_ref_start|>",
+ "<|object_ref_end|>",
+ "<|box_start|>",
+ "<|box_end|>",
+ "<|quad_start|>",
+ "<|quad_end|>",
+ "<|vision_start|>",
+ "<|vision_end|>",
+ "<|vision_pad|>",
+ "<|image_pad|>",
+ "<|video_pad|>"
+ ],
+ "eos_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
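Note: in the mapping above, both `eos_token` and `pad_token` resolve to `<|endoftext|>`, while chat turns in the template of tokenizer_config.json below are closed with `<|im_end|>` instead. A minimal sketch of how this surfaces when the export is loaded (the path mirrors this commit's layout):

```python
# Minimal sketch: inspect the special-token mapping defined above.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("global_step_100/actor/huggingface")
assert tok.eos_token == "<|endoftext|>"
assert tok.pad_token == "<|endoftext|>"
# <|im_end|> is registered as an additional special token
# (id 151645 in tokenizer_config.json below).
print(tok.convert_tokens_to_ids("<|im_end|>"))
```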
global_step_100/actor/huggingface/tokenizer_config.json ADDED
@@ -0,0 +1,208 @@
+ {
+ "add_bos_token": false,
+ "add_prefix_space": false,
+ "added_tokens_decoder": {
+ "151643": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151644": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151645": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151646": {
+ "content": "<|object_ref_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151647": {
+ "content": "<|object_ref_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151648": {
+ "content": "<|box_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151649": {
+ "content": "<|box_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151650": {
+ "content": "<|quad_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151651": {
+ "content": "<|quad_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151652": {
+ "content": "<|vision_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151653": {
+ "content": "<|vision_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151654": {
+ "content": "<|vision_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151655": {
+ "content": "<|image_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151656": {
+ "content": "<|video_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151657": {
+ "content": "<tool_call>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151658": {
+ "content": "</tool_call>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151659": {
+ "content": "<|fim_prefix|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151660": {
+ "content": "<|fim_middle|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151661": {
+ "content": "<|fim_suffix|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151662": {
+ "content": "<|fim_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151663": {
+ "content": "<|repo_name|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151664": {
+ "content": "<|file_sep|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ }
+ },
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>",
+ "<|object_ref_start|>",
+ "<|object_ref_end|>",
+ "<|box_start|>",
+ "<|box_end|>",
+ "<|quad_start|>",
+ "<|quad_end|>",
+ "<|vision_start|>",
+ "<|vision_end|>",
+ "<|vision_pad|>",
+ "<|image_pad|>",
+ "<|video_pad|>"
+ ],
+ "bos_token": null,
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|endoftext|>",
+ "errors": "replace",
+ "extra_special_tokens": {},
+ "model_max_length": 131072,
+ "pad_token": "<|endoftext|>",
+ "split_special_tokens": false,
+ "tokenizer_class": "Qwen2Tokenizer",
+ "unk_token": null
+ }
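Note: the `chat_template` above is the ChatML-style Qwen2 template (system/user/assistant turns delimited by `<|im_start|>`/`<|im_end|>`, with optional tool-call handling). A minimal sketch of rendering a prompt with it (the message content is illustrative):

```python
# Minimal sketch: render a prompt with the chat_template defined above.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("global_step_100/actor/huggingface")
messages = [{"role": "user", "content": "What is 15 * 7?"}]
prompt = tok.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
# With no system message, the template injects the default one:
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# What is 15 * 7?<|im_end|>
# <|im_start|>assistant
print(prompt)
```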
global_step_100/actor/huggingface/vocab.json ADDED
The diff for this file is too large to render. See raw diff
global_step_115/actor/huggingface/model-00001-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:350da97e88101d4c1d9713b48553ecfb580f20fdbb35a6aa9ac4d660a1f7437e
+ size 4976687216
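Note: each entry above and below is a git-lfs pointer file, not the weights themselves: `oid` is the SHA-256 digest of the real blob and `size` its byte count. A minimal sketch of verifying a fetched shard against its pointer (path per this commit's layout):

```python
# Minimal sketch: check a downloaded shard against the LFS pointer above.
import hashlib
import os

path = "global_step_115/actor/huggingface/model-00001-of-00007.safetensors"
expected_oid = "350da97e88101d4c1d9713b48553ecfb580f20fdbb35a6aa9ac4d660a1f7437e"
expected_size = 4976687216

assert os.path.getsize(path) == expected_size
digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        digest.update(chunk)
assert digest.hexdigest() == expected_oid
```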
global_step_115/actor/huggingface/model-00002-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4353b529dcd0452a843011591b8350c38375819d1db2b8ce46dc244c77de06b9
+ size 4778622352
global_step_115/actor/huggingface/model-00003-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6ff7cbec1f54e8596fd09acf7c799da57a0b48d3a353faa49f68fc0bd4e61027
+ size 4932743960
global_step_115/actor/huggingface/model-00004-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:39d4d4cdaee4d55de710c04f885496def5404434ff0a3d8a748c750c8dd31f7f
+ size 4932743992
global_step_115/actor/huggingface/model-00005-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:544a01d6ca84776412732c309a3b761986e4f07a64e19f2d6c9bfc1c00e8c569
+ size 4998852296
global_step_115/actor/huggingface/model-00006-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d8412915716e1b78ca088beadb8003d089c33251a66615653138ccc57c31ee3d
+ size 3662865184
global_step_115/actor/huggingface/model-00007-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:695745a01cfdae9f2641a33e404fc5af850654e86d1bcce39d457e5975a1241c
+ size 2179989632
global_step_115/actor/model_world_size_8_rank_6.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bd47634ac278daee5b508a71620c1b88e36b618bb8298d51b568668d3ad02c79
+ size 3807999738
global_step_115/actor/optim_world_size_8_rank_0.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0a525a83b50e3b4a3bc50d0d6d1b1d8fc7b30182f9e164d16b8580ce108f89a5
+ size 7615643215