kangdawei commited on
Commit
9d331ab
·
verified ·
1 Parent(s): 09194c4

Model save

Browse files
README.md CHANGED
@@ -1,19 +1,17 @@
1
  ---
2
  base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
3
- datasets: knoveleng/open-rs
4
  library_name: transformers
5
  model_name: DAPO-7B
6
  tags:
7
  - generated_from_trainer
8
- - open-r1
9
- - dapo
10
  - trl
 
11
  licence: license
12
  ---
13
 
14
  # Model Card for DAPO-7B
15
 
16
- This model is a fine-tuned version of [deepseek-ai/DeepSeek-R1-Distill-Qwen-7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) on the [knoveleng/open-rs](https://huggingface.co/datasets/knoveleng/open-rs) dataset.
17
  It has been trained using [TRL](https://github.com/huggingface/trl).
18
 
19
  ## Quick start
 
1
  ---
2
  base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
 
3
  library_name: transformers
4
  model_name: DAPO-7B
5
  tags:
6
  - generated_from_trainer
 
 
7
  - trl
8
+ - dapo
9
  licence: license
10
  ---
11
 
12
  # Model Card for DAPO-7B
13
 
14
+ This model is a fine-tuned version of [deepseek-ai/DeepSeek-R1-Distill-Qwen-7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B).
15
  It has been trained using [TRL](https://github.com/huggingface/trl).
16
 
17
  ## Quick start
adapter/adapter_config.json CHANGED
@@ -29,13 +29,13 @@
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
- "q_proj",
33
- "gate_proj",
34
  "v_proj",
35
- "k_proj",
36
  "o_proj",
37
  "up_proj",
38
- "down_proj"
 
 
39
  ],
40
  "target_parameters": null,
41
  "task_type": "CAUSAL_LM",
 
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
 
 
32
  "v_proj",
33
+ "down_proj",
34
  "o_proj",
35
  "up_proj",
36
+ "q_proj",
37
+ "k_proj",
38
+ "gate_proj"
39
  ],
40
  "target_parameters": null,
41
  "task_type": "CAUSAL_LM",
adapter/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f41cef2bb66b9353a9df6cc7f16045d9af89f4627456717e3247d6b15a98a0f
3
  size 323014560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47969ecdc8316c40e6f959f6369d54cb33fa5660e1e4073f9f17e03ac0e208bd
3
  size 323014560
adapter/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92df2953de292a8a4d447867c90e350e8357338da5214d2a17070cb10ce845a7
3
  size 8760
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d047cbc55cda44b752f416d36e3a69e56abc0bd3f8f2902b298452f531a4525
3
  size 8760
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.03280465058982372,
4
- "train_runtime": 132752.8895,
5
  "train_samples": 7000,
6
- "train_samples_per_second": 0.036,
7
  "train_steps_per_second": 0.001
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 0.01698429927288089,
4
+ "train_runtime": 137940.7556,
5
  "train_samples": 7000,
6
+ "train_samples_per_second": 0.07,
7
  "train_steps_per_second": 0.001
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.03280465058982372,
4
- "train_runtime": 132752.8895,
5
  "train_samples": 7000,
6
- "train_samples_per_second": 0.036,
7
  "train_steps_per_second": 0.001
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 0.01698429927288089,
4
+ "train_runtime": 137940.7556,
5
  "train_samples": 7000,
6
+ "train_samples_per_second": 0.07,
7
  "train_steps_per_second": 0.001
8
  }
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.11428571428571428,
6
  "eval_steps": 500,
7
- "global_step": 100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1910,17 +1910,1917 @@
1910
  "step": 100
1911
  },
1912
  {
1913
- "epoch": 0.11428571428571428,
1914
- "step": 100,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1915
  "total_flos": 0.0,
1916
- "train_loss": 0.03280465058982372,
1917
- "train_runtime": 132752.8895,
1918
- "train_samples_per_second": 0.036,
1919
  "train_steps_per_second": 0.001
1920
  }
1921
  ],
1922
  "logging_steps": 1,
1923
- "max_steps": 100,
1924
  "num_input_tokens_seen": 0,
1925
  "num_train_epochs": 1,
1926
  "save_steps": 10,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.22857142857142856,
6
  "eval_steps": 500,
7
+ "global_step": 200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1910
  "step": 100
1911
  },
1912
  {
1913
+ "clip_fraction": 0.0,
1914
+ "completion_length": 2261.625045776367,
1915
+ "dapo/avg_reward_std": 0.2656887276419278,
1916
+ "dapo/filter_reward_index": 0.0,
1917
+ "dapo/kept_prompts_ratio": 0.3563218414783478,
1918
+ "dapo/num_sampling_attempts": 3.625,
1919
+ "dapo/sampling_efficiency": 30.952380952380942,
1920
+ "dapo/total_prompts_processed": 21.75,
1921
+ "dapo/valid_prompts_collected": 6.0,
1922
+ "epoch": 0.11542857142857142,
1923
+ "grad_norm": 0.012256976217031479,
1924
+ "kl": 0.0002308487892150879,
1925
+ "learning_rate": 1e-07,
1926
+ "loss": 0.0255,
1927
+ "reward": 0.6794679276645184,
1928
+ "reward_std": 0.936141237616539,
1929
+ "step": 101
1930
+ },
1931
+ {
1932
+ "clip_fraction": 0.0,
1933
+ "completion_length": 2019.2500381469727,
1934
+ "dapo/avg_reward_std": 0.27603574914316975,
1935
+ "dapo/filter_reward_index": 0.0,
1936
+ "dapo/kept_prompts_ratio": 0.40322581414253483,
1937
+ "dapo/num_sampling_attempts": 3.875,
1938
+ "dapo/sampling_efficiency": 31.14583333333333,
1939
+ "dapo/total_prompts_processed": 23.25,
1940
+ "dapo/valid_prompts_collected": 6.0,
1941
+ "epoch": 0.11657142857142858,
1942
+ "grad_norm": 0.011883130297064781,
1943
+ "kl": 0.00018447637557983398,
1944
+ "learning_rate": 6.203955092681039e-07,
1945
+ "loss": 0.0566,
1946
+ "reward": 0.9531724825501442,
1947
+ "reward_std": 0.9424103274941444,
1948
+ "step": 102
1949
+ },
1950
+ {
1951
+ "clip_fraction": 0.0,
1952
+ "completion_length": 2447.142379760742,
1953
+ "dapo/avg_reward_std": 0.2595460871855418,
1954
+ "dapo/filter_reward_index": 0.0,
1955
+ "dapo/kept_prompts_ratio": 0.3833333447575569,
1956
+ "dapo/num_sampling_attempts": 3.75,
1957
+ "dapo/sampling_efficiency": 28.958333333333332,
1958
+ "dapo/total_prompts_processed": 22.5,
1959
+ "dapo/valid_prompts_collected": 6.0,
1960
+ "epoch": 0.11771428571428572,
1961
+ "grad_norm": 0.010165790095925331,
1962
+ "kl": 0.00018906593322753906,
1963
+ "learning_rate": 6.126278954320294e-07,
1964
+ "loss": 0.0361,
1965
+ "reward": 0.8079591542482376,
1966
+ "reward_std": 0.9323313534259796,
1967
+ "step": 103
1968
+ },
1969
+ {
1970
+ "clip_fraction": 0.0,
1971
+ "completion_length": 2414.4305725097656,
1972
+ "dapo/avg_reward_std": 0.2675211922875766,
1973
+ "dapo/filter_reward_index": 0.0,
1974
+ "dapo/kept_prompts_ratio": 0.3505747174394542,
1975
+ "dapo/num_sampling_attempts": 3.625,
1976
+ "dapo/sampling_efficiency": 42.013888888888886,
1977
+ "dapo/total_prompts_processed": 21.75,
1978
+ "dapo/valid_prompts_collected": 6.0,
1979
+ "epoch": 0.11885714285714286,
1980
+ "grad_norm": 0.009563453495502472,
1981
+ "kl": 0.0002244710922241211,
1982
+ "learning_rate": 6.048412045323164e-07,
1983
+ "loss": 0.0367,
1984
+ "reward": 0.6746065132319927,
1985
+ "reward_std": 0.9439321234822273,
1986
+ "step": 104
1987
+ },
1988
+ {
1989
+ "clip_fraction": 0.0,
1990
+ "completion_length": 2350.4653396606445,
1991
+ "dapo/avg_reward_std": 0.2709802109183687,
1992
+ "dapo/filter_reward_index": 0.0,
1993
+ "dapo/kept_prompts_ratio": 0.33838384621071094,
1994
+ "dapo/num_sampling_attempts": 4.125,
1995
+ "dapo/sampling_efficiency": 39.58333333333333,
1996
+ "dapo/total_prompts_processed": 24.75,
1997
+ "dapo/valid_prompts_collected": 6.0,
1998
+ "epoch": 0.12,
1999
+ "grad_norm": 0.011944189667701721,
2000
+ "kl": 0.00023399293422698975,
2001
+ "learning_rate": 5.97037808470444e-07,
2002
+ "loss": 0.0133,
2003
+ "reward": 0.7501634955406189,
2004
+ "reward_std": 0.9493465423583984,
2005
+ "step": 105
2006
+ },
2007
+ {
2008
+ "clip_fraction": 0.0,
2009
+ "completion_length": 2232.5590209960938,
2010
+ "dapo/avg_reward_std": 0.2304972934311834,
2011
+ "dapo/filter_reward_index": 0.0,
2012
+ "dapo/kept_prompts_ratio": 0.32183908234382497,
2013
+ "dapo/num_sampling_attempts": 3.625,
2014
+ "dapo/sampling_efficiency": 46.05654761904762,
2015
+ "dapo/total_prompts_processed": 21.75,
2016
+ "dapo/valid_prompts_collected": 6.0,
2017
+ "epoch": 0.12114285714285715,
2018
+ "grad_norm": 0.011308341287076473,
2019
+ "kl": 0.0002868175506591797,
2020
+ "learning_rate": 5.892200842364462e-07,
2021
+ "loss": 0.017,
2022
+ "reward": 0.8449488952755928,
2023
+ "reward_std": 0.9235394075512886,
2024
+ "step": 106
2025
+ },
2026
+ {
2027
+ "clip_fraction": 0.0,
2028
+ "completion_length": 2245.4306259155273,
2029
+ "dapo/avg_reward_std": 0.32372228088586225,
2030
+ "dapo/filter_reward_index": 0.0,
2031
+ "dapo/kept_prompts_ratio": 0.37681160478488257,
2032
+ "dapo/num_sampling_attempts": 2.875,
2033
+ "dapo/sampling_efficiency": 45.53571428571428,
2034
+ "dapo/total_prompts_processed": 17.25,
2035
+ "dapo/valid_prompts_collected": 6.0,
2036
+ "epoch": 0.12228571428571429,
2037
+ "grad_norm": 0.01079760491847992,
2038
+ "kl": 0.00018143653869628906,
2039
+ "learning_rate": 5.813904131848564e-07,
2040
+ "loss": 0.0407,
2041
+ "reward": 0.876940418034792,
2042
+ "reward_std": 0.945194236934185,
2043
+ "step": 107
2044
+ },
2045
+ {
2046
+ "clip_fraction": 0.0,
2047
+ "completion_length": 2877.4410095214844,
2048
+ "dapo/avg_reward_std": 0.31851592376118615,
2049
+ "dapo/filter_reward_index": 0.0,
2050
+ "dapo/kept_prompts_ratio": 0.44444445201328825,
2051
+ "dapo/num_sampling_attempts": 2.625,
2052
+ "dapo/sampling_efficiency": 47.916666666666664,
2053
+ "dapo/total_prompts_processed": 15.75,
2054
+ "dapo/valid_prompts_collected": 6.0,
2055
+ "epoch": 0.12342857142857143,
2056
+ "grad_norm": 0.008532079868018627,
2057
+ "kl": 0.00026416778564453125,
2058
+ "learning_rate": 5.735511803093248e-07,
2059
+ "loss": 0.0189,
2060
+ "reward": 0.5354619715362787,
2061
+ "reward_std": 0.9343887642025948,
2062
+ "step": 108
2063
+ },
2064
+ {
2065
+ "clip_fraction": 0.0,
2066
+ "completion_length": 2287.3403396606445,
2067
+ "dapo/avg_reward_std": 0.2637126021660291,
2068
+ "dapo/filter_reward_index": 0.0,
2069
+ "dapo/kept_prompts_ratio": 0.41025641560554504,
2070
+ "dapo/num_sampling_attempts": 3.25,
2071
+ "dapo/sampling_efficiency": 44.6875,
2072
+ "dapo/total_prompts_processed": 19.5,
2073
+ "dapo/valid_prompts_collected": 6.0,
2074
+ "epoch": 0.12457142857142857,
2075
+ "grad_norm": 0.010662744753062725,
2076
+ "kl": 0.00025856494903564453,
2077
+ "learning_rate": 5.657047735161255e-07,
2078
+ "loss": 0.0139,
2079
+ "reward": 0.6945868469774723,
2080
+ "reward_std": 0.945196196436882,
2081
+ "step": 109
2082
+ },
2083
+ {
2084
+ "clip_fraction": 0.0,
2085
+ "completion_length": 2099.197898864746,
2086
+ "dapo/avg_reward_std": 0.2950383967586926,
2087
+ "dapo/filter_reward_index": 0.0,
2088
+ "dapo/kept_prompts_ratio": 0.380952388048172,
2089
+ "dapo/num_sampling_attempts": 3.5,
2090
+ "dapo/sampling_efficiency": 38.541666666666664,
2091
+ "dapo/total_prompts_processed": 21.0,
2092
+ "dapo/valid_prompts_collected": 6.0,
2093
+ "epoch": 0.12571428571428572,
2094
+ "grad_norm": 0.011256680823862553,
2095
+ "kl": 0.0002060532569885254,
2096
+ "learning_rate": 5.578535828967777e-07,
2097
+ "loss": 0.0106,
2098
+ "reward": 0.6000825632363558,
2099
+ "reward_std": 0.9193084537982941,
2100
+ "step": 110
2101
+ },
2102
+ {
2103
+ "clip_fraction": 0.0,
2104
+ "completion_length": 2716.079864501953,
2105
+ "dapo/avg_reward_std": 0.2741352463590688,
2106
+ "dapo/filter_reward_index": 0.0,
2107
+ "dapo/kept_prompts_ratio": 0.3620689691140734,
2108
+ "dapo/num_sampling_attempts": 3.625,
2109
+ "dapo/sampling_efficiency": 41.488095238095234,
2110
+ "dapo/total_prompts_processed": 21.75,
2111
+ "dapo/valid_prompts_collected": 6.0,
2112
+ "epoch": 0.12685714285714286,
2113
+ "grad_norm": 0.009062284603714943,
2114
+ "kl": 0.0002288222312927246,
2115
+ "learning_rate": 5.5e-07,
2116
+ "loss": 0.0461,
2117
+ "reward": 0.6751261968165636,
2118
+ "reward_std": 0.9856812655925751,
2119
+ "step": 111
2120
+ },
2121
+ {
2122
+ "clip_fraction": 0.0,
2123
+ "completion_length": 3045.125,
2124
+ "dapo/avg_reward_std": 0.36701818108558654,
2125
+ "dapo/filter_reward_index": 0.0,
2126
+ "dapo/kept_prompts_ratio": 0.5888888945182165,
2127
+ "dapo/num_sampling_attempts": 1.875,
2128
+ "dapo/sampling_efficiency": 65.625,
2129
+ "dapo/total_prompts_processed": 11.25,
2130
+ "dapo/valid_prompts_collected": 6.0,
2131
+ "epoch": 0.128,
2132
+ "grad_norm": 0.013615131378173828,
2133
+ "kl": 0.0003104209899902344,
2134
+ "learning_rate": 5.421464171032224e-07,
2135
+ "loss": 0.0541,
2136
+ "reward": 0.6107649356126785,
2137
+ "reward_std": 0.9386496767401695,
2138
+ "step": 112
2139
+ },
2140
+ {
2141
+ "clip_fraction": 0.0,
2142
+ "completion_length": 2261.1597442626953,
2143
+ "dapo/avg_reward_std": 0.2829060518741608,
2144
+ "dapo/filter_reward_index": 0.0,
2145
+ "dapo/kept_prompts_ratio": 0.413333340883255,
2146
+ "dapo/num_sampling_attempts": 3.125,
2147
+ "dapo/sampling_efficiency": 41.041666666666664,
2148
+ "dapo/total_prompts_processed": 18.75,
2149
+ "dapo/valid_prompts_collected": 6.0,
2150
+ "epoch": 0.12914285714285714,
2151
+ "grad_norm": 0.01245199330151081,
2152
+ "kl": 0.0002949833869934082,
2153
+ "learning_rate": 5.342952264838747e-07,
2154
+ "loss": 0.0273,
2155
+ "reward": 0.7544166818261147,
2156
+ "reward_std": 0.9633913785219193,
2157
+ "step": 113
2158
+ },
2159
+ {
2160
+ "clip_fraction": 0.0,
2161
+ "completion_length": 2030.1041946411133,
2162
+ "dapo/avg_reward_std": 0.2660287490912846,
2163
+ "dapo/filter_reward_index": 0.0,
2164
+ "dapo/kept_prompts_ratio": 0.32738095788019045,
2165
+ "dapo/num_sampling_attempts": 3.5,
2166
+ "dapo/sampling_efficiency": 45.535714285714285,
2167
+ "dapo/total_prompts_processed": 21.0,
2168
+ "dapo/valid_prompts_collected": 6.0,
2169
+ "epoch": 0.13028571428571428,
2170
+ "grad_norm": 0.011100664734840393,
2171
+ "kl": 0.00016885995864868164,
2172
+ "learning_rate": 5.264488196906752e-07,
2173
+ "loss": 0.0649,
2174
+ "reward": 0.5986752398312092,
2175
+ "reward_std": 0.9739916548132896,
2176
+ "step": 114
2177
+ },
2178
+ {
2179
+ "clip_fraction": 0.0,
2180
+ "completion_length": 2791.465301513672,
2181
+ "dapo/avg_reward_std": 0.297807412147522,
2182
+ "dapo/filter_reward_index": 0.0,
2183
+ "dapo/kept_prompts_ratio": 0.44000000655651095,
2184
+ "dapo/num_sampling_attempts": 3.125,
2185
+ "dapo/sampling_efficiency": 42.08333333333333,
2186
+ "dapo/total_prompts_processed": 18.75,
2187
+ "dapo/valid_prompts_collected": 6.0,
2188
+ "epoch": 0.13142857142857142,
2189
+ "grad_norm": 0.011278674006462097,
2190
+ "kl": 0.0002925395965576172,
2191
+ "learning_rate": 5.186095868151436e-07,
2192
+ "loss": 0.0586,
2193
+ "reward": 0.6219565980136395,
2194
+ "reward_std": 0.9591977074742317,
2195
+ "step": 115
2196
+ },
2197
+ {
2198
+ "clip_fraction": 0.0,
2199
+ "completion_length": 2804.6493606567383,
2200
+ "dapo/avg_reward_std": 0.35703572371731634,
2201
+ "dapo/filter_reward_index": 0.0,
2202
+ "dapo/kept_prompts_ratio": 0.42028986371081806,
2203
+ "dapo/num_sampling_attempts": 2.875,
2204
+ "dapo/sampling_efficiency": 43.12499999999999,
2205
+ "dapo/total_prompts_processed": 17.25,
2206
+ "dapo/valid_prompts_collected": 6.0,
2207
+ "epoch": 0.13257142857142856,
2208
+ "grad_norm": 0.01122019812464714,
2209
+ "kl": 0.00034046173095703125,
2210
+ "learning_rate": 5.107799157635538e-07,
2211
+ "loss": 0.0233,
2212
+ "reward": 0.469740716740489,
2213
+ "reward_std": 0.9214994236826897,
2214
+ "step": 116
2215
+ },
2216
+ {
2217
+ "clip_fraction": 0.0,
2218
+ "completion_length": 2037.885456085205,
2219
+ "dapo/avg_reward_std": 0.30805256009101867,
2220
+ "dapo/filter_reward_index": 0.0,
2221
+ "dapo/kept_prompts_ratio": 0.4666666799783707,
2222
+ "dapo/num_sampling_attempts": 3.125,
2223
+ "dapo/sampling_efficiency": 44.27083333333333,
2224
+ "dapo/total_prompts_processed": 18.75,
2225
+ "dapo/valid_prompts_collected": 6.0,
2226
+ "epoch": 0.1337142857142857,
2227
+ "grad_norm": 0.014056873507797718,
2228
+ "kl": 0.0002486705780029297,
2229
+ "learning_rate": 5.02962191529556e-07,
2230
+ "loss": 0.038,
2231
+ "reward": 0.9076524265110493,
2232
+ "reward_std": 0.9655390456318855,
2233
+ "step": 117
2234
+ },
2235
+ {
2236
+ "clip_fraction": 0.0,
2237
+ "completion_length": 2517.215316772461,
2238
+ "dapo/avg_reward_std": 0.23199922059263503,
2239
+ "dapo/filter_reward_index": 0.0,
2240
+ "dapo/kept_prompts_ratio": 0.3571428635290691,
2241
+ "dapo/num_sampling_attempts": 3.5,
2242
+ "dapo/sampling_efficiency": 45.535714285714285,
2243
+ "dapo/total_prompts_processed": 21.0,
2244
+ "dapo/valid_prompts_collected": 6.0,
2245
+ "epoch": 0.13485714285714287,
2246
+ "grad_norm": 0.011827170848846436,
2247
+ "kl": 0.00034999847412109375,
2248
+ "learning_rate": 4.951587954676837e-07,
2249
+ "loss": 0.023,
2250
+ "reward": 0.5725362580269575,
2251
+ "reward_std": 0.9489376917481422,
2252
+ "step": 118
2253
+ },
2254
+ {
2255
+ "clip_fraction": 0.0,
2256
+ "completion_length": 2309.763916015625,
2257
+ "dapo/avg_reward_std": 0.33521059803340747,
2258
+ "dapo/filter_reward_index": 0.0,
2259
+ "dapo/kept_prompts_ratio": 0.48550726084605506,
2260
+ "dapo/num_sampling_attempts": 2.875,
2261
+ "dapo/sampling_efficiency": 44.166666666666664,
2262
+ "dapo/total_prompts_processed": 17.25,
2263
+ "dapo/valid_prompts_collected": 6.0,
2264
+ "epoch": 0.136,
2265
+ "grad_norm": 0.014920210465788841,
2266
+ "kl": 0.0003477334976196289,
2267
+ "learning_rate": 4.873721045679706e-07,
2268
+ "loss": 0.0967,
2269
+ "reward": 0.7152486853301525,
2270
+ "reward_std": 0.9450967088341713,
2271
+ "step": 119
2272
+ },
2273
+ {
2274
+ "clip_fraction": 0.0,
2275
+ "completion_length": 2588.423629760742,
2276
+ "dapo/avg_reward_std": 0.322092001636823,
2277
+ "dapo/filter_reward_index": 0.0,
2278
+ "dapo/kept_prompts_ratio": 0.4236111156642437,
2279
+ "dapo/num_sampling_attempts": 3.0,
2280
+ "dapo/sampling_efficiency": 40.62499999999999,
2281
+ "dapo/total_prompts_processed": 18.0,
2282
+ "dapo/valid_prompts_collected": 6.0,
2283
+ "epoch": 0.13714285714285715,
2284
+ "grad_norm": 0.009259553626179695,
2285
+ "kl": 0.0002923011779785156,
2286
+ "learning_rate": 4.79604490731896e-07,
2287
+ "loss": 0.0204,
2288
+ "reward": 0.5492150112986565,
2289
+ "reward_std": 0.9336576908826828,
2290
+ "step": 120
2291
+ },
2292
+ {
2293
+ "clip_fraction": 0.0,
2294
+ "completion_length": 2224.4583282470703,
2295
+ "dapo/avg_reward_std": 0.2846992796375638,
2296
+ "dapo/filter_reward_index": 0.0,
2297
+ "dapo/kept_prompts_ratio": 0.4603174633923031,
2298
+ "dapo/num_sampling_attempts": 2.625,
2299
+ "dapo/sampling_efficiency": 51.45833333333333,
2300
+ "dapo/total_prompts_processed": 15.75,
2301
+ "dapo/valid_prompts_collected": 6.0,
2302
+ "epoch": 0.1382857142857143,
2303
+ "grad_norm": 0.012681272812187672,
2304
+ "kl": 0.0002828836441040039,
2305
+ "learning_rate": 4.7185832004988133e-07,
2306
+ "loss": 0.084,
2307
+ "reward": 0.8260641098022461,
2308
+ "reward_std": 0.9569381102919579,
2309
+ "step": 121
2310
+ },
2311
+ {
2312
+ "clip_fraction": 0.0,
2313
+ "completion_length": 2042.4618453979492,
2314
+ "dapo/avg_reward_std": 0.21980997684754824,
2315
+ "dapo/filter_reward_index": 0.0,
2316
+ "dapo/kept_prompts_ratio": 0.2500000039213582,
2317
+ "dapo/num_sampling_attempts": 4.75,
2318
+ "dapo/sampling_efficiency": 32.916666666666664,
2319
+ "dapo/total_prompts_processed": 28.5,
2320
+ "dapo/valid_prompts_collected": 6.0,
2321
+ "epoch": 0.13942857142857143,
2322
+ "grad_norm": 0.014447196386754513,
2323
+ "kl": 0.0002307891845703125,
2324
+ "learning_rate": 4.641359520805548e-07,
2325
+ "loss": 0.0797,
2326
+ "reward": 0.5401283344253898,
2327
+ "reward_std": 0.8589324243366718,
2328
+ "step": 122
2329
+ },
2330
+ {
2331
+ "clip_fraction": 0.0,
2332
+ "completion_length": 1821.270851135254,
2333
+ "dapo/avg_reward_std": 0.30661167701085407,
2334
+ "dapo/filter_reward_index": 0.0,
2335
+ "dapo/kept_prompts_ratio": 0.4652777910232544,
2336
+ "dapo/num_sampling_attempts": 3.0,
2337
+ "dapo/sampling_efficiency": 51.979166666666664,
2338
+ "dapo/total_prompts_processed": 18.0,
2339
+ "dapo/valid_prompts_collected": 6.0,
2340
+ "epoch": 0.14057142857142857,
2341
+ "grad_norm": 0.012464089319109917,
2342
+ "kl": 0.00021943449974060059,
2343
+ "learning_rate": 4.5643973913200837e-07,
2344
+ "loss": 0.0524,
2345
+ "reward": 0.7340994998812675,
2346
+ "reward_std": 0.948041707277298,
2347
+ "step": 123
2348
+ },
2349
+ {
2350
+ "clip_fraction": 0.0,
2351
+ "completion_length": 1917.8576431274414,
2352
+ "dapo/avg_reward_std": 0.26710175829274313,
2353
+ "dapo/filter_reward_index": 0.0,
2354
+ "dapo/kept_prompts_ratio": 0.3452381023338863,
2355
+ "dapo/num_sampling_attempts": 3.5,
2356
+ "dapo/sampling_efficiency": 35.83333333333333,
2357
+ "dapo/total_prompts_processed": 21.0,
2358
+ "dapo/valid_prompts_collected": 6.0,
2359
+ "epoch": 0.1417142857142857,
2360
+ "grad_norm": 0.01295761950314045,
2361
+ "kl": 0.00027441978454589844,
2362
+ "learning_rate": 4.4877202554526084e-07,
2363
+ "loss": 0.0395,
2364
+ "reward": 0.44990649446845055,
2365
+ "reward_std": 0.9298848733305931,
2366
+ "step": 124
2367
+ },
2368
+ {
2369
+ "clip_fraction": 0.0,
2370
+ "completion_length": 2151.381965637207,
2371
+ "dapo/avg_reward_std": 0.2770674500776374,
2372
+ "dapo/filter_reward_index": 0.0,
2373
+ "dapo/kept_prompts_ratio": 0.4347826164701711,
2374
+ "dapo/num_sampling_attempts": 2.875,
2375
+ "dapo/sampling_efficiency": 55.729166666666664,
2376
+ "dapo/total_prompts_processed": 17.25,
2377
+ "dapo/valid_prompts_collected": 6.0,
2378
+ "epoch": 0.14285714285714285,
2379
+ "grad_norm": 0.011758905835449696,
2380
+ "kl": 0.00027173757553100586,
2381
+ "learning_rate": 4.4113514698014953e-07,
2382
+ "loss": -0.0284,
2383
+ "reward": 0.5582777298986912,
2384
+ "reward_std": 0.9428363367915154,
2385
+ "step": 125
2386
+ },
2387
+ {
2388
+ "clip_fraction": 0.0,
2389
+ "completion_length": 1810.0104522705078,
2390
+ "dapo/avg_reward_std": 0.21576001878940698,
2391
+ "dapo/filter_reward_index": 0.0,
2392
+ "dapo/kept_prompts_ratio": 0.2777777860562007,
2393
+ "dapo/num_sampling_attempts": 4.125,
2394
+ "dapo/sampling_efficiency": 36.45833333333333,
2395
+ "dapo/total_prompts_processed": 24.75,
2396
+ "dapo/valid_prompts_collected": 6.0,
2397
+ "epoch": 0.144,
2398
+ "grad_norm": 0.011465526185929775,
2399
+ "kl": 0.00021535158157348633,
2400
+ "learning_rate": 4.3353142970386557e-07,
2401
+ "loss": -0.0108,
2402
+ "reward": 0.6622855560854077,
2403
+ "reward_std": 0.9075748100876808,
2404
+ "step": 126
2405
+ },
2406
+ {
2407
+ "clip_fraction": 0.0,
2408
+ "completion_length": 2243.732635498047,
2409
+ "dapo/avg_reward_std": 0.2920382275031163,
2410
+ "dapo/filter_reward_index": 0.0,
2411
+ "dapo/kept_prompts_ratio": 0.3782051377571546,
2412
+ "dapo/num_sampling_attempts": 3.25,
2413
+ "dapo/sampling_efficiency": 32.291666666666664,
2414
+ "dapo/total_prompts_processed": 19.5,
2415
+ "dapo/valid_prompts_collected": 6.0,
2416
+ "epoch": 0.14514285714285713,
2417
+ "grad_norm": 0.01050955057144165,
2418
+ "kl": 0.00027239322662353516,
2419
+ "learning_rate": 4.2596318988235037e-07,
2420
+ "loss": 0.0464,
2421
+ "reward": 0.533456489443779,
2422
+ "reward_std": 0.9191579967737198,
2423
+ "step": 127
2424
+ },
2425
+ {
2426
+ "clip_fraction": 0.0,
2427
+ "completion_length": 2544.6875,
2428
+ "dapo/avg_reward_std": 0.27494730835869197,
2429
+ "dapo/filter_reward_index": 0.0,
2430
+ "dapo/kept_prompts_ratio": 0.46031746906893595,
2431
+ "dapo/num_sampling_attempts": 2.625,
2432
+ "dapo/sampling_efficiency": 46.24999999999999,
2433
+ "dapo/total_prompts_processed": 15.75,
2434
+ "dapo/valid_prompts_collected": 6.0,
2435
+ "epoch": 0.1462857142857143,
2436
+ "grad_norm": 0.017111552879214287,
2437
+ "kl": 0.0003796815872192383,
2438
+ "learning_rate": 4.1843273287476854e-07,
2439
+ "loss": 0.0784,
2440
+ "reward": 0.7016365043818951,
2441
+ "reward_std": 0.986565351486206,
2442
+ "step": 128
2443
+ },
2444
+ {
2445
+ "clip_fraction": 0.0,
2446
+ "completion_length": 2367.3646240234375,
2447
+ "dapo/avg_reward_std": 0.23454804884062874,
2448
+ "dapo/filter_reward_index": 0.0,
2449
+ "dapo/kept_prompts_ratio": 0.34567901988824207,
2450
+ "dapo/num_sampling_attempts": 3.375,
2451
+ "dapo/sampling_efficiency": 47.222222222222214,
2452
+ "dapo/total_prompts_processed": 20.25,
2453
+ "dapo/valid_prompts_collected": 6.0,
2454
+ "epoch": 0.14742857142857144,
2455
+ "grad_norm": 0.009941876865923405,
2456
+ "kl": 0.00034287571907043457,
2457
+ "learning_rate": 4.1094235253127374e-07,
2458
+ "loss": 0.0061,
2459
+ "reward": 0.7930427435785532,
2460
+ "reward_std": 0.9500019550323486,
2461
+ "step": 129
2462
+ },
2463
+ {
2464
+ "clip_fraction": 0.0,
2465
+ "completion_length": 2704.125045776367,
2466
+ "dapo/avg_reward_std": 0.33850957382292973,
2467
+ "dapo/filter_reward_index": 0.0,
2468
+ "dapo/kept_prompts_ratio": 0.5158730284089134,
2469
+ "dapo/num_sampling_attempts": 2.625,
2470
+ "dapo/sampling_efficiency": 47.291666666666664,
2471
+ "dapo/total_prompts_processed": 15.75,
2472
+ "dapo/valid_prompts_collected": 6.0,
2473
+ "epoch": 0.14857142857142858,
2474
+ "grad_norm": 0.012839604169130325,
2475
+ "kl": 0.0004292726516723633,
2476
+ "learning_rate": 4.034943304942796e-07,
2477
+ "loss": 0.0353,
2478
+ "reward": 0.6285950914025307,
2479
+ "reward_std": 0.9615181535482407,
2480
+ "step": 130
2481
+ },
2482
+ {
2483
+ "clip_fraction": 0.0,
2484
+ "completion_length": 2475.5104370117188,
2485
+ "dapo/avg_reward_std": 0.30972740189595654,
2486
+ "dapo/filter_reward_index": 0.0,
2487
+ "dapo/kept_prompts_ratio": 0.4696969762444496,
2488
+ "dapo/num_sampling_attempts": 2.75,
2489
+ "dapo/sampling_efficiency": 52.82738095238095,
2490
+ "dapo/total_prompts_processed": 16.5,
2491
+ "dapo/valid_prompts_collected": 6.0,
2492
+ "epoch": 0.14971428571428572,
2493
+ "grad_norm": 0.00980184692889452,
2494
+ "kl": 0.00036525726318359375,
2495
+ "learning_rate": 3.9609093550344907e-07,
2496
+ "loss": -0.0176,
2497
+ "reward": 0.7969067245721817,
2498
+ "reward_std": 0.9501392021775246,
2499
+ "step": 131
2500
+ },
2501
+ {
2502
+ "clip_fraction": 0.0,
2503
+ "completion_length": 2550.0034790039062,
2504
+ "dapo/avg_reward_std": 0.2723326214722225,
2505
+ "dapo/filter_reward_index": 0.0,
2506
+ "dapo/kept_prompts_ratio": 0.30952381661960054,
2507
+ "dapo/num_sampling_attempts": 3.5,
2508
+ "dapo/sampling_efficiency": 29.999999999999993,
2509
+ "dapo/total_prompts_processed": 21.0,
2510
+ "dapo/valid_prompts_collected": 6.0,
2511
+ "epoch": 0.15085714285714286,
2512
+ "grad_norm": 0.017491888254880905,
2513
+ "kl": 0.00042629241943359375,
2514
+ "learning_rate": 3.8873442270461485e-07,
2515
+ "loss": 0.0772,
2516
+ "reward": 0.6202478259801865,
2517
+ "reward_std": 0.9556004330515862,
2518
+ "step": 132
2519
+ },
2520
+ {
2521
+ "clip_fraction": 0.0,
2522
+ "completion_length": 2056.5000534057617,
2523
+ "dapo/avg_reward_std": 0.3334644228219986,
2524
+ "dapo/filter_reward_index": 0.0,
2525
+ "dapo/kept_prompts_ratio": 0.5333333447575569,
2526
+ "dapo/num_sampling_attempts": 2.5,
2527
+ "dapo/sampling_efficiency": 47.916666666666664,
2528
+ "dapo/total_prompts_processed": 15.0,
2529
+ "dapo/valid_prompts_collected": 6.0,
2530
+ "epoch": 0.152,
2531
+ "grad_norm": 0.012553170323371887,
2532
+ "kl": 0.0004407167434692383,
2533
+ "learning_rate": 3.8142703296283953e-07,
2534
+ "loss": -0.0185,
2535
+ "reward": 0.7429189011454582,
2536
+ "reward_std": 1.0187850967049599,
2537
+ "step": 133
2538
+ },
2539
+ {
2540
+ "clip_fraction": 0.0,
2541
+ "completion_length": 2446.9270935058594,
2542
+ "dapo/avg_reward_std": 0.28044995562783603,
2543
+ "dapo/filter_reward_index": 0.0,
2544
+ "dapo/kept_prompts_ratio": 0.32758621254871634,
2545
+ "dapo/num_sampling_attempts": 3.625,
2546
+ "dapo/sampling_efficiency": 32.41071428571428,
2547
+ "dapo/total_prompts_processed": 21.75,
2548
+ "dapo/valid_prompts_collected": 6.0,
2549
+ "epoch": 0.15314285714285714,
2550
+ "grad_norm": 0.010931877419352531,
2551
+ "kl": 0.00035947561264038086,
2552
+ "learning_rate": 3.7417099217982686e-07,
2553
+ "loss": 0.0372,
2554
+ "reward": 0.6385626457631588,
2555
+ "reward_std": 0.9372833296656609,
2556
+ "step": 134
2557
+ },
2558
+ {
2559
+ "clip_fraction": 0.0,
2560
+ "completion_length": 2216.4479370117188,
2561
+ "dapo/avg_reward_std": 0.3021530819435914,
2562
+ "dapo/filter_reward_index": 0.0,
2563
+ "dapo/kept_prompts_ratio": 0.40972223070760566,
2564
+ "dapo/num_sampling_attempts": 3.0,
2565
+ "dapo/sampling_efficiency": 47.291666666666664,
2566
+ "dapo/total_prompts_processed": 18.0,
2567
+ "dapo/valid_prompts_collected": 6.0,
2568
+ "epoch": 0.15428571428571428,
2569
+ "grad_norm": 0.014175104908645153,
2570
+ "kl": 0.00040656328201293945,
2571
+ "learning_rate": 3.6696851061588994e-07,
2572
+ "loss": 0.0637,
2573
+ "reward": 0.6612157337367535,
2574
+ "reward_std": 0.9335344135761261,
2575
+ "step": 135
2576
+ },
2577
+ {
2578
+ "clip_fraction": 0.0,
2579
+ "completion_length": 1879.3889083862305,
2580
+ "dapo/avg_reward_std": 0.24328701660550875,
2581
+ "dapo/filter_reward_index": 0.0,
2582
+ "dapo/kept_prompts_ratio": 0.36206897219707224,
2583
+ "dapo/num_sampling_attempts": 3.625,
2584
+ "dapo/sampling_efficiency": 31.77083333333333,
2585
+ "dapo/total_prompts_processed": 21.75,
2586
+ "dapo/valid_prompts_collected": 6.0,
2587
+ "epoch": 0.15542857142857142,
2588
+ "grad_norm": 0.011906570754945278,
2589
+ "kl": 0.0002865791320800781,
2590
+ "learning_rate": 3.5982178221668533e-07,
2591
+ "loss": 0.0254,
2592
+ "reward": 0.621966740116477,
2593
+ "reward_std": 0.9788949191570282,
2594
+ "step": 136
2595
+ },
2596
+ {
2597
+ "clip_fraction": 0.0,
2598
+ "completion_length": 2137.1180725097656,
2599
+ "dapo/avg_reward_std": 0.19872227481433324,
2600
+ "dapo/filter_reward_index": 0.0,
2601
+ "dapo/kept_prompts_ratio": 0.2857142903975078,
2602
+ "dapo/num_sampling_attempts": 4.375,
2603
+ "dapo/sampling_efficiency": 26.180555555555557,
2604
+ "dapo/total_prompts_processed": 26.25,
2605
+ "dapo/valid_prompts_collected": 6.0,
2606
+ "epoch": 0.15657142857142858,
2607
+ "grad_norm": 0.011921432800590992,
2608
+ "kl": 0.000335007905960083,
2609
+ "learning_rate": 3.5273298394491515e-07,
2610
+ "loss": 0.0425,
2611
+ "reward": 0.8858193010091782,
2612
+ "reward_std": 0.960886999964714,
2613
+ "step": 137
2614
+ },
2615
+ {
2616
+ "clip_fraction": 0.0,
2617
+ "completion_length": 2133.6875381469727,
2618
+ "dapo/avg_reward_std": 0.30985672700972783,
2619
+ "dapo/filter_reward_index": 0.0,
2620
+ "dapo/kept_prompts_ratio": 0.5079365216550373,
2621
+ "dapo/num_sampling_attempts": 2.625,
2622
+ "dapo/sampling_efficiency": 50.41666666666666,
2623
+ "dapo/total_prompts_processed": 15.75,
2624
+ "dapo/valid_prompts_collected": 6.0,
2625
+ "epoch": 0.15771428571428572,
2626
+ "grad_norm": 0.011959100142121315,
2627
+ "kl": 0.00031131505966186523,
2628
+ "learning_rate": 3.45704275117204e-07,
2629
+ "loss": 0.0473,
2630
+ "reward": 0.8114638328552246,
2631
+ "reward_std": 0.9208285436034203,
2632
+ "step": 138
2633
+ },
2634
+ {
2635
+ "clip_fraction": 0.0,
2636
+ "completion_length": 1887.6423797607422,
2637
+ "dapo/avg_reward_std": 0.30113077312707903,
2638
+ "dapo/filter_reward_index": 0.0,
2639
+ "dapo/kept_prompts_ratio": 0.45833334401249887,
2640
+ "dapo/num_sampling_attempts": 2.5,
2641
+ "dapo/sampling_efficiency": 47.916666666666664,
2642
+ "dapo/total_prompts_processed": 15.0,
2643
+ "dapo/valid_prompts_collected": 6.0,
2644
+ "epoch": 0.15885714285714286,
2645
+ "grad_norm": 0.01248843315988779,
2646
+ "kl": 0.0003123283386230469,
2647
+ "learning_rate": 3.387377967463493e-07,
2648
+ "loss": 0.0133,
2649
+ "reward": 0.4802711680531502,
2650
+ "reward_std": 0.9749159812927246,
2651
+ "step": 139
2652
+ },
2653
+ {
2654
+ "clip_fraction": 0.0,
2655
+ "completion_length": 1555.0173835754395,
2656
+ "dapo/avg_reward_std": 0.2354262595375379,
2657
+ "dapo/filter_reward_index": 0.0,
2658
+ "dapo/kept_prompts_ratio": 0.3750000074505806,
2659
+ "dapo/num_sampling_attempts": 3.0,
2660
+ "dapo/sampling_efficiency": 58.05555555555555,
2661
+ "dapo/total_prompts_processed": 18.0,
2662
+ "dapo/valid_prompts_collected": 6.0,
2663
+ "epoch": 0.16,
2664
+ "grad_norm": 0.01651233807206154,
2665
+ "kl": 0.0003075599670410156,
2666
+ "learning_rate": 3.3183567088914833e-07,
2667
+ "loss": -0.0302,
2668
+ "reward": 0.8893436007201672,
2669
+ "reward_std": 0.9632327631115913,
2670
+ "step": 140
2671
+ },
2672
+ {
2673
+ "clip_fraction": 0.0,
2674
+ "completion_length": 2886.878517150879,
2675
+ "dapo/avg_reward_std": 0.2881770460378556,
2676
+ "dapo/filter_reward_index": 0.0,
2677
+ "dapo/kept_prompts_ratio": 0.48412698933056425,
2678
+ "dapo/num_sampling_attempts": 2.625,
2679
+ "dapo/sampling_efficiency": 53.12499999999999,
2680
+ "dapo/total_prompts_processed": 15.75,
2681
+ "dapo/valid_prompts_collected": 6.0,
2682
+ "epoch": 0.16114285714285714,
2683
+ "grad_norm": 0.010870919562876225,
2684
+ "kl": 0.0004533529281616211,
2685
+ "learning_rate": 3.250000000000001e-07,
2686
+ "loss": 0.0545,
2687
+ "reward": 0.612054293975234,
2688
+ "reward_std": 0.9482586532831192,
2689
+ "step": 141
2690
+ },
2691
+ {
2692
+ "clip_fraction": 0.0,
2693
+ "completion_length": 1937.1458740234375,
2694
+ "dapo/avg_reward_std": 0.3629945723906807,
2695
+ "dapo/filter_reward_index": 0.0,
2696
+ "dapo/kept_prompts_ratio": 0.47826088057911914,
2697
+ "dapo/num_sampling_attempts": 2.875,
2698
+ "dapo/sampling_efficiency": 47.39583333333333,
2699
+ "dapo/total_prompts_processed": 17.25,
2700
+ "dapo/valid_prompts_collected": 6.0,
2701
+ "epoch": 0.16228571428571428,
2702
+ "grad_norm": 0.011180016212165356,
2703
+ "kl": 0.00029921531677246094,
2704
+ "learning_rate": 3.182328662904756e-07,
2705
+ "loss": -0.0113,
2706
+ "reward": 0.6175431702286005,
2707
+ "reward_std": 0.9589766189455986,
2708
+ "step": 142
2709
+ },
2710
+ {
2711
+ "clip_fraction": 0.0,
2712
+ "completion_length": 2465.3159942626953,
2713
+ "dapo/avg_reward_std": 0.3803708272821763,
2714
+ "dapo/filter_reward_index": 0.0,
2715
+ "dapo/kept_prompts_ratio": 0.5588235381771537,
2716
+ "dapo/num_sampling_attempts": 2.125,
2717
+ "dapo/sampling_efficiency": 60.41666666666666,
2718
+ "dapo/total_prompts_processed": 12.75,
2719
+ "dapo/valid_prompts_collected": 6.0,
2720
+ "epoch": 0.16342857142857142,
2721
+ "grad_norm": 0.012431374751031399,
2722
+ "kl": 0.00048232078552246094,
2723
+ "learning_rate": 3.115363310950578e-07,
2724
+ "loss": 0.0679,
2725
+ "reward": 0.7579541122540832,
2726
+ "reward_std": 0.9723308756947517,
2727
+ "step": 143
2728
+ },
2729
+ {
2730
+ "clip_fraction": 0.0,
2731
+ "completion_length": 2344.1180419921875,
2732
+ "dapo/avg_reward_std": 0.21175828889796608,
2733
+ "dapo/filter_reward_index": 0.0,
2734
+ "dapo/kept_prompts_ratio": 0.26315790179528686,
2735
+ "dapo/num_sampling_attempts": 4.75,
2736
+ "dapo/sampling_efficiency": 25.347222222222218,
2737
+ "dapo/total_prompts_processed": 28.5,
2738
+ "dapo/valid_prompts_collected": 6.0,
2739
+ "epoch": 0.16457142857142856,
2740
+ "grad_norm": 0.010860033333301544,
2741
+ "kl": 0.0004448890686035156,
2742
+ "learning_rate": 3.0491243424323783e-07,
2743
+ "loss": -0.0005,
2744
+ "reward": 0.5643926626071334,
2745
+ "reward_std": 0.9328553825616837,
2746
+ "step": 144
2747
+ },
2748
+ {
2749
+ "clip_fraction": 0.0,
2750
+ "completion_length": 2482.1389389038086,
2751
+ "dapo/avg_reward_std": 0.37865253537893295,
2752
+ "dapo/filter_reward_index": 0.0,
2753
+ "dapo/kept_prompts_ratio": 0.6145833432674408,
2754
+ "dapo/num_sampling_attempts": 2.0,
2755
+ "dapo/sampling_efficiency": 58.33333333333333,
2756
+ "dapo/total_prompts_processed": 12.0,
2757
+ "dapo/valid_prompts_collected": 6.0,
2758
+ "epoch": 0.1657142857142857,
2759
+ "grad_norm": 0.011065399274230003,
2760
+ "kl": 0.0004658699035644531,
2761
+ "learning_rate": 2.9836319343816397e-07,
2762
+ "loss": 0.0412,
2763
+ "reward": 0.8742740526795387,
2764
+ "reward_std": 0.9688765779137611,
2765
+ "step": 145
2766
+ },
2767
+ {
2768
+ "clip_fraction": 0.0,
2769
+ "completion_length": 2297.6806030273438,
2770
+ "dapo/avg_reward_std": 0.40159281912971945,
2771
+ "dapo/filter_reward_index": 0.0,
2772
+ "dapo/kept_prompts_ratio": 0.627450992955881,
2773
+ "dapo/num_sampling_attempts": 2.125,
2774
+ "dapo/sampling_efficiency": 56.24999999999999,
2775
+ "dapo/total_prompts_processed": 12.75,
2776
+ "dapo/valid_prompts_collected": 6.0,
2777
+ "epoch": 0.16685714285714287,
2778
+ "grad_norm": 0.014852684922516346,
2779
+ "kl": 0.00038546323776245117,
2780
+ "learning_rate": 2.918906036420294e-07,
2781
+ "loss": 0.1043,
2782
+ "reward": 0.7259054481983185,
2783
+ "reward_std": 0.9452414810657501,
2784
+ "step": 146
2785
+ },
2786
+ {
2787
+ "clip_fraction": 0.0,
2788
+ "completion_length": 2485.2639389038086,
2789
+ "dapo/avg_reward_std": 0.2594580222731051,
2790
+ "dapo/filter_reward_index": 0.0,
2791
+ "dapo/kept_prompts_ratio": 0.42028986435869464,
2792
+ "dapo/num_sampling_attempts": 2.875,
2793
+ "dapo/sampling_efficiency": 43.125,
2794
+ "dapo/total_prompts_processed": 17.25,
2795
+ "dapo/valid_prompts_collected": 6.0,
2796
+ "epoch": 0.168,
2797
+ "grad_norm": 0.011770485900342464,
2798
+ "kl": 0.00037994980812072754,
2799
+ "learning_rate": 2.854966364683872e-07,
2800
+ "loss": 0.0414,
2801
+ "reward": 0.596230074763298,
2802
+ "reward_std": 0.944911852478981,
2803
+ "step": 147
2804
+ },
2805
+ {
2806
+ "clip_fraction": 0.0,
2807
+ "completion_length": 2030.6180877685547,
2808
+ "dapo/avg_reward_std": 0.28245899453759193,
2809
+ "dapo/filter_reward_index": 0.0,
2810
+ "dapo/kept_prompts_ratio": 0.43055556528270245,
2811
+ "dapo/num_sampling_attempts": 3.0,
2812
+ "dapo/sampling_efficiency": 47.70833333333333,
2813
+ "dapo/total_prompts_processed": 18.0,
2814
+ "dapo/valid_prompts_collected": 6.0,
2815
+ "epoch": 0.16914285714285715,
2816
+ "grad_norm": 0.010600890032947063,
2817
+ "kl": 0.0003261566162109375,
2818
+ "learning_rate": 2.791832395815782e-07,
2819
+ "loss": 0.018,
2820
+ "reward": 0.5254655107855797,
2821
+ "reward_std": 0.9357841089367867,
2822
+ "step": 148
2823
+ },
2824
+ {
2825
+ "clip_fraction": 0.0,
2826
+ "completion_length": 2956.184051513672,
2827
+ "dapo/avg_reward_std": 0.3112214480837186,
2828
+ "dapo/filter_reward_index": 0.0,
2829
+ "dapo/kept_prompts_ratio": 0.44444445334374905,
2830
+ "dapo/num_sampling_attempts": 3.0,
2831
+ "dapo/sampling_efficiency": 42.08333333333333,
2832
+ "dapo/total_prompts_processed": 18.0,
2833
+ "dapo/valid_prompts_collected": 6.0,
2834
+ "epoch": 0.1702857142857143,
2835
+ "grad_norm": 0.010259653441607952,
2836
+ "kl": 0.00048613548278808594,
2837
+ "learning_rate": 2.729523361034538e-07,
2838
+ "loss": 0.0339,
2839
+ "reward": 0.6315554305911064,
2840
+ "reward_std": 0.9876029044389725,
2841
+ "step": 149
2842
+ },
2843
+ {
2844
+ "clip_fraction": 0.0,
2845
+ "completion_length": 2855.0520629882812,
2846
+ "dapo/avg_reward_std": 0.32461989257070756,
2847
+ "dapo/filter_reward_index": 0.0,
2848
+ "dapo/kept_prompts_ratio": 0.5370370447635651,
2849
+ "dapo/num_sampling_attempts": 2.25,
2850
+ "dapo/sampling_efficiency": 55.20833333333333,
2851
+ "dapo/total_prompts_processed": 13.5,
2852
+ "dapo/valid_prompts_collected": 6.0,
2853
+ "epoch": 0.17142857142857143,
2854
+ "grad_norm": 0.011087276972830296,
2855
+ "kl": 0.0005285739898681641,
2856
+ "learning_rate": 2.6680582402757324e-07,
2857
+ "loss": 0.054,
2858
+ "reward": 0.80087810754776,
2859
+ "reward_std": 1.0038108006119728,
2860
+ "step": 150
2861
+ },
2862
+ {
2863
+ "clip_fraction": 0.0,
2864
+ "completion_length": 2653.5834197998047,
2865
+ "dapo/avg_reward_std": 0.24287073779851198,
2866
+ "dapo/filter_reward_index": 0.0,
2867
+ "dapo/kept_prompts_ratio": 0.36979167396202683,
2868
+ "dapo/num_sampling_attempts": 4.0,
2869
+ "dapo/sampling_efficiency": 30.327380952380953,
2870
+ "dapo/total_prompts_processed": 24.0,
2871
+ "dapo/valid_prompts_collected": 6.0,
2872
+ "epoch": 0.17257142857142857,
2873
+ "grad_norm": 0.011102661490440369,
2874
+ "kl": 0.0005296468734741211,
2875
+ "learning_rate": 2.6074557564105724e-07,
2876
+ "loss": 0.0527,
2877
+ "reward": 0.7124785147607327,
2878
+ "reward_std": 0.9657682925462723,
2879
+ "step": 151
2880
+ },
2881
+ {
2882
+ "clip_fraction": 0.0,
2883
+ "completion_length": 2141.173614501953,
2884
+ "dapo/avg_reward_std": 0.25965497308763963,
2885
+ "dapo/filter_reward_index": 0.0,
2886
+ "dapo/kept_prompts_ratio": 0.3333333396706088,
2887
+ "dapo/num_sampling_attempts": 3.625,
2888
+ "dapo/sampling_efficiency": 36.875,
2889
+ "dapo/total_prompts_processed": 21.75,
2890
+ "dapo/valid_prompts_collected": 6.0,
2891
+ "epoch": 0.1737142857142857,
2892
+ "grad_norm": 0.01081050094217062,
2893
+ "kl": 0.00039577484130859375,
2894
+ "learning_rate": 2.547734369542718e-07,
2895
+ "loss": 0.0232,
2896
+ "reward": 0.5607589241117239,
2897
+ "reward_std": 0.9106607139110565,
2898
+ "step": 152
2899
+ },
2900
+ {
2901
+ "clip_fraction": 0.0,
2902
+ "completion_length": 2218.2882347106934,
2903
+ "dapo/avg_reward_std": 0.24554675072431564,
2904
+ "dapo/filter_reward_index": 0.0,
2905
+ "dapo/kept_prompts_ratio": 0.39583333767950535,
2906
+ "dapo/num_sampling_attempts": 3.0,
2907
+ "dapo/sampling_efficiency": 48.75,
2908
+ "dapo/total_prompts_processed": 18.0,
2909
+ "dapo/valid_prompts_collected": 6.0,
2910
+ "epoch": 0.17485714285714285,
2911
+ "grad_norm": 0.01474699191749096,
2912
+ "kl": 0.000436246395111084,
2913
+ "learning_rate": 2.488912271385139e-07,
2914
+ "loss": 0.0585,
2915
+ "reward": 0.4214355852454901,
2916
+ "reward_std": 0.9400415197014809,
2917
+ "step": 153
2918
+ },
2919
+ {
2920
+ "clip_fraction": 0.0,
2921
+ "completion_length": 2466.3368377685547,
2922
+ "dapo/avg_reward_std": 0.3308070342649113,
2923
+ "dapo/filter_reward_index": 0.0,
2924
+ "dapo/kept_prompts_ratio": 0.46212121776559134,
2925
+ "dapo/num_sampling_attempts": 2.75,
2926
+ "dapo/sampling_efficiency": 38.541666666666664,
2927
+ "dapo/total_prompts_processed": 16.5,
2928
+ "dapo/valid_prompts_collected": 6.0,
2929
+ "epoch": 0.176,
2930
+ "grad_norm": 0.011210402473807335,
2931
+ "kl": 0.0004417896270751953,
2932
+ "learning_rate": 2.4310073797187573e-07,
2933
+ "loss": -0.0244,
2934
+ "reward": 0.7323229797184467,
2935
+ "reward_std": 0.9493635967373848,
2936
+ "step": 154
2937
+ },
2938
+ {
2939
+ "clip_fraction": 0.0,
2940
+ "completion_length": 2012.8715438842773,
2941
+ "dapo/avg_reward_std": 0.2809670078754425,
2942
+ "dapo/filter_reward_index": 0.0,
2943
+ "dapo/kept_prompts_ratio": 0.4133333426713943,
2944
+ "dapo/num_sampling_attempts": 3.125,
2945
+ "dapo/sampling_efficiency": 35.83333333333333,
2946
+ "dapo/total_prompts_processed": 18.75,
2947
+ "dapo/valid_prompts_collected": 6.0,
2948
+ "epoch": 0.17714285714285713,
2949
+ "grad_norm": 0.01654733158648014,
2950
+ "kl": 0.00036275386810302734,
2951
+ "learning_rate": 2.374037332934512e-07,
2952
+ "loss": 0.0589,
2953
+ "reward": 0.6634213328361511,
2954
+ "reward_std": 0.8785304054617882,
2955
+ "step": 155
2956
+ },
2957
+ {
2958
+ "clip_fraction": 0.0,
2959
+ "completion_length": 2291.3021240234375,
2960
+ "dapo/avg_reward_std": 0.3599580733672432,
2961
+ "dapo/filter_reward_index": 0.0,
2962
+ "dapo/kept_prompts_ratio": 0.44927537182103033,
2963
+ "dapo/num_sampling_attempts": 2.875,
2964
+ "dapo/sampling_efficiency": 45.53571428571428,
2965
+ "dapo/total_prompts_processed": 17.25,
2966
+ "dapo/valid_prompts_collected": 6.0,
2967
+ "epoch": 0.1782857142857143,
2968
+ "grad_norm": 0.011936171911656857,
2969
+ "kl": 0.00043827295303344727,
2970
+ "learning_rate": 2.3180194846605364e-07,
2971
+ "loss": 0.0699,
2972
+ "reward": 0.8599490560591221,
2973
+ "reward_std": 0.9719394743442535,
2974
+ "step": 156
2975
+ },
2976
+ {
2977
+ "clip_fraction": 0.0,
2978
+ "completion_length": 2499.791702270508,
2979
+ "dapo/avg_reward_std": 0.3457585884766145,
2980
+ "dapo/filter_reward_index": 0.0,
2981
+ "dapo/kept_prompts_ratio": 0.5378787998448719,
2982
+ "dapo/num_sampling_attempts": 2.75,
2983
+ "dapo/sampling_efficiency": 48.95833333333333,
2984
+ "dapo/total_prompts_processed": 16.5,
2985
+ "dapo/valid_prompts_collected": 6.0,
2986
+ "epoch": 0.17942857142857144,
2987
+ "grad_norm": 0.01289551891386509,
2988
+ "kl": 0.00048601627349853516,
2989
+ "learning_rate": 2.2629708984760706e-07,
2990
+ "loss": 0.0584,
2991
+ "reward": 0.6511420179158449,
2992
+ "reward_std": 0.9461185112595558,
2993
+ "step": 157
2994
+ },
2995
+ {
2996
+ "clip_fraction": 0.0,
2997
+ "completion_length": 2437.9375228881836,
2998
+ "dapo/avg_reward_std": 0.23957703853475637,
2999
+ "dapo/filter_reward_index": 0.0,
3000
+ "dapo/kept_prompts_ratio": 0.36781609829129847,
3001
+ "dapo/num_sampling_attempts": 3.625,
3002
+ "dapo/sampling_efficiency": 37.291666666666664,
3003
+ "dapo/total_prompts_processed": 21.75,
3004
+ "dapo/valid_prompts_collected": 6.0,
3005
+ "epoch": 0.18057142857142858,
3006
+ "grad_norm": 0.012769551016390324,
3007
+ "kl": 0.0004298686981201172,
3008
+ "learning_rate": 2.2089083427137329e-07,
3009
+ "loss": 0.0258,
3010
+ "reward": 0.6606059782207012,
3011
+ "reward_std": 0.9088018089532852,
3012
+ "step": 158
3013
+ },
3014
+ {
3015
+ "clip_fraction": 0.0,
3016
+ "completion_length": 1726.5868225097656,
3017
+ "dapo/avg_reward_std": 0.3139249332573103,
3018
+ "dapo/filter_reward_index": 0.0,
3019
+ "dapo/kept_prompts_ratio": 0.3913043543048527,
3020
+ "dapo/num_sampling_attempts": 2.875,
3021
+ "dapo/sampling_efficiency": 55.416666666666664,
3022
+ "dapo/total_prompts_processed": 17.25,
3023
+ "dapo/valid_prompts_collected": 6.0,
3024
+ "epoch": 0.18171428571428572,
3025
+ "grad_norm": 0.013688490726053715,
3026
+ "kl": 0.00027683377265930176,
3027
+ "learning_rate": 2.1558482853517253e-07,
3028
+ "loss": 0.0506,
3029
+ "reward": 0.7147123599424958,
3030
+ "reward_std": 0.9531080722808838,
3031
+ "step": 159
3032
+ },
3033
+ {
3034
+ "clip_fraction": 0.0,
3035
+ "completion_length": 1593.003475189209,
3036
+ "dapo/avg_reward_std": 0.2799004193010001,
3037
+ "dapo/filter_reward_index": 0.0,
3038
+ "dapo/kept_prompts_ratio": 0.33908046782016754,
3039
+ "dapo/num_sampling_attempts": 3.625,
3040
+ "dapo/sampling_efficiency": 45.3125,
3041
+ "dapo/total_prompts_processed": 21.75,
3042
+ "dapo/valid_prompts_collected": 6.0,
3043
+ "epoch": 0.18285714285714286,
3044
+ "grad_norm": 0.020229365676641464,
3045
+ "kl": 0.00033217668533325195,
3046
+ "learning_rate": 2.1038068889975259e-07,
3047
+ "loss": 0.0296,
3048
+ "reward": 0.7677402682602406,
3049
+ "reward_std": 0.9385703578591347,
3050
+ "step": 160
3051
+ },
3052
+ {
3053
+ "clip_fraction": 0.0,
3054
+ "completion_length": 1877.9444274902344,
3055
+ "dapo/avg_reward_std": 0.36716995636622113,
3056
+ "dapo/filter_reward_index": 0.0,
3057
+ "dapo/kept_prompts_ratio": 0.4930555621782939,
3058
+ "dapo/num_sampling_attempts": 3.0,
3059
+ "dapo/sampling_efficiency": 44.49404761904761,
3060
+ "dapo/total_prompts_processed": 18.0,
3061
+ "dapo/valid_prompts_collected": 6.0,
3062
+ "epoch": 0.184,
3063
+ "grad_norm": 0.012556586414575577,
3064
+ "kl": 0.00037413835525512695,
3065
+ "learning_rate": 2.0528000059645995e-07,
3066
+ "loss": 0.0401,
3067
+ "reward": 0.6385876163840294,
3068
+ "reward_std": 0.9741755276918411,
3069
+ "step": 161
3070
+ },
3071
+ {
3072
+ "clip_fraction": 0.0,
3073
+ "completion_length": 2543.1145782470703,
3074
+ "dapo/avg_reward_std": 0.20304026060244618,
3075
+ "dapo/filter_reward_index": 0.0,
3076
+ "dapo/kept_prompts_ratio": 0.28921569226419225,
3077
+ "dapo/num_sampling_attempts": 4.25,
3078
+ "dapo/sampling_efficiency": 26.249999999999996,
3079
+ "dapo/total_prompts_processed": 25.5,
3080
+ "dapo/valid_prompts_collected": 6.0,
3081
+ "epoch": 0.18514285714285714,
3082
+ "grad_norm": 0.010984732769429684,
3083
+ "kl": 0.0005058050155639648,
3084
+ "learning_rate": 2.0028431734436308e-07,
3085
+ "loss": 0.0214,
3086
+ "reward": 0.8138710260391235,
3087
+ "reward_std": 0.937220610678196,
3088
+ "step": 162
3089
+ },
3090
+ {
3091
+ "clip_fraction": 0.0,
3092
+ "completion_length": 2579.7916946411133,
3093
+ "dapo/avg_reward_std": 0.2669851701049244,
3094
+ "dapo/filter_reward_index": 0.0,
3095
+ "dapo/kept_prompts_ratio": 0.3333333397612852,
3096
+ "dapo/num_sampling_attempts": 4.25,
3097
+ "dapo/sampling_efficiency": 31.38888888888889,
3098
+ "dapo/total_prompts_processed": 25.5,
3099
+ "dapo/valid_prompts_collected": 6.0,
3100
+ "epoch": 0.18628571428571428,
3101
+ "grad_norm": 0.01393849402666092,
3102
+ "kl": 0.0005407929420471191,
3103
+ "learning_rate": 1.9539516087697517e-07,
3104
+ "loss": 0.0557,
3105
+ "reward": 0.6086596520617604,
3106
+ "reward_std": 0.9360831007361412,
3107
+ "step": 163
3108
+ },
3109
+ {
3110
+ "clip_fraction": 0.0,
3111
+ "completion_length": 2303.781295776367,
3112
+ "dapo/avg_reward_std": 0.2889538109302521,
3113
+ "dapo/filter_reward_index": 0.0,
3114
+ "dapo/kept_prompts_ratio": 0.40384616129673445,
3115
+ "dapo/num_sampling_attempts": 3.25,
3116
+ "dapo/sampling_efficiency": 41.041666666666664,
3117
+ "dapo/total_prompts_processed": 19.5,
3118
+ "dapo/valid_prompts_collected": 6.0,
3119
+ "epoch": 0.18742857142857142,
3120
+ "grad_norm": 0.012467012740671635,
3121
+ "kl": 0.0005753040313720703,
3122
+ "learning_rate": 1.9061402047871833e-07,
3123
+ "loss": 0.0286,
3124
+ "reward": 0.7579413987696171,
3125
+ "reward_std": 0.966604009270668,
3126
+ "step": 164
3127
+ },
3128
+ {
3129
+ "clip_fraction": 0.0,
3130
+ "completion_length": 2215.8715744018555,
3131
+ "dapo/avg_reward_std": 0.2284111071910177,
3132
+ "dapo/filter_reward_index": 0.0,
3133
+ "dapo/kept_prompts_ratio": 0.3630952446588448,
3134
+ "dapo/num_sampling_attempts": 3.5,
3135
+ "dapo/sampling_efficiency": 49.72222222222222,
3136
+ "dapo/total_prompts_processed": 21.0,
3137
+ "dapo/valid_prompts_collected": 6.0,
3138
+ "epoch": 0.18857142857142858,
3139
+ "grad_norm": 0.013376005925238132,
3140
+ "kl": 0.00038570165634155273,
3141
+ "learning_rate": 1.8594235253127372e-07,
3142
+ "loss": 0.0737,
3143
+ "reward": 0.6369971446692944,
3144
+ "reward_std": 0.944696456193924,
3145
+ "step": 165
3146
+ },
3147
+ {
3148
+ "clip_fraction": 0.0,
3149
+ "completion_length": 2194.999984741211,
3150
+ "dapo/avg_reward_std": 0.35230770577555115,
3151
+ "dapo/filter_reward_index": 0.0,
3152
+ "dapo/kept_prompts_ratio": 0.5289855158847311,
3153
+ "dapo/num_sampling_attempts": 2.875,
3154
+ "dapo/sampling_efficiency": 40.416666666666664,
3155
+ "dapo/total_prompts_processed": 17.25,
3156
+ "dapo/valid_prompts_collected": 6.0,
3157
+ "epoch": 0.18971428571428572,
3158
+ "grad_norm": 0.00896221399307251,
3159
+ "kl": 0.0004324018955230713,
3160
+ "learning_rate": 1.8138158006995363e-07,
3161
+ "loss": -0.0087,
3162
+ "reward": 0.770520705729723,
3163
+ "reward_std": 0.9258415997028351,
3164
+ "step": 166
3165
+ },
3166
+ {
3167
+ "clip_fraction": 0.0,
3168
+ "completion_length": 2363.9861373901367,
3169
+ "dapo/avg_reward_std": 0.23058613193662544,
3170
+ "dapo/filter_reward_index": 0.0,
3171
+ "dapo/kept_prompts_ratio": 0.2763157930029066,
3172
+ "dapo/num_sampling_attempts": 4.75,
3173
+ "dapo/sampling_efficiency": 30.44642857142857,
3174
+ "dapo/total_prompts_processed": 28.5,
3175
+ "dapo/valid_prompts_collected": 6.0,
3176
+ "epoch": 0.19085714285714286,
3177
+ "grad_norm": 0.011913989670574665,
3178
+ "kl": 0.0005799531936645508,
3179
+ "learning_rate": 1.7693309235023127e-07,
3180
+ "loss": 0.0282,
3181
+ "reward": 0.8937316909432411,
3182
+ "reward_std": 0.9134809225797653,
3183
+ "step": 167
3184
+ },
3185
+ {
3186
+ "clip_fraction": 0.0,
3187
+ "completion_length": 1846.3229217529297,
3188
+ "dapo/avg_reward_std": 0.2788652099412063,
3189
+ "dapo/filter_reward_index": 0.0,
3190
+ "dapo/kept_prompts_ratio": 0.37931035459041595,
3191
+ "dapo/num_sampling_attempts": 3.625,
3192
+ "dapo/sampling_efficiency": 33.03571428571428,
3193
+ "dapo/total_prompts_processed": 21.75,
3194
+ "dapo/valid_prompts_collected": 6.0,
3195
+ "epoch": 0.192,
3196
+ "grad_norm": 0.013345438055694103,
3197
+ "kl": 0.00038933753967285156,
3198
+ "learning_rate": 1.7259824442455923e-07,
3199
+ "loss": 0.0657,
3200
+ "reward": 0.5173812105786055,
3201
+ "reward_std": 0.9046202600002289,
3202
+ "step": 168
3203
+ },
3204
+ {
3205
+ "clip_fraction": 0.0,
3206
+ "completion_length": 1632.9965515136719,
3207
+ "dapo/avg_reward_std": 0.33004767837978544,
3208
+ "dapo/filter_reward_index": 0.0,
3209
+ "dapo/kept_prompts_ratio": 0.49206350318023134,
3210
+ "dapo/num_sampling_attempts": 2.625,
3211
+ "dapo/sampling_efficiency": 51.45833333333333,
3212
+ "dapo/total_prompts_processed": 15.75,
3213
+ "dapo/valid_prompts_collected": 6.0,
3214
+ "epoch": 0.19314285714285714,
3215
+ "grad_norm": 0.016018711030483246,
3216
+ "kl": 0.0004235506057739258,
3217
+ "learning_rate": 1.6837835672960831e-07,
3218
+ "loss": -0.0266,
3219
+ "reward": 0.7293304707854986,
3220
+ "reward_std": 0.9580913484096527,
3221
+ "step": 169
3222
+ },
3223
+ {
3224
+ "clip_fraction": 0.0,
3225
+ "completion_length": 2218.357666015625,
3226
+ "dapo/avg_reward_std": 0.30882045084779913,
3227
+ "dapo/filter_reward_index": 0.0,
3228
+ "dapo/kept_prompts_ratio": 0.4242424287579276,
3229
+ "dapo/num_sampling_attempts": 2.75,
3230
+ "dapo/sampling_efficiency": 49.37499999999999,
3231
+ "dapo/total_prompts_processed": 16.5,
3232
+ "dapo/valid_prompts_collected": 6.0,
3233
+ "epoch": 0.19428571428571428,
3234
+ "grad_norm": 0.012691031210124493,
3235
+ "kl": 0.0005915164947509766,
3236
+ "learning_rate": 1.6427471468404952e-07,
3237
+ "loss": 0.0375,
3238
+ "reward": 0.731636168435216,
3239
+ "reward_std": 0.9506037011742592,
3240
+ "step": 170
3241
+ },
3242
+ {
3243
+ "clip_fraction": 0.0,
3244
+ "completion_length": 2086.989585876465,
3245
+ "dapo/avg_reward_std": 0.26685478786627453,
3246
+ "dapo/filter_reward_index": 0.0,
3247
+ "dapo/kept_prompts_ratio": 0.372222230831782,
3248
+ "dapo/num_sampling_attempts": 3.75,
3249
+ "dapo/sampling_efficiency": 36.45833333333333,
3250
+ "dapo/total_prompts_processed": 22.5,
3251
+ "dapo/valid_prompts_collected": 6.0,
3252
+ "epoch": 0.19542857142857142,
3253
+ "grad_norm": 0.0107533298432827,
3254
+ "kl": 0.00045359134674072266,
3255
+ "learning_rate": 1.6028856829700258e-07,
3256
+ "loss": 0.0268,
3257
+ "reward": 0.6401270348578691,
3258
+ "reward_std": 0.9421326443552971,
3259
+ "step": 171
3260
+ },
3261
+ {
3262
+ "clip_fraction": 0.0,
3263
+ "completion_length": 1523.298625946045,
3264
+ "dapo/avg_reward_std": 0.2958875367274651,
3265
+ "dapo/filter_reward_index": 0.0,
3266
+ "dapo/kept_prompts_ratio": 0.4294871888481654,
3267
+ "dapo/num_sampling_attempts": 3.25,
3268
+ "dapo/sampling_efficiency": 41.041666666666664,
3269
+ "dapo/total_prompts_processed": 19.5,
3270
+ "dapo/valid_prompts_collected": 6.0,
3271
+ "epoch": 0.19657142857142856,
3272
+ "grad_norm": 0.02487981878221035,
3273
+ "kl": 0.00044208765029907227,
3274
+ "learning_rate": 1.5642113178727193e-07,
3275
+ "loss": 0.0215,
3276
+ "reward": 0.5742892920970917,
3277
+ "reward_std": 0.9192508533596992,
3278
+ "step": 172
3279
+ },
3280
+ {
3281
+ "clip_fraction": 0.0,
3282
+ "completion_length": 2197.4722290039062,
3283
+ "dapo/avg_reward_std": 0.33716599914160644,
3284
+ "dapo/filter_reward_index": 0.0,
3285
+ "dapo/kept_prompts_ratio": 0.4545454619960351,
3286
+ "dapo/num_sampling_attempts": 2.75,
3287
+ "dapo/sampling_efficiency": 45.20833333333333,
3288
+ "dapo/total_prompts_processed": 16.5,
3289
+ "dapo/valid_prompts_collected": 6.0,
3290
+ "epoch": 0.1977142857142857,
3291
+ "grad_norm": 0.00999497715383768,
3292
+ "kl": 0.0006158351898193359,
3293
+ "learning_rate": 1.5267358321348285e-07,
3294
+ "loss": -0.0198,
3295
+ "reward": 0.6909432113170624,
3296
+ "reward_std": 0.9331774786114693,
3297
+ "step": 173
3298
+ },
3299
+ {
3300
+ "clip_fraction": 0.0,
3301
+ "completion_length": 2469.1909942626953,
3302
+ "dapo/avg_reward_std": 0.31674497947096825,
3303
+ "dapo/filter_reward_index": 0.0,
3304
+ "dapo/kept_prompts_ratio": 0.4722222325702508,
3305
+ "dapo/num_sampling_attempts": 3.0,
3306
+ "dapo/sampling_efficiency": 47.61904761904762,
3307
+ "dapo/total_prompts_processed": 18.0,
3308
+ "dapo/valid_prompts_collected": 6.0,
3309
+ "epoch": 0.19885714285714284,
3310
+ "grad_norm": 0.027324816212058067,
3311
+ "kl": 0.0005202293395996094,
3312
+ "learning_rate": 1.4904706411523448e-07,
3313
+ "loss": 0.1381,
3314
+ "reward": 0.7919853329658508,
3315
+ "reward_std": 0.9734821692109108,
3316
+ "step": 174
3317
+ },
3318
+ {
3319
+ "clip_fraction": 0.0,
3320
+ "completion_length": 2290.7292098999023,
3321
+ "dapo/avg_reward_std": 0.2796748812709536,
3322
+ "dapo/filter_reward_index": 0.0,
3323
+ "dapo/kept_prompts_ratio": 0.41071429369705065,
3324
+ "dapo/num_sampling_attempts": 3.5,
3325
+ "dapo/sampling_efficiency": 31.041666666666664,
3326
+ "dapo/total_prompts_processed": 21.0,
3327
+ "dapo/valid_prompts_collected": 6.0,
3328
+ "epoch": 0.2,
3329
+ "grad_norm": 0.011332061141729355,
3330
+ "kl": 0.000499039888381958,
3331
+ "learning_rate": 1.4554267916537495e-07,
3332
+ "loss": 0.0026,
3333
+ "reward": 0.5971913021057844,
3334
+ "reward_std": 0.9767839089035988,
3335
+ "step": 175
3336
+ },
3337
+ {
3338
+ "clip_fraction": 0.0,
3339
+ "completion_length": 2643.475685119629,
3340
+ "dapo/avg_reward_std": 0.30459834399976227,
3341
+ "dapo/filter_reward_index": 0.0,
3342
+ "dapo/kept_prompts_ratio": 0.4649122922044051,
3343
+ "dapo/num_sampling_attempts": 2.375,
3344
+ "dapo/sampling_efficiency": 54.58333333333333,
3345
+ "dapo/total_prompts_processed": 14.25,
3346
+ "dapo/valid_prompts_collected": 6.0,
3347
+ "epoch": 0.20114285714285715,
3348
+ "grad_norm": 0.011058920994400978,
3349
+ "kl": 0.0006421804428100586,
3350
+ "learning_rate": 1.4216149583350755e-07,
3351
+ "loss": 0.0243,
3352
+ "reward": 0.801079198718071,
3353
+ "reward_std": 1.0328236892819405,
3354
+ "step": 176
3355
+ },
3356
+ {
3357
+ "clip_fraction": 0.0,
3358
+ "completion_length": 2657.517364501953,
3359
+ "dapo/avg_reward_std": 0.268055671826005,
3360
+ "dapo/filter_reward_index": 0.0,
3361
+ "dapo/kept_prompts_ratio": 0.3072916711680591,
3362
+ "dapo/num_sampling_attempts": 4.0,
3363
+ "dapo/sampling_efficiency": 30.32738095238095,
3364
+ "dapo/total_prompts_processed": 24.0,
3365
+ "dapo/valid_prompts_collected": 6.0,
3366
+ "epoch": 0.2022857142857143,
3367
+ "grad_norm": 0.012514113448560238,
3368
+ "kl": 0.0006227493286132812,
3369
+ "learning_rate": 1.3890454406082956e-07,
3370
+ "loss": 0.066,
3371
+ "reward": 0.5342087037861347,
3372
+ "reward_std": 0.9403787776827812,
3373
+ "step": 177
3374
+ },
3375
+ {
3376
+ "clip_fraction": 0.0,
3377
+ "completion_length": 1730.2395935058594,
3378
+ "dapo/avg_reward_std": 0.22906314557598484,
3379
+ "dapo/filter_reward_index": 0.0,
3380
+ "dapo/kept_prompts_ratio": 0.354838716406976,
3381
+ "dapo/num_sampling_attempts": 2.625,
3382
+ "dapo/sampling_efficiency": 49.99999999999999,
3383
+ "dapo/total_prompts_processed": 15.75,
3384
+ "dapo/valid_prompts_collected": 6.0,
3385
+ "epoch": 0.20342857142857143,
3386
+ "grad_norm": 0.013909725472331047,
3387
+ "kl": 0.0004641413688659668,
3388
+ "learning_rate": 1.3577281594640182e-07,
3389
+ "loss": -0.0032,
3390
+ "reward": 0.817855941131711,
3391
+ "reward_std": 0.9715805351734161,
3392
+ "step": 178
3393
+ },
3394
+ {
3395
+ "clip_fraction": 0.0,
3396
+ "completion_length": 1916.9652633666992,
3397
+ "dapo/avg_reward_std": 0.33905652307328726,
3398
+ "dapo/filter_reward_index": 0.0,
3399
+ "dapo/kept_prompts_ratio": 0.5000000085149493,
3400
+ "dapo/num_sampling_attempts": 2.625,
3401
+ "dapo/sampling_efficiency": 49.99999999999999,
3402
+ "dapo/total_prompts_processed": 15.75,
3403
+ "dapo/valid_prompts_collected": 6.0,
3404
+ "epoch": 0.20457142857142857,
3405
+ "grad_norm": 0.010170280002057552,
3406
+ "kl": 0.00033092498779296875,
3407
+ "learning_rate": 1.3276726544494571e-07,
3408
+ "loss": 0.0153,
3409
+ "reward": 0.6332587338984013,
3410
+ "reward_std": 0.9844094663858414,
3411
+ "step": 179
3412
+ },
3413
+ {
3414
+ "clip_fraction": 0.0,
3415
+ "completion_length": 2013.7534942626953,
3416
+ "dapo/avg_reward_std": 0.4115603660282336,
3417
+ "dapo/filter_reward_index": 0.0,
3418
+ "dapo/kept_prompts_ratio": 0.5175438719360452,
3419
+ "dapo/num_sampling_attempts": 2.375,
3420
+ "dapo/sampling_efficiency": 48.95833333333333,
3421
+ "dapo/total_prompts_processed": 14.25,
3422
+ "dapo/valid_prompts_collected": 6.0,
3423
+ "epoch": 0.2057142857142857,
3424
+ "grad_norm": 0.010059732012450695,
3425
+ "kl": 0.0004872828722000122,
3426
+ "learning_rate": 1.2988880807625927e-07,
3427
+ "loss": 0.012,
3428
+ "reward": 0.7964395936578512,
3429
+ "reward_std": 0.9064052030444145,
3430
+ "step": 180
3431
+ },
3432
+ {
3433
+ "clip_fraction": 0.0,
3434
+ "completion_length": 2538.3159713745117,
3435
+ "dapo/avg_reward_std": 0.3185795678032769,
3436
+ "dapo/filter_reward_index": 0.0,
3437
+ "dapo/kept_prompts_ratio": 0.3703703775450035,
3438
+ "dapo/num_sampling_attempts": 3.375,
3439
+ "dapo/sampling_efficiency": 40.0,
3440
+ "dapo/total_prompts_processed": 20.25,
3441
+ "dapo/valid_prompts_collected": 6.0,
3442
+ "epoch": 0.20685714285714285,
3443
+ "grad_norm": 0.009190794080495834,
3444
+ "kl": 0.0005941390991210938,
3445
+ "learning_rate": 1.2713832064634125e-07,
3446
+ "loss": -0.0091,
3447
+ "reward": 0.6052752519026399,
3448
+ "reward_std": 0.9398948326706886,
3449
+ "step": 181
3450
+ },
3451
+ {
3452
+ "clip_fraction": 0.0,
3453
+ "completion_length": 1992.0277557373047,
3454
+ "dapo/avg_reward_std": 0.30058977752923965,
3455
+ "dapo/filter_reward_index": 0.0,
3456
+ "dapo/kept_prompts_ratio": 0.3392857238650322,
3457
+ "dapo/num_sampling_attempts": 3.5,
3458
+ "dapo/sampling_efficiency": 45.32738095238095,
3459
+ "dapo/total_prompts_processed": 21.0,
3460
+ "dapo/valid_prompts_collected": 6.0,
3461
+ "epoch": 0.208,
3462
+ "grad_norm": 0.017918387427926064,
3463
+ "kl": 0.00043332576751708984,
3464
+ "learning_rate": 1.2451664098030743e-07,
3465
+ "loss": 0.0782,
3466
+ "reward": 0.7308525424450636,
3467
+ "reward_std": 0.8988610878586769,
3468
+ "step": 182
3469
+ },
3470
+ {
3471
+ "clip_fraction": 0.0,
3472
+ "completion_length": 2368.312515258789,
3473
+ "dapo/avg_reward_std": 0.2227620858213176,
3474
+ "dapo/filter_reward_index": 0.0,
3475
+ "dapo/kept_prompts_ratio": 0.40579710317694623,
3476
+ "dapo/num_sampling_attempts": 2.875,
3477
+ "dapo/sampling_efficiency": 48.33333333333333,
3478
+ "dapo/total_prompts_processed": 17.25,
3479
+ "dapo/valid_prompts_collected": 6.0,
3480
+ "epoch": 0.20914285714285713,
3481
+ "grad_norm": 0.01093615498393774,
3482
+ "kl": 0.0005226731300354004,
3483
+ "learning_rate": 1.220245676671809e-07,
3484
+ "loss": -0.0097,
3485
+ "reward": 0.6296821031719446,
3486
+ "reward_std": 0.9496165588498116,
3487
+ "step": 183
3488
+ },
3489
+ {
3490
+ "clip_fraction": 0.0,
3491
+ "completion_length": 1855.0486297607422,
3492
+ "dapo/avg_reward_std": 0.3308859848976135,
3493
+ "dapo/filter_reward_index": 0.0,
3494
+ "dapo/kept_prompts_ratio": 0.4133333384990692,
3495
+ "dapo/num_sampling_attempts": 3.125,
3496
+ "dapo/sampling_efficiency": 41.666666666666664,
3497
+ "dapo/total_prompts_processed": 18.75,
3498
+ "dapo/valid_prompts_collected": 6.0,
3499
+ "epoch": 0.2102857142857143,
3500
+ "grad_norm": 0.013805963099002838,
3501
+ "kl": 0.0004195570945739746,
3502
+ "learning_rate": 1.1966285981663407e-07,
3503
+ "loss": 0.0542,
3504
+ "reward": 0.8230033777654171,
3505
+ "reward_std": 0.9269852489233017,
3506
+ "step": 184
3507
+ },
3508
+ {
3509
+ "clip_fraction": 0.0,
3510
+ "completion_length": 2737.260452270508,
3511
+ "dapo/avg_reward_std": 0.3074522775908311,
3512
+ "dapo/filter_reward_index": 0.0,
3513
+ "dapo/kept_prompts_ratio": 0.45138889489074546,
3514
+ "dapo/num_sampling_attempts": 3.0,
3515
+ "dapo/sampling_efficiency": 49.37499999999999,
3516
+ "dapo/total_prompts_processed": 18.0,
3517
+ "dapo/valid_prompts_collected": 6.0,
3518
+ "epoch": 0.21142857142857144,
3519
+ "grad_norm": 0.01179632730782032,
3520
+ "kl": 0.0006718635559082031,
3521
+ "learning_rate": 1.1743223682775649e-07,
3522
+ "loss": 0.0529,
3523
+ "reward": 0.6228375509381294,
3524
+ "reward_std": 0.9775977432727814,
3525
+ "step": 185
3526
+ },
3527
+ {
3528
+ "clip_fraction": 0.0,
3529
+ "completion_length": 2526.899368286133,
3530
+ "dapo/avg_reward_std": 0.2964219942688942,
3531
+ "dapo/filter_reward_index": 0.0,
3532
+ "dapo/kept_prompts_ratio": 0.48333334401249883,
3533
+ "dapo/num_sampling_attempts": 2.5,
3534
+ "dapo/sampling_efficiency": 58.33333333333333,
3535
+ "dapo/total_prompts_processed": 15.0,
3536
+ "dapo/valid_prompts_collected": 6.0,
3537
+ "epoch": 0.21257142857142858,
3538
+ "grad_norm": 0.014796112664043903,
3539
+ "kl": 0.0005816221237182617,
3540
+ "learning_rate": 1.1533337816991931e-07,
3541
+ "loss": 0.088,
3542
+ "reward": 0.8448536917567253,
3543
+ "reward_std": 0.9608767181634903,
3544
+ "step": 186
3545
+ },
3546
+ {
3547
+ "clip_fraction": 0.0,
3548
+ "completion_length": 2288.274345397949,
3549
+ "dapo/avg_reward_std": 0.3166468055159957,
3550
+ "dapo/filter_reward_index": 0.0,
3551
+ "dapo/kept_prompts_ratio": 0.34567901823255753,
3552
+ "dapo/num_sampling_attempts": 3.375,
3553
+ "dapo/sampling_efficiency": 40.972222222222214,
3554
+ "dapo/total_prompts_processed": 20.25,
3555
+ "dapo/valid_prompts_collected": 6.0,
3556
+ "epoch": 0.21371428571428572,
3557
+ "grad_norm": 0.011898735538125038,
3558
+ "kl": 0.000521540641784668,
3559
+ "learning_rate": 1.1336692317580158e-07,
3560
+ "loss": 0.0415,
3561
+ "reward": 0.7687236070632935,
3562
+ "reward_std": 0.9334599822759628,
3563
+ "step": 187
3564
+ },
3565
+ {
3566
+ "clip_fraction": 0.0,
3567
+ "completion_length": 2432.531265258789,
3568
+ "dapo/avg_reward_std": 0.28751447051763535,
3569
+ "dapo/filter_reward_index": 0.0,
3570
+ "dapo/kept_prompts_ratio": 0.4513888979951541,
3571
+ "dapo/num_sampling_attempts": 3.0,
3572
+ "dapo/sampling_efficiency": 53.33333333333333,
3573
+ "dapo/total_prompts_processed": 18.0,
3574
+ "dapo/valid_prompts_collected": 6.0,
3575
+ "epoch": 0.21485714285714286,
3576
+ "grad_norm": 0.010497819632291794,
3577
+ "kl": 0.0007112026214599609,
3578
+ "learning_rate": 1.1153347084664419e-07,
3579
+ "loss": 0.0185,
3580
+ "reward": 0.7899295631796122,
3581
+ "reward_std": 0.9512373134493828,
3582
+ "step": 188
3583
+ },
3584
+ {
3585
+ "clip_fraction": 0.0,
3586
+ "completion_length": 1948.9167022705078,
3587
+ "dapo/avg_reward_std": 0.30568089832862216,
3588
+ "dapo/filter_reward_index": 0.0,
3589
+ "dapo/kept_prompts_ratio": 0.46527779040237266,
3590
+ "dapo/num_sampling_attempts": 3.0,
3591
+ "dapo/sampling_efficiency": 36.87499999999999,
3592
+ "dapo/total_prompts_processed": 18.0,
3593
+ "dapo/valid_prompts_collected": 6.0,
3594
+ "epoch": 0.216,
3595
+ "grad_norm": 0.013562222942709923,
3596
+ "kl": 0.0006091594696044922,
3597
+ "learning_rate": 1.0983357966978745e-07,
3598
+ "loss": 0.0388,
3599
+ "reward": 0.6485428418964148,
3600
+ "reward_std": 0.9110815972089767,
3601
+ "step": 189
3602
+ },
3603
+ {
3604
+ "clip_fraction": 0.0,
3605
+ "completion_length": 2494.395866394043,
3606
+ "dapo/avg_reward_std": 0.27111421525478363,
3607
+ "dapo/filter_reward_index": 0.0,
3608
+ "dapo/kept_prompts_ratio": 0.3863636404275894,
3609
+ "dapo/num_sampling_attempts": 2.75,
3610
+ "dapo/sampling_efficiency": 45.20833333333333,
3611
+ "dapo/total_prompts_processed": 16.5,
3612
+ "dapo/valid_prompts_collected": 6.0,
3613
+ "epoch": 0.21714285714285714,
3614
+ "grad_norm": 0.00931188277900219,
3615
+ "kl": 0.0006044209003448486,
3616
+ "learning_rate": 1.0826776744855121e-07,
3617
+ "loss": 0.0024,
3618
+ "reward": 0.5944220442324877,
3619
+ "reward_std": 0.9433802142739296,
3620
+ "step": 190
3621
+ },
3622
+ {
3623
+ "clip_fraction": 0.0,
3624
+ "completion_length": 2601.7569427490234,
3625
+ "dapo/avg_reward_std": 0.3233232215046883,
3626
+ "dapo/filter_reward_index": 0.0,
3627
+ "dapo/kept_prompts_ratio": 0.49166667386889457,
3628
+ "dapo/num_sampling_attempts": 2.5,
3629
+ "dapo/sampling_efficiency": 49.375,
3630
+ "dapo/total_prompts_processed": 15.0,
3631
+ "dapo/valid_prompts_collected": 6.0,
3632
+ "epoch": 0.21828571428571428,
3633
+ "grad_norm": 0.011869938112795353,
3634
+ "kl": 0.0006383061408996582,
3635
+ "learning_rate": 1.068365111445064e-07,
3636
+ "loss": 0.0221,
3637
+ "reward": 0.5644997656345367,
3638
+ "reward_std": 0.9473884925246239,
3639
+ "step": 191
3640
+ },
3641
+ {
3642
+ "clip_fraction": 0.0,
3643
+ "completion_length": 1624.8541564941406,
3644
+ "dapo/avg_reward_std": 0.33193936944007874,
3645
+ "dapo/filter_reward_index": 0.0,
3646
+ "dapo/kept_prompts_ratio": 0.46969697692177514,
3647
+ "dapo/num_sampling_attempts": 2.75,
3648
+ "dapo/sampling_efficiency": 44.791666666666664,
3649
+ "dapo/total_prompts_processed": 16.5,
3650
+ "dapo/valid_prompts_collected": 6.0,
3651
+ "epoch": 0.21942857142857142,
3652
+ "grad_norm": 0.011828861199319363,
3653
+ "kl": 0.0003381967544555664,
3654
+ "learning_rate": 1.0554024673218806e-07,
3655
+ "loss": -0.0125,
3656
+ "reward": 0.7034952798858285,
3657
+ "reward_std": 0.9275326952338219,
3658
+ "step": 192
3659
+ },
3660
+ {
3661
+ "clip_fraction": 0.0,
3662
+ "completion_length": 2333.607650756836,
3663
+ "dapo/avg_reward_std": 0.4260722654206412,
3664
+ "dapo/filter_reward_index": 0.0,
3665
+ "dapo/kept_prompts_ratio": 0.6309523891125407,
3666
+ "dapo/num_sampling_attempts": 1.75,
3667
+ "dapo/sampling_efficiency": 70.83333333333333,
3668
+ "dapo/total_prompts_processed": 10.5,
3669
+ "dapo/valid_prompts_collected": 6.0,
3670
+ "epoch": 0.22057142857142858,
3671
+ "grad_norm": 0.010871903039515018,
3672
+ "kl": 0.0005550980567932129,
3673
+ "learning_rate": 1.0437936906629334e-07,
3674
+ "loss": -0.004,
3675
+ "reward": 0.4316184278577566,
3676
+ "reward_std": 0.9555172920227051,
3677
+ "step": 193
3678
+ },
3679
+ {
3680
+ "clip_fraction": 0.0,
3681
+ "completion_length": 2939.9097442626953,
3682
+ "dapo/avg_reward_std": 0.2783619257119986,
3683
+ "dapo/filter_reward_index": 0.0,
3684
+ "dapo/kept_prompts_ratio": 0.3846153932122084,
3685
+ "dapo/num_sampling_attempts": 3.25,
3686
+ "dapo/sampling_efficiency": 47.39583333333333,
3687
+ "dapo/total_prompts_processed": 19.5,
3688
+ "dapo/valid_prompts_collected": 6.0,
3689
+ "epoch": 0.22171428571428572,
3690
+ "grad_norm": 0.014206220395863056,
3691
+ "kl": 0.0007078647613525391,
3692
+ "learning_rate": 1.0335423176140511e-07,
3693
+ "loss": 0.0805,
3694
+ "reward": 0.7283875979483128,
3695
+ "reward_std": 0.9719515442848206,
3696
+ "step": 194
3697
+ },
3698
+ {
3699
+ "clip_fraction": 0.0,
3700
+ "completion_length": 1945.9653244018555,
3701
+ "dapo/avg_reward_std": 0.3208765654187453,
3702
+ "dapo/filter_reward_index": 0.0,
3703
+ "dapo/kept_prompts_ratio": 0.5438596551355562,
3704
+ "dapo/num_sampling_attempts": 2.375,
3705
+ "dapo/sampling_efficiency": 60.416666666666664,
3706
+ "dapo/total_prompts_processed": 14.25,
3707
+ "dapo/valid_prompts_collected": 6.0,
3708
+ "epoch": 0.22285714285714286,
3709
+ "grad_norm": 0.015090257860720158,
3710
+ "kl": 0.000569462776184082,
3711
+ "learning_rate": 1.0246514708427701e-07,
3712
+ "loss": -0.021,
3713
+ "reward": 0.5579635920003057,
3714
+ "reward_std": 0.9634370356798172,
3715
+ "step": 195
3716
+ },
3717
+ {
3718
+ "clip_fraction": 0.0,
3719
+ "completion_length": 2212.5902709960938,
3720
+ "dapo/avg_reward_std": 0.23615881362382105,
3721
+ "dapo/filter_reward_index": 0.0,
3722
+ "dapo/kept_prompts_ratio": 0.2696078498573864,
3723
+ "dapo/num_sampling_attempts": 4.25,
3724
+ "dapo/sampling_efficiency": 27.916666666666664,
3725
+ "dapo/total_prompts_processed": 25.5,
3726
+ "dapo/valid_prompts_collected": 6.0,
3727
+ "epoch": 0.224,
3728
+ "grad_norm": 0.012650169432163239,
3729
+ "kl": 0.0005346536636352539,
3730
+ "learning_rate": 1.017123858587145e-07,
3731
+ "loss": 0.0756,
3732
+ "reward": 0.6994661018252373,
3733
+ "reward_std": 0.9281085133552551,
3734
+ "step": 196
3735
+ },
3736
+ {
3737
+ "clip_fraction": 0.0,
3738
+ "completion_length": 2392.7742919921875,
3739
+ "dapo/avg_reward_std": 0.3088900530338287,
3740
+ "dapo/filter_reward_index": 0.0,
3741
+ "dapo/kept_prompts_ratio": 0.406666676402092,
3742
+ "dapo/num_sampling_attempts": 3.125,
3743
+ "dapo/sampling_efficiency": 45.3125,
3744
+ "dapo/total_prompts_processed": 18.75,
3745
+ "dapo/valid_prompts_collected": 6.0,
3746
+ "epoch": 0.22514285714285714,
3747
+ "grad_norm": 0.01346337329596281,
3748
+ "kl": 0.0006176233291625977,
3749
+ "learning_rate": 1.0109617738307911e-07,
3750
+ "loss": 0.0523,
3751
+ "reward": 0.6644653081893921,
3752
+ "reward_std": 0.9385305866599083,
3753
+ "step": 197
3754
+ },
3755
+ {
3756
+ "clip_fraction": 0.0,
3757
+ "completion_length": 2743.819465637207,
3758
+ "dapo/avg_reward_std": 0.3153854298591614,
3759
+ "dapo/filter_reward_index": 0.0,
3760
+ "dapo/kept_prompts_ratio": 0.4133333432674408,
3761
+ "dapo/num_sampling_attempts": 3.125,
3762
+ "dapo/sampling_efficiency": 43.75,
3763
+ "dapo/total_prompts_processed": 18.75,
3764
+ "dapo/valid_prompts_collected": 6.0,
3765
+ "epoch": 0.22628571428571428,
3766
+ "grad_norm": 0.010797293856739998,
3767
+ "kl": 0.000672459602355957,
3768
+ "learning_rate": 1.0061670936044178e-07,
3769
+ "loss": 0.04,
3770
+ "reward": 0.5658168056979775,
3771
+ "reward_std": 0.9682240337133408,
3772
+ "step": 198
3773
+ },
3774
+ {
3775
+ "clip_fraction": 0.0,
3776
+ "completion_length": 2336.80558013916,
3777
+ "dapo/avg_reward_std": 0.3246711401835732,
3778
+ "dapo/filter_reward_index": 0.0,
3779
+ "dapo/kept_prompts_ratio": 0.4855072530715362,
3780
+ "dapo/num_sampling_attempts": 2.875,
3781
+ "dapo/sampling_efficiency": 41.666666666666664,
3782
+ "dapo/total_prompts_processed": 17.25,
3783
+ "dapo/valid_prompts_collected": 6.0,
3784
+ "epoch": 0.22742857142857142,
3785
+ "grad_norm": 0.011765834875404835,
3786
+ "kl": 0.00055694580078125,
3787
+ "learning_rate": 1.002741278414069e-07,
3788
+ "loss": 0.0308,
3789
+ "reward": 0.6460054386407137,
3790
+ "reward_std": 0.9711420610547066,
3791
+ "step": 199
3792
+ },
3793
+ {
3794
+ "clip_fraction": 0.0,
3795
+ "completion_length": 2571.1875228881836,
3796
+ "dapo/avg_reward_std": 0.29997331152359646,
3797
+ "dapo/filter_reward_index": 0.0,
3798
+ "dapo/kept_prompts_ratio": 0.486111119389534,
3799
+ "dapo/num_sampling_attempts": 3.0,
3800
+ "dapo/sampling_efficiency": 39.285714285714285,
3801
+ "dapo/total_prompts_processed": 18.0,
3802
+ "dapo/valid_prompts_collected": 6.0,
3803
+ "epoch": 0.22857142857142856,
3804
+ "grad_norm": 0.009876573458313942,
3805
+ "kl": 0.0005443096160888672,
3806
+ "learning_rate": 1.0006853717962393e-07,
3807
+ "loss": 0.0268,
3808
+ "reward": 0.5957941338419914,
3809
+ "reward_std": 0.992652915418148,
3810
+ "step": 200
3811
+ },
3812
+ {
3813
+ "epoch": 0.22857142857142856,
3814
+ "step": 200,
3815
  "total_flos": 0.0,
3816
+ "train_loss": 0.01698429927288089,
3817
+ "train_runtime": 137940.7556,
3818
+ "train_samples_per_second": 0.07,
3819
  "train_steps_per_second": 0.001
3820
  }
3821
  ],
3822
  "logging_steps": 1,
3823
+ "max_steps": 200,
3824
  "num_input_tokens_seen": 0,
3825
  "num_train_epochs": 1,
3826
  "save_steps": 10,