penfever committed on
Commit
ccdce57
·
verified ·
1 Parent(s): c989cdf

Add files using upload-large-folder tool

Browse files
.gitattributes CHANGED
@@ -34,3 +34,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ training_logs/20260427_180636_reward_vs_steps.png filter=lfs diff=lfs merge=lfs -text
38
+ training_logs/a2-rl-stack_jest_v2_388828.out filter=lfs diff=lfs merge=lfs -text
training_logs/20260427_180636_metrics_a2-rl-stack_jest_v2_388828.csv ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked,batch_errors/avg_VerifierRuntimeError,batch_errors/total_VerifierRuntimeError,timing/save_hf_model
2
+ 0.0,0,64,512,0,0.0,0,0.0,4662.2324,4028.125,4693.418,19909,1041,2498.6082,0.0469,0.0,0.0,0.0,0.197,0.0,0.0,1.0,0.0,0.0,0.0469,0.0469,15.8079,73.3613,349.227,59.3,857.9687,508.7417,8.3123,0.0911,3.8976,78.9876,674.5257,753.8845,52.0858,3548.2087,53.7378,674.8055,2736.6885,0,1,128.0,1024.0,1024.0,0.0,0.0,,,
3
+ 0.0,0,64,512,1,1.0,1,1.0,6064.8848,3838.1212,6218.2944,24241,1179,3490.2217,0.0645,-0.0014,0.01,-0.0,0.2042,-0.0,0.0,1.0,0.0,0.0112,0.0781,0.0645,18.9903,73.5099,384.7603,55.2,857.9687,473.2084,0.0094,0.0789,4.12,87.6227,757.9733,846.0369,52.3647,2579.1726,54.0587,758.335,1674.935,0,2,64.0,512.0,512.0,0.0,0.0,,,
4
+ 0.0,0,64,512,2,2.0,2,1.0,5474.791,5108.55,5542.6134,20121,1267,3050.0682,0.1562,0.0,0.0,0.0,0.1968,0.0,0.0,1.0,0.0,0.0,0.1562,0.1562,19.3334,73.7449,368.8224,57.0,857.9687,489.1463,0.0073,0.046,3.6227,82.308,744.5498,827.2625,53.8506,1382.7589,53.8402,744.9082,498.0035,0,3,64.0,512.0,510.0,1.0,0.0,0.03125,2.0,
5
+ 0.0,0,64,512,3,3.0,3,1.0,5699.1973,3966.5185,5903.4869,23161,1,3770.7162,0.1055,0.0008,0.0051,0.0,0.1887,0.0,0.0,1.0,0.0,0.0067,0.1094,0.1055,19.666,73.957,363.7578,57.6,857.9687,494.2109,0.0071,0.083,3.9571,102.5779,806.9924,909.9862,52.7547,1728.7174,55.8091,807.3251,758.9374,0,4,64.0,512.0,512.0,0.0,0.0,,,
6
+ 0.0,0,64,512,4,4.0,4,1.0,5744.0078,6146.575,5709.8919,25696,1344,3513.2835,0.0781,0.0,0.0,0.0,0.1856,0.0,0.0,1.0,0.0,0.0,0.0781,0.0781,21.2582,74.2703,362.1384,57.8,857.9687,495.8303,0.0112,0.0602,4.4491,86.3703,782.6672,869.4493,53.9546,1540.5181,53.4133,783.0185,613.1685,0,5,64.0,512.0,512.0,0.0,0.0,,,37.877
7
+ 0.0,0,64,512,5,5.0,5,1.0,6107.5117,4430.6042,6280.9849,22958,1479,3691.5944,0.0938,0.0,0.0,0.0,0.1853,0.0,0.0,1.0,0.0,0.0,0.0938,0.0938,21.3301,74.3124,356.7458,58.4,857.9687,501.2229,0.0087,0.0515,4.6763,106.7279,820.4237,927.5826,52.5039,1456.9998,50.9701,820.8028,473.7404,0,6,64.0,512.0,512.0,0.0,0.0,,,
8
+ 0.0,0,64,512,6,6.0,6,1.0,7806.5371,4340.1667,7977.0143,30825,988,6175.2351,0.0469,0.0,0.0,0.0,0.1842,0.0,0.0,1.0,0.0,0.0,0.0469,0.0469,25.509,74.6829,381.4424,55.5,857.9687,476.5263,0.0124,0.1095,5.0382,186.7453,1098.6498,1286.0848,52.0406,6616.2683,12.4455,1099.2296,5312.6625,0,7,64.0,512.0,512.0,0.0,0.0,,,
9
+ 0.0,0,64,512,0,0.0,0,0.0,4898.9766,4360.25,4925.4713,16910,978,2678.3845,0.0469,0.0,0.0,0.0,0.1733,0.0,0.0,1.0,0.0,0.0,0.0469,0.0469,25.4579,74.6854,390.0168,54.5,857.9687,467.9518,0.0079,0.0447,3.1642,82.9256,676.4665,759.7588,52.5991,4268.5747,53.762,676.7881,3451.851,1,8,64.0,512.0,512.0,0.0,0.0,,,
10
+ 0.0,0,64,512,1,1.0,1,1.0,5547.7832,4123.2174,5769.6682,20143,1399,3225.9525,0.1348,0.0003,0.0054,-0.0,0.178,-0.0,0.0,1.0,0.0,0.01,0.1406,0.1348,25.6384,74.7434,390.2228,54.5,857.9687,467.7458,0.0088,0.0466,3.8325,85.8132,723.6436,809.8543,52.6413,1858.802,53.523,723.9942,991.5653,1,9,64.0,512.0,512.0,0.0,0.0,,,
11
+ 0.0,0,64,512,2,2.0,2,1.0,5534.4395,4362.7857,5678.3268,18891,1200,3017.3224,0.1094,0.0,0.0,0.0,0.179,0.0,0.0,1.0,0.0,0.0,0.1094,0.1094,25.8958,74.9708,411.3132,52.1,857.9687,446.6555,0.0087,0.0435,3.3808,81.611,702.3105,784.3806,52.7175,1563.2701,51.7063,702.7258,723.772,1,10,64.0,512.0,512.0,0.0,0.0,,,36.9492
12
+ 0.0,0,64,512,3,3.0,3,1.0,6088.8008,4722.359,6201.4672,19902,1146,3677.7865,0.0762,0.0003,0.0028,-0.0,0.1789,-0.0,0.0,1.0,0.0,0.0043,0.0781,0.0762,25.9409,74.9533,400.013,53.4,857.9687,457.9557,0.0078,0.0472,3.8067,98.8787,806.3553,905.6134,53.8827,1793.5863,52.8772,806.6871,831.2583,1,11,64.0,512.0,512.0,0.0,0.0,,,
13
+ 0.0,0,64,512,4,4.0,4,1.0,5448.5254,5549.35,5439.9809,21237,1272,3397.5694,0.0781,0.0,0.0,0.0,0.1734,0.0,0.0,1.0,0.0,0.0,0.0781,0.0781,26.3522,75.2733,393.5605,54.1,857.9687,464.4082,0.0076,0.0491,3.8599,82.5733,764.0154,847.0158,53.3604,1808.5047,52.2366,764.393,905.3587,1,12,64.0,512.0,512.0,0.0,0.0,,,
14
+ 0.0,0,64,512,5,5.0,5,1.0,6116.7676,4422.8214,6324.7961,22128,1347,3878.4717,0.1094,0.0,0.0,0.0,0.1796,0.0,0.0,1.0,0.0,0.0,0.1094,0.1094,26.7385,75.5441,390.6353,54.5,857.9687,467.3334,0.0078,0.0509,3.931,90.8837,796.749,888.0035,52.3352,1472.4338,52.5954,797.0685,527.8742,1,13,64.0,512.0,512.0,0.0,0.0,,,
15
+ 0.0,0,64,512,6,6.0,6,1.0,8099.6328,6103.6087,8193.5153,30165,1466,6102.5258,0.0449,0.0005,0.0028,-0.0,0.1791,-0.0,0.0,1.0,0.0,0.0035,0.0469,0.0449,27.2599,75.8359,386.1821,55.0,857.9687,471.7866,0.0095,0.08,5.3075,198.1348,1094.446,1292.9979,52.0547,6381.7149,12.5196,1094.7827,5070.8578,1,14,,,,,,,,
training_logs/20260427_180636_metrics_report.md ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SkyRL Training Metrics Analysis
2
+
3
+ Generated from 1 log file
4
+
5
+ ## Overview
6
+
7
+ | Log File | Total Steps | Metric Blocks | Final Reward (mean) | Final Reward (max) | Total Time (s) |
8
+ |----------|-------------|---------------|---------------------|-------------------|----------------|
9
+ | a2-rl-stack_jest_v2_388828 | 14 | 14 | 0.0851 | 0.1562 | 37999.5 |
10
+
11
+ ## Async Metrics
12
+
13
+ | | Mean | Std | Min | Max | Count |
14
+ |:------------------------------|-----------:|---------:|------:|------:|--------:|
15
+ | async/discard_rate | 0 | 0 | 0 | 0 | 14 |
16
+ | async/discarded_count | 0 | 0 | 0 | 0 | 14 |
17
+ | async/effective_batch_groups | 64 | 0 | 64 | 64 | 14 |
18
+ | async/effective_batch_samples | 512 | 0 | 512 | 512 | 14 |
19
+ | async/staleness_max | 3 | 2.0755 | 0 | 6 | 14 |
20
+ | async/staleness_mean | 3 | 2.0755 | 0 | 6 | 14 |
21
+ | async/staleness_min | 3 | 2.0755 | 0 | 6 | 14 |
22
+ | async/staleness_ratio | 0.857143 | 0.363137 | 0 | 1 | 14 |
23
+
24
+ ## Generate Metrics
25
+
26
+ | | Mean | Std | Min | Max | Count |
27
+ |:-------------------------------------|---------:|---------:|---------:|---------:|--------:|
28
+ | generate/avg_num_tokens | 5949.58 | 954.053 | 4662.23 | 8099.63 | 14 |
29
+ | generate/avg_tokens_non_zero_rewards | 4678.79 | 760.473 | 3838.12 | 6146.57 | 14 |
30
+ | generate/avg_tokens_zero_rewards | 6061.35 | 983.506 | 4693.42 | 8193.52 | 14 |
31
+ | generate/max_num_tokens | 22591.9 | 4043.19 | 16910 | 30825 | 14 |
32
+ | generate/min_num_tokens | 1150.5 | 368.613 | 1 | 1479 | 14 |
33
+ | generate/std_num_tokens | 3726.27 | 1098.85 | 2498.61 | 6175.24 | 14 |
34
+
35
+ ## Loss Metrics
36
+
37
+ | | Mean | Std | Min | Max | Count |
38
+ |:----------------------------|------------:|------------:|--------:|-------:|--------:|
39
+ | loss/avg_final_rewards | 0.0851143 | 0.0348551 | 0.0449 | 0.1562 | 14 |
40
+ | loss/avg_raw_advantages | 3.57143e-05 | 0.000481356 | -0.0014 | 0.0008 | 14 |
41
+ | loss/avg_raw_advantages_abs | 0.00186429 | 0.00306584 | 0 | 0.01 | 14 |
42
+
43
+ ## Policy Metrics
44
+
45
+ | | Mean | Std | Min | Max | Count |
46
+ |:---------------------------|---------:|-----------:|--------:|-------:|--------:|
47
+ | policy/final_loss | 0 | 0 | -0 | 0 | 14 |
48
+ | policy/policy_entropy | 0.184507 | 0.00928559 | 0.1733 | 0.2042 | 14 |
49
+ | policy/policy_loss | 0 | 0 | -0 | 0 | 14 |
50
+ | policy/policy_lr | 0 | 0 | 0 | 0 | 14 |
51
+ | policy/policy_update_steps | 1 | 0 | 1 | 1 | 14 |
52
+ | policy/ppo_clip_ratio | 0 | 0 | 0 | 0 | 14 |
53
+ | policy/raw_grad_norm | 0.00255 | 0.00401952 | 0 | 0.0112 | 14 |
54
+
55
+ ## Reward Metrics
56
+
57
+ | | Mean | Std | Min | Max | Count |
58
+ |:----------------------|----------:|----------:|-------:|-------:|--------:|
59
+ | reward/avg_pass_at_8 | 0.0870571 | 0.0350406 | 0.0469 | 0.1562 | 14 |
60
+ | reward/avg_raw_reward | 0.0851143 | 0.0348551 | 0.0449 | 0.1562 | 14 |
61
+
62
+ ## System Metrics
63
+
64
+ | | Mean | Std | Min | Max | Count |
65
+ |:------------------------|---------:|-------------:|---------:|---------:|--------:|
66
+ | system/process_rss_gb | 23.227 | 3.69154 | 15.8079 | 27.2599 | 14 |
67
+ | system/process_vms_gb | 74.5603 | 0.744513 | 73.3613 | 75.8359 | 14 |
68
+ | system/ram_available_gb | 380.631 | 17.8364 | 349.227 | 411.313 | 14 |
69
+ | system/ram_percent | 55.6357 | 2.07201 | 52.1 | 59.3 | 14 |
70
+ | system/ram_total_gb | 857.969 | 2.35957e-13 | 857.969 | 857.969 | 14 |
71
+ | system/ram_used_gb | 477.337 | 17.8364 | 446.656 | 508.742 | 14 |
72
+
73
+ ## Timing Metrics
74
+
75
+ | | Mean | Std | Min | Max | Count |
76
+ |:--------------------------------------|-------------:|-------------:|----------:|----------:|--------:|
77
+ | timing/cleanup_old_checkpoints | 0.601893 | 2.21921 | 0.0071 | 8.3123 | 14 |
78
+ | timing/compute_advantages_and_returns | 0.0630143 | 0.0212967 | 0.0435 | 0.1095 | 14 |
79
+ | timing/convert_to_training_input | 4.07454 | 0.601892 | 3.1642 | 5.3075 | 14 |
80
+ | timing/fwd_logprobs_values_reward | 103.726 | 38.5559 | 78.9876 | 198.135 | 14 |
81
+ | timing/policy_train | 803.555 | 132.736 | 674.526 | 1098.65 | 14 |
82
+ | timing/run_training | 907.708 | 170.557 | 753.885 | 1293 | 14 |
83
+ | timing/save_checkpoints | 52.7961 | 0.686843 | 52.0406 | 53.9546 | 14 |
84
+ | timing/step | 2714.25 | 1813.94 | 1382.76 | 6616.27 | 14 |
85
+ | timing/sync_weights | 47.3925 | 14.8349 | 12.4455 | 55.8091 | 14 |
86
+ | timing/train_critic_and_policy | 803.919 | 132.775 | 674.806 | 1099.23 | 14 |
87
+ | timing/wait_for_generation_buffer | 1755.05 | 1703.98 | 473.74 | 5312.66 | 14 |
88
+ | timing/save_hf_model | 37.4131 | 0.656054 | 36.9492 | 37.877 | 2 |
89
+
90
+ ## Trainer Metrics
91
+
92
+ | | Mean | Std | Min | Max | Count |
93
+ |:--------------------|-------:|---------:|------:|------:|--------:|
94
+ | trainer/epoch | 0.5 | 0.518875 | 0 | 1 | 14 |
95
+ | trainer/global_step | 7.5 | 4.1833 | 1 | 14 | 14 |
96
+
97
+ ## Batch Errors Metrics
98
+
99
+ | | Mean | Std | Min | Max | Count |
100
+ |:----------------------------------------|------------:|----------:|----------:|-----------:|--------:|
101
+ | batch_errors/total_batches | 68.9231 | 17.7504 | 64 | 128 | 13 |
102
+ | batch_errors/total_instances | 551.385 | 142.003 | 512 | 1024 | 13 |
103
+ | batch_errors/total_successful | 551.231 | 142.051 | 510 | 1024 | 13 |
104
+ | batch_errors/total_failed | 0.0769231 | 0.27735 | 0 | 1 | 13 |
105
+ | batch_errors/total_masked | 0 | 0 | 0 | 0 | 13 |
106
+ | batch_errors/avg_VerifierRuntimeError | 0.03125 | nan | 0.03125 | 0.03125 | 1 |
107
+ | batch_errors/total_VerifierRuntimeError | 2 | nan | 2 | 2 | 1 |
108
+
109
+ ## Training Progression by Log
110
+
111
+ ### a2-rl-stack_jest_v2_388828
112
+
113
+ | Step | Reward | Pass@8 | KL | Loss | Step Time (s) | Gen Wait (s) |
114
+ |------|--------|--------|-----|------|---------------|-------------|
115
+ | 1 | 0.0469 | 0.0469 | 0.000000 | 0.0000 | 3548.2 | 2736.7 |
116
+ | 2 | 0.0645 | 0.0781 | 0.000000 | -0.0000 | 2579.2 | 1674.9 |
117
+ | 3 | 0.1562 | 0.1562 | 0.000000 | 0.0000 | 1382.8 | 498.0 |
118
+ | 4 | 0.1055 | 0.1094 | 0.000000 | 0.0000 | 1728.7 | 758.9 |
119
+ | 5 | 0.0781 | 0.0781 | 0.000000 | 0.0000 | 1540.5 | 613.2 |
120
+ | 6 | 0.0938 | 0.0938 | 0.000000 | 0.0000 | 1457.0 | 473.7 |
121
+ | 7 | 0.0469 | 0.0469 | 0.000000 | 0.0000 | 6616.3 | 5312.7 |
122
+ | 8 | 0.0469 | 0.0469 | 0.000000 | 0.0000 | 4268.6 | 3451.9 |
123
+ | 9 | 0.1348 | 0.1406 | 0.000000 | -0.0000 | 1858.8 | 991.6 |
124
+ | 10 | 0.1094 | 0.1094 | 0.000000 | 0.0000 | 1563.3 | 723.8 |
125
+ | 11 | 0.0762 | 0.0781 | 0.000000 | -0.0000 | 1793.6 | 831.3 |
126
+ | 12 | 0.0781 | 0.0781 | 0.000000 | 0.0000 | 1808.5 | 905.4 |
127
+ | 13 | 0.1094 | 0.1094 | 0.000000 | 0.0000 | 1472.4 | 527.9 |
128
+ | 14 | 0.0449 | 0.0469 | 0.000000 | -0.0000 | 6381.7 | 5070.9 |
129
+
130
+ ## Timing Analysis
131
+
132
+ ### Average Time Breakdown (% of step time)
133
+
134
+ | Component | Avg % of Step Time |
135
+ |-----------|-------------------|
136
+ | wait_for_generation_buffer | 54.8% |
137
+ | run_training | 42.5% |
138
+ | train_critic_and_policy | 37.9% |
139
+ | policy_train | 37.9% |
140
+ | fwd_logprobs_values_reward | 4.6% |
141
+ | save_checkpoints | 2.6% |
142
+ | sync_weights | 2.5% |
143
+ | save_hf_model | 2.4% |
144
+ | convert_to_training_input | 0.2% |
145
+ | cleanup_old_checkpoints | 0.0% |
146
+ | compute_advantages_and_returns | 0.0% |
147
+
148
+ ## vLLM Inference Engine Analysis
149
+
150
+ Metrics from vLLM stat loggers (V1LoggingStatLoggerFixed).
151
+
152
+ > **Note**: Ray deduplicates similar log messages with `[repeated Nx across cluster]`,
153
+ > so we typically capture stats from one engine per timestamp. The stats shown are
154
+ > **per-engine** values. Multiply by num_inference_engines for cluster-wide estimates.
155
+
156
+ ### Summary by Log (Per-Engine Stats)
157
+
158
+ | Log | Avg Running/Engine | Avg Waiting/Engine | Avg Gen Throughput/Engine | Avg KV Cache % | Avg Prefix Hit % |
159
+ |-----|-------------------|-------------------|--------------------------|----------------|------------------|
160
+ | a2-rl-stack_jest_v2_388828 | 3.5 | 0.0 | 71.1 tok/s | 7.5% | 88.4% |
161
+
162
+ ### Utilization Analysis (Per-Engine)
163
+
164
+ Key indicators of inference engine utilization:
165
+
166
+ - **Running requests/engine**: Concurrent requests being processed by each engine
167
+ - **Waiting requests**: Requests queued (0 = engine not saturated, has spare capacity)
168
+ - **Generation throughput**: Decode tokens/sec per engine
169
+ - 8B model on H100 can do **1000+ tok/s** when saturated
170
+ - If throughput is below 300 tok/s with 0 requests waiting, the engine is **starved for requests**
171
+
172
+ #### a2-rl-stack_jest_v2_388828
173
+
174
+ - **Running requests/engine**: avg=3.5, max=15
175
+ - **Waiting requests**: avg=0.0, max=0
176
+ - **Generation throughput/engine**: avg=71.1 tok/s, max=327.9 tok/s
177
+ - **KV cache usage**: avg=7.5%
178
+ - **Prefix cache hit rate**: avg=88.4%
179
+ - ⚠️ **Underutilized**: Engines starved for requests (0 waiting, avg 3.5 running)
180
+ - Bottleneck is likely upstream (environment execution, not inference)
181
+
182
+ ## Trial-Level Analysis (from result.json)
183
+
184
+ Total trials parsed: 7168
185
+
186
+ ### Turn Count Statistics
187
+
188
+ | Metric | Value |
189
+ |--------|-------|
190
+ | Mean | 3.2 |
191
+ | Median | 2.0 |
192
+ | Std | 2.4 |
193
+ | Min | 2 |
194
+ | Max | 34 |
195
+ | Count | 7168 |
196
+
197
+ ### Exception Distribution
198
+
199
+ | Exception Type | Count | % |
200
+ |---------------|-------|---|
201
+ | No exception | 7098 | 99.0% |
202
+ | ContextLengthExceededError | 38 | 0.5% |
203
+ | AgentTimeoutError | 30 | 0.4% |
204
+ | VerifierRuntimeError | 2 | 0.0% |
205
+
206
+ ### Turn Count by Exception Type
207
+
208
+ | Exception Type | Mean Turns | Median Turns | Count |
209
+ |---------------|-----------|-------------|-------|
210
+ | ContextLengthExceededError | 19.6 | 19.0 | 38 |
211
+ | AgentTimeoutError | 13.7 | 12.0 | 30 |
212
+ | No exception | 3.1 | 2.0 | 7098 |
213
+ | VerifierRuntimeError | 2.0 | 2.0 | 2 |
214
+
215
+ ### Turn Count by Outcome
216
+
217
+ | Outcome | Mean Turns | Median Turns | Count |
218
+ |---------|-----------|-------------|-------|
219
+ | Success | 2.6 | 2.0 | 610 |
220
+ | Failure | 3.3 | 2.0 | 6556 |
221
+
222
+ ### Reward Summary
223
+
224
+ - Mean reward: 0.0851
225
+ - Success rate: 8.5%
226
+ - Trials with reward data: 7166
227
+
training_logs/20260427_180636_metrics_table.csv ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ log_file,async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked,batch_errors/avg_VerifierRuntimeError,batch_errors/total_VerifierRuntimeError,timing/save_hf_model,global_step
2
+ a2-rl-stack_jest_v2_388828,0.0,0,64,512,0,0.0,0,0.0,4662.2324,4028.125,4693.418,19909,1041,2498.6082,0.0469,0.0,0.0,0.0,0.197,0.0,0.0,1.0,0.0,0.0,0.0469,0.0469,15.8079,73.3613,349.227,59.3,857.9687,508.7417,8.3123,0.0911,3.8976,78.9876,674.5257,753.8845,52.0858,3548.2087,53.7378,674.8055,2736.6885,0,1,128.0,1024.0,1024.0,0.0,0.0,,,,1
3
+ a2-rl-stack_jest_v2_388828,0.0,0,64,512,1,1.0,1,1.0,6064.8848,3838.1212,6218.2944,24241,1179,3490.2217,0.0645,-0.0014,0.01,-0.0,0.2042,-0.0,0.0,1.0,0.0,0.0112,0.0781,0.0645,18.9903,73.5099,384.7603,55.2,857.9687,473.2084,0.0094,0.0789,4.12,87.6227,757.9733,846.0369,52.3647,2579.1726,54.0587,758.335,1674.935,0,2,64.0,512.0,512.0,0.0,0.0,,,,2
4
+ a2-rl-stack_jest_v2_388828,0.0,0,64,512,2,2.0,2,1.0,5474.791,5108.55,5542.6134,20121,1267,3050.0682,0.1562,0.0,0.0,0.0,0.1968,0.0,0.0,1.0,0.0,0.0,0.1562,0.1562,19.3334,73.7449,368.8224,57.0,857.9687,489.1463,0.0073,0.046,3.6227,82.308,744.5498,827.2625,53.8506,1382.7589,53.8402,744.9082,498.0035,0,3,64.0,512.0,510.0,1.0,0.0,0.03125,2.0,,3
5
+ a2-rl-stack_jest_v2_388828,0.0,0,64,512,3,3.0,3,1.0,5699.1973,3966.5185,5903.4869,23161,1,3770.7162,0.1055,0.0008,0.0051,0.0,0.1887,0.0,0.0,1.0,0.0,0.0067,0.1094,0.1055,19.666,73.957,363.7578,57.6,857.9687,494.2109,0.0071,0.083,3.9571,102.5779,806.9924,909.9862,52.7547,1728.7174,55.8091,807.3251,758.9374,0,4,64.0,512.0,512.0,0.0,0.0,,,,4
6
+ a2-rl-stack_jest_v2_388828,0.0,0,64,512,4,4.0,4,1.0,5744.0078,6146.575,5709.8919,25696,1344,3513.2835,0.0781,0.0,0.0,0.0,0.1856,0.0,0.0,1.0,0.0,0.0,0.0781,0.0781,21.2582,74.2703,362.1384,57.8,857.9687,495.8303,0.0112,0.0602,4.4491,86.3703,782.6672,869.4493,53.9546,1540.5181,53.4133,783.0185,613.1685,0,5,64.0,512.0,512.0,0.0,0.0,,,37.877,5
7
+ a2-rl-stack_jest_v2_388828,0.0,0,64,512,5,5.0,5,1.0,6107.5117,4430.6042,6280.9849,22958,1479,3691.5944,0.0938,0.0,0.0,0.0,0.1853,0.0,0.0,1.0,0.0,0.0,0.0938,0.0938,21.3301,74.3124,356.7458,58.4,857.9687,501.2229,0.0087,0.0515,4.6763,106.7279,820.4237,927.5826,52.5039,1456.9998,50.9701,820.8028,473.7404,0,6,64.0,512.0,512.0,0.0,0.0,,,,6
8
+ a2-rl-stack_jest_v2_388828,0.0,0,64,512,6,6.0,6,1.0,7806.5371,4340.1667,7977.0143,30825,988,6175.2351,0.0469,0.0,0.0,0.0,0.1842,0.0,0.0,1.0,0.0,0.0,0.0469,0.0469,25.509,74.6829,381.4424,55.5,857.9687,476.5263,0.0124,0.1095,5.0382,186.7453,1098.6498,1286.0848,52.0406,6616.2683,12.4455,1099.2296,5312.6625,0,7,64.0,512.0,512.0,0.0,0.0,,,,7
9
+ a2-rl-stack_jest_v2_388828,0.0,0,64,512,0,0.0,0,0.0,4898.9766,4360.25,4925.4713,16910,978,2678.3845,0.0469,0.0,0.0,0.0,0.1733,0.0,0.0,1.0,0.0,0.0,0.0469,0.0469,25.4579,74.6854,390.0168,54.5,857.9687,467.9518,0.0079,0.0447,3.1642,82.9256,676.4665,759.7588,52.5991,4268.5747,53.762,676.7881,3451.851,1,8,64.0,512.0,512.0,0.0,0.0,,,,8
10
+ a2-rl-stack_jest_v2_388828,0.0,0,64,512,1,1.0,1,1.0,5547.7832,4123.2174,5769.6682,20143,1399,3225.9525,0.1348,0.0003,0.0054,-0.0,0.178,-0.0,0.0,1.0,0.0,0.01,0.1406,0.1348,25.6384,74.7434,390.2228,54.5,857.9687,467.7458,0.0088,0.0466,3.8325,85.8132,723.6436,809.8543,52.6413,1858.802,53.523,723.9942,991.5653,1,9,64.0,512.0,512.0,0.0,0.0,,,,9
11
+ a2-rl-stack_jest_v2_388828,0.0,0,64,512,2,2.0,2,1.0,5534.4395,4362.7857,5678.3268,18891,1200,3017.3224,0.1094,0.0,0.0,0.0,0.179,0.0,0.0,1.0,0.0,0.0,0.1094,0.1094,25.8958,74.9708,411.3132,52.1,857.9687,446.6555,0.0087,0.0435,3.3808,81.611,702.3105,784.3806,52.7175,1563.2701,51.7063,702.7258,723.772,1,10,64.0,512.0,512.0,0.0,0.0,,,36.9492,10
12
+ a2-rl-stack_jest_v2_388828,0.0,0,64,512,3,3.0,3,1.0,6088.8008,4722.359,6201.4672,19902,1146,3677.7865,0.0762,0.0003,0.0028,-0.0,0.1789,-0.0,0.0,1.0,0.0,0.0043,0.0781,0.0762,25.9409,74.9533,400.013,53.4,857.9687,457.9557,0.0078,0.0472,3.8067,98.8787,806.3553,905.6134,53.8827,1793.5863,52.8772,806.6871,831.2583,1,11,64.0,512.0,512.0,0.0,0.0,,,,11
13
+ a2-rl-stack_jest_v2_388828,0.0,0,64,512,4,4.0,4,1.0,5448.5254,5549.35,5439.9809,21237,1272,3397.5694,0.0781,0.0,0.0,0.0,0.1734,0.0,0.0,1.0,0.0,0.0,0.0781,0.0781,26.3522,75.2733,393.5605,54.1,857.9687,464.4082,0.0076,0.0491,3.8599,82.5733,764.0154,847.0158,53.3604,1808.5047,52.2366,764.393,905.3587,1,12,64.0,512.0,512.0,0.0,0.0,,,,12
14
+ a2-rl-stack_jest_v2_388828,0.0,0,64,512,5,5.0,5,1.0,6116.7676,4422.8214,6324.7961,22128,1347,3878.4717,0.1094,0.0,0.0,0.0,0.1796,0.0,0.0,1.0,0.0,0.0,0.1094,0.1094,26.7385,75.5441,390.6353,54.5,857.9687,467.3334,0.0078,0.0509,3.931,90.8837,796.749,888.0035,52.3352,1472.4338,52.5954,797.0685,527.8742,1,13,64.0,512.0,512.0,0.0,0.0,,,,13
15
+ a2-rl-stack_jest_v2_388828,0.0,0,64,512,6,6.0,6,1.0,8099.6328,6103.6087,8193.5153,30165,1466,6102.5258,0.0449,0.0005,0.0028,-0.0,0.1791,-0.0,0.0,1.0,0.0,0.0035,0.0469,0.0449,27.2599,75.8359,386.1821,55.0,857.9687,471.7866,0.0095,0.08,5.3075,198.1348,1094.446,1292.9979,52.0547,6381.7149,12.5196,1094.7827,5070.8578,1,14,,,,,,,,,14
training_logs/20260427_180636_reward_vs_steps.png ADDED

Git LFS Details

  • SHA256: b6b720d999e50563eacfbbdf24621ccc2179592fa933f21a142bd4763a427868
  • Pointer size: 131 Bytes
  • Size of remote file: 149 kB
training_logs/20260427_180636_trial_results.csv ADDED
The diff for this file is too large to render. See raw diff
 
training_logs/20260427_180636_turn_count_distribution.png ADDED
training_logs/20260427_180636_vllm_metrics_a2-rl-stack_jest_v2_388828.csv ADDED
The diff for this file is too large to render. See raw diff
 
training_logs/20260427_180636_vllm_metrics_table.csv ADDED
The diff for this file is too large to render. See raw diff
 
training_logs/a2-rl-stack_jest_v2_388828.out ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b505562f5352302ac6b7da64448b7eb101f46caacd98cef211b4195ff7b21ea9
3
+ size 20237247
training_logs/a2-rl-stack_jest_v2_388829.out ADDED
The diff for this file is too large to render. See raw diff
 
training_logs/a2-rl-stack_jest_v2_388830.out ADDED
The diff for this file is too large to render. See raw diff
 
training_logs/a2-rl-stack_jest_v2_388831.out ADDED
The diff for this file is too large to render. See raw diff
 
training_logs/a2-rl-stack_jest_v2_388832.out ADDED
The diff for this file is too large to render. See raw diff
 
training_logs/a2-rl-stack_jest_v2_388833.out ADDED
The diff for this file is too large to render. See raw diff
 
training_logs/a2-rl-stack_jest_v2_388834.out ADDED
The diff for this file is too large to render. See raw diff