Add files using upload-large-folder tool
Browse files- .gitattributes +2 -0
- training_logs/20260427_180636_metrics_a2-rl-stack_jest_v2_388828.csv +15 -0
- training_logs/20260427_180636_metrics_report.md +227 -0
- training_logs/20260427_180636_metrics_table.csv +15 -0
- training_logs/20260427_180636_reward_vs_steps.png +3 -0
- training_logs/20260427_180636_trial_results.csv +0 -0
- training_logs/20260427_180636_turn_count_distribution.png +0 -0
- training_logs/20260427_180636_vllm_metrics_a2-rl-stack_jest_v2_388828.csv +0 -0
- training_logs/20260427_180636_vllm_metrics_table.csv +0 -0
- training_logs/a2-rl-stack_jest_v2_388828.out +3 -0
- training_logs/a2-rl-stack_jest_v2_388829.out +0 -0
- training_logs/a2-rl-stack_jest_v2_388830.out +0 -0
- training_logs/a2-rl-stack_jest_v2_388831.out +0 -0
- training_logs/a2-rl-stack_jest_v2_388832.out +0 -0
- training_logs/a2-rl-stack_jest_v2_388833.out +0 -0
- training_logs/a2-rl-stack_jest_v2_388834.out +0 -0
.gitattributes
CHANGED
|
@@ -34,3 +34,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
training_logs/20260427_180636_reward_vs_steps.png filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
training_logs/a2-rl-stack_jest_v2_388828.out filter=lfs diff=lfs merge=lfs -text
|
training_logs/20260427_180636_metrics_a2-rl-stack_jest_v2_388828.csv
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked,batch_errors/avg_VerifierRuntimeError,batch_errors/total_VerifierRuntimeError,timing/save_hf_model
|
| 2 |
+
0.0,0,64,512,0,0.0,0,0.0,4662.2324,4028.125,4693.418,19909,1041,2498.6082,0.0469,0.0,0.0,0.0,0.197,0.0,0.0,1.0,0.0,0.0,0.0469,0.0469,15.8079,73.3613,349.227,59.3,857.9687,508.7417,8.3123,0.0911,3.8976,78.9876,674.5257,753.8845,52.0858,3548.2087,53.7378,674.8055,2736.6885,0,1,128.0,1024.0,1024.0,0.0,0.0,,,
|
| 3 |
+
0.0,0,64,512,1,1.0,1,1.0,6064.8848,3838.1212,6218.2944,24241,1179,3490.2217,0.0645,-0.0014,0.01,-0.0,0.2042,-0.0,0.0,1.0,0.0,0.0112,0.0781,0.0645,18.9903,73.5099,384.7603,55.2,857.9687,473.2084,0.0094,0.0789,4.12,87.6227,757.9733,846.0369,52.3647,2579.1726,54.0587,758.335,1674.935,0,2,64.0,512.0,512.0,0.0,0.0,,,
|
| 4 |
+
0.0,0,64,512,2,2.0,2,1.0,5474.791,5108.55,5542.6134,20121,1267,3050.0682,0.1562,0.0,0.0,0.0,0.1968,0.0,0.0,1.0,0.0,0.0,0.1562,0.1562,19.3334,73.7449,368.8224,57.0,857.9687,489.1463,0.0073,0.046,3.6227,82.308,744.5498,827.2625,53.8506,1382.7589,53.8402,744.9082,498.0035,0,3,64.0,512.0,510.0,1.0,0.0,0.03125,2.0,
|
| 5 |
+
0.0,0,64,512,3,3.0,3,1.0,5699.1973,3966.5185,5903.4869,23161,1,3770.7162,0.1055,0.0008,0.0051,0.0,0.1887,0.0,0.0,1.0,0.0,0.0067,0.1094,0.1055,19.666,73.957,363.7578,57.6,857.9687,494.2109,0.0071,0.083,3.9571,102.5779,806.9924,909.9862,52.7547,1728.7174,55.8091,807.3251,758.9374,0,4,64.0,512.0,512.0,0.0,0.0,,,
|
| 6 |
+
0.0,0,64,512,4,4.0,4,1.0,5744.0078,6146.575,5709.8919,25696,1344,3513.2835,0.0781,0.0,0.0,0.0,0.1856,0.0,0.0,1.0,0.0,0.0,0.0781,0.0781,21.2582,74.2703,362.1384,57.8,857.9687,495.8303,0.0112,0.0602,4.4491,86.3703,782.6672,869.4493,53.9546,1540.5181,53.4133,783.0185,613.1685,0,5,64.0,512.0,512.0,0.0,0.0,,,37.877
|
| 7 |
+
0.0,0,64,512,5,5.0,5,1.0,6107.5117,4430.6042,6280.9849,22958,1479,3691.5944,0.0938,0.0,0.0,0.0,0.1853,0.0,0.0,1.0,0.0,0.0,0.0938,0.0938,21.3301,74.3124,356.7458,58.4,857.9687,501.2229,0.0087,0.0515,4.6763,106.7279,820.4237,927.5826,52.5039,1456.9998,50.9701,820.8028,473.7404,0,6,64.0,512.0,512.0,0.0,0.0,,,
|
| 8 |
+
0.0,0,64,512,6,6.0,6,1.0,7806.5371,4340.1667,7977.0143,30825,988,6175.2351,0.0469,0.0,0.0,0.0,0.1842,0.0,0.0,1.0,0.0,0.0,0.0469,0.0469,25.509,74.6829,381.4424,55.5,857.9687,476.5263,0.0124,0.1095,5.0382,186.7453,1098.6498,1286.0848,52.0406,6616.2683,12.4455,1099.2296,5312.6625,0,7,64.0,512.0,512.0,0.0,0.0,,,
|
| 9 |
+
0.0,0,64,512,0,0.0,0,0.0,4898.9766,4360.25,4925.4713,16910,978,2678.3845,0.0469,0.0,0.0,0.0,0.1733,0.0,0.0,1.0,0.0,0.0,0.0469,0.0469,25.4579,74.6854,390.0168,54.5,857.9687,467.9518,0.0079,0.0447,3.1642,82.9256,676.4665,759.7588,52.5991,4268.5747,53.762,676.7881,3451.851,1,8,64.0,512.0,512.0,0.0,0.0,,,
|
| 10 |
+
0.0,0,64,512,1,1.0,1,1.0,5547.7832,4123.2174,5769.6682,20143,1399,3225.9525,0.1348,0.0003,0.0054,-0.0,0.178,-0.0,0.0,1.0,0.0,0.01,0.1406,0.1348,25.6384,74.7434,390.2228,54.5,857.9687,467.7458,0.0088,0.0466,3.8325,85.8132,723.6436,809.8543,52.6413,1858.802,53.523,723.9942,991.5653,1,9,64.0,512.0,512.0,0.0,0.0,,,
|
| 11 |
+
0.0,0,64,512,2,2.0,2,1.0,5534.4395,4362.7857,5678.3268,18891,1200,3017.3224,0.1094,0.0,0.0,0.0,0.179,0.0,0.0,1.0,0.0,0.0,0.1094,0.1094,25.8958,74.9708,411.3132,52.1,857.9687,446.6555,0.0087,0.0435,3.3808,81.611,702.3105,784.3806,52.7175,1563.2701,51.7063,702.7258,723.772,1,10,64.0,512.0,512.0,0.0,0.0,,,36.9492
|
| 12 |
+
0.0,0,64,512,3,3.0,3,1.0,6088.8008,4722.359,6201.4672,19902,1146,3677.7865,0.0762,0.0003,0.0028,-0.0,0.1789,-0.0,0.0,1.0,0.0,0.0043,0.0781,0.0762,25.9409,74.9533,400.013,53.4,857.9687,457.9557,0.0078,0.0472,3.8067,98.8787,806.3553,905.6134,53.8827,1793.5863,52.8772,806.6871,831.2583,1,11,64.0,512.0,512.0,0.0,0.0,,,
|
| 13 |
+
0.0,0,64,512,4,4.0,4,1.0,5448.5254,5549.35,5439.9809,21237,1272,3397.5694,0.0781,0.0,0.0,0.0,0.1734,0.0,0.0,1.0,0.0,0.0,0.0781,0.0781,26.3522,75.2733,393.5605,54.1,857.9687,464.4082,0.0076,0.0491,3.8599,82.5733,764.0154,847.0158,53.3604,1808.5047,52.2366,764.393,905.3587,1,12,64.0,512.0,512.0,0.0,0.0,,,
|
| 14 |
+
0.0,0,64,512,5,5.0,5,1.0,6116.7676,4422.8214,6324.7961,22128,1347,3878.4717,0.1094,0.0,0.0,0.0,0.1796,0.0,0.0,1.0,0.0,0.0,0.1094,0.1094,26.7385,75.5441,390.6353,54.5,857.9687,467.3334,0.0078,0.0509,3.931,90.8837,796.749,888.0035,52.3352,1472.4338,52.5954,797.0685,527.8742,1,13,64.0,512.0,512.0,0.0,0.0,,,
|
| 15 |
+
0.0,0,64,512,6,6.0,6,1.0,8099.6328,6103.6087,8193.5153,30165,1466,6102.5258,0.0449,0.0005,0.0028,-0.0,0.1791,-0.0,0.0,1.0,0.0,0.0035,0.0469,0.0449,27.2599,75.8359,386.1821,55.0,857.9687,471.7866,0.0095,0.08,5.3075,198.1348,1094.446,1292.9979,52.0547,6381.7149,12.5196,1094.7827,5070.8578,1,14,,,,,,,,
|
training_logs/20260427_180636_metrics_report.md
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SkyRL Training Metrics Analysis
|
| 2 |
+
|
| 3 |
+
Generated from 1 log file
|
| 4 |
+
|
| 5 |
+
## Overview
|
| 6 |
+
|
| 7 |
+
| Log File | Total Steps | Metric Blocks | Final Reward (mean) | Final Reward (max) | Total Time (s) |
|
| 8 |
+
|----------|-------------|---------------|---------------------|-------------------|----------------|
|
| 9 |
+
| a2-rl-stack_jest_v2_388828 | 14 | 14 | 0.0851 | 0.1562 | 37999.5 |
|
| 10 |
+
|
| 11 |
+
## Async Metrics
|
| 12 |
+
|
| 13 |
+
| | Mean | Std | Min | Max | Count |
|
| 14 |
+
|:------------------------------|-----------:|---------:|------:|------:|--------:|
|
| 15 |
+
| async/discard_rate | 0 | 0 | 0 | 0 | 14 |
|
| 16 |
+
| async/discarded_count | 0 | 0 | 0 | 0 | 14 |
|
| 17 |
+
| async/effective_batch_groups | 64 | 0 | 64 | 64 | 14 |
|
| 18 |
+
| async/effective_batch_samples | 512 | 0 | 512 | 512 | 14 |
|
| 19 |
+
| async/staleness_max | 3 | 2.0755 | 0 | 6 | 14 |
|
| 20 |
+
| async/staleness_mean | 3 | 2.0755 | 0 | 6 | 14 |
|
| 21 |
+
| async/staleness_min | 3 | 2.0755 | 0 | 6 | 14 |
|
| 22 |
+
| async/staleness_ratio | 0.857143 | 0.363137 | 0 | 1 | 14 |
|
| 23 |
+
|
| 24 |
+
## Generate Metrics
|
| 25 |
+
|
| 26 |
+
| | Mean | Std | Min | Max | Count |
|
| 27 |
+
|:-------------------------------------|---------:|---------:|---------:|---------:|--------:|
|
| 28 |
+
| generate/avg_num_tokens | 5949.58 | 954.053 | 4662.23 | 8099.63 | 14 |
|
| 29 |
+
| generate/avg_tokens_non_zero_rewards | 4678.79 | 760.473 | 3838.12 | 6146.57 | 14 |
|
| 30 |
+
| generate/avg_tokens_zero_rewards | 6061.35 | 983.506 | 4693.42 | 8193.52 | 14 |
|
| 31 |
+
| generate/max_num_tokens | 22591.9 | 4043.19 | 16910 | 30825 | 14 |
|
| 32 |
+
| generate/min_num_tokens | 1150.5 | 368.613 | 1 | 1479 | 14 |
|
| 33 |
+
| generate/std_num_tokens | 3726.27 | 1098.85 | 2498.61 | 6175.24 | 14 |
|
| 34 |
+
|
| 35 |
+
## Loss Metrics
|
| 36 |
+
|
| 37 |
+
| | Mean | Std | Min | Max | Count |
|
| 38 |
+
|:----------------------------|------------:|------------:|--------:|-------:|--------:|
|
| 39 |
+
| loss/avg_final_rewards | 0.0851143 | 0.0348551 | 0.0449 | 0.1562 | 14 |
|
| 40 |
+
| loss/avg_raw_advantages | 3.57143e-05 | 0.000481356 | -0.0014 | 0.0008 | 14 |
|
| 41 |
+
| loss/avg_raw_advantages_abs | 0.00186429 | 0.00306584 | 0 | 0.01 | 14 |
|
| 42 |
+
|
| 43 |
+
## Policy Metrics
|
| 44 |
+
|
| 45 |
+
| | Mean | Std | Min | Max | Count |
|
| 46 |
+
|:---------------------------|---------:|-----------:|--------:|-------:|--------:|
|
| 47 |
+
| policy/final_loss | 0 | 0 | -0 | 0 | 14 |
|
| 48 |
+
| policy/policy_entropy | 0.184507 | 0.00928559 | 0.1733 | 0.2042 | 14 |
|
| 49 |
+
| policy/policy_loss | 0 | 0 | -0 | 0 | 14 |
|
| 50 |
+
| policy/policy_lr | 0 | 0 | 0 | 0 | 14 |
|
| 51 |
+
| policy/policy_update_steps | 1 | 0 | 1 | 1 | 14 |
|
| 52 |
+
| policy/ppo_clip_ratio | 0 | 0 | 0 | 0 | 14 |
|
| 53 |
+
| policy/raw_grad_norm | 0.00255 | 0.00401952 | 0 | 0.0112 | 14 |
|
| 54 |
+
|
| 55 |
+
## Reward Metrics
|
| 56 |
+
|
| 57 |
+
| | Mean | Std | Min | Max | Count |
|
| 58 |
+
|:----------------------|----------:|----------:|-------:|-------:|--------:|
|
| 59 |
+
| reward/avg_pass_at_8 | 0.0870571 | 0.0350406 | 0.0469 | 0.1562 | 14 |
|
| 60 |
+
| reward/avg_raw_reward | 0.0851143 | 0.0348551 | 0.0449 | 0.1562 | 14 |
|
| 61 |
+
|
| 62 |
+
## System Metrics
|
| 63 |
+
|
| 64 |
+
| | Mean | Std | Min | Max | Count |
|
| 65 |
+
|:------------------------|---------:|-------------:|---------:|---------:|--------:|
|
| 66 |
+
| system/process_rss_gb | 23.227 | 3.69154 | 15.8079 | 27.2599 | 14 |
|
| 67 |
+
| system/process_vms_gb | 74.5603 | 0.744513 | 73.3613 | 75.8359 | 14 |
|
| 68 |
+
| system/ram_available_gb | 380.631 | 17.8364 | 349.227 | 411.313 | 14 |
|
| 69 |
+
| system/ram_percent | 55.6357 | 2.07201 | 52.1 | 59.3 | 14 |
|
| 70 |
+
| system/ram_total_gb | 857.969 | 2.35957e-13 | 857.969 | 857.969 | 14 |
|
| 71 |
+
| system/ram_used_gb | 477.337 | 17.8364 | 446.656 | 508.742 | 14 |
|
| 72 |
+
|
| 73 |
+
## Timing Metrics
|
| 74 |
+
|
| 75 |
+
| | Mean | Std | Min | Max | Count |
|
| 76 |
+
|:--------------------------------------|-------------:|-------------:|----------:|----------:|--------:|
|
| 77 |
+
| timing/cleanup_old_checkpoints | 0.601893 | 2.21921 | 0.0071 | 8.3123 | 14 |
|
| 78 |
+
| timing/compute_advantages_and_returns | 0.0630143 | 0.0212967 | 0.0435 | 0.1095 | 14 |
|
| 79 |
+
| timing/convert_to_training_input | 4.07454 | 0.601892 | 3.1642 | 5.3075 | 14 |
|
| 80 |
+
| timing/fwd_logprobs_values_reward | 103.726 | 38.5559 | 78.9876 | 198.135 | 14 |
|
| 81 |
+
| timing/policy_train | 803.555 | 132.736 | 674.526 | 1098.65 | 14 |
|
| 82 |
+
| timing/run_training | 907.708 | 170.557 | 753.885 | 1293 | 14 |
|
| 83 |
+
| timing/save_checkpoints | 52.7961 | 0.686843 | 52.0406 | 53.9546 | 14 |
|
| 84 |
+
| timing/step | 2714.25 | 1813.94 | 1382.76 | 6616.27 | 14 |
|
| 85 |
+
| timing/sync_weights | 47.3925 | 14.8349 | 12.4455 | 55.8091 | 14 |
|
| 86 |
+
| timing/train_critic_and_policy | 803.919 | 132.775 | 674.806 | 1099.23 | 14 |
|
| 87 |
+
| timing/wait_for_generation_buffer | 1755.05 | 1703.98 | 473.74 | 5312.66 | 14 |
|
| 88 |
+
| timing/save_hf_model | 37.4131 | 0.656054 | 36.9492 | 37.877 | 2 |
|
| 89 |
+
|
| 90 |
+
## Trainer Metrics
|
| 91 |
+
|
| 92 |
+
| | Mean | Std | Min | Max | Count |
|
| 93 |
+
|:--------------------|-------:|---------:|------:|------:|--------:|
|
| 94 |
+
| trainer/epoch | 0.5 | 0.518875 | 0 | 1 | 14 |
|
| 95 |
+
| trainer/global_step | 7.5 | 4.1833 | 1 | 14 | 14 |
|
| 96 |
+
|
| 97 |
+
## Batch Errors Metrics
|
| 98 |
+
|
| 99 |
+
| | Mean | Std | Min | Max | Count |
|
| 100 |
+
|:----------------------------------------|------------:|----------:|----------:|-----------:|--------:|
|
| 101 |
+
| batch_errors/total_batches | 68.9231 | 17.7504 | 64 | 128 | 13 |
|
| 102 |
+
| batch_errors/total_instances | 551.385 | 142.003 | 512 | 1024 | 13 |
|
| 103 |
+
| batch_errors/total_successful | 551.231 | 142.051 | 510 | 1024 | 13 |
|
| 104 |
+
| batch_errors/total_failed | 0.0769231 | 0.27735 | 0 | 1 | 13 |
|
| 105 |
+
| batch_errors/total_masked | 0 | 0 | 0 | 0 | 13 |
|
| 106 |
+
| batch_errors/avg_VerifierRuntimeError | 0.03125 | nan | 0.03125 | 0.03125 | 1 |
|
| 107 |
+
| batch_errors/total_VerifierRuntimeError | 2 | nan | 2 | 2 | 1 |
|
| 108 |
+
|
| 109 |
+
## Training Progression by Log
|
| 110 |
+
|
| 111 |
+
### a2-rl-stack_jest_v2_388828
|
| 112 |
+
|
| 113 |
+
| Step | Reward | Pass@8 | KL | Loss | Step Time (s) | Gen Wait (s) |
|
| 114 |
+
|------|--------|--------|-----|------|---------------|-------------|
|
| 115 |
+
| 1 | 0.0469 | 0.0469 | 0.000000 | 0.0000 | 3548.2 | 2736.7 |
|
| 116 |
+
| 2 | 0.0645 | 0.0781 | 0.000000 | -0.0000 | 2579.2 | 1674.9 |
|
| 117 |
+
| 3 | 0.1562 | 0.1562 | 0.000000 | 0.0000 | 1382.8 | 498.0 |
|
| 118 |
+
| 4 | 0.1055 | 0.1094 | 0.000000 | 0.0000 | 1728.7 | 758.9 |
|
| 119 |
+
| 5 | 0.0781 | 0.0781 | 0.000000 | 0.0000 | 1540.5 | 613.2 |
|
| 120 |
+
| 6 | 0.0938 | 0.0938 | 0.000000 | 0.0000 | 1457.0 | 473.7 |
|
| 121 |
+
| 7 | 0.0469 | 0.0469 | 0.000000 | 0.0000 | 6616.3 | 5312.7 |
|
| 122 |
+
| 8 | 0.0469 | 0.0469 | 0.000000 | 0.0000 | 4268.6 | 3451.9 |
|
| 123 |
+
| 9 | 0.1348 | 0.1406 | 0.000000 | -0.0000 | 1858.8 | 991.6 |
|
| 124 |
+
| 10 | 0.1094 | 0.1094 | 0.000000 | 0.0000 | 1563.3 | 723.8 |
|
| 125 |
+
| 11 | 0.0762 | 0.0781 | 0.000000 | -0.0000 | 1793.6 | 831.3 |
|
| 126 |
+
| 12 | 0.0781 | 0.0781 | 0.000000 | 0.0000 | 1808.5 | 905.4 |
|
| 127 |
+
| 13 | 0.1094 | 0.1094 | 0.000000 | 0.0000 | 1472.4 | 527.9 |
|
| 128 |
+
| 14 | 0.0449 | 0.0469 | 0.000000 | -0.0000 | 6381.7 | 5070.9 |
|
| 129 |
+
|
| 130 |
+
## Timing Analysis
|
| 131 |
+
|
| 132 |
+
### Average Time Breakdown (% of step time)
|
| 133 |
+
|
| 134 |
+
| Component | Avg % of Step Time |
|
| 135 |
+
|-----------|-------------------|
|
| 136 |
+
| wait_for_generation_buffer | 54.8% |
|
| 137 |
+
| run_training | 42.5% |
|
| 138 |
+
| train_critic_and_policy | 37.9% |
|
| 139 |
+
| policy_train | 37.9% |
|
| 140 |
+
| fwd_logprobs_values_reward | 4.6% |
|
| 141 |
+
| save_checkpoints | 2.6% |
|
| 142 |
+
| sync_weights | 2.5% |
|
| 143 |
+
| save_hf_model | 2.4% |
|
| 144 |
+
| convert_to_training_input | 0.2% |
|
| 145 |
+
| cleanup_old_checkpoints | 0.0% |
|
| 146 |
+
| compute_advantages_and_returns | 0.0% |
|
| 147 |
+
|
| 148 |
+
## vLLM Inference Engine Analysis
|
| 149 |
+
|
| 150 |
+
Metrics from vLLM stat loggers (V1LoggingStatLoggerFixed).
|
| 151 |
+
|
| 152 |
+
> **Note**: Ray collapses similar log messages into a single line tagged `[repeated Nx across cluster]`,
|
| 153 |
+
> so we typically capture stats from one engine per timestamp. The stats shown are
|
| 154 |
+
> **per-engine** values. Multiply by num_inference_engines for cluster-wide estimates.
|
| 155 |
+
|
| 156 |
+
### Summary by Log (Per-Engine Stats)
|
| 157 |
+
|
| 158 |
+
| Log | Avg Running/Engine | Avg Waiting/Engine | Avg Gen Throughput/Engine | Avg KV Cache % | Avg Prefix Hit % |
|
| 159 |
+
|-----|-------------------|-------------------|--------------------------|----------------|------------------|
|
| 160 |
+
| a2-rl-stack_jest_v2_388828 | 3.5 | 0.0 | 71.1 tok/s | 7.5% | 88.4% |
|
| 161 |
+
|
| 162 |
+
### Utilization Analysis (Per-Engine)
|
| 163 |
+
|
| 164 |
+
Key indicators of inference engine utilization:
|
| 165 |
+
|
| 166 |
+
- **Running requests/engine**: Concurrent requests being processed by each engine
|
| 167 |
+
- **Waiting requests**: Requests queued (0 = engine not saturated, has spare capacity)
|
| 168 |
+
- **Generation throughput**: Decode tokens/sec per engine
|
| 169 |
+
- An 8B model on an H100 can reach **1000+ tok/s** when saturated
|
| 170 |
+
- If throughput is below 300 tok/s with 0 waiting requests, the engine is **starved for requests**
|
| 171 |
+
|
| 172 |
+
#### a2-rl-stack_jest_v2_388828
|
| 173 |
+
|
| 174 |
+
- **Running requests/engine**: avg=3.5, max=15
|
| 175 |
+
- **Waiting requests**: avg=0.0, max=0
|
| 176 |
+
- **Generation throughput/engine**: avg=71.1 tok/s, max=327.9 tok/s
|
| 177 |
+
- **KV cache usage**: avg=7.5%
|
| 178 |
+
- **Prefix cache hit rate**: avg=88.4%
|
| 179 |
+
- ⚠️ **Underutilized**: Engines starved for requests (0 waiting, avg 3.5 running)
|
| 180 |
+
- The bottleneck is likely upstream of the engine (environment execution rather than inference)
|
| 181 |
+
|
| 182 |
+
## Trial-Level Analysis (from result.json)
|
| 183 |
+
|
| 184 |
+
Total trials parsed: 7168
|
| 185 |
+
|
| 186 |
+
### Turn Count Statistics
|
| 187 |
+
|
| 188 |
+
| Metric | Value |
|
| 189 |
+
|--------|-------|
|
| 190 |
+
| Mean | 3.2 |
|
| 191 |
+
| Median | 2.0 |
|
| 192 |
+
| Std | 2.4 |
|
| 193 |
+
| Min | 2 |
|
| 194 |
+
| Max | 34 |
|
| 195 |
+
| Count | 7168 |
|
| 196 |
+
|
| 197 |
+
### Exception Distribution
|
| 198 |
+
|
| 199 |
+
| Exception Type | Count | % |
|
| 200 |
+
|---------------|-------|---|
|
| 201 |
+
| No exception | 7098 | 99.0% |
|
| 202 |
+
| ContextLengthExceededError | 38 | 0.5% |
|
| 203 |
+
| AgentTimeoutError | 30 | 0.4% |
|
| 204 |
+
| VerifierRuntimeError | 2 | 0.0% |
|
| 205 |
+
|
| 206 |
+
### Turn Count by Exception Type
|
| 207 |
+
|
| 208 |
+
| Exception Type | Mean Turns | Median Turns | Count |
|
| 209 |
+
|---------------|-----------|-------------|-------|
|
| 210 |
+
| ContextLengthExceededError | 19.6 | 19.0 | 38 |
|
| 211 |
+
| AgentTimeoutError | 13.7 | 12.0 | 30 |
|
| 212 |
+
| No exception | 3.1 | 2.0 | 7098 |
|
| 213 |
+
| VerifierRuntimeError | 2.0 | 2.0 | 2 |
|
| 214 |
+
|
| 215 |
+
### Turn Count by Outcome
|
| 216 |
+
|
| 217 |
+
| Outcome | Mean Turns | Median Turns | Count |
|
| 218 |
+
|---------|-----------|-------------|-------|
|
| 219 |
+
| Success | 2.6 | 2.0 | 610 |
|
| 220 |
+
| Failure | 3.3 | 2.0 | 6556 |
|
| 221 |
+
|
| 222 |
+
### Reward Summary
|
| 223 |
+
|
| 224 |
+
- Mean reward: 0.0851
|
| 225 |
+
- Success rate: 8.5%
|
| 226 |
+
- Trials with reward data: 7166
|
| 227 |
+
|
training_logs/20260427_180636_metrics_table.csv
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
log_file,async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked,batch_errors/avg_VerifierRuntimeError,batch_errors/total_VerifierRuntimeError,timing/save_hf_model,global_step
|
| 2 |
+
a2-rl-stack_jest_v2_388828,0.0,0,64,512,0,0.0,0,0.0,4662.2324,4028.125,4693.418,19909,1041,2498.6082,0.0469,0.0,0.0,0.0,0.197,0.0,0.0,1.0,0.0,0.0,0.0469,0.0469,15.8079,73.3613,349.227,59.3,857.9687,508.7417,8.3123,0.0911,3.8976,78.9876,674.5257,753.8845,52.0858,3548.2087,53.7378,674.8055,2736.6885,0,1,128.0,1024.0,1024.0,0.0,0.0,,,,1
|
| 3 |
+
a2-rl-stack_jest_v2_388828,0.0,0,64,512,1,1.0,1,1.0,6064.8848,3838.1212,6218.2944,24241,1179,3490.2217,0.0645,-0.0014,0.01,-0.0,0.2042,-0.0,0.0,1.0,0.0,0.0112,0.0781,0.0645,18.9903,73.5099,384.7603,55.2,857.9687,473.2084,0.0094,0.0789,4.12,87.6227,757.9733,846.0369,52.3647,2579.1726,54.0587,758.335,1674.935,0,2,64.0,512.0,512.0,0.0,0.0,,,,2
|
| 4 |
+
a2-rl-stack_jest_v2_388828,0.0,0,64,512,2,2.0,2,1.0,5474.791,5108.55,5542.6134,20121,1267,3050.0682,0.1562,0.0,0.0,0.0,0.1968,0.0,0.0,1.0,0.0,0.0,0.1562,0.1562,19.3334,73.7449,368.8224,57.0,857.9687,489.1463,0.0073,0.046,3.6227,82.308,744.5498,827.2625,53.8506,1382.7589,53.8402,744.9082,498.0035,0,3,64.0,512.0,510.0,1.0,0.0,0.03125,2.0,,3
|
| 5 |
+
a2-rl-stack_jest_v2_388828,0.0,0,64,512,3,3.0,3,1.0,5699.1973,3966.5185,5903.4869,23161,1,3770.7162,0.1055,0.0008,0.0051,0.0,0.1887,0.0,0.0,1.0,0.0,0.0067,0.1094,0.1055,19.666,73.957,363.7578,57.6,857.9687,494.2109,0.0071,0.083,3.9571,102.5779,806.9924,909.9862,52.7547,1728.7174,55.8091,807.3251,758.9374,0,4,64.0,512.0,512.0,0.0,0.0,,,,4
|
| 6 |
+
a2-rl-stack_jest_v2_388828,0.0,0,64,512,4,4.0,4,1.0,5744.0078,6146.575,5709.8919,25696,1344,3513.2835,0.0781,0.0,0.0,0.0,0.1856,0.0,0.0,1.0,0.0,0.0,0.0781,0.0781,21.2582,74.2703,362.1384,57.8,857.9687,495.8303,0.0112,0.0602,4.4491,86.3703,782.6672,869.4493,53.9546,1540.5181,53.4133,783.0185,613.1685,0,5,64.0,512.0,512.0,0.0,0.0,,,37.877,5
|
| 7 |
+
a2-rl-stack_jest_v2_388828,0.0,0,64,512,5,5.0,5,1.0,6107.5117,4430.6042,6280.9849,22958,1479,3691.5944,0.0938,0.0,0.0,0.0,0.1853,0.0,0.0,1.0,0.0,0.0,0.0938,0.0938,21.3301,74.3124,356.7458,58.4,857.9687,501.2229,0.0087,0.0515,4.6763,106.7279,820.4237,927.5826,52.5039,1456.9998,50.9701,820.8028,473.7404,0,6,64.0,512.0,512.0,0.0,0.0,,,,6
|
| 8 |
+
a2-rl-stack_jest_v2_388828,0.0,0,64,512,6,6.0,6,1.0,7806.5371,4340.1667,7977.0143,30825,988,6175.2351,0.0469,0.0,0.0,0.0,0.1842,0.0,0.0,1.0,0.0,0.0,0.0469,0.0469,25.509,74.6829,381.4424,55.5,857.9687,476.5263,0.0124,0.1095,5.0382,186.7453,1098.6498,1286.0848,52.0406,6616.2683,12.4455,1099.2296,5312.6625,0,7,64.0,512.0,512.0,0.0,0.0,,,,7
|
| 9 |
+
a2-rl-stack_jest_v2_388828,0.0,0,64,512,0,0.0,0,0.0,4898.9766,4360.25,4925.4713,16910,978,2678.3845,0.0469,0.0,0.0,0.0,0.1733,0.0,0.0,1.0,0.0,0.0,0.0469,0.0469,25.4579,74.6854,390.0168,54.5,857.9687,467.9518,0.0079,0.0447,3.1642,82.9256,676.4665,759.7588,52.5991,4268.5747,53.762,676.7881,3451.851,1,8,64.0,512.0,512.0,0.0,0.0,,,,8
|
| 10 |
+
a2-rl-stack_jest_v2_388828,0.0,0,64,512,1,1.0,1,1.0,5547.7832,4123.2174,5769.6682,20143,1399,3225.9525,0.1348,0.0003,0.0054,-0.0,0.178,-0.0,0.0,1.0,0.0,0.01,0.1406,0.1348,25.6384,74.7434,390.2228,54.5,857.9687,467.7458,0.0088,0.0466,3.8325,85.8132,723.6436,809.8543,52.6413,1858.802,53.523,723.9942,991.5653,1,9,64.0,512.0,512.0,0.0,0.0,,,,9
|
| 11 |
+
a2-rl-stack_jest_v2_388828,0.0,0,64,512,2,2.0,2,1.0,5534.4395,4362.7857,5678.3268,18891,1200,3017.3224,0.1094,0.0,0.0,0.0,0.179,0.0,0.0,1.0,0.0,0.0,0.1094,0.1094,25.8958,74.9708,411.3132,52.1,857.9687,446.6555,0.0087,0.0435,3.3808,81.611,702.3105,784.3806,52.7175,1563.2701,51.7063,702.7258,723.772,1,10,64.0,512.0,512.0,0.0,0.0,,,36.9492,10
|
| 12 |
+
a2-rl-stack_jest_v2_388828,0.0,0,64,512,3,3.0,3,1.0,6088.8008,4722.359,6201.4672,19902,1146,3677.7865,0.0762,0.0003,0.0028,-0.0,0.1789,-0.0,0.0,1.0,0.0,0.0043,0.0781,0.0762,25.9409,74.9533,400.013,53.4,857.9687,457.9557,0.0078,0.0472,3.8067,98.8787,806.3553,905.6134,53.8827,1793.5863,52.8772,806.6871,831.2583,1,11,64.0,512.0,512.0,0.0,0.0,,,,11
|
| 13 |
+
a2-rl-stack_jest_v2_388828,0.0,0,64,512,4,4.0,4,1.0,5448.5254,5549.35,5439.9809,21237,1272,3397.5694,0.0781,0.0,0.0,0.0,0.1734,0.0,0.0,1.0,0.0,0.0,0.0781,0.0781,26.3522,75.2733,393.5605,54.1,857.9687,464.4082,0.0076,0.0491,3.8599,82.5733,764.0154,847.0158,53.3604,1808.5047,52.2366,764.393,905.3587,1,12,64.0,512.0,512.0,0.0,0.0,,,,12
|
| 14 |
+
a2-rl-stack_jest_v2_388828,0.0,0,64,512,5,5.0,5,1.0,6116.7676,4422.8214,6324.7961,22128,1347,3878.4717,0.1094,0.0,0.0,0.0,0.1796,0.0,0.0,1.0,0.0,0.0,0.1094,0.1094,26.7385,75.5441,390.6353,54.5,857.9687,467.3334,0.0078,0.0509,3.931,90.8837,796.749,888.0035,52.3352,1472.4338,52.5954,797.0685,527.8742,1,13,64.0,512.0,512.0,0.0,0.0,,,,13
|
| 15 |
+
a2-rl-stack_jest_v2_388828,0.0,0,64,512,6,6.0,6,1.0,8099.6328,6103.6087,8193.5153,30165,1466,6102.5258,0.0449,0.0005,0.0028,-0.0,0.1791,-0.0,0.0,1.0,0.0,0.0035,0.0469,0.0449,27.2599,75.8359,386.1821,55.0,857.9687,471.7866,0.0095,0.08,5.3075,198.1348,1094.446,1292.9979,52.0547,6381.7149,12.5196,1094.7827,5070.8578,1,14,,,,,,,,,14
|
training_logs/20260427_180636_reward_vs_steps.png
ADDED
|
Git LFS Details
|
training_logs/20260427_180636_trial_results.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
training_logs/20260427_180636_turn_count_distribution.png
ADDED
|
training_logs/20260427_180636_vllm_metrics_a2-rl-stack_jest_v2_388828.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
training_logs/20260427_180636_vllm_metrics_table.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
training_logs/a2-rl-stack_jest_v2_388828.out
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b505562f5352302ac6b7da64448b7eb101f46caacd98cef211b4195ff7b21ea9
|
| 3 |
+
size 20237247
|
training_logs/a2-rl-stack_jest_v2_388829.out
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
training_logs/a2-rl-stack_jest_v2_388830.out
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
training_logs/a2-rl-stack_jest_v2_388831.out
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
training_logs/a2-rl-stack_jest_v2_388832.out
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
training_logs/a2-rl-stack_jest_v2_388833.out
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
training_logs/a2-rl-stack_jest_v2_388834.out
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|