77ethers commited on
Commit
24b8695
·
verified ·
1 Parent(s): b5843ac

grpo_qwen3_4b_base_smoke_v1: failed smoke metrics

Browse files
grpo_qwen3_4b_base_smoke_v1/smoke_metrics_failed.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "clip_ratio/high_max": 0.0,
3
+ "clip_ratio/high_mean": 0.0,
4
+ "clip_ratio/low_mean": 0.0,
5
+ "clip_ratio/low_min": 0.0,
6
+ "clip_ratio/region_mean": 0.0,
7
+ "completion_length": 385.25,
8
+ "completions/clipped_ratio": 0.5,
9
+ "completions/max_length": 400.0,
10
+ "completions/max_terminated_length": 379.0,
11
+ "completions/mean_length": 385.25,
12
+ "completions/mean_terminated_length": 370.5,
13
+ "completions/min_length": 362.0,
14
+ "completions/min_terminated_length": 362.0,
15
+ "epoch": 0.25,
16
+ "frac_reward_zero_std": 0.0,
17
+ "grad_norm": 0.6496501564979553,
18
+ "kl": 0.5402975082397461,
19
+ "last": {
20
+ "epoch": 0.25,
21
+ "step": 10,
22
+ "total_flos": 0.0,
23
+ "train_loss": 0.041143313492648305,
24
+ "train_runtime": 134.0562,
25
+ "train_samples_per_second": 0.298,
26
+ "train_steps_per_second": 0.075
27
+ },
28
+ "learning_rate": 5.555555555555555e-07,
29
+ "log_rows": 11,
30
+ "loss": 0.036,
31
+ "num_tokens": 53307.0,
32
+ "reward": -0.24348190426826477,
33
+ "reward_std": 0.3061457574367523,
34
+ "rewards/base_format_phase1/mean": 0.05000000074505806,
35
+ "rewards/base_format_phase1/std": 0.057735029608011246,
36
+ "rewards/base_regret_phase1/mean": -0.2934819161891937,
37
+ "rewards/base_regret_phase1/std": 0.2507113218307495,
38
+ "smoke_gate_passed": false,
39
+ "step": 10,
40
+ "total_flos": 0.0,
41
+ "train_loss": 0.041143313492648305,
42
+ "train_runtime": 134.0562,
43
+ "train_samples_per_second": 0.298,
44
+ "train_steps_per_second": 0.075
45
+ }