Instructions to use 77ethers/CarbonAlpha with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use 77ethers/CarbonAlpha with PEFT:
Task type is invalid.
- Notebooks
- Google Colab
- Kaggle
grpo_qwen3_4b_base_smoke_v1: failed smoke metrics
Browse files
grpo_qwen3_4b_base_smoke_v1/smoke_metrics_failed.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"clip_ratio/high_max": 0.0,
|
| 3 |
+
"clip_ratio/high_mean": 0.0,
|
| 4 |
+
"clip_ratio/low_mean": 0.0,
|
| 5 |
+
"clip_ratio/low_min": 0.0,
|
| 6 |
+
"clip_ratio/region_mean": 0.0,
|
| 7 |
+
"completion_length": 385.25,
|
| 8 |
+
"completions/clipped_ratio": 0.5,
|
| 9 |
+
"completions/max_length": 400.0,
|
| 10 |
+
"completions/max_terminated_length": 379.0,
|
| 11 |
+
"completions/mean_length": 385.25,
|
| 12 |
+
"completions/mean_terminated_length": 370.5,
|
| 13 |
+
"completions/min_length": 362.0,
|
| 14 |
+
"completions/min_terminated_length": 362.0,
|
| 15 |
+
"epoch": 0.25,
|
| 16 |
+
"frac_reward_zero_std": 0.0,
|
| 17 |
+
"grad_norm": 0.6496501564979553,
|
| 18 |
+
"kl": 0.5402975082397461,
|
| 19 |
+
"last": {
|
| 20 |
+
"epoch": 0.25,
|
| 21 |
+
"step": 10,
|
| 22 |
+
"total_flos": 0.0,
|
| 23 |
+
"train_loss": 0.041143313492648305,
|
| 24 |
+
"train_runtime": 134.0562,
|
| 25 |
+
"train_samples_per_second": 0.298,
|
| 26 |
+
"train_steps_per_second": 0.075
|
| 27 |
+
},
|
| 28 |
+
"learning_rate": 5.555555555555555e-07,
|
| 29 |
+
"log_rows": 11,
|
| 30 |
+
"loss": 0.036,
|
| 31 |
+
"num_tokens": 53307.0,
|
| 32 |
+
"reward": -0.24348190426826477,
|
| 33 |
+
"reward_std": 0.3061457574367523,
|
| 34 |
+
"rewards/base_format_phase1/mean": 0.05000000074505806,
|
| 35 |
+
"rewards/base_format_phase1/std": 0.057735029608011246,
|
| 36 |
+
"rewards/base_regret_phase1/mean": -0.2934819161891937,
|
| 37 |
+
"rewards/base_regret_phase1/std": 0.2507113218307495,
|
| 38 |
+
"smoke_gate_passed": false,
|
| 39 |
+
"step": 10,
|
| 40 |
+
"total_flos": 0.0,
|
| 41 |
+
"train_loss": 0.041143313492648305,
|
| 42 |
+
"train_runtime": 134.0562,
|
| 43 |
+
"train_samples_per_second": 0.298,
|
| 44 |
+
"train_steps_per_second": 0.075
|
| 45 |
+
}
|