cleanup: remove ce_kl_auto_metrics.jsonl
Browse files- ce_kl_auto_metrics.jsonl +0 -27
ce_kl_auto_metrics.jsonl
DELETED
|
@@ -1,27 +0,0 @@
|
|
| 1 |
-
{"step": 10, "mode": "ce_kl_auto", "data_bpt": 2.7318, "optimization_loss_bpt": 2.8517, "mdl_bpt": 2.778, "grad_norm": 0.1665, "param_bpt": 0.046212, "S_ratio": 0.0166, "lambda": 2.593742}
|
| 2 |
-
{"step": 20, "mode": "ce_kl_auto", "data_bpt": 2.3077, "optimization_loss_bpt": 2.5889, "mdl_bpt": 2.3495, "grad_norm": 0.1423, "param_bpt": 0.041798, "S_ratio": 0.0178, "lambda": 6.7275}
|
| 3 |
-
{"step": 30, "mode": "ce_kl_auto", "data_bpt": 1.5397, "optimization_loss_bpt": 1.8846, "mdl_bpt": 1.5742, "grad_norm": 0.1992, "param_bpt": 0.034493, "S_ratio": 0.0219, "lambda": 10.0}
|
| 4 |
-
{"step": 40, "mode": "ce_kl_auto", "data_bpt": 2.5056, "optimization_loss_bpt": 2.7785, "mdl_bpt": 2.5329, "grad_norm": 0.142, "param_bpt": 0.027293, "S_ratio": 0.0108, "lambda": 10.0}
|
| 5 |
-
{"step": 50, "mode": "ce_kl_auto", "data_bpt": 2.4712, "optimization_loss_bpt": 2.6931, "mdl_bpt": 2.4934, "grad_norm": 0.1489, "param_bpt": 0.022186, "S_ratio": 0.0089, "lambda": 10.0}
|
| 6 |
-
{"step": 60, "mode": "ce_kl_auto", "data_bpt": 2.5114, "optimization_loss_bpt": 2.6997, "mdl_bpt": 2.5302, "grad_norm": 0.1531, "param_bpt": 0.018827, "S_ratio": 0.0074, "lambda": 10.0}
|
| 7 |
-
{"step": 70, "mode": "ce_kl_auto", "data_bpt": 1.4309, "optimization_loss_bpt": 1.5956, "mdl_bpt": 1.4474, "grad_norm": 0.1411, "param_bpt": 0.016472, "S_ratio": 0.0114, "lambda": 10.0}
|
| 8 |
-
{"step": 80, "mode": "ce_kl_auto", "data_bpt": 2.6609, "optimization_loss_bpt": 2.7446, "mdl_bpt": 2.6758, "grad_norm": 0.1338, "param_bpt": 0.014887, "S_ratio": 0.0056, "lambda": 5.621424}
|
| 9 |
-
{"step": 90, "mode": "ce_kl_auto", "data_bpt": 2.2619, "optimization_loss_bpt": 2.304, "mdl_bpt": 2.2762, "grad_norm": 0.1457, "param_bpt": 0.014354, "S_ratio": 0.0063, "lambda": 2.936496}
|
| 10 |
-
{"step": 100, "mode": "ce_kl_auto", "data_bpt": 1.6573, "optimization_loss_bpt": 1.6847, "mdl_bpt": 1.6718, "grad_norm": 0.1454, "param_bpt": 0.014511, "S_ratio": 0.0087, "lambda": 1.885071}
|
| 11 |
-
{"step": 110, "mode": "ce_kl_auto", "data_bpt": 1.9088, "optimization_loss_bpt": 1.9312, "mdl_bpt": 1.9239, "grad_norm": 0.136, "param_bpt": 0.015122, "S_ratio": 0.0079, "lambda": 1.47983}
|
| 12 |
-
{"step": 120, "mode": "ce_kl_auto", "data_bpt": 1.7561, "optimization_loss_bpt": 1.7741, "mdl_bpt": 1.7721, "grad_norm": 0.1701, "param_bpt": 0.015973, "S_ratio": 0.009, "lambda": 1.127144}
|
| 13 |
-
{"step": 130, "mode": "ce_kl_auto", "data_bpt": 2.4639, "optimization_loss_bpt": 2.4706, "mdl_bpt": 2.4809, "grad_norm": 0.1715, "param_bpt": 0.017004, "S_ratio": 0.0069, "lambda": 0.393011}
|
| 14 |
-
{"step": 140, "mode": "ce_kl_auto", "data_bpt": 2.054, "optimization_loss_bpt": 2.0565, "mdl_bpt": 2.0721, "grad_norm": 0.1865, "param_bpt": 0.018096, "S_ratio": 0.0087, "lambda": 0.137034}
|
| 15 |
-
{"step": 150, "mode": "ce_kl_auto", "data_bpt": 2.6339, "optimization_loss_bpt": 2.6348, "mdl_bpt": 2.6532, "grad_norm": 0.1422, "param_bpt": 0.019254, "S_ratio": 0.0073, "lambda": 0.047781}
|
| 16 |
-
{"step": 160, "mode": "ce_kl_auto", "data_bpt": 2.0235, "optimization_loss_bpt": 2.0242, "mdl_bpt": 2.044, "grad_norm": 0.1476, "param_bpt": 0.020475, "S_ratio": 0.01, "lambda": 0.034832}
|
| 17 |
-
{"step": 170, "mode": "ce_kl_auto", "data_bpt": 2.5405, "optimization_loss_bpt": 2.5413, "mdl_bpt": 2.5622, "grad_norm": 0.1899, "param_bpt": 0.021699, "S_ratio": 0.0085, "lambda": 0.034832}
|
| 18 |
-
{"step": 180, "mode": "ce_kl_auto", "data_bpt": 2.403, "optimization_loss_bpt": 2.4038, "mdl_bpt": 2.426, "grad_norm": 0.1741, "param_bpt": 0.023031, "S_ratio": 0.0095, "lambda": 0.034832}
|
| 19 |
-
{"step": 190, "mode": "ce_kl_auto", "data_bpt": 2.2621, "optimization_loss_bpt": 2.2629, "mdl_bpt": 2.2864, "grad_norm": 0.1537, "param_bpt": 0.024352, "S_ratio": 0.0107, "lambda": 0.034832}
|
| 20 |
-
{"step": 200, "mode": "ce_kl_auto", "data_bpt": 2.4402, "optimization_loss_bpt": 2.4411, "mdl_bpt": 2.4658, "grad_norm": 0.1595, "param_bpt": 0.025665, "S_ratio": 0.0104, "lambda": 0.034832}
|
| 21 |
-
{"step": 210, "mode": "ce_kl_auto", "data_bpt": 2.3121, "optimization_loss_bpt": 2.3131, "mdl_bpt": 2.3391, "grad_norm": 0.1859, "param_bpt": 0.026968, "S_ratio": 0.0115, "lambda": 0.034832}
|
| 22 |
-
{"step": 220, "mode": "ce_kl_auto", "data_bpt": 2.1364, "optimization_loss_bpt": 2.1375, "mdl_bpt": 2.1648, "grad_norm": 0.1607, "param_bpt": 0.028332, "S_ratio": 0.0131, "lambda": 0.037018}
|
| 23 |
-
{"step": 230, "mode": "ce_kl_auto", "data_bpt": 2.8529, "optimization_loss_bpt": 2.8541, "mdl_bpt": 2.8826, "grad_norm": 0.1911, "param_bpt": 0.029676, "S_ratio": 0.0103, "lambda": 0.040473}
|
| 24 |
-
{"step": 240, "mode": "ce_kl_auto", "data_bpt": 2.1626, "optimization_loss_bpt": 2.1639, "mdl_bpt": 2.1937, "grad_norm": 0.1936, "param_bpt": 0.031114, "S_ratio": 0.0142, "lambda": 0.041946}
|
| 25 |
-
{"step": 250, "mode": "ce_kl_auto", "data_bpt": 0.6601, "optimization_loss_bpt": 0.6623, "mdl_bpt": 0.6927, "grad_norm": 0.134, "param_bpt": 0.032601, "S_ratio": 0.0471, "lambda": 0.068796}
|
| 26 |
-
{"step": 260, "mode": "ce_kl_auto", "data_bpt": 2.5056, "optimization_loss_bpt": 2.5117, "mdl_bpt": 2.5396, "grad_norm": 0.1723, "param_bpt": 0.034028, "S_ratio": 0.0134, "lambda": 0.178439}
|
| 27 |
-
{"step": 270, "mode": "ce_kl_auto", "data_bpt": 2.1665, "optimization_loss_bpt": 2.1828, "mdl_bpt": 2.2018, "grad_norm": 0.1907, "param_bpt": 0.035284, "S_ratio": 0.016, "lambda": 0.462824}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|