hunterbown commited on
Commit
60c62d9
·
verified ·
1 Parent(s): 1ee54d0

cleanup: remove ce_kl_auto_metrics.jsonl

Browse files
Files changed (1) hide show
  1. ce_kl_auto_metrics.jsonl +0 -27
ce_kl_auto_metrics.jsonl DELETED
@@ -1,27 +0,0 @@
1
- {"step": 10, "mode": "ce_kl_auto", "data_bpt": 2.7318, "optimization_loss_bpt": 2.8517, "mdl_bpt": 2.778, "grad_norm": 0.1665, "param_bpt": 0.046212, "S_ratio": 0.0166, "lambda": 2.593742}
2
- {"step": 20, "mode": "ce_kl_auto", "data_bpt": 2.3077, "optimization_loss_bpt": 2.5889, "mdl_bpt": 2.3495, "grad_norm": 0.1423, "param_bpt": 0.041798, "S_ratio": 0.0178, "lambda": 6.7275}
3
- {"step": 30, "mode": "ce_kl_auto", "data_bpt": 1.5397, "optimization_loss_bpt": 1.8846, "mdl_bpt": 1.5742, "grad_norm": 0.1992, "param_bpt": 0.034493, "S_ratio": 0.0219, "lambda": 10.0}
4
- {"step": 40, "mode": "ce_kl_auto", "data_bpt": 2.5056, "optimization_loss_bpt": 2.7785, "mdl_bpt": 2.5329, "grad_norm": 0.142, "param_bpt": 0.027293, "S_ratio": 0.0108, "lambda": 10.0}
5
- {"step": 50, "mode": "ce_kl_auto", "data_bpt": 2.4712, "optimization_loss_bpt": 2.6931, "mdl_bpt": 2.4934, "grad_norm": 0.1489, "param_bpt": 0.022186, "S_ratio": 0.0089, "lambda": 10.0}
6
- {"step": 60, "mode": "ce_kl_auto", "data_bpt": 2.5114, "optimization_loss_bpt": 2.6997, "mdl_bpt": 2.5302, "grad_norm": 0.1531, "param_bpt": 0.018827, "S_ratio": 0.0074, "lambda": 10.0}
7
- {"step": 70, "mode": "ce_kl_auto", "data_bpt": 1.4309, "optimization_loss_bpt": 1.5956, "mdl_bpt": 1.4474, "grad_norm": 0.1411, "param_bpt": 0.016472, "S_ratio": 0.0114, "lambda": 10.0}
8
- {"step": 80, "mode": "ce_kl_auto", "data_bpt": 2.6609, "optimization_loss_bpt": 2.7446, "mdl_bpt": 2.6758, "grad_norm": 0.1338, "param_bpt": 0.014887, "S_ratio": 0.0056, "lambda": 5.621424}
9
- {"step": 90, "mode": "ce_kl_auto", "data_bpt": 2.2619, "optimization_loss_bpt": 2.304, "mdl_bpt": 2.2762, "grad_norm": 0.1457, "param_bpt": 0.014354, "S_ratio": 0.0063, "lambda": 2.936496}
10
- {"step": 100, "mode": "ce_kl_auto", "data_bpt": 1.6573, "optimization_loss_bpt": 1.6847, "mdl_bpt": 1.6718, "grad_norm": 0.1454, "param_bpt": 0.014511, "S_ratio": 0.0087, "lambda": 1.885071}
11
- {"step": 110, "mode": "ce_kl_auto", "data_bpt": 1.9088, "optimization_loss_bpt": 1.9312, "mdl_bpt": 1.9239, "grad_norm": 0.136, "param_bpt": 0.015122, "S_ratio": 0.0079, "lambda": 1.47983}
12
- {"step": 120, "mode": "ce_kl_auto", "data_bpt": 1.7561, "optimization_loss_bpt": 1.7741, "mdl_bpt": 1.7721, "grad_norm": 0.1701, "param_bpt": 0.015973, "S_ratio": 0.009, "lambda": 1.127144}
13
- {"step": 130, "mode": "ce_kl_auto", "data_bpt": 2.4639, "optimization_loss_bpt": 2.4706, "mdl_bpt": 2.4809, "grad_norm": 0.1715, "param_bpt": 0.017004, "S_ratio": 0.0069, "lambda": 0.393011}
14
- {"step": 140, "mode": "ce_kl_auto", "data_bpt": 2.054, "optimization_loss_bpt": 2.0565, "mdl_bpt": 2.0721, "grad_norm": 0.1865, "param_bpt": 0.018096, "S_ratio": 0.0087, "lambda": 0.137034}
15
- {"step": 150, "mode": "ce_kl_auto", "data_bpt": 2.6339, "optimization_loss_bpt": 2.6348, "mdl_bpt": 2.6532, "grad_norm": 0.1422, "param_bpt": 0.019254, "S_ratio": 0.0073, "lambda": 0.047781}
16
- {"step": 160, "mode": "ce_kl_auto", "data_bpt": 2.0235, "optimization_loss_bpt": 2.0242, "mdl_bpt": 2.044, "grad_norm": 0.1476, "param_bpt": 0.020475, "S_ratio": 0.01, "lambda": 0.034832}
17
- {"step": 170, "mode": "ce_kl_auto", "data_bpt": 2.5405, "optimization_loss_bpt": 2.5413, "mdl_bpt": 2.5622, "grad_norm": 0.1899, "param_bpt": 0.021699, "S_ratio": 0.0085, "lambda": 0.034832}
18
- {"step": 180, "mode": "ce_kl_auto", "data_bpt": 2.403, "optimization_loss_bpt": 2.4038, "mdl_bpt": 2.426, "grad_norm": 0.1741, "param_bpt": 0.023031, "S_ratio": 0.0095, "lambda": 0.034832}
19
- {"step": 190, "mode": "ce_kl_auto", "data_bpt": 2.2621, "optimization_loss_bpt": 2.2629, "mdl_bpt": 2.2864, "grad_norm": 0.1537, "param_bpt": 0.024352, "S_ratio": 0.0107, "lambda": 0.034832}
20
- {"step": 200, "mode": "ce_kl_auto", "data_bpt": 2.4402, "optimization_loss_bpt": 2.4411, "mdl_bpt": 2.4658, "grad_norm": 0.1595, "param_bpt": 0.025665, "S_ratio": 0.0104, "lambda": 0.034832}
21
- {"step": 210, "mode": "ce_kl_auto", "data_bpt": 2.3121, "optimization_loss_bpt": 2.3131, "mdl_bpt": 2.3391, "grad_norm": 0.1859, "param_bpt": 0.026968, "S_ratio": 0.0115, "lambda": 0.034832}
22
- {"step": 220, "mode": "ce_kl_auto", "data_bpt": 2.1364, "optimization_loss_bpt": 2.1375, "mdl_bpt": 2.1648, "grad_norm": 0.1607, "param_bpt": 0.028332, "S_ratio": 0.0131, "lambda": 0.037018}
23
- {"step": 230, "mode": "ce_kl_auto", "data_bpt": 2.8529, "optimization_loss_bpt": 2.8541, "mdl_bpt": 2.8826, "grad_norm": 0.1911, "param_bpt": 0.029676, "S_ratio": 0.0103, "lambda": 0.040473}
24
- {"step": 240, "mode": "ce_kl_auto", "data_bpt": 2.1626, "optimization_loss_bpt": 2.1639, "mdl_bpt": 2.1937, "grad_norm": 0.1936, "param_bpt": 0.031114, "S_ratio": 0.0142, "lambda": 0.041946}
25
- {"step": 250, "mode": "ce_kl_auto", "data_bpt": 0.6601, "optimization_loss_bpt": 0.6623, "mdl_bpt": 0.6927, "grad_norm": 0.134, "param_bpt": 0.032601, "S_ratio": 0.0471, "lambda": 0.068796}
26
- {"step": 260, "mode": "ce_kl_auto", "data_bpt": 2.5056, "optimization_loss_bpt": 2.5117, "mdl_bpt": 2.5396, "grad_norm": 0.1723, "param_bpt": 0.034028, "S_ratio": 0.0134, "lambda": 0.178439}
27
- {"step": 270, "mode": "ce_kl_auto", "data_bpt": 2.1665, "optimization_loss_bpt": 2.1828, "mdl_bpt": 2.2018, "grad_norm": 0.1907, "param_bpt": 0.035284, "S_ratio": 0.016, "lambda": 0.462824}