TheodoreEhrenborg's picture
Upload results.json with huggingface_hub
a9b2487 verified
{
"d_sae": 16384,
"actual_l0": 159.6283643,
"effective_l0": 64,
"cross_entropy": 414.545231640625,
"sae_type": "saebench",
"sae_release": "canrager/saebench_gemma-2-2b_width-2pow14_date-0107",
"num_tokens": 10000000,
"num_epochs": 5,
"lr": 0.0005,
"loaded_dag": null,
"allow_negative_influence": false,
"script_runtime_seconds": 5197.718630075455,
"script_runtime_minutes": 86.62864383459092,
"history": {
"train_loss": [
732.1907091413226,
560.2760546003069,
514.9291493007115,
491.22300894601005,
476.2275621686663,
465.3508274623326,
457.40275312151226,
451.09822518484935,
446.30048413957866,
441.738911655971,
441.61235983712334,
438.730587550572,
435.8709974016462,
433.6602375575474,
431.13352857317244,
429.5512477329799,
427.9140635899135,
426.9687508719308,
425.8828778948103,
424.7388517107282,
423.9662421090262,
423.96841408865794,
422.5081399100167,
421.8477118355887,
421.43183070591516,
420.86571916852677,
420.3802429199219,
419.57361580984934,
419.46634172712055,
418.8761511666434,
418.72140110560827,
417.92284327915735,
417.9393042428153,
417.46479753766744,
417.235406930106,
417.29322139195034,
416.66914716448105,
416.730729675293,
416.1867268153599,
416.17149549211774,
415.9424224853516,
415.75340663364955,
415.61898738316125,
415.6563940865653,
415.4041767665318,
415.323065839495,
415.1418197631836,
415.0046354021345,
414.66681213378905,
414.5555570329939,
414.46331852504187,
414.56583448137553,
414.94642486572263,
414.2539657592773,
414.27054879324777
],
"train_steps": [
140,
280,
420,
560,
700,
840,
980,
1120,
1260,
1400,
1407,
1547,
1687,
1827,
1967,
2107,
2247,
2387,
2527,
2667,
2807,
2814,
2954,
3094,
3234,
3374,
3514,
3654,
3794,
3934,
4074,
4214,
4221,
4361,
4501,
4641,
4781,
4921,
5061,
5201,
5341,
5481,
5621,
5628,
5768,
5908,
6048,
6188,
6328,
6468,
6608,
6748,
6888,
7028,
7035
],
"val_loss": [
599.6144170898438,
531.20299453125,
500.5198718261719,
482.2030783691406,
469.7318541015625,
460.6147657714844,
453.6471915527344,
448.1693904296875,
443.7288943359375,
440.08709370117185,
440.0859723144531,
436.948605859375,
434.4268278808594,
432.26872094726565,
430.40091801757814,
428.79407412109373,
427.3747754882813,
426.132505859375,
425.0184524414062,
424.043707421875,
423.15604125976563,
423.2965830078125,
422.35915874023436,
421.66575834960935,
421.0220482910156,
420.4452380859375,
419.91448291015627,
419.4309670410156,
418.988144921875,
418.58148408203124,
418.18817978515625,
417.8346670898438,
417.99942548828125,
417.5140052734375,
417.2248073730469,
416.9462147949219,
416.6832835449219,
416.4367142578125,
416.230258203125,
416.02604897460935,
415.8633370117187,
415.68912651367185,
415.53023618164065,
415.7061401367188,
415.4003775390625,
415.27109868164064,
415.1384886230469,
415.018615234375,
414.89894208984373,
414.7949107421875,
414.68661064453124,
414.6003978515625,
414.48824331054686,
414.40605263671875,
414.545231640625
],
"val_cross_entropy": [
599.6144170898438,
531.20299453125,
500.5198718261719,
482.2030783691406,
469.7318541015625,
460.6147657714844,
453.6471915527344,
448.1693904296875,
443.7288943359375,
440.08709370117185,
440.0859723144531,
436.948605859375,
434.4268278808594,
432.26872094726565,
430.40091801757814,
428.79407412109373,
427.3747754882813,
426.132505859375,
425.0184524414062,
424.043707421875,
423.15604125976563,
423.2965830078125,
422.35915874023436,
421.66575834960935,
421.0220482910156,
420.4452380859375,
419.91448291015627,
419.4309670410156,
418.988144921875,
418.58148408203124,
418.18817978515625,
417.8346670898438,
417.99942548828125,
417.5140052734375,
417.2248073730469,
416.9462147949219,
416.6832835449219,
416.4367142578125,
416.230258203125,
416.02604897460935,
415.8633370117187,
415.68912651367185,
415.53023618164065,
415.7061401367188,
415.4003775390625,
415.27109868164064,
415.1384886230469,
415.018615234375,
414.89894208984373,
414.7949107421875,
414.68661064453124,
414.6003978515625,
414.48824331054686,
414.40605263671875,
414.545231640625
],
"effective_l0": [
99,
86,
80,
77,
74,
73,
71,
70,
70,
69,
69,
68,
68,
67,
67,
67,
67,
66,
66,
66,
66,
66,
66,
66,
65,
65,
65,
65,
65,
65,
65,
65,
65,
65,
65,
65,
65,
65,
65,
65,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64
],
"val_steps": [
140,
280,
420,
560,
700,
840,
980,
1120,
1260,
1400,
1407,
1547,
1687,
1827,
1967,
2107,
2247,
2387,
2527,
2667,
2807,
2814,
2954,
3094,
3234,
3374,
3514,
3654,
3794,
3934,
4074,
4214,
4221,
4361,
4501,
4641,
4781,
4921,
5061,
5201,
5341,
5481,
5621,
5628,
5768,
5908,
6048,
6188,
6328,
6468,
6608,
6748,
6888,
7028,
7035
]
},
"sae_id": "gemma-2-2b_matryoshka_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_3"
}