Upload folder using huggingface_hub
Browse files- final_model.safetensors +3 -0
- results.json +63 -0
final_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:123f44e54e9cee63e764de60d5fa8d53133d74bae778e3d61b71b6da3af2aefa
|
| 3 |
+
size 2147549440
|
results.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"d_sae": 16384,
|
| 3 |
+
"actual_l0": 123.8377292,
|
| 4 |
+
"effective_l0": 41,
|
| 5 |
+
"cross_entropy": 279.04244228515626,
|
| 6 |
+
"sae_type": "gemmascope",
|
| 7 |
+
"sae_release": "gemma-scope-2b-pt-res-canonical",
|
| 8 |
+
"num_tokens": 10000000,
|
| 9 |
+
"num_epochs": 10,
|
| 10 |
+
"lr": 0.0005,
|
| 11 |
+
"loaded_dag": null,
|
| 12 |
+
"history": {
|
| 13 |
+
"train_loss": [
|
| 14 |
+
382.13484457594836,
|
| 15 |
+
305.76656718413375,
|
| 16 |
+
291.46070635157776,
|
| 17 |
+
285.45110804185686,
|
| 18 |
+
282.54676622554774,
|
| 19 |
+
280.92684051578743,
|
| 20 |
+
279.93711149192654,
|
| 21 |
+
279.2730671679965,
|
| 22 |
+
278.8017281191351,
|
| 23 |
+
278.44963708157735
|
| 24 |
+
],
|
| 25 |
+
"val_loss": [
|
| 26 |
+
319.55553959960935,
|
| 27 |
+
296.5204939941406,
|
| 28 |
+
288.0323685546875,
|
| 29 |
+
284.0652142089844,
|
| 30 |
+
282.05106518554686,
|
| 31 |
+
280.9238757324219,
|
| 32 |
+
280.195337109375,
|
| 33 |
+
279.681946484375,
|
| 34 |
+
279.29939350585937,
|
| 35 |
+
279.04244228515626
|
| 36 |
+
],
|
| 37 |
+
"val_cross_entropy": [
|
| 38 |
+
319.55553959960935,
|
| 39 |
+
296.5204939941406,
|
| 40 |
+
288.0323685546875,
|
| 41 |
+
284.0652142089844,
|
| 42 |
+
282.05106518554686,
|
| 43 |
+
280.9238757324219,
|
| 44 |
+
280.195337109375,
|
| 45 |
+
279.681946484375,
|
| 46 |
+
279.29939350585937,
|
| 47 |
+
279.04244228515626
|
| 48 |
+
],
|
| 49 |
+
"effective_l0": [
|
| 50 |
+
48,
|
| 51 |
+
44,
|
| 52 |
+
42,
|
| 53 |
+
42,
|
| 54 |
+
41,
|
| 55 |
+
41,
|
| 56 |
+
41,
|
| 57 |
+
41,
|
| 58 |
+
41,
|
| 59 |
+
41
|
| 60 |
+
]
|
| 61 |
+
},
|
| 62 |
+
"sae_id": "layer_12/width_16k/canonical"
|
| 63 |
+
}
|