ExplosionNuclear committed on
Commit
d84da2a
·
verified ·
1 Parent(s): b1b60f9

Upload checkpoint 50

Browse files
checkpoint-50/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90d7cc3a8a6e375738382fa5c67a50780e770f7e02322b23f896169c272e0368
3
+ size 5637240474
checkpoint-50/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f37c40ce327861a7ca13b719d3aa37510a143368b6e74358bdb14becb3899e1e
3
+ size 14244
checkpoint-50/saes-00000.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3150cd83d7e0a0aab0cdecb5a674666600582e4bebc624179d74a0daa8f6f189
3
+ size 2818583704
checkpoint-50/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56bf2111f4eda14e639c448d3632235a37515971ad07b06022ea6bdb445e9642
3
+ size 1000
checkpoint-50/trainer_state.json ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.016,
5
+ "eval_steps": 20,
6
+ "global_step": 50,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0032,
13
+ "grad_norm": 0.0,
14
+ "learning_rate": 2.9999999999999997e-05,
15
+ "loss": 4.2451,
16
+ "sae_grad_norm": 20.84289119605359,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.0064,
21
+ "grad_norm": 0.0,
22
+ "learning_rate": 5.9999999999999995e-05,
23
+ "loss": 3.3251,
24
+ "sae_grad_norm": 36.11170541944039,
25
+ "step": 20
26
+ },
27
+ {
28
+ "epoch": 0.0064,
29
+ "eval_runtime": 1.1705,
30
+ "eval_samples_per_second": 34.175,
31
+ "eval_steps_per_second": 17.087,
32
+ "sae_grad_norm": 36.11170541944039,
33
+ "step": 20
34
+ },
35
+ {
36
+ "epoch": 0.0096,
37
+ "grad_norm": 0.0,
38
+ "learning_rate": 8.999999999999999e-05,
39
+ "loss": 3.76,
40
+ "sae_grad_norm": 51.673781878506645,
41
+ "step": 30
42
+ },
43
+ {
44
+ "epoch": 0.0128,
45
+ "grad_norm": 0.0,
46
+ "learning_rate": 0.00011999999999999999,
47
+ "loss": 3.6995,
48
+ "sae_grad_norm": 70.40541113432494,
49
+ "step": 40
50
+ },
51
+ {
52
+ "epoch": 0.0128,
53
+ "eval_runtime": 0.9447,
54
+ "eval_samples_per_second": 42.34,
55
+ "eval_steps_per_second": 21.17,
56
+ "sae_grad_norm": 70.40541113432494,
57
+ "step": 40
58
+ },
59
+ {
60
+ "epoch": 0.016,
61
+ "grad_norm": 0.0,
62
+ "learning_rate": 0.00015,
63
+ "loss": 3.8664,
64
+ "sae_grad_norm": 84.45994057300459,
65
+ "step": 50
66
+ }
67
+ ],
68
+ "logging_steps": 10,
69
+ "max_steps": 50,
70
+ "num_input_tokens_seen": 0,
71
+ "num_train_epochs": 1,
72
+ "save_steps": 60,
73
+ "stateful_callbacks": {
74
+ "TrainerControl": {
75
+ "args": {
76
+ "should_epoch_stop": false,
77
+ "should_evaluate": false,
78
+ "should_log": false,
79
+ "should_save": true,
80
+ "should_training_stop": true
81
+ },
82
+ "attributes": {}
83
+ }
84
+ },
85
+ "total_flos": 1183163580297216.0,
86
+ "train_batch_size": 2,
87
+ "trial_name": null,
88
+ "trial_params": null
89
+ }