Charlie81 committed on
Commit
d844978
·
1 Parent(s): 11fe5f0

Checkpoint at step 60

Browse files
checkpoints/{checkpoint-20 → checkpoint-40}/small_experts_and_gates.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e0a1c589f5de5e24b6da0665fb82d1950a722bcadea7cbaab535b51df184e4bc
3
  size 206711911
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcad30c967aa7b4708df7b4d5a68f0b6d8c6123a399b6c35b9e0a4e29e67abb8
3
  size 206711911
checkpoints/{checkpoint-20 → checkpoint-60}/config.json RENAMED
File without changes
checkpoints/{checkpoint-20 → checkpoint-60}/generation_config.json RENAMED
File without changes
checkpoints/{checkpoint-20 → checkpoint-60}/model-00001-of-00003.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0da34e74ed72395fa85811978c57461923bcbdbb06f9470601de4746e227016a
3
  size 4998915096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48b53e83b3b9e51270e2b63ef55313427723ff57c75e4cf4c8269e04de45b290
3
  size 4998915096
checkpoints/{checkpoint-20 → checkpoint-60}/model-00002-of-00003.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98373ef59e208fd7f0484c3868fa3a90473d26efb42452c34146a9131d6ba93f
3
  size 4998953920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d944c12069808ce0f621767a0eec41762042e9c6daad49b55d161adf7b52702
3
  size 4998953920
checkpoints/{checkpoint-20 → checkpoint-60}/model-00003-of-00003.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a51ed769a193f6f3e113a19b244a56b0f0d4dc4ae073aa3c7703199f6ecb3f89
3
  size 4046769808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b183dccf9c40fe45cbbaea5eed282b461eb701195ae659c5c2f3a6f73d10c88
3
  size 4046769808
checkpoints/{checkpoint-20 → checkpoint-60}/model.safetensors.index.json RENAMED
File without changes
checkpoints/{checkpoint-20 → checkpoint-60}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5bafc6c5721f51d42e5675e5a4158ceb3aa937ac367c6910fdaf603bf11e56c1
3
- size 383574247
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f466719a427bface1fffa8fbb6d8e31641cd12d2939657c34d508b7693e82a30
3
+ size 393467848
checkpoints/{checkpoint-20 → checkpoint-60}/rng_state.pth RENAMED
File without changes
checkpoints/{checkpoint-20 → checkpoint-60}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:795c503600292c8c704d0cbf6e623255ed54848f02f567e3d52d4c939d5df78e
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e1be529198179cd559ddcb4c59a9f665944a456be4a70f4f5dcf79350fe0534
3
  size 1465
checkpoints/{checkpoint-20 → checkpoint-60}/trainer_state.json RENAMED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.0009811316126737675,
6
  "eval_steps": 500,
7
- "global_step": 20,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -22,6 +22,34 @@
22
  "learning_rate": 3.106605624591236e-07,
23
  "loss": 106.5866,
24
  "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  }
26
  ],
27
  "logging_steps": 10,
@@ -41,7 +69,7 @@
41
  "attributes": {}
42
  }
43
  },
44
- "total_flos": 5.441244138307584e+16,
45
  "train_batch_size": 2,
46
  "trial_name": null,
47
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.0029433948380213027,
6
  "eval_steps": 500,
7
+ "global_step": 60,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
22
  "learning_rate": 3.106605624591236e-07,
23
  "loss": 106.5866,
24
  "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.0014716974190106514,
28
+ "grad_norm": 52.75,
29
+ "learning_rate": 4.7416612164813603e-07,
30
+ "loss": 107.8943,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.001962263225347535,
35
+ "grad_norm": 68.5,
36
+ "learning_rate": 6.376716808371485e-07,
37
+ "loss": 109.0438,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.002452829031684419,
42
+ "grad_norm": 53.75,
43
+ "learning_rate": 8.011772400261609e-07,
44
+ "loss": 106.0855,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.0029433948380213027,
49
+ "grad_norm": 63.25,
50
+ "learning_rate": 9.646827992151733e-07,
51
+ "loss": 106.0407,
52
+ "step": 60
53
  }
54
  ],
55
  "logging_steps": 10,
 
69
  "attributes": {}
70
  }
71
  },
72
+ "total_flos": 1.6323732414922752e+17,
73
  "train_batch_size": 2,
74
  "trial_name": null,
75
  "trial_params": null
checkpoints/{checkpoint-20 → checkpoint-60}/training_args.bin RENAMED
File without changes
logs/events.out.tfevents.1756831118.5624918391e3.1490.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b893f207f0aecf3668043c1d7255468fc0a035f088b014a4f0616628302defc
3
- size 6072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a16f40df0bc3e351cb0924387161cfe2a0c9e6a01de958593980e5f0db6e1bb
3
+ size 6486