Charlie81 committed on
Commit
295afd0
·
1 Parent(s): d844978

Checkpoint at step 80

Browse files
checkpoints/{checkpoint-40 → checkpoint-60}/small_experts_and_gates.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dcad30c967aa7b4708df7b4d5a68f0b6d8c6123a399b6c35b9e0a4e29e67abb8
3
  size 206711911
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e934d0cf6af7ae2d9a784f24f2c7dd73c89d260d3b53c3d0e3d34ef537f65289
3
  size 206711911
checkpoints/{checkpoint-40 → checkpoint-80}/config.json RENAMED
File without changes
checkpoints/{checkpoint-40 → checkpoint-80}/generation_config.json RENAMED
File without changes
checkpoints/{checkpoint-40 → checkpoint-80}/model-00001-of-00003.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:114400ea3c1f6b4bf8985e9f050651f88d39f370453e65343bbaa9d1d34d65a2
3
  size 4998915096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82affd842ec486fef2da08a6bc5620d759625dcd2a9671f7e1633db3e23e8154
3
  size 4998915096
checkpoints/{checkpoint-40 → checkpoint-80}/model-00002-of-00003.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2231902fc66ad75a3b8f3686c1a68d275554007b4678fad5d73d74f364a78a16
3
  size 4998953920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a982717a3e6e6f33e64b592ba98c998d77f1fea7b50229735c94b491857b47a7
3
  size 4998953920
checkpoints/{checkpoint-40 → checkpoint-80}/model-00003-of-00003.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fec36ff27e7cffe29172cd7c3426cec7a63e832e4a36fd3dd8d4cf337082135e
3
  size 4046769808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2321b2ac4c163adff676d015f90a88da265a1515998048140c100ab1b50a7643
3
  size 4046769808
checkpoints/{checkpoint-40 → checkpoint-80}/model.safetensors.index.json RENAMED
File without changes
checkpoints/{checkpoint-40 → checkpoint-80}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5592a107e3807fc0b70ae71b93a98f020ad60d21c3c068b7be1bbbac2001b89e
3
- size 391093394
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60afe906089703ce8ff6dc7db2e543bedd92d9c1554d99f956ac873fa29965c8
3
+ size 394655075
checkpoints/{checkpoint-40 → checkpoint-80}/rng_state.pth RENAMED
File without changes
checkpoints/{checkpoint-40 → checkpoint-80}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b724dee474d2f28b5e7cd28f30ad6358e8e4bd90e6205eb9f6ebd8fb296e614
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d484e81fa22cdbcf66c2585a42950fa1db3e6db36dd8936a72f76c2844202008
3
  size 1465
checkpoints/{checkpoint-40 → checkpoint-80}/trainer_state.json RENAMED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.001962263225347535,
6
  "eval_steps": 500,
7
- "global_step": 40,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -36,6 +36,34 @@
36
  "learning_rate": 6.376716808371485e-07,
37
  "loss": 109.0438,
38
  "step": 40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  }
40
  ],
41
  "logging_steps": 10,
@@ -55,7 +83,7 @@
55
  "attributes": {}
56
  }
57
  },
58
- "total_flos": 1.0882488276615168e+17,
59
  "train_batch_size": 2,
60
  "trial_name": null,
61
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.00392452645069507,
6
  "eval_steps": 500,
7
+ "global_step": 80,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
36
  "learning_rate": 6.376716808371485e-07,
37
  "loss": 109.0438,
38
  "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.002452829031684419,
42
+ "grad_norm": 53.75,
43
+ "learning_rate": 8.011772400261609e-07,
44
+ "loss": 106.0855,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.0029433948380213027,
49
+ "grad_norm": 63.25,
50
+ "learning_rate": 9.646827992151733e-07,
51
+ "loss": 106.0407,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 0.003433960644358187,
56
+ "grad_norm": 54.0,
57
+ "learning_rate": 1.1281883584041859e-06,
58
+ "loss": 104.9819,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 0.00392452645069507,
63
+ "grad_norm": 51.25,
64
+ "learning_rate": 1.2916939175931983e-06,
65
+ "loss": 108.4838,
66
+ "step": 80
67
  }
68
  ],
69
  "logging_steps": 10,
 
83
  "attributes": {}
84
  }
85
  },
86
+ "total_flos": 2.1764976553230336e+17,
87
  "train_batch_size": 2,
88
  "trial_name": null,
89
  "trial_params": null
checkpoints/{checkpoint-40 → checkpoint-80}/training_args.bin RENAMED
File without changes
logs/events.out.tfevents.1756831118.5624918391e3.1490.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a16f40df0bc3e351cb0924387161cfe2a0c9e6a01de958593980e5f0db6e1bb
3
- size 6486
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29a79c773a923a3f7506eff6045dcc232e3fb15f00e7e6cc002ad450cad87400
3
+ size 6900