shorecode commited on
Commit
227b08b
·
verified ·
1 Parent(s): 0c3dd8a

Upload folder using huggingface_hub

Browse files
checkpoint-latest/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:393c31c029be3e0c558e9b43e5093997ec4c94e8cc3df4239769190abffdda69
3
  size 62293080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05526172b6b183727ed691244d8fca12632b8705fa3b5f5582a369439119e9f7
3
  size 62293080
checkpoint-latest/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e0467a1e128a83101d757df9edbc49229825b87355d89ca5c115347fdffcc60
3
  size 124642443
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ae74e3c93a2f232f2571925d5902330db686e4ed3351b82bafa9a5dbb3dca60
3
  size 124642443
checkpoint-latest/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8442053f994922dc69efe15ac7d6938fd15f0fd7a705fec6122ab91041dc1f14
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:111dc925ae2f7248f73dcd9582f2230af09b95cb292fde39121b77d0a24595e6
3
  size 14645
checkpoint-latest/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85ab8cc1fdedbccfbfeb8687bbad9305fda8976259d4a6ffe9a48f328a2c592d
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec9761731a5aa3d5575ffed14f4165db19c93fbedd2eb3e25069def656bd6d84
3
  size 1465
checkpoint-latest/trainer_state.json CHANGED
@@ -2,7 +2,7 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.18950161076369149,
6
  "eval_steps": 500,
7
  "global_step": 1000,
8
  "is_hyper_param_search": false,
@@ -10,52 +10,52 @@
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "epoch": 0.04263786242183058,
14
- "grad_norm": NaN,
15
- "learning_rate": 0.0002957551639188933,
16
- "loss": 0.0,
17
- "step": 225
18
  },
19
  {
20
- "epoch": 0.08527572484366117,
21
- "grad_norm": NaN,
22
- "learning_rate": 0.0002914913776767102,
23
- "loss": 0.0,
24
- "step": 450
25
  },
26
  {
27
- "epoch": 0.09475080538184574,
28
- "eval_loss": NaN,
29
- "eval_runtime": 58.7663,
30
- "eval_samples_per_second": 167.613,
31
- "eval_steps_per_second": 5.99,
32
  "step": 500
33
  },
34
  {
35
- "epoch": 0.12791358726549176,
36
- "grad_norm": NaN,
37
- "learning_rate": 0.0002872275914345272,
38
- "loss": 0.0,
39
- "step": 675
40
  },
41
  {
42
- "epoch": 0.17055144968732233,
43
- "grad_norm": NaN,
44
- "learning_rate": 0.0002829638051923441,
45
- "loss": 0.0,
46
- "step": 900
47
  },
48
  {
49
- "epoch": 0.18950161076369149,
50
- "eval_loss": NaN,
51
- "eval_runtime": 58.8198,
52
- "eval_samples_per_second": 167.46,
53
- "eval_steps_per_second": 5.984,
54
  "step": 1000
55
  }
56
  ],
57
- "logging_steps": 225,
58
- "max_steps": 15831,
59
  "num_input_tokens_seen": 0,
60
  "num_train_epochs": 3,
61
  "save_steps": 1000,
@@ -71,8 +71,8 @@
71
  "attributes": {}
72
  }
73
  },
74
- "total_flos": 631844634624000.0,
75
- "train_batch_size": 28,
76
  "trial_name": null,
77
  "trial_params": null
78
  }
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.10828370330265295,
6
  "eval_steps": 500,
7
  "global_step": 1000,
8
  "is_hyper_param_search": false,
 
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "epoch": 0.02707092582566324,
14
+ "grad_norm": 0.8431992530822754,
15
+ "learning_rate": 9.910124526258799e-05,
16
+ "loss": 4.3576,
17
+ "step": 250
18
  },
19
  {
20
+ "epoch": 0.05414185165132648,
21
+ "grad_norm": 0.8061181306838989,
22
+ "learning_rate": 9.819888106839921e-05,
23
+ "loss": 4.2775,
24
+ "step": 500
25
  },
26
  {
27
+ "epoch": 0.05414185165132648,
28
+ "eval_loss": 3.5462777614593506,
29
+ "eval_runtime": 87.734,
30
+ "eval_samples_per_second": 112.271,
31
+ "eval_steps_per_second": 7.021,
32
  "step": 500
33
  },
34
  {
35
+ "epoch": 0.08121277747698971,
36
+ "grad_norm": 0.7875335812568665,
37
+ "learning_rate": 9.729651687421044e-05,
38
+ "loss": 4.1927,
39
+ "step": 750
40
  },
41
  {
42
+ "epoch": 0.10828370330265295,
43
+ "grad_norm": 1.0330173969268799,
44
+ "learning_rate": 9.639415268002166e-05,
45
+ "loss": 4.1178,
46
+ "step": 1000
47
  },
48
  {
49
+ "epoch": 0.10828370330265295,
50
+ "eval_loss": 3.3765828609466553,
51
+ "eval_runtime": 87.6227,
52
+ "eval_samples_per_second": 112.414,
53
+ "eval_steps_per_second": 7.03,
54
  "step": 1000
55
  }
56
  ],
57
+ "logging_steps": 250,
58
+ "max_steps": 27705,
59
  "num_input_tokens_seen": 0,
60
  "num_train_epochs": 3,
61
  "save_steps": 1000,
 
71
  "attributes": {}
72
  }
73
  },
74
+ "total_flos": 361054076928000.0,
75
+ "train_batch_size": 16,
76
  "trial_name": null,
77
  "trial_params": null
78
  }
checkpoint-latest/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:669a48aa09ae0e5107616885177fad4ad55736c6332be20bb6f916b99a7e1ab7
3
  size 6033
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f217a06d41692f3ee4559764c88c0b3c805db6ef1541a53dc2503e0626aeaf07
3
  size 6033