Ibisbill committed on
Commit
bf3a457
·
verified ·
1 Parent(s): 6738e6b

Upload checkpoint-10

Browse files
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4721a01483f7065db72b29b30aa41a3725f3f05915222afc01df7d6721252a2e
3
  size 4967215360
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b0616d6995cb0a2d8a87cb072d6b0443d270c09c33ae0344998125b465437f2
3
  size 4967215360
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7289f6a4e673567a87134a0317484d859a53f28e3b7f77992c12b15f7a3b8fa
3
  size 3077766632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eca23c3679dd115d780573bc11162be9b0ddf6a3e882df94185b6353054704cf
3
  size 3077766632
trainer_state.json CHANGED
@@ -2,7 +2,7 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.583941605839416,
6
  "eval_steps": 1000,
7
  "global_step": 10,
8
  "is_hyper_param_search": false,
@@ -10,80 +10,80 @@
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "epoch": 0.058394160583941604,
14
- "grad_norm": 4.656690719780717,
15
  "learning_rate": 0.0,
16
- "loss": 0.4427,
17
  "step": 1
18
  },
19
  {
20
- "epoch": 0.11678832116788321,
21
- "grad_norm": 4.86023891396574,
22
- "learning_rate": 1.6666666666666667e-06,
23
- "loss": 0.4479,
24
  "step": 2
25
  },
26
  {
27
- "epoch": 0.17518248175182483,
28
- "grad_norm": 4.720432036194861,
29
- "learning_rate": 3.3333333333333333e-06,
30
- "loss": 0.438,
31
  "step": 3
32
  },
33
  {
34
- "epoch": 0.23357664233576642,
35
- "grad_norm": 2.7065388639758448,
36
- "learning_rate": 5e-06,
37
- "loss": 0.4169,
38
  "step": 4
39
  },
40
  {
41
- "epoch": 0.291970802919708,
42
- "grad_norm": 2.1869690387653904,
43
- "learning_rate": 6.666666666666667e-06,
44
- "loss": 0.4031,
45
  "step": 5
46
  },
47
  {
48
- "epoch": 0.35036496350364965,
49
- "grad_norm": 1.6374910771466962,
50
- "learning_rate": 8.333333333333334e-06,
51
- "loss": 0.3685,
52
  "step": 6
53
  },
54
  {
55
- "epoch": 0.40875912408759124,
56
- "grad_norm": 1.5026227694746652,
57
- "learning_rate": 1e-05,
58
- "loss": 0.3638,
59
  "step": 7
60
  },
61
  {
62
- "epoch": 0.46715328467153283,
63
- "grad_norm": 1.7874379044083826,
64
- "learning_rate": 9.987820251299121e-06,
65
- "loss": 0.3384,
66
  "step": 8
67
  },
68
  {
69
- "epoch": 0.5255474452554745,
70
- "grad_norm": 1.399545646496995,
71
- "learning_rate": 9.951340343707852e-06,
72
- "loss": 0.3199,
73
  "step": 9
74
  },
75
  {
76
- "epoch": 0.583941605839416,
77
- "grad_norm": 0.9591867357924391,
78
- "learning_rate": 9.890738003669029e-06,
79
- "loss": 0.3052,
80
  "step": 10
81
  }
82
  ],
83
  "logging_steps": 1,
84
- "max_steps": 51,
85
  "num_input_tokens_seen": 0,
86
- "num_train_epochs": 3,
87
  "save_steps": 10,
88
  "stateful_callbacks": {
89
  "TrainerControl": {
@@ -97,8 +97,8 @@
97
  "attributes": {}
98
  }
99
  },
100
- "total_flos": 24520370946048.0,
101
- "train_batch_size": 1,
102
  "trial_name": null,
103
  "trial_params": null
104
  }
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.03707136237256719,
6
  "eval_steps": 1000,
7
  "global_step": 10,
8
  "is_hyper_param_search": false,
 
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "epoch": 0.0037071362372567192,
14
+ "grad_norm": 4.805793835959296,
15
  "learning_rate": 0.0,
16
+ "loss": 0.4732,
17
  "step": 1
18
  },
19
  {
20
+ "epoch": 0.0074142724745134385,
21
+ "grad_norm": 4.999676761082255,
22
+ "learning_rate": 1.8518518518518518e-07,
23
+ "loss": 0.5107,
24
  "step": 2
25
  },
26
  {
27
+ "epoch": 0.011121408711770158,
28
+ "grad_norm": 4.864588159866656,
29
+ "learning_rate": 3.7037037037037036e-07,
30
+ "loss": 0.4708,
31
  "step": 3
32
  },
33
  {
34
+ "epoch": 0.014828544949026877,
35
+ "grad_norm": 5.136331269447859,
36
+ "learning_rate": 5.555555555555555e-07,
37
+ "loss": 0.5024,
38
  "step": 4
39
  },
40
  {
41
+ "epoch": 0.018535681186283594,
42
+ "grad_norm": 4.731519955515801,
43
+ "learning_rate": 7.407407407407407e-07,
44
+ "loss": 0.4598,
45
  "step": 5
46
  },
47
  {
48
+ "epoch": 0.022242817423540315,
49
+ "grad_norm": 4.567784280784228,
50
+ "learning_rate": 9.259259259259259e-07,
51
+ "loss": 0.4717,
52
  "step": 6
53
  },
54
  {
55
+ "epoch": 0.025949953660797033,
56
+ "grad_norm": 4.522993398842187,
57
+ "learning_rate": 1.111111111111111e-06,
58
+ "loss": 0.4649,
59
  "step": 7
60
  },
61
  {
62
+ "epoch": 0.029657089898053754,
63
+ "grad_norm": 2.864282407749261,
64
+ "learning_rate": 1.2962962962962962e-06,
65
+ "loss": 0.4499,
66
  "step": 8
67
  },
68
  {
69
+ "epoch": 0.033364226135310475,
70
+ "grad_norm": 2.781272716749165,
71
+ "learning_rate": 1.4814814814814815e-06,
72
+ "loss": 0.4952,
73
  "step": 9
74
  },
75
  {
76
+ "epoch": 0.03707136237256719,
77
+ "grad_norm": 2.546317122615437,
78
+ "learning_rate": 1.6666666666666667e-06,
79
+ "loss": 0.4487,
80
  "step": 10
81
  }
82
  ],
83
  "logging_steps": 1,
84
+ "max_steps": 538,
85
  "num_input_tokens_seen": 0,
86
+ "num_train_epochs": 2,
87
  "save_steps": 10,
88
  "stateful_callbacks": {
89
  "TrainerControl": {
 
97
  "attributes": {}
98
  }
99
  },
100
+ "total_flos": 2880521601024.0,
101
+ "train_batch_size": 4,
102
  "trial_name": null,
103
  "trial_params": null
104
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53c6f4efddf302c1bffd66ab18844ca3866955a21ba82011ba2b03f6c0e75c9d
3
  size 7672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a64ffa12d6705d8296a8b6f2566b231894f0a3dd59a90b16aa729e1533fd4fa8
3
  size 7672