finalform committed on
Commit
2d7029f
·
verified ·
1 Parent(s): 4129ebf

Upload 13 files

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f45a04fe9da5fd31874472d45fcb72767481687b988e020568d52016e2cbc7f2
3
  size 645975704
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:436f36528e0cbad453619703d49a29667fa0654be011985336d1c2b872f526f8
3
  size 645975704
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fce61d0ea58235490c4825f4090dd94b9e8512c88786dcf095db98da30c4d1b0
3
- size 1292087499
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d2149a123e2aa314e8d5a4ff1af83d0915c9962275cc6c598c987ae667380d5
3
+ size 1292087115
rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a9129f688522cbf5074ed3368e37129e6207c159ca1fa46c55499c8edd77f43
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16b1f30ed08c0cb1f6faa13f9adf57b5673e7f65618010fb242bd0a3c4bb0882
3
  size 16389
rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21de8159d1625b9d3b0bc58aaf0f5d7a08041f34848b02531ede2496e95c4b22
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7758845bdfcf8fb1634f7caf35462f13639e0941bd48712fadea60c710ba3072
3
  size 16389
rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a368a4902b82f670aeadc98044d7d89a3418d2f388b2e5716f699f17c1ba4734
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cf8621df71a49229ce61172ca361f37587ea13b70aa2190bf8d87a833f8ee10
3
  size 16389
rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e83bd6c48291dbb29300128d5147f1a762b2e5716f690ce6e45a98ec9894099f
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8384fc2bd470fae280e01edd29cc9a2e4a86f8bc2f8d3a58a46b96c73349e6db
3
  size 16389
rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:361985b39f1284b5feb5d413263eb65082264988a9165d0861070486596c40ab
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddff82bc26542fb05efdcf2df590ed29e9d88dbb01de689d83c4414f8885608b
3
  size 16389
rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6e903c9e955adb623d41954e6913b29513486d96d0842b4227e58c17af5983a
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:209b7d10ef86fe6e7f8d1749c24e88e06e11f4d90000c239905b4be42662d348
3
  size 16389
rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f87836a1fd83de48b6db7a2199667192f87082204913bc584cd2ebb322d567a
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5a2629cdd3b6c3a387977b70399124e4c1f9fe420cd68bca306733f6790d5e1
3
  size 16389
rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:334fb0db4018fb69a8ca2d36ce9a89336f5a66162c5c652a516eed7b9d6beb18
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef570fd2e0fd44060a913d5dfadb2f9f6ae4f10cfb213d7e257e5f528ff6a899
3
  size 16389
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43250e56e4270c5f004d1452fba91772f612821b91b33492716896d2d6057469
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13c878b5181f107dfcc1437e1d485b47bb6612c1058d82c3d92ef97b6425cab6
3
  size 1465
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 5.0,
6
  "eval_steps": 500,
7
- "global_step": 260,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -92,62 +92,6 @@
92
  "eval_samples_per_second": 8.675,
93
  "eval_steps_per_second": 1.105,
94
  "step": 156
95
- },
96
- {
97
- "epoch": 3.3662650602409636,
98
- "grad_norm": 0.07343257963657379,
99
- "learning_rate": 0.0002861234766624867,
100
- "loss": 0.0345,
101
- "mean_token_accuracy": 0.9456785363168573,
102
- "num_tokens": 71027426.0,
103
- "step": 175
104
- },
105
- {
106
- "epoch": 3.8481927710843373,
107
- "grad_norm": 0.04622579738497734,
108
- "learning_rate": 0.0002293960964917063,
109
- "loss": 0.0259,
110
- "mean_token_accuracy": 0.9330729904770851,
111
- "num_tokens": 81341930.0,
112
- "step": 200
113
- },
114
- {
115
- "epoch": 4.0,
116
- "eval_loss": NaN,
117
- "eval_mean_token_accuracy": 0.9852228583173549,
118
- "eval_num_tokens": 84250831.0,
119
- "eval_runtime": 41.6702,
120
- "eval_samples_per_second": 8.855,
121
- "eval_steps_per_second": 1.128,
122
- "step": 208
123
- },
124
- {
125
- "epoch": 4.327710843373494,
126
- "grad_norm": 0.04485568404197693,
127
- "learning_rate": 0.0001739556124639496,
128
- "loss": 0.0208,
129
- "mean_token_accuracy": 0.9436070158253962,
130
- "num_tokens": 91589095.0,
131
- "step": 225
132
- },
133
- {
134
- "epoch": 4.809638554216868,
135
- "grad_norm": 0.03364564850926399,
136
- "learning_rate": 0.00012253518458496144,
137
- "loss": 0.0185,
138
- "mean_token_accuracy": 0.9403582978248596,
139
- "num_tokens": 101826338.0,
140
- "step": 250
141
- },
142
- {
143
- "epoch": 5.0,
144
- "eval_loss": NaN,
145
- "eval_mean_token_accuracy": 0.9858830913584283,
146
- "eval_num_tokens": 105323752.0,
147
- "eval_runtime": 41.5429,
148
- "eval_samples_per_second": 8.882,
149
- "eval_steps_per_second": 1.131,
150
- "step": 260
151
  }
152
  ],
153
  "logging_steps": 25,
@@ -167,7 +111,7 @@
167
  "attributes": {}
168
  }
169
  },
170
- "total_flos": 4.624520070050087e+18,
171
  "train_batch_size": 1,
172
  "trial_name": null,
173
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 3.0,
6
  "eval_steps": 500,
7
+ "global_step": 156,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
92
  "eval_samples_per_second": 8.675,
93
  "eval_steps_per_second": 1.105,
94
  "step": 156
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  }
96
  ],
97
  "logging_steps": 25,
 
111
  "attributes": {}
112
  }
113
  },
114
+ "total_flos": 2.7746000370139136e+18,
115
  "train_batch_size": 1,
116
  "trial_name": null,
117
  "trial_params": null