Upload model - 20000 iterations, loss: 0.7583
Browse files
- config.json +4 -4
- model.safetensors +1 -1
- training_metadata.json +5 -5
config.json
CHANGED
|
@@ -22,9 +22,9 @@
|
|
| 22 |
"transformers_version": "4.35.0",
|
| 23 |
"mlx_training": {
|
| 24 |
"framework": "MLX",
|
| 25 |
-
"iterations":
|
| 26 |
-
"final_loss":
|
| 27 |
-
"dataset": "
|
| 28 |
-
"max_tokens":
|
| 29 |
}
|
| 30 |
}
|
|
|
|
| 22 |
"transformers_version": "4.35.0",
|
| 23 |
"mlx_training": {
|
| 24 |
"framework": "MLX",
|
| 25 |
+
"iterations": 20000,
|
| 26 |
+
"final_loss": 0.7582720518112183,
|
| 27 |
+
"dataset": "tinystories",
|
| 28 |
+
"max_tokens": 2000000
|
| 29 |
}
|
| 30 |
}
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 211972024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:19aebe40bff044ccfe2f057a81afaf9ced015e98a12a54a198e1d6f3f5c24296
|
| 3 |
size 211972024
|
training_metadata.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
| 1 |
{
|
| 2 |
-
"model_name": "nanogpt-mlx-384d-
|
| 3 |
"architecture": "GPT-2",
|
| 4 |
"parameters": "38,794,752",
|
| 5 |
"training": {
|
| 6 |
-
"iterations":
|
| 7 |
-
"final_loss":
|
| 8 |
-
"dataset": "
|
| 9 |
-
"tokens_trained":
|
| 10 |
"batch_size": 12,
|
| 11 |
"learning_rate": 0.0003,
|
| 12 |
"context_length": 512
|
|
|
|
| 1 |
{
|
| 2 |
+
"model_name": "nanogpt-mlx-384d-20k",
|
| 3 |
"architecture": "GPT-2",
|
| 4 |
"parameters": "38,794,752",
|
| 5 |
"training": {
|
| 6 |
+
"iterations": 20000,
|
| 7 |
+
"final_loss": 0.7582720518112183,
|
| 8 |
+
"dataset": "tinystories",
|
| 9 |
+
"tokens_trained": 2000000,
|
| 10 |
"batch_size": 12,
|
| 11 |
"learning_rate": 0.0003,
|
| 12 |
"context_length": 512
|