jacksuuuu committed on
Commit
6ddf075
·
verified ·
1 Parent(s): 439baa1

Upload model - 20000 iterations, loss: 0.7583

Browse files
Files changed (3) hide show
  1. config.json +4 -4
  2. model.safetensors +1 -1
  3. training_metadata.json +5 -5
config.json CHANGED
@@ -22,9 +22,9 @@
22
  "transformers_version": "4.35.0",
23
  "mlx_training": {
24
  "framework": "MLX",
25
- "iterations": 35000,
26
- "final_loss": 3.4639759063720703,
27
- "dataset": "finewebedu",
28
- "max_tokens": 10000000
29
  }
30
  }
 
22
  "transformers_version": "4.35.0",
23
  "mlx_training": {
24
  "framework": "MLX",
25
+ "iterations": 20000,
26
+ "final_loss": 0.7582720518112183,
27
+ "dataset": "tinystories",
28
+ "max_tokens": 2000000
29
  }
30
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5af8ee2b8124c95ea8667396f3d525176f9e40b8d85f6331034a8bba7245c3e3
3
  size 211972024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19aebe40bff044ccfe2f057a81afaf9ced015e98a12a54a198e1d6f3f5c24296
3
  size 211972024
training_metadata.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "model_name": "nanogpt-mlx-384d-35k",
3
  "architecture": "GPT-2",
4
  "parameters": "38,794,752",
5
  "training": {
6
- "iterations": 35000,
7
- "final_loss": 3.4639759063720703,
8
- "dataset": "finewebedu",
9
- "tokens_trained": 10000000,
10
  "batch_size": 12,
11
  "learning_rate": 0.0003,
12
  "context_length": 512
 
1
  {
2
+ "model_name": "nanogpt-mlx-384d-20k",
3
  "architecture": "GPT-2",
4
  "parameters": "38,794,752",
5
  "training": {
6
+ "iterations": 20000,
7
+ "final_loss": 0.7582720518112183,
8
+ "dataset": "tinystories",
9
+ "tokens_trained": 2000000,
10
  "batch_size": 12,
11
  "learning_rate": 0.0003,
12
  "context_length": 512