flpelerin committed on
Commit
a9054f4
·
verified ·
1 Parent(s): 3968a6a

Upload folder using huggingface_hub

Browse files
snake-c73af121-checkpoint-8/config.h ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #define N_LAYERS 24
4
+ #define VOCAB_SIZE 256
5
+ #define D_MODEL 768
6
+ #define D_INNER 1536
7
+ #define DT_RANK 48
8
+ #define D_STATE 16
9
+ #define D_CONV 4
snake-c73af121-checkpoint-8/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"d_model": 768, "n_layer": 24, "vocab_size": 256, "ssm_cfg": {}, "rms_norm": true, "residual_in_fp32": true, "fused_add_norm": true, "pad_vocab_size_multiple": 8, "tie_embeddings": false}
snake-c73af121-checkpoint-8/model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:133d05fdf9e28b244ddd4aecadf5b09c0dc757acaff4cce43737371f90a4a037
3
+ size 363654144
snake-c73af121-checkpoint-8/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14c4d7148326b7de6fce61a836a7feb5e72d0d8aaa3ed15fbc266fe764d22110
3
+ size 363746390
snake-c73af121-checkpoint-8/training_params.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "d_model": 768,
3
+ "n_layers": 24,
4
+ "vocab_size": 256,
5
+ "model_size": 90913536,
6
+ "learning_rate": 0.0001,
7
+ "seq_length": 1024,
8
+ "batch_size": 4,
9
+ "num_batches": 1241,
10
+ "tokenizer_path": "",
11
+ "dataset_path": "flpelerin/TinyStories-train-1M",
12
+ "model_name": "snake-c73af121",
13
+ "wandb_project": "snake-91M",
14
+ "wandb_user": "florianpelerin110304",
15
+ "hf_repo": "flpelerin/snake-91M",
16
+ "target_epoch": 12,
17
+ "current_epoch": 8
18
+ }