flpelerin commited on
Commit
2dc5fd7
·
verified ·
1 Parent(s): b4f1b35

Upload folder using huggingface_hub

Browse files
snake-45653760-a024c281-checkpoint-0/config.h ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #define N_LAYERS 12
4
+ #define VOCAB_SIZE 256
5
+ #define D_MODEL 768
6
+ #define D_INNER 1536
7
+ #define DT_RANK 48
8
+ #define D_STATE 16
9
+ #define D_CONV 4
snake-45653760-a024c281-checkpoint-0/config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "d_model": 768,
3
+ "d_intermediate": 0,
4
+ "n_layer": 12,
5
+ "vocab_size": 256,
6
+ "ssm_cfg": {},
7
+ "attn_layer_idx": [],
8
+ "attn_cfg": {},
9
+ "rms_norm": true,
10
+ "residual_in_fp32": true,
11
+ "fused_add_norm": true,
12
+ "pad_vocab_size_multiple": 8,
13
+ "tie_embeddings": false
14
+ }
snake-45653760-a024c281-checkpoint-0/model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dce42b36b291dfdb11ab0f87329dd1ad11d9a87386c3160b02088c1bec924e77
3
+ size 182615040
snake-45653760-a024c281-checkpoint-0/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94d7d355f2ba83d8d6e50ff90e37de68a500cd3f55623cdaf2869e9f6df7f325
3
+ size 182661686
snake-45653760-a024c281-checkpoint-0/training_params.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "d_model": 768,
3
+ "n_layers": 12,
4
+ "vocab_size": 256,
5
+ "model_size": 45653760,
6
+ "learning_rate": 0.001,
7
+ "seq_length": 1024,
8
+ "batch_size": 4,
9
+ "num_batches": 229,
10
+ "tokenizer_path": "",
11
+ "dataset_path": "flpelerin/tinystories-1k",
12
+ "model_name": "snake-45653760-a024c281",
13
+ "wandb_project": "snake-training",
14
+ "wandb_user": "florianpelerin110304",
15
+ "hf_repo": "flpelerin/snakes",
16
+ "target_epoch": 10,
17
+ "current_epoch": 0
18
+ }