jalal06 committed on
Commit
9e2de82
·
verified ·
1 Parent(s): 898372f

Upload 3 files

Browse files
12-layers/data_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_registers": 4,
3
+ "modulus": 16,
4
+ "max_program_len": 16,
5
+ "max_exec_steps": 20,
6
+ "num_samples": 1000000,
7
+ "max_loops": 0,
8
+ "max_nesting_depth": 0,
9
+ "mode": "direct_to_final",
10
+ "include_initial_state": true,
11
+ "enable_add_sub": true,
12
+ "enable_cp": true,
13
+ "enable_load": true,
14
+ "enable_loops": false,
15
+ "seed": 42
16
+ }
12-layers/model_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "vocab_size": -1,
3
+ "d_model": 768,
4
+ "n_heads": 12,
5
+ "n_layers": 12,
6
+ "max_len": 512,
7
+ "rope_base": 1000.0,
8
+ "dropout_rate": 0.0,
9
+ "pad_token_id": -1
10
+ }
12-layers/train_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "device": "cuda",
3
+ "batch_size": 64,
4
+ "max_iters": 50000,
5
+ "learning_rate": 0.0003,
6
+ "weight_decay": 0.01,
7
+ "beta1": 0.9,
8
+ "beta2": 0.999,
9
+ "grad_clip": 1.0,
10
+ "decay_lr": true,
11
+ "warmup_iters": 1000,
12
+ "lr_decay_iters": 15000,
13
+ "min_lr": 3e-05,
14
+ "log_interval": 50,
15
+ "eval_interval": 500,
16
+ "eval_iters": 100,
17
+ "seed": 42,
18
+ "train_split": 0.8,
19
+ "use_wandb": true,
20
+ "wandb_project": "assembly-gpt",
21
+ "wandb_entity": null,
22
+ "wandb_run_name": null,
23
+ "wandb_tags": [],
24
+ "wandb_notes": null,
25
+ "base_exp_dir": "/user/jalal.naghiyev/u13581/disk_space/looped_exp",
26
+ "exp_name": null,
27
+ "save_every": 1000,
28
+ "keep_last_n_checkpoints": 5,
29
+ "save_path": null
30
+ }