Smith42 committed on
Commit
0498d47
·
verified ·
1 Parent(s): db6a646

Upload dense/config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. dense/config.json +49 -0
dense/config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "AstroPT-0089.1M",
3
+ "time": 1740574257,
4
+ "log_via_wandb": true,
5
+ "log_emissions": false,
6
+ "out_dir": "logs/astropt070M_dense",
7
+ "eval_interval": 100,
8
+ "log_interval": 10,
9
+ "checkpoint_interval": 500,
10
+ "eval_iters": 100,
11
+ "eval_only": false,
12
+ "always_save_checkpoint": false,
13
+ "init_from": "scratch",
14
+ "hf_url": "smith42/galaxies",
15
+ "stream_hf_dataset": false,
16
+ "gradient_accumulation_steps": 40,
17
+ "batch_size": 64,
18
+ "spiral": true,
19
+ "block_size": 256,
20
+ "image_size": 256,
21
+ "num_workers": 64,
22
+ "n_layer": 12,
23
+ "n_head": 12,
24
+ "n_embd": 768,
25
+ "n_chan": 3,
26
+ "dropout": 0.0,
27
+ "patch_size": 16,
28
+ "bias": false,
29
+ "attn_type": "causal",
30
+ "k_ratio": 0.0,
31
+ "learning_rate": 0.0006,
32
+ "max_iters": 3500,
33
+ "weight_decay": 0.1,
34
+ "beta1": 0.9,
35
+ "beta2": 0.95,
36
+ "grad_clip": 1.0,
37
+ "decay_lr": true,
38
+ "warmup_iters": 2000,
39
+ "lr_decay_iters": 3300.0000000000005,
40
+ "min_lr": 5.9999999999999995e-05,
41
+ "backend": "nccl",
42
+ "device": "cuda",
43
+ "dtype": "bfloat16",
44
+ "compile": true,
45
+ "model_type": "astroPT",
46
+ "architectures": [
47
+ "astroPT"
48
+ ]
49
+ }