Smith42 committed on
Commit
a9900a5
·
1 Parent(s): efa57f5
{sparse → dense}/p32k00/config.json RENAMED
File without changes
sparse/p32k00/ckpt.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ba8202e7bc7cb9079dad75298d58060a2ae6d10266314c5e2fb6762a3802971
3
- size 1112227537
 
 
 
 
sparse/p32k00/hparams.txt DELETED
@@ -1,43 +0,0 @@
1
- AstroPT-0092.6M
2
- time: 1740647970
3
- log_via_wandb: True
4
- log_emissions: False
5
- out_dir: logs/astropt070M_nosparse_32
6
- eval_interval: 100
7
- log_interval: 10
8
- checkpoint_interval: 500
9
- eval_iters: 100
10
- eval_only: False
11
- always_save_checkpoint: False
12
- init_from: scratch
13
- hf_url: smith42/galaxies
14
- stream_hf_dataset: False
15
- gradient_accumulation_steps: 40
16
- batch_size: 64
17
- spiral: True
18
- block_size: 64
19
- image_size: 256
20
- num_workers: 64
21
- n_layer: 12
22
- n_head: 12
23
- n_embd: 768
24
- n_chan: 3
25
- dropout: 0.0
26
- patch_size: 32
27
- bias: False
28
- attn_type: causal
29
- k_ratio: 0.0
30
- learning_rate: 0.0006
31
- max_iters: 3500
32
- weight_decay: 0.1
33
- beta1: 0.9
34
- beta2: 0.95
35
- grad_clip: 1.0
36
- decay_lr: True
37
- warmup_iters: 2000
38
- lr_decay_iters: 3300.0000000000005
39
- min_lr: 5.9999999999999995e-05
40
- backend: nccl
41
- device: cuda
42
- dtype: bfloat16
43
- compile: True