RonanMcGovern committed on
Commit
c1ebe1b
·
verified ·
1 Parent(s): ac42fcc

Upload config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.json +46 -42
config.json CHANGED
@@ -1,52 +1,56 @@
1
  {
2
  "model_version": "smol-v7-1M",
3
  "tag": "aa2",
4
- "vocab_size": 11,
5
- "d_model": 384,
6
- "nhead": 6,
7
- "num_layers": 4,
8
- "max_size": 30,
9
- "embedding_dropout": 0.1,
10
- "input_grid_dropout": 0.05,
11
- "num_timesteps": 128,
12
- "schedule_type": "cosine",
13
- "batch_size": 128,
14
- "learning_rate": 0.0004,
15
- "weight_decay": 0.01,
16
- "optimizer_steps": 1000000,
17
- "lr_warmup_steps": 2000,
18
- "gradient_accumulation_steps": 1,
19
- "augment": true,
20
- "log_every": 100,
21
- "val_every_steps": 1000,
22
- "vis_every_steps": 16000,
23
- "eval_every_steps": 25000,
24
- "num_eval_tasks": 120,
25
- "best_model_metric": "eval_score",
26
- "eval_dataset": "evaluation",
27
- "use_mixed_precision": true,
28
- "pixel_noise_prob": 0.0,
29
- "pixel_noise_rate": 0.0,
30
- "use_ema": true,
31
- "ema_decay": 0.9995,
32
- "ema_warmup_steps": 2000,
33
- "data_dir": "data/arc-prize-2025",
34
- "datasets": [
35
- "training_challenges",
36
- "evaluation_challenges"
37
- ],
38
- "include_training_test_examples": true,
39
- "max_val_examples": 128,
40
- "eval_weight": 10.0,
41
- "output_dir": "experimental/diffusion/outputs/smol",
42
- "use_wandb": true,
43
- "save_best": true,
44
- "save_final": true,
45
  "auxiliary_loss": {
46
  "include_size_head": true,
47
  "size_head_hidden_dim": 256,
48
  "auxiliary_size_loss_weight": 0.1,
49
  "auxiliary_size_loss_warmup": 0
50
  },
51
- "profile_mode": false
 
 
 
 
 
52
  }
 
1
  {
2
  "model_version": "smol-v7-1M",
3
  "tag": "aa2",
4
+ "model": {
5
+ "vocab_size": 11,
6
+ "d_model": 384,
7
+ "nhead": 6,
8
+ "num_layers": 4,
9
+ "max_size": 30,
10
+ "embedding_dropout": 0.1,
11
+ "input_grid_dropout": 0.05
12
+ },
13
+ "training": {
14
+ "num_timesteps": 128,
15
+ "schedule_type": "cosine",
16
+ "batch_size": 128,
17
+ "learning_rate": 0.0004,
18
+ "weight_decay": 0.01,
19
+ "optimizer_steps": 1000000,
20
+ "lr_warmup_steps": 2000,
21
+ "gradient_accumulation_steps": 1,
22
+ "augment": true,
23
+ "log_every": 100,
24
+ "val_every_steps": 1000,
25
+ "vis_every_steps": 16000,
26
+ "eval_every_steps": 25000,
27
+ "num_eval_tasks": 120,
28
+ "best_model_metric": "eval_score",
29
+ "eval_dataset": "evaluation",
30
+ "use_mixed_precision": true,
31
+ "pixel_noise_prob": 0.0,
32
+ "pixel_noise_rate": 0.0,
33
+ "use_ema": true,
34
+ "ema_decay": 0.9995,
35
+ "ema_warmup_steps": 2000
36
+ },
37
+ "data": {
38
+ "data_dir": "data/arc-prize-2025",
39
+ "datasets": ["training_challenges", "evaluation_challenges"],
40
+ "include_training_test_examples": true,
41
+ "max_val_examples": 128,
42
+ "eval_weight": 10.0
43
+ },
 
44
  "auxiliary_loss": {
45
  "include_size_head": true,
46
  "size_head_hidden_dim": 256,
47
  "auxiliary_size_loss_weight": 0.1,
48
  "auxiliary_size_loss_warmup": 0
49
  },
50
+ "output": {
51
+ "output_dir": "experimental/diffusion/outputs/smol",
52
+ "use_wandb": true,
53
+ "save_best": true,
54
+ "save_final": true
55
+ }
56
  }