arc-it / config.json
Teen-Different's picture
Upload ARC-IT model checkpoint
138a6a8 verified
{
"data": {
"arc_agi1_path": "References/ARC-AGI",
"arc_agi2_path": "References/ARC-AGI-2",
"re_arc_path": "References/RE-ARC",
"canvas_size": 64,
"num_colors": 12,
"max_grid_size": 30,
"max_demos": 5,
"re_arc_samples_per_task": 50,
"repeat_factor": 1,
"augmentation": {
"geometric": true,
"color_permutation": true,
"num_color_perms": 10,
"keep_background": true,
"resolution_scaling": true,
"translation": true
}
},
"model": {
"hidden_size": 384,
"mlp_ratio": 2.5,
"tokenizer": {
"patch_size": 4
},
"rule_encoder": {
"pair_layers": 2,
"agg_layers": 2,
"num_heads": 8,
"num_rule_tokens": 64
},
"rule_applier": {
"num_layers": 4,
"num_heads": 8
},
"decoder": {
"upsample_method": "transposed_conv",
"hidden_channels": [
192,
96
]
}
},
"training": {
"batch_size": 64,
"num_workers": 8,
"gradient_clip": 1.0,
"stage1": {
"name": "pretrain",
"data_sources": [
"re_arc"
],
"epochs": 50,
"lr": 0.0003
},
"stage2": {
"name": "finetune",
"data_sources": [
"agi1",
"agi2"
],
"epochs": 30,
"lr": 0.0001
},
"stage3": {
"name": "hard_focus",
"data_sources": [
"agi1",
"agi2"
],
"epochs": 10,
"lr": 3e-05,
"agi2_oversample": 2.0
},
"optimizer": {
"name": "adamw",
"weight_decay": 0.01,
"betas": [
0.9,
0.999
]
},
"scheduler": {
"name": "cosine",
"warmup_ratio": 0.1
},
"log_every_n_steps": 100,
"save_every_n_epochs": 10,
"checkpoint_dir": "checkpoints"
},
"ttt": {
"enabled": true,
"steps": 100,
"lr": 0.0001,
"batch_size": 8,
"num_candidates": 32
},
"evaluation": {
"val_split_ratio": 0.1,
"val_data_sources": [
"agi1",
"agi2"
],
"metrics": [
"pixel_accuracy",
"grid_exact_match"
],
"visualize_every_n_tasks": 50
}
}