Viharikvs
/

ARC2CMBA1step

Model card Files Files and versions

ARC2CMBA1step / all_config.yaml

Viharikvs's picture

Upload ARC2 GLPS checkpoint

63dc939 verified 2 months ago

history blame contribute delete

1.38 kB

	arch:
	H_layers: 2
	L_layers: 6
	dep_rank: 64
	dep_topk: 12
	expansion: 4.0
	forward_dtype: bfloat16
	glps_dep_graph: true
	glps_enabled: true
	glps_fill_obvious: true
	glps_global_propagate_on_low_conf: true
	glps_max_targeted_iters: 4
	glps_tau_halt: 0.92
	glps_tau_uncertain: 0.8
	glps_token_masking: true
	halt_exploration_prob: 0.1
	halt_max_steps: 16
	hidden_size: 512
	loss:
	loss_type: stablemax_cross_entropy
	name: losses@ACTLossHead
	mlp_t: false
	name: recursive_reasoning.glps@GLPS_ACTV1
	num_heads: 8
	pos_encodings: rope
	puzzle_emb_ndim: 512
	rms_norm_eps: 1.0e-05
	rope_theta: 10000.0
	share_levels: true
	shared_layers: 9
	beta1: 0.9
	beta2: 0.95
	checkpoint_every_eval: true
	checkpoint_path: checkpoints/Arc2concept-aug-1000-ACT-torch/pretrain_att_arc2concept_4
	data_paths:
	- data/arc2concept-aug-1000
	data_paths_test: []
	ema: true
	ema_rate: 0.999
	epochs: 100000
	eval_glps_max_targeted_iters: null
	eval_glps_tau_halt: null
	eval_halt_max_steps: null
	eval_interval: 10000
	eval_only: false
	eval_save_outputs: []
	evaluators:
	- name: arc@ARC
	freeze_weights: false
	global_batch_size: 768
	load_checkpoint: null
	lr: 0.0001
	lr_min_ratio: 0.1
	lr_warmup_steps: 2000
	min_eval_interval: 0
	project_name: Arc2concept-aug-1000-ACT-torch
	puzzle_emb_lr: 0.01
	puzzle_emb_weight_decay: 0.1
	run_name: pretrain_att_arc2concept_4
	seed: 0
	weight_decay: 0.1