# ATP-Latent-Master/args/test/gsm_coconut_eval_multi.yaml
# zz1358m: "Upload folder using huggingface_hub" (7a92ec5, verified)
# Evaluation-only run configuration named "gsm_coconut_eval"; the test split
# points at MultiArith.
# NOTE(review): the exact meaning of every key is defined by the consuming
# script, which is not visible here. The headings below only group keys.

# Run identity and output location
project: coconut
save_path: '.........'  # looks like a redacted placeholder; set a real path
name: gsm_coconut_eval

# Method switches
# NOTE(review): coconut and coconutgpt are both enabled; confirm the consumer
# expects that combination.
only_eval: true
coconut: true
cot: false
no_thoughts: false
no_cot: false
coconutgpt: true
# visualize: true

# Latent-stage settings (grouped by key name; semantics live in the consumer)
c_thought: 2
epochs_per_stage: 3
max_latent_stage: 10
pad_latent_to_max: true
save_only_improve: false
uniform_prob: 0.0

# Model and checkpoint paths (use relative paths or environment variables)
# NOTE(review): model_id is an absolute, user-specific path, which goes
# against the guidance above; confirm before sharing this file.
model_id: '/scratch/e1374322/meta-llama/Llama-3.2-1B-Instruct'
load_model_path: '...'  # looks like a redacted placeholder; set a checkpoint

# Logging, seeding, resume point, precision
wandb: false
seed: 0
resume: 0
bf16: false

# Dataset files
train_path: '.../gsm/train.json'
val_path: '.../gsm/val.json'
test_path: '.../gsm/MultiArith.json'

# Optimizer and batching
# NOTE(review): presumably unused while only_eval is true; confirm.
reset_optimizer: true
batch_size_training: 64
batch_size_validating: 1
debug: false
gradient_accumulation_steps: 1
num_epochs: 35
# Explicit float tag: some YAML 1.1 loaders read a bare 1e-4 as a string.
lr: !!float "1e-4"
weight_decay: 0.01

# Variant selectors (values are opaque identifiers read by the consumer)
w_prompt: false
mode: coconutgpt_same_word_embedding_len_vae
explain_mode: v1_aug
training_method: full
train_or_eval: eval