LMMS_phase23_step_3000 / config.json
omrisap's picture
Upload folder using huggingface_hub
d2e9379 verified
{
"model": {
"phase1_dir": "omrisap/LMMS_phase1",
"v_z": 512,
"gumbel_tau_start": 1.0,
"gumbel_tau_end": 0.3,
"gumbel_anneal_steps": 3000,
"z_prefix": "Z_",
"latent_token": "<|latent|>",
"answer_token": "<ANSWER>"
},
"data": {
"dataset_name": "omrisap/phaseZ",
"train_split": "train",
"eval_split": "eval",
"data_path": null,
"max_length": null,
"batch_size": 64,
"rebalance_train": true,
"k_max": 20,
"target_k_dist": {
"K1": 0.075,
"K2": 0.1,
"K3": 0.125,
"K4_7": 0.3,
"K8_12": 0.2,
"K13_20": 0.2
}
},
"loss": {
"lambda_ans": 0.1,
"lambda_ans_start": 0.05,
"lambda_ans_end": 0.5,
"lambda_ans_anneal_steps": 1000,
"lambda_sft": 0.05,
"lambda_cf": 1.0,
"lambda_batch": 0.5,
"lambda_consistency": 0.0,
"lambda_no_answer_on_latent": 0.95,
"digit_temperature": 0.1,
"keep_prob": [
0.02,
0.05,
0.1,
0.5,
1
],
"counterfactual_schedule": {
"1": 0.0,
"2": 0.1,
"3": 0.15,
"4": 0.2,
"5": 0.3,
"6": 0.4,
"7": 0.5,
"8": 0.6,
"9": 0.65,
"10": 0.7,
"11": 0.75,
"12": 0.8,
"13": 0.85,
"14": 0.85,
"15": 0.9,
"16": 0.9,
"17": 0.9,
"18": 0.9,
"19": 0.9,
"20": 0.9
}
},
"train": {
"lr": 3e-05,
"weight_decay": 0.0,
"steps": 3000,
"grad_accum": 1,
"print_every": 5,
"eval_every": 50,
"eval_generate_every_mult": 2,
"eval_generate_max_new_tokens": 64,
"eval_generate_temperature": 1.0,
"eval_generate_top_p": 0.95,
"save_every": 500,
"cf_debug_every": 0,
"cf_warmup_steps": 100,
"cf_bias_anneal_steps": 300,
"cf_attention_bias_strength": 2.0,
"cf_attention_bias_enabled": true,
"cf_bias_apply_cf_path_only": true,
"seed": 42,
"output_dir": "./runs/phase23_gs"
}
}