{
  "add_lm_loss": true,
  "align_pct": 0.2,
  "clip_grad_norm": 1.0,
  "dataset_config": "eng-ind",
  "dataset_name": "jcblaise/backpack-parallel",
  "dataset_split": "train",
  "eval_every_n_steps": 2000,
  "freeze_sense_during_polish": true,
  "label_smoothing": 0.05,
  "learning_rate": 5e-05,
  "log_every_n_steps": 100,
  "max_checkpoints": 1,
  "max_length": 256,
  "max_steps": 150000,
  "model_id": "/home/mila/b/blaisej/workspace/trained-models/backpack-gpt2-retuned-full",
  "normalize_last_token_embeds": true,
  "normalize_sense_pooling": true,
  "num_workers": 0,
  "overwrite_output_dir": false,
  "polish_pct": 0.5,
  "resume_from_checkpoint": true,
  "save_dir": "/home/mila/b/blaisej/workspace/trained-models/backpack-adapted-eng-ind-v5",
  "save_every": 2000,
  "seed": 1234,
  "sense_pool_temp": 0.7,
  "src": "eng",
  "tau_ctx_end": 0.07,
  "tau_ctx_start": 0.07,
  "tau_sns_end": 0.05,
  "tau_sns_start": 0.05,
  "test_batch_size": 64,
  "tgt": "ind",
  "train_batch_size": 64,
  "use_fp16": true,
  "use_wandb": true,
  "w_ctx_align": 0.45,
  "w_ctx_mid": 0.4,
  "w_ctx_tail": 0.15,
  "w_lm_align": 0.02,
  "w_lm_mid": 0.2,
  "w_lm_tail": 0.7,
  "w_sns_align": 0.55,
  "w_sns_mid": 0.4,
  "w_sns_tail": 0.15,
  "wandb_project": "jan_cruz_backpack",
  "wandb_run_id": "",
  "wandb_run_name": "adaptation-eng-ind-v5",
  "warmup_ratio": 0.1
}