{
  "add_lm_loss": true,
  "align_pct": 0.2,
  "clip_grad_norm": 1.0,
  "dataset_config": "eng-ind",
  "dataset_name": "jcblaise/backpack-parallel",
  "dataset_split": "train",
  "eval_every_n_steps": 2000,
  "freeze_sense_during_polish": true,
  "label_smoothing": 0.05,
  "learning_rate": 5e-05,
  "log_every_n_steps": 100,
  "max_checkpoints": 1,
  "max_length": 256,
  "max_steps": 150000,
  "model_id": "/home/mila/b/blaisej/workspace/trained-models/backpack-gpt2-retuned-full",
  "normalize_last_token_embeds": true,
  "normalize_sense_pooling": true,
  "num_workers": 0,
  "overwrite_output_dir": false,
  "polish_pct": 0.5,
  "resume_from_checkpoint": true,
  "save_dir": "/home/mila/b/blaisej/workspace/trained-models/backpack-adapted-eng-ind-v5",
  "save_every": 2000,
  "seed": 1234,
  "sense_pool_temp": 0.7,
  "src": "eng",
  "tau_ctx_end": 0.07,
  "tau_ctx_start": 0.07,
  "tau_sns_end": 0.05,
  "tau_sns_start": 0.05,
  "test_batch_size": 64,
  "tgt": "ind",
  "train_batch_size": 64,
  "use_fp16": true,
  "use_wandb": true,
  "w_ctx_align": 0.45,
  "w_ctx_mid": 0.4,
  "w_ctx_tail": 0.15,
  "w_lm_align": 0.02,
  "w_lm_mid": 0.2,
  "w_lm_tail": 0.7,
  "w_sns_align": 0.55,
  "w_sns_mid": 0.4,
  "w_sns_tail": 0.15,
  "wandb_project": "jan_cruz_backpack",
  "wandb_run_id": "",
  "wandb_run_name": "adaptation-eng-ind-v5",
  "warmup_ratio": 0.1
}