| { | |
| "add_lm_loss": true, | |
| "align_pct": 0.2, | |
| "clip_grad_norm": 1.0, | |
| "dataset_config": "eng-ind", | |
| "dataset_name": "jcblaise/backpack-parallel", | |
| "dataset_split": "train", | |
| "eval_every_n_steps": 2000, | |
| "freeze_sense_during_polish": true, | |
| "label_smoothing": 0.05, | |
| "learning_rate": 5e-05, | |
| "log_every_n_steps": 100, | |
| "max_checkpoints": 1, | |
| "max_length": 256, | |
| "max_steps": 150000, | |
| "model_id": "/home/mila/b/blaisej/workspace/trained-models/backpack-gpt2-retuned-full", | |
| "normalize_last_token_embeds": true, | |
| "normalize_sense_pooling": true, | |
| "num_workers": 0, | |
| "overwrite_output_dir": false, | |
| "polish_pct": 0.5, | |
| "resume_from_checkpoint": true, | |
| "save_dir": "/home/mila/b/blaisej/workspace/trained-models/backpack-adapted-eng-ind-v5", | |
| "save_every": 2000, | |
| "seed": 1234, | |
| "sense_pool_temp": 0.7, | |
| "src": "eng", | |
| "tau_ctx_end": 0.07, | |
| "tau_ctx_start": 0.07, | |
| "tau_sns_end": 0.05, | |
| "tau_sns_start": 0.05, | |
| "test_batch_size": 64, | |
| "tgt": "ind", | |
| "train_batch_size": 64, | |
| "use_fp16": true, | |
| "use_wandb": true, | |
| "w_ctx_align": 0.45, | |
| "w_ctx_mid": 0.4, | |
| "w_ctx_tail": 0.15, | |
| "w_lm_align": 0.02, | |
| "w_lm_mid": 0.2, | |
| "w_lm_tail": 0.7, | |
| "w_sns_align": 0.55, | |
| "w_sns_mid": 0.4, | |
| "w_sns_tail": 0.15, | |
| "wandb_project": "jan_cruz_backpack", | |
| "wandb_run_id": "", | |
| "wandb_run_name": "adaptation-eng-ind-v5", | |
| "warmup_ratio": 0.1 | |
| } |