{
  "add_lm_loss": true,
  "align_pct": 0.2,
  "clip_grad_norm": 1.0,
  "dataset_config": "eng-ind",
  "dataset_name": "jcblaise/backpack-parallel",
  "dataset_split": "train",
  "eval_every_n_steps": 2000,
  "freeze_sense_during_polish": true,
  "label_smoothing": 0.05,
  "learning_rate": 5e-05,
  "log_every_n_steps": 100,
  "max_checkpoints": 1,
  "max_length": 256,
  "max_steps": 150000,
  "model_id": "/home/mila/b/blaisej/workspace/trained-models/backpack-gpt2-retuned-full",
  "normalize_last_token_embeds": true,
  "normalize_sense_pooling": true,
  "num_workers": 0,
  "overwrite_output_dir": false,
  "polish_pct": 0.5,
  "resume_from_checkpoint": true,
  "save_dir": "/home/mila/b/blaisej/workspace/trained-models/backpack-adapted-eng-ind-v5",
  "save_every": 2000,
  "seed": 1234,
  "sense_pool_temp": 0.7,
  "src": "eng",
  "tau_ctx_end": 0.07,
  "tau_ctx_start": 0.07,
  "tau_sns_end": 0.05,
  "tau_sns_start": 0.05,
  "test_batch_size": 64,
  "tgt": "ind",
  "train_batch_size": 64,
  "use_fp16": true,
  "use_wandb": true,
  "w_ctx_align": 0.45,
  "w_ctx_mid": 0.4,
  "w_ctx_tail": 0.15,
  "w_lm_align": 0.02,
  "w_lm_mid": 0.2,
  "w_lm_tail": 0.7,
  "w_sns_align": 0.55,
  "w_sns_mid": 0.4,
  "w_sns_tail": 0.15,
  "wandb_project": "jan_cruz_backpack",
  "wandb_run_id": "",
  "wandb_run_name": "adaptation-eng-ind-v5",
  "warmup_ratio": 0.1
}