File size: 907 Bytes
c78c96b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
{
  "train_jsonl": "data/groups_train.jsonl",
  "valid_jsonl": "data/groups_valid.jsonl",
  "run_dir": "runs/planB_robust",
  "resume": "runs/planB_polish/ckpt.pt",
  "backbone": "google/mt5-small",
  "num_latents": 16,
  "latent_dropout": 0.05,
  "latent_noise_std": 0.01,
  "batch_size": 4,
  "grad_accum": 8,
  "epochs": 1,
  "max_doc_len": 256,
  "max_sum_len": 64,
  "eval_every": 400,
  "max_train_examples": 2000,
  "max_valid_examples": 200,
  "lambda_align": 0.5,
  "tau": 0.07,
  "lambda_varcov": 10.0,
  "var_target_std": 0.05,
  "lambda_mean": 0.1,
  "lambda_mean_diff": 0.1,
  "lambda_pair": 0.2,
  "lambda_lang": 0.5,
  "lambda_len": 1.0,
  "adv_start_step": 0,
  "grl_alpha": 1.0,
  "grl_warmup": 200,
  "lr_model": 0.0001,
  "lr_lang": 0.001,
  "lr_len": 0.001,
  "adv_clf_steps": 5,
  "adv_clf_weight_decay": 0.0,
  "adv_queue_size": 4096,
  "adv_clf_batch": 256,
  "adv_mix_current": 0.5
}