File size: 600 Bytes
c78c96b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
{
  "train_jsonl": "data/groups_train.jsonl",
  "valid_jsonl": "data/groups_valid.jsonl",
  "run_dir": "runs/planB_polish_robust_dec_long",
  "resume": "runs/planB_polish/ckpt.pt",
  "backbone": "google/mt5-small",
  "num_latents": 16,
  "batch_size": 4,
  "grad_accum": 8,
  "epochs": 3,
  "max_doc_len": 256,
  "max_sum_len": 64,
  "eval_every": 400,
  "max_train_examples": 0,
  "max_valid_examples": 0,
  "lr": 0.0003,
  "weight_decay": 0.0,
  "latent_dropout": 0.1,
  "latent_noise_std": 0.01,
  "noise_warmup_steps": 500,
  "noise_warmup_start_step": -1,
  "untie_lm_head": true,
  "seed": 42
}