{
  "train_jsonl": "data/groups_train.jsonl",
  "valid_jsonl": "data/groups_valid.jsonl",
  "run_dir": "runs/planB_polish_robust_dec_long",
  "resume": "runs/planB_polish/ckpt.pt",
  "backbone": "google/mt5-small",
  "num_latents": 16,
  "batch_size": 4,
  "grad_accum": 8,
  "epochs": 3,
  "max_doc_len": 256,
  "max_sum_len": 64,
  "eval_every": 400,
  "max_train_examples": 0,
  "max_valid_examples": 0,
  "lr": 0.0003,
  "weight_decay": 0.0,
  "latent_dropout": 0.1,
  "latent_noise_std": 0.01,
  "noise_warmup_steps": 500,
  "noise_warmup_start_step": -1,
  "untie_lm_head": true,
  "seed": 42
}