{
  "train_jsonl": "data/groups_train.jsonl",
  "valid_jsonl": "data/groups_valid.jsonl",
  "run_dir": "runs/planB_robust",
  "resume": "runs/planB_polish/ckpt.pt",
  "backbone": "google/mt5-small",
  "num_latents": 16,
  "latent_dropout": 0.05,
  "latent_noise_std": 0.01,
  "batch_size": 4,
  "grad_accum": 8,
  "epochs": 1,
  "max_doc_len": 256,
  "max_sum_len": 64,
  "eval_every": 400,
  "max_train_examples": 2000,
  "max_valid_examples": 200,
  "lambda_align": 0.5,
  "tau": 0.07,
  "lambda_varcov": 10.0,
  "var_target_std": 0.05,
  "lambda_mean": 0.1,
  "lambda_mean_diff": 0.1,
  "lambda_pair": 0.2,
  "lambda_lang": 0.5,
  "lambda_len": 1.0,
  "adv_start_step": 0,
  "grl_alpha": 1.0,
  "grl_warmup": 200,
  "lr_model": 0.0001,
  "lr_lang": 0.001,
  "lr_len": 0.001,
  "adv_clf_steps": 5,
  "adv_clf_weight_decay": 0.0,
  "adv_queue_size": 4096,
  "adv_clf_batch": 256,
  "adv_mix_current": 0.5
}