JohnnyZeppelin commited on
Commit
204556c
·
verified ·
1 Parent(s): 837ed05

Upload folder using huggingface_hub

Browse files
adv0/ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bbd25afde49e18a03e740803ba86f0273f6de6d7b858986a4a531e74e1b51b4
3
+ size 3640289065
adv0/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train_jsonl": "data/groups_train.jsonl",
3
+ "valid_jsonl": "data/groups_valid.jsonl",
4
+ "run_dir": "runs/adv0",
5
+ "backbone": "google/mt5-small",
6
+ "num_latents": 16,
7
+ "latent_dropout": 0.1,
8
+ "latent_noise_std": 0.01,
9
+ "batch_size": 4,
10
+ "grad_accum": 8,
11
+ "lr": 0.0001,
12
+ "epochs": 1,
13
+ "max_doc_len": 256,
14
+ "max_sum_len": 64,
15
+ "lambda_align": 0.1,
16
+ "tau": 0.07,
17
+ "lambda_langadv": 0.5,
18
+ "lambda_varcov": 1.0,
19
+ "var_target_std": 0.05,
20
+ "eval_every": 200,
21
+ "max_train_examples": 2000,
22
+ "max_valid_examples": 200
23
+ }
adv0/diag.jsonl ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ {"step": 200, "nll_en": 6.9953523445129395, "nll_zh": 7.015304956436157, "nll": 7.0053286504745484}
2
+ {"step": 400, "nll_en": 6.02718071937561, "nll_zh": 6.022276220321655, "nll": 6.0247284698486325}
adv0/diag_full.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "ckpt": "runs/adv0/ckpt.pt",
3
+ "nll_en": 5.718734880330446,
4
+ "nll_zh": 5.703346028640457,
5
+ "swap_delta_en": 0.20727035107118635,
6
+ "swap_delta_zh": 0.2063286974868613,
7
+ "ablate_zero_delta_en": 11.523488786709486,
8
+ "ablate_mean_delta_en": 0.11044462034616612,
9
+ "ablate_noise_delta_en": 17.804158700697013,
10
+ "ablate_zero_delta_zh": 11.538877658561722,
11
+ "ablate_mean_delta_zh": 0.11326574825585267,
12
+ "ablate_noise_delta_zh": 17.847772932959913,
13
+ "inv_top1_full": 0.2526427209377289,
14
+ "inv_top5_full": 0.5031712651252747,
15
+ "diag_sim_mean": 0.9942412972450256,
16
+ "offdiag_sim_mean": 0.9853827357292175,
17
+ "sim_margin": 0.008858561515808105,
18
+ "n_valid": 946
19
+ }
adv0/logs.jsonl ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"step": 20, "epoch": 0, "loss": 27.650449752807617, "loss_render": 26.763328552246094, "loss_align": 1.3222241401672363, "loss_varcov": 0.06515186280012131, "loss_lang": 0.6897470951080322, "lang_acc_batch": 0.5, "elapsed_sec": 4.6231606006622314}
2
+ {"step": 40, "epoch": 0, "loss": 21.921096801757812, "loss_render": 20.944686889648438, "loss_align": 2.386655569076538, "loss_varcov": 0.05106358975172043, "loss_lang": 0.6866821050643921, "lang_acc_batch": 0.75, "elapsed_sec": 8.637052774429321}
3
+ {"step": 60, "epoch": 0, "loss": 14.394793510437012, "loss_render": 13.525341033935547, "loss_align": 1.1219804286956787, "loss_varcov": 0.06859086453914642, "loss_lang": 0.6886632442474365, "lang_acc_batch": 0.5, "elapsed_sec": 12.949930191040039}
4
+ {"step": 80, "epoch": 0, "loss": 13.06428050994873, "loss_render": 12.203603744506836, "loss_align": 1.0322198867797852, "loss_varcov": 0.06698998063802719, "loss_lang": 0.6904647350311279, "lang_acc_batch": 0.5, "elapsed_sec": 17.287116050720215}
5
+ {"step": 100, "epoch": 0, "loss": 12.487584114074707, "loss_render": 11.546947479248047, "loss_align": 1.9753456115722656, "loss_varcov": 0.060836680233478546, "loss_lang": 0.6822649836540222, "lang_acc_batch": 0.625, "elapsed_sec": 21.59953784942627}
6
+ {"step": 120, "epoch": 0, "loss": 10.319941520690918, "loss_render": 9.414814949035645, "loss_align": 1.428546667098999, "loss_varcov": 0.07183772325515747, "loss_lang": 0.6904347538948059, "lang_acc_batch": 0.5, "elapsed_sec": 26.194863080978394}
7
+ {"step": 140, "epoch": 0, "loss": 9.401609420776367, "loss_render": 8.506437301635742, "loss_align": 1.3234813213348389, "loss_varcov": 0.07503455877304077, "loss_lang": 0.6877903342247009, "lang_acc_batch": 0.5, "elapsed_sec": 30.529441595077515}
8
+ {"step": 160, "epoch": 0, "loss": 8.951837539672852, "loss_render": 8.05506420135498, "loss_align": 1.3446648120880127, "loss_varcov": 0.07413844764232635, "loss_lang": 0.6881682276725769, "lang_acc_batch": 0.5, "elapsed_sec": 35.08437252044678}
9
+ {"step": 180, "epoch": 0, "loss": 8.826312065124512, "loss_render": 7.923300266265869, "loss_align": 1.3445956707000732, "loss_varcov": 0.07693865150213242, "loss_lang": 0.6916133761405945, "lang_acc_batch": 0.5, "elapsed_sec": 39.36867332458496}
10
+ {"step": 200, "epoch": 0, "loss": 8.421167373657227, "loss_render": 7.520854949951172, "loss_align": 1.3326096534729004, "loss_varcov": 0.0758049413561821, "loss_lang": 0.691246509552002, "lang_acc_batch": 0.5, "elapsed_sec": 44.02493953704834}
11
+ {"step": 220, "epoch": 0, "loss": 7.999876022338867, "loss_render": 7.092364311218262, "loss_align": 1.362046718597412, "loss_varcov": 0.07929220795631409, "loss_lang": 0.6920146942138672, "lang_acc_batch": 0.5, "elapsed_sec": 56.575820446014404}
12
+ {"step": 240, "epoch": 0, "loss": 7.74422550201416, "loss_render": 6.839077949523926, "loss_align": 1.3439791202545166, "loss_varcov": 0.07875196635723114, "loss_lang": 0.6919975280761719, "lang_acc_batch": 0.5, "elapsed_sec": 60.66001582145691}
13
+ {"step": 260, "epoch": 0, "loss": 7.985320568084717, "loss_render": 7.076610565185547, "loss_align": 1.3665584325790405, "loss_varcov": 0.07948065549135208, "loss_lang": 0.6925735473632812, "lang_acc_batch": 0.5, "elapsed_sec": 64.76579260826111}
14
+ {"step": 280, "epoch": 0, "loss": 6.990633010864258, "loss_render": 6.081690788269043, "loss_align": 1.3663575649261475, "loss_varcov": 0.07955020666122437, "loss_lang": 0.6927560567855835, "lang_acc_batch": 0.5, "elapsed_sec": 68.87916564941406}
15
+ {"step": 300, "epoch": 0, "loss": 7.652427673339844, "loss_render": 6.746022701263428, "loss_align": 1.3422842025756836, "loss_varcov": 0.07915754616260529, "loss_lang": 0.6930192112922668, "lang_acc_batch": 0.5, "elapsed_sec": 72.96279907226562}
16
+ {"step": 320, "epoch": 0, "loss": 7.309179782867432, "loss_render": 6.399218559265137, "loss_align": 1.3768812417984009, "loss_varcov": 0.07963863015174866, "loss_lang": 0.6926344633102417, "lang_acc_batch": 0.5, "elapsed_sec": 77.34232926368713}
17
+ {"step": 340, "epoch": 0, "loss": 6.789853096008301, "loss_render": 5.8831634521484375, "loss_align": 1.3496586084365845, "loss_varcov": 0.07924623787403107, "loss_lang": 0.6924774646759033, "lang_acc_batch": 0.5, "elapsed_sec": 81.83927726745605}
18
+ {"step": 360, "epoch": 0, "loss": 7.03304386138916, "loss_render": 6.125219821929932, "loss_align": 1.3604636192321777, "loss_varcov": 0.0794791430234909, "loss_lang": 0.692298412322998, "lang_acc_batch": 0.5, "elapsed_sec": 86.17622494697571}
19
+ {"step": 380, "epoch": 0, "loss": 6.368840217590332, "loss_render": 5.467444896697998, "loss_align": 1.3064045906066895, "loss_varcov": 0.07869096100330353, "loss_lang": 0.6920640468597412, "lang_acc_batch": 0.5, "elapsed_sec": 90.45712161064148}
20
+ {"step": 400, "epoch": 0, "loss": 6.572549819946289, "loss_render": 5.664807319641113, "loss_align": 1.3603339195251465, "loss_varcov": 0.0794277936220169, "loss_lang": 0.6922812461853027, "lang_acc_batch": 0.5, "elapsed_sec": 94.69851660728455}
21
+ {"step": 420, "epoch": 0, "loss": 6.588827610015869, "loss_render": 5.679479598999023, "loss_align": 1.37607741355896, "loss_varcov": 0.07970018684864044, "loss_lang": 0.6920405626296997, "lang_acc_batch": 0.5, "elapsed_sec": 107.52089738845825}
22
+ {"step": 440, "epoch": 0, "loss": 6.435425758361816, "loss_render": 5.532404899597168, "loss_align": 1.3244233131408691, "loss_varcov": 0.0789901465177536, "loss_lang": 0.6915886402130127, "lang_acc_batch": 0.5, "elapsed_sec": 111.51784372329712}
23
+ {"step": 460, "epoch": 0, "loss": 6.379398345947266, "loss_render": 5.477296352386475, "loss_align": 1.3135215044021606, "loss_varcov": 0.07883299887180328, "loss_lang": 0.6919170022010803, "lang_acc_batch": 0.5, "elapsed_sec": 115.94771218299866}
24
+ {"step": 480, "epoch": 0, "loss": 6.698791980743408, "loss_render": 5.80061149597168, "loss_align": 1.280196189880371, "loss_varcov": 0.07822120189666748, "loss_lang": 0.6919394731521606, "lang_acc_batch": 0.5, "elapsed_sec": 120.18537902832031}
25
+ {"step": 500, "epoch": 0, "loss": 6.528753757476807, "loss_render": 5.626822471618652, "loss_align": 1.3126040697097778, "loss_varcov": 0.07868070155382156, "loss_lang": 0.6919903755187988, "lang_acc_batch": 0.5, "elapsed_sec": 123.56391143798828}
adv0/probe_leakage.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "ckpt": "runs/adv0/ckpt.pt",
3
+ "max_train_groups": 8000,
4
+ "max_valid_groups": 946,
5
+ "probe_hidden": 0,
6
+ "epochs": 20,
7
+ "lr": 0.01,
8
+ "baseline_majority_lang_acc": 0.5,
9
+ "baseline_majority_len_acc": 0.9271249771118164,
10
+ "lang_probe": {
11
+ "train_acc": 0.99825,
12
+ "valid_acc": 0.9984143763213531,
13
+ "best_valid_acc": 0.9984143763213531
14
+ },
15
+ "len_probe": {
16
+ "train_acc": 0.936125,
17
+ "valid_acc": 0.9143763213530656,
18
+ "best_valid_acc": 0.9143763213530656
19
+ }
20
+ }
bottleneck_only0/ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73ff572956f9d416446e21a81dd9eea00aef3bad17d88a555d95e0783049e262
3
+ size 3640274577
bottleneck_only0/config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train_jsonl": "data/groups_train.jsonl",
3
+ "valid_jsonl": "data/groups_valid.jsonl",
4
+ "run_dir": "runs/bottleneck_only0",
5
+ "backbone": "google/mt5-small",
6
+ "num_latents": 16,
7
+ "latent_dropout": 0.1,
8
+ "latent_noise_std": 0.01,
9
+ "batch_size": 4,
10
+ "grad_accum": 8,
11
+ "lr": 0.0001,
12
+ "epochs": 1,
13
+ "max_doc_len": 256,
14
+ "max_sum_len": 64,
15
+ "lambda_align": 0.0,
16
+ "tau": 0.07,
17
+ "eval_every": 500,
18
+ "max_train_examples": 0,
19
+ "max_valid_examples": 0
20
+ }
bottleneck_only0/diag.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {"step": 500, "nll": 6.2587387980164735, "nll_en": 6.258790666406805, "nll_zh": 6.2586869245855805, "inv_top1": 0.9492600422832981}
2
+ {"step": 1000, "nll": 4.235596902778738, "nll_en": 4.234024102289632, "nll_zh": 4.237169725446318, "inv_top1": 0.9693446088794926}
3
+ {"step": 1500, "nll": 3.8439625564639717, "nll_en": 3.842405145818537, "nll_zh": 3.8455199570282823, "inv_top1": 0.9862579281183932}
4
+ {"step": 2000, "nll": 3.670006343728644, "nll_en": 3.6681871091588607, "nll_zh": 3.671825564184854, "inv_top1": 0.9894291754756871}
5
+ {"step": 2500, "nll": 3.554669790489729, "nll_en": 3.5535303160201672, "nll_zh": 3.555809265967403, "inv_top1": 0.9904862579281184}
6
+ {"step": 3000, "nll": 3.4688394346902536, "nll_en": 3.465840362847229, "nll_zh": 3.471838506533278, "inv_top1": 0.992600422832981}
7
+ {"step": 3500, "nll": 3.4021803894204266, "nll_en": 3.3997549501584396, "nll_zh": 3.404605824649964, "inv_top1": 0.9915433403805497}
8
+ {"step": 4000, "nll": 3.358851924759137, "nll_en": 3.354143586269645, "nll_zh": 3.363560257704011, "inv_top1": 0.9947145877378436}
bottleneck_only0/diag_full.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "ckpt": "runs/bottleneck_only0/ckpt.pt",
3
+ "nll_en": 3.321001886565388,
4
+ "nll_zh": 3.3306097278635294,
5
+ "swap_delta_en": 0.7685747267579931,
6
+ "swap_delta_zh": 0.7492649681220347,
7
+ "ablate_zero_delta_en": 7.139587867083559,
8
+ "ablate_mean_delta_en": 0.3234971925521754,
9
+ "ablate_noise_delta_en": 18.582997932998627,
10
+ "ablate_zero_delta_zh": 7.129979997558271,
11
+ "ablate_mean_delta_zh": 0.3176661006491986,
12
+ "ablate_noise_delta_zh": 18.648924954597845,
13
+ "inv_top1_full": 0.8023256063461304,
14
+ "inv_top5_full": 0.957716703414917,
15
+ "diag_sim_mean": 0.9992262125015259,
16
+ "offdiag_sim_mean": 0.9929117560386658,
17
+ "sim_margin": 0.006314456462860107,
18
+ "n_valid": 946
19
+ }
bottleneck_only0/logs.jsonl ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"step": 20, "epoch": 0, "loss": 25.068214416503906, "loss_render": 25.068214416503906, "loss_align": 1.2560474872589111, "elapsed_sec": 4.62065315246582}
2
+ {"step": 40, "epoch": 0, "loss": 17.173673629760742, "loss_render": 17.173673629760742, "loss_align": 1.4288201332092285, "elapsed_sec": 9.055688619613647}
3
+ {"step": 20, "epoch": 0, "loss": 25.917964935302734, "loss_render": 25.917964935302734, "loss_align": 2.1944735050201416, "elapsed_sec": 4.335532903671265}
4
+ {"step": 40, "epoch": 0, "loss": 18.152647018432617, "loss_render": 18.152647018432617, "loss_align": 1.6684024333953857, "elapsed_sec": 8.67372465133667}
5
+ {"step": 60, "epoch": 0, "loss": 14.218826293945312, "loss_render": 14.218826293945312, "loss_align": 1.30879807472229, "elapsed_sec": 12.983349800109863}
6
+ {"step": 80, "epoch": 0, "loss": 12.603299140930176, "loss_render": 12.603299140930176, "loss_align": 0.9370242357254028, "elapsed_sec": 16.93756628036499}
7
+ {"step": 100, "epoch": 0, "loss": 10.894832611083984, "loss_render": 10.894832611083984, "loss_align": 1.377870798110962, "elapsed_sec": 21.421523332595825}
8
+ {"step": 120, "epoch": 0, "loss": 10.246227264404297, "loss_render": 10.246227264404297, "loss_align": 1.3244850635528564, "elapsed_sec": 25.768774032592773}
9
+ {"step": 140, "epoch": 0, "loss": 8.805749893188477, "loss_render": 8.805749893188477, "loss_align": 1.3966960906982422, "elapsed_sec": 29.968562602996826}
10
+ {"step": 160, "epoch": 0, "loss": 8.589908599853516, "loss_render": 8.589908599853516, "loss_align": 1.1094694137573242, "elapsed_sec": 34.163904666900635}
11
+ {"step": 180, "epoch": 0, "loss": 8.528124809265137, "loss_render": 8.528124809265137, "loss_align": 1.2340401411056519, "elapsed_sec": 38.619858741760254}
12
+ {"step": 200, "epoch": 0, "loss": 8.019331932067871, "loss_render": 8.019331932067871, "loss_align": 1.3434840440750122, "elapsed_sec": 43.52604579925537}
13
+ {"step": 220, "epoch": 0, "loss": 7.343768119812012, "loss_render": 7.343768119812012, "loss_align": 1.3655617237091064, "elapsed_sec": 48.001105308532715}
14
+ {"step": 240, "epoch": 0, "loss": 7.666085243225098, "loss_render": 7.666085243225098, "loss_align": 1.357412576675415, "elapsed_sec": 51.98193907737732}
15
+ {"step": 260, "epoch": 0, "loss": 7.2019548416137695, "loss_render": 7.2019548416137695, "loss_align": 1.3951079845428467, "elapsed_sec": 56.16705775260925}
16
+ {"step": 280, "epoch": 0, "loss": 7.250019550323486, "loss_render": 7.250019550323486, "loss_align": 1.375781774520874, "elapsed_sec": 59.922574520111084}
17
+ {"step": 300, "epoch": 0, "loss": 7.751049041748047, "loss_render": 7.751049041748047, "loss_align": 1.3309111595153809, "elapsed_sec": 63.60503792762756}
18
+ {"step": 320, "epoch": 0, "loss": 7.136506080627441, "loss_render": 7.136506080627441, "loss_align": 1.3528242111206055, "elapsed_sec": 67.64841198921204}
19
+ {"step": 340, "epoch": 0, "loss": 7.514324188232422, "loss_render": 7.514324188232422, "loss_align": 1.3294233083724976, "elapsed_sec": 71.88786387443542}
20
+ {"step": 360, "epoch": 0, "loss": 7.039865970611572, "loss_render": 7.039865970611572, "loss_align": 1.3642728328704834, "elapsed_sec": 75.95599246025085}
21
+ {"step": 380, "epoch": 0, "loss": 7.33233642578125, "loss_render": 7.33233642578125, "loss_align": 1.3258750438690186, "elapsed_sec": 79.70555639266968}
22
+ {"step": 400, "epoch": 0, "loss": 7.311902046203613, "loss_render": 7.311902046203613, "loss_align": 1.3728034496307373, "elapsed_sec": 83.45053219795227}
23
+ {"step": 420, "epoch": 0, "loss": 7.123697280883789, "loss_render": 7.123697280883789, "loss_align": 1.3485982418060303, "elapsed_sec": 87.14196538925171}
24
+ {"step": 440, "epoch": 0, "loss": 7.125831604003906, "loss_render": 7.125831604003906, "loss_align": 1.3624064922332764, "elapsed_sec": 91.44555616378784}
25
+ {"step": 460, "epoch": 0, "loss": 7.244197845458984, "loss_render": 7.244197845458984, "loss_align": 1.3405232429504395, "elapsed_sec": 95.24549293518066}
26
+ {"step": 480, "epoch": 0, "loss": 7.708622932434082, "loss_render": 7.708622932434082, "loss_align": 1.297819972038269, "elapsed_sec": 99.10843658447266}
27
+ {"step": 500, "epoch": 0, "loss": 7.326488494873047, "loss_render": 7.326488494873047, "loss_align": 1.2589489221572876, "elapsed_sec": 102.78324365615845}
28
+ {"step": 520, "epoch": 0, "loss": 6.1471710205078125, "loss_render": 6.1471710205078125, "loss_align": 1.2746977806091309, "elapsed_sec": 139.3182394504547}
29
+ {"step": 540, "epoch": 0, "loss": 6.2215166091918945, "loss_render": 6.2215166091918945, "loss_align": 1.0991568565368652, "elapsed_sec": 142.9459776878357}
30
+ {"step": 560, "epoch": 0, "loss": 5.882511138916016, "loss_render": 5.882511138916016, "loss_align": 1.0653331279754639, "elapsed_sec": 146.55898141860962}
31
+ {"step": 580, "epoch": 0, "loss": 6.240784168243408, "loss_render": 6.240784168243408, "loss_align": 1.2544007301330566, "elapsed_sec": 150.4371256828308}
32
+ {"step": 600, "epoch": 0, "loss": 5.747830390930176, "loss_render": 5.747830390930176, "loss_align": 1.2261097431182861, "elapsed_sec": 154.3231282234192}
33
+ {"step": 620, "epoch": 0, "loss": 6.342836380004883, "loss_render": 6.342836380004883, "loss_align": 1.1655685901641846, "elapsed_sec": 158.5306167602539}
34
+ {"step": 640, "epoch": 0, "loss": 5.374281883239746, "loss_render": 5.374281883239746, "loss_align": 1.1424990892410278, "elapsed_sec": 162.1269247531891}
35
+ {"step": 660, "epoch": 0, "loss": 6.143170356750488, "loss_render": 6.143170356750488, "loss_align": 1.2058131694793701, "elapsed_sec": 165.76400637626648}
36
+ {"step": 680, "epoch": 0, "loss": 5.639875411987305, "loss_render": 5.639875411987305, "loss_align": 1.253835916519165, "elapsed_sec": 169.377046585083}
37
+ {"step": 700, "epoch": 0, "loss": 4.579581260681152, "loss_render": 4.579581260681152, "loss_align": 1.2069745063781738, "elapsed_sec": 173.08827257156372}
38
+ {"step": 720, "epoch": 0, "loss": 4.981725692749023, "loss_render": 4.981725692749023, "loss_align": 1.2948780059814453, "elapsed_sec": 176.51431703567505}
39
+ {"step": 740, "epoch": 0, "loss": 4.752182960510254, "loss_render": 4.752182960510254, "loss_align": 1.2615938186645508, "elapsed_sec": 180.18725872039795}
40
+ {"step": 760, "epoch": 0, "loss": 4.593062877655029, "loss_render": 4.593062877655029, "loss_align": 1.1857482194900513, "elapsed_sec": 184.2383053302765}
41
+ {"step": 780, "epoch": 0, "loss": 4.672336578369141, "loss_render": 4.672336578369141, "loss_align": 1.1262598037719727, "elapsed_sec": 187.95040440559387}
42
+ {"step": 800, "epoch": 0, "loss": 4.731796741485596, "loss_render": 4.731796741485596, "loss_align": 1.2104741334915161, "elapsed_sec": 191.40123677253723}
43
+ {"step": 820, "epoch": 0, "loss": 4.524990081787109, "loss_render": 4.524990081787109, "loss_align": 1.28558349609375, "elapsed_sec": 195.21972227096558}
44
+ {"step": 840, "epoch": 0, "loss": 4.757625579833984, "loss_render": 4.757625579833984, "loss_align": 1.2989811897277832, "elapsed_sec": 199.13956308364868}
45
+ {"step": 860, "epoch": 0, "loss": 4.205277442932129, "loss_render": 4.205277442932129, "loss_align": 1.2577327489852905, "elapsed_sec": 202.9439618587494}
46
+ {"step": 880, "epoch": 0, "loss": 4.437625885009766, "loss_render": 4.437625885009766, "loss_align": 1.2471349239349365, "elapsed_sec": 206.50619316101074}
47
+ {"step": 900, "epoch": 0, "loss": 4.089728355407715, "loss_render": 4.089728355407715, "loss_align": 1.2031450271606445, "elapsed_sec": 210.38319277763367}
48
+ {"step": 920, "epoch": 0, "loss": 4.457942485809326, "loss_render": 4.457942485809326, "loss_align": 1.2428267002105713, "elapsed_sec": 213.90883421897888}
49
+ {"step": 940, "epoch": 0, "loss": 4.1108245849609375, "loss_render": 4.1108245849609375, "loss_align": 1.2915761470794678, "elapsed_sec": 216.93723940849304}
50
+ {"step": 960, "epoch": 0, "loss": 4.48177433013916, "loss_render": 4.48177433013916, "loss_align": 1.2980643510818481, "elapsed_sec": 220.3911099433899}
51
+ {"step": 980, "epoch": 0, "loss": 10.206438064575195, "loss_render": 10.206438064575195, "loss_align": 4.400123596191406, "elapsed_sec": 224.0379605293274}
52
+ {"step": 1000, "epoch": 0, "loss": 4.1711602210998535, "loss_render": 4.1711602210998535, "loss_align": 1.2913529872894287, "elapsed_sec": 227.59467148780823}
53
+ {"step": 1020, "epoch": 0, "loss": 4.17601203918457, "loss_render": 4.17601203918457, "loss_align": 1.3505561351776123, "elapsed_sec": 265.18921160697937}
54
+ {"step": 1040, "epoch": 0, "loss": 3.9969260692596436, "loss_render": 3.9969260692596436, "loss_align": 1.281299114227295, "elapsed_sec": 268.83770275115967}
55
+ {"step": 1060, "epoch": 0, "loss": 3.907970905303955, "loss_render": 3.907970905303955, "loss_align": 1.3253660202026367, "elapsed_sec": 272.64444160461426}
56
+ {"step": 1080, "epoch": 0, "loss": 4.1193647384643555, "loss_render": 4.1193647384643555, "loss_align": 1.2949368953704834, "elapsed_sec": 276.56428384780884}
57
+ {"step": 1100, "epoch": 0, "loss": 4.072325706481934, "loss_render": 4.072325706481934, "loss_align": 1.2628726959228516, "elapsed_sec": 280.49667835235596}
58
+ {"step": 1120, "epoch": 0, "loss": 4.002856731414795, "loss_render": 4.002856731414795, "loss_align": 1.2051516771316528, "elapsed_sec": 284.46477818489075}
59
+ {"step": 1140, "epoch": 0, "loss": 4.3635969161987305, "loss_render": 4.3635969161987305, "loss_align": 1.2911789417266846, "elapsed_sec": 288.5590832233429}
60
+ {"step": 1160, "epoch": 0, "loss": 4.4030609130859375, "loss_render": 4.4030609130859375, "loss_align": 1.3163681030273438, "elapsed_sec": 292.5012457370758}
61
+ {"step": 1180, "epoch": 0, "loss": 3.747136116027832, "loss_render": 3.747136116027832, "loss_align": 1.305263876914978, "elapsed_sec": 296.2241327762604}
62
+ {"step": 1200, "epoch": 0, "loss": 4.091752052307129, "loss_render": 4.091752052307129, "loss_align": 1.2443466186523438, "elapsed_sec": 299.69353580474854}
63
+ {"step": 1220, "epoch": 0, "loss": 4.026358127593994, "loss_render": 4.026358127593994, "loss_align": 1.2987968921661377, "elapsed_sec": 303.13763403892517}
64
+ {"step": 1240, "epoch": 0, "loss": 3.777825355529785, "loss_render": 3.777825355529785, "loss_align": 1.2726515531539917, "elapsed_sec": 306.6620581150055}
65
+ {"step": 1260, "epoch": 0, "loss": 3.4520959854125977, "loss_render": 3.4520959854125977, "loss_align": 1.3488078117370605, "elapsed_sec": 310.27875447273254}
66
+ {"step": 1280, "epoch": 0, "loss": 3.688685178756714, "loss_render": 3.688685178756714, "loss_align": 1.2463077306747437, "elapsed_sec": 313.67237973213196}
67
+ {"step": 1300, "epoch": 0, "loss": 4.314986228942871, "loss_render": 4.314986228942871, "loss_align": 1.3503814935684204, "elapsed_sec": 317.3606276512146}
68
+ {"step": 1320, "epoch": 0, "loss": 3.3545126914978027, "loss_render": 3.3545126914978027, "loss_align": 1.31829833984375, "elapsed_sec": 320.9035048484802}
69
+ {"step": 1340, "epoch": 0, "loss": 3.781036853790283, "loss_render": 3.781036853790283, "loss_align": 1.32456374168396, "elapsed_sec": 324.59442806243896}
70
+ {"step": 1360, "epoch": 0, "loss": 3.6448142528533936, "loss_render": 3.6448142528533936, "loss_align": 1.3182804584503174, "elapsed_sec": 328.201623916626}
71
+ {"step": 1380, "epoch": 0, "loss": 3.7379813194274902, "loss_render": 3.7379813194274902, "loss_align": 1.274261713027954, "elapsed_sec": 332.35005950927734}
72
+ {"step": 1400, "epoch": 0, "loss": 3.9656317234039307, "loss_render": 3.9656317234039307, "loss_align": 1.302483081817627, "elapsed_sec": 336.05968046188354}
73
+ {"step": 1420, "epoch": 0, "loss": 3.608476161956787, "loss_render": 3.608476161956787, "loss_align": 1.2988253831863403, "elapsed_sec": 339.5888817310333}
74
+ {"step": 1440, "epoch": 0, "loss": 3.686962604522705, "loss_render": 3.686962604522705, "loss_align": 1.3405593633651733, "elapsed_sec": 343.63488698005676}
75
+ {"step": 1460, "epoch": 0, "loss": 3.7350282669067383, "loss_render": 3.7350282669067383, "loss_align": 1.2814233303070068, "elapsed_sec": 347.3592264652252}
76
+ {"step": 1480, "epoch": 0, "loss": 3.6883745193481445, "loss_render": 3.6883745193481445, "loss_align": 1.3187284469604492, "elapsed_sec": 350.8557188510895}
77
+ {"step": 1500, "epoch": 0, "loss": 3.61160945892334, "loss_render": 3.61160945892334, "loss_align": 1.3573780059814453, "elapsed_sec": 354.58983302116394}
78
+ {"step": 1520, "epoch": 0, "loss": 4.204068183898926, "loss_render": 4.204068183898926, "loss_align": 1.302490234375, "elapsed_sec": 392.74396657943726}
79
+ {"step": 1540, "epoch": 0, "loss": 4.208974361419678, "loss_render": 4.208974361419678, "loss_align": 1.323647141456604, "elapsed_sec": 396.3385331630707}
80
+ {"step": 1560, "epoch": 0, "loss": 4.270020484924316, "loss_render": 4.270020484924316, "loss_align": 1.3001823425292969, "elapsed_sec": 399.9747898578644}
81
+ {"step": 1580, "epoch": 0, "loss": 3.744575023651123, "loss_render": 3.744575023651123, "loss_align": 1.323404312133789, "elapsed_sec": 403.7682225704193}
82
+ {"step": 1600, "epoch": 0, "loss": 3.675665855407715, "loss_render": 3.675665855407715, "loss_align": 1.261446475982666, "elapsed_sec": 407.348735332489}
83
+ {"step": 1620, "epoch": 0, "loss": 3.564666271209717, "loss_render": 3.564666271209717, "loss_align": 1.3066948652267456, "elapsed_sec": 410.91013860702515}
84
+ {"step": 1640, "epoch": 0, "loss": 3.991112232208252, "loss_render": 3.991112232208252, "loss_align": 1.342207431793213, "elapsed_sec": 414.51376819610596}
85
+ {"step": 1660, "epoch": 0, "loss": 4.0674872398376465, "loss_render": 4.0674872398376465, "loss_align": 1.3072348833084106, "elapsed_sec": 418.4138958454132}
86
+ {"step": 1680, "epoch": 0, "loss": 4.090538501739502, "loss_render": 4.090538501739502, "loss_align": 1.3547296524047852, "elapsed_sec": 421.9252519607544}
87
+ {"step": 1700, "epoch": 0, "loss": 4.0118560791015625, "loss_render": 4.0118560791015625, "loss_align": 1.2640297412872314, "elapsed_sec": 425.8377320766449}
88
+ {"step": 1720, "epoch": 0, "loss": 3.9725465774536133, "loss_render": 3.9725465774536133, "loss_align": 1.3134448528289795, "elapsed_sec": 429.449254989624}
89
+ {"step": 1740, "epoch": 0, "loss": 3.5649256706237793, "loss_render": 3.5649256706237793, "loss_align": 1.3002586364746094, "elapsed_sec": 433.0415790081024}
90
+ {"step": 1760, "epoch": 0, "loss": 3.6523892879486084, "loss_render": 3.6523892879486084, "loss_align": 1.2715141773223877, "elapsed_sec": 436.49269700050354}
91
+ {"step": 1780, "epoch": 0, "loss": 3.9118521213531494, "loss_render": 3.9118521213531494, "loss_align": 1.3702013492584229, "elapsed_sec": 440.2353720664978}
92
+ {"step": 1800, "epoch": 0, "loss": 3.31128191947937, "loss_render": 3.31128191947937, "loss_align": 1.3256481885910034, "elapsed_sec": 444.1802966594696}
93
+ {"step": 1820, "epoch": 0, "loss": 4.042821884155273, "loss_render": 4.042821884155273, "loss_align": 1.2714600563049316, "elapsed_sec": 447.9678535461426}
94
+ {"step": 1840, "epoch": 0, "loss": 3.760974884033203, "loss_render": 3.760974884033203, "loss_align": 1.2936592102050781, "elapsed_sec": 451.66157245635986}
95
+ {"step": 1860, "epoch": 0, "loss": 4.396261215209961, "loss_render": 4.396261215209961, "loss_align": 1.335903286933899, "elapsed_sec": 455.49061822891235}
96
+ {"step": 1880, "epoch": 0, "loss": 3.3177337646484375, "loss_render": 3.3177337646484375, "loss_align": 1.2679367065429688, "elapsed_sec": 459.45911598205566}
97
+ {"step": 1900, "epoch": 0, "loss": 3.6426124572753906, "loss_render": 3.6426124572753906, "loss_align": 1.331840991973877, "elapsed_sec": 462.96296215057373}
98
+ {"step": 1920, "epoch": 0, "loss": 4.300471305847168, "loss_render": 4.300471305847168, "loss_align": 1.324788212776184, "elapsed_sec": 466.7328338623047}
99
+ {"step": 1940, "epoch": 0, "loss": 3.5377073287963867, "loss_render": 3.5377073287963867, "loss_align": 1.322230577468872, "elapsed_sec": 470.36803126335144}
100
+ {"step": 1960, "epoch": 0, "loss": 3.4510498046875, "loss_render": 3.4510498046875, "loss_align": 1.324146032333374, "elapsed_sec": 474.11961793899536}
101
+ {"step": 1980, "epoch": 0, "loss": 3.3578429222106934, "loss_render": 3.3578429222106934, "loss_align": 1.2893755435943604, "elapsed_sec": 477.7328824996948}
102
+ {"step": 2000, "epoch": 0, "loss": 3.728860855102539, "loss_render": 3.728860855102539, "loss_align": 1.271684169769287, "elapsed_sec": 481.0619614124298}
103
+ {"step": 2020, "epoch": 0, "loss": 3.794705390930176, "loss_render": 3.794705390930176, "loss_align": 1.2950187921524048, "elapsed_sec": 519.6089177131653}
104
+ {"step": 2040, "epoch": 0, "loss": 3.1210739612579346, "loss_render": 3.1210739612579346, "loss_align": 1.2831075191497803, "elapsed_sec": 523.2467148303986}
105
+ {"step": 2060, "epoch": 0, "loss": 3.7204270362854004, "loss_render": 3.7204270362854004, "loss_align": 1.272107720375061, "elapsed_sec": 527.2154533863068}
106
+ {"step": 2080, "epoch": 0, "loss": 3.895073890686035, "loss_render": 3.895073890686035, "loss_align": 1.3083648681640625, "elapsed_sec": 531.0901889801025}
107
+ {"step": 2100, "epoch": 0, "loss": 3.8720345497131348, "loss_render": 3.8720345497131348, "loss_align": 1.3088170289993286, "elapsed_sec": 534.9570457935333}
108
+ {"step": 2120, "epoch": 0, "loss": 3.773664951324463, "loss_render": 3.773664951324463, "loss_align": 1.3160605430603027, "elapsed_sec": 538.7677316665649}
109
+ {"step": 2140, "epoch": 0, "loss": 3.637411594390869, "loss_render": 3.637411594390869, "loss_align": 1.340388536453247, "elapsed_sec": 542.2611155509949}
110
+ {"step": 2160, "epoch": 0, "loss": 3.7036566734313965, "loss_render": 3.7036566734313965, "loss_align": 1.3337763547897339, "elapsed_sec": 546.1059193611145}
111
+ {"step": 2180, "epoch": 0, "loss": 3.7844762802124023, "loss_render": 3.7844762802124023, "loss_align": 1.3149280548095703, "elapsed_sec": 549.8737757205963}
112
+ {"step": 2200, "epoch": 0, "loss": 3.6096854209899902, "loss_render": 3.6096854209899902, "loss_align": 1.3091692924499512, "elapsed_sec": 553.8330833911896}
113
+ {"step": 2220, "epoch": 0, "loss": 3.603646755218506, "loss_render": 3.603646755218506, "loss_align": 1.3281772136688232, "elapsed_sec": 557.7812130451202}
114
+ {"step": 2240, "epoch": 0, "loss": 3.577955722808838, "loss_render": 3.577955722808838, "loss_align": 1.3195786476135254, "elapsed_sec": 561.6335048675537}
115
+ {"step": 2260, "epoch": 0, "loss": 3.6735575199127197, "loss_render": 3.6735575199127197, "loss_align": 1.3043341636657715, "elapsed_sec": 565.8662486076355}
116
+ {"step": 2280, "epoch": 0, "loss": 3.4098072052001953, "loss_render": 3.4098072052001953, "loss_align": 1.3129210472106934, "elapsed_sec": 569.9950351715088}
117
+ {"step": 2300, "epoch": 0, "loss": 3.5220069885253906, "loss_render": 3.5220069885253906, "loss_align": 1.3301793336868286, "elapsed_sec": 573.8146674633026}
118
+ {"step": 2320, "epoch": 0, "loss": 4.460407257080078, "loss_render": 4.460407257080078, "loss_align": 1.3101961612701416, "elapsed_sec": 577.016411781311}
119
+ {"step": 2340, "epoch": 0, "loss": 3.8808064460754395, "loss_render": 3.8808064460754395, "loss_align": 1.3395137786865234, "elapsed_sec": 580.195348739624}
120
+ {"step": 2360, "epoch": 0, "loss": 3.5834298133850098, "loss_render": 3.5834298133850098, "loss_align": 1.3018791675567627, "elapsed_sec": 584.3903532028198}
121
+ {"step": 2380, "epoch": 0, "loss": 3.4221298694610596, "loss_render": 3.4221298694610596, "loss_align": 1.3029401302337646, "elapsed_sec": 588.638747215271}
122
+ {"step": 2400, "epoch": 0, "loss": 3.5018575191497803, "loss_render": 3.5018575191497803, "loss_align": 1.3508882522583008, "elapsed_sec": 593.0044586658478}
123
+ {"step": 2420, "epoch": 0, "loss": 3.502702474594116, "loss_render": 3.502702474594116, "loss_align": 1.2999175786972046, "elapsed_sec": 597.3051435947418}
124
+ {"step": 2440, "epoch": 0, "loss": 3.6807162761688232, "loss_render": 3.6807162761688232, "loss_align": 1.3530789613723755, "elapsed_sec": 601.5814998149872}
125
+ {"step": 2460, "epoch": 0, "loss": 3.1771504878997803, "loss_render": 3.1771504878997803, "loss_align": 1.2571786642074585, "elapsed_sec": 605.664345741272}
126
+ {"step": 2480, "epoch": 0, "loss": 3.3209800720214844, "loss_render": 3.3209800720214844, "loss_align": 1.3444374799728394, "elapsed_sec": 609.7049825191498}
127
+ {"step": 2500, "epoch": 0, "loss": 3.7474570274353027, "loss_render": 3.7474570274353027, "loss_align": 1.30989670753479, "elapsed_sec": 613.1445138454437}
128
+ {"step": 2520, "epoch": 0, "loss": 3.1007232666015625, "loss_render": 3.1007232666015625, "loss_align": 1.29102623462677, "elapsed_sec": 649.6769306659698}
129
+ {"step": 2540, "epoch": 0, "loss": 3.330173969268799, "loss_render": 3.330173969268799, "loss_align": 1.2773791551589966, "elapsed_sec": 653.8328976631165}
130
+ {"step": 2560, "epoch": 0, "loss": 2.983732223510742, "loss_render": 2.983732223510742, "loss_align": 1.3157882690429688, "elapsed_sec": 658.1448090076447}
131
+ {"step": 2580, "epoch": 0, "loss": 3.822010040283203, "loss_render": 3.822010040283203, "loss_align": 1.3517823219299316, "elapsed_sec": 661.9287893772125}
132
+ {"step": 2600, "epoch": 0, "loss": 3.3684892654418945, "loss_render": 3.3684892654418945, "loss_align": 1.3327375650405884, "elapsed_sec": 666.2529532909393}
133
+ {"step": 2620, "epoch": 0, "loss": 3.6741559505462646, "loss_render": 3.6741559505462646, "loss_align": 1.3101791143417358, "elapsed_sec": 670.2937567234039}
134
+ {"step": 2640, "epoch": 0, "loss": 3.5241808891296387, "loss_render": 3.5241808891296387, "loss_align": 1.31425142288208, "elapsed_sec": 674.3010764122009}
135
+ {"step": 2660, "epoch": 0, "loss": 3.394542694091797, "loss_render": 3.394542694091797, "loss_align": 1.2869830131530762, "elapsed_sec": 678.567378282547}
136
+ {"step": 2680, "epoch": 0, "loss": 3.4923617839813232, "loss_render": 3.4923617839813232, "loss_align": 1.3485488891601562, "elapsed_sec": 682.958333492279}
137
+ {"step": 2700, "epoch": 0, "loss": 3.3642659187316895, "loss_render": 3.3642659187316895, "loss_align": 1.3342443704605103, "elapsed_sec": 687.2013468742371}
138
+ {"step": 2720, "epoch": 0, "loss": 3.5672755241394043, "loss_render": 3.5672755241394043, "loss_align": 1.2813721895217896, "elapsed_sec": 691.4832580089569}
139
+ {"step": 2740, "epoch": 0, "loss": 4.537295341491699, "loss_render": 4.537295341491699, "loss_align": 1.3334107398986816, "elapsed_sec": 695.8189036846161}
140
+ {"step": 2760, "epoch": 0, "loss": 3.1378908157348633, "loss_render": 3.1378908157348633, "loss_align": 1.298948049545288, "elapsed_sec": 699.7619595527649}
141
+ {"step": 2780, "epoch": 0, "loss": 3.6751909255981445, "loss_render": 3.6751909255981445, "loss_align": 1.3522818088531494, "elapsed_sec": 703.757246017456}
142
+ {"step": 2800, "epoch": 0, "loss": 4.248785495758057, "loss_render": 4.248785495758057, "loss_align": 1.290888786315918, "elapsed_sec": 707.3820660114288}
143
+ {"step": 2820, "epoch": 0, "loss": 3.3290891647338867, "loss_render": 3.3290891647338867, "loss_align": 1.2983803749084473, "elapsed_sec": 711.3598427772522}
144
+ {"step": 2840, "epoch": 0, "loss": 3.8407440185546875, "loss_render": 3.8407440185546875, "loss_align": 1.3151674270629883, "elapsed_sec": 715.2473719120026}
145
+ {"step": 2860, "epoch": 0, "loss": 3.556464433670044, "loss_render": 3.556464433670044, "loss_align": 1.3364646434783936, "elapsed_sec": 719.0456426143646}
146
+ {"step": 2880, "epoch": 0, "loss": 3.7955799102783203, "loss_render": 3.7955799102783203, "loss_align": 1.2968473434448242, "elapsed_sec": 722.8804559707642}
147
+ {"step": 2900, "epoch": 0, "loss": 3.436861991882324, "loss_render": 3.436861991882324, "loss_align": 1.3074442148208618, "elapsed_sec": 726.6653738021851}
148
+ {"step": 2920, "epoch": 0, "loss": 3.257425308227539, "loss_render": 3.257425308227539, "loss_align": 1.315974473953247, "elapsed_sec": 730.4089157581329}
149
+ {"step": 2940, "epoch": 0, "loss": 3.4747190475463867, "loss_render": 3.4747190475463867, "loss_align": 1.2973198890686035, "elapsed_sec": 734.2148590087891}
150
+ {"step": 2960, "epoch": 0, "loss": 3.2538886070251465, "loss_render": 3.2538886070251465, "loss_align": 1.2823439836502075, "elapsed_sec": 738.2488083839417}
151
+ {"step": 2980, "epoch": 0, "loss": 3.253167152404785, "loss_render": 3.253167152404785, "loss_align": 1.3367148637771606, "elapsed_sec": 742.3728923797607}
152
+ {"step": 3000, "epoch": 0, "loss": 3.307588815689087, "loss_render": 3.307588815689087, "loss_align": 1.35324227809906, "elapsed_sec": 746.4431617259979}
153
+ {"step": 3020, "epoch": 0, "loss": 3.4073219299316406, "loss_render": 3.4073219299316406, "loss_align": 1.3038933277130127, "elapsed_sec": 782.9178764820099}
154
+ {"step": 3040, "epoch": 0, "loss": 3.8293709754943848, "loss_render": 3.8293709754943848, "loss_align": 1.3221344947814941, "elapsed_sec": 786.986828327179}
155
+ {"step": 3060, "epoch": 0, "loss": 2.813988208770752, "loss_render": 2.813988208770752, "loss_align": 1.3039557933807373, "elapsed_sec": 791.031477689743}
156
+ {"step": 3080, "epoch": 0, "loss": 3.3804686069488525, "loss_render": 3.3804686069488525, "loss_align": 1.307481050491333, "elapsed_sec": 794.9832298755646}
157
+ {"step": 3100, "epoch": 0, "loss": 3.9387753009796143, "loss_render": 3.9387753009796143, "loss_align": 1.3430440425872803, "elapsed_sec": 799.0074508190155}
158
+ {"step": 3120, "epoch": 0, "loss": 3.7268102169036865, "loss_render": 3.7268102169036865, "loss_align": 1.3291388750076294, "elapsed_sec": 803.0955741405487}
159
+ {"step": 3140, "epoch": 0, "loss": 3.5934321880340576, "loss_render": 3.5934321880340576, "loss_align": 1.3048486709594727, "elapsed_sec": 807.6380565166473}
160
+ {"step": 3160, "epoch": 0, "loss": 3.806903839111328, "loss_render": 3.806903839111328, "loss_align": 1.3227596282958984, "elapsed_sec": 811.6148297786713}
161
+ {"step": 3180, "epoch": 0, "loss": 3.1972837448120117, "loss_render": 3.1972837448120117, "loss_align": 1.342570424079895, "elapsed_sec": 815.530838727951}
162
+ {"step": 3200, "epoch": 0, "loss": 3.8258919715881348, "loss_render": 3.8258919715881348, "loss_align": 1.3053572177886963, "elapsed_sec": 819.9629707336426}
163
+ {"step": 3220, "epoch": 0, "loss": 3.562363386154175, "loss_render": 3.562363386154175, "loss_align": 1.3167495727539062, "elapsed_sec": 824.1888661384583}
164
+ {"step": 3240, "epoch": 0, "loss": 3.5921406745910645, "loss_render": 3.5921406745910645, "loss_align": 1.3447704315185547, "elapsed_sec": 828.2850723266602}
165
+ {"step": 3260, "epoch": 0, "loss": 3.6050477027893066, "loss_render": 3.6050477027893066, "loss_align": 1.357332468032837, "elapsed_sec": 832.4591248035431}
166
+ {"step": 3280, "epoch": 0, "loss": 3.3253564834594727, "loss_render": 3.3253564834594727, "loss_align": 1.3173449039459229, "elapsed_sec": 836.3291099071503}
167
+ {"step": 3300, "epoch": 0, "loss": 3.200458526611328, "loss_render": 3.200458526611328, "loss_align": 1.3337464332580566, "elapsed_sec": 840.0457291603088}
168
+ {"step": 3320, "epoch": 0, "loss": 3.7407610416412354, "loss_render": 3.7407610416412354, "loss_align": 1.3401203155517578, "elapsed_sec": 844.1602971553802}
169
+ {"step": 3340, "epoch": 0, "loss": 3.380537271499634, "loss_render": 3.380537271499634, "loss_align": 1.3359365463256836, "elapsed_sec": 848.0678279399872}
170
+ {"step": 3360, "epoch": 0, "loss": 3.8489432334899902, "loss_render": 3.8489432334899902, "loss_align": 1.3249884843826294, "elapsed_sec": 852.1380407810211}
171
+ {"step": 3380, "epoch": 0, "loss": 3.4136223793029785, "loss_render": 3.4136223793029785, "loss_align": 1.292271614074707, "elapsed_sec": 856.0447263717651}
172
+ {"step": 3400, "epoch": 0, "loss": 3.659721851348877, "loss_render": 3.659721851348877, "loss_align": 1.3260339498519897, "elapsed_sec": 859.715437412262}
173
+ {"step": 3420, "epoch": 0, "loss": 3.6745357513427734, "loss_render": 3.6745357513427734, "loss_align": 1.3277641534805298, "elapsed_sec": 863.3376669883728}
174
+ {"step": 3440, "epoch": 0, "loss": 3.5116078853607178, "loss_render": 3.5116078853607178, "loss_align": 1.3240690231323242, "elapsed_sec": 867.2579228878021}
175
+ {"step": 3460, "epoch": 0, "loss": 3.734320640563965, "loss_render": 3.734320640563965, "loss_align": 1.333486795425415, "elapsed_sec": 871.2200312614441}
176
+ {"step": 3480, "epoch": 0, "loss": 3.6618905067443848, "loss_render": 3.6618905067443848, "loss_align": 1.2953482866287231, "elapsed_sec": 875.008962392807}
177
+ {"step": 3500, "epoch": 0, "loss": 3.6431941986083984, "loss_render": 3.6431941986083984, "loss_align": 1.3012919425964355, "elapsed_sec": 878.9176557064056}
178
+ {"step": 3520, "epoch": 0, "loss": 3.529413938522339, "loss_render": 3.529413938522339, "loss_align": 1.3196945190429688, "elapsed_sec": 918.0353722572327}
179
+ {"step": 3540, "epoch": 0, "loss": 3.3814449310302734, "loss_render": 3.3814449310302734, "loss_align": 1.3386421203613281, "elapsed_sec": 921.5147044658661}
180
+ {"step": 3560, "epoch": 0, "loss": 3.3931124210357666, "loss_render": 3.3931124210357666, "loss_align": 1.2987191677093506, "elapsed_sec": 925.1459677219391}
181
+ {"step": 3580, "epoch": 0, "loss": 2.9901633262634277, "loss_render": 2.9901633262634277, "loss_align": 1.2852380275726318, "elapsed_sec": 928.6822941303253}
182
+ {"step": 3600, "epoch": 0, "loss": 3.4530773162841797, "loss_render": 3.4530773162841797, "loss_align": 1.3439598083496094, "elapsed_sec": 932.6685266494751}
183
+ {"step": 3620, "epoch": 0, "loss": 3.3586881160736084, "loss_render": 3.3586881160736084, "loss_align": 1.3105850219726562, "elapsed_sec": 936.9401214122772}
184
+ {"step": 3640, "epoch": 0, "loss": 3.2238268852233887, "loss_render": 3.2238268852233887, "loss_align": 1.3051881790161133, "elapsed_sec": 941.1896359920502}
185
+ {"step": 3660, "epoch": 0, "loss": 3.6321115493774414, "loss_render": 3.6321115493774414, "loss_align": 1.3367255926132202, "elapsed_sec": 945.5687658786774}
186
+ {"step": 3680, "epoch": 0, "loss": 3.186959981918335, "loss_render": 3.186959981918335, "loss_align": 1.3239593505859375, "elapsed_sec": 949.653373003006}
187
+ {"step": 3700, "epoch": 0, "loss": 3.1999645233154297, "loss_render": 3.1999645233154297, "loss_align": 1.3040820360183716, "elapsed_sec": 953.76904463768}
188
+ {"step": 3720, "epoch": 0, "loss": 2.6905572414398193, "loss_render": 2.6905572414398193, "loss_align": 1.323084831237793, "elapsed_sec": 957.5499608516693}
189
+ {"step": 3740, "epoch": 0, "loss": 3.0647521018981934, "loss_render": 3.0647521018981934, "loss_align": 1.3019015789031982, "elapsed_sec": 961.6371169090271}
190
+ {"step": 3760, "epoch": 0, "loss": 4.310507774353027, "loss_render": 4.310507774353027, "loss_align": 0.8147823810577393, "elapsed_sec": 966.173490524292}
191
+ {"step": 3780, "epoch": 0, "loss": 3.638549327850342, "loss_render": 3.638549327850342, "loss_align": 1.3138084411621094, "elapsed_sec": 970.3408970832825}
192
+ {"step": 3800, "epoch": 0, "loss": 3.3399221897125244, "loss_render": 3.3399221897125244, "loss_align": 1.3185560703277588, "elapsed_sec": 974.3992867469788}
193
+ {"step": 3820, "epoch": 0, "loss": 3.2794189453125, "loss_render": 3.2794189453125, "loss_align": 1.3398469686508179, "elapsed_sec": 978.5260744094849}
194
+ {"step": 3840, "epoch": 0, "loss": 3.683786392211914, "loss_render": 3.683786392211914, "loss_align": 1.3230746984481812, "elapsed_sec": 982.5338065624237}
195
+ {"step": 3860, "epoch": 0, "loss": 3.0670700073242188, "loss_render": 3.0670700073242188, "loss_align": 1.2995266914367676, "elapsed_sec": 986.711147069931}
196
+ {"step": 3880, "epoch": 0, "loss": 3.644256591796875, "loss_render": 3.644256591796875, "loss_align": 1.3107774257659912, "elapsed_sec": 990.2249693870544}
197
+ {"step": 3900, "epoch": 0, "loss": 3.714061975479126, "loss_render": 3.714061975479126, "loss_align": 1.341538906097412, "elapsed_sec": 993.4486451148987}
198
+ {"step": 3920, "epoch": 0, "loss": 3.6679954528808594, "loss_render": 3.6679954528808594, "loss_align": 1.3169686794281006, "elapsed_sec": 996.8078684806824}
199
+ {"step": 3940, "epoch": 0, "loss": 3.390392303466797, "loss_render": 3.390392303466797, "loss_align": 1.3089807033538818, "elapsed_sec": 1000.2379593849182}
200
+ {"step": 3960, "epoch": 0, "loss": 4.21923828125, "loss_render": 4.21923828125, "loss_align": 1.3227806091308594, "elapsed_sec": 1003.9036755561829}
201
+ {"step": 3980, "epoch": 0, "loss": 3.0390615463256836, "loss_render": 3.0390615463256836, "loss_align": 1.3363401889801025, "elapsed_sec": 1007.6658270359039}
202
+ {"step": 4000, "epoch": 0, "loss": 3.3446357250213623, "loss_render": 3.3446357250213623, "loss_align": 1.3320610523223877, "elapsed_sec": 1011.4687936306}
203
+ {"step": 4020, "epoch": 0, "loss": 3.08164644241333, "loss_render": 3.08164644241333, "loss_align": 1.327989935874939, "elapsed_sec": 1048.417100906372}
204
+ {"step": 4040, "epoch": 0, "loss": 3.3235292434692383, "loss_render": 3.3235292434692383, "loss_align": 1.3209460973739624, "elapsed_sec": 1052.1070799827576}
205
+ {"step": 4060, "epoch": 0, "loss": 3.4119114875793457, "loss_render": 3.4119114875793457, "loss_align": 1.3108515739440918, "elapsed_sec": 1056.0266780853271}
206
+ {"step": 4080, "epoch": 0, "loss": 3.139103412628174, "loss_render": 3.139103412628174, "loss_align": 1.3108247518539429, "elapsed_sec": 1059.856823682785}
207
+ {"step": 4100, "epoch": 0, "loss": 3.3543968200683594, "loss_render": 3.3543968200683594, "loss_align": 1.3377876281738281, "elapsed_sec": 1063.5917732715607}
208
+ {"step": 4120, "epoch": 0, "loss": 3.3362679481506348, "loss_render": 3.3362679481506348, "loss_align": 1.3344186544418335, "elapsed_sec": 1067.46173787117}
209
+ {"step": 4140, "epoch": 0, "loss": 3.683753490447998, "loss_render": 3.683753490447998, "loss_align": 1.3066401481628418, "elapsed_sec": 1071.2787556648254}
210
+ {"step": 4160, "epoch": 0, "loss": 2.914961338043213, "loss_render": 2.914961338043213, "loss_align": 1.3082780838012695, "elapsed_sec": 1075.001387834549}
211
+ {"step": 4180, "epoch": 0, "loss": 2.7455954551696777, "loss_render": 2.7455954551696777, "loss_align": 1.3437139987945557, "elapsed_sec": 1079.4036433696747}
212
+ {"step": 4200, "epoch": 0, "loss": 3.297978162765503, "loss_render": 3.297978162765503, "loss_align": 1.3461363315582275, "elapsed_sec": 1083.4645156860352}
213
+ {"step": 4220, "epoch": 0, "loss": 3.7026872634887695, "loss_render": 3.7026872634887695, "loss_align": 1.314376711845398, "elapsed_sec": 1087.3510656356812}
214
+ {"step": 4240, "epoch": 0, "loss": 3.5696210861206055, "loss_render": 3.5696210861206055, "loss_align": 1.3496253490447998, "elapsed_sec": 1091.2020993232727}
215
+ {"step": 4260, "epoch": 0, "loss": 3.2123312950134277, "loss_render": 3.2123312950134277, "loss_align": 1.3291599750518799, "elapsed_sec": 1094.8272709846497}
216
+ {"step": 4280, "epoch": 0, "loss": 2.923588275909424, "loss_render": 2.923588275909424, "loss_align": 1.3204983472824097, "elapsed_sec": 1098.1318237781525}
217
+ {"step": 4300, "epoch": 0, "loss": 3.5883936882019043, "loss_render": 3.5883936882019043, "loss_align": 1.3213682174682617, "elapsed_sec": 1101.7358391284943}
218
+ {"step": 4320, "epoch": 0, "loss": 3.383995532989502, "loss_render": 3.383995532989502, "loss_align": 1.3129656314849854, "elapsed_sec": 1105.9484043121338}
219
+ {"step": 4340, "epoch": 0, "loss": 3.265796661376953, "loss_render": 3.265796661376953, "loss_align": 1.299530029296875, "elapsed_sec": 1109.8734021186829}
220
+ {"step": 4360, "epoch": 0, "loss": 3.337292194366455, "loss_render": 3.337292194366455, "loss_align": 1.3238856792449951, "elapsed_sec": 1113.808788061142}
221
+ {"step": 4380, "epoch": 0, "loss": 3.2125864028930664, "loss_render": 3.2125864028930664, "loss_align": 1.350915789604187, "elapsed_sec": 1117.5486478805542}
222
+ {"step": 4400, "epoch": 0, "loss": 3.444897174835205, "loss_render": 3.444897174835205, "loss_align": 1.3515822887420654, "elapsed_sec": 1120.8326709270477}
223
+ {"step": 4420, "epoch": 0, "loss": 2.8733880519866943, "loss_render": 2.8733880519866943, "loss_align": 1.339876413345337, "elapsed_sec": 1124.2391066551208}
224
+ {"step": 4440, "epoch": 0, "loss": 3.4946746826171875, "loss_render": 3.4946746826171875, "loss_align": 1.325666904449463, "elapsed_sec": 1128.0023458003998}
225
+ {"step": 4460, "epoch": 0, "loss": 3.7551701068878174, "loss_render": 3.7551701068878174, "loss_align": 1.3138947486877441, "elapsed_sec": 1131.9348709583282}
226
+ {"step": 4480, "epoch": 0, "loss": 3.470797300338745, "loss_render": 3.470797300338745, "loss_align": 1.3383394479751587, "elapsed_sec": 1135.8033244609833}
bottleneck_only0/probe_leakage.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "ckpt": "runs/bottleneck_only0/ckpt.pt",
3
+ "max_train_groups": 8000,
4
+ "max_valid_groups": 946,
5
+ "probe_hidden": 0,
6
+ "epochs": 20,
7
+ "lr": 0.01,
8
+ "baseline_majority_lang_acc": 0.5,
9
+ "baseline_majority_len_acc": 0.9271249771118164,
10
+ "lang_probe": {
11
+ "train_acc": 0.9974375,
12
+ "valid_acc": 0.9963002114164905,
13
+ "best_valid_acc": 0.9963002114164905
14
+ },
15
+ "len_probe": {
16
+ "train_acc": 0.92875,
17
+ "valid_acc": 0.9011627906976745,
18
+ "best_valid_acc": 0.9011627906976745
19
+ }
20
+ }
exp0/.ipynb_checkpoints/diag_full-checkpoint.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "ckpt": "runs/exp0/ckpt.pt",
3
+ "nll_en": 3.3116177857300193,
4
+ "nll_zh": 3.3195825212853656,
5
+ "swap_delta_en": 0.8987819911561608,
6
+ "swap_delta_zh": 0.8837760875895462,
7
+ "ablate_zero_delta_en": 8.373346933602782,
8
+ "ablate_mean_delta_en": 0.36316030927742765,
9
+ "ablate_noise_delta_en": 19.7901852750879,
10
+ "ablate_zero_delta_zh": 8.36538216427567,
11
+ "ablate_mean_delta_zh": 0.3555691831963763,
12
+ "ablate_noise_delta_zh": 19.777301167332848,
13
+ "inv_top1_full": 0.7610993385314941,
14
+ "inv_top5_full": 0.9460887908935547,
15
+ "diag_sim_mean": 0.9987158179283142,
16
+ "offdiag_sim_mean": 0.9765520691871643,
17
+ "sim_margin": 0.022163748741149902,
18
+ "n_valid": 946
19
+ }
exp0/.ipynb_checkpoints/probe_leakage-checkpoint.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "ckpt": "runs/exp0/ckpt.pt",
3
+ "max_train_groups": 8000,
4
+ "max_valid_groups": 946,
5
+ "probe_hidden": 0,
6
+ "epochs": 20,
7
+ "lr": 0.01,
8
+ "baseline_majority_lang_acc": 0.5,
9
+ "baseline_majority_len_acc": 0.9271249771118164,
10
+ "lang_probe": {
11
+ "train_acc": 0.997875,
12
+ "valid_acc": 0.9973572938689218,
13
+ "best_valid_acc": 0.9973572938689218
14
+ },
15
+ "len_probe": {
16
+ "train_acc": 0.929,
17
+ "valid_acc": 0.9011627906976745,
18
+ "best_valid_acc": 0.9011627906976745
19
+ }
20
+ }
exp0/ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3d655ef09e128e42bbac82505628171708628d3d128a8858974f9e61d6b57fe
3
+ size 3640274577
exp0/config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train_jsonl": "data/groups_train.jsonl",
3
+ "valid_jsonl": "data/groups_valid.jsonl",
4
+ "run_dir": "runs/exp0",
5
+ "backbone": "google/mt5-small",
6
+ "num_latents": 16,
7
+ "latent_dropout": 0.1,
8
+ "latent_noise_std": 0.01,
9
+ "batch_size": 4,
10
+ "grad_accum": 8,
11
+ "lr": 0.0001,
12
+ "epochs": 1,
13
+ "max_doc_len": 256,
14
+ "max_sum_len": 64,
15
+ "lambda_align": 0.1,
16
+ "tau": 0.07,
17
+ "eval_every": 500,
18
+ "max_train_examples": 0,
19
+ "max_valid_examples": 0
20
+ }
exp0/diag.jsonl ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {"step": 200, "nll": 7.034399070739746, "nll_en": 7.013759384155273, "nll_zh": 7.055038805007935, "inv_top1": 0.615}
2
+ {"step": 400, "nll": 6.078395929336548, "nll_en": 6.084093170166016, "nll_zh": 6.072698726654052, "inv_top1": 0.91}
3
+ {"step": 500, "nll": 6.085161386534225, "nll_en": 6.090854638726716, "nll_zh": 6.07946814543097, "inv_top1": 0.959830866807611}
4
+ {"step": 1000, "nll": 4.184940963164444, "nll_en": 4.187193909860817, "nll_zh": 4.182688052760118, "inv_top1": 0.9799154334038055}
5
+ {"step": 1500, "nll": 3.792753085525545, "nll_en": 3.7924717425291936, "nll_zh": 3.7930344446516946, "inv_top1": 0.985200845665962}
6
+ {"step": 2000, "nll": 3.639131381698189, "nll_en": 3.6367165286243592, "nll_zh": 3.6415462383004122, "inv_top1": 0.9862579281183932}
7
+ {"step": 2500, "nll": 3.5529123340550237, "nll_en": 3.551709388829689, "nll_zh": 3.5541152772641333, "inv_top1": 0.9883720930232558}
8
+ {"step": 3000, "nll": 3.4537662522233314, "nll_en": 3.4503724010499788, "nll_zh": 3.457160114989976, "inv_top1": 0.992600422832981}
9
+ {"step": 3500, "nll": 3.4046940077686916, "nll_en": 3.4010005748297143, "nll_zh": 3.408387434154938, "inv_top1": 0.9936575052854123}
10
+ {"step": 4000, "nll": 3.344993410856477, "nll_en": 3.3410987889287838, "nll_zh": 3.348888062523485, "inv_top1": 0.9947145877378436}
exp0/diag_full.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "ckpt": "runs/exp0/ckpt.pt",
3
+ "nll_en": 3.3116177857300193,
4
+ "nll_zh": 3.3195825212853656,
5
+ "swap_delta_en": 0.8987819911561608,
6
+ "swap_delta_zh": 0.8837760875895462,
7
+ "ablate_zero_delta_en": 8.373346933602782,
8
+ "ablate_mean_delta_en": 0.36316030927742765,
9
+ "ablate_noise_delta_en": 19.7901852750879,
10
+ "ablate_zero_delta_zh": 8.36538216427567,
11
+ "ablate_mean_delta_zh": 0.3555691831963763,
12
+ "ablate_noise_delta_zh": 19.777301167332848,
13
+ "inv_top1_full": 0.7610993385314941,
14
+ "inv_top5_full": 0.9460887908935547,
15
+ "diag_sim_mean": 0.9987158179283142,
16
+ "offdiag_sim_mean": 0.9765520691871643,
17
+ "sim_margin": 0.022163748741149902,
18
+ "n_valid": 946
19
+ }
exp0/logs.jsonl ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"step": 20, "epoch": 0, "loss": 21.224557876586914, "loss_render": 21.067235946655273, "loss_align": 1.5732271671295166, "elapsed_sec": 4.4022955894470215}
2
+ {"step": 40, "epoch": 0, "loss": 15.360982894897461, "loss_render": 15.244510650634766, "loss_align": 1.1647266149520874, "elapsed_sec": 8.462875366210938}
3
+ {"step": 60, "epoch": 0, "loss": 13.5419282913208, "loss_render": 13.409125328063965, "loss_align": 1.3280320167541504, "elapsed_sec": 12.868719816207886}
4
+ {"step": 80, "epoch": 0, "loss": 10.317197799682617, "loss_render": 10.177434921264648, "loss_align": 1.3976271152496338, "elapsed_sec": 16.943655967712402}
5
+ {"step": 100, "epoch": 0, "loss": 9.295454025268555, "loss_render": 9.164984703063965, "loss_align": 1.3046966791152954, "elapsed_sec": 20.874454021453857}
6
+ {"step": 120, "epoch": 0, "loss": 8.602041244506836, "loss_render": 8.472453117370605, "loss_align": 1.295877456665039, "elapsed_sec": 25.07156491279602}
7
+ {"step": 140, "epoch": 0, "loss": 8.854230880737305, "loss_render": 8.725814819335938, "loss_align": 1.2841646671295166, "elapsed_sec": 28.885475397109985}
8
+ {"step": 160, "epoch": 0, "loss": 9.430089950561523, "loss_render": 9.273889541625977, "loss_align": 1.5620025396347046, "elapsed_sec": 32.54270148277283}
9
+ {"step": 180, "epoch": 0, "loss": 8.34247875213623, "loss_render": 8.20531940460205, "loss_align": 1.371597409248352, "elapsed_sec": 36.89546012878418}
10
+ {"step": 200, "epoch": 0, "loss": 7.910696029663086, "loss_render": 7.775359153747559, "loss_align": 1.3533668518066406, "elapsed_sec": 41.09798073768616}
11
+ {"step": 220, "epoch": 0, "loss": 7.066567420959473, "loss_render": 6.931190013885498, "loss_align": 1.3537718057632446, "elapsed_sec": 56.58197784423828}
12
+ {"step": 240, "epoch": 0, "loss": 6.670694351196289, "loss_render": 6.533330917358398, "loss_align": 1.3736337423324585, "elapsed_sec": 60.625773906707764}
13
+ {"step": 260, "epoch": 0, "loss": 6.55427360534668, "loss_render": 6.420336723327637, "loss_align": 1.3393704891204834, "elapsed_sec": 64.72721481323242}
14
+ {"step": 280, "epoch": 0, "loss": 7.091551780700684, "loss_render": 6.956676959991455, "loss_align": 1.3487468957901, "elapsed_sec": 69.01629519462585}
15
+ {"step": 300, "epoch": 0, "loss": 6.509806156158447, "loss_render": 6.375725746154785, "loss_align": 1.3408057689666748, "elapsed_sec": 73.12118458747864}
16
+ {"step": 320, "epoch": 0, "loss": 6.467694282531738, "loss_render": 6.335711479187012, "loss_align": 1.319826364517212, "elapsed_sec": 77.17262434959412}
17
+ {"step": 340, "epoch": 0, "loss": 6.3551836013793945, "loss_render": 6.2233710289001465, "loss_align": 1.3181260824203491, "elapsed_sec": 81.21651721000671}
18
+ {"step": 360, "epoch": 0, "loss": 6.140443325042725, "loss_render": 6.008922100067139, "loss_align": 1.3152109384536743, "elapsed_sec": 85.43648934364319}
19
+ {"step": 380, "epoch": 0, "loss": 6.360215663909912, "loss_render": 6.227061748504639, "loss_align": 1.3315403461456299, "elapsed_sec": 89.73866510391235}
20
+ {"step": 400, "epoch": 0, "loss": 6.213184356689453, "loss_render": 6.077251434326172, "loss_align": 1.3593274354934692, "elapsed_sec": 93.9893217086792}
21
+ {"step": 420, "epoch": 0, "loss": 6.291845798492432, "loss_render": 6.165009498596191, "loss_align": 1.268361210823059, "elapsed_sec": 109.4705159664154}
22
+ {"step": 440, "epoch": 0, "loss": 5.622682094573975, "loss_render": 5.498342514038086, "loss_align": 1.243397831916809, "elapsed_sec": 113.30699443817139}
23
+ {"step": 460, "epoch": 0, "loss": 5.849954128265381, "loss_render": 5.715363025665283, "loss_align": 1.345911979675293, "elapsed_sec": 116.53437638282776}
24
+ {"step": 480, "epoch": 0, "loss": 5.509498119354248, "loss_render": 5.386768341064453, "loss_align": 1.2272990942001343, "elapsed_sec": 119.99079060554504}
25
+ {"step": 500, "epoch": 0, "loss": 6.094233989715576, "loss_render": 5.96877908706665, "loss_align": 1.2545472383499146, "elapsed_sec": 123.70130062103271}
26
+ {"step": 20, "epoch": 0, "loss": 19.070945739746094, "loss_render": 18.84700584411621, "loss_align": 2.2393996715545654, "elapsed_sec": 4.2826337814331055}
27
+ {"step": 40, "epoch": 0, "loss": 15.92590618133545, "loss_render": 15.771186828613281, "loss_align": 1.5471972227096558, "elapsed_sec": 8.027257919311523}
28
+ {"step": 60, "epoch": 0, "loss": 11.48330020904541, "loss_render": 11.381185531616211, "loss_align": 1.0211436748504639, "elapsed_sec": 11.741723775863647}
29
+ {"step": 80, "epoch": 0, "loss": 10.712089538574219, "loss_render": 10.535165786743164, "loss_align": 1.769235372543335, "elapsed_sec": 16.20432758331299}
30
+ {"step": 100, "epoch": 0, "loss": 8.726888656616211, "loss_render": 8.594282150268555, "loss_align": 1.3260676860809326, "elapsed_sec": 20.485617876052856}
31
+ {"step": 120, "epoch": 0, "loss": 8.815863609313965, "loss_render": 8.68048095703125, "loss_align": 1.3538284301757812, "elapsed_sec": 24.639766693115234}
32
+ {"step": 140, "epoch": 0, "loss": 8.290373802185059, "loss_render": 8.15549087524414, "loss_align": 1.3488279581069946, "elapsed_sec": 28.28698205947876}
33
+ {"step": 160, "epoch": 0, "loss": 8.012615203857422, "loss_render": 7.877848148345947, "loss_align": 1.347669243812561, "elapsed_sec": 32.17251896858215}
34
+ {"step": 180, "epoch": 0, "loss": 7.864224910736084, "loss_render": 7.726920127868652, "loss_align": 1.373045802116394, "elapsed_sec": 35.70785403251648}
35
+ {"step": 200, "epoch": 0, "loss": 7.930390357971191, "loss_render": 7.791510581970215, "loss_align": 1.3887962102890015, "elapsed_sec": 39.338255643844604}
36
+ {"step": 220, "epoch": 0, "loss": 7.689385414123535, "loss_render": 7.554874420166016, "loss_align": 1.3451077938079834, "elapsed_sec": 42.9450044631958}
37
+ {"step": 240, "epoch": 0, "loss": 7.888538360595703, "loss_render": 7.7497663497924805, "loss_align": 1.3877183198928833, "elapsed_sec": 46.93799185752869}
38
+ {"step": 260, "epoch": 0, "loss": 7.579165935516357, "loss_render": 7.441595554351807, "loss_align": 1.3757023811340332, "elapsed_sec": 51.082249879837036}
39
+ {"step": 280, "epoch": 0, "loss": 7.6984171867370605, "loss_render": 7.56091833114624, "loss_align": 1.3749892711639404, "elapsed_sec": 54.938756704330444}
40
+ {"step": 300, "epoch": 0, "loss": 7.647878646850586, "loss_render": 7.509629249572754, "loss_align": 1.3824915885925293, "elapsed_sec": 58.59120297431946}
41
+ {"step": 320, "epoch": 0, "loss": 7.326882839202881, "loss_render": 7.189317226409912, "loss_align": 1.3756581544876099, "elapsed_sec": 62.31136178970337}
42
+ {"step": 340, "epoch": 0, "loss": 7.067880153656006, "loss_render": 6.930825710296631, "loss_align": 1.3705456256866455, "elapsed_sec": 66.15338659286499}
43
+ {"step": 360, "epoch": 0, "loss": 7.112457275390625, "loss_render": 6.976511001586914, "loss_align": 1.359464168548584, "elapsed_sec": 69.56165432929993}
44
+ {"step": 380, "epoch": 0, "loss": 7.260199546813965, "loss_render": 7.12403678894043, "loss_align": 1.3616299629211426, "elapsed_sec": 73.1796646118164}
45
+ {"step": 400, "epoch": 0, "loss": 7.619089126586914, "loss_render": 7.483177185058594, "loss_align": 1.3591183423995972, "elapsed_sec": 76.85512065887451}
46
+ {"step": 420, "epoch": 0, "loss": 6.648477554321289, "loss_render": 6.525115489959717, "loss_align": 1.2336187362670898, "elapsed_sec": 80.48637080192566}
47
+ {"step": 440, "epoch": 0, "loss": 6.975794315338135, "loss_render": 6.841283798217773, "loss_align": 1.3451063632965088, "elapsed_sec": 83.98112845420837}
48
+ {"step": 460, "epoch": 0, "loss": 7.038669109344482, "loss_render": 6.913149833679199, "loss_align": 1.255194902420044, "elapsed_sec": 87.42641162872314}
49
+ {"step": 480, "epoch": 0, "loss": 6.976850986480713, "loss_render": 6.843624591827393, "loss_align": 1.3322619199752808, "elapsed_sec": 90.95184922218323}
50
+ {"step": 500, "epoch": 0, "loss": 7.029325008392334, "loss_render": 6.905562400817871, "loss_align": 1.2376277446746826, "elapsed_sec": 94.70070552825928}
51
+ {"step": 520, "epoch": 0, "loss": 5.812631607055664, "loss_render": 5.698040962219238, "loss_align": 1.1459063291549683, "elapsed_sec": 132.48490953445435}
52
+ {"step": 540, "epoch": 0, "loss": 5.79000186920166, "loss_render": 5.671420097351074, "loss_align": 1.1858175992965698, "elapsed_sec": 136.19478297233582}
53
+ {"step": 560, "epoch": 0, "loss": 5.882811069488525, "loss_render": 5.772146224975586, "loss_align": 1.1066474914550781, "elapsed_sec": 139.77869367599487}
54
+ {"step": 580, "epoch": 0, "loss": 5.619187831878662, "loss_render": 5.494204044342041, "loss_align": 1.2498395442962646, "elapsed_sec": 143.65679216384888}
55
+ {"step": 600, "epoch": 0, "loss": 5.458576202392578, "loss_render": 5.32813835144043, "loss_align": 1.304380178451538, "elapsed_sec": 147.08143186569214}
56
+ {"step": 620, "epoch": 0, "loss": 5.3892502784729, "loss_render": 5.2935309410095215, "loss_align": 0.9571921825408936, "elapsed_sec": 150.50924468040466}
57
+ {"step": 640, "epoch": 0, "loss": 5.240175724029541, "loss_render": 5.136693477630615, "loss_align": 1.0348236560821533, "elapsed_sec": 154.26896572113037}
58
+ {"step": 660, "epoch": 0, "loss": 5.748118877410889, "loss_render": 5.625927925109863, "loss_align": 1.2219114303588867, "elapsed_sec": 157.9432497024536}
59
+ {"step": 680, "epoch": 0, "loss": 5.405661106109619, "loss_render": 5.277651786804199, "loss_align": 1.2800949811935425, "elapsed_sec": 161.3929078578949}
60
+ {"step": 700, "epoch": 0, "loss": 4.730281352996826, "loss_render": 4.611264228820801, "loss_align": 1.1901702880859375, "elapsed_sec": 164.88981294631958}
61
+ {"step": 720, "epoch": 0, "loss": 4.796754360198975, "loss_render": 4.670107841491699, "loss_align": 1.2664663791656494, "elapsed_sec": 168.24689078330994}
62
+ {"step": 740, "epoch": 0, "loss": 4.392581462860107, "loss_render": 4.272594451904297, "loss_align": 1.1998717784881592, "elapsed_sec": 171.93785309791565}
63
+ {"step": 760, "epoch": 0, "loss": 4.702377796173096, "loss_render": 4.579741477966309, "loss_align": 1.2263638973236084, "elapsed_sec": 175.36873865127563}
64
+ {"step": 780, "epoch": 0, "loss": 4.692126750946045, "loss_render": 4.577973365783691, "loss_align": 1.141533374786377, "elapsed_sec": 178.92433977127075}
65
+ {"step": 800, "epoch": 0, "loss": 4.6318817138671875, "loss_render": 4.513406276702881, "loss_align": 1.184752106666565, "elapsed_sec": 182.2117805480957}
66
+ {"step": 820, "epoch": 0, "loss": 4.790045261383057, "loss_render": 4.662418365478516, "loss_align": 1.2762672901153564, "elapsed_sec": 186.0461504459381}
67
+ {"step": 840, "epoch": 0, "loss": 4.7037248611450195, "loss_render": 4.587821006774902, "loss_align": 1.1590384244918823, "elapsed_sec": 189.7503900527954}
68
+ {"step": 860, "epoch": 0, "loss": 4.403511047363281, "loss_render": 4.283609867095947, "loss_align": 1.199009895324707, "elapsed_sec": 193.18198537826538}
69
+ {"step": 880, "epoch": 0, "loss": 4.747882843017578, "loss_render": 4.631610870361328, "loss_align": 1.1627213954925537, "elapsed_sec": 196.45677161216736}
70
+ {"step": 900, "epoch": 0, "loss": 4.82310676574707, "loss_render": 4.703283786773682, "loss_align": 1.198230266571045, "elapsed_sec": 200.18356204032898}
71
+ {"step": 920, "epoch": 0, "loss": 4.2685747146606445, "loss_render": 4.146474361419678, "loss_align": 1.2210032939910889, "elapsed_sec": 203.93334245681763}
72
+ {"step": 940, "epoch": 0, "loss": 4.899518013000488, "loss_render": 4.775814056396484, "loss_align": 1.2370407581329346, "elapsed_sec": 207.5121557712555}
73
+ {"step": 960, "epoch": 0, "loss": 4.229927062988281, "loss_render": 4.099707126617432, "loss_align": 1.3021996021270752, "elapsed_sec": 211.04458856582642}
74
+ {"step": 980, "epoch": 0, "loss": 4.432365894317627, "loss_render": 4.306435585021973, "loss_align": 1.2593035697937012, "elapsed_sec": 214.76176118850708}
75
+ {"step": 1000, "epoch": 0, "loss": 4.595223426818848, "loss_render": 4.468973159790039, "loss_align": 1.2625032663345337, "elapsed_sec": 218.41787695884705}
76
+ {"step": 1020, "epoch": 0, "loss": 4.196355819702148, "loss_render": 4.07044792175293, "loss_align": 1.2590787410736084, "elapsed_sec": 259.28020882606506}
77
+ {"step": 1040, "epoch": 0, "loss": 4.124962329864502, "loss_render": 4.011495113372803, "loss_align": 1.1346709728240967, "elapsed_sec": 263.1357138156891}
78
+ {"step": 1060, "epoch": 0, "loss": 4.123751640319824, "loss_render": 3.996181011199951, "loss_align": 1.275707721710205, "elapsed_sec": 266.9847369194031}
79
+ {"step": 1080, "epoch": 0, "loss": 4.308910846710205, "loss_render": 4.180483818054199, "loss_align": 1.2842681407928467, "elapsed_sec": 271.21414852142334}
80
+ {"step": 1100, "epoch": 0, "loss": 4.096280097961426, "loss_render": 3.9691014289855957, "loss_align": 1.271787405014038, "elapsed_sec": 275.6742253303528}
81
+ {"step": 1120, "epoch": 0, "loss": 4.247128963470459, "loss_render": 4.112787246704102, "loss_align": 1.3434189558029175, "elapsed_sec": 280.0617733001709}
82
+ {"step": 1140, "epoch": 0, "loss": 4.310777187347412, "loss_render": 4.181565284729004, "loss_align": 1.2921206951141357, "elapsed_sec": 284.32492876052856}
83
+ {"step": 1160, "epoch": 0, "loss": 3.992020606994629, "loss_render": 3.858767032623291, "loss_align": 1.3325363397598267, "elapsed_sec": 288.1046493053436}
84
+ {"step": 1180, "epoch": 0, "loss": 3.800358772277832, "loss_render": 3.680168867111206, "loss_align": 1.201899528503418, "elapsed_sec": 291.46919465065}
85
+ {"step": 1200, "epoch": 0, "loss": 4.353869438171387, "loss_render": 4.226016044616699, "loss_align": 1.2785351276397705, "elapsed_sec": 295.63701343536377}
86
+ {"step": 1220, "epoch": 0, "loss": 4.22303581237793, "loss_render": 4.099870204925537, "loss_align": 1.231654167175293, "elapsed_sec": 299.50660824775696}
87
+ {"step": 1240, "epoch": 0, "loss": 3.8257665634155273, "loss_render": 3.699065685272217, "loss_align": 1.2670096158981323, "elapsed_sec": 303.39264965057373}
88
+ {"step": 1260, "epoch": 0, "loss": 4.347028732299805, "loss_render": 4.214409828186035, "loss_align": 1.3261911869049072, "elapsed_sec": 306.9986560344696}
89
+ {"step": 1280, "epoch": 0, "loss": 3.778672933578491, "loss_render": 3.6606764793395996, "loss_align": 1.1799635887145996, "elapsed_sec": 310.51953387260437}
90
+ {"step": 1300, "epoch": 0, "loss": 4.30056095123291, "loss_render": 4.167745113372803, "loss_align": 1.3281562328338623, "elapsed_sec": 314.60113525390625}
91
+ {"step": 1320, "epoch": 0, "loss": 4.544971466064453, "loss_render": 4.418474197387695, "loss_align": 1.2649712562561035, "elapsed_sec": 318.17942214012146}
92
+ {"step": 1340, "epoch": 0, "loss": 3.80814266204834, "loss_render": 3.6893770694732666, "loss_align": 1.1876564025878906, "elapsed_sec": 321.5578272342682}
93
+ {"step": 1360, "epoch": 0, "loss": 4.041284561157227, "loss_render": 3.9171199798583984, "loss_align": 1.241645336151123, "elapsed_sec": 325.324275970459}
94
+ {"step": 1380, "epoch": 0, "loss": 3.741255521774292, "loss_render": 3.610351085662842, "loss_align": 1.3090434074401855, "elapsed_sec": 328.9263265132904}
95
+ {"step": 1400, "epoch": 0, "loss": 4.401944160461426, "loss_render": 4.273633003234863, "loss_align": 1.2831120491027832, "elapsed_sec": 332.38989448547363}
96
+ {"step": 1420, "epoch": 0, "loss": 3.9525275230407715, "loss_render": 3.831545829772949, "loss_align": 1.2098171710968018, "elapsed_sec": 335.85763144493103}
97
+ {"step": 1440, "epoch": 0, "loss": 4.221649169921875, "loss_render": 4.086458206176758, "loss_align": 1.3519110679626465, "elapsed_sec": 339.68673634529114}
98
+ {"step": 1460, "epoch": 0, "loss": 3.6483700275421143, "loss_render": 3.5153861045837402, "loss_align": 1.3298401832580566, "elapsed_sec": 343.71640825271606}
99
+ {"step": 1480, "epoch": 0, "loss": 4.3308587074279785, "loss_render": 4.198127746582031, "loss_align": 1.3273099660873413, "elapsed_sec": 347.3681538105011}
100
+ {"step": 1500, "epoch": 0, "loss": 3.427548408508301, "loss_render": 3.314873218536377, "loss_align": 1.1267509460449219, "elapsed_sec": 351.1673278808594}
101
+ {"step": 1520, "epoch": 0, "loss": 3.757953643798828, "loss_render": 3.6334781646728516, "loss_align": 1.2447543144226074, "elapsed_sec": 387.9661293029785}
102
+ {"step": 1540, "epoch": 0, "loss": 3.594203472137451, "loss_render": 3.472632646560669, "loss_align": 1.2157093286514282, "elapsed_sec": 392.0071156024933}
103
+ {"step": 1560, "epoch": 0, "loss": 3.841197967529297, "loss_render": 3.712519645690918, "loss_align": 1.286782145500183, "elapsed_sec": 395.974228143692}
104
+ {"step": 1580, "epoch": 0, "loss": 4.008372783660889, "loss_render": 3.885647773742676, "loss_align": 1.2272504568099976, "elapsed_sec": 400.1085698604584}
105
+ {"step": 1600, "epoch": 0, "loss": 4.276418209075928, "loss_render": 4.15651798248291, "loss_align": 1.1990017890930176, "elapsed_sec": 403.9680063724518}
106
+ {"step": 1620, "epoch": 0, "loss": 3.8610360622406006, "loss_render": 3.7377514839172363, "loss_align": 1.2328464984893799, "elapsed_sec": 407.7921669483185}
107
+ {"step": 1640, "epoch": 0, "loss": 3.7124037742614746, "loss_render": 3.5836427211761475, "loss_align": 1.2876107692718506, "elapsed_sec": 411.77089524269104}
108
+ {"step": 1660, "epoch": 0, "loss": 3.6762208938598633, "loss_render": 3.558105945587158, "loss_align": 1.181150197982788, "elapsed_sec": 415.7397029399872}
109
+ {"step": 1680, "epoch": 0, "loss": 3.9557228088378906, "loss_render": 3.842298984527588, "loss_align": 1.1342370510101318, "elapsed_sec": 419.1814663410187}
110
+ {"step": 1700, "epoch": 0, "loss": 3.8916444778442383, "loss_render": 3.7674033641815186, "loss_align": 1.24241042137146, "elapsed_sec": 423.0752823352814}
111
+ {"step": 1720, "epoch": 0, "loss": 3.7411422729492188, "loss_render": 3.6112558841705322, "loss_align": 1.2988640069961548, "elapsed_sec": 427.1700642108917}
112
+ {"step": 1740, "epoch": 0, "loss": 3.9185540676116943, "loss_render": 3.7883057594299316, "loss_align": 1.3024824857711792, "elapsed_sec": 431.30755734443665}
113
+ {"step": 1760, "epoch": 0, "loss": 3.819110155105591, "loss_render": 3.691530227661133, "loss_align": 1.2757983207702637, "elapsed_sec": 435.23905873298645}
114
+ {"step": 1780, "epoch": 0, "loss": 3.9335873126983643, "loss_render": 3.806086540222168, "loss_align": 1.275007724761963, "elapsed_sec": 439.2127728462219}
115
+ {"step": 1800, "epoch": 0, "loss": 4.086656093597412, "loss_render": 3.9647040367126465, "loss_align": 1.2195206880569458, "elapsed_sec": 443.03975772857666}
116
+ {"step": 1820, "epoch": 0, "loss": 3.71828031539917, "loss_render": 3.5939784049987793, "loss_align": 1.2430200576782227, "elapsed_sec": 446.8329973220825}
117
+ {"step": 1840, "epoch": 0, "loss": 4.106589317321777, "loss_render": 3.978208541870117, "loss_align": 1.2838071584701538, "elapsed_sec": 450.8165051937103}
118
+ {"step": 1860, "epoch": 0, "loss": 4.120097637176514, "loss_render": 3.992905616760254, "loss_align": 1.2719194889068604, "elapsed_sec": 455.44277334213257}
119
+ {"step": 1880, "epoch": 0, "loss": 3.4663734436035156, "loss_render": 3.344759464263916, "loss_align": 1.2161396741867065, "elapsed_sec": 459.69877219200134}
120
+ {"step": 1900, "epoch": 0, "loss": 4.117480754852295, "loss_render": 3.9923200607299805, "loss_align": 1.2516090869903564, "elapsed_sec": 464.1016643047333}
121
+ {"step": 1920, "epoch": 0, "loss": 4.033799648284912, "loss_render": 3.913688898086548, "loss_align": 1.2011082172393799, "elapsed_sec": 468.4943459033966}
122
+ {"step": 1940, "epoch": 0, "loss": 3.9187936782836914, "loss_render": 3.795642137527466, "loss_align": 1.2315165996551514, "elapsed_sec": 472.3701014518738}
123
+ {"step": 1960, "epoch": 0, "loss": 3.719744920730591, "loss_render": 3.593451738357544, "loss_align": 1.2629308700561523, "elapsed_sec": 476.19396686553955}
124
+ {"step": 1980, "epoch": 0, "loss": 3.732513189315796, "loss_render": 3.6078197956085205, "loss_align": 1.2469336986541748, "elapsed_sec": 479.7407228946686}
125
+ {"step": 2000, "epoch": 0, "loss": 3.6540167331695557, "loss_render": 3.5205092430114746, "loss_align": 1.3350756168365479, "elapsed_sec": 483.2703151702881}
126
+ {"step": 2020, "epoch": 0, "loss": 3.3100626468658447, "loss_render": 3.18483829498291, "loss_align": 1.2522435188293457, "elapsed_sec": 520.5582990646362}
127
+ {"step": 2040, "epoch": 0, "loss": 3.869527816772461, "loss_render": 3.7406649589538574, "loss_align": 1.2886292934417725, "elapsed_sec": 524.1767659187317}
128
+ {"step": 2060, "epoch": 0, "loss": 4.4456963539123535, "loss_render": 4.320549011230469, "loss_align": 1.2514748573303223, "elapsed_sec": 527.8528580665588}
129
+ {"step": 2080, "epoch": 0, "loss": 3.9319570064544678, "loss_render": 3.801260471343994, "loss_align": 1.3069654703140259, "elapsed_sec": 531.0157916545868}
130
+ {"step": 2100, "epoch": 0, "loss": 3.988189220428467, "loss_render": 3.8608415126800537, "loss_align": 1.2734770774841309, "elapsed_sec": 534.1147558689117}
131
+ {"step": 2120, "epoch": 0, "loss": 3.683408260345459, "loss_render": 3.5517852306365967, "loss_align": 1.316230297088623, "elapsed_sec": 537.8404731750488}
132
+ {"step": 2140, "epoch": 0, "loss": 3.3974180221557617, "loss_render": 3.2734038829803467, "loss_align": 1.2401411533355713, "elapsed_sec": 541.5495643615723}
133
+ {"step": 2160, "epoch": 0, "loss": 5.084482669830322, "loss_render": 4.959369659423828, "loss_align": 1.251128911972046, "elapsed_sec": 545.0292248725891}
134
+ {"step": 2180, "epoch": 0, "loss": 3.3941047191619873, "loss_render": 3.274590015411377, "loss_align": 1.1951477527618408, "elapsed_sec": 548.0864658355713}
135
+ {"step": 2200, "epoch": 0, "loss": 3.8321545124053955, "loss_render": 3.7130608558654785, "loss_align": 1.19093656539917, "elapsed_sec": 551.0944242477417}
136
+ {"step": 2220, "epoch": 0, "loss": 3.762334108352661, "loss_render": 3.631870746612549, "loss_align": 1.3046338558197021, "elapsed_sec": 555.0852580070496}
137
+ {"step": 2240, "epoch": 0, "loss": 3.7591655254364014, "loss_render": 3.6302924156188965, "loss_align": 1.2887306213378906, "elapsed_sec": 558.9427144527435}
138
+ {"step": 2260, "epoch": 0, "loss": 3.4532783031463623, "loss_render": 3.321164608001709, "loss_align": 1.3211379051208496, "elapsed_sec": 562.4919002056122}
139
+ {"step": 2280, "epoch": 0, "loss": 3.6359376907348633, "loss_render": 3.5086936950683594, "loss_align": 1.27243971824646, "elapsed_sec": 566.0845420360565}
140
+ {"step": 2300, "epoch": 0, "loss": 3.6633527278900146, "loss_render": 3.548004627227783, "loss_align": 1.1534814834594727, "elapsed_sec": 569.861979007721}
141
+ {"step": 2320, "epoch": 0, "loss": 3.6509766578674316, "loss_render": 3.531930685043335, "loss_align": 1.1904597282409668, "elapsed_sec": 573.5495617389679}
142
+ {"step": 2340, "epoch": 0, "loss": 3.7116434574127197, "loss_render": 3.580857038497925, "loss_align": 1.3078639507293701, "elapsed_sec": 577.3351736068726}
143
+ {"step": 2360, "epoch": 0, "loss": 3.5186362266540527, "loss_render": 3.3914761543273926, "loss_align": 1.2715997695922852, "elapsed_sec": 580.8537561893463}
144
+ {"step": 2380, "epoch": 0, "loss": 3.684113025665283, "loss_render": 3.555558443069458, "loss_align": 1.2855451107025146, "elapsed_sec": 584.4020211696625}
145
+ {"step": 2400, "epoch": 0, "loss": 3.958482027053833, "loss_render": 3.8302037715911865, "loss_align": 1.282783031463623, "elapsed_sec": 587.6288588047028}
146
+ {"step": 2420, "epoch": 0, "loss": 4.480639457702637, "loss_render": 4.354602813720703, "loss_align": 1.260368824005127, "elapsed_sec": 591.0528960227966}
147
+ {"step": 2440, "epoch": 0, "loss": 3.3872504234313965, "loss_render": 3.26253080368042, "loss_align": 1.2471964359283447, "elapsed_sec": 594.4966959953308}
148
+ {"step": 2460, "epoch": 0, "loss": 3.7643017768859863, "loss_render": 3.642849922180176, "loss_align": 1.2145190238952637, "elapsed_sec": 598.2918803691864}
149
+ {"step": 2480, "epoch": 0, "loss": 3.2991783618927, "loss_render": 3.1676251888275146, "loss_align": 1.3155312538146973, "elapsed_sec": 602.0133755207062}
150
+ {"step": 2500, "epoch": 0, "loss": 3.516850233078003, "loss_render": 3.3872244358062744, "loss_align": 1.2962589263916016, "elapsed_sec": 605.9701299667358}
151
+ {"step": 2520, "epoch": 0, "loss": 3.625765323638916, "loss_render": 3.4946165084838867, "loss_align": 1.3114889860153198, "elapsed_sec": 643.1815435886383}
152
+ {"step": 2540, "epoch": 0, "loss": 3.64266037940979, "loss_render": 3.5088815689086914, "loss_align": 1.337787389755249, "elapsed_sec": 647.1685037612915}
153
+ {"step": 2560, "epoch": 0, "loss": 3.9286458492279053, "loss_render": 3.811392307281494, "loss_align": 1.1725361347198486, "elapsed_sec": 651.3283531665802}
154
+ {"step": 2580, "epoch": 0, "loss": 4.0110039710998535, "loss_render": 3.885138511657715, "loss_align": 1.2586548328399658, "elapsed_sec": 655.0110738277435}
155
+ {"step": 2600, "epoch": 0, "loss": 3.6000518798828125, "loss_render": 3.474778652191162, "loss_align": 1.2527333498001099, "elapsed_sec": 658.674483537674}
156
+ {"step": 2620, "epoch": 0, "loss": 3.733151912689209, "loss_render": 3.617654323577881, "loss_align": 1.154976487159729, "elapsed_sec": 662.1629638671875}
157
+ {"step": 2640, "epoch": 0, "loss": 2.9583663940429688, "loss_render": 2.8370306491851807, "loss_align": 1.2133582830429077, "elapsed_sec": 665.6173822879791}
158
+ {"step": 2660, "epoch": 0, "loss": 3.393970251083374, "loss_render": 3.270479202270508, "loss_align": 1.2349107265472412, "elapsed_sec": 669.0827767848969}
159
+ {"step": 2680, "epoch": 0, "loss": 3.7712604999542236, "loss_render": 3.6458511352539062, "loss_align": 1.2540932893753052, "elapsed_sec": 672.4449455738068}
160
+ {"step": 2700, "epoch": 0, "loss": 3.801778793334961, "loss_render": 3.676651954650879, "loss_align": 1.2512683868408203, "elapsed_sec": 675.861204624176}
161
+ {"step": 2720, "epoch": 0, "loss": 3.7881720066070557, "loss_render": 3.674795150756836, "loss_align": 1.1337674856185913, "elapsed_sec": 679.7726430892944}
162
+ {"step": 2740, "epoch": 0, "loss": 3.2474889755249023, "loss_render": 3.117105484008789, "loss_align": 1.3038359880447388, "elapsed_sec": 683.5487442016602}
163
+ {"step": 2760, "epoch": 0, "loss": 3.6887729167938232, "loss_render": 3.569216728210449, "loss_align": 1.1955628395080566, "elapsed_sec": 687.1459310054779}
164
+ {"step": 2780, "epoch": 0, "loss": 3.3323986530303955, "loss_render": 3.2133493423461914, "loss_align": 1.190493106842041, "elapsed_sec": 691.079567193985}
165
+ {"step": 2800, "epoch": 0, "loss": 3.202134847640991, "loss_render": 3.083744764328003, "loss_align": 1.1839005947113037, "elapsed_sec": 694.5943274497986}
166
+ {"step": 2820, "epoch": 0, "loss": 3.4329497814178467, "loss_render": 3.3152427673339844, "loss_align": 1.1770696640014648, "elapsed_sec": 698.0040194988251}
167
+ {"step": 2840, "epoch": 0, "loss": 3.7077035903930664, "loss_render": 3.585219383239746, "loss_align": 1.2248408794403076, "elapsed_sec": 701.6335427761078}
168
+ {"step": 2860, "epoch": 0, "loss": 3.477407932281494, "loss_render": 3.3491742610931396, "loss_align": 1.2823364734649658, "elapsed_sec": 705.644501209259}
169
+ {"step": 2880, "epoch": 0, "loss": 3.610718250274658, "loss_render": 3.4956696033477783, "loss_align": 1.1504876613616943, "elapsed_sec": 709.2923421859741}
170
+ {"step": 2900, "epoch": 0, "loss": 3.8198390007019043, "loss_render": 3.699303150177002, "loss_align": 1.205357313156128, "elapsed_sec": 713.0162396430969}
171
+ {"step": 2920, "epoch": 0, "loss": 3.550257444381714, "loss_render": 3.430603504180908, "loss_align": 1.196540355682373, "elapsed_sec": 716.751672744751}
172
+ {"step": 2940, "epoch": 0, "loss": 3.4662628173828125, "loss_render": 3.345240831375122, "loss_align": 1.2102196216583252, "elapsed_sec": 720.4123275279999}
173
+ {"step": 2960, "epoch": 0, "loss": 3.522108554840088, "loss_render": 3.39461088180542, "loss_align": 1.274977445602417, "elapsed_sec": 724.1647436618805}
174
+ {"step": 2980, "epoch": 0, "loss": 4.294383525848389, "loss_render": 4.172863483428955, "loss_align": 1.2152018547058105, "elapsed_sec": 727.9561200141907}
175
+ {"step": 3000, "epoch": 0, "loss": 3.3332793712615967, "loss_render": 3.212486505508423, "loss_align": 1.2079284191131592, "elapsed_sec": 731.7961676120758}
176
+ {"step": 3020, "epoch": 0, "loss": 3.763676881790161, "loss_render": 3.645622730255127, "loss_align": 1.1805411577224731, "elapsed_sec": 771.1251530647278}
177
+ {"step": 3040, "epoch": 0, "loss": 3.5830156803131104, "loss_render": 3.4571762084960938, "loss_align": 1.2583940029144287, "elapsed_sec": 774.8983845710754}
178
+ {"step": 3060, "epoch": 0, "loss": 3.26604962348938, "loss_render": 3.151447057723999, "loss_align": 1.1460249423980713, "elapsed_sec": 778.99134516716}
179
+ {"step": 3080, "epoch": 0, "loss": 3.757965564727783, "loss_render": 3.6350483894348145, "loss_align": 1.2291722297668457, "elapsed_sec": 783.0125930309296}
180
+ {"step": 3100, "epoch": 0, "loss": 3.6047329902648926, "loss_render": 3.483248233795166, "loss_align": 1.2148466110229492, "elapsed_sec": 787.1167662143707}
181
+ {"step": 3120, "epoch": 0, "loss": 3.492598533630371, "loss_render": 3.3661184310913086, "loss_align": 1.264799952507019, "elapsed_sec": 790.9248690605164}
182
+ {"step": 3140, "epoch": 0, "loss": 3.6819114685058594, "loss_render": 3.556112766265869, "loss_align": 1.2579882144927979, "elapsed_sec": 795.1178963184357}
183
+ {"step": 3160, "epoch": 0, "loss": 3.193608283996582, "loss_render": 3.0682616233825684, "loss_align": 1.253467082977295, "elapsed_sec": 799.0638675689697}
184
+ {"step": 3180, "epoch": 0, "loss": 3.8599345684051514, "loss_render": 3.7407126426696777, "loss_align": 1.1922190189361572, "elapsed_sec": 803.1858396530151}
185
+ {"step": 3200, "epoch": 0, "loss": 3.678039073944092, "loss_render": 3.554908275604248, "loss_align": 1.2313083410263062, "elapsed_sec": 806.7985143661499}
186
+ {"step": 3220, "epoch": 0, "loss": 3.579167604446411, "loss_render": 3.4598031044006348, "loss_align": 1.1936442852020264, "elapsed_sec": 810.6333293914795}
187
+ {"step": 3240, "epoch": 0, "loss": 3.489227294921875, "loss_render": 3.3582277297973633, "loss_align": 1.3099956512451172, "elapsed_sec": 814.5728871822357}
188
+ {"step": 3260, "epoch": 0, "loss": 3.9702558517456055, "loss_render": 3.8403310775756836, "loss_align": 1.2992465496063232, "elapsed_sec": 818.4608001708984}
189
+ {"step": 3280, "epoch": 0, "loss": 3.432121753692627, "loss_render": 3.3070950508117676, "loss_align": 1.2502663135528564, "elapsed_sec": 822.2873334884644}
190
+ {"step": 3300, "epoch": 0, "loss": 3.453719139099121, "loss_render": 3.3387324810028076, "loss_align": 1.1498661041259766, "elapsed_sec": 825.8356127738953}
191
+ {"step": 3320, "epoch": 0, "loss": 3.7275338172912598, "loss_render": 3.6115267276763916, "loss_align": 1.1600706577301025, "elapsed_sec": 829.5245726108551}
192
+ {"step": 3340, "epoch": 0, "loss": 3.3945090770721436, "loss_render": 3.277679920196533, "loss_align": 1.168290615081787, "elapsed_sec": 833.4057989120483}
193
+ {"step": 3360, "epoch": 0, "loss": 3.7562968730926514, "loss_render": 3.639965534210205, "loss_align": 1.163313627243042, "elapsed_sec": 837.206550359726}
194
+ {"step": 3380, "epoch": 0, "loss": 3.203108310699463, "loss_render": 3.0800843238830566, "loss_align": 1.230240821838379, "elapsed_sec": 841.0832140445709}
195
+ {"step": 3400, "epoch": 0, "loss": 3.339219093322754, "loss_render": 3.21042537689209, "loss_align": 1.2879371643066406, "elapsed_sec": 844.7776381969452}
196
+ {"step": 3420, "epoch": 0, "loss": 3.614652156829834, "loss_render": 3.498612880706787, "loss_align": 1.1603939533233643, "elapsed_sec": 848.6378741264343}
197
+ {"step": 3440, "epoch": 0, "loss": 3.998430013656616, "loss_render": 3.8696250915527344, "loss_align": 1.2880486249923706, "elapsed_sec": 852.9136519432068}
198
+ {"step": 3460, "epoch": 0, "loss": 3.6031110286712646, "loss_render": 3.4862232208251953, "loss_align": 1.168877124786377, "elapsed_sec": 856.8733851909637}
199
+ {"step": 3480, "epoch": 0, "loss": 3.48071551322937, "loss_render": 3.3639144897460938, "loss_align": 1.1680107116699219, "elapsed_sec": 860.7452211380005}
200
+ {"step": 3500, "epoch": 0, "loss": 3.4674816131591797, "loss_render": 3.3584442138671875, "loss_align": 1.09037446975708, "elapsed_sec": 864.5137982368469}
201
+ {"step": 3520, "epoch": 0, "loss": 3.92568039894104, "loss_render": 3.794065475463867, "loss_align": 1.3161487579345703, "elapsed_sec": 902.8110258579254}
202
+ {"step": 3540, "epoch": 0, "loss": 3.7790255546569824, "loss_render": 3.6630678176879883, "loss_align": 1.159576654434204, "elapsed_sec": 906.8911426067352}
203
+ {"step": 3560, "epoch": 0, "loss": 3.3313887119293213, "loss_render": 3.2007365226745605, "loss_align": 1.306522011756897, "elapsed_sec": 911.452953338623}
204
+ {"step": 3580, "epoch": 0, "loss": 3.5839266777038574, "loss_render": 3.459840774536133, "loss_align": 1.240859031677246, "elapsed_sec": 915.6381876468658}
205
+ {"step": 3600, "epoch": 0, "loss": 3.6318130493164062, "loss_render": 3.5238289833068848, "loss_align": 1.0798414945602417, "elapsed_sec": 919.6602568626404}
206
+ {"step": 3620, "epoch": 0, "loss": 3.693441152572632, "loss_render": 3.564162254333496, "loss_align": 1.2927886247634888, "elapsed_sec": 923.9816317558289}
207
+ {"step": 3640, "epoch": 0, "loss": 3.9405136108398438, "loss_render": 3.811929941177368, "loss_align": 1.2858368158340454, "elapsed_sec": 927.6108286380768}
208
+ {"step": 3660, "epoch": 0, "loss": 3.508798837661743, "loss_render": 3.389765977859497, "loss_align": 1.19032883644104, "elapsed_sec": 931.5460360050201}
209
+ {"step": 3680, "epoch": 0, "loss": 3.7283475399017334, "loss_render": 3.6098413467407227, "loss_align": 1.1850619316101074, "elapsed_sec": 935.5508029460907}
210
+ {"step": 3700, "epoch": 0, "loss": 3.731823205947876, "loss_render": 3.6040802001953125, "loss_align": 1.2774311304092407, "elapsed_sec": 939.4104900360107}
211
+ {"step": 3720, "epoch": 0, "loss": 3.8094801902770996, "loss_render": 3.694281578063965, "loss_align": 1.1519858837127686, "elapsed_sec": 943.3178899288177}
212
+ {"step": 3740, "epoch": 0, "loss": 3.5219295024871826, "loss_render": 3.412532329559326, "loss_align": 1.0939717292785645, "elapsed_sec": 946.9874022006989}
213
+ {"step": 3760, "epoch": 0, "loss": 3.8859002590179443, "loss_render": 3.772655963897705, "loss_align": 1.1324422359466553, "elapsed_sec": 950.8187129497528}
214
+ {"step": 3780, "epoch": 0, "loss": 3.3421733379364014, "loss_render": 3.216413736343384, "loss_align": 1.2575953006744385, "elapsed_sec": 954.9314012527466}
215
+ {"step": 3800, "epoch": 0, "loss": 3.6438233852386475, "loss_render": 3.526167392730713, "loss_align": 1.176559329032898, "elapsed_sec": 958.9616413116455}
216
+ {"step": 3820, "epoch": 0, "loss": 3.0369374752044678, "loss_render": 2.929780960083008, "loss_align": 1.0715659856796265, "elapsed_sec": 962.6312022209167}
217
+ {"step": 3840, "epoch": 0, "loss": 3.5926194190979004, "loss_render": 3.4737915992736816, "loss_align": 1.1882779598236084, "elapsed_sec": 966.2185957431793}
218
+ {"step": 3860, "epoch": 0, "loss": 3.2884552478790283, "loss_render": 3.16025447845459, "loss_align": 1.2820082902908325, "elapsed_sec": 969.7711503505707}
219
+ {"step": 3880, "epoch": 0, "loss": 3.2774734497070312, "loss_render": 3.1592886447906494, "loss_align": 1.1818480491638184, "elapsed_sec": 973.7929191589355}
220
+ {"step": 3900, "epoch": 0, "loss": 4.2674360275268555, "loss_render": 4.14381742477417, "loss_align": 1.2361855506896973, "elapsed_sec": 977.6318266391754}
221
+ {"step": 3920, "epoch": 0, "loss": 3.5760293006896973, "loss_render": 3.4464430809020996, "loss_align": 1.295861005783081, "elapsed_sec": 981.5292584896088}
222
+ {"step": 3940, "epoch": 0, "loss": 3.587008476257324, "loss_render": 3.46262788772583, "loss_align": 1.243804693222046, "elapsed_sec": 985.1478042602539}
223
+ {"step": 3960, "epoch": 0, "loss": 3.1360161304473877, "loss_render": 3.0152907371520996, "loss_align": 1.2072539329528809, "elapsed_sec": 989.015040397644}
224
+ {"step": 3980, "epoch": 0, "loss": 3.370725631713867, "loss_render": 3.2408528327941895, "loss_align": 1.2987267971038818, "elapsed_sec": 992.9622645378113}
225
+ {"step": 4000, "epoch": 0, "loss": 3.8980789184570312, "loss_render": 3.7780675888061523, "loss_align": 1.2001123428344727, "elapsed_sec": 997.1103825569153}
226
+ {"step": 4020, "epoch": 0, "loss": 3.326850175857544, "loss_render": 3.2110419273376465, "loss_align": 1.158082365989685, "elapsed_sec": 1035.1623759269714}
227
+ {"step": 4040, "epoch": 0, "loss": 3.4633235931396484, "loss_render": 3.3506035804748535, "loss_align": 1.127200722694397, "elapsed_sec": 1039.3294513225555}
228
+ {"step": 4060, "epoch": 0, "loss": 3.6582164764404297, "loss_render": 3.539252758026123, "loss_align": 1.1896368265151978, "elapsed_sec": 1043.7127094268799}
229
+ {"step": 4080, "epoch": 0, "loss": 3.2295382022857666, "loss_render": 3.1028456687927246, "loss_align": 1.26692533493042, "elapsed_sec": 1047.6403563022614}
230
+ {"step": 4100, "epoch": 0, "loss": 3.5388264656066895, "loss_render": 3.414259910583496, "loss_align": 1.2456663846969604, "elapsed_sec": 1051.9047062397003}
231
+ {"step": 4120, "epoch": 0, "loss": 3.102558135986328, "loss_render": 2.9749369621276855, "loss_align": 1.276210904121399, "elapsed_sec": 1055.9027354717255}
232
+ {"step": 4140, "epoch": 0, "loss": 3.613921642303467, "loss_render": 3.483802556991577, "loss_align": 1.3011901378631592, "elapsed_sec": 1059.7571215629578}
233
+ {"step": 4160, "epoch": 0, "loss": 3.7531354427337646, "loss_render": 3.636660575866699, "loss_align": 1.164747714996338, "elapsed_sec": 1063.5592312812805}
234
+ {"step": 4180, "epoch": 0, "loss": 3.5994529724121094, "loss_render": 3.4811575412750244, "loss_align": 1.1829543113708496, "elapsed_sec": 1067.3089151382446}
235
+ {"step": 4200, "epoch": 0, "loss": 3.7699365615844727, "loss_render": 3.652500629425049, "loss_align": 1.1743605136871338, "elapsed_sec": 1070.8856303691864}
236
+ {"step": 4220, "epoch": 0, "loss": 4.001293659210205, "loss_render": 3.873422622680664, "loss_align": 1.278710961341858, "elapsed_sec": 1074.7016582489014}
237
+ {"step": 4240, "epoch": 0, "loss": 3.844808578491211, "loss_render": 3.7275915145874023, "loss_align": 1.172170877456665, "elapsed_sec": 1078.6928749084473}
238
+ {"step": 4260, "epoch": 0, "loss": 3.179842472076416, "loss_render": 3.067607879638672, "loss_align": 1.122344732284546, "elapsed_sec": 1082.427574634552}
239
+ {"step": 4280, "epoch": 0, "loss": 3.4151194095611572, "loss_render": 3.3015847206115723, "loss_align": 1.1353471279144287, "elapsed_sec": 1086.3091685771942}
240
+ {"step": 4300, "epoch": 0, "loss": 3.7402732372283936, "loss_render": 3.6284329891204834, "loss_align": 1.1184014081954956, "elapsed_sec": 1089.946856021881}
241
+ {"step": 4320, "epoch": 0, "loss": 5.8849005699157715, "loss_render": 5.821453094482422, "loss_align": 0.6344740390777588, "elapsed_sec": 1093.2023885250092}
242
+ {"step": 4340, "epoch": 0, "loss": 3.585944890975952, "loss_render": 3.468257427215576, "loss_align": 1.1768745183944702, "elapsed_sec": 1097.1242127418518}
243
+ {"step": 4360, "epoch": 0, "loss": 3.1561732292175293, "loss_render": 3.036223888397217, "loss_align": 1.1994928121566772, "elapsed_sec": 1101.1091113090515}
244
+ {"step": 4380, "epoch": 0, "loss": 3.432767629623413, "loss_render": 3.3097732067108154, "loss_align": 1.2299437522888184, "elapsed_sec": 1104.7331609725952}
245
+ {"step": 4400, "epoch": 0, "loss": 3.468825340270996, "loss_render": 3.3565783500671387, "loss_align": 1.1224689483642578, "elapsed_sec": 1108.536845445633}
246
+ {"step": 4420, "epoch": 0, "loss": 3.5238516330718994, "loss_render": 3.3963050842285156, "loss_align": 1.2754662036895752, "elapsed_sec": 1112.4748859405518}
247
+ {"step": 4440, "epoch": 0, "loss": 3.6073813438415527, "loss_render": 3.490178108215332, "loss_align": 1.1720327138900757, "elapsed_sec": 1116.5838479995728}
248
+ {"step": 4460, "epoch": 0, "loss": 3.678609609603882, "loss_render": 3.556097984313965, "loss_align": 1.2251169681549072, "elapsed_sec": 1120.4309985637665}
249
+ {"step": 4480, "epoch": 0, "loss": 3.525902271270752, "loss_render": 3.411059856414795, "loss_align": 1.1484230756759644, "elapsed_sec": 1124.224265575409}
exp0/probe_leakage.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "ckpt": "runs/exp0/ckpt.pt",
3
+ "max_train_groups": 8000,
4
+ "max_valid_groups": 946,
5
+ "probe_hidden": 0,
6
+ "epochs": 20,
7
+ "lr": 0.01,
8
+ "baseline_majority_lang_acc": 0.5,
9
+ "baseline_majority_len_acc": 0.9271249771118164,
10
+ "lang_probe": {
11
+ "train_acc": 0.997875,
12
+ "valid_acc": 0.9973572938689218,
13
+ "best_valid_acc": 0.9973572938689218
14
+ },
15
+ "len_probe": {
16
+ "train_acc": 0.929,
17
+ "valid_acc": 0.9011627906976745,
18
+ "best_valid_acc": 0.9011627906976745
19
+ }
20
+ }
seq2seq0/ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2dfc0444e2753dd3151dcd77dd8bd0072c956ac824321b4aee32805fc9d800b
3
+ size 3602331157
seq2seq0/config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train_jsonl": "data/groups_train.jsonl",
3
+ "valid_jsonl": "data/groups_valid.jsonl",
4
+ "run_dir": "runs/seq2seq0",
5
+ "backbone": "google/mt5-small",
6
+ "batch_size": 4,
7
+ "grad_accum": 8,
8
+ "lr": 0.0001,
9
+ "epochs": 1,
10
+ "max_doc_len": 256,
11
+ "max_sum_len": 64,
12
+ "eval_every": 500,
13
+ "max_train_examples": 0,
14
+ "max_valid_examples": 0
15
+ }
seq2seq0/diag.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {"step": 500, "nll": 4.083519705758286, "nll_en": 3.7302262178183105, "nll_zh": 4.436813186137419}
2
+ {"step": 1000, "nll": 3.1696802163476923, "nll_en": 2.970382956571357, "nll_zh": 3.368977451425274}
3
+ {"step": 1500, "nll": 2.938471950385838, "nll_en": 2.751776049051426, "nll_zh": 3.1251678436553503}
4
+ {"step": 2000, "nll": 2.85019242536694, "nll_en": 2.668898694863027, "nll_zh": 3.031486177041213}
5
+ {"step": 2500, "nll": 2.8020481927198286, "nll_en": 2.624236168367414, "nll_zh": 2.979860214551962}
6
+ {"step": 3000, "nll": 2.769220844635752, "nll_en": 2.5925423972954458, "nll_zh": 2.9458992839111584}
7
+ {"step": 3500, "nll": 2.7439324649141406, "nll_en": 2.5695289658189577, "nll_zh": 2.9183359514079186}
8
+ {"step": 4000, "nll": 2.719873531676246, "nll_en": 2.5470015916965476, "nll_zh": 2.892745458046427}
seq2seq0/diag_full.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "ckpt": "runs/seq2seq0/ckpt.pt",
3
+ "nll_en": 2.5393521402653376,
4
+ "nll_zh": 2.8792598635651343,
5
+ "swap_delta_en": 1.3924288331328185,
6
+ "swap_delta_zh": 1.045276309168616,
7
+ "ablate_zero_delta_en": 1.9174702117105367,
8
+ "ablate_mean_delta_en": 1.2883884206130691,
9
+ "ablate_noise_delta_en": 16.08244441679618,
10
+ "ablate_zero_delta_zh": 1.5775624894188525,
11
+ "ablate_mean_delta_zh": 0.9820897937073012,
12
+ "ablate_noise_delta_zh": 15.75471107652273,
13
+ "inv_top1_full": 0.7822409868240356,
14
+ "inv_top5_full": 0.9355179667472839,
15
+ "diag_sim_mean": 0.894648015499115,
16
+ "offdiag_sim_mean": 0.7207438349723816,
17
+ "sim_margin": 0.1739041805267334,
18
+ "n_valid": 946
19
+ }
seq2seq0/logs.jsonl ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"step": 20, "epoch": 0, "loss": 19.958553314208984, "loss_en": 22.793672561645508, "loss_zh": 17.123435974121094, "elapsed_sec": 4.198706388473511}
2
+ {"step": 40, "epoch": 0, "loss": 26.172998428344727, "loss_en": 32.592918395996094, "loss_zh": 19.75307846069336, "elapsed_sec": 8.103334188461304}
3
+ {"step": 60, "epoch": 0, "loss": 15.988649368286133, "loss_en": 13.840924263000488, "loss_zh": 18.136375427246094, "elapsed_sec": 12.070148229598999}
4
+ {"step": 80, "epoch": 0, "loss": 15.599878311157227, "loss_en": 18.505041122436523, "loss_zh": 12.69471549987793, "elapsed_sec": 15.71264362335205}
5
+ {"step": 100, "epoch": 0, "loss": 13.34775161743164, "loss_en": 12.416041374206543, "loss_zh": 14.279461860656738, "elapsed_sec": 19.5399808883667}
6
+ {"step": 120, "epoch": 0, "loss": 12.484651565551758, "loss_en": 11.235444068908691, "loss_zh": 13.733858108520508, "elapsed_sec": 23.436214685440063}
7
+ {"step": 140, "epoch": 0, "loss": 11.203065872192383, "loss_en": 10.484219551086426, "loss_zh": 11.921911239624023, "elapsed_sec": 27.402586936950684}
8
+ {"step": 160, "epoch": 0, "loss": 10.720988273620605, "loss_en": 9.378963470458984, "loss_zh": 12.063013076782227, "elapsed_sec": 31.277592658996582}
9
+ {"step": 180, "epoch": 0, "loss": 10.150270462036133, "loss_en": 8.581932067871094, "loss_zh": 11.718609809875488, "elapsed_sec": 35.189231872558594}
10
+ {"step": 200, "epoch": 0, "loss": 9.412128448486328, "loss_en": 9.294561386108398, "loss_zh": 9.529695510864258, "elapsed_sec": 39.13556170463562}
11
+ {"step": 220, "epoch": 0, "loss": 9.052875518798828, "loss_en": 8.062944412231445, "loss_zh": 10.042807579040527, "elapsed_sec": 43.11750102043152}
12
+ {"step": 240, "epoch": 0, "loss": 8.293829917907715, "loss_en": 8.202460289001465, "loss_zh": 8.385199546813965, "elapsed_sec": 47.331522941589355}
13
+ {"step": 260, "epoch": 0, "loss": 7.132965564727783, "loss_en": 6.372021198272705, "loss_zh": 7.893909931182861, "elapsed_sec": 51.19337797164917}
14
+ {"step": 280, "epoch": 0, "loss": 8.807894706726074, "loss_en": 7.7100982666015625, "loss_zh": 9.905691146850586, "elapsed_sec": 54.8304705619812}
15
+ {"step": 300, "epoch": 0, "loss": 7.605258941650391, "loss_en": 7.196909427642822, "loss_zh": 8.0136079788208, "elapsed_sec": 58.53506326675415}
16
+ {"step": 320, "epoch": 0, "loss": 7.079022407531738, "loss_en": 6.878782272338867, "loss_zh": 7.279262065887451, "elapsed_sec": 61.75762462615967}
17
+ {"step": 340, "epoch": 0, "loss": 6.78070068359375, "loss_en": 6.136783599853516, "loss_zh": 7.424618244171143, "elapsed_sec": 65.08121347427368}
18
+ {"step": 360, "epoch": 0, "loss": 6.757793426513672, "loss_en": 6.254908561706543, "loss_zh": 7.260678768157959, "elapsed_sec": 68.8686990737915}
19
+ {"step": 380, "epoch": 0, "loss": 6.392443656921387, "loss_en": 6.305323123931885, "loss_zh": 6.4795637130737305, "elapsed_sec": 73.01840543746948}
20
+ {"step": 400, "epoch": 0, "loss": 5.946022033691406, "loss_en": 5.6474151611328125, "loss_zh": 6.24462890625, "elapsed_sec": 76.9156665802002}
21
+ {"step": 420, "epoch": 0, "loss": 6.163216590881348, "loss_en": 5.411000728607178, "loss_zh": 6.915432929992676, "elapsed_sec": 81.39415836334229}
22
+ {"step": 440, "epoch": 0, "loss": 6.169548511505127, "loss_en": 5.6542649269104, "loss_zh": 6.6848320960998535, "elapsed_sec": 85.73707461357117}
23
+ {"step": 460, "epoch": 0, "loss": 5.654720306396484, "loss_en": 4.883309364318848, "loss_zh": 6.426131248474121, "elapsed_sec": 89.98200869560242}
24
+ {"step": 480, "epoch": 0, "loss": 4.90985107421875, "loss_en": 4.54976749420166, "loss_zh": 5.26993465423584, "elapsed_sec": 94.0572862625122}
25
+ {"step": 500, "epoch": 0, "loss": 4.765139579772949, "loss_en": 4.720239162445068, "loss_zh": 4.810039520263672, "elapsed_sec": 98.55633473396301}
26
+ {"step": 520, "epoch": 0, "loss": 4.03904914855957, "loss_en": 3.6468799114227295, "loss_zh": 4.43121862411499, "elapsed_sec": 121.02724575996399}
27
+ {"step": 540, "epoch": 0, "loss": 3.616494655609131, "loss_en": 3.233731746673584, "loss_zh": 3.9992575645446777, "elapsed_sec": 124.87634563446045}
28
+ {"step": 560, "epoch": 0, "loss": 3.7991209030151367, "loss_en": 3.3848564624786377, "loss_zh": 4.213385581970215, "elapsed_sec": 129.0922553539276}
29
+ {"step": 580, "epoch": 0, "loss": 3.743335723876953, "loss_en": 3.4475491046905518, "loss_zh": 4.039122104644775, "elapsed_sec": 132.81642055511475}
30
+ {"step": 600, "epoch": 0, "loss": 3.2886626720428467, "loss_en": 3.0488266944885254, "loss_zh": 3.528498649597168, "elapsed_sec": 136.93406224250793}
31
+ {"step": 620, "epoch": 0, "loss": 4.001443862915039, "loss_en": 3.70090913772583, "loss_zh": 4.30197811126709, "elapsed_sec": 141.28837156295776}
32
+ {"step": 640, "epoch": 0, "loss": 3.367436408996582, "loss_en": 3.0827510356903076, "loss_zh": 3.6521215438842773, "elapsed_sec": 145.51539969444275}
33
+ {"step": 660, "epoch": 0, "loss": 3.83431339263916, "loss_en": 3.699383020401001, "loss_zh": 3.9692440032958984, "elapsed_sec": 149.31648921966553}
34
+ {"step": 680, "epoch": 0, "loss": 3.2584495544433594, "loss_en": 3.105294704437256, "loss_zh": 3.411604166030884, "elapsed_sec": 152.6927933692932}
35
+ {"step": 700, "epoch": 0, "loss": 3.2971158027648926, "loss_en": 3.010849952697754, "loss_zh": 3.5833816528320312, "elapsed_sec": 156.49797105789185}
36
+ {"step": 720, "epoch": 0, "loss": 3.531404972076416, "loss_en": 3.2872002124786377, "loss_zh": 3.7756099700927734, "elapsed_sec": 160.25540781021118}
37
+ {"step": 740, "epoch": 0, "loss": 3.333153009414673, "loss_en": 3.2654595375061035, "loss_zh": 3.400846481323242, "elapsed_sec": 164.26452136039734}
38
+ {"step": 760, "epoch": 0, "loss": 3.0705299377441406, "loss_en": 2.844557046890259, "loss_zh": 3.2965025901794434, "elapsed_sec": 168.0465316772461}
39
+ {"step": 780, "epoch": 0, "loss": 3.3941526412963867, "loss_en": 3.144383192062378, "loss_zh": 3.6439220905303955, "elapsed_sec": 171.5515432357788}
40
+ {"step": 800, "epoch": 0, "loss": 3.3890252113342285, "loss_en": 3.119081497192383, "loss_zh": 3.658968687057495, "elapsed_sec": 175.01897621154785}
41
+ {"step": 820, "epoch": 0, "loss": 3.375675678253174, "loss_en": 2.9780030250549316, "loss_zh": 3.773348331451416, "elapsed_sec": 178.57539796829224}
42
+ {"step": 840, "epoch": 0, "loss": 3.113314628601074, "loss_en": 2.923736333847046, "loss_zh": 3.3028926849365234, "elapsed_sec": 181.73106002807617}
43
+ {"step": 860, "epoch": 0, "loss": 3.4478917121887207, "loss_en": 3.1893107891082764, "loss_zh": 3.706472396850586, "elapsed_sec": 185.6530158519745}
44
+ {"step": 880, "epoch": 0, "loss": 3.1208255290985107, "loss_en": 2.921964645385742, "loss_zh": 3.3196864128112793, "elapsed_sec": 189.7071578502655}
45
+ {"step": 900, "epoch": 0, "loss": 3.149831533432007, "loss_en": 3.106837749481201, "loss_zh": 3.1928253173828125, "elapsed_sec": 193.66635990142822}
46
+ {"step": 920, "epoch": 0, "loss": 3.700915813446045, "loss_en": 3.496631383895874, "loss_zh": 3.905200242996216, "elapsed_sec": 197.51753616333008}
47
+ {"step": 940, "epoch": 0, "loss": 3.075161933898926, "loss_en": 2.87642502784729, "loss_zh": 3.2738986015319824, "elapsed_sec": 201.2185263633728}
48
+ {"step": 960, "epoch": 0, "loss": 2.902559280395508, "loss_en": 2.67034649848938, "loss_zh": 3.1347718238830566, "elapsed_sec": 205.0543327331543}
49
+ {"step": 980, "epoch": 0, "loss": 2.8350062370300293, "loss_en": 2.643165349960327, "loss_zh": 3.0268473625183105, "elapsed_sec": 209.12538385391235}
50
+ {"step": 1000, "epoch": 0, "loss": 3.1799252033233643, "loss_en": 3.017549991607666, "loss_zh": 3.3423004150390625, "elapsed_sec": 212.9950077533722}
51
+ {"step": 1020, "epoch": 0, "loss": 3.6379222869873047, "loss_en": 3.534482717514038, "loss_zh": 3.741361618041992, "elapsed_sec": 235.81631565093994}
52
+ {"step": 1040, "epoch": 0, "loss": 3.177799701690674, "loss_en": 2.9195380210876465, "loss_zh": 3.436061382293701, "elapsed_sec": 239.69328713417053}
53
+ {"step": 1060, "epoch": 0, "loss": 2.9638831615448, "loss_en": 2.8056447505950928, "loss_zh": 3.122121572494507, "elapsed_sec": 243.71403074264526}
54
+ {"step": 1080, "epoch": 0, "loss": 2.8412563800811768, "loss_en": 2.742892265319824, "loss_zh": 2.9396204948425293, "elapsed_sec": 247.5266396999359}
55
+ {"step": 1100, "epoch": 0, "loss": 2.9404349327087402, "loss_en": 2.834890604019165, "loss_zh": 3.0459790229797363, "elapsed_sec": 251.361811876297}
56
+ {"step": 1120, "epoch": 0, "loss": 2.9857234954833984, "loss_en": 2.930567979812622, "loss_zh": 3.0408787727355957, "elapsed_sec": 255.08666253089905}
57
+ {"step": 1140, "epoch": 0, "loss": 3.4752089977264404, "loss_en": 3.2585411071777344, "loss_zh": 3.6918768882751465, "elapsed_sec": 258.6802861690521}
58
+ {"step": 1160, "epoch": 0, "loss": 3.0683488845825195, "loss_en": 2.968440055847168, "loss_zh": 3.16825795173645, "elapsed_sec": 261.81062173843384}
59
+ {"step": 1180, "epoch": 0, "loss": 3.3352630138397217, "loss_en": 3.1413745880126953, "loss_zh": 3.529151439666748, "elapsed_sec": 264.90733885765076}
60
+ {"step": 1200, "epoch": 0, "loss": 3.385345697402954, "loss_en": 3.1838603019714355, "loss_zh": 3.5868310928344727, "elapsed_sec": 268.02124643325806}
61
+ {"step": 1220, "epoch": 0, "loss": 2.551145076751709, "loss_en": 2.3247532844543457, "loss_zh": 2.7775371074676514, "elapsed_sec": 270.98907136917114}
62
+ {"step": 1240, "epoch": 0, "loss": 3.2335715293884277, "loss_en": 3.1693408489227295, "loss_zh": 3.297802448272705, "elapsed_sec": 273.85680294036865}
63
+ {"step": 1260, "epoch": 0, "loss": 2.7534844875335693, "loss_en": 2.5668442249298096, "loss_zh": 2.940124750137329, "elapsed_sec": 277.27816581726074}
64
+ {"step": 1280, "epoch": 0, "loss": 3.088207721710205, "loss_en": 2.9462790489196777, "loss_zh": 3.2301361560821533, "elapsed_sec": 280.69331669807434}
65
+ {"step": 1300, "epoch": 0, "loss": 3.0984764099121094, "loss_en": 2.790520191192627, "loss_zh": 3.406432867050171, "elapsed_sec": 284.4216945171356}
66
+ {"step": 1320, "epoch": 0, "loss": 2.826920509338379, "loss_en": 2.621642827987671, "loss_zh": 3.032198190689087, "elapsed_sec": 287.67301750183105}
67
+ {"step": 1340, "epoch": 0, "loss": 2.5568861961364746, "loss_en": 2.374157428741455, "loss_zh": 2.7396152019500732, "elapsed_sec": 291.21101903915405}
68
+ {"step": 1360, "epoch": 0, "loss": 3.304105758666992, "loss_en": 3.1232481002807617, "loss_zh": 3.4849634170532227, "elapsed_sec": 294.51373648643494}
69
+ {"step": 1380, "epoch": 0, "loss": 3.431056022644043, "loss_en": 3.364529848098755, "loss_zh": 3.497582197189331, "elapsed_sec": 297.93639874458313}
70
+ {"step": 1400, "epoch": 0, "loss": 3.453933000564575, "loss_en": 3.4327991008758545, "loss_zh": 3.475066900253296, "elapsed_sec": 301.35156416893005}
71
+ {"step": 1420, "epoch": 0, "loss": 2.819824695587158, "loss_en": 2.6990394592285156, "loss_zh": 2.94061017036438, "elapsed_sec": 304.4319956302643}
72
+ {"step": 1440, "epoch": 0, "loss": 2.748256206512451, "loss_en": 2.669332981109619, "loss_zh": 2.827179431915283, "elapsed_sec": 307.8180546760559}
73
+ {"step": 1460, "epoch": 0, "loss": 2.890794515609741, "loss_en": 2.689507007598877, "loss_zh": 3.0920820236206055, "elapsed_sec": 311.65062284469604}
74
+ {"step": 1480, "epoch": 0, "loss": 3.3032078742980957, "loss_en": 3.09891676902771, "loss_zh": 3.5074992179870605, "elapsed_sec": 315.2243959903717}
75
+ {"step": 1500, "epoch": 0, "loss": 2.8289999961853027, "loss_en": 2.5417163372039795, "loss_zh": 3.116283893585205, "elapsed_sec": 318.89541387557983}
76
+ {"step": 1520, "epoch": 0, "loss": 3.4278740882873535, "loss_en": 3.338641881942749, "loss_zh": 3.517106533050537, "elapsed_sec": 340.8435151576996}
77
+ {"step": 1540, "epoch": 0, "loss": 2.7045698165893555, "loss_en": 2.467622756958008, "loss_zh": 2.941516637802124, "elapsed_sec": 344.88645458221436}
78
+ {"step": 1560, "epoch": 0, "loss": 2.9270870685577393, "loss_en": 2.723598003387451, "loss_zh": 3.1305761337280273, "elapsed_sec": 349.05961561203003}
79
+ {"step": 1580, "epoch": 0, "loss": 2.553027629852295, "loss_en": 2.2864990234375, "loss_zh": 2.819556474685669, "elapsed_sec": 352.85885643959045}
80
+ {"step": 1600, "epoch": 0, "loss": 2.752288341522217, "loss_en": 2.507803201675415, "loss_zh": 2.9967732429504395, "elapsed_sec": 356.25116991996765}
81
+ {"step": 1620, "epoch": 0, "loss": 2.70786190032959, "loss_en": 2.5169167518615723, "loss_zh": 2.8988070487976074, "elapsed_sec": 359.6559660434723}
82
+ {"step": 1640, "epoch": 0, "loss": 2.672903060913086, "loss_en": 2.1826224327087402, "loss_zh": 3.1631834506988525, "elapsed_sec": 363.48325395584106}
83
+ {"step": 1660, "epoch": 0, "loss": 2.8565173149108887, "loss_en": 2.6300599575042725, "loss_zh": 3.082974910736084, "elapsed_sec": 367.27752208709717}
84
+ {"step": 1680, "epoch": 0, "loss": 2.5396170616149902, "loss_en": 2.307021379470825, "loss_zh": 2.7722127437591553, "elapsed_sec": 370.9925911426544}
85
+ {"step": 1700, "epoch": 0, "loss": 2.7349886894226074, "loss_en": 2.6084930896759033, "loss_zh": 2.8614840507507324, "elapsed_sec": 373.84737968444824}
86
+ {"step": 1720, "epoch": 0, "loss": 2.368365526199341, "loss_en": 2.231144905090332, "loss_zh": 2.5055861473083496, "elapsed_sec": 376.57011818885803}
87
+ {"step": 1740, "epoch": 0, "loss": 2.5535035133361816, "loss_en": 2.4052233695983887, "loss_zh": 2.7017838954925537, "elapsed_sec": 379.354287147522}
88
+ {"step": 1760, "epoch": 0, "loss": 2.815798759460449, "loss_en": 2.620495557785034, "loss_zh": 3.011101722717285, "elapsed_sec": 382.08083987236023}
89
+ {"step": 1780, "epoch": 0, "loss": 2.764671802520752, "loss_en": 2.6547741889953613, "loss_zh": 2.8745694160461426, "elapsed_sec": 385.4583978652954}
90
+ {"step": 1800, "epoch": 0, "loss": 2.655404806137085, "loss_en": 2.4332571029663086, "loss_zh": 2.8775525093078613, "elapsed_sec": 388.7912435531616}
91
+ {"step": 1820, "epoch": 0, "loss": 2.7101292610168457, "loss_en": 2.5971550941467285, "loss_zh": 2.823103666305542, "elapsed_sec": 392.566499710083}
92
+ {"step": 1840, "epoch": 0, "loss": 3.05311918258667, "loss_en": 2.968299388885498, "loss_zh": 3.137938976287842, "elapsed_sec": 395.9359850883484}
93
+ {"step": 1860, "epoch": 0, "loss": 2.742724895477295, "loss_en": 2.621105670928955, "loss_zh": 2.864344358444214, "elapsed_sec": 399.23328733444214}
94
+ {"step": 1880, "epoch": 0, "loss": 3.037224531173706, "loss_en": 2.787285566329956, "loss_zh": 3.287163496017456, "elapsed_sec": 402.64269042015076}
95
+ {"step": 1900, "epoch": 0, "loss": 2.498868942260742, "loss_en": 2.124903678894043, "loss_zh": 2.8728342056274414, "elapsed_sec": 406.13378143310547}
96
+ {"step": 1920, "epoch": 0, "loss": 2.5489158630371094, "loss_en": 2.2647972106933594, "loss_zh": 2.8330345153808594, "elapsed_sec": 409.45444416999817}
97
+ {"step": 1940, "epoch": 0, "loss": 3.0023765563964844, "loss_en": 2.907174825668335, "loss_zh": 3.097578525543213, "elapsed_sec": 412.94355869293213}
98
+ {"step": 1960, "epoch": 0, "loss": 2.4419944286346436, "loss_en": 2.033252239227295, "loss_zh": 2.850736618041992, "elapsed_sec": 416.28390288352966}
99
+ {"step": 1980, "epoch": 0, "loss": 2.5496463775634766, "loss_en": 2.3367342948913574, "loss_zh": 2.7625582218170166, "elapsed_sec": 419.8820881843567}
100
+ {"step": 2000, "epoch": 0, "loss": 2.984041213989258, "loss_en": 2.618994951248169, "loss_zh": 3.3490874767303467, "elapsed_sec": 423.2843990325928}
101
+ {"step": 2020, "epoch": 0, "loss": 2.8971991539001465, "loss_en": 2.7913098335266113, "loss_zh": 3.0030884742736816, "elapsed_sec": 446.0056116580963}
102
+ {"step": 2040, "epoch": 0, "loss": 2.741555690765381, "loss_en": 2.455355167388916, "loss_zh": 3.027756452560425, "elapsed_sec": 449.53636264801025}
103
+ {"step": 2060, "epoch": 0, "loss": 2.8138084411621094, "loss_en": 2.6791114807128906, "loss_zh": 2.948505163192749, "elapsed_sec": 453.19211435317993}
104
+ {"step": 2080, "epoch": 0, "loss": 2.5126140117645264, "loss_en": 2.454984426498413, "loss_zh": 2.5702435970306396, "elapsed_sec": 457.22622561454773}
105
+ {"step": 2100, "epoch": 0, "loss": 2.614959239959717, "loss_en": 2.4207212924957275, "loss_zh": 2.809197425842285, "elapsed_sec": 461.1113624572754}
106
+ {"step": 2120, "epoch": 0, "loss": 3.526927947998047, "loss_en": 3.304453134536743, "loss_zh": 3.7494027614593506, "elapsed_sec": 465.05714178085327}
107
+ {"step": 2140, "epoch": 0, "loss": 2.8603644371032715, "loss_en": 2.6752848625183105, "loss_zh": 3.0454442501068115, "elapsed_sec": 469.27594566345215}
108
+ {"step": 2160, "epoch": 0, "loss": 2.763164520263672, "loss_en": 2.676156997680664, "loss_zh": 2.8501720428466797, "elapsed_sec": 472.9707407951355}
109
+ {"step": 2180, "epoch": 0, "loss": 2.799755096435547, "loss_en": 2.720726251602173, "loss_zh": 2.8787841796875, "elapsed_sec": 476.2732501029968}
110
+ {"step": 2200, "epoch": 0, "loss": 2.816256284713745, "loss_en": 2.770751953125, "loss_zh": 2.8617606163024902, "elapsed_sec": 479.77174973487854}
111
+ {"step": 2220, "epoch": 0, "loss": 2.769911766052246, "loss_en": 2.581306219100952, "loss_zh": 2.95851731300354, "elapsed_sec": 483.7051248550415}
112
+ {"step": 2240, "epoch": 0, "loss": 2.7940196990966797, "loss_en": 2.7260923385620117, "loss_zh": 2.8619472980499268, "elapsed_sec": 487.59430408477783}
113
+ {"step": 2260, "epoch": 0, "loss": 2.751661777496338, "loss_en": 2.6292455196380615, "loss_zh": 2.8740780353546143, "elapsed_sec": 491.5258870124817}
114
+ {"step": 2280, "epoch": 0, "loss": 2.923036575317383, "loss_en": 2.6797852516174316, "loss_zh": 3.166287899017334, "elapsed_sec": 495.0082275867462}
115
+ {"step": 2300, "epoch": 0, "loss": 2.9323275089263916, "loss_en": 2.8167855739593506, "loss_zh": 3.0478694438934326, "elapsed_sec": 498.79952597618103}
116
+ {"step": 2320, "epoch": 0, "loss": 3.061702251434326, "loss_en": 2.8404946327209473, "loss_zh": 3.282909870147705, "elapsed_sec": 502.51557540893555}
117
+ {"step": 2340, "epoch": 0, "loss": 2.8311243057250977, "loss_en": 2.6865432262420654, "loss_zh": 2.97570538520813, "elapsed_sec": 506.44561672210693}
118
+ {"step": 2360, "epoch": 0, "loss": 3.088996648788452, "loss_en": 2.933044910430908, "loss_zh": 3.244948387145996, "elapsed_sec": 510.1709876060486}
119
+ {"step": 2380, "epoch": 0, "loss": 2.7652997970581055, "loss_en": 2.530214786529541, "loss_zh": 3.000385046005249, "elapsed_sec": 513.8160424232483}
120
+ {"step": 2400, "epoch": 0, "loss": 2.607923746109009, "loss_en": 2.4835567474365234, "loss_zh": 2.732290744781494, "elapsed_sec": 517.6522035598755}
121
+ {"step": 2420, "epoch": 0, "loss": 3.020596981048584, "loss_en": 2.813929319381714, "loss_zh": 3.227264642715454, "elapsed_sec": 521.2472031116486}
122
+ {"step": 2440, "epoch": 0, "loss": 2.9155120849609375, "loss_en": 2.7227230072021484, "loss_zh": 3.1083009243011475, "elapsed_sec": 524.8803403377533}
123
+ {"step": 2460, "epoch": 0, "loss": 2.6297764778137207, "loss_en": 2.491191864013672, "loss_zh": 2.7683608531951904, "elapsed_sec": 528.3640823364258}
124
+ {"step": 2480, "epoch": 0, "loss": 2.8307271003723145, "loss_en": 2.733002185821533, "loss_zh": 2.9284520149230957, "elapsed_sec": 532.2507631778717}
125
+ {"step": 2500, "epoch": 0, "loss": 2.7968618869781494, "loss_en": 2.3598849773406982, "loss_zh": 3.2338387966156006, "elapsed_sec": 536.0524888038635}
126
+ {"step": 2520, "epoch": 0, "loss": 2.3986263275146484, "loss_en": 2.2707319259643555, "loss_zh": 2.5265204906463623, "elapsed_sec": 558.7142806053162}
127
+ {"step": 2540, "epoch": 0, "loss": 2.9391860961914062, "loss_en": 2.773148775100708, "loss_zh": 3.1052231788635254, "elapsed_sec": 562.0695896148682}
128
+ {"step": 2560, "epoch": 0, "loss": 3.458186388015747, "loss_en": 3.222871780395508, "loss_zh": 3.6935009956359863, "elapsed_sec": 565.4880850315094}
129
+ {"step": 2580, "epoch": 0, "loss": 3.2996857166290283, "loss_en": 3.1047608852386475, "loss_zh": 3.494610548019409, "elapsed_sec": 569.0583865642548}
130
+ {"step": 2600, "epoch": 0, "loss": 2.6272854804992676, "loss_en": 2.600250005722046, "loss_zh": 2.65432071685791, "elapsed_sec": 572.2312695980072}
131
+ {"step": 2620, "epoch": 0, "loss": 2.764815092086792, "loss_en": 2.472907066345215, "loss_zh": 3.056723117828369, "elapsed_sec": 575.8414421081543}
132
+ {"step": 2640, "epoch": 0, "loss": 2.512575149536133, "loss_en": 2.3414711952209473, "loss_zh": 2.6836793422698975, "elapsed_sec": 579.5740571022034}
133
+ {"step": 2660, "epoch": 0, "loss": 2.4943835735321045, "loss_en": 2.2229065895080566, "loss_zh": 2.7658605575561523, "elapsed_sec": 583.3068318367004}
134
+ {"step": 2680, "epoch": 0, "loss": 2.833723545074463, "loss_en": 2.6906490325927734, "loss_zh": 2.9767982959747314, "elapsed_sec": 587.0648281574249}
135
+ {"step": 2700, "epoch": 0, "loss": 3.145646095275879, "loss_en": 2.9706032276153564, "loss_zh": 3.3206887245178223, "elapsed_sec": 590.667902469635}
136
+ {"step": 2720, "epoch": 0, "loss": 2.6689939498901367, "loss_en": 2.422137498855591, "loss_zh": 2.9158501625061035, "elapsed_sec": 594.6421582698822}
137
+ {"step": 2740, "epoch": 0, "loss": 3.042304515838623, "loss_en": 2.8663413524627686, "loss_zh": 3.2182676792144775, "elapsed_sec": 598.231849193573}
138
+ {"step": 2760, "epoch": 0, "loss": 2.632537603378296, "loss_en": 2.5083906650543213, "loss_zh": 2.7566845417022705, "elapsed_sec": 602.2512514591217}
139
+ {"step": 2780, "epoch": 0, "loss": 2.808640956878662, "loss_en": 2.6505041122436523, "loss_zh": 2.966778039932251, "elapsed_sec": 606.4693043231964}
140
+ {"step": 2800, "epoch": 0, "loss": 2.6631369590759277, "loss_en": 2.4198720455169678, "loss_zh": 2.906402111053467, "elapsed_sec": 610.3039569854736}
141
+ {"step": 2820, "epoch": 0, "loss": 3.129357099533081, "loss_en": 2.949167013168335, "loss_zh": 3.309547185897827, "elapsed_sec": 614.4265367984772}
142
+ {"step": 2840, "epoch": 0, "loss": 2.880627155303955, "loss_en": 2.759845733642578, "loss_zh": 3.001408815383911, "elapsed_sec": 618.1155879497528}
143
+ {"step": 2860, "epoch": 0, "loss": 2.7344212532043457, "loss_en": 2.602353572845459, "loss_zh": 2.8664889335632324, "elapsed_sec": 621.5696713924408}
144
+ {"step": 2880, "epoch": 0, "loss": 2.8390002250671387, "loss_en": 2.8078744411468506, "loss_zh": 2.8701260089874268, "elapsed_sec": 625.3454191684723}
145
+ {"step": 2900, "epoch": 0, "loss": 2.6499979496002197, "loss_en": 2.5216286182403564, "loss_zh": 2.778367280960083, "elapsed_sec": 629.118412733078}
146
+ {"step": 2920, "epoch": 0, "loss": 2.3813576698303223, "loss_en": 2.1227400302886963, "loss_zh": 2.6399755477905273, "elapsed_sec": 633.1225066184998}
147
+ {"step": 2940, "epoch": 0, "loss": 2.902944564819336, "loss_en": 2.8351149559020996, "loss_zh": 2.9707744121551514, "elapsed_sec": 636.863737821579}
148
+ {"step": 2960, "epoch": 0, "loss": 2.5021724700927734, "loss_en": 2.2719180583953857, "loss_zh": 2.732426643371582, "elapsed_sec": 640.7029006481171}
149
+ {"step": 2980, "epoch": 0, "loss": 2.719264507293701, "loss_en": 2.5404632091522217, "loss_zh": 2.8980655670166016, "elapsed_sec": 644.1325256824493}
150
+ {"step": 3000, "epoch": 0, "loss": 3.013031005859375, "loss_en": 2.868255376815796, "loss_zh": 3.157806634902954, "elapsed_sec": 647.5999202728271}
151
+ {"step": 3020, "epoch": 0, "loss": 2.9003100395202637, "loss_en": 2.7702651023864746, "loss_zh": 3.0303549766540527, "elapsed_sec": 670.5569019317627}
152
+ {"step": 3040, "epoch": 0, "loss": 2.7593472003936768, "loss_en": 2.6388895511627197, "loss_zh": 2.879804849624634, "elapsed_sec": 674.41841173172}
153
+ {"step": 3060, "epoch": 0, "loss": 2.7090296745300293, "loss_en": 2.5160269737243652, "loss_zh": 2.9020326137542725, "elapsed_sec": 678.525176525116}
154
+ {"step": 3080, "epoch": 0, "loss": 2.3367695808410645, "loss_en": 2.2606661319732666, "loss_zh": 2.412872791290283, "elapsed_sec": 682.4749524593353}
155
+ {"step": 3100, "epoch": 0, "loss": 2.566432476043701, "loss_en": 2.149158000946045, "loss_zh": 2.9837069511413574, "elapsed_sec": 686.2843286991119}
156
+ {"step": 3120, "epoch": 0, "loss": 2.9830801486968994, "loss_en": 2.7817397117614746, "loss_zh": 3.184420585632324, "elapsed_sec": 690.0860016345978}
157
+ {"step": 3140, "epoch": 0, "loss": 2.5297842025756836, "loss_en": 2.3280935287475586, "loss_zh": 2.7314751148223877, "elapsed_sec": 694.205691576004}
158
+ {"step": 3160, "epoch": 0, "loss": 2.8116650581359863, "loss_en": 2.7368452548980713, "loss_zh": 2.8864850997924805, "elapsed_sec": 697.6342072486877}
159
+ {"step": 3180, "epoch": 0, "loss": 2.690246105194092, "loss_en": 2.5631024837493896, "loss_zh": 2.817389488220215, "elapsed_sec": 701.2021601200104}
160
+ {"step": 3200, "epoch": 0, "loss": 2.838886260986328, "loss_en": 2.581108570098877, "loss_zh": 3.0966639518737793, "elapsed_sec": 704.6447794437408}
161
+ {"step": 3220, "epoch": 0, "loss": 3.1918296813964844, "loss_en": 3.043659210205078, "loss_zh": 3.3400001525878906, "elapsed_sec": 708.1530072689056}
162
+ {"step": 3240, "epoch": 0, "loss": 2.8224878311157227, "loss_en": 2.8209502696990967, "loss_zh": 2.8240256309509277, "elapsed_sec": 711.7496147155762}
163
+ {"step": 3260, "epoch": 0, "loss": 2.755758047103882, "loss_en": 2.368210554122925, "loss_zh": 3.143305540084839, "elapsed_sec": 715.1883590221405}
164
+ {"step": 3280, "epoch": 0, "loss": 3.0267934799194336, "loss_en": 2.836432933807373, "loss_zh": 3.2171542644500732, "elapsed_sec": 718.6247766017914}
165
+ {"step": 3300, "epoch": 0, "loss": 3.0683236122131348, "loss_en": 2.920625925064087, "loss_zh": 3.2160210609436035, "elapsed_sec": 722.389007806778}
166
+ {"step": 3320, "epoch": 0, "loss": 2.545910596847534, "loss_en": 2.280404806137085, "loss_zh": 2.8114163875579834, "elapsed_sec": 726.1156675815582}
167
+ {"step": 3340, "epoch": 0, "loss": 2.4272170066833496, "loss_en": 2.1476333141326904, "loss_zh": 2.706800699234009, "elapsed_sec": 729.8638515472412}
168
+ {"step": 3360, "epoch": 0, "loss": 2.6030001640319824, "loss_en": 2.4472668170928955, "loss_zh": 2.7587332725524902, "elapsed_sec": 733.2340908050537}
169
+ {"step": 3380, "epoch": 0, "loss": 2.661376476287842, "loss_en": 2.515986442565918, "loss_zh": 2.8067665100097656, "elapsed_sec": 736.7651977539062}
170
+ {"step": 3400, "epoch": 0, "loss": 2.7315659523010254, "loss_en": 2.6237075328826904, "loss_zh": 2.8394241333007812, "elapsed_sec": 740.3357717990875}
171
+ {"step": 3420, "epoch": 0, "loss": 2.8538808822631836, "loss_en": 2.6504318714141846, "loss_zh": 3.0573296546936035, "elapsed_sec": 743.5593569278717}
172
+ {"step": 3440, "epoch": 0, "loss": 2.6777472496032715, "loss_en": 2.622728109359741, "loss_zh": 2.7327663898468018, "elapsed_sec": 747.3223748207092}
173
+ {"step": 3460, "epoch": 0, "loss": 2.7335457801818848, "loss_en": 2.637943744659424, "loss_zh": 2.829148054122925, "elapsed_sec": 751.1211133003235}
174
+ {"step": 3480, "epoch": 0, "loss": 2.6403274536132812, "loss_en": 2.466327667236328, "loss_zh": 2.8143270015716553, "elapsed_sec": 755.1746351718903}
175
+ {"step": 3500, "epoch": 0, "loss": 2.7790679931640625, "loss_en": 2.5257163047790527, "loss_zh": 3.0324199199676514, "elapsed_sec": 758.8425724506378}
176
+ {"step": 3520, "epoch": 0, "loss": 2.5418272018432617, "loss_en": 2.435535430908203, "loss_zh": 2.6481189727783203, "elapsed_sec": 782.7929248809814}
177
+ {"step": 3540, "epoch": 0, "loss": 3.1339573860168457, "loss_en": 2.8511905670166016, "loss_zh": 3.416724443435669, "elapsed_sec": 786.4950683116913}
178
+ {"step": 3560, "epoch": 0, "loss": 2.409252643585205, "loss_en": 2.13630747795105, "loss_zh": 2.6821980476379395, "elapsed_sec": 790.1224589347839}
179
+ {"step": 3580, "epoch": 0, "loss": 3.128756523132324, "loss_en": 2.9529428482055664, "loss_zh": 3.304570198059082, "elapsed_sec": 793.9147424697876}
180
+ {"step": 3600, "epoch": 0, "loss": 3.0961031913757324, "loss_en": 2.9756760597229004, "loss_zh": 3.2165303230285645, "elapsed_sec": 797.6883389949799}
181
+ {"step": 3620, "epoch": 0, "loss": 3.0069870948791504, "loss_en": 2.9084670543670654, "loss_zh": 3.1055068969726562, "elapsed_sec": 801.3975503444672}
182
+ {"step": 3640, "epoch": 0, "loss": 2.489809036254883, "loss_en": 2.2120156288146973, "loss_zh": 2.7676022052764893, "elapsed_sec": 804.8894393444061}
183
+ {"step": 3660, "epoch": 0, "loss": 2.8423004150390625, "loss_en": 2.552793264389038, "loss_zh": 3.131807327270508, "elapsed_sec": 808.468403339386}
184
+ {"step": 3680, "epoch": 0, "loss": 2.5372633934020996, "loss_en": 2.336249351501465, "loss_zh": 2.7382771968841553, "elapsed_sec": 812.2446646690369}
185
+ {"step": 3700, "epoch": 0, "loss": 2.797964096069336, "loss_en": 2.6745975017547607, "loss_zh": 2.921330690383911, "elapsed_sec": 816.0881781578064}
186
+ {"step": 3720, "epoch": 0, "loss": 2.6974291801452637, "loss_en": 2.2805113792419434, "loss_zh": 3.114347219467163, "elapsed_sec": 820.1067442893982}
187
+ {"step": 3740, "epoch": 0, "loss": 2.642305850982666, "loss_en": 2.454754114151001, "loss_zh": 2.829857587814331, "elapsed_sec": 824.0653774738312}
188
+ {"step": 3760, "epoch": 0, "loss": 2.3605141639709473, "loss_en": 2.3194069862365723, "loss_zh": 2.4016213417053223, "elapsed_sec": 828.005321264267}
189
+ {"step": 3780, "epoch": 0, "loss": 3.1603569984436035, "loss_en": 2.944838285446167, "loss_zh": 3.375875473022461, "elapsed_sec": 831.7441589832306}
190
+ {"step": 3800, "epoch": 0, "loss": 2.63377046585083, "loss_en": 2.567460060119629, "loss_zh": 2.7000811100006104, "elapsed_sec": 835.8810338973999}
191
+ {"step": 3820, "epoch": 0, "loss": 2.673095226287842, "loss_en": 2.514841079711914, "loss_zh": 2.8313493728637695, "elapsed_sec": 839.8410212993622}
192
+ {"step": 3840, "epoch": 0, "loss": 2.7557506561279297, "loss_en": 2.6359758377075195, "loss_zh": 2.8755252361297607, "elapsed_sec": 843.385849237442}
193
+ {"step": 3860, "epoch": 0, "loss": 2.7710683345794678, "loss_en": 2.611361503601074, "loss_zh": 2.9307751655578613, "elapsed_sec": 846.9589951038361}
194
+ {"step": 3880, "epoch": 0, "loss": 2.663418769836426, "loss_en": 2.518388271331787, "loss_zh": 2.8084495067596436, "elapsed_sec": 850.2679152488708}
195
+ {"step": 3900, "epoch": 0, "loss": 2.6004886627197266, "loss_en": 2.2952752113342285, "loss_zh": 2.9057021141052246, "elapsed_sec": 853.5375227928162}
196
+ {"step": 3920, "epoch": 0, "loss": 2.7454030513763428, "loss_en": 2.5733797550201416, "loss_zh": 2.917426347732544, "elapsed_sec": 857.0770959854126}
197
+ {"step": 3940, "epoch": 0, "loss": 2.394606590270996, "loss_en": 2.27539324760437, "loss_zh": 2.513820171356201, "elapsed_sec": 860.8597528934479}
198
+ {"step": 3960, "epoch": 0, "loss": 2.5442724227905273, "loss_en": 2.3535008430480957, "loss_zh": 2.735044002532959, "elapsed_sec": 864.5193626880646}
199
+ {"step": 3980, "epoch": 0, "loss": 2.6199214458465576, "loss_en": 2.4250659942626953, "loss_zh": 2.81477689743042, "elapsed_sec": 868.4990699291229}
200
+ {"step": 4000, "epoch": 0, "loss": 2.8970935344696045, "loss_en": 2.866617202758789, "loss_zh": 2.92756986618042, "elapsed_sec": 871.8378446102142}
201
+ {"step": 4020, "epoch": 0, "loss": 2.829970359802246, "loss_en": 2.5029795169830322, "loss_zh": 3.156960964202881, "elapsed_sec": 895.8810954093933}
202
+ {"step": 4040, "epoch": 0, "loss": 2.9328598976135254, "loss_en": 2.6317825317382812, "loss_zh": 3.2339370250701904, "elapsed_sec": 899.5657403469086}
203
+ {"step": 4060, "epoch": 0, "loss": 2.7865805625915527, "loss_en": 2.5966200828552246, "loss_zh": 2.97654128074646, "elapsed_sec": 903.044584274292}
204
+ {"step": 4080, "epoch": 0, "loss": 2.7077560424804688, "loss_en": 2.551176071166992, "loss_zh": 2.864335775375366, "elapsed_sec": 906.5699329376221}
205
+ {"step": 4100, "epoch": 0, "loss": 2.599005937576294, "loss_en": 2.5054309368133545, "loss_zh": 2.6925809383392334, "elapsed_sec": 910.1288907527924}
206
+ {"step": 4120, "epoch": 0, "loss": 3.2881150245666504, "loss_en": 3.169931650161743, "loss_zh": 3.4062983989715576, "elapsed_sec": 913.5733063220978}
207
+ {"step": 4140, "epoch": 0, "loss": 2.710763931274414, "loss_en": 2.4960825443267822, "loss_zh": 2.925445556640625, "elapsed_sec": 917.1031558513641}
208
+ {"step": 4160, "epoch": 0, "loss": 2.6032934188842773, "loss_en": 2.500432252883911, "loss_zh": 2.7061548233032227, "elapsed_sec": 920.4941296577454}
209
+ {"step": 4180, "epoch": 0, "loss": 2.744601249694824, "loss_en": 2.4626998901367188, "loss_zh": 3.026502847671509, "elapsed_sec": 923.771121263504}
210
+ {"step": 4200, "epoch": 0, "loss": 2.8496527671813965, "loss_en": 2.5911622047424316, "loss_zh": 3.1081430912017822, "elapsed_sec": 927.2853314876556}
211
+ {"step": 4220, "epoch": 0, "loss": 2.052157402038574, "loss_en": 1.6923019886016846, "loss_zh": 2.412013053894043, "elapsed_sec": 930.9663474559784}
212
+ {"step": 4240, "epoch": 0, "loss": 2.5358026027679443, "loss_en": 2.440366744995117, "loss_zh": 2.6312384605407715, "elapsed_sec": 935.0861761569977}
213
+ {"step": 4260, "epoch": 0, "loss": 2.8003454208374023, "loss_en": 2.5898213386535645, "loss_zh": 3.0108697414398193, "elapsed_sec": 939.3373069763184}
214
+ {"step": 4280, "epoch": 0, "loss": 2.642455816268921, "loss_en": 2.445002317428589, "loss_zh": 2.839909315109253, "elapsed_sec": 942.9591519832611}
215
+ {"step": 4300, "epoch": 0, "loss": 2.3475632667541504, "loss_en": 2.1976280212402344, "loss_zh": 2.4974982738494873, "elapsed_sec": 946.7653801441193}
216
+ {"step": 4320, "epoch": 0, "loss": 2.9191246032714844, "loss_en": 2.708181381225586, "loss_zh": 3.1300675868988037, "elapsed_sec": 950.2790491580963}
217
+ {"step": 4340, "epoch": 0, "loss": 2.635903835296631, "loss_en": 2.3671650886535645, "loss_zh": 2.904642343521118, "elapsed_sec": 953.9004445075989}
218
+ {"step": 4360, "epoch": 0, "loss": 2.5053811073303223, "loss_en": 2.406114101409912, "loss_zh": 2.6046481132507324, "elapsed_sec": 957.6598467826843}
219
+ {"step": 4380, "epoch": 0, "loss": 2.562072277069092, "loss_en": 2.5234265327453613, "loss_zh": 2.6007182598114014, "elapsed_sec": 961.0346031188965}
220
+ {"step": 4400, "epoch": 0, "loss": 2.2321741580963135, "loss_en": 2.1251015663146973, "loss_zh": 2.3392467498779297, "elapsed_sec": 965.1091108322144}
221
+ {"step": 4420, "epoch": 0, "loss": 2.950532913208008, "loss_en": 2.6192660331726074, "loss_zh": 3.2818000316619873, "elapsed_sec": 969.1725993156433}
222
+ {"step": 4440, "epoch": 0, "loss": 2.712175130844116, "loss_en": 2.505016803741455, "loss_zh": 2.9193334579467773, "elapsed_sec": 973.5211043357849}
223
+ {"step": 4460, "epoch": 0, "loss": 2.5691637992858887, "loss_en": 2.4321024417877197, "loss_zh": 2.7062253952026367, "elapsed_sec": 977.5741245746613}
224
+ {"step": 4480, "epoch": 0, "loss": 2.7240004539489746, "loss_en": 2.5562641620635986, "loss_zh": 2.8917369842529297, "elapsed_sec": 981.360995054245}
seq2seq0/probe_leakage.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "ckpt": "runs/seq2seq0/ckpt.pt",
3
+ "max_train_groups": 8000,
4
+ "max_valid_groups": 946,
5
+ "probe_hidden": 0,
6
+ "epochs": 20,
7
+ "lr": 0.01,
8
+ "baseline_majority_lang_acc": 0.5,
9
+ "baseline_majority_len_acc": 0.9271249771118164,
10
+ "lang_probe": {
11
+ "train_acc": 0.9989375,
12
+ "valid_acc": 1.0,
13
+ "best_valid_acc": 1.0
14
+ },
15
+ "len_probe": {
16
+ "train_acc": 0.9823125,
17
+ "valid_acc": 0.9756871035940803,
18
+ "best_valid_acc": 0.9756871035940803
19
+ }
20
+ }