Upload folder using huggingface_hub
Browse files- adv2_smoke/ckpt.pt +3 -0
- adv2_smoke/config.json +25 -0
- adv2_smoke/diag.jsonl +2 -0
- adv2_smoke/diag_full.json +19 -0
- adv2_smoke/logs.jsonl +25 -0
- adv2_smoke/probe_leakage.json +20 -0
- adv3_smoke/ckpt.pt +3 -0
- adv3_smoke/config.json +29 -0
- adv3_smoke/diag.jsonl +2 -0
- adv3_smoke/diag_full.json +19 -0
- adv3_smoke/logs.jsonl +25 -0
- adv3_smoke/probe_leakage.json +20 -0
- adv4_smoke/ckpt.pt +3 -0
- adv4_smoke/config.json +30 -0
- adv4_smoke/diag.jsonl +2 -0
- adv4_smoke/diag_full.json +19 -0
- adv4_smoke/logs.jsonl +25 -0
- adv4_smoke/probe_leakage.json +20 -0
- adv4_strong_smoke/ckpt.pt +3 -0
- adv4_strong_smoke/config.json +30 -0
- adv4_strong_smoke/diag.jsonl +2 -0
- adv4_strong_smoke/diag_full.json +19 -0
- adv4_strong_smoke/logs.jsonl +25 -0
- adv4_strong_smoke/probe_leakage.json +20 -0
adv2_smoke/ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d808412fca27a8ca9b633977b4692bbde1c448eb49f6e57ec3ac7c297a39947c
|
| 3 |
+
size 3640289321
|
adv2_smoke/config.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"train_jsonl": "data/groups_train.jsonl",
|
| 3 |
+
"valid_jsonl": "data/groups_valid.jsonl",
|
| 4 |
+
"run_dir": "runs/adv2_smoke",
|
| 5 |
+
"backbone": "google/mt5-small",
|
| 6 |
+
"num_latents": 16,
|
| 7 |
+
"latent_dropout": 0.1,
|
| 8 |
+
"latent_noise_std": 0.01,
|
| 9 |
+
"batch_size": 4,
|
| 10 |
+
"grad_accum": 8,
|
| 11 |
+
"epochs": 1,
|
| 12 |
+
"max_doc_len": 256,
|
| 13 |
+
"max_sum_len": 64,
|
| 14 |
+
"eval_every": 200,
|
| 15 |
+
"max_train_examples": 2000,
|
| 16 |
+
"max_valid_examples": 200,
|
| 17 |
+
"lambda_align": 0.2,
|
| 18 |
+
"tau": 0.07,
|
| 19 |
+
"lambda_langadv": 1.0,
|
| 20 |
+
"lang_steps": 3,
|
| 21 |
+
"lambda_varcov": 50.0,
|
| 22 |
+
"var_target_std": 0.05,
|
| 23 |
+
"lr_model": 0.0001,
|
| 24 |
+
"lr_lang": 0.01
|
| 25 |
+
}
|
adv2_smoke/diag.jsonl
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"step": 200, "nll_en": 6.921462745666504, "nll_zh": 6.952427787780762, "nll": 6.936945266723633}
|
| 2 |
+
{"step": 400, "nll_en": 6.2942057132720945, "nll_zh": 6.331760501861572, "nll": 6.312983107566834}
|
adv2_smoke/diag_full.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"ckpt": "runs/adv2_smoke/ckpt.pt",
|
| 3 |
+
"nll_en": 6.233604056134537,
|
| 4 |
+
"nll_zh": 6.20711564513644,
|
| 5 |
+
"swap_delta_en": 0.6715848370283906,
|
| 6 |
+
"swap_delta_zh": 0.6861200786292175,
|
| 7 |
+
"ablate_zero_delta_en": 7.869903961649404,
|
| 8 |
+
"ablate_mean_delta_en": 0.015260196387389749,
|
| 9 |
+
"ablate_noise_delta_en": 16.63688709267098,
|
| 10 |
+
"ablate_zero_delta_zh": 7.896392352485253,
|
| 11 |
+
"ablate_mean_delta_zh": 0.03996281089540768,
|
| 12 |
+
"ablate_noise_delta_zh": 16.663976290261267,
|
| 13 |
+
"inv_top1_full": 0.11839323490858078,
|
| 14 |
+
"inv_top5_full": 0.2906976640224457,
|
| 15 |
+
"diag_sim_mean": 0.9153905510902405,
|
| 16 |
+
"offdiag_sim_mean": 0.3318096995353699,
|
| 17 |
+
"sim_margin": 0.5835808515548706,
|
| 18 |
+
"n_valid": 946
|
| 19 |
+
}
|
adv2_smoke/logs.jsonl
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"step": 20, "epoch": 0, "loss": 26.01366424560547, "loss_render": 22.701311111450195, "loss_align": 1.7653541564941406, "loss_varcov": 0.06054317206144333, "lang_loss_clf": 0.06853032112121582, "lang_acc_clf_detached": 1.0, "lang_loss_for_encoder": 0.06787724792957306, "elapsed_sec": 3.827160120010376}
|
| 2 |
+
{"step": 40, "epoch": 0, "loss": 19.441516876220703, "loss_render": 15.991222381591797, "loss_align": 1.54671049118042, "loss_varcov": 0.06339457631111145, "lang_loss_clf": 0.042344413697719574, "lang_acc_clf_detached": 1.0, "lang_loss_for_encoder": 0.0287768617272377, "elapsed_sec": 7.013241767883301}
|
| 3 |
+
{"step": 60, "epoch": 0, "loss": 14.930947303771973, "loss_render": 11.352689743041992, "loss_align": 1.2424647808074951, "loss_varcov": 0.06777901202440262, "lang_loss_clf": 0.0681106448173523, "lang_acc_clf_detached": 1.0, "lang_loss_for_encoder": 0.059186242520809174, "elapsed_sec": 10.28891921043396}
|
| 4 |
+
{"step": 80, "epoch": 0, "loss": 14.528449058532715, "loss_render": 11.368682861328125, "loss_align": 1.2299816608428955, "loss_varcov": 0.06541957706212997, "lang_loss_clf": 0.3479321599006653, "lang_acc_clf_detached": 0.875, "lang_loss_for_encoder": 0.35720959305763245, "elapsed_sec": 13.362754344940186}
|
| 5 |
+
{"step": 100, "epoch": 0, "loss": 13.175678253173828, "loss_render": 9.374868392944336, "loss_align": 1.2407596111297607, "loss_varcov": 0.07199987769126892, "lang_loss_clf": 0.05528043955564499, "lang_acc_clf_detached": 1.0, "lang_loss_for_encoder": 0.0473356619477272, "elapsed_sec": 16.290427207946777}
|
| 6 |
+
{"step": 120, "epoch": 0, "loss": 12.470551490783691, "loss_render": 8.60978889465332, "loss_align": 1.2617219686508179, "loss_varcov": 0.07325197011232376, "lang_loss_clf": 0.051412083208560944, "lang_acc_clf_detached": 1.0, "lang_loss_for_encoder": 0.05417979508638382, "elapsed_sec": 19.27555274963379}
|
| 7 |
+
{"step": 140, "epoch": 0, "loss": 12.06535530090332, "loss_render": 8.119982719421387, "loss_align": 1.3705811500549316, "loss_varcov": 0.07474038749933243, "lang_loss_clf": 0.0684259906411171, "lang_acc_clf_detached": 1.0, "lang_loss_for_encoder": 0.06576387584209442, "elapsed_sec": 22.282530307769775}
|
| 8 |
+
{"step": 160, "epoch": 0, "loss": 12.033821105957031, "loss_render": 8.149459838867188, "loss_align": 1.2687556743621826, "loss_varcov": 0.07419806718826294, "lang_loss_clf": 0.09626789391040802, "lang_acc_clf_detached": 1.0, "lang_loss_for_encoder": 0.07929232716560364, "elapsed_sec": 25.230902671813965}
|
| 9 |
+
{"step": 180, "epoch": 0, "loss": 12.016547203063965, "loss_render": 8.179622650146484, "loss_align": 1.2885589599609375, "loss_varcov": 0.0738195925951004, "lang_loss_clf": 0.12900276482105255, "lang_acc_clf_detached": 1.0, "lang_loss_for_encoder": 0.11176709830760956, "elapsed_sec": 28.127777099609375}
|
| 10 |
+
{"step": 200, "epoch": 0, "loss": 11.218146324157715, "loss_render": 7.402112007141113, "loss_align": 1.3608659505844116, "loss_varcov": 0.07524444907903671, "lang_loss_clf": 0.209208682179451, "lang_acc_clf_detached": 1.0, "lang_loss_for_encoder": 0.21836160123348236, "elapsed_sec": 31.06750750541687}
|
| 11 |
+
{"step": 220, "epoch": 0, "loss": 10.84925365447998, "loss_render": 6.9797258377075195, "loss_align": 1.2122578620910645, "loss_varcov": 0.0762825608253479, "lang_loss_clf": 0.18836568295955658, "lang_acc_clf_detached": 1.0, "lang_loss_for_encoder": 0.18705201148986816, "elapsed_sec": 40.27459168434143}
|
| 12 |
+
{"step": 240, "epoch": 0, "loss": 10.537125587463379, "loss_render": 6.861736297607422, "loss_align": 1.0021635293960571, "loss_varcov": 0.07355406880378723, "lang_loss_clf": 0.20388610661029816, "lang_acc_clf_detached": 1.0, "lang_loss_for_encoder": 0.2027462124824524, "elapsed_sec": 43.077449798583984}
|
| 13 |
+
{"step": 260, "epoch": 0, "loss": 10.51772689819336, "loss_render": 6.918465614318848, "loss_align": 1.090893268585205, "loss_varcov": 0.0747125968337059, "lang_loss_clf": 0.34920722246170044, "lang_acc_clf_detached": 1.0, "lang_loss_for_encoder": 0.354547917842865, "elapsed_sec": 45.946648359298706}
|
| 14 |
+
{"step": 280, "epoch": 0, "loss": 9.397680282592773, "loss_render": 6.038473129272461, "loss_align": 0.967448353767395, "loss_varcov": 0.07310031354427338, "lang_loss_clf": 0.5011372566223145, "lang_acc_clf_detached": 0.75, "lang_loss_for_encoder": 0.4892982840538025, "elapsed_sec": 48.85692071914673}
|
| 15 |
+
{"step": 300, "epoch": 0, "loss": 10.132863998413086, "loss_render": 6.888652324676514, "loss_align": 0.8837790489196777, "loss_varcov": 0.07114173471927643, "lang_loss_clf": 0.5173168182373047, "lang_acc_clf_detached": 0.625, "lang_loss_for_encoder": 0.4896310269832611, "elapsed_sec": 51.6617488861084}
|
| 16 |
+
{"step": 320, "epoch": 0, "loss": 9.236344337463379, "loss_render": 6.228811264038086, "loss_align": 0.825596034526825, "loss_varcov": 0.06644503772258759, "lang_loss_clf": 0.5163775682449341, "lang_acc_clf_detached": 0.75, "lang_loss_for_encoder": 0.47983795404434204, "elapsed_sec": 54.54331040382385}
|
| 17 |
+
{"step": 340, "epoch": 0, "loss": 8.45924186706543, "loss_render": 5.947011947631836, "loss_align": 0.3475229740142822, "loss_varcov": 0.060677506029605865, "lang_loss_clf": 0.5935571193695068, "lang_acc_clf_detached": 0.75, "lang_loss_for_encoder": 0.5911505222320557, "elapsed_sec": 57.35739517211914}
|
| 18 |
+
{"step": 360, "epoch": 0, "loss": 8.630573272705078, "loss_render": 6.244958400726318, "loss_align": 0.3616149425506592, "loss_varcov": 0.05735646188259125, "lang_loss_clf": 0.5729612112045288, "lang_acc_clf_detached": 0.625, "lang_loss_for_encoder": 0.5545310378074646, "elapsed_sec": 60.20985507965088}
|
| 19 |
+
{"step": 380, "epoch": 0, "loss": 7.809354305267334, "loss_render": 5.671352863311768, "loss_align": 0.45548760890960693, "loss_varcov": 0.05194557458162308, "lang_loss_clf": 0.563302755355835, "lang_acc_clf_detached": 0.75, "lang_loss_for_encoder": 0.5503745675086975, "elapsed_sec": 63.066800355911255}
|
| 20 |
+
{"step": 400, "epoch": 0, "loss": 8.306041717529297, "loss_render": 6.253077507019043, "loss_align": 0.42017048597335815, "loss_varcov": 0.05265771970152855, "lang_loss_clf": 0.6757326126098633, "lang_acc_clf_detached": 0.625, "lang_loss_for_encoder": 0.663955807685852, "elapsed_sec": 66.19383811950684}
|
| 21 |
+
{"step": 420, "epoch": 0, "loss": 8.729920387268066, "loss_render": 6.199739456176758, "loss_align": 0.8452728986740112, "loss_varcov": 0.0584808886051178, "lang_loss_clf": 0.5925207734107971, "lang_acc_clf_detached": 0.625, "lang_loss_for_encoder": 0.5629174709320068, "elapsed_sec": 80.46020889282227}
|
| 22 |
+
{"step": 440, "epoch": 0, "loss": 8.001361846923828, "loss_render": 5.966494083404541, "loss_align": 0.11838580667972565, "loss_varcov": 0.049905925989151, "lang_loss_clf": 0.4980943500995636, "lang_acc_clf_detached": 0.75, "lang_loss_for_encoder": 0.48410558700561523, "elapsed_sec": 83.30399203300476}
|
| 23 |
+
{"step": 460, "epoch": 0, "loss": 7.870355606079102, "loss_render": 6.068554878234863, "loss_align": 0.5152812004089355, "loss_varcov": 0.049815185368061066, "lang_loss_clf": 0.8281128406524658, "lang_acc_clf_detached": 0.375, "lang_loss_for_encoder": 0.7920153141021729, "elapsed_sec": 86.40033268928528}
|
| 24 |
+
{"step": 480, "epoch": 0, "loss": 7.518971920013428, "loss_render": 5.996004581451416, "loss_align": 0.0897776409983635, "loss_varcov": 0.041865669190883636, "lang_loss_clf": 0.6214006543159485, "lang_acc_clf_detached": 0.5, "lang_loss_for_encoder": 0.5882717967033386, "elapsed_sec": 89.51149439811707}
|
| 25 |
+
{"step": 500, "epoch": 0, "loss": 6.795851707458496, "loss_render": 5.801303863525391, "loss_align": 0.3024139106273651, "loss_varcov": 0.03272250294685364, "lang_loss_clf": 0.7320167422294617, "lang_acc_clf_detached": 0.625, "lang_loss_for_encoder": 0.702060341835022, "elapsed_sec": 92.54206848144531}
|
adv2_smoke/probe_leakage.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"ckpt": "runs/adv2_smoke/ckpt.pt",
|
| 3 |
+
"max_train_groups": 8000,
|
| 4 |
+
"max_valid_groups": 946,
|
| 5 |
+
"probe_hidden": 0,
|
| 6 |
+
"epochs": 20,
|
| 7 |
+
"lr": 0.01,
|
| 8 |
+
"baseline_majority_lang_acc": 0.5,
|
| 9 |
+
"baseline_majority_len_acc": 0.9271249771118164,
|
| 10 |
+
"lang_probe": {
|
| 11 |
+
"train_acc": 0.9988125,
|
| 12 |
+
"valid_acc": 0.9994714587737844,
|
| 13 |
+
"best_valid_acc": 0.9994714587737844
|
| 14 |
+
},
|
| 15 |
+
"len_probe": {
|
| 16 |
+
"train_acc": 0.9438125,
|
| 17 |
+
"valid_acc": 0.919661733615222,
|
| 18 |
+
"best_valid_acc": 0.9207188160676533
|
| 19 |
+
}
|
| 20 |
+
}
|
adv3_smoke/ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f963be021fd7bf4fbfb8a4decf8c2b773499541544fa636626e6b5c24eb47837
|
| 3 |
+
size 3643443265
|
adv3_smoke/config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"train_jsonl": "data/groups_train.jsonl",
|
| 3 |
+
"valid_jsonl": "data/groups_valid.jsonl",
|
| 4 |
+
"run_dir": "runs/adv3_smoke",
|
| 5 |
+
"backbone": "google/mt5-small",
|
| 6 |
+
"num_latents": 16,
|
| 7 |
+
"latent_dropout": 0.1,
|
| 8 |
+
"latent_noise_std": 0.01,
|
| 9 |
+
"batch_size": 4,
|
| 10 |
+
"grad_accum": 8,
|
| 11 |
+
"epochs": 1,
|
| 12 |
+
"max_doc_len": 256,
|
| 13 |
+
"max_sum_len": 64,
|
| 14 |
+
"eval_every": 200,
|
| 15 |
+
"max_train_examples": 2000,
|
| 16 |
+
"max_valid_examples": 200,
|
| 17 |
+
"lambda_align": 0.5,
|
| 18 |
+
"tau": 0.07,
|
| 19 |
+
"lambda_varcov": 10.0,
|
| 20 |
+
"var_target_std": 0.05,
|
| 21 |
+
"lambda_norm": 1.0,
|
| 22 |
+
"lambda_mean": 1.0,
|
| 23 |
+
"lambda_mean_diff": 1.0,
|
| 24 |
+
"lambda_langadv": 0.5,
|
| 25 |
+
"lang_steps": 3,
|
| 26 |
+
"adv_start_step": 300,
|
| 27 |
+
"lr_model": 0.0001,
|
| 28 |
+
"lr_lang": 0.01
|
| 29 |
+
}
|
adv3_smoke/diag.jsonl
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"step": 200, "nll_en": 6.888075132369995, "nll_zh": 6.897639436721802, "nll": 6.892857284545898}
|
| 2 |
+
{"step": 400, "nll_en": 6.011003665924072, "nll_zh": 6.025835027694702, "nll": 6.018419346809387}
|
adv3_smoke/diag_full.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"ckpt": "runs/adv3_smoke/ckpt.pt",
|
| 3 |
+
"nll_en": 5.695753517917044,
|
| 4 |
+
"nll_zh": 5.707797478923838,
|
| 5 |
+
"swap_delta_en": 0.6152991381558505,
|
| 6 |
+
"swap_delta_zh": 0.6365903810013173,
|
| 7 |
+
"ablate_zero_delta_en": 11.453003095017932,
|
| 8 |
+
"ablate_mean_delta_en": 0.11614449583702813,
|
| 9 |
+
"ablate_noise_delta_en": 17.562626889089703,
|
| 10 |
+
"ablate_zero_delta_zh": 11.440959188449206,
|
| 11 |
+
"ablate_mean_delta_zh": 0.11405051636645457,
|
| 12 |
+
"ablate_noise_delta_zh": 17.31058040885038,
|
| 13 |
+
"inv_top1_full": 0.5010570883750916,
|
| 14 |
+
"inv_top5_full": 0.726215660572052,
|
| 15 |
+
"diag_sim_mean": 0.9785749912261963,
|
| 16 |
+
"offdiag_sim_mean": 0.7828513383865356,
|
| 17 |
+
"sim_margin": 0.19572365283966064,
|
| 18 |
+
"n_valid": 946
|
| 19 |
+
}
|
adv3_smoke/logs.jsonl
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"step": 20, "epoch": 0, "loss": 24.91742706298828, "loss_render": 23.773582458496094, "loss_align": 1.097640037536621, "loss_varcov": 0.05930384621024132, "loss_norm": 0.0, "loss_mean": 0.001455051125958562, "loss_mean_diff": 0.0005307550309225917, "lang_loss_clf": 0.0, "lang_acc_clf_detached": 0.0, "lang_loss_for_encoder": 0.0, "elapsed_sec": 3.075855255126953}
|
| 2 |
+
{"step": 40, "epoch": 0, "loss": 19.152389526367188, "loss_render": 17.75581169128418, "loss_align": 1.560633897781372, "loss_varcov": 0.06142370402812958, "loss_norm": 4.440892098500626e-16, "loss_mean": 0.0015163009520620108, "loss_mean_diff": 0.0005077974637970328, "lang_loss_clf": 0.0, "lang_acc_clf_detached": 0.0, "lang_loss_for_encoder": 0.0, "elapsed_sec": 5.6378395557403564}
|
| 3 |
+
{"step": 60, "epoch": 0, "loss": 14.176985740661621, "loss_render": 12.948507308959961, "loss_align": 1.1057791709899902, "loss_varcov": 0.06735758483409882, "loss_norm": 1.3322676295501878e-15, "loss_mean": 0.0016839567106217146, "loss_mean_diff": 0.00032911746529862285, "lang_loss_clf": 0.0, "lang_acc_clf_detached": 0.0, "lang_loss_for_encoder": 0.0, "elapsed_sec": 8.104898452758789}
|
| 4 |
+
{"step": 80, "epoch": 0, "loss": 13.14710521697998, "loss_render": 12.017267227172852, "loss_align": 0.9126105308532715, "loss_varcov": 0.0671679750084877, "loss_norm": 8.881784197001252e-16, "loss_mean": 0.0017318788450211287, "loss_mean_diff": 0.00012121950567234308, "lang_loss_clf": 0.0, "lang_acc_clf_detached": 0.0, "lang_loss_for_encoder": 0.0, "elapsed_sec": 10.746770143508911}
|
| 5 |
+
{"step": 100, "epoch": 0, "loss": 12.340912818908691, "loss_render": 10.941017150878906, "loss_align": 1.4772891998291016, "loss_varcov": 0.0659283697605133, "loss_norm": 1.3322676295501878e-15, "loss_mean": 0.001658549765124917, "loss_mean_diff": 0.0003087844233959913, "lang_loss_clf": 0.0, "lang_acc_clf_detached": 0.0, "lang_loss_for_encoder": 0.0, "elapsed_sec": 13.309589624404907}
|
| 6 |
+
{"step": 120, "epoch": 0, "loss": 10.596786499023438, "loss_render": 9.21932601928711, "loss_align": 1.2842016220092773, "loss_varcov": 0.07333570718765259, "loss_norm": 4.440892098500626e-16, "loss_mean": 0.001824642764404416, "loss_mean_diff": 0.00017749314429238439, "lang_loss_clf": 0.0, "lang_acc_clf_detached": 0.0, "lang_loss_for_encoder": 0.0, "elapsed_sec": 15.83854079246521}
|
| 7 |
+
{"step": 140, "epoch": 0, "loss": 9.913776397705078, "loss_render": 8.527271270751953, "loss_align": 1.3272709846496582, "loss_varcov": 0.07208719849586487, "loss_norm": 2.220446049250313e-15, "loss_mean": 0.0017944993451237679, "loss_mean_diff": 0.000203330724616535, "lang_loss_clf": 0.0, "lang_acc_clf_detached": 0.0, "lang_loss_for_encoder": 0.0, "elapsed_sec": 18.419480562210083}
|
| 8 |
+
{"step": 160, "epoch": 0, "loss": 9.571101188659668, "loss_render": 8.164871215820312, "loss_align": 1.3765511512756348, "loss_varcov": 0.07160010188817978, "loss_norm": 8.881784197001252e-16, "loss_mean": 0.0018011229112744331, "loss_mean_diff": 0.00015261837688740343, "lang_loss_clf": 0.0, "lang_acc_clf_detached": 0.0, "lang_loss_for_encoder": 0.0, "elapsed_sec": 21.098198890686035}
|
| 9 |
+
{"step": 180, "epoch": 0, "loss": 9.619966506958008, "loss_render": 8.223077774047852, "loss_align": 1.2920491695404053, "loss_varcov": 0.0748891830444336, "loss_norm": 2.6645352591003757e-15, "loss_mean": 0.0018649350386112928, "loss_mean_diff": 0.00010646976443240419, "lang_loss_clf": 0.0, "lang_acc_clf_detached": 0.0, "lang_loss_for_encoder": 0.0, "elapsed_sec": 23.694143772125244}
|
| 10 |
+
{"step": 200, "epoch": 0, "loss": 9.096923828125, "loss_render": 7.67396354675293, "loss_align": 1.3397053480148315, "loss_varcov": 0.07511485368013382, "loss_norm": 0.0, "loss_mean": 0.0018728700233623385, "loss_mean_diff": 8.666135545354337e-05, "lang_loss_clf": 0.0, "lang_acc_clf_detached": 0.0, "lang_loss_for_encoder": 0.0, "elapsed_sec": 26.298076152801514}
|
| 11 |
+
{"step": 220, "epoch": 0, "loss": 8.227168083190918, "loss_render": 6.800909996032715, "loss_align": 1.2949825525283813, "loss_varcov": 0.0776786208152771, "loss_norm": 4.440892098500626e-16, "loss_mean": 0.0019097942858934402, "loss_mean_diff": 7.092615851433948e-05, "lang_loss_clf": 0.0, "lang_acc_clf_detached": 0.0, "lang_loss_for_encoder": 0.0, "elapsed_sec": 34.94481325149536}
|
| 12 |
+
{"step": 240, "epoch": 0, "loss": 8.046826362609863, "loss_render": 6.6693572998046875, "loss_align": 1.2179815769195557, "loss_varcov": 0.07665148377418518, "loss_norm": 4.440892098500626e-16, "loss_mean": 0.001898725051432848, "loss_mean_diff": 6.508878141175956e-05, "lang_loss_clf": 0.0, "lang_acc_clf_detached": 0.0, "lang_loss_for_encoder": 0.0, "elapsed_sec": 37.290642738342285}
|
| 13 |
+
{"step": 260, "epoch": 0, "loss": 8.305371284484863, "loss_render": 6.907618999481201, "loss_align": 1.244159460067749, "loss_varcov": 0.07737051695585251, "loss_norm": 8.881784197001252e-16, "loss_mean": 0.0019085912499576807, "loss_mean_diff": 5.913519999012351e-05, "lang_loss_clf": 0.0, "lang_acc_clf_detached": 0.0, "lang_loss_for_encoder": 0.0, "elapsed_sec": 39.49288606643677}
|
| 14 |
+
{"step": 280, "epoch": 0, "loss": 7.299009799957275, "loss_render": 5.901230812072754, "loss_align": 1.2405908107757568, "loss_varcov": 0.07755205035209656, "loss_norm": 1.3322676295501878e-15, "loss_mean": 0.0019134900067001581, "loss_mean_diff": 4.943773819832131e-05, "lang_loss_clf": 0.0, "lang_acc_clf_detached": 0.0, "lang_loss_for_encoder": 0.0, "elapsed_sec": 41.763418674468994}
|
| 15 |
+
{"step": 300, "epoch": 0, "loss": 7.746583461761475, "loss_render": 6.748476505279541, "loss_align": 1.0907561779022217, "loss_varcov": 0.07546515017747879, "loss_norm": 1.3322676295501878e-15, "loss_mean": 0.0018891538493335247, "loss_mean_diff": 4.122305108467117e-05, "lang_loss_clf": 0.6982012391090393, "lang_acc_clf_detached": 0.5, "lang_loss_for_encoder": 0.6077049970626831, "elapsed_sec": 43.99915933609009}
|
| 16 |
+
{"step": 320, "epoch": 0, "loss": 7.539364814758301, "loss_render": 6.195427894592285, "loss_align": 1.1730680465698242, "loss_varcov": 0.0763925313949585, "loss_norm": 1.7763568394002505e-15, "loss_mean": 0.0018974498379975557, "loss_mean_diff": 5.333943772711791e-05, "lang_loss_clf": 0.027339227497577667, "lang_acc_clf_detached": 1.0, "lang_loss_for_encoder": 0.016945630311965942, "elapsed_sec": 46.355828523635864}
|
| 17 |
+
{"step": 340, "epoch": 0, "loss": 7.0724196434021, "loss_render": 5.855766773223877, "loss_align": 0.9586459398269653, "loss_varcov": 0.0735437422990799, "loss_norm": 8.881784197001252e-16, "loss_mean": 0.001855564652942121, "loss_mean_diff": 5.371747101889923e-05, "lang_loss_clf": 4.7292291128542274e-05, "lang_acc_clf_detached": 1.0, "lang_loss_for_encoder": 3.3004489523591474e-05, "elapsed_sec": 48.593570709228516}
|
| 18 |
+
{"step": 360, "epoch": 0, "loss": 7.213748931884766, "loss_render": 6.006307601928711, "loss_align": 0.9421965479850769, "loss_varcov": 0.07345137000083923, "loss_norm": 8.881784197001252e-16, "loss_mean": 0.0018556221621111035, "loss_mean_diff": 5.7431378081673756e-05, "lang_loss_clf": 5.893827255931683e-05, "lang_acc_clf_detached": 1.0, "lang_loss_for_encoder": 0.00016754708485677838, "elapsed_sec": 50.940390825271606}
|
| 19 |
+
{"step": 380, "epoch": 0, "loss": 6.364777088165283, "loss_render": 5.356549263000488, "loss_align": 0.6661635637283325, "loss_varcov": 0.06736285984516144, "loss_norm": 4.440892098500626e-16, "loss_mean": 0.001730419578962028, "loss_mean_diff": 5.256064468994737e-05, "lang_loss_clf": 0.0005864675622433424, "lang_acc_clf_detached": 1.0, "lang_loss_for_encoder": 0.0005303089856170118, "elapsed_sec": 53.27943563461304}
|
| 20 |
+
{"step": 400, "epoch": 0, "loss": 6.960882186889648, "loss_render": 5.913108825683594, "loss_align": 0.7085837125778198, "loss_varcov": 0.06918392330408096, "loss_norm": 1.7763568394002505e-15, "loss_mean": 0.001782494829967618, "loss_mean_diff": 5.0203474529553205e-05, "lang_loss_clf": 0.0004652995557989925, "lang_acc_clf_detached": 1.0, "lang_loss_for_encoder": 0.0003806088352575898, "elapsed_sec": 55.74656867980957}
|
| 21 |
+
{"step": 420, "epoch": 0, "loss": 7.099796295166016, "loss_render": 5.808730125427246, "loss_align": 1.081965684890747, "loss_varcov": 0.07481332868337631, "loss_norm": 0.0, "loss_mean": 0.0018688378622755408, "loss_mean_diff": 8.206500206142664e-05, "lang_loss_clf": 2.473586164342123e-06, "lang_acc_clf_detached": 1.0, "lang_loss_for_encoder": 5.364415187614213e-07, "elapsed_sec": 67.64627814292908}
|
| 22 |
+
{"step": 440, "epoch": 0, "loss": 6.428005218505859, "loss_render": 5.619255065917969, "loss_align": 0.3764345943927765, "loss_varcov": 0.062247149646282196, "loss_norm": 4.440892098500626e-16, "loss_mean": 0.0016414267010986805, "loss_mean_diff": 4.854144208366051e-05, "lang_loss_clf": 0.0016228127060458064, "lang_acc_clf_detached": 1.0, "lang_loss_for_encoder": 0.007256625685840845, "elapsed_sec": 70.11229133605957}
|
| 23 |
+
{"step": 460, "epoch": 0, "loss": 6.631974697113037, "loss_render": 5.819986343383789, "loss_align": 0.40843212604522705, "loss_varcov": 0.06209617108106613, "loss_norm": 1.3322676295501878e-15, "loss_mean": 0.0016276981914415956, "loss_mean_diff": 5.094557491247542e-05, "lang_loss_clf": 0.0015304856933653355, "lang_acc_clf_detached": 1.0, "lang_loss_for_encoder": 0.029736438766121864, "elapsed_sec": 72.6886088848114}
|
| 24 |
+
{"step": 480, "epoch": 0, "loss": 6.681015968322754, "loss_render": 5.790185928344727, "loss_align": 0.493865430355072, "loss_varcov": 0.064691923558712, "loss_norm": 4.440892098500626e-16, "loss_mean": 0.001690360251814127, "loss_mean_diff": 4.408272070577368e-05, "lang_loss_clf": 0.016515063121914864, "lang_acc_clf_detached": 1.0, "lang_loss_for_encoder": 0.00951192993670702, "elapsed_sec": 75.35664677619934}
|
| 25 |
+
{"step": 500, "epoch": 0, "loss": 6.282341957092285, "loss_render": 5.530722141265869, "loss_align": 0.3297235667705536, "loss_varcov": 0.05877729132771492, "loss_norm": 0.0, "loss_mean": 0.001564867328852415, "loss_mean_diff": 2.70104192168219e-05, "lang_loss_clf": 0.002191757084801793, "lang_acc_clf_detached": 1.0, "lang_loss_for_encoder": 0.005213525146245956, "elapsed_sec": 77.8887836933136}
|
adv3_smoke/probe_leakage.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"ckpt": "runs/adv3_smoke/ckpt.pt",
|
| 3 |
+
"max_train_groups": 8000,
|
| 4 |
+
"max_valid_groups": 946,
|
| 5 |
+
"probe_hidden": 0,
|
| 6 |
+
"epochs": 20,
|
| 7 |
+
"lr": 0.01,
|
| 8 |
+
"baseline_majority_lang_acc": 0.5,
|
| 9 |
+
"baseline_majority_len_acc": 0.9271249771118164,
|
| 10 |
+
"lang_probe": {
|
| 11 |
+
"train_acc": 0.99825,
|
| 12 |
+
"valid_acc": 0.9994714587737844,
|
| 13 |
+
"best_valid_acc": 0.9994714587737844
|
| 14 |
+
},
|
| 15 |
+
"len_probe": {
|
| 16 |
+
"train_acc": 0.95875,
|
| 17 |
+
"valid_acc": 0.9392177589852009,
|
| 18 |
+
"best_valid_acc": 0.9392177589852009
|
| 19 |
+
}
|
| 20 |
+
}
|
adv4_smoke/ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04feb004329b6d52b11f1e012276e327a46c7015daba8082280be2b329f38db6
|
| 3 |
+
size 3643443329
|
adv4_smoke/config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"train_jsonl": "data/groups_train.jsonl",
|
| 3 |
+
"valid_jsonl": "data/groups_valid.jsonl",
|
| 4 |
+
"run_dir": "runs/adv4_smoke",
|
| 5 |
+
"backbone": "google/mt5-small",
|
| 6 |
+
"num_latents": 16,
|
| 7 |
+
"latent_dropout": 0.1,
|
| 8 |
+
"latent_noise_std": 0.01,
|
| 9 |
+
"batch_size": 4,
|
| 10 |
+
"grad_accum": 8,
|
| 11 |
+
"epochs": 1,
|
| 12 |
+
"max_doc_len": 256,
|
| 13 |
+
"max_sum_len": 64,
|
| 14 |
+
"eval_every": 200,
|
| 15 |
+
"max_train_examples": 2000,
|
| 16 |
+
"max_valid_examples": 200,
|
| 17 |
+
"lambda_align": 0.5,
|
| 18 |
+
"tau": 0.07,
|
| 19 |
+
"lambda_varcov": 10.0,
|
| 20 |
+
"var_target_std": 0.05,
|
| 21 |
+
"lambda_mean": 0.1,
|
| 22 |
+
"lambda_mean_diff": 0.1,
|
| 23 |
+
"lambda_pair": 0.2,
|
| 24 |
+
"lambda_lang": 1.0,
|
| 25 |
+
"adv_start_step": 300,
|
| 26 |
+
"grl_alpha": 1.0,
|
| 27 |
+
"grl_warmup": 200,
|
| 28 |
+
"lr_model": 0.0001,
|
| 29 |
+
"lr_lang": 0.001
|
| 30 |
+
}
|
adv4_smoke/diag.jsonl
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"step": 200, "nll_en": 6.96965633392334, "nll_zh": 6.9684418773651124, "nll": 6.969049105644226}
|
| 2 |
+
{"step": 400, "nll_en": 6.254790468215942, "nll_zh": 6.211220149993896, "nll": 6.23300530910492}
|
adv4_smoke/diag_full.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"ckpt": "runs/adv4_smoke/ckpt.pt",
|
| 3 |
+
"nll_en": 5.918526216994884,
|
| 4 |
+
"nll_zh": 5.921291329139887,
|
| 5 |
+
"swap_delta_en": 0.6822249138330052,
|
| 6 |
+
"swap_delta_zh": 0.707949943824752,
|
| 7 |
+
"ablate_zero_delta_en": 8.685084957905097,
|
| 8 |
+
"ablate_mean_delta_en": 0.13749601573823117,
|
| 9 |
+
"ablate_noise_delta_en": 17.660213676113912,
|
| 10 |
+
"ablate_zero_delta_zh": 8.682319834670858,
|
| 11 |
+
"ablate_mean_delta_zh": 0.13413880894602478,
|
| 12 |
+
"ablate_noise_delta_zh": 17.741045526419835,
|
| 13 |
+
"inv_top1_full": 0.429175466299057,
|
| 14 |
+
"inv_top5_full": 0.6818181872367859,
|
| 15 |
+
"diag_sim_mean": 0.9796865582466125,
|
| 16 |
+
"offdiag_sim_mean": 0.7884430289268494,
|
| 17 |
+
"sim_margin": 0.19124352931976318,
|
| 18 |
+
"n_valid": 946
|
| 19 |
+
}
|
adv4_smoke/logs.jsonl
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"step": 20, "epoch": 0, "loss": 24.72283172607422, "loss_render": 23.547510147094727, "loss_align": 0.9425052404403687, "loss_varcov": 0.0604049488902092, "loss_mean": 0.768377423286438, "loss_mean_diff": 0.23014025390148163, "loss_pair": 0.0008376870537176728, "loss_lang": 0.0, "lang_acc_batch": 0.0, "elapsed_sec": 3.0165867805480957}
|
| 2 |
+
{"step": 40, "epoch": 0, "loss": 19.417558670043945, "loss_render": 17.916568756103516, "loss_align": 1.5371567010879517, "loss_varcov": 0.06276179105043411, "loss_mean": 0.791458785533905, "loss_mean_diff": 0.2544426918029785, "loss_pair": 0.0010251605417579412, "loss_lang": 0.0, "lang_acc_batch": 0.0, "elapsed_sec": 5.2628560066223145}
|
| 3 |
+
{"step": 60, "epoch": 0, "loss": 13.352556228637695, "loss_render": 11.97525691986084, "loss_align": 1.1626840829849243, "loss_varcov": 0.06931138038635254, "loss_mean": 0.8880023956298828, "loss_mean_diff": 0.13944241404533386, "loss_pair": 0.0004970942391082644, "loss_lang": 0.0, "lang_acc_batch": 0.0, "elapsed_sec": 7.503626346588135}
|
| 4 |
+
{"step": 80, "epoch": 0, "loss": 12.354641914367676, "loss_render": 11.04284954071045, "loss_align": 1.0368200540542603, "loss_varcov": 0.06964626908302307, "loss_mean": 0.9112157821655273, "loss_mean_diff": 0.05741399526596069, "loss_pair": 0.000285154179437086, "loss_lang": 0.0, "lang_acc_batch": 0.0, "elapsed_sec": 9.816405773162842}
|
| 5 |
+
{"step": 100, "epoch": 0, "loss": 11.493684768676758, "loss_render": 10.022943496704102, "loss_align": 1.3745707273483276, "loss_varcov": 0.0684390664100647, "loss_mean": 0.8887839317321777, "loss_mean_diff": 0.10086219757795334, "loss_pair": 0.0004990469897165895, "loss_lang": 0.0, "lang_acc_batch": 0.0, "elapsed_sec": 12.069944620132446}
|
| 6 |
+
{"step": 120, "epoch": 0, "loss": 10.281822204589844, "loss_render": 8.76795768737793, "loss_align": 1.340401530265808, "loss_varcov": 0.0742584615945816, "loss_mean": 0.9480116367340088, "loss_mean_diff": 0.06229076534509659, "loss_pair": 0.00024846242740750313, "loss_lang": 0.0, "lang_acc_batch": 0.0, "elapsed_sec": 14.402632474899292}
|
| 7 |
+
{"step": 140, "epoch": 0, "loss": 9.654067039489746, "loss_render": 8.134597778320312, "loss_align": 1.357208490371704, "loss_varcov": 0.07399539649486542, "loss_mean": 0.9447805881500244, "loss_mean_diff": 0.06380180269479752, "loss_pair": 0.0002656003925949335, "loss_lang": 0.0, "lang_acc_batch": 0.0, "elapsed_sec": 16.74499821662903}
|
| 8 |
+
{"step": 160, "epoch": 0, "loss": 9.568921089172363, "loss_render": 8.039554595947266, "loss_align": 1.3885021209716797, "loss_varcov": 0.07356099784374237, "loss_mean": 0.9459364414215088, "loss_mean_diff": 0.0486053004860878, "loss_pair": 0.0002532517828512937, "loss_lang": 0.0, "lang_acc_batch": 0.0, "elapsed_sec": 19.37409210205078}
|
| 9 |
+
{"step": 180, "epoch": 0, "loss": 9.483607292175293, "loss_render": 7.945327281951904, "loss_align": 1.3505332469940186, "loss_varcov": 0.07626023888587952, "loss_mean": 0.9689017534255981, "loss_mean_diff": 0.034911200404167175, "loss_pair": 0.0001452037540730089, "loss_lang": 0.0, "lang_acc_batch": 0.0, "elapsed_sec": 21.797194004058838}
|
| 10 |
+
{"step": 200, "epoch": 0, "loss": 8.917795181274414, "loss_render": 7.39015531539917, "loss_align": 1.329958438873291, "loss_varcov": 0.07628859579563141, "loss_mean": 0.9714730978012085, "loss_mean_diff": 0.02603764645755291, "loss_pair": 0.00012058994616381824, "loss_lang": 0.0, "lang_acc_batch": 0.0, "elapsed_sec": 24.3278329372406}
|
| 11 |
+
{"step": 220, "epoch": 0, "loss": 8.542162895202637, "loss_render": 6.996428966522217, "loss_align": 1.3209110498428345, "loss_varcov": 0.0784527063369751, "loss_mean": 0.9862959384918213, "loss_mean_diff": 0.021098170429468155, "loss_pair": 5.573605085373856e-05, "loss_lang": 0.0, "lang_acc_batch": 0.0, "elapsed_sec": 32.84887361526489}
|
| 12 |
+
{"step": 240, "epoch": 0, "loss": 8.494622230529785, "loss_render": 6.974963188171387, "loss_align": 1.2857677936553955, "loss_varcov": 0.077643483877182, "loss_mean": 0.9811775088310242, "loss_mean_diff": 0.022096550092101097, "loss_pair": 6.638251943513751e-05, "loss_lang": 0.0, "lang_acc_batch": 0.0, "elapsed_sec": 35.21742033958435}
|
| 13 |
+
{"step": 260, "epoch": 0, "loss": 8.650955200195312, "loss_render": 7.095550537109375, "loss_align": 1.333694577217102, "loss_varcov": 0.07878552377223969, "loss_mean": 0.9889304637908936, "loss_mean_diff": 0.017994893714785576, "loss_pair": 4.6030370867811143e-05, "loss_lang": 0.0, "lang_acc_batch": 0.0, "elapsed_sec": 37.667585611343384}
|
| 14 |
+
{"step": 280, "epoch": 0, "loss": 7.585478782653809, "loss_render": 6.044614315032959, "loss_align": 1.310089111328125, "loss_varcov": 0.07853716611862183, "loss_mean": 0.9878650307655334, "loss_mean_diff": 0.016530364751815796, "loss_pair": 4.200523835606873e-05, "loss_lang": 0.0, "lang_acc_batch": 0.0, "elapsed_sec": 40.08102607727051}
|
| 15 |
+
{"step": 300, "epoch": 0, "loss": 9.065315246582031, "loss_render": 6.895183563232422, "loss_align": 1.211582899093628, "loss_varcov": 0.07714667171239853, "loss_mean": 0.9800739288330078, "loss_mean_diff": 0.014206988736987114, "loss_pair": 4.14829482906498e-05, "loss_lang": 0.6934365034103394, "lang_acc_batch": 0.5, "elapsed_sec": 42.37378239631653}
|
| 16 |
+
{"step": 320, "epoch": 0, "loss": 8.669504165649414, "loss_render": 6.466433525085449, "loss_align": 1.279140830039978, "loss_varcov": 0.07798397541046143, "loss_mean": 0.9840797185897827, "loss_mean_diff": 0.01861432194709778, "loss_pair": 4.950887887389399e-05, "loss_lang": 0.6833813190460205, "lang_acc_batch": 0.875, "elapsed_sec": 44.75836205482483}
|
| 17 |
+
{"step": 340, "epoch": 0, "loss": 8.077033042907715, "loss_render": 6.064611434936523, "loss_align": 1.0131821632385254, "loss_varcov": 0.07426470518112183, "loss_mean": 0.9578843116760254, "loss_mean_diff": 0.020526418462395668, "loss_pair": 5.843583130626939e-05, "loss_lang": 0.6653302311897278, "lang_acc_batch": 1.0, "elapsed_sec": 47.102705240249634}
|
| 18 |
+
{"step": 360, "epoch": 0, "loss": 8.202704429626465, "loss_render": 6.180863380432129, "loss_align": 1.048385500907898, "loss_varcov": 0.07493708282709122, "loss_mean": 0.9632831811904907, "loss_mean_diff": 0.02101622149348259, "loss_pair": 5.9058649640064687e-05, "loss_lang": 0.6498361825942993, "lang_acc_batch": 1.0, "elapsed_sec": 49.45169639587402}
|
| 19 |
+
{"step": 380, "epoch": 0, "loss": 7.449904918670654, "loss_render": 5.655429840087891, "loss_align": 0.7571913003921509, "loss_varcov": 0.06962733715772629, "loss_mean": 0.9152027368545532, "loss_mean_diff": 0.021279973909258842, "loss_pair": 6.418152042897418e-05, "loss_lang": 0.6259446740150452, "lang_acc_batch": 1.0, "elapsed_sec": 51.761659383773804}
|
| 20 |
+
{"step": 400, "epoch": 0, "loss": 8.016307830810547, "loss_render": 6.10238790512085, "loss_align": 0.9504098892211914, "loss_varcov": 0.07342643290758133, "loss_mean": 0.9508310556411743, "loss_mean_diff": 0.02113826386630535, "loss_pair": 6.30878348601982e-05, "loss_lang": 0.6072407960891724, "lang_acc_batch": 1.0, "elapsed_sec": 54.25155687332153}
|
| 21 |
+
{"step": 420, "epoch": 0, "loss": 8.019879341125488, "loss_render": 6.005566120147705, "loss_align": 1.1964120864868164, "loss_varcov": 0.07658414542675018, "loss_mean": 0.9718189239501953, "loss_mean_diff": 0.032870396971702576, "loss_pair": 8.736167364986613e-05, "loss_lang": 0.5497793555259705, "lang_acc_batch": 1.0, "elapsed_sec": 66.5071382522583}
|
| 22 |
+
{"step": 440, "epoch": 0, "loss": 7.460047721862793, "loss_render": 5.9159321784973145, "loss_align": 0.4935169816017151, "loss_varcov": 0.06510508060455322, "loss_mean": 0.8728588223457336, "loss_mean_diff": 0.021061759442090988, "loss_pair": 6.73907416057773e-05, "loss_lang": 0.5569002628326416, "lang_acc_batch": 1.0, "elapsed_sec": 68.88803434371948}
|
| 23 |
+
{"step": 460, "epoch": 0, "loss": 7.359249591827393, "loss_render": 5.871917724609375, "loss_align": 0.4575628936290741, "loss_varcov": 0.06329986453056335, "loss_mean": 0.8516653776168823, "loss_mean_diff": 0.020374227315187454, "loss_pair": 7.266722241183743e-05, "loss_lang": 0.5383338332176208, "lang_acc_batch": 1.0, "elapsed_sec": 71.29257798194885}
|
| 24 |
+
{"step": 480, "epoch": 0, "loss": 7.445590019226074, "loss_render": 5.938446998596191, "loss_align": 0.4598678946495056, "loss_varcov": 0.06467342376708984, "loss_mean": 0.8648350834846497, "loss_mean_diff": 0.02138454094529152, "loss_pair": 8.406119013670832e-05, "loss_lang": 0.541836142539978, "lang_acc_batch": 1.0, "elapsed_sec": 73.7085657119751}
|
| 25 |
+
{"step": 500, "epoch": 0, "loss": 7.138613700866699, "loss_render": 5.660837173461914, "loss_align": 0.3657577335834503, "loss_varcov": 0.0611129030585289, "loss_mean": 0.8270612359046936, "loss_mean_diff": 0.015113018453121185, "loss_pair": 5.436208812170662e-05, "loss_lang": 0.599540114402771, "lang_acc_batch": 0.875, "elapsed_sec": 76.04010605812073}
|
adv4_smoke/probe_leakage.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"ckpt": "runs/adv4_smoke/ckpt.pt",
|
| 3 |
+
"max_train_groups": 8000,
|
| 4 |
+
"max_valid_groups": 946,
|
| 5 |
+
"probe_hidden": 0,
|
| 6 |
+
"epochs": 20,
|
| 7 |
+
"lr": 0.01,
|
| 8 |
+
"baseline_majority_lang_acc": 0.5,
|
| 9 |
+
"baseline_majority_len_acc": 0.9271249771118164,
|
| 10 |
+
"lang_probe": {
|
| 11 |
+
"train_acc": 0.9981875,
|
| 12 |
+
"valid_acc": 0.9984143763213531,
|
| 13 |
+
"best_valid_acc": 0.9984143763213531
|
| 14 |
+
},
|
| 15 |
+
"len_probe": {
|
| 16 |
+
"train_acc": 0.954875,
|
| 17 |
+
"valid_acc": 0.9344608879492601,
|
| 18 |
+
"best_valid_acc": 0.9418604651162791
|
| 19 |
+
}
|
| 20 |
+
}
|
adv4_strong_smoke/ckpt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:492c659f8c3b7b4908fc0a9fb53fb4b4bfba92475657194d0b1f9f390d6f6773
|
| 3 |
+
size 3643443329
|
adv4_strong_smoke/config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"train_jsonl": "data/groups_train.jsonl",
|
| 3 |
+
"valid_jsonl": "data/groups_valid.jsonl",
|
| 4 |
+
"run_dir": "runs/adv4_strong_smoke",
|
| 5 |
+
"backbone": "google/mt5-small",
|
| 6 |
+
"num_latents": 16,
|
| 7 |
+
"latent_dropout": 0.1,
|
| 8 |
+
"latent_noise_std": 0.01,
|
| 9 |
+
"batch_size": 4,
|
| 10 |
+
"grad_accum": 8,
|
| 11 |
+
"epochs": 1,
|
| 12 |
+
"max_doc_len": 256,
|
| 13 |
+
"max_sum_len": 64,
|
| 14 |
+
"eval_every": 200,
|
| 15 |
+
"max_train_examples": 2000,
|
| 16 |
+
"max_valid_examples": 200,
|
| 17 |
+
"lambda_align": 0.5,
|
| 18 |
+
"tau": 0.07,
|
| 19 |
+
"lambda_varcov": 10.0,
|
| 20 |
+
"var_target_std": 0.05,
|
| 21 |
+
"lambda_mean": 0.1,
|
| 22 |
+
"lambda_mean_diff": 5.0,
|
| 23 |
+
"lambda_pair": 0.2,
|
| 24 |
+
"lambda_lang": 10.0,
|
| 25 |
+
"adv_start_step": 0,
|
| 26 |
+
"grl_alpha": 5.0,
|
| 27 |
+
"grl_warmup": 0,
|
| 28 |
+
"lr_model": 0.0003,
|
| 29 |
+
"lr_lang": 0.0003
|
| 30 |
+
}
|
adv4_strong_smoke/diag.jsonl
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"step": 200, "nll_en": 6.645587558746338, "nll_zh": 6.641072587966919, "nll": 6.643330073356628}
|
| 2 |
+
{"step": 400, "nll_en": 5.708486814498901, "nll_zh": 5.7333665943145755, "nll": 5.720926704406739}
|
adv4_strong_smoke/diag_full.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"ckpt": "runs/adv4_strong_smoke/ckpt.pt",
|
| 3 |
+
"nll_en": 5.25965905340731,
|
| 4 |
+
"nll_zh": 5.26640438277424,
|
| 5 |
+
"swap_delta_en": 0.6382062893841282,
|
| 6 |
+
"swap_delta_zh": 0.6400147966300206,
|
| 7 |
+
"ablate_zero_delta_en": 11.029595883149677,
|
| 8 |
+
"ablate_mean_delta_en": 0.20425609705564082,
|
| 9 |
+
"ablate_noise_delta_en": 16.80559626895832,
|
| 10 |
+
"ablate_zero_delta_zh": 11.022850528579937,
|
| 11 |
+
"ablate_mean_delta_zh": 0.20288502287915092,
|
| 12 |
+
"ablate_noise_delta_zh": 16.803700138600128,
|
| 13 |
+
"inv_top1_full": 0.17019027471542358,
|
| 14 |
+
"inv_top5_full": 0.31712472438812256,
|
| 15 |
+
"diag_sim_mean": 0.9793476462364197,
|
| 16 |
+
"offdiag_sim_mean": 0.5936431884765625,
|
| 17 |
+
"sim_margin": 0.3857044577598572,
|
| 18 |
+
"n_valid": 946
|
| 19 |
+
}
|
adv4_strong_smoke/logs.jsonl
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"step": 20, "epoch": 0, "loss": 24.358551025390625, "loss_render": 15.426189422607422, "loss_align": 1.1068222522735596, "loss_varcov": 0.06721384823322296, "loss_mean": 0.8660869002342224, "loss_mean_diff": 0.1500086486339569, "loss_pair": 0.0005593104287981987, "loss_lang": 0.6870046854019165, "lang_acc_batch": 0.5, "elapsed_sec": 2.987924814224243}
|
| 2 |
+
{"step": 40, "epoch": 0, "loss": 19.27250099182129, "loss_render": 10.490840911865234, "loss_align": 1.4721579551696777, "loss_varcov": 0.07239441573619843, "loss_mean": 0.9314608573913574, "loss_mean_diff": 0.06969726085662842, "loss_pair": 0.00034151587169617414, "loss_lang": 0.6879935264587402, "lang_acc_batch": 0.875, "elapsed_sec": 5.618208885192871}
|
| 3 |
+
{"step": 60, "epoch": 0, "loss": 17.083253860473633, "loss_render": 8.560813903808594, "loss_align": 1.3577772378921509, "loss_varcov": 0.07729271054267883, "loss_mean": 0.9802981615066528, "loss_mean_diff": 0.015478027984499931, "loss_pair": 8.413517934968695e-05, "loss_lang": 0.6895186901092529, "lang_acc_batch": 0.625, "elapsed_sec": 8.23490047454834}
|
| 4 |
+
{"step": 80, "epoch": 0, "loss": 16.976730346679688, "loss_render": 8.490270614624023, "loss_align": 1.3249222040176392, "loss_varcov": 0.07720647752285004, "loss_mean": 0.981532871723175, "loss_mean_diff": 0.007807200774550438, "loss_pair": 6.244565884117037e-05, "loss_lang": 0.6914732456207275, "lang_acc_batch": 0.5, "elapsed_sec": 10.950377702713013}
|
| 5 |
+
{"step": 100, "epoch": 0, "loss": 16.358787536621094, "loss_render": 7.838540077209473, "loss_align": 1.366581678390503, "loss_varcov": 0.07773439586162567, "loss_mean": 0.9843357801437378, "loss_mean_diff": 0.009339343756437302, "loss_pair": 6.48140994599089e-05, "loss_lang": 0.6914469003677368, "lang_acc_batch": 0.5, "elapsed_sec": 13.529285192489624}
|
| 6 |
+
{"step": 120, "epoch": 0, "loss": 16.037458419799805, "loss_render": 7.505511283874512, "loss_align": 1.3843297958374023, "loss_varcov": 0.07834576070308685, "loss_mean": 0.9889047145843506, "loss_mean_diff": 0.0068482328206300735, "loss_pair": 4.945392720401287e-05, "loss_lang": 0.6923184394836426, "lang_acc_batch": 0.75, "elapsed_sec": 16.093585968017578}
|
| 7 |
+
{"step": 140, "epoch": 0, "loss": 15.622644424438477, "loss_render": 7.100217342376709, "loss_align": 1.3818671703338623, "loss_varcov": 0.07837220281362534, "loss_mean": 0.9889189004898071, "loss_mean_diff": 0.00675200205296278, "loss_pair": 4.8623485781718045e-05, "loss_lang": 0.6915109157562256, "lang_acc_batch": 0.625, "elapsed_sec": 18.620471000671387}
|
| 8 |
+
{"step": 160, "epoch": 0, "loss": 15.638633728027344, "loss_render": 7.137982368469238, "loss_align": 1.376845359802246, "loss_varcov": 0.07827111333608627, "loss_mean": 0.988618016242981, "loss_mean_diff": 0.0058237179182469845, "loss_pair": 4.7519566578557715e-05, "loss_lang": 0.6901527643203735, "lang_acc_batch": 0.5, "elapsed_sec": 21.227479219436646}
|
| 9 |
+
{"step": 180, "epoch": 0, "loss": 15.643528938293457, "loss_render": 7.1163530349731445, "loss_align": 1.3783313035964966, "loss_varcov": 0.0785001739859581, "loss_mean": 0.9899085760116577, "loss_mean_diff": 0.006091669667512178, "loss_pair": 4.316946433391422e-05, "loss_lang": 0.6923550367355347, "lang_acc_batch": 0.5, "elapsed_sec": 23.841876983642578}
|
| 10 |
+
{"step": 200, "epoch": 0, "loss": 15.217899322509766, "loss_render": 6.699363708496094, "loss_align": 1.3819103240966797, "loss_varcov": 0.07835385203361511, "loss_mean": 0.9892261624336243, "loss_mean_diff": 0.004719927906990051, "loss_pair": 4.546478157863021e-05, "loss_lang": 0.6921509504318237, "lang_acc_batch": 0.75, "elapsed_sec": 26.502048015594482}
|
| 11 |
+
{"step": 220, "epoch": 0, "loss": 15.230162620544434, "loss_render": 6.730316162109375, "loss_align": 1.3596181869506836, "loss_varcov": 0.07957720756530762, "loss_mean": 0.9976720213890076, "loss_mean_diff": 0.00041867559775710106, "loss_pair": 2.107354248437332e-06, "loss_lang": 0.6922405362129211, "lang_acc_batch": 0.875, "elapsed_sec": 35.050835847854614}
|
| 12 |
+
{"step": 240, "epoch": 0, "loss": 14.97906494140625, "loss_render": 6.497296333312988, "loss_align": 1.3333766460418701, "loss_varcov": 0.07915233820676804, "loss_mean": 0.9952565431594849, "loss_mean_diff": 0.0006926361238583922, "loss_pair": 4.3187924347876105e-06, "loss_lang": 0.6920567750930786, "lang_acc_batch": 0.5, "elapsed_sec": 37.490808725357056}
|
| 13 |
+
{"step": 260, "epoch": 0, "loss": 15.303447723388672, "loss_render": 6.816764831542969, "loss_align": 1.3451306819915771, "loss_varcov": 0.0793096125125885, "loss_mean": 0.9961249828338623, "loss_mean_diff": 0.0007303876336663961, "loss_pair": 4.346224613982486e-06, "loss_lang": 0.6917755603790283, "lang_acc_batch": 0.5, "elapsed_sec": 39.82817602157593}
|
| 14 |
+
{"step": 280, "epoch": 0, "loss": 14.098390579223633, "loss_render": 5.826684951782227, "loss_align": 1.0144506692886353, "loss_varcov": 0.07467873394489288, "loss_mean": 0.9638773798942566, "loss_mean_diff": 0.0007874940056353807, "loss_pair": 4.182312295597512e-06, "loss_lang": 0.6917366981506348, "lang_acc_batch": 0.5, "elapsed_sec": 42.15418767929077}
|
| 15 |
+
{"step": 300, "epoch": 0, "loss": 14.733768463134766, "loss_render": 6.557484149932861, "loss_align": 0.8414218425750732, "loss_varcov": 0.07098853588104248, "loss_mean": 0.9299811720848083, "loss_mean_diff": 0.007388835307210684, "loss_pair": 2.982076875923667e-05, "loss_lang": 0.6915738582611084, "lang_acc_batch": 0.75, "elapsed_sec": 44.42831039428711}
|
| 16 |
+
{"step": 320, "epoch": 0, "loss": 14.478191375732422, "loss_render": 6.1297807693481445, "loss_align": 1.1153764724731445, "loss_varcov": 0.0762094110250473, "loss_mean": 0.9761601090431213, "loss_mean_diff": 0.0033764378167688847, "loss_pair": 1.2732704817608465e-05, "loss_lang": 0.6914127469062805, "lang_acc_batch": 0.625, "elapsed_sec": 46.85067319869995}
|
| 17 |
+
{"step": 340, "epoch": 0, "loss": 13.947319030761719, "loss_render": 5.9377899169921875, "loss_align": 0.7586307525634766, "loss_varcov": 0.06291775405406952, "loss_mean": 0.8381441831588745, "loss_mean_diff": 0.001488365582190454, "loss_pair": 9.81738958216738e-06, "loss_lang": 0.6909778118133545, "lang_acc_batch": 0.5, "elapsed_sec": 49.10358142852783}
|
| 18 |
+
{"step": 360, "epoch": 0, "loss": 13.808891296386719, "loss_render": 6.024482250213623, "loss_align": 0.3572639226913452, "loss_varcov": 0.060096193104982376, "loss_mean": 0.8146143555641174, "loss_mean_diff": 0.003286336548626423, "loss_pair": 2.5152683519991115e-05, "loss_lang": 0.6906917095184326, "lang_acc_batch": 0.875, "elapsed_sec": 51.428688287734985}
|
| 19 |
+
{"step": 380, "epoch": 0, "loss": 13.192407608032227, "loss_render": 5.320857524871826, "loss_align": 0.6716236472129822, "loss_varcov": 0.05337817221879959, "loss_mean": 0.6801819801330566, "loss_mean_diff": 0.0027167126536369324, "loss_pair": 1.8854985682992265e-05, "loss_lang": 0.6920351982116699, "lang_acc_batch": 0.5, "elapsed_sec": 53.78521466255188}
|
| 20 |
+
{"step": 400, "epoch": 0, "loss": 13.696573257446289, "loss_render": 5.678582191467285, "loss_align": 0.6780731678009033, "loss_varcov": 0.06714846193790436, "loss_mean": 0.8961491584777832, "loss_mean_diff": 0.0025570648722350597, "loss_pair": 2.8722504794131964e-05, "loss_lang": 0.6905064582824707, "lang_acc_batch": 0.5, "elapsed_sec": 56.22617959976196}
|
| 21 |
+
{"step": 420, "epoch": 0, "loss": 13.517683029174805, "loss_render": 5.419142723083496, "loss_align": 0.8099405765533447, "loss_varcov": 0.06822170317173004, "loss_mean": 0.9010855555534363, "loss_mean_diff": 0.005053877830505371, "loss_pair": 3.0353778129210696e-05, "loss_lang": 0.689596951007843, "lang_acc_batch": 0.5, "elapsed_sec": 67.7585711479187}
|
| 22 |
+
{"step": 440, "epoch": 0, "loss": 13.234602928161621, "loss_render": 5.33465576171875, "loss_align": 0.5888980627059937, "loss_varcov": 0.06171121448278427, "loss_mean": 0.8321233987808228, "loss_mean_diff": 0.0019290667260065675, "loss_pair": 1.6018990208976902e-05, "loss_lang": 0.6895524859428406, "lang_acc_batch": 0.5, "elapsed_sec": 70.09298157691956}
|
| 23 |
+
{"step": 460, "epoch": 0, "loss": 12.852123260498047, "loss_render": 5.279627799987793, "loss_align": 0.17034724354743958, "loss_varcov": 0.04973532259464264, "loss_mean": 0.6539790630340576, "loss_mean_diff": 0.003116890322417021, "loss_pair": 1.628775498829782e-05, "loss_lang": 0.6908983588218689, "lang_acc_batch": 0.5, "elapsed_sec": 72.41321444511414}
|
| 24 |
+
{"step": 480, "epoch": 0, "loss": 13.19127082824707, "loss_render": 5.466687202453613, "loss_align": 0.41611266136169434, "loss_varcov": 0.051108382642269135, "loss_mean": 0.655809223651886, "loss_mean_diff": 0.006208684761077166, "loss_pair": 3.0602808692492545e-05, "loss_lang": 0.6908812522888184, "lang_acc_batch": 0.75, "elapsed_sec": 74.78872013092041}
|
| 25 |
+
{"step": 500, "epoch": 0, "loss": 12.886927604675293, "loss_render": 5.131045341491699, "loss_align": 0.3131446838378906, "loss_varcov": 0.05863296240568161, "loss_mean": 0.8018555641174316, "loss_mean_diff": 0.008092537522315979, "loss_pair": 3.448034112807363e-05, "loss_lang": 0.689232587814331, "lang_acc_batch": 0.5, "elapsed_sec": 77.15285086631775}
|
adv4_strong_smoke/probe_leakage.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"ckpt": "runs/adv4_strong_smoke/ckpt.pt",
|
| 3 |
+
"max_train_groups": 8000,
|
| 4 |
+
"max_valid_groups": 946,
|
| 5 |
+
"probe_hidden": 0,
|
| 6 |
+
"epochs": 20,
|
| 7 |
+
"lr": 0.01,
|
| 8 |
+
"baseline_majority_lang_acc": 0.5,
|
| 9 |
+
"baseline_majority_len_acc": 0.9271249771118164,
|
| 10 |
+
"lang_probe": {
|
| 11 |
+
"train_acc": 0.9345,
|
| 12 |
+
"valid_acc": 0.9344608879492601,
|
| 13 |
+
"best_valid_acc": 0.959830866807611
|
| 14 |
+
},
|
| 15 |
+
"len_probe": {
|
| 16 |
+
"train_acc": 0.9296875,
|
| 17 |
+
"valid_acc": 0.9043340380549683,
|
| 18 |
+
"best_valid_acc": 0.9043340380549683
|
| 19 |
+
}
|
| 20 |
+
}
|