|
|
/home/yuqian_fu |
|
|
here1 |
|
|
here2 |
|
|
{'gpu': '0', 'data': 'cartoon', 'ntr': None, 'translate': None, 'autoaug': 'CA_multiple', 'n': 3, 'stride': 5, 'factor_num': 16, 'epochs': 70, 'nbatch': 100, 'batchsize': 6, 'lr': 0.01, 'lr_scheduler': 'cosine', 'svroot': '/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/saved-PACS//cartoon/CA_multiple_16fa_v2_ep70_lr0.01_cosine_base0.01_bs6_lamCa_1_lamRe1_adt4_cls1_EW2_70_rmTrue_rnTrue_str5_WithStyleAttackExp1', 'clsadapt': True, 'lambda_causal': 1.0, 'lambda_re': 1.0, 'randm': True, 'randn': True, 'network': 'resnet18'} |
|
|
stride: 5 |
|
|
/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/data/PACS/cartoon_train.hdf5 torch.Size([2107, 3, 227, 227]) torch.Size([2107]) |
|
|
--------------------------CA_multiple-------------------------- |
|
|
---------------------------16 factors----------------- |
|
|
randm: True |
|
|
randn: True |
|
|
n: 3 |
|
|
randm: False |
|
|
/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/data/PACS/cartoon_val.hdf5 torch.Size([237, 3, 227, 227]) torch.Size([237]) |
|
|
-------------------------------------loading pretrain weights---------------------------------- |
|
|
Epoch 1, weight, value: tensor([[-0.0100, 0.0028, 0.0031, ..., -0.0029, -0.0129, 0.0067], |
|
|
[ 0.0146, 0.0136, 0.0209, ..., 0.0182, -0.0162, -0.0063], |
|
|
[ 0.0144, -0.0044, -0.0032, ..., 0.0196, 0.0067, 0.0059], |
|
|
..., |
|
|
[-0.0163, -0.0022, 0.0082, ..., 0.0012, 0.0086, -0.0212], |
|
|
[-0.0167, -0.0119, 0.0066, ..., -0.0117, 0.0125, -0.0117], |
|
|
[-0.0118, -0.0128, -0.0087, ..., -0.0020, 0.0197, 0.0048]], |
|
|
device='cuda:0'), grad: None |
|
|
Epoch 1, bias, value: tensor([-0.0072, -0.0193, 0.0095, 0.0119, 0.0171, 0.0063, -0.0017], |
|
|
device='cuda:0'), grad: None |
|
|
351 |
|
|
0.01 |
|
|
changing lr |
|
|
---------------------saving model at epoch 0---------------------------------------------------- |
|
|
epoch 0, time 485.47, cls_loss 16.9782 cls_loss_mapping 2.0665 cls_loss_causal 2.0978 re_mapping 0.2883 re_causal 0.2878 |
|
|
Epoch 2, weight, value: tensor([[ 0.0028, -0.0005, 0.0340, ..., -0.0002, 0.0233, 0.0462], |
|
|
[ 0.0658, 0.0861, 0.0585, ..., 0.0947, 0.0172, 0.0346], |
|
|
[ 0.0262, -0.0145, 0.0102, ..., 0.0781, 0.0733, 0.0588], |
|
|
..., |
|
|
[-0.1058, -0.0852, -0.0944, ..., 0.0447, 0.0601, 0.0102], |
|
|
[ 0.0011, 0.0218, 0.0048, ..., -0.0536, -0.0391, -0.0657], |
|
|
[-0.0314, -0.0301, 0.0041, ..., -0.0928, -0.0691, -0.0766]], |
|
|
device='cuda:0'), grad: tensor([[-0.0698, -0.0076, -0.0194, ..., -0.0092, -0.0196, -0.0026], |
|
|
[-0.0486, -0.0072, -0.0131, ..., -0.0160, -0.0171, -0.0081], |
|
|
[ 0.0616, 0.0076, 0.0175, ..., 0.0109, 0.0186, 0.0045], |
|
|
..., |
|
|
[ 0.0679, 0.0086, 0.0195, ..., 0.0142, 0.0205, 0.0061], |
|
|
[ 0.0363, 0.0044, 0.0101, ..., 0.0059, 0.0107, 0.0021], |
|
|
[ 0.0342, 0.0043, 0.0096, ..., 0.0069, 0.0106, 0.0030]], |
|
|
device='cuda:0') |
|
|
Epoch 2, bias, value: tensor([-0.0115, -0.2892, 0.0229, -0.1414, -0.0596, 0.3315, 0.1640], |
|
|
device='cuda:0'), grad: tensor([-0.1210, -0.2117, 0.1382, -0.1489, 0.1758, 0.0772, 0.0903], |
|
|
device='cuda:0') |
|
|
351 |
|
|
0.009994965332706574 |
|
|
changing lr |
|
|
---------------------saving model at epoch 1---------------------------------------------------- |
|
|
epoch 1, time 485.94, cls_loss 2.0030 cls_loss_mapping 1.9396 cls_loss_causal 1.9387 re_mapping 0.0647 re_causal 0.0644 |
|
|
Epoch 3, weight, value: tensor([[ 3.4078e-04, 1.6000e-03, 3.1084e-02, ..., -1.0942e-03, |
|
|
2.9201e-02, 5.4861e-02], |
|
|
[ 6.9312e-02, 8.8419e-02, 5.7617e-02, ..., 9.5143e-02, |
|
|
1.9121e-02, 3.5263e-02], |
|
|
[ 2.8979e-02, -1.5773e-02, 2.5921e-02, ..., 6.5189e-02, |
|
|
6.9842e-02, 5.3385e-02], |
|
|
..., |
|
|
[-1.1333e-01, -9.1322e-02, -1.0055e-01, ..., 5.0898e-02, |
|
|
6.5492e-02, 1.1030e-02], |
|
|
[-5.0384e-03, 1.4584e-02, 4.9783e-04, ..., -4.5509e-02, |
|
|
-4.2445e-02, -7.5379e-02], |
|
|
[-2.9434e-02, -2.5451e-02, 6.7909e-05, ..., -9.6225e-02, |
|
|
-7.2842e-02, -7.1301e-02]], device='cuda:0'), grad: tensor([[ 0.0200, 0.0067, 0.0079, ..., 0.0014, 0.0034, -0.0002], |
|
|
[ 0.0065, 0.0008, 0.0016, ..., 0.0011, 0.0014, 0.0007], |
|
|
[ 0.0329, 0.0065, 0.0102, ..., 0.0055, 0.0080, 0.0035], |
|
|
..., |
|
|
[ 0.0395, 0.0060, 0.0104, ..., 0.0067, 0.0091, 0.0043], |
|
|
[-0.0082, 0.0013, -0.0012, ..., -0.0020, -0.0016, -0.0008], |
|
|
[-0.0192, -0.0043, -0.0041, ..., -0.0010, -0.0025, -0.0002]], |
|
|
device='cuda:0') |
|
|
Epoch 3, bias, value: tensor([-0.0612, -0.3493, 0.0748, -0.2091, -0.0425, 0.3946, 0.2087], |
|
|
device='cuda:0'), grad: tensor([ 0.0385, 0.0165, 0.0712, -0.1410, 0.0951, -0.0314, -0.0487], |
|
|
device='cuda:0') |
|
|
351 |
|
|
0.009979871469976196 |
|
|
changing lr |
|
|
epoch 2, time 484.44, cls_loss 1.5550 cls_loss_mapping 1.8738 cls_loss_causal 1.9254 re_mapping 0.0608 re_causal 0.0606 |
|
|
Epoch 4, weight, value: tensor([[-0.0137, -0.0117, 0.0201, ..., -0.0003, 0.0411, 0.0727], |
|
|
[ 0.0566, 0.0884, 0.0561, ..., 0.0828, 0.0134, 0.0221], |
|
|
[ 0.0523, 0.0052, 0.0546, ..., 0.0551, 0.0651, 0.0509], |
|
|
..., |
|
|
[-0.1301, -0.1026, -0.1092, ..., 0.0596, 0.0749, 0.0162], |
|
|
[-0.0134, 0.0029, -0.0080, ..., -0.0267, -0.0355, -0.0760], |
|
|
[-0.0125, -0.0185, -0.0061, ..., -0.1114, -0.0908, -0.0794]], |
|
|
device='cuda:0'), grad: tensor([[-0.1074, -0.0163, -0.0219, ..., -0.0164, -0.0173, -0.0177], |
|
|
[-0.0030, -0.0003, -0.0006, ..., -0.0013, -0.0013, -0.0013], |
|
|
[ 0.0052, 0.0004, 0.0009, ..., 0.0014, 0.0014, 0.0014], |
|
|
..., |
|
|
[ 0.0270, 0.0045, 0.0053, ..., 0.0024, 0.0023, 0.0031], |
|
|
[ 0.0175, 0.0033, 0.0036, ..., 0.0030, 0.0029, 0.0029], |
|
|
[ 0.0384, 0.0056, 0.0080, ..., 0.0066, 0.0071, 0.0070]], |
|
|
device='cuda:0') |
|
|
Epoch 4, bias, value: tensor([-0.0258, -0.3557, 0.0860, -0.2310, -0.0076, 0.3735, 0.1763], |
|
|
device='cuda:0'), grad: tensor([-0.2710, -0.0077, 0.0140, 0.0579, 0.0644, 0.0446, 0.0978], |
|
|
device='cuda:0') |
|
|
351 |
|
|
0.009954748808839675 |
|
|
changing lr |
|
|
---------------------saving model at epoch 3---------------------------------------------------- |
|
|
epoch 3, time 488.33, cls_loss 1.3546 cls_loss_mapping 1.7404 cls_loss_causal 1.8961 re_mapping 0.0588 re_causal 0.0585 |
|
|
Epoch 5, weight, value: tensor([[-0.0074, -0.0123, 0.0209, ..., -0.0032, 0.0541, 0.0888], |
|
|
[ 0.0611, 0.0895, 0.0580, ..., 0.0690, 0.0110, 0.0133], |
|
|
[ 0.0503, 0.0057, 0.0587, ..., 0.0498, 0.0610, 0.0490], |
|
|
..., |
|
|
[-0.1420, -0.1151, -0.1146, ..., 0.0570, 0.0713, 0.0154], |
|
|
[-0.0224, -0.0038, -0.0130, ..., -0.0028, -0.0191, -0.0638], |
|
|
[-0.0048, -0.0111, -0.0072, ..., -0.1203, -0.1105, -0.0966]], |
|
|
device='cuda:0'), grad: tensor([[ 1.4026e-01, 1.3710e-02, 3.0167e-02, ..., 3.7689e-02, |
|
|
3.3051e-02, 3.6926e-02], |
|
|
[ 8.0729e-04, 8.0884e-05, 1.7869e-04, ..., 1.8716e-04, |
|
|
1.3447e-04, 1.5807e-04], |
|
|
[ 2.3224e-02, 2.2202e-03, 5.0621e-03, ..., 5.9090e-03, |
|
|
5.0125e-03, 6.0768e-03], |
|
|
..., |
|
|
[-8.1116e-02, -8.6365e-03, -1.6846e-02, ..., -1.8463e-02, |
|
|
-1.6403e-02, -1.9104e-02], |
|
|
[ 4.2725e-03, 4.3392e-04, 9.1553e-04, ..., 1.0796e-03, |
|
|
7.8964e-04, 9.8324e-04], |
|
|
[-9.1919e-02, -8.2321e-03, -2.0401e-02, ..., -2.7817e-02, |
|
|
-2.3865e-02, -2.6306e-02]], device='cuda:0') |
|
|
Epoch 5, bias, value: tensor([-0.0100, -0.3700, 0.0890, -0.2639, 0.0169, 0.3545, 0.1994], |
|
|
device='cuda:0'), grad: tensor([ 0.4014, 0.0023, 0.0675, 0.0128, -0.2286, 0.0120, -0.2673], |
|
|
device='cuda:0') |
|
|
351 |
|
|
0.009919647942993149 |
|
|
changing lr |
|
|
epoch 4, time 490.81, cls_loss 1.1636 cls_loss_mapping 1.6025 cls_loss_causal 1.8435 re_mapping 0.0581 re_causal 0.0578 |
|
|
Epoch 6, weight, value: tensor([[-0.0092, -0.0115, 0.0200, ..., -0.0068, 0.0601, 0.0934], |
|
|
[ 0.0657, 0.0900, 0.0604, ..., 0.0629, 0.0111, 0.0086], |
|
|
[ 0.0585, 0.0162, 0.0694, ..., 0.0436, 0.0533, 0.0450], |
|
|
..., |
|
|
[-0.1502, -0.1233, -0.1191, ..., 0.0550, 0.0668, 0.0162], |
|
|
[-0.0251, -0.0028, -0.0127, ..., 0.0180, -0.0031, -0.0522], |
|
|
[-0.0082, -0.0168, -0.0138, ..., -0.1336, -0.1261, -0.1108]], |
|
|
device='cuda:0'), grad: tensor([[ 0.0056, 0.0005, 0.0013, ..., 0.0016, 0.0015, 0.0011], |
|
|
[-0.0229, -0.0027, -0.0048, ..., -0.0075, -0.0068, -0.0052], |
|
|
[ 0.0082, 0.0008, 0.0021, ..., 0.0037, 0.0028, 0.0022], |
|
|
..., |
|
|
[-0.0790, -0.0061, -0.0199, ..., -0.0227, -0.0231, -0.0172], |
|
|
[ 0.0002, 0.0001, 0.0004, ..., -0.0002, 0.0006, 0.0005], |
|
|
[ 0.0731, 0.0060, 0.0168, ..., 0.0200, 0.0208, 0.0149]], |
|
|
device='cuda:0') |
|
|
Epoch 6, bias, value: tensor([-0.0260, -0.3347, 0.0891, -0.2536, 0.0157, 0.3505, 0.1747], |
|
|
device='cuda:0'), grad: tensor([ 0.0153, -0.0533, 0.0237, 0.0438, -0.2416, 0.0005, 0.2114], |
|
|
device='cuda:0') |
|
|
351 |
|
|
0.009874639560909117 |
|
|
changing lr |
|
|
epoch 5, time 491.16, cls_loss 0.9659 cls_loss_mapping 1.4805 cls_loss_causal 1.8159 re_mapping 0.0580 re_causal 0.0577 |
|
|
Epoch 7, weight, value: tensor([[-0.0087, -0.0102, 0.0192, ..., -0.0007, 0.0825, 0.1130], |
|
|
[ 0.0642, 0.0912, 0.0624, ..., 0.0528, 0.0038, -0.0031], |
|
|
[ 0.0583, 0.0211, 0.0742, ..., 0.0440, 0.0475, 0.0432], |
|
|
..., |
|
|
[-0.1536, -0.1256, -0.1215, ..., 0.0537, 0.0628, 0.0130], |
|
|
[-0.0222, -0.0026, -0.0132, ..., 0.0380, 0.0139, -0.0401], |
|
|
[-0.0087, -0.0225, -0.0157, ..., -0.1544, -0.1509, -0.1300]], |
|
|
device='cuda:0'), grad: tensor([[ 2.9419e-02, 3.1052e-03, 8.2626e-03, ..., 1.6357e-02, |
|
|
1.6663e-02, 1.5762e-02], |
|
|
[ 1.7862e-03, 1.0294e-04, 4.2748e-04, ..., 4.0174e-04, |
|
|
4.5538e-04, 3.9983e-04], |
|
|
[ 6.8115e-02, 1.1337e-02, 2.0950e-02, ..., 1.4877e-02, |
|
|
1.6281e-02, 1.6724e-02], |
|
|
..., |
|
|
[-7.9285e-02, -1.3260e-02, -2.5253e-02, ..., -2.6566e-02, |
|
|
-2.7786e-02, -2.7908e-02], |
|
|
[-8.7678e-05, 6.4820e-07, -4.7654e-05, ..., -4.4513e-04, |
|
|
-1.5175e-04, -1.6284e-04], |
|
|
[-4.0710e-02, -2.8210e-03, -9.1019e-03, ..., -1.0178e-02, |
|
|
-1.1513e-02, -1.0269e-02]], device='cuda:0') |
|
|
Epoch 7, bias, value: tensor([-0.0170, -0.3173, 0.0795, -0.2454, 0.0139, 0.3444, 0.1572], |
|
|
device='cuda:0'), grad: tensor([ 0.0901, 0.0056, 0.1676, 0.0627, -0.2017, -0.0006, -0.1240], |
|
|
device='cuda:0') |
|
|
351 |
|
|
0.009819814303479266 |
|
|
changing lr |
|
|
epoch 6, time 485.87, cls_loss 0.8556 cls_loss_mapping 1.3801 cls_loss_causal 1.7626 re_mapping 0.0591 re_causal 0.0588 |
|
|
Epoch 8, weight, value: tensor([[-0.0206, -0.0165, 0.0098, ..., -0.0012, 0.0856, 0.1165], |
|
|
[ 0.0669, 0.0905, 0.0646, ..., 0.0561, 0.0145, 0.0017], |
|
|
[ 0.0613, 0.0221, 0.0739, ..., 0.0347, 0.0348, 0.0349], |
|
|
..., |
|
|
[-0.1449, -0.1215, -0.1127, ..., 0.0389, 0.0514, 0.0033], |
|
|
[-0.0210, -0.0022, -0.0147, ..., 0.0576, 0.0264, -0.0269], |
|
|
[-0.0144, -0.0258, -0.0177, ..., -0.1550, -0.1571, -0.1350]], |
|
|
device='cuda:0'), grad: tensor([[-1.8387e-02, 2.7108e-04, -4.0817e-03, ..., 1.6880e-03, |
|
|
-1.1263e-03, 6.9761e-04], |
|
|
[ 2.3499e-02, 4.3144e-03, 4.4899e-03, ..., 8.8425e-03, |
|
|
8.3237e-03, 5.8784e-03], |
|
|
[ 1.4435e-02, 1.4095e-03, 2.9545e-03, ..., 3.0537e-03, |
|
|
3.5877e-03, 2.1877e-03], |
|
|
..., |
|
|
[-4.3671e-02, -9.4910e-03, -8.1940e-03, ..., -1.9760e-02, |
|
|
-1.8127e-02, -1.3351e-02], |
|
|
[-3.3302e-03, -8.7172e-06, -5.5170e-04, ..., -8.2636e-04, |
|
|
-2.4056e-04, -1.7405e-04], |
|
|
[ 1.0490e-02, 1.2846e-03, 1.9875e-03, ..., 2.7065e-03, |
|
|
2.8782e-03, 1.7948e-03]], device='cuda:0') |
|
|
Epoch 8, bias, value: tensor([-0.0110, -0.3280, 0.0902, -0.2517, 0.0217, 0.3492, 0.1446], |
|
|
device='cuda:0'), grad: tensor([-0.0444, 0.0637, 0.0409, 0.0447, -0.1157, -0.0180, 0.0287], |
|
|
device='cuda:0') |
|
|
351 |
|
|
0.009755282581475767 |
|
|
changing lr |
|
|
epoch 7, time 487.93, cls_loss 0.7146 cls_loss_mapping 1.2877 cls_loss_causal 1.6994 re_mapping 0.0594 re_causal 0.0592 |
|
|
Epoch 9, weight, value: tensor([[-0.0208, -0.0210, 0.0043, ..., -0.0036, 0.0879, 0.1206], |
|
|
[ 0.0655, 0.0872, 0.0628, ..., 0.0514, 0.0141, -0.0047], |
|
|
[ 0.0660, 0.0297, 0.0799, ..., 0.0356, 0.0282, 0.0334], |
|
|
..., |
|
|
[-0.1432, -0.1176, -0.1069, ..., 0.0358, 0.0539, 0.0041], |
|
|
[-0.0209, -0.0021, -0.0152, ..., 0.0649, 0.0277, -0.0243], |
|
|
[-0.0217, -0.0287, -0.0192, ..., -0.1577, -0.1638, -0.1406]], |
|
|
device='cuda:0'), grad: tensor([[-7.3004e-04, -3.0255e-04, -2.6321e-04, ..., 3.6263e-04, |
|
|
1.9729e-04, 2.9969e-04], |
|
|
[ 4.7493e-04, 2.4632e-05, 8.4996e-05, ..., 2.2411e-04, |
|
|
2.1279e-04, 2.3365e-04], |
|
|
[-1.7941e-05, -1.2837e-05, -1.4558e-05, ..., 2.2948e-06, |
|
|
-4.7162e-06, -4.2170e-06], |
|
|
..., |
|
|
[-1.8625e-03, -1.7643e-05, -2.9802e-04, ..., -1.2522e-03, |
|
|
-1.2255e-03, -1.3466e-03], |
|
|
[ 4.2328e-02, 1.7605e-03, 7.6981e-03, ..., 1.1963e-02, |
|
|
7.0267e-03, 8.6975e-03], |
|
|
[-4.2358e-02, -1.7443e-03, -7.6790e-03, ..., -1.1917e-02, |
|
|
-6.9313e-03, -8.6136e-03]], device='cuda:0') |
|
|
Epoch 9, bias, value: tensor([-0.0195, -0.3211, 0.0706, -0.2405, 0.0008, 0.3643, 0.1601], |
|
|
device='cuda:0'), grad: tensor([-1.0853e-03, 1.5802e-03, -1.3411e-05, 6.2561e-03, -6.5804e-03, |
|
|
1.4355e-01, -1.4368e-01], device='cuda:0') |
|
|
351 |
|
|
0.009681174353198686 |
|
|
changing lr |
|
|
epoch 8, time 487.83, cls_loss 0.6065 cls_loss_mapping 1.1835 cls_loss_causal 1.6608 re_mapping 0.0617 re_causal 0.0615 |
|
|
Epoch 10, weight, value: tensor([[-2.4050e-02, -1.7761e-02, 3.4237e-03, ..., -1.7390e-03, |
|
|
8.9277e-02, 1.2650e-01], |
|
|
[ 6.3247e-02, 8.4007e-02, 6.1289e-02, ..., 5.2398e-02, |
|
|
1.3658e-02, -8.1415e-03], |
|
|
[ 6.9869e-02, 3.1879e-02, 8.0394e-02, ..., 2.8270e-02, |
|
|
1.8491e-02, 2.4165e-02], |
|
|
..., |
|
|
[-1.4282e-01, -1.2039e-01, -1.0256e-01, ..., 2.5484e-02, |
|
|
5.2426e-02, 1.4157e-04], |
|
|
[-2.1257e-02, -1.3096e-03, -1.7762e-02, ..., 7.3701e-02, |
|
|
2.8826e-02, -2.2097e-02], |
|
|
[-2.1198e-02, -2.8250e-02, -1.8889e-02, ..., -1.6167e-01, |
|
|
-1.6683e-01, -1.4144e-01]], device='cuda:0'), grad: tensor([[ 1.7500e-03, 3.7360e-04, 5.3549e-04, ..., 3.4451e-04, |
|
|
4.9019e-04, 4.7493e-04], |
|
|
[ 2.2078e-04, 5.2124e-05, 7.2539e-05, ..., 4.3780e-05, |
|
|
6.1393e-05, 5.5730e-05], |
|
|
[-4.3671e-02, -8.8120e-03, -1.5808e-02, ..., -1.7059e-02, |
|
|
-1.6403e-02, -1.5350e-02], |
|
|
..., |
|
|
[ 3.6896e-02, 7.3662e-03, 1.3657e-02, ..., 1.5488e-02, |
|
|
1.4389e-02, 1.3420e-02], |
|
|
[-5.0688e-04, 7.3314e-06, -6.0052e-05, ..., -1.6499e-04, |
|
|
-7.8738e-05, -2.9728e-05], |
|
|
[ 1.5955e-03, 2.3997e-04, 4.1461e-04, ..., 3.8505e-04, |
|
|
3.8671e-04, 3.2830e-04]], device='cuda:0') |
|
|
Epoch 10, bias, value: tensor([-0.0185, -0.3178, 0.0688, -0.2319, 0.0088, 0.3629, 0.1423], |
|
|
device='cuda:0'), grad: tensor([ 0.0046, 0.0006, -0.1209, 0.0099, 0.1028, -0.0015, 0.0044], |
|
|
device='cuda:0') |
|
|
351 |
|
|
0.009597638862757255 |
|
|
changing lr |
|
|
---------------------saving model at epoch 9---------------------------------------------------- |
|
|
epoch 9, time 496.62, cls_loss 0.5204 cls_loss_mapping 1.0647 cls_loss_causal 1.5735 re_mapping 0.0619 re_causal 0.0617 |
|
|
Epoch 11, weight, value: tensor([[-0.0143, -0.0127, 0.0037, ..., 0.0025, 0.0978, 0.1330], |
|
|
[ 0.0605, 0.0804, 0.0595, ..., 0.0587, 0.0218, -0.0022], |
|
|
[ 0.0698, 0.0287, 0.0795, ..., 0.0239, 0.0185, 0.0256], |
|
|
..., |
|
|
[-0.1494, -0.1267, -0.1035, ..., 0.0184, 0.0450, -0.0050], |
|
|
[-0.0252, 0.0017, -0.0171, ..., 0.0766, 0.0278, -0.0212], |
|
|
[-0.0184, -0.0282, -0.0203, ..., -0.1655, -0.1755, -0.1504]], |
|
|
device='cuda:0'), grad: tensor([[ 1.2459e-02, 1.3876e-03, 2.9373e-03, ..., 2.5539e-03, |
|
|
2.7657e-03, 3.2463e-03], |
|
|
[ 1.9388e-03, 1.8775e-04, 4.2844e-04, ..., 4.3082e-04, |
|
|
4.6372e-04, 5.4216e-04], |
|
|
[ 5.2612e-02, 1.4328e-02, 2.0798e-02, ..., 4.3564e-03, |
|
|
5.9853e-03, 6.3286e-03], |
|
|
..., |
|
|
[ 2.6886e-02, 2.6340e-03, 6.0768e-03, ..., 5.0621e-03, |
|
|
5.5580e-03, 6.7787e-03], |
|
|
[ 6.6996e-05, 9.0525e-06, 1.7419e-05, ..., 3.0063e-06, |
|
|
5.5954e-06, 9.2536e-06], |
|
|
[-9.5337e-02, -1.8707e-02, -3.0579e-02, ..., -1.2718e-02, |
|
|
-1.5114e-02, -1.7288e-02]], device='cuda:0') |
|
|
Epoch 11, bias, value: tensor([ 0.0015, -0.3192, 0.0555, -0.2433, 0.0173, 0.3439, 0.1586], |
|
|
device='cuda:0'), grad: tensor([ 4.2358e-02, 6.6528e-03, 1.5479e-01, 4.8218e-03, 9.2896e-02, |
|
|
2.2638e-04, -3.0176e-01], device='cuda:0') |
|
|
351 |
|
|
0.009504844339512096 |
|
|
changing lr |
|
|
epoch 10, time 488.71, cls_loss 0.4416 cls_loss_mapping 0.9614 cls_loss_causal 1.5411 re_mapping 0.0639 re_causal 0.0637 |
|
|
Epoch 12, weight, value: tensor([[-0.0199, -0.0105, 0.0022, ..., 0.0044, 0.1020, 0.1346], |
|
|
[ 0.0602, 0.0779, 0.0589, ..., 0.0587, 0.0236, -0.0019], |
|
|
[ 0.0649, 0.0265, 0.0755, ..., 0.0258, 0.0215, 0.0291], |
|
|
..., |
|
|
[-0.1561, -0.1310, -0.1052, ..., 0.0121, 0.0383, -0.0080], |
|
|
[-0.0179, 0.0020, -0.0169, ..., 0.0814, 0.0301, -0.0190], |
|
|
[-0.0118, -0.0252, -0.0170, ..., -0.1677, -0.1806, -0.1532]], |
|
|
device='cuda:0'), grad: tensor([[ 6.2752e-04, 8.8215e-05, 1.4699e-04, ..., 1.3494e-04, |
|
|
8.3327e-05, 4.4316e-05], |
|
|
[ 3.3450e-04, 4.9382e-05, 9.3460e-05, ..., 3.2276e-05, |
|
|
-1.6272e-05, -3.8147e-05], |
|
|
[-1.0910e-02, -1.5612e-03, -2.5730e-03, ..., -2.1000e-03, |
|
|
-1.0977e-03, -4.0007e-04], |
|
|
..., |
|
|
[ 5.4455e-04, 8.6308e-05, 1.0788e-04, ..., 6.6996e-05, |
|
|
9.3579e-06, -3.2425e-05], |
|
|
[ 3.1796e-03, 4.5466e-04, 7.5006e-04, ..., 6.1369e-04, |
|
|
3.2234e-04, 1.1897e-04], |
|
|
[ 3.2673e-03, 4.6039e-04, 7.7772e-04, ..., 6.8188e-04, |
|
|
4.0054e-04, 1.9753e-04]], device='cuda:0') |
|
|
Epoch 12, bias, value: tensor([-0.0175, -0.3137, 0.0387, -0.2354, -0.0062, 0.3748, 0.1735], |
|
|
device='cuda:0'), grad: tensor([ 0.0018, 0.0009, -0.0317, 0.0086, 0.0015, 0.0092, 0.0096], |
|
|
device='cuda:0') |
|
|
351 |
|
|
0.009402977659283692 |
|
|
changing lr |
|
|
---------------------saving model at epoch 11---------------------------------------------------- |
|
|
epoch 11, time 494.18, cls_loss 0.3891 cls_loss_mapping 0.8157 cls_loss_causal 1.4446 re_mapping 0.0666 re_causal 0.0663 |
|
|
Epoch 13, weight, value: tensor([[-0.0114, -0.0099, 0.0014, ..., 0.0046, 0.1073, 0.1377], |
|
|
[ 0.0535, 0.0753, 0.0569, ..., 0.0620, 0.0234, -0.0039], |
|
|
[ 0.0735, 0.0337, 0.0822, ..., 0.0240, 0.0212, 0.0305], |
|
|
..., |
|
|
[-0.1620, -0.1316, -0.1037, ..., 0.0099, 0.0368, -0.0077], |
|
|
[-0.0194, 0.0019, -0.0192, ..., 0.0848, 0.0300, -0.0185], |
|
|
[-0.0162, -0.0297, -0.0210, ..., -0.1704, -0.1856, -0.1569]], |
|
|
device='cuda:0'), grad: tensor([[ 4.6611e-05, 1.1183e-05, 1.9923e-05, ..., -2.2575e-05, |
|
|
-2.5004e-05, -2.1055e-05], |
|
|
[ 1.0766e-05, 3.0249e-06, 4.0717e-06, ..., 2.8312e-06, |
|
|
2.3656e-06, 2.8126e-06], |
|
|
[-1.6193e-03, -5.1022e-04, -6.5613e-04, ..., -5.1498e-04, |
|
|
-3.5930e-04, -4.0555e-04], |
|
|
..., |
|
|
[ 1.3876e-04, 3.0369e-05, 4.1813e-05, ..., 6.6936e-05, |
|
|
7.4625e-05, 8.1360e-05], |
|
|
[ 2.7239e-05, 7.3612e-06, 1.0043e-05, ..., 7.8529e-06, |
|
|
6.9402e-06, 8.0913e-06], |
|
|
[ 6.2734e-06, 2.1681e-05, 1.8269e-05, ..., 1.8656e-05, |
|
|
-9.0450e-06, -1.5110e-05]], device='cuda:0') |
|
|
Epoch 13, bias, value: tensor([-9.4110e-05, -3.2179e-01, 3.0043e-02, -2.2424e-01, -2.7306e-02, |
|
|
3.7682e-01, 1.8037e-01], device='cuda:0'), grad: tensor([ 1.3185e-04, 2.7537e-05, -3.9215e-03, 3.3684e-03, 3.8481e-04, |
|
|
7.0870e-05, -6.4135e-05], device='cuda:0') |
|
|
351 |
|
|
0.009292243968009333 |
|
|
changing lr |
|
|
epoch 12, time 487.57, cls_loss 0.3192 cls_loss_mapping 0.7209 cls_loss_causal 1.3775 re_mapping 0.0656 re_causal 0.0653 |
|
|
Epoch 14, weight, value: tensor([[-0.0128, -0.0094, 0.0006, ..., 0.0067, 0.1094, 0.1410], |
|
|
[ 0.0535, 0.0740, 0.0570, ..., 0.0602, 0.0213, -0.0071], |
|
|
[ 0.0651, 0.0308, 0.0764, ..., 0.0198, 0.0181, 0.0274], |
|
|
..., |
|
|
[-0.1485, -0.1289, -0.0992, ..., 0.0107, 0.0365, -0.0067], |
|
|
[-0.0197, 0.0018, -0.0197, ..., 0.0855, 0.0319, -0.0186], |
|
|
[-0.0218, -0.0307, -0.0218, ..., -0.1687, -0.1871, -0.1557]], |
|
|
device='cuda:0'), grad: tensor([[ 3.2940e-03, 4.0150e-04, 5.7077e-04, ..., 5.7507e-04, |
|
|
1.0185e-03, 9.1982e-04], |
|
|
[ 1.8632e-04, 2.0161e-05, 5.4568e-05, ..., 1.0991e-04, |
|
|
1.2201e-04, 1.3220e-04], |
|
|
[ 1.4639e-04, 1.1936e-05, 3.6836e-05, ..., 5.2154e-05, |
|
|
4.9412e-05, 5.9545e-05], |
|
|
..., |
|
|
[ 3.3045e-04, 9.2089e-06, 7.9036e-05, ..., 4.6194e-05, |
|
|
-1.2025e-05, 2.9653e-05], |
|
|
[ 4.0016e-03, 2.8110e-04, 1.0900e-03, ..., 1.6613e-03, |
|
|
1.4162e-03, 1.8120e-03], |
|
|
[-8.0948e-03, -7.3528e-04, -1.8692e-03, ..., -2.5005e-03, |
|
|
-2.6455e-03, -3.0155e-03]], device='cuda:0') |
|
|
Epoch 14, bias, value: tensor([-0.0019, -0.3214, 0.0035, -0.2192, 0.0107, 0.3626, 0.1791], |
|
|
device='cuda:0'), grad: tensor([ 0.0094, 0.0007, 0.0006, 0.0006, 0.0016, 0.0171, -0.0299], |
|
|
device='cuda:0') |
|
|
351 |
|
|
0.009172866268606516 |
|
|
changing lr |
|
|
epoch 13, time 489.06, cls_loss 0.2306 cls_loss_mapping 0.6268 cls_loss_causal 1.2624 re_mapping 0.0687 re_causal 0.0684 |
|
|
Epoch 15, weight, value: tensor([[-0.0146, -0.0088, -0.0035, ..., 0.0025, 0.1056, 0.1395], |
|
|
[ 0.0602, 0.0743, 0.0604, ..., 0.0627, 0.0234, -0.0063], |
|
|
[ 0.0683, 0.0323, 0.0766, ..., 0.0169, 0.0138, 0.0250], |
|
|
..., |
|
|
[-0.1461, -0.1274, -0.0961, ..., 0.0092, 0.0416, -0.0031], |
|
|
[-0.0188, 0.0011, -0.0206, ..., 0.0915, 0.0352, -0.0144], |
|
|
[-0.0272, -0.0327, -0.0228, ..., -0.1682, -0.1902, -0.1596]], |
|
|
device='cuda:0'), grad: tensor([[ 0.0088, 0.0010, 0.0025, ..., 0.0017, 0.0029, 0.0036], |
|
|
[ 0.0007, 0.0002, 0.0003, ..., 0.0004, 0.0003, 0.0004], |
|
|
[-0.0638, -0.0165, -0.0286, ..., -0.0233, -0.0222, -0.0276], |
|
|
..., |
|
|
[ 0.0265, 0.0029, 0.0075, ..., 0.0056, 0.0092, 0.0112], |
|
|
[ 0.0006, 0.0003, 0.0004, ..., 0.0004, 0.0002, 0.0003], |
|
|
[ 0.0212, 0.0095, 0.0139, ..., 0.0119, 0.0074, 0.0095]], |
|
|
device='cuda:0') |
|
|
Epoch 15, bias, value: tensor([-0.0048, -0.3123, 0.0152, -0.2279, 0.0035, 0.3635, 0.1762], |
|
|
device='cuda:0'), grad: tensor([ 0.0277, 0.0026, -0.2307, 0.0250, 0.0837, 0.0027, 0.0891], |
|
|
device='cuda:0') |
|
|
351 |
|
|
0.00904508497187474 |
|
|
changing lr |
|
|
epoch 14, time 487.91, cls_loss 0.2457 cls_loss_mapping 0.5984 cls_loss_causal 1.2677 re_mapping 0.0668 re_causal 0.0666 |
|
|
Epoch 16, weight, value: tensor([[-0.0190, -0.0126, -0.0079, ..., -0.0027, 0.1032, 0.1370], |
|
|
[ 0.0610, 0.0744, 0.0602, ..., 0.0630, 0.0232, -0.0067], |
|
|
[ 0.0704, 0.0346, 0.0775, ..., 0.0177, 0.0127, 0.0243], |
|
|
..., |
|
|
[-0.1485, -0.1284, -0.0956, ..., 0.0113, 0.0454, 0.0003], |
|
|
[-0.0218, 0.0008, -0.0216, ..., 0.0921, 0.0343, -0.0144], |
|
|
[-0.0231, -0.0305, -0.0202, ..., -0.1639, -0.1877, -0.1558]], |
|
|
device='cuda:0'), grad: tensor([[ 3.6216e-04, 6.3956e-05, 7.0810e-05, ..., 1.5664e-04, |
|
|
1.2243e-04, 1.6391e-04], |
|
|
[ 3.8025e-02, 7.5226e-03, 1.1467e-02, ..., 1.0361e-02, |
|
|
1.5282e-02, 1.5373e-02], |
|
|
[-3.8666e-02, -7.6523e-03, -1.1673e-02, ..., -1.0529e-02, |
|
|
-1.5549e-02, -1.5640e-02], |
|
|
..., |
|
|
[ 1.2600e-04, 4.2140e-05, 8.7678e-05, ..., -1.9401e-05, |
|
|
7.9334e-05, 3.8594e-05], |
|
|
[ 1.6558e-04, 3.2425e-05, 4.8608e-05, ..., 4.7058e-05, |
|
|
6.5863e-05, 6.7592e-05], |
|
|
[ 6.8665e-05, 1.3933e-05, 2.0951e-05, ..., 1.8701e-05, |
|
|
2.7299e-05, 2.7537e-05]], device='cuda:0') |
|
|
Epoch 16, bias, value: tensor([-0.0068, -0.2992, 0.0154, -0.2199, -0.0004, 0.3446, 0.1795], |
|
|
device='cuda:0'), grad: tensor([ 1.2054e-03, 1.0278e-01, -1.0455e-01, -1.5152e-04, 6.4135e-05, |
|
|
4.5562e-04, 1.8466e-04], device='cuda:0') |
|
|
351 |
|
|
0.008909157412340152 |
|
|
changing lr |
|
|
epoch 15, time 487.09, cls_loss 0.1644 cls_loss_mapping 0.5142 cls_loss_causal 1.1892 re_mapping 0.0672 re_causal 0.0669 |
|
|
Epoch 17, weight, value: tensor([[-0.0198, -0.0133, -0.0073, ..., -0.0034, 0.1047, 0.1385], |
|
|
[ 0.0540, 0.0720, 0.0575, ..., 0.0566, 0.0184, -0.0133], |
|
|
[ 0.0707, 0.0354, 0.0767, ..., 0.0200, 0.0152, 0.0280], |
|
|
..., |
|
|
[-0.1433, -0.1255, -0.0916, ..., 0.0082, 0.0417, -0.0034], |
|
|
[-0.0192, 0.0005, -0.0220, ..., 0.0945, 0.0362, -0.0125], |
|
|
[-0.0222, -0.0315, -0.0223, ..., -0.1588, -0.1861, -0.1524]], |
|
|
device='cuda:0'), grad: tensor([[ 5.0049e-03, 6.1369e-04, 1.1215e-03, ..., 2.3670e-03, |
|
|
2.7428e-03, 3.0746e-03], |
|
|
[-5.3482e-03, -6.4516e-04, -1.1797e-03, ..., -2.5311e-03, |
|
|
-2.9335e-03, -3.2787e-03], |
|
|
[ 1.3359e-05, 2.4792e-06, 3.2727e-06, ..., 4.5523e-06, |
|
|
4.6417e-06, 5.2229e-06], |
|
|
..., |
|
|
[ 4.4912e-05, 8.4862e-06, 1.0416e-05, ..., 1.3746e-05, |
|
|
1.2174e-05, 1.3910e-05], |
|
|
[ 2.6345e-04, 5.5492e-05, 6.3419e-05, ..., 6.0797e-05, |
|
|
4.9591e-05, 5.6654e-05], |
|
|
[ 1.0535e-05, -3.6955e-05, -2.1502e-05, ..., 8.2850e-05, |
|
|
1.2177e-04, 1.2362e-04]], device='cuda:0') |
|
|
Epoch 17, bias, value: tensor([-0.0110, -0.3091, 0.0176, -0.2245, 0.0073, 0.3497, 0.1830], |
|
|
device='cuda:0'), grad: tensor([ 1.5442e-02, -1.6510e-02, 3.9279e-05, 2.3291e-05, 1.3268e-04, |
|
|
7.6675e-04, 9.7513e-05], device='cuda:0') |
|
|
351 |
|
|
0.00876535733001806 |
|
|
changing lr |
|
|
epoch 16, time 486.79, cls_loss 0.1451 cls_loss_mapping 0.4625 cls_loss_causal 1.1703 re_mapping 0.0665 re_causal 0.0663 |
|
|
Epoch 18, weight, value: tensor([[-0.0251, -0.0154, -0.0094, ..., -0.0082, 0.1041, 0.1376], |
|
|
[ 0.0554, 0.0698, 0.0561, ..., 0.0582, 0.0230, -0.0094], |
|
|
[ 0.0726, 0.0386, 0.0782, ..., 0.0188, 0.0155, 0.0284], |
|
|
..., |
|
|
[-0.1426, -0.1242, -0.0895, ..., 0.0081, 0.0359, -0.0078], |
|
|
[-0.0184, 0.0003, -0.0225, ..., 0.0962, 0.0364, -0.0117], |
|
|
[-0.0208, -0.0312, -0.0229, ..., -0.1566, -0.1876, -0.1535]], |
|
|
device='cuda:0'), grad: tensor([[-3.5076e-03, -3.7932e-04, -9.4175e-04, ..., -8.6021e-04, |
|
|
-2.0180e-03, -2.1801e-03], |
|
|
[ 3.5357e-04, 4.5151e-05, 9.7215e-05, ..., 8.0526e-05, |
|
|
1.9038e-04, 2.0647e-04], |
|
|
[-2.5787e-03, -8.8978e-04, -1.0452e-03, ..., -4.4012e-04, |
|
|
-3.0732e-04, -2.9087e-04], |
|
|
..., |
|
|
[ 3.7956e-04, 7.5400e-05, 1.2279e-04, ..., 8.1301e-05, |
|
|
1.5175e-04, 1.6046e-04], |
|
|
[ 4.0770e-04, 1.2398e-04, 1.5557e-04, ..., 7.5758e-05, |
|
|
7.9870e-05, 8.0645e-05], |
|
|
[ 3.2368e-03, 4.5490e-04, 9.3269e-04, ..., 7.6056e-04, |
|
|
1.6651e-03, 1.7910e-03]], device='cuda:0') |
|
|
Epoch 18, bias, value: tensor([-0.0241, -0.2958, 0.0095, -0.2232, 0.0036, 0.3491, 0.1936], |
|
|
device='cuda:0'), grad: tensor([-0.0110, 0.0011, -0.0058, 0.0039, 0.0011, 0.0010, 0.0097], |
|
|
device='cuda:0') |
|
|
351 |
|
|
0.008613974319136962 |
|
|
changing lr |
|
|
epoch 17, time 490.87, cls_loss 0.1359 cls_loss_mapping 0.4107 cls_loss_causal 1.1271 re_mapping 0.0690 re_causal 0.0689 |
|
|
Epoch 19, weight, value: tensor([[-0.0262, -0.0149, -0.0089, ..., -0.0061, 0.1036, 0.1387], |
|
|
[ 0.0530, 0.0664, 0.0550, ..., 0.0562, 0.0261, -0.0083], |
|
|
[ 0.0759, 0.0417, 0.0804, ..., 0.0192, 0.0141, 0.0275], |
|
|
..., |
|
|
[-0.1386, -0.1222, -0.0873, ..., 0.0064, 0.0339, -0.0098], |
|
|
[-0.0211, -0.0006, -0.0238, ..., 0.0952, 0.0354, -0.0127], |
|
|
[-0.0220, -0.0315, -0.0255, ..., -0.1548, -0.1872, -0.1521]], |
|
|
device='cuda:0'), grad: tensor([[ 2.1145e-05, 1.6410e-06, 2.7213e-06, ..., 1.2532e-05, |
|
|
9.9614e-06, 1.2040e-05], |
|
|
[ 4.3586e-06, 5.0291e-07, 8.1584e-07, ..., 2.3693e-06, |
|
|
1.9353e-06, 2.2948e-06], |
|
|
[-1.2871e-06, -4.6566e-07, -8.1956e-07, ..., 7.4506e-09, |
|
|
-4.6380e-07, -6.3702e-07], |
|
|
..., |
|
|
[-3.9369e-05, -3.1348e-06, -5.4277e-06, ..., -2.2262e-05, |
|
|
-1.7211e-05, -2.0772e-05], |
|
|
[ 7.0855e-06, 7.5996e-07, 1.2480e-06, ..., 3.7327e-06, |
|
|
2.9169e-06, 3.5390e-06], |
|
|
[ 8.8438e-06, 1.6782e-06, 2.3842e-06, ..., 4.1127e-06, |
|
|
3.3025e-06, 4.1015e-06]], device='cuda:0') |
|
|
Epoch 19, bias, value: tensor([-0.0223, -0.2936, 0.0166, -0.2197, 0.0082, 0.3351, 0.1881], |
|
|
device='cuda:0'), grad: tensor([ 6.7830e-05, 1.3463e-05, -3.2894e-06, 3.5204e-07, -1.2648e-04, |
|
|
2.2292e-05, 2.5839e-05], device='cuda:0') |
|
|
351 |
|
|
0.008455313244934327 |
|
|
changing lr |
|
|
epoch 18, time 489.60, cls_loss 0.0976 cls_loss_mapping 0.3568 cls_loss_causal 1.0910 re_mapping 0.0673 re_causal 0.0673 |
|
|
Epoch 20, weight, value: tensor([[-0.0217, -0.0144, -0.0075, ..., -0.0047, 0.1070, 0.1415], |
|
|
[ 0.0498, 0.0659, 0.0546, ..., 0.0559, 0.0258, -0.0093], |
|
|
[ 0.0745, 0.0411, 0.0786, ..., 0.0179, 0.0144, 0.0282], |
|
|
..., |
|
|
[-0.1379, -0.1209, -0.0862, ..., 0.0031, 0.0304, -0.0127], |
|
|
[-0.0152, 0.0002, -0.0226, ..., 0.0979, 0.0375, -0.0094], |
|
|
[-0.0275, -0.0336, -0.0282, ..., -0.1550, -0.1903, -0.1554]], |
|
|
device='cuda:0'), grad: tensor([[ 6.8140e-04, 9.6321e-05, 2.1148e-04, ..., 1.2398e-04, |
|
|
2.8491e-04, 2.6059e-04], |
|
|
[ 8.2016e-05, 1.1437e-05, 2.5108e-05, ..., 1.6108e-05, |
|
|
3.4273e-05, 3.1531e-05], |
|
|
[ 7.6714e-03, 1.0843e-03, 2.3823e-03, ..., 1.3924e-03, |
|
|
3.2101e-03, 2.9316e-03], |
|
|
..., |
|
|
[-8.6060e-03, -1.2188e-03, -2.6779e-03, ..., -1.5488e-03, |
|
|
-3.6030e-03, -3.2883e-03], |
|
|
[ 8.0839e-06, 2.9299e-06, 6.7875e-06, ..., -1.3940e-05, |
|
|
3.6377e-06, 3.3900e-07], |
|
|
[ 8.2791e-05, 1.1660e-05, 2.5585e-05, ..., 1.5527e-05, |
|
|
3.4660e-05, 3.1769e-05]], device='cuda:0') |
|
|
Epoch 20, bias, value: tensor([-0.0104, -0.3015, 0.0106, -0.2195, 0.0064, 0.3531, 0.1737], |
|
|
device='cuda:0'), grad: tensor([ 2.2831e-03, 2.7514e-04, 2.5711e-02, 2.7728e-04, -2.8854e-02, |
|
|
2.4706e-05, 2.7752e-04], device='cuda:0') |
|
|
351 |
|
|
0.008289693629698565 |
|
|
changing lr |
|
|
---------------------saving model at epoch 19---------------------------------------------------- |
|
|
epoch 19, time 497.06, cls_loss 0.1102 cls_loss_mapping 0.3219 cls_loss_causal 1.0679 re_mapping 0.0690 re_causal 0.0690 |
|
|
Epoch 21, weight, value: tensor([[-0.0224, -0.0154, -0.0073, ..., -0.0084, 0.1079, 0.1397], |
|
|
[ 0.0464, 0.0648, 0.0531, ..., 0.0535, 0.0237, -0.0116], |
|
|
[ 0.0759, 0.0387, 0.0755, ..., 0.0178, 0.0120, 0.0267], |
|
|
..., |
|
|
[-0.1408, -0.1212, -0.0868, ..., 0.0026, 0.0276, -0.0143], |
|
|
[-0.0176, -0.0003, -0.0230, ..., 0.0976, 0.0375, -0.0091], |
|
|
[-0.0218, -0.0310, -0.0260, ..., -0.1495, -0.1864, -0.1501]], |
|
|
device='cuda:0'), grad: tensor([[ 4.0054e-05, 2.4103e-06, 7.0930e-06, ..., 1.6317e-05, |
|
|
9.4175e-06, 1.6287e-05], |
|
|
[ 2.0787e-06, 9.4995e-08, 3.2783e-07, ..., 8.7358e-07, |
|
|
4.6566e-07, 8.5682e-07], |
|
|
[ 8.4378e-07, 3.7253e-08, 1.4342e-07, ..., 3.5577e-07, |
|
|
2.1048e-07, 3.4459e-07], |
|
|
..., |
|
|
[-1.4752e-06, -2.1793e-07, -4.3400e-07, ..., -1.2107e-06, |
|
|
-1.3504e-06, -1.4361e-06], |
|
|
[ 1.5274e-06, 6.7055e-08, 2.4587e-07, ..., 6.0722e-07, |
|
|
3.1851e-07, 6.0722e-07], |
|
|
[-4.6432e-05, -3.0883e-06, -8.5682e-06, ..., -1.7300e-05, |
|
|
-9.3505e-06, -1.7017e-05]], device='cuda:0') |
|
|
Epoch 21, bias, value: tensor([-0.0157, -0.3083, 0.0252, -0.2177, -0.0058, 0.3422, 0.1923], |
|
|
device='cuda:0'), grad: tensor([ 1.3244e-04, 6.9551e-06, 2.8443e-06, 9.5814e-06, -4.6417e-06, |
|
|
5.1148e-06, -1.5259e-04], device='cuda:0') |
|
|
351 |
|
|
0.00811744900929367 |
|
|
changing lr |
|
|
---------------------saving model at epoch 20---------------------------------------------------- |
|
|
epoch 20, time 492.87, cls_loss 0.0760 cls_loss_mapping 0.2840 cls_loss_causal 1.0343 re_mapping 0.0687 re_causal 0.0688 |
|
|
Epoch 22, weight, value: tensor([[-2.5705e-02, -1.5634e-02, -7.1052e-03, ..., -1.0747e-02, |
|
|
1.0806e-01, 1.3788e-01], |
|
|
[ 5.1844e-02, 6.5237e-02, 5.4339e-02, ..., 5.6280e-02, |
|
|
2.6188e-02, -9.1537e-03], |
|
|
[ 6.8620e-02, 3.6508e-02, 7.1406e-02, ..., 1.6478e-02, |
|
|
9.5231e-03, 2.5444e-02], |
|
|
..., |
|
|
[-1.3768e-01, -1.1991e-01, -8.5010e-02, ..., 2.3305e-03, |
|
|
2.6403e-02, -1.5035e-02], |
|
|
[-1.5751e-02, -7.5948e-05, -2.3335e-02, ..., 9.7384e-02, |
|
|
3.7156e-02, -9.2214e-03], |
|
|
[-2.1356e-02, -3.0200e-02, -2.5406e-02, ..., -1.4869e-01, |
|
|
-1.8599e-01, -1.4877e-01]], device='cuda:0'), grad: tensor([[ 2.1867e-06, 2.5891e-07, 4.9360e-07, ..., 2.1048e-07, |
|
|
5.0291e-08, 2.0303e-07], |
|
|
[ 5.4315e-06, 6.2026e-07, 1.2163e-06, ..., 5.7369e-07, |
|
|
2.3656e-07, 6.0908e-07], |
|
|
[ 5.8375e-06, 2.1812e-06, 3.0119e-06, ..., 1.9372e-06, |
|
|
2.6189e-06, 2.4084e-06], |
|
|
..., |
|
|
[ 5.4896e-05, 2.3603e-05, 3.2067e-05, ..., 1.9833e-05, |
|
|
2.8804e-05, 2.5585e-05], |
|
|
[ 2.3600e-06, 1.9185e-07, 4.9174e-07, ..., -3.5390e-08, |
|
|
-1.6019e-07, 3.3528e-08], |
|
|
[-1.3016e-05, -1.3337e-06, -2.7455e-06, ..., -1.2256e-06, |
|
|
-3.1479e-07, -1.2722e-06]], device='cuda:0') |
|
|
Epoch 22, bias, value: tensor([-0.0246, -0.2882, 0.0090, -0.2145, -0.0027, 0.3421, 0.1908], |
|
|
device='cuda:0'), grad: tensor([ 7.1079e-06, 1.7703e-05, 1.3880e-05, -1.2386e-04, 1.2010e-04, |
|
|
7.9721e-06, -4.2945e-05], device='cuda:0') |
|
|
351 |
|
|
0.007938926261462368 |
|
|
changing lr |
|
|
---------------------saving model at epoch 21---------------------------------------------------- |
|
|
epoch 21, time 495.37, cls_loss 0.0555 cls_loss_mapping 0.2618 cls_loss_causal 1.0042 re_mapping 0.0691 re_causal 0.0694 |
|
|
Epoch 23, weight, value: tensor([[-0.0250, -0.0152, -0.0076, ..., -0.0115, 0.1060, 0.1355], |
|
|
[ 0.0525, 0.0647, 0.0542, ..., 0.0553, 0.0270, -0.0090], |
|
|
[ 0.0669, 0.0349, 0.0696, ..., 0.0153, 0.0090, 0.0249], |
|
|
..., |
|
|
[-0.1370, -0.1186, -0.0835, ..., 0.0030, 0.0262, -0.0145], |
|
|
[-0.0132, -0.0003, -0.0236, ..., 0.0985, 0.0381, -0.0077], |
|
|
[-0.0240, -0.0302, -0.0255, ..., -0.1479, -0.1849, -0.1475]], |
|
|
device='cuda:0'), grad: tensor([[-5.6386e-05, -6.1654e-06, -1.6123e-05, ..., -5.9873e-05, |
|
|
-7.1406e-05, -7.0572e-05], |
|
|
[ 2.3186e-04, 3.1978e-05, 7.4446e-05, ..., 1.0973e-04, |
|
|
9.6023e-05, 1.0985e-04], |
|
|
[-6.2808e-06, -3.4869e-06, -7.7672e-07, ..., 2.3004e-06, |
|
|
1.0543e-06, 1.2200e-06], |
|
|
..., |
|
|
[ 5.5641e-05, 1.2450e-05, 1.6242e-05, ..., 1.5393e-05, |
|
|
9.5591e-06, 1.1526e-05], |
|
|
[ 5.9242e-03, 7.5436e-04, 2.0351e-03, ..., 1.7452e-03, |
|
|
8.3017e-04, 1.3838e-03], |
|
|
[-6.1455e-03, -7.8821e-04, -2.1076e-03, ..., -1.8110e-03, |
|
|
-8.6451e-04, -1.4343e-03]], device='cuda:0') |
|
|
Epoch 23, bias, value: tensor([-0.0237, -0.2811, 0.0093, -0.2139, -0.0032, 0.3464, 0.1781], |
|
|
device='cuda:0'), grad: tensor([-1.8227e-04, 7.1716e-04, -3.7491e-05, -1.1344e-06, 1.5235e-04, |
|
|
1.8356e-02, -1.8997e-02], device='cuda:0') |
|
|
351 |
|
|
0.007754484907260515 |
|
|
changing lr |
|
|
epoch 22, time 489.20, cls_loss 0.0471 cls_loss_mapping 0.2469 cls_loss_causal 0.9975 re_mapping 0.0680 re_causal 0.0683 |
|
|
Epoch 24, weight, value: tensor([[-0.0221, -0.0143, -0.0068, ..., -0.0097, 0.1085, 0.1372], |
|
|
[ 0.0452, 0.0622, 0.0522, ..., 0.0519, 0.0224, -0.0141], |
|
|
[ 0.0691, 0.0355, 0.0695, ..., 0.0148, 0.0087, 0.0250], |
|
|
..., |
|
|
[-0.1330, -0.1165, -0.0813, ..., 0.0051, 0.0275, -0.0122], |
|
|
[-0.0186, -0.0018, -0.0254, ..., 0.0961, 0.0369, -0.0089], |
|
|
[-0.0209, -0.0295, -0.0248, ..., -0.1462, -0.1833, -0.1452]], |
|
|
device='cuda:0'), grad: tensor([[ 1.4174e-04, 1.3284e-05, 2.9057e-05, ..., 1.0335e-04, |
|
|
7.4446e-05, 7.3075e-05], |
|
|
[ 1.5712e-04, 1.6555e-05, 2.9877e-05, ..., 8.7261e-05, |
|
|
7.5936e-05, 7.2062e-05], |
|
|
[ 1.4429e-03, 1.5485e-04, 2.7990e-04, ..., 8.6403e-04, |
|
|
7.5531e-04, 7.2527e-04], |
|
|
..., |
|
|
[ 1.9875e-03, 2.1029e-04, 3.7670e-04, ..., 1.0900e-03, |
|
|
9.5558e-04, 9.0551e-04], |
|
|
[-4.4212e-03, -4.6921e-04, -8.4877e-04, ..., -2.5463e-03, |
|
|
-2.2163e-03, -2.1152e-03], |
|
|
[ 4.1676e-04, 4.4823e-05, 7.9811e-05, ..., 2.3985e-04, |
|
|
2.1195e-04, 2.0242e-04]], device='cuda:0') |
|
|
Epoch 24, bias, value: tensor([-0.0165, -0.2987, 0.0153, -0.2091, 0.0057, 0.3272, 0.1878], |
|
|
device='cuda:0'), grad: tensor([ 0.0006, 0.0006, 0.0057, 0.0011, 0.0080, -0.0178, 0.0017], |
|
|
device='cuda:0') |
|
|
351 |
|
|
0.007564496387029534 |
|
|
changing lr |
|
|
epoch 23, time 487.18, cls_loss 0.0468 cls_loss_mapping 0.2215 cls_loss_causal 0.9253 re_mapping 0.0658 re_causal 0.0661 |
|
|
Epoch 25, weight, value: tensor([[-0.0234, -0.0146, -0.0073, ..., -0.0096, 0.1084, 0.1368], |
|
|
[ 0.0481, 0.0616, 0.0525, ..., 0.0520, 0.0238, -0.0126], |
|
|
[ 0.0674, 0.0362, 0.0695, ..., 0.0141, 0.0085, 0.0248], |
|
|
..., |
|
|
[-0.1308, -0.1160, -0.0815, ..., 0.0056, 0.0258, -0.0134], |
|
|
[-0.0168, -0.0017, -0.0253, ..., 0.0961, 0.0370, -0.0083], |
|
|
[-0.0243, -0.0299, -0.0251, ..., -0.1461, -0.1824, -0.1447]], |
|
|
device='cuda:0'), grad: tensor([[ 6.6817e-05, 1.1541e-05, 1.8194e-05, ..., 4.5657e-05, |
|
|
4.5270e-05, 4.9025e-05], |
|
|
[ 6.1035e-05, 1.0975e-05, 1.7136e-05, ..., 4.0889e-05, |
|
|
4.0948e-05, 4.4376e-05], |
|
|
[-1.2368e-05, -4.3958e-06, -4.6641e-06, ..., -1.9595e-06, |
|
|
-1.4678e-06, -2.7642e-06], |
|
|
..., |
|
|
[-5.5218e-04, -9.5844e-05, -1.5271e-04, ..., -3.7932e-04, |
|
|
-3.8004e-04, -4.1032e-04], |
|
|
[ 3.8296e-05, 7.4282e-06, 1.0990e-05, ..., 2.3633e-05, |
|
|
2.3440e-05, 2.5749e-05], |
|
|
[ 4.2021e-06, 1.1772e-06, 1.3858e-06, ..., 1.8775e-06, |
|
|
1.8701e-06, 2.2650e-06]], device='cuda:0') |
|
|
Epoch 25, bias, value: tensor([-0.0207, -0.2837, 0.0064, -0.2068, 0.0094, 0.3279, 0.1792], |
|
|
device='cuda:0'), grad: tensor([ 1.8096e-04, 1.6356e-04, -3.0279e-05, 1.0567e-03, -1.4839e-03, |
|
|
1.0246e-04, 1.0207e-05], device='cuda:0') |
|
|
351 |
|
|
0.007369343312364995 |
|
|
changing lr |
|
|
epoch 24, time 489.43, cls_loss 0.0561 cls_loss_mapping 0.2156 cls_loss_causal 0.9517 re_mapping 0.0632 re_causal 0.0636 |
|
|
Epoch 26, weight, value: tensor([[-0.0244, -0.0149, -0.0076, ..., -0.0097, 0.1082, 0.1363], |
|
|
[ 0.0462, 0.0608, 0.0517, ..., 0.0509, 0.0229, -0.0135], |
|
|
[ 0.0703, 0.0368, 0.0696, ..., 0.0148, 0.0095, 0.0267], |
|
|
..., |
|
|
[-0.1327, -0.1161, -0.0814, ..., 0.0048, 0.0241, -0.0150], |
|
|
[-0.0165, -0.0017, -0.0252, ..., 0.0958, 0.0370, -0.0079], |
|
|
[-0.0249, -0.0295, -0.0252, ..., -0.1451, -0.1814, -0.1440]], |
|
|
device='cuda:0'), grad: tensor([[ 5.2631e-05, 7.4133e-06, 9.4920e-06, ..., 2.3797e-05, |
|
|
2.7403e-05, 2.1726e-05], |
|
|
[ 1.2308e-05, 2.1905e-06, 8.9407e-08, ..., 5.4166e-06, |
|
|
1.6391e-06, -1.0058e-06], |
|
|
[ 6.5804e-04, 8.4043e-05, 1.1456e-04, ..., 1.8108e-04, |
|
|
1.8704e-04, 1.4186e-04], |
|
|
..., |
|
|
[ 1.3752e-03, 2.1374e-04, 1.9753e-04, ..., 6.8903e-04, |
|
|
6.8665e-04, 4.8661e-04], |
|
|
[-1.8311e-03, -2.8539e-04, -2.6178e-04, ..., -9.2459e-04, |
|
|
-9.2125e-04, -6.5231e-04], |
|
|
[-4.1652e-04, -4.5180e-05, -8.1360e-05, ..., -4.9084e-05, |
|
|
-5.5760e-05, -4.9591e-05]], device='cuda:0') |
|
|
Epoch 26, bias, value: tensor([-0.0235, -0.2851, 0.0169, -0.1963, 0.0014, 0.3240, 0.1741], |
|
|
device='cuda:0'), grad: tensor([ 1.5664e-04, 3.3677e-05, 2.0599e-03, 4.2963e-04, 3.9406e-03, |
|
|
-5.2376e-03, -1.3800e-03], device='cuda:0') |
|
|
351 |
|
|
0.0071694186955877925 |
|
|
changing lr |
|
|
epoch 25, time 486.77, cls_loss 0.0445 cls_loss_mapping 0.2002 cls_loss_causal 0.9541 re_mapping 0.0637 re_causal 0.0641 |
|
|
Epoch 27, weight, value: tensor([[-0.0224, -0.0148, -0.0069, ..., -0.0090, 0.1102, 0.1378], |
|
|
[ 0.0452, 0.0599, 0.0507, ..., 0.0500, 0.0225, -0.0140], |
|
|
[ 0.0656, 0.0353, 0.0671, ..., 0.0134, 0.0080, 0.0254], |
|
|
..., |
|
|
[-0.1328, -0.1142, -0.0798, ..., 0.0032, 0.0210, -0.0175], |
|
|
[-0.0156, -0.0017, -0.0253, ..., 0.0967, 0.0379, -0.0066], |
|
|
[-0.0209, -0.0287, -0.0242, ..., -0.1424, -0.1791, -0.1420]], |
|
|
device='cuda:0'), grad: tensor([[ 2.1248e-03, 3.3641e-04, 6.0511e-04, ..., 6.1703e-04, |
|
|
1.2503e-03, 1.2083e-03], |
|
|
[-2.7657e-03, -4.6110e-04, -7.8583e-04, ..., -7.3004e-04, |
|
|
-1.3189e-03, -1.2741e-03], |
|
|
[ 3.1161e-04, 5.1141e-05, 9.0480e-05, ..., 1.1814e-04, |
|
|
1.1945e-04, 1.3459e-04], |
|
|
..., |
|
|
[-6.3467e-04, -1.0312e-04, -1.8442e-04, ..., -2.4652e-04, |
|
|
-2.5082e-04, -2.8372e-04], |
|
|
[ 2.0123e-04, 3.3110e-05, 5.8502e-05, ..., 7.1347e-05, |
|
|
7.2539e-05, 8.2135e-05], |
|
|
[ 7.2813e-04, 1.3781e-04, 2.0635e-04, ..., 1.5700e-04, |
|
|
1.1438e-04, 1.1861e-04]], device='cuda:0') |
|
|
Epoch 27, bias, value: tensor([-0.0163, -0.2837, 0.0050, -0.1952, -0.0045, 0.3244, 0.1816], |
|
|
device='cuda:0'), grad: tensor([ 0.0065, -0.0087, 0.0010, 0.0001, -0.0021, 0.0007, 0.0025], |
|
|
device='cuda:0') |
|
|
351 |
|
|
0.0069651251582696205 |
|
|
changing lr |
|
|
epoch 26, time 487.73, cls_loss 0.0368 cls_loss_mapping 0.1828 cls_loss_causal 0.8843 re_mapping 0.0628 re_causal 0.0632 |
|
|
Epoch 28, weight, value: tensor([[-0.0236, -0.0151, -0.0075, ..., -0.0090, 0.1096, 0.1365], |
|
|
[ 0.0429, 0.0591, 0.0491, ..., 0.0491, 0.0215, -0.0149], |
|
|
[ 0.0693, 0.0359, 0.0674, ..., 0.0138, 0.0074, 0.0253], |
|
|
..., |
|
|
[-0.1272, -0.1129, -0.0777, ..., 0.0041, 0.0222, -0.0156], |
|
|
[-0.0171, -0.0017, -0.0255, ..., 0.0952, 0.0373, -0.0070], |
|
|
[-0.0240, -0.0291, -0.0241, ..., -0.1416, -0.1779, -0.1408]], |
|
|
device='cuda:0'), grad: tensor([[ 6.0856e-05, 2.1964e-05, 2.7269e-05, ..., 6.5342e-06, |
|
|
-1.4685e-05, -5.9530e-06], |
|
|
[-6.1631e-05, -9.1717e-06, -6.6832e-06, ..., -4.4763e-05, |
|
|
-1.7956e-05, -2.1353e-05], |
|
|
[ 9.1195e-05, 2.1666e-05, 5.9724e-05, ..., 3.2812e-05, |
|
|
1.0960e-05, 2.4229e-05], |
|
|
..., |
|
|
[-7.8297e-04, -1.6737e-04, -3.1304e-04, ..., -1.6844e-04, |
|
|
-1.1462e-04, -1.4091e-04], |
|
|
[ 7.5996e-05, 1.4573e-05, 2.2784e-05, ..., 1.7464e-05, |
|
|
1.4804e-05, 1.4916e-05], |
|
|
[ 2.5916e-04, 5.2959e-05, 8.9407e-05, ..., 5.7548e-05, |
|
|
4.3720e-05, 4.9055e-05]], device='cuda:0') |
|
|
Epoch 28, bias, value: tensor([-0.0193, -0.2885, 0.0172, -0.1947, 0.0115, 0.3121, 0.1729], |
|
|
device='cuda:0'), grad: tensor([ 0.0004, -0.0001, 0.0008, 0.0014, -0.0038, 0.0003, 0.0011], |
|
|
device='cuda:0') |
|
|
351 |
|
|
0.006756874120406716 |
|
|
changing lr |
|
|
epoch 27, time 487.15, cls_loss 0.0301 cls_loss_mapping 0.1562 cls_loss_causal 0.8664 re_mapping 0.0620 re_causal 0.0625 |
|
|
Epoch 29, weight, value: tensor([[-0.0220, -0.0148, -0.0074, ..., -0.0088, 0.1090, 0.1355], |
|
|
[ 0.0451, 0.0592, 0.0495, ..., 0.0491, 0.0212, -0.0149], |
|
|
[ 0.0643, 0.0347, 0.0653, ..., 0.0122, 0.0056, 0.0231], |
|
|
..., |
|
|
[-0.1302, -0.1122, -0.0772, ..., 0.0024, 0.0208, -0.0162], |
|
|
[-0.0137, -0.0015, -0.0252, ..., 0.0964, 0.0387, -0.0057], |
|
|
[-0.0215, -0.0287, -0.0234, ..., -0.1391, -0.1752, -0.1381]], |
|
|
device='cuda:0'), grad: tensor([[-3.0112e-04, -3.0011e-05, -8.4877e-05, ..., -1.0002e-04, |
|
|
-1.4138e-04, -1.4341e-04], |
|
|
[-2.8610e-05, -2.1160e-06, -7.0333e-06, ..., -5.9530e-06, |
|
|
-9.5963e-06, -8.3521e-06], |
|
|
[-8.0168e-05, -2.8774e-05, -3.0190e-05, ..., -7.9945e-06, |
|
|
-3.1590e-06, -7.7263e-06], |
|
|
..., |
|
|
[ 1.4508e-04, 1.4298e-05, 4.1276e-05, ..., 4.0680e-05, |
|
|
6.0260e-05, 6.1095e-05], |
|
|
[ 4.0799e-05, 8.3372e-06, 1.2673e-05, ..., 9.0152e-06, |
|
|
1.1228e-05, 1.1653e-05], |
|
|
[ 1.9717e-04, 3.2246e-05, 5.9605e-05, ..., 5.6714e-05, |
|
|
7.3969e-05, 7.7367e-05]], device='cuda:0') |
|
|
Epoch 29, bias, value: tensor([-0.0145, -0.2783, 0.0044, -0.1967, -0.0009, 0.3187, 0.1783], |
|
|
device='cuda:0'), grad: tensor([-1.0118e-03, -8.4460e-05, -1.9419e-04, 7.6890e-05, 4.8494e-04, |
|
|
1.1617e-04, 6.1226e-04], device='cuda:0') |
|
|
351 |
|
|
0.00654508497187474 |
|
|
changing lr |
|
|
epoch 28, time 488.45, cls_loss 0.0259 cls_loss_mapping 0.1594 cls_loss_causal 0.8755 re_mapping 0.0618 re_causal 0.0624 |
|
|
Epoch 30, weight, value: tensor([[-0.0226, -0.0141, -0.0070, ..., -0.0097, 0.1078, 0.1339], |
|
|
[ 0.0451, 0.0585, 0.0489, ..., 0.0486, 0.0208, -0.0149], |
|
|
[ 0.0660, 0.0352, 0.0655, ..., 0.0120, 0.0055, 0.0230], |
|
|
..., |
|
|
[-0.1285, -0.1112, -0.0761, ..., 0.0028, 0.0212, -0.0155], |
|
|
[-0.0149, -0.0017, -0.0254, ..., 0.0952, 0.0380, -0.0061], |
|
|
[-0.0242, -0.0300, -0.0249, ..., -0.1378, -0.1735, -0.1366]], |
|
|
device='cuda:0'), grad: tensor([[-2.3043e-04, -2.5898e-05, -3.6359e-05, ..., -6.9618e-05, |
|
|
-6.9380e-05, -6.5029e-05], |
|
|
[ 1.2040e-05, 9.2015e-07, 2.3283e-06, ..., 4.3623e-06, |
|
|
7.4543e-06, 6.0536e-06], |
|
|
[ 6.5751e-06, 5.9977e-07, 1.3076e-06, ..., 2.2091e-06, |
|
|
3.5539e-06, 2.9653e-06], |
|
|
..., |
|
|
[-2.4462e-04, -1.8463e-05, -5.0098e-05, ..., -1.2875e-04, |
|
|
-9.0241e-05, -9.0182e-05], |
|
|
[ 1.3925e-05, 1.1101e-06, 2.8498e-06, ..., 6.5938e-06, |
|
|
5.9046e-06, 5.5246e-06], |
|
|
[ 2.1410e-04, 2.4378e-05, 3.3259e-05, ..., 6.6340e-05, |
|
|
5.7548e-05, 5.6148e-05]], device='cuda:0') |
|
|
Epoch 30, bias, value: tensor([-0.0197, -0.2738, 0.0092, -0.1897, 0.0017, 0.3110, 0.1723], |
|
|
device='cuda:0'), grad: tensor([-7.7391e-04, 4.0770e-05, 2.1964e-05, 8.1396e-04, -8.7309e-04, |
|
|
4.8757e-05, 7.2145e-04], device='cuda:0') |
|
|
351 |
|
|
0.006330184227833378 |
|
|
changing lr |
|
|
epoch 29, time 490.75, cls_loss 0.0292 cls_loss_mapping 0.1531 cls_loss_causal 0.8828 re_mapping 0.0614 re_causal 0.0621 |
|
|
Epoch 31, weight, value: tensor([[-0.0218, -0.0140, -0.0068, ..., -0.0089, 0.1091, 0.1349], |
|
|
[ 0.0433, 0.0577, 0.0479, ..., 0.0471, 0.0192, -0.0167], |
|
|
[ 0.0632, 0.0351, 0.0644, ..., 0.0113, 0.0043, 0.0219], |
|
|
..., |
|
|
[-0.1234, -0.1095, -0.0737, ..., 0.0036, 0.0218, -0.0143], |
|
|
[-0.0140, -0.0017, -0.0254, ..., 0.0953, 0.0381, -0.0057], |
|
|
[-0.0247, -0.0296, -0.0247, ..., -0.1371, -0.1730, -0.1360]], |
|
|
device='cuda:0'), grad: tensor([[ 1.1892e-03, 1.4639e-04, 3.5691e-04, ..., 1.6296e-04, |
|
|
1.0908e-04, 1.8024e-04], |
|
|
[ 1.6034e-04, 2.6032e-05, 4.6492e-05, ..., 1.7181e-05, |
|
|
9.0823e-06, 1.5661e-05], |
|
|
[ 2.4438e-04, 3.5912e-05, 7.3314e-05, ..., 3.0607e-05, |
|
|
1.9833e-05, 3.1680e-05], |
|
|
..., |
|
|
[ 1.6680e-03, 2.5249e-04, 4.9782e-04, ..., 1.9598e-04, |
|
|
1.2219e-04, 1.9646e-04], |
|
|
[ 1.1700e-04, 1.9327e-05, 3.4690e-05, ..., 1.2390e-05, |
|
|
7.2531e-06, 1.1504e-05], |
|
|
[-3.3855e-03, -4.7874e-04, -1.0099e-03, ..., -4.1747e-04, |
|
|
-2.6464e-04, -4.3321e-04]], device='cuda:0') |
|
|
Epoch 31, bias, value: tensor([-0.0185, -0.2735, 0.0005, -0.1896, 0.0133, 0.3104, 0.1683], |
|
|
device='cuda:0'), grad: tensor([ 3.7479e-03, 4.8637e-04, 7.4959e-04, 2.9132e-05, 5.0888e-03, |
|
|
3.5143e-04, -1.0460e-02], device='cuda:0') |
|
|
351 |
|
|
0.006112604669781575 |
|
|
changing lr |
|
|
epoch 30, time 493.20, cls_loss 0.0291 cls_loss_mapping 0.1489 cls_loss_causal 0.8701 re_mapping 0.0617 re_causal 0.0625 |
|
|
Epoch 32, weight, value: tensor([[-0.0220, -0.0139, -0.0067, ..., -0.0089, 0.1086, 0.1337], |
|
|
[ 0.0467, 0.0574, 0.0485, ..., 0.0477, 0.0197, -0.0158], |
|
|
[ 0.0662, 0.0358, 0.0649, ..., 0.0117, 0.0051, 0.0228], |
|
|
..., |
|
|
[-0.1264, -0.1089, -0.0743, ..., 0.0026, 0.0201, -0.0159], |
|
|
[-0.0151, -0.0019, -0.0255, ..., 0.0943, 0.0375, -0.0061], |
|
|
[-0.0254, -0.0300, -0.0251, ..., -0.1358, -0.1713, -0.1342]], |
|
|
device='cuda:0'), grad: tensor([[-1.3389e-05, -1.6242e-06, -1.9707e-06, ..., -2.7418e-06, |
|
|
-9.4846e-06, -9.3952e-06], |
|
|
[-1.7524e-05, -2.8014e-06, -3.5129e-06, ..., -1.0431e-06, |
|
|
4.0978e-07, 1.3411e-07], |
|
|
[ 6.8881e-06, 1.2890e-06, 1.4454e-06, ..., 2.3954e-06, |
|
|
2.4922e-06, 2.6710e-06], |
|
|
..., |
|
|
[ 1.1548e-05, 1.7248e-06, 1.8440e-06, ..., 2.1644e-06, |
|
|
5.9679e-06, 5.9269e-06], |
|
|
[-3.1888e-06, -1.3709e-06, -8.5682e-07, ..., -4.1500e-06, |
|
|
-3.0473e-06, -3.2410e-06], |
|
|
[ 1.1280e-05, 1.9930e-06, 2.2314e-06, ..., 2.0228e-06, |
|
|
1.6540e-06, 1.8477e-06]], device='cuda:0') |
|
|
Epoch 32, bias, value: tensor([-0.0184, -0.2584, 0.0100, -0.1898, -0.0010, 0.3036, 0.1646], |
|
|
device='cuda:0'), grad: tensor([-4.1336e-05, -5.4091e-05, 2.0459e-05, 1.2487e-05, 3.4273e-05, |
|
|
-5.7705e-06, 3.3677e-05], device='cuda:0') |
|
|
351 |
|
|
0.005892784473993186 |
|
|
changing lr |
|
|
epoch 31, time 488.22, cls_loss 0.0271 cls_loss_mapping 0.1293 cls_loss_causal 0.8446 re_mapping 0.0604 re_causal 0.0613 |
|
|
Epoch 33, weight, value: tensor([[-0.0197, -0.0135, -0.0061, ..., -0.0079, 0.1092, 0.1343], |
|
|
[ 0.0433, 0.0561, 0.0470, ..., 0.0464, 0.0185, -0.0169], |
|
|
[ 0.0662, 0.0359, 0.0648, ..., 0.0113, 0.0046, 0.0225], |
|
|
..., |
|
|
[-0.1251, -0.1077, -0.0734, ..., 0.0020, 0.0190, -0.0164], |
|
|
[-0.0145, -0.0019, -0.0255, ..., 0.0947, 0.0381, -0.0053], |
|
|
[-0.0254, -0.0299, -0.0248, ..., -0.1351, -0.1700, -0.1333]], |
|
|
device='cuda:0'), grad: tensor([[ 7.5288e-06, 2.6189e-06, 3.0771e-06, ..., 2.4885e-06, |
|
|
1.8999e-06, 2.2314e-06], |
|
|
[-1.2450e-05, -1.6689e-06, -3.1479e-06, ..., -3.0622e-06, |
|
|
-2.6710e-06, -4.0866e-06], |
|
|
[ 1.7472e-06, 7.2643e-07, 7.4133e-07, ..., 7.0408e-07, |
|
|
4.8056e-07, 4.6566e-07], |
|
|
..., |
|
|
[ 4.3586e-07, 2.6822e-07, 2.6822e-07, ..., -1.4901e-08, |
|
|
6.3330e-08, 4.8429e-08], |
|
|
[ 2.8573e-06, 9.9838e-07, 1.1586e-06, ..., 9.1642e-07, |
|
|
7.1153e-07, 8.3074e-07], |
|
|
[ 1.7941e-05, 4.2692e-06, 5.8673e-06, ..., 5.0589e-06, |
|
|
4.1910e-06, 5.6028e-06]], device='cuda:0') |
|
|
Epoch 33, bias, value: tensor([-1.1186e-02, -2.6577e-01, 1.0681e-02, -1.8894e-01, -2.5201e-04, |
|
|
3.0202e-01, 1.6403e-01], device='cuda:0'), grad: tensor([ 1.7270e-05, -3.9309e-05, 3.2783e-06, -3.6538e-05, 1.3039e-07, |
|
|
6.4075e-06, 4.8697e-05], device='cuda:0') |
|
|
351 |
|
|
0.00567116632908828 |
|
|
changing lr |
|
|
epoch 32, time 494.20, cls_loss 0.0140 cls_loss_mapping 0.1146 cls_loss_causal 0.8057 re_mapping 0.0611 re_causal 0.0621 |
|
|
Epoch 34, weight, value: tensor([[-0.0195, -0.0134, -0.0060, ..., -0.0085, 0.1079, 0.1328], |
|
|
[ 0.0439, 0.0556, 0.0467, ..., 0.0464, 0.0186, -0.0164], |
|
|
[ 0.0643, 0.0355, 0.0638, ..., 0.0109, 0.0038, 0.0215], |
|
|
..., |
|
|
[-0.1249, -0.1069, -0.0729, ..., 0.0022, 0.0192, -0.0160], |
|
|
[-0.0141, -0.0020, -0.0255, ..., 0.0944, 0.0382, -0.0049], |
|
|
[-0.0244, -0.0296, -0.0242, ..., -0.1340, -0.1684, -0.1319]], |
|
|
device='cuda:0'), grad: tensor([[ 1.2890e-06, 1.9372e-07, 2.7195e-07, ..., 2.6450e-07, |
|
|
2.2352e-08, 1.7509e-07], |
|
|
[ 9.8720e-07, 5.5879e-08, 1.4529e-07, ..., 1.4156e-07, |
|
|
2.5705e-07, 3.3528e-07], |
|
|
[ 1.4901e-07, -1.4901e-08, -7.4506e-09, ..., 7.4506e-08, |
|
|
5.9605e-08, 6.7055e-08], |
|
|
..., |
|
|
[ 1.3672e-06, 4.4703e-07, 5.2899e-07, ..., 6.1840e-07, |
|
|
5.4017e-07, 6.1095e-07], |
|
|
[ 7.1228e-06, 6.7800e-07, 1.1809e-06, ..., 1.3076e-06, |
|
|
9.6112e-07, 1.6615e-06], |
|
|
[-9.7528e-06, -8.9779e-07, -1.5907e-06, ..., -1.7695e-06, |
|
|
-1.2815e-06, -2.2501e-06]], device='cuda:0') |
|
|
Epoch 34, bias, value: tensor([-0.0111, -0.2602, 0.0059, -0.1873, -0.0030, 0.3006, 0.1656], |
|
|
device='cuda:0'), grad: tensor([ 4.3325e-06, 3.6284e-06, 6.5938e-07, -2.7381e-06, 3.4980e-06, |
|
|
2.5198e-05, -3.4690e-05], device='cuda:0') |
|
|
351 |
|
|
0.00544819654451717 |
|
|
changing lr |
|
|
epoch 33, time 487.32, cls_loss 0.0128 cls_loss_mapping 0.1002 cls_loss_causal 0.7890 re_mapping 0.0586 re_causal 0.0596 |
|
|
Epoch 35, weight, value: tensor([[-0.0213, -0.0138, -0.0065, ..., -0.0094, 0.1059, 0.1304], |
|
|
[ 0.0420, 0.0549, 0.0458, ..., 0.0458, 0.0181, -0.0168], |
|
|
[ 0.0661, 0.0362, 0.0639, ..., 0.0114, 0.0046, 0.0225], |
|
|
..., |
|
|
[-0.1238, -0.1061, -0.0723, ..., 0.0023, 0.0190, -0.0159], |
|
|
[-0.0145, -0.0020, -0.0256, ..., 0.0938, 0.0380, -0.0048], |
|
|
[-0.0227, -0.0296, -0.0236, ..., -0.1327, -0.1667, -0.1304]], |
|
|
device='cuda:0'), grad: tensor([[ 1.6081e-04, 1.3858e-05, 3.4720e-05, ..., 4.6670e-05, |
|
|
2.1726e-05, 4.5151e-05], |
|
|
[ 1.0207e-05, 5.5134e-07, 2.1979e-06, ..., 2.7604e-06, |
|
|
1.0394e-06, 2.5928e-06], |
|
|
[ 2.6911e-05, 1.4007e-06, 5.7444e-06, ..., 7.3723e-06, |
|
|
2.7679e-06, 6.8769e-06], |
|
|
..., |
|
|
[-6.6936e-05, -9.1121e-06, -1.4573e-05, ..., -2.1294e-05, |
|
|
-1.2442e-05, -2.1517e-05], |
|
|
[ 1.9655e-05, 1.0766e-06, 4.2394e-06, ..., 5.3495e-06, |
|
|
2.0340e-06, 5.0180e-06], |
|
|
[-1.6916e-04, -8.7768e-06, -3.6329e-05, ..., -4.5896e-05, |
|
|
-1.7032e-05, -4.2856e-05]], device='cuda:0') |
|
|
Epoch 35, bias, value: tensor([-0.0173, -0.2628, 0.0116, -0.1854, -0.0026, 0.2957, 0.1711], |
|
|
device='cuda:0'), grad: tensor([ 5.9175e-04, 4.2528e-05, 1.1235e-04, 7.7784e-05, -1.9562e-04, |
|
|
8.1778e-05, -7.1049e-04], device='cuda:0') |
|
|
351 |
|
|
0.005224324151752577 |
|
|
changing lr |
|
|
epoch 34, time 489.98, cls_loss 0.0186 cls_loss_mapping 0.1047 cls_loss_causal 0.7736 re_mapping 0.0585 re_causal 0.0595 |
|
|
Epoch 36, weight, value: tensor([[-0.0216, -0.0141, -0.0069, ..., -0.0092, 0.1056, 0.1298], |
|
|
[ 0.0414, 0.0545, 0.0456, ..., 0.0451, 0.0177, -0.0170], |
|
|
[ 0.0655, 0.0358, 0.0632, ..., 0.0109, 0.0041, 0.0221], |
|
|
..., |
|
|
[-0.1214, -0.1049, -0.0712, ..., 0.0022, 0.0184, -0.0160], |
|
|
[-0.0141, -0.0021, -0.0256, ..., 0.0938, 0.0382, -0.0044], |
|
|
[-0.0250, -0.0298, -0.0240, ..., -0.1324, -0.1659, -0.1300]], |
|
|
device='cuda:0'), grad: tensor([[ 3.3408e-05, 6.4932e-06, 9.7007e-06, ..., 1.7822e-05, |
|
|
2.0593e-05, 2.3276e-05], |
|
|
[-1.0021e-06, -6.2026e-07, -3.3528e-07, ..., 6.7614e-07, |
|
|
3.2391e-06, 3.2559e-06], |
|
|
[ 2.3395e-05, 4.2990e-06, 7.3910e-06, ..., 1.0505e-05, |
|
|
1.4037e-05, 1.5646e-05], |
|
|
..., |
|
|
[-7.9870e-05, -1.5073e-05, -2.4453e-05, ..., -3.7611e-05, |
|
|
-5.0575e-05, -5.6446e-05], |
|
|
[-1.4920e-06, -2.2538e-07, -2.7567e-07, ..., -1.7807e-06, |
|
|
-7.6927e-07, -5.2899e-07], |
|
|
[ 1.7151e-05, 3.1851e-06, 5.3979e-06, ..., 7.2606e-06, |
|
|
9.5218e-06, 1.0468e-05]], device='cuda:0') |
|
|
Epoch 36, bias, value: tensor([-0.0171, -0.2617, 0.0107, -0.1806, 0.0014, 0.2941, 0.1635], |
|
|
device='cuda:0'), grad: tensor([ 1.2863e-04, 1.5218e-06, 8.2374e-05, 2.6941e-05, -2.9230e-04, |
|
|
-4.2953e-06, 5.7399e-05], device='cuda:0') |
|
|
351 |
|
|
0.005000000000000003 |
|
|
changing lr |
|
|
epoch 35, time 489.59, cls_loss 0.0186 cls_loss_mapping 0.0992 cls_loss_causal 0.7708 re_mapping 0.0567 re_causal 0.0578 |
|
|
Epoch 37, weight, value: tensor([[-0.0225, -0.0142, -0.0072, ..., -0.0097, 0.1046, 0.1286], |
|
|
[ 0.0419, 0.0541, 0.0454, ..., 0.0452, 0.0181, -0.0164], |
|
|
[ 0.0645, 0.0356, 0.0626, ..., 0.0108, 0.0036, 0.0217], |
|
|
..., |
|
|
[-0.1206, -0.1042, -0.0707, ..., 0.0022, 0.0184, -0.0158], |
|
|
[-0.0141, -0.0022, -0.0255, ..., 0.0934, 0.0381, -0.0043], |
|
|
[-0.0259, -0.0298, -0.0239, ..., -0.1321, -0.1653, -0.1296]], |
|
|
device='cuda:0'), grad: tensor([[ 7.0870e-05, 1.1154e-05, 2.2084e-05, ..., 2.9728e-05, |
|
|
2.9624e-05, 3.3706e-05], |
|
|
[-1.6823e-05, -2.8890e-06, -5.2638e-06, ..., -1.3612e-05, |
|
|
-1.8016e-05, -1.7941e-05], |
|
|
[-8.9824e-05, -2.4319e-05, -2.8491e-05, ..., -6.5714e-06, |
|
|
-1.2629e-05, -1.4260e-05], |
|
|
..., |
|
|
[ 4.0799e-05, 7.8306e-06, 1.2830e-05, ..., 1.1452e-05, |
|
|
1.1101e-05, 1.3262e-05], |
|
|
[ 4.5776e-05, 7.6741e-06, 1.4372e-05, ..., 1.4342e-05, |
|
|
1.1854e-05, 1.5013e-05], |
|
|
[-1.1605e-04, -1.4335e-05, -3.6120e-05, ..., -4.6641e-05, |
|
|
-3.4481e-05, -4.4852e-05]], device='cuda:0') |
|
|
Epoch 37, bias, value: tensor([-0.0201, -0.2563, 0.0100, -0.1743, 0.0009, 0.2915, 0.1585], |
|
|
device='cuda:0'), grad: tensor([ 2.4629e-04, -4.9651e-05, -2.0874e-04, 1.8287e-04, 1.3006e-04, |
|
|
1.5986e-04, -4.6062e-04], device='cuda:0') |
|
|
351 |
|
|
0.004775675848247429 |
|
|
changing lr |
|
|
epoch 36, time 488.84, cls_loss 0.0166 cls_loss_mapping 0.0919 cls_loss_causal 0.7734 re_mapping 0.0554 re_causal 0.0565 |
|
|
Epoch 38, weight, value: tensor([[-0.0223, -0.0141, -0.0071, ..., -0.0098, 0.1041, 0.1278], |
|
|
[ 0.0414, 0.0536, 0.0449, ..., 0.0448, 0.0178, -0.0165], |
|
|
[ 0.0643, 0.0358, 0.0624, ..., 0.0106, 0.0035, 0.0215], |
|
|
..., |
|
|
[-0.1181, -0.1033, -0.0697, ..., 0.0027, 0.0188, -0.0149], |
|
|
[-0.0145, -0.0024, -0.0256, ..., 0.0929, 0.0379, -0.0042], |
|
|
[-0.0246, -0.0293, -0.0234, ..., -0.1310, -0.1640, -0.1285]], |
|
|
device='cuda:0'), grad: tensor([[ 1.3316e-04, 1.7837e-05, 2.2829e-05, ..., 1.4164e-05, |
|
|
1.2748e-05, 2.1756e-05], |
|
|
[ 5.2564e-06, 4.0606e-07, 8.4192e-07, ..., 5.1595e-07, |
|
|
6.2212e-07, 8.6613e-07], |
|
|
[ 3.3081e-05, 2.6152e-06, 5.2229e-06, ..., 3.0957e-06, |
|
|
3.8035e-06, 5.3719e-06], |
|
|
..., |
|
|
[-5.8189e-06, -4.7125e-07, -1.5218e-06, ..., -1.4678e-06, |
|
|
-1.4100e-06, -1.4715e-06], |
|
|
[ 2.6207e-06, 1.7136e-07, 3.7812e-07, ..., 3.9116e-08, |
|
|
2.0675e-07, 3.4086e-07], |
|
|
[-1.6868e-04, -2.0489e-05, -2.7731e-05, ..., -1.6347e-05, |
|
|
-1.6004e-05, -2.6926e-05]], device='cuda:0') |
|
|
Epoch 38, bias, value: tensor([-0.0192, -0.2548, 0.0095, -0.1794, 0.0065, 0.2877, 0.1599], |
|
|
device='cuda:0'), grad: tensor([ 4.4608e-04, 1.7881e-05, 1.1235e-04, 1.7788e-06, -1.9386e-05, |
|
|
9.0301e-06, -5.6791e-04], device='cuda:0') |
|
|
351 |
|
|
0.004551803455482836 |
|
|
changing lr |
|
|
epoch 37, time 487.50, cls_loss 0.0129 cls_loss_mapping 0.0849 cls_loss_causal 0.7819 re_mapping 0.0550 re_causal 0.0562 |
|
|
Epoch 39, weight, value: tensor([[-0.0228, -0.0142, -0.0072, ..., -0.0100, 0.1035, 0.1270], |
|
|
[ 0.0410, 0.0531, 0.0445, ..., 0.0444, 0.0176, -0.0165], |
|
|
[ 0.0628, 0.0354, 0.0617, ..., 0.0099, 0.0029, 0.0208], |
|
|
..., |
|
|
[-0.1189, -0.1028, -0.0695, ..., 0.0021, 0.0180, -0.0154], |
|
|
[-0.0129, -0.0021, -0.0254, ..., 0.0936, 0.0385, -0.0033], |
|
|
[-0.0235, -0.0291, -0.0230, ..., -0.1298, -0.1629, -0.1274]], |
|
|
device='cuda:0'), grad: tensor([[ 1.1024e-03, 1.6773e-04, 1.4496e-04, ..., 3.4833e-04, |
|
|
2.5558e-04, 3.3259e-04], |
|
|
[ 4.5925e-05, 1.2770e-05, 1.7360e-06, ..., 1.1012e-05, |
|
|
1.0118e-05, 1.5013e-05], |
|
|
[-1.8396e-03, -5.5504e-04, -3.8934e-04, ..., -4.4203e-04, |
|
|
-5.5504e-04, -6.4707e-04], |
|
|
..., |
|
|
[-9.4032e-04, -4.8190e-05, -7.2837e-05, ..., -3.4523e-04, |
|
|
-1.7154e-04, -2.5058e-04], |
|
|
[ 2.1040e-04, 2.8461e-05, 2.7418e-05, ..., 6.7711e-05, |
|
|
4.3333e-05, 5.9068e-05], |
|
|
[ 8.0061e-04, 2.2531e-04, 1.6654e-04, ..., 2.0170e-04, |
|
|
2.3818e-04, 2.7800e-04]], device='cuda:0') |
|
|
Epoch 39, bias, value: tensor([-0.0210, -0.2532, 0.0062, -0.1753, 0.0016, 0.2897, 0.1620], |
|
|
device='cuda:0'), grad: tensor([ 0.0037, 0.0001, -0.0050, 0.0018, -0.0036, 0.0007, 0.0022], |
|
|
device='cuda:0') |
|
|
351 |
|
|
0.004328833670911726 |
|
|
changing lr |
|
|
epoch 38, time 486.95, cls_loss 0.0092 cls_loss_mapping 0.0883 cls_loss_causal 0.7727 re_mapping 0.0537 re_causal 0.0550 |
|
|
Epoch 40, weight, value: tensor([[-0.0224, -0.0141, -0.0071, ..., -0.0100, 0.1032, 0.1266], |
|
|
[ 0.0395, 0.0524, 0.0439, ..., 0.0437, 0.0169, -0.0171], |
|
|
[ 0.0637, 0.0356, 0.0616, ..., 0.0100, 0.0029, 0.0208], |
|
|
..., |
|
|
[-0.1182, -0.1022, -0.0690, ..., 0.0022, 0.0181, -0.0152], |
|
|
[-0.0136, -0.0023, -0.0255, ..., 0.0932, 0.0385, -0.0032], |
|
|
[-0.0221, -0.0287, -0.0226, ..., -0.1290, -0.1619, -0.1264]], |
|
|
device='cuda:0'), grad: tensor([[ 1.0431e-04, 8.6427e-06, 8.2403e-06, ..., 3.6299e-05, |
|
|
2.9549e-05, 2.5302e-05], |
|
|
[ 2.6047e-05, 1.6242e-06, 3.4273e-07, ..., 8.7395e-06, |
|
|
7.0557e-06, 4.9658e-06], |
|
|
[ 9.1696e-04, 7.5698e-05, 5.7697e-05, ..., 3.6097e-04, |
|
|
2.8181e-04, 2.4402e-04], |
|
|
..., |
|
|
[ 1.0424e-03, 7.0632e-05, 5.5134e-05, ..., 3.4261e-04, |
|
|
2.7657e-04, 2.2483e-04], |
|
|
[-2.4719e-03, -1.8394e-04, -1.4257e-04, ..., -8.9836e-04, |
|
|
-7.1335e-04, -6.0034e-04], |
|
|
[ 2.6083e-04, 1.5914e-05, 1.1899e-05, ..., 9.9719e-05, |
|
|
7.9751e-05, 6.6876e-05]], device='cuda:0') |
|
|
Epoch 40, bias, value: tensor([-0.0200, -0.2550, 0.0099, -0.1766, 0.0015, 0.2851, 0.1650], |
|
|
device='cuda:0'), grad: tensor([ 3.3784e-04, 8.6427e-05, 2.9964e-03, 3.9721e-04, 3.4218e-03, |
|
|
-8.0948e-03, 8.5974e-04], device='cuda:0') |
|
|
351 |
|
|
0.0041072155260068206 |
|
|
changing lr |
|
|
epoch 39, time 489.22, cls_loss 0.0097 cls_loss_mapping 0.0775 cls_loss_causal 0.7592 re_mapping 0.0543 re_causal 0.0557 |
|
|
Epoch 41, weight, value: tensor([[-0.0227, -0.0141, -0.0072, ..., -0.0101, 0.1026, 0.1259], |
|
|
[ 0.0403, 0.0520, 0.0439, ..., 0.0437, 0.0173, -0.0165], |
|
|
[ 0.0628, 0.0355, 0.0611, ..., 0.0095, 0.0024, 0.0202], |
|
|
..., |
|
|
[-0.1180, -0.1017, -0.0687, ..., 0.0020, 0.0177, -0.0154], |
|
|
[-0.0123, -0.0022, -0.0253, ..., 0.0937, 0.0388, -0.0026], |
|
|
[-0.0233, -0.0287, -0.0227, ..., -0.1287, -0.1614, -0.1262]], |
|
|
device='cuda:0'), grad: tensor([[ 4.6196e-03, 4.9496e-04, 5.7077e-04, ..., 1.1282e-03, |
|
|
2.0866e-03, 2.3994e-03], |
|
|
[ 2.5436e-05, 1.1943e-05, 7.2420e-06, ..., -7.5214e-06, |
|
|
-7.6517e-06, -4.2208e-06], |
|
|
[-5.7106e-03, -7.9346e-04, -8.9979e-04, ..., -1.3742e-03, |
|
|
-2.3251e-03, -2.7313e-03], |
|
|
..., |
|
|
[ 2.3162e-04, 5.5492e-05, 6.3598e-05, ..., 5.3585e-05, |
|
|
6.6578e-05, 8.4639e-05], |
|
|
[ 5.7983e-04, 1.6105e-04, 1.8013e-04, ..., 1.3936e-04, |
|
|
1.2082e-04, 1.7250e-04], |
|
|
[ 1.0353e-04, 2.8998e-05, 3.2276e-05, ..., 2.4587e-05, |
|
|
2.3708e-05, 3.2336e-05]], device='cuda:0') |
|
|
Epoch 41, bias, value: tensor([-2.1788e-02, -2.5003e-01, 8.0240e-03, -1.7373e-01, -1.4076e-04, |
|
|
2.8711e-01, 1.6039e-01], device='cuda:0'), grad: tensor([ 1.5137e-02, 5.2959e-05, -1.8082e-02, 4.0960e-04, 6.5422e-04, |
|
|
1.5545e-03, 2.7752e-04], device='cuda:0') |
|
|
351 |
|
|
0.0038873953302184317 |
|
|
changing lr |
|
|
epoch 40, time 485.05, cls_loss 0.0115 cls_loss_mapping 0.0751 cls_loss_causal 0.7536 re_mapping 0.0549 re_causal 0.0564 |
|
|
Epoch 42, weight, value: tensor([[-0.0213, -0.0140, -0.0069, ..., -0.0095, 0.1029, 0.1261], |
|
|
[ 0.0404, 0.0518, 0.0437, ..., 0.0434, 0.0173, -0.0165], |
|
|
[ 0.0623, 0.0354, 0.0608, ..., 0.0093, 0.0022, 0.0200], |
|
|
..., |
|
|
[-0.1180, -0.1012, -0.0685, ..., 0.0017, 0.0173, -0.0155], |
|
|
[-0.0132, -0.0024, -0.0255, ..., 0.0930, 0.0385, -0.0028], |
|
|
[-0.0228, -0.0285, -0.0225, ..., -0.1279, -0.1605, -0.1255]], |
|
|
device='cuda:0'), grad: tensor([[ 5.6076e-03, 1.2617e-03, 1.4315e-03, ..., 2.1935e-03, |
|
|
1.7796e-03, 3.0479e-03], |
|
|
[ 4.9362e-03, 2.6488e-04, 7.0047e-04, ..., 1.4935e-03, |
|
|
1.3771e-03, 1.4277e-03], |
|
|
[ 1.0651e-04, -7.0781e-07, 8.5086e-06, ..., 3.8654e-05, |
|
|
3.2961e-05, 3.8892e-05], |
|
|
..., |
|
|
[ 3.0041e-04, 2.2113e-05, 4.7088e-05, ..., 9.2387e-05, |
|
|
8.4519e-05, 9.2983e-05], |
|
|
[ 9.2316e-03, 3.9482e-04, 1.2436e-03, ..., 2.7428e-03, |
|
|
2.5539e-03, 2.5234e-03], |
|
|
[-2.0370e-02, -1.9531e-03, -3.4580e-03, ..., -6.6147e-03, |
|
|
-5.8784e-03, -7.1831e-03]], device='cuda:0') |
|
|
Epoch 42, bias, value: tensor([-0.0175, -0.2477, 0.0069, -0.1734, -0.0019, 0.2824, 0.1610], |
|
|
device='cuda:0'), grad: tensor([ 0.0323, 0.0152, 0.0005, 0.0005, 0.0010, 0.0268, -0.0762], |
|
|
device='cuda:0') |
|
|
351 |
|
|
0.003669815772166629 |
|
|
changing lr |
|
|
epoch 41, time 487.61, cls_loss 0.0074 cls_loss_mapping 0.0727 cls_loss_causal 0.7325 re_mapping 0.0550 re_causal 0.0565 |
|
|
Epoch 43, weight, value: tensor([[-0.0212, -0.0139, -0.0070, ..., -0.0095, 0.1025, 0.1256], |
|
|
[ 0.0400, 0.0514, 0.0434, ..., 0.0431, 0.0171, -0.0166], |
|
|
[ 0.0619, 0.0353, 0.0605, ..., 0.0092, 0.0022, 0.0200], |
|
|
..., |
|
|
[-0.1162, -0.1007, -0.0678, ..., 0.0020, 0.0174, -0.0152], |
|
|
[-0.0136, -0.0025, -0.0255, ..., 0.0928, 0.0385, -0.0026], |
|
|
[-0.0236, -0.0285, -0.0226, ..., -0.1276, -0.1601, -0.1253]], |
|
|
device='cuda:0'), grad: tensor([[ 1.8328e-05, 3.5167e-06, 4.0494e-06, ..., 6.2659e-06, |
|
|
3.9861e-06, 5.5842e-06], |
|
|
[ 3.2261e-06, 8.4192e-07, 1.1250e-06, ..., 1.3001e-06, |
|
|
1.0841e-06, 1.3933e-06], |
|
|
[ 2.5794e-05, 9.8720e-06, 1.1310e-05, ..., 9.2685e-06, |
|
|
9.9540e-06, 1.1086e-05], |
|
|
..., |
|
|
[ 2.9549e-05, 1.0736e-05, 1.2115e-05, ..., 1.0125e-05, |
|
|
1.0498e-05, 1.1921e-05], |
|
|
[ 1.4000e-05, 2.1495e-06, 3.7774e-06, ..., 5.8562e-06, |
|
|
3.5763e-06, 5.4203e-06], |
|
|
[-3.0845e-05, -3.8296e-06, -5.8822e-06, ..., -1.1764e-05, |
|
|
-6.1207e-06, -9.7081e-06]], device='cuda:0') |
|
|
Epoch 43, bias, value: tensor([-0.0182, -0.2470, 0.0065, -0.1719, 0.0034, 0.2799, 0.1570], |
|
|
device='cuda:0'), grad: tensor([ 5.5403e-05, 8.9854e-06, 5.5552e-05, -1.2755e-04, 6.5923e-05, |
|
|
4.7505e-05, -1.0574e-04], device='cuda:0') |
|
|
351 |
|
|
0.0034549150281252667 |
|
|
changing lr |
|
|
epoch 42, time 486.22, cls_loss 0.0069 cls_loss_mapping 0.0656 cls_loss_causal 0.7226 re_mapping 0.0538 re_causal 0.0554 |
|
|
Epoch 44, weight, value: tensor([[-0.0216, -0.0140, -0.0071, ..., -0.0096, 0.1021, 0.1251], |
|
|
[ 0.0402, 0.0512, 0.0433, ..., 0.0430, 0.0172, -0.0164], |
|
|
[ 0.0620, 0.0353, 0.0603, ..., 0.0091, 0.0021, 0.0199], |
|
|
..., |
|
|
[-0.1160, -0.1002, -0.0676, ..., 0.0017, 0.0170, -0.0154], |
|
|
[-0.0131, -0.0025, -0.0254, ..., 0.0929, 0.0387, -0.0023], |
|
|
[-0.0234, -0.0283, -0.0224, ..., -0.1271, -0.1595, -0.1248]], |
|
|
device='cuda:0'), grad: tensor([[ 1.6963e-04, 2.2471e-05, 2.5943e-05, ..., 8.0049e-05, |
|
|
6.7055e-05, 7.7963e-05], |
|
|
[-3.7432e-05, -6.4336e-06, -1.2316e-05, ..., -4.7833e-05, |
|
|
-5.9873e-05, -6.4135e-05], |
|
|
[-1.2279e-04, -5.3316e-05, -5.8621e-05, ..., -1.4551e-05, |
|
|
-3.0726e-05, -4.2588e-05], |
|
|
..., |
|
|
[ 3.2067e-04, 7.4446e-05, 8.3625e-05, ..., 9.1612e-05, |
|
|
9.7215e-05, 1.2577e-04], |
|
|
[ 5.6219e-04, 5.9366e-05, 6.4909e-05, ..., 1.5295e-04, |
|
|
1.1712e-04, 1.6892e-04], |
|
|
[-9.2030e-04, -9.9599e-05, -1.0693e-04, ..., -2.6965e-04, |
|
|
-1.9670e-04, -2.7466e-04]], device='cuda:0') |
|
|
Epoch 44, bias, value: tensor([-0.0191, -0.2443, 0.0069, -0.1719, 0.0013, 0.2796, 0.1572], |
|
|
device='cuda:0'), grad: tensor([ 6.8665e-04, 2.4974e-05, -2.1517e-04, 9.7215e-05, 9.1743e-04, |
|
|
2.0123e-03, -3.5248e-03], device='cuda:0') |
|
|
351 |
|
|
0.0032431258795932905 |
|
|
changing lr |
|
|
epoch 43, time 489.72, cls_loss 0.0105 cls_loss_mapping 0.0591 cls_loss_causal 0.7122 re_mapping 0.0533 re_causal 0.0549 |
|
|
Epoch 45, weight, value: tensor([[-0.0222, -0.0140, -0.0073, ..., -0.0099, 0.1017, 0.1245], |
|
|
[ 0.0400, 0.0509, 0.0431, ..., 0.0428, 0.0171, -0.0163], |
|
|
[ 0.0616, 0.0353, 0.0601, ..., 0.0087, 0.0019, 0.0196], |
|
|
..., |
|
|
[-0.1159, -0.0999, -0.0675, ..., 0.0017, 0.0166, -0.0157], |
|
|
[-0.0133, -0.0026, -0.0255, ..., 0.0928, 0.0388, -0.0020], |
|
|
[-0.0227, -0.0282, -0.0221, ..., -0.1265, -0.1588, -0.1241]], |
|
|
device='cuda:0'), grad: tensor([[-8.6203e-06, -1.0803e-06, -2.7604e-06, ..., -4.3027e-06, |
|
|
-4.8839e-06, -7.2680e-06], |
|
|
[-3.8981e-04, -2.3216e-05, -9.8407e-05, ..., -1.4544e-04, |
|
|
-2.0409e-04, -1.9073e-04], |
|
|
[-3.0082e-06, -1.6894e-06, -1.5181e-06, ..., 3.5763e-07, |
|
|
3.6880e-07, 1.2107e-07], |
|
|
..., |
|
|
[ 3.8028e-04, 2.3350e-05, 9.6440e-05, ..., 1.4150e-04, |
|
|
1.9836e-04, 1.8644e-04], |
|
|
[ 6.8918e-06, 5.0664e-07, 1.7155e-06, ..., 2.4550e-06, |
|
|
3.3509e-06, 3.2224e-06], |
|
|
[ 8.7097e-06, 1.1623e-06, 2.7977e-06, ..., 3.7644e-06, |
|
|
4.5709e-06, 5.8189e-06]], device='cuda:0') |
|
|
Epoch 45, bias, value: tensor([-2.0831e-02, -2.4265e-01, 5.8777e-03, -1.6848e-01, 1.9903e-05, |
|
|
2.7736e-01, 1.5830e-01], device='cuda:0'), grad: tensor([-3.1412e-05, -1.2970e-03, -5.6811e-06, 1.4350e-05, 1.2646e-03, |
|
|
2.2843e-05, 2.9579e-05], device='cuda:0') |
|
|
351 |
|
|
0.0030348748417303863 |
|
|
changing lr |
|
|
epoch 44, time 487.09, cls_loss 0.0084 cls_loss_mapping 0.0627 cls_loss_causal 0.7363 re_mapping 0.0523 re_causal 0.0540 |
|
|
Epoch 46, weight, value: tensor([[-0.0226, -0.0141, -0.0073, ..., -0.0101, 0.1011, 0.1239], |
|
|
[ 0.0396, 0.0506, 0.0428, ..., 0.0426, 0.0171, -0.0163], |
|
|
[ 0.0621, 0.0355, 0.0601, ..., 0.0086, 0.0018, 0.0196], |
|
|
..., |
|
|
[-0.1147, -0.0993, -0.0669, ..., 0.0018, 0.0167, -0.0154], |
|
|
[-0.0123, -0.0025, -0.0252, ..., 0.0933, 0.0392, -0.0013], |
|
|
[-0.0237, -0.0283, -0.0223, ..., -0.1264, -0.1586, -0.1242]], |
|
|
device='cuda:0'), grad: tensor([[ 6.1005e-05, 8.5235e-06, 2.0206e-05, ..., 3.7134e-05, |
|
|
4.8548e-05, 5.1439e-05], |
|
|
[-5.2303e-05, -7.2159e-06, -1.8224e-05, ..., -3.3587e-05, |
|
|
-4.5151e-05, -4.7475e-05], |
|
|
[-2.4978e-06, -8.5682e-07, -1.0654e-06, ..., 8.8662e-07, |
|
|
-8.7544e-08, -2.1793e-07], |
|
|
..., |
|
|
[ 2.4624e-06, 2.3469e-07, 5.1782e-07, ..., 1.7583e-06, |
|
|
1.6093e-06, 1.7621e-06], |
|
|
[-6.5379e-07, -3.5018e-07, 9.3132e-08, ..., -8.7172e-06, |
|
|
-6.4522e-06, -6.6496e-06], |
|
|
[-8.6203e-06, -1.7323e-07, -1.4603e-06, ..., 1.7472e-06, |
|
|
8.6240e-07, 4.0419e-07]], device='cuda:0') |
|
|
Epoch 46, bias, value: tensor([-0.0229, -0.2422, 0.0083, -0.1695, 0.0023, 0.2799, 0.1536], |
|
|
device='cuda:0'), grad: tensor([ 2.0087e-04, -1.7345e-04, -5.4091e-06, 2.8592e-06, 8.5533e-06, |
|
|
-5.4836e-06, -2.7969e-05], device='cuda:0') |
|
|
351 |
|
|
0.0028305813044122124 |
|
|
changing lr |
|
|
---------------------saving model at epoch 45---------------------------------------------------- |
|
|
epoch 45, time 492.10, cls_loss 0.0091 cls_loss_mapping 0.0608 cls_loss_causal 0.7182 re_mapping 0.0522 re_causal 0.0539 |
|
|
Epoch 47, weight, value: tensor([[-0.0212, -0.0139, -0.0069, ..., -0.0095, 0.1019, 0.1245], |
|
|
[ 0.0388, 0.0502, 0.0424, ..., 0.0422, 0.0167, -0.0167], |
|
|
[ 0.0622, 0.0355, 0.0600, ..., 0.0085, 0.0017, 0.0195], |
|
|
..., |
|
|
[-0.1149, -0.0990, -0.0669, ..., 0.0016, 0.0161, -0.0159], |
|
|
[-0.0125, -0.0025, -0.0252, ..., 0.0931, 0.0391, -0.0013], |
|
|
[-0.0236, -0.0282, -0.0222, ..., -0.1260, -0.1581, -0.1238]], |
|
|
device='cuda:0'), grad: tensor([[-2.3961e-04, -1.5020e-05, -4.5061e-05, ..., -7.1526e-05, |
|
|
-1.4293e-04, -1.5366e-04], |
|
|
[ 4.7952e-05, 6.9402e-06, 1.2204e-05, ..., 1.8135e-05, |
|
|
2.4453e-05, 2.5079e-05], |
|
|
[-3.4273e-06, -3.3509e-06, -2.6412e-06, ..., 1.6391e-06, |
|
|
3.9525e-06, 3.6862e-06], |
|
|
..., |
|
|
[ 1.8501e-04, 1.4961e-05, 3.5077e-05, ..., 5.6356e-05, |
|
|
1.0079e-04, 1.0961e-04], |
|
|
[-6.5863e-05, -2.5541e-05, -2.4706e-05, ..., -6.4194e-05, |
|
|
-4.3005e-05, -4.3362e-05], |
|
|
[-2.0619e-06, 1.1697e-06, -4.8615e-07, ..., 8.0764e-06, |
|
|
8.3670e-06, 9.6112e-06]], device='cuda:0') |
|
|
Epoch 47, bias, value: tensor([-1.8068e-02, -2.4327e-01, 9.5686e-03, -1.6941e-01, -7.8014e-05, |
|
|
2.7772e-01, 1.5303e-01], device='cuda:0'), grad: tensor([-8.3208e-04, 1.5223e-04, 4.2468e-07, 2.0671e-04, 6.2895e-04, |
|
|
-1.4079e-04, -1.5587e-05], device='cuda:0') |
|
|
351 |
|
|
0.0026306566876350096 |
|
|
changing lr |
|
|
epoch 46, time 489.43, cls_loss 0.0071 cls_loss_mapping 0.0539 cls_loss_causal 0.6687 re_mapping 0.0523 re_causal 0.0540 |
|
|
Epoch 48, weight, value: tensor([[-0.0214, -0.0139, -0.0070, ..., -0.0097, 0.1014, 0.1239], |
|
|
[ 0.0393, 0.0501, 0.0423, ..., 0.0422, 0.0167, -0.0165], |
|
|
[ 0.0616, 0.0354, 0.0597, ..., 0.0082, 0.0015, 0.0192], |
|
|
..., |
|
|
[-0.1144, -0.0986, -0.0665, ..., 0.0016, 0.0162, -0.0156], |
|
|
[-0.0122, -0.0024, -0.0251, ..., 0.0932, 0.0393, -0.0009], |
|
|
[-0.0237, -0.0282, -0.0221, ..., -0.1258, -0.1577, -0.1235]], |
|
|
device='cuda:0'), grad: tensor([[ 4.4298e-04, 6.1333e-05, 1.1182e-04, ..., 2.1458e-04, |
|
|
1.8668e-04, 1.7023e-04], |
|
|
[ 1.8826e-03, 2.6131e-04, 4.8971e-04, ..., 1.0023e-03, |
|
|
8.5163e-04, 7.7057e-04], |
|
|
[ 2.8515e-04, 3.9458e-05, 6.5804e-05, ..., 7.5281e-05, |
|
|
7.8142e-05, 8.3566e-05], |
|
|
..., |
|
|
[-2.8276e-04, -4.5151e-05, -1.1039e-04, ..., -2.2113e-04, |
|
|
-1.5962e-04, -2.0111e-04], |
|
|
[ 5.2691e-04, 7.3135e-05, 1.3399e-04, ..., 2.4748e-04, |
|
|
2.1160e-04, 2.0254e-04], |
|
|
[-2.9583e-03, -4.0483e-04, -7.1955e-04, ..., -1.3676e-03, |
|
|
-1.2093e-03, -1.0691e-03]], device='cuda:0') |
|
|
Epoch 48, bias, value: tensor([-1.8857e-02, -2.4018e-01, 8.1123e-03, -1.6882e-01, -1.7916e-04, |
|
|
2.7774e-01, 1.5164e-01], device='cuda:0'), grad: tensor([ 0.0014, 0.0060, 0.0009, 0.0003, -0.0010, 0.0017, -0.0095], |
|
|
device='cuda:0') |
|
|
351 |
|
|
0.0024355036129704724 |
|
|
changing lr |
|
|
epoch 47, time 487.96, cls_loss 0.0077 cls_loss_mapping 0.0550 cls_loss_causal 0.6829 re_mapping 0.0517 re_causal 0.0534 |
|
|
Epoch 49, weight, value: tensor([[-0.0218, -0.0139, -0.0071, ..., -0.0099, 0.1009, 0.1234], |
|
|
[ 0.0387, 0.0498, 0.0420, ..., 0.0420, 0.0165, -0.0166], |
|
|
[ 0.0617, 0.0354, 0.0596, ..., 0.0082, 0.0014, 0.0191], |
|
|
..., |
|
|
[-0.1136, -0.0983, -0.0662, ..., 0.0017, 0.0162, -0.0154], |
|
|
[-0.0129, -0.0026, -0.0253, ..., 0.0928, 0.0391, -0.0010], |
|
|
[-0.0224, -0.0280, -0.0218, ..., -0.1249, -0.1569, -0.1228]], |
|
|
device='cuda:0'), grad: tensor([[ 1.1139e-05, 3.9563e-06, 4.2059e-06, ..., 5.1297e-06, |
|
|
-3.5092e-06, -1.6186e-06], |
|
|
[ 1.8522e-05, 5.1521e-06, 6.6422e-06, ..., 5.1931e-06, |
|
|
4.4443e-06, 4.9062e-06], |
|
|
[-1.5345e-03, -5.2452e-04, -6.5041e-04, ..., -2.7299e-04, |
|
|
-2.6488e-04, -3.1328e-04], |
|
|
..., |
|
|
[ 2.2805e-04, 7.3135e-05, 9.2268e-05, ..., 4.5002e-05, |
|
|
4.8876e-05, 5.4628e-05], |
|
|
[ 1.2875e-05, 2.3752e-05, 2.5123e-05, ..., -2.8685e-05, |
|
|
-1.7419e-05, -1.6347e-05], |
|
|
[ 1.1272e-03, 3.7432e-04, 4.6635e-04, ..., 2.1875e-04, |
|
|
2.0671e-04, 2.4176e-04]], device='cuda:0') |
|
|
Epoch 49, bias, value: tensor([-0.0203, -0.2404, 0.0085, -0.1690, 0.0013, 0.2743, 0.1549], |
|
|
device='cuda:0'), grad: tensor([ 2.9311e-05, 4.7684e-05, -3.3817e-03, 3.1281e-04, 5.2595e-04, |
|
|
-8.5831e-05, 2.5520e-03], device='cuda:0') |
|
|
351 |
|
|
0.00224551509273949 |
|
|
changing lr |
|
|
epoch 48, time 489.20, cls_loss 0.0051 cls_loss_mapping 0.0480 cls_loss_causal 0.6975 re_mapping 0.0510 re_causal 0.0528 |
|
|
Epoch 50, weight, value: tensor([[-0.0211, -0.0138, -0.0069, ..., -0.0097, 0.1009, 0.1233], |
|
|
[ 0.0382, 0.0495, 0.0417, ..., 0.0417, 0.0164, -0.0167], |
|
|
[ 0.0627, 0.0359, 0.0599, ..., 0.0083, 0.0016, 0.0193], |
|
|
..., |
|
|
[-0.1137, -0.0980, -0.0661, ..., 0.0015, 0.0159, -0.0155], |
|
|
[-0.0126, -0.0026, -0.0252, ..., 0.0929, 0.0392, -0.0008], |
|
|
[-0.0227, -0.0280, -0.0217, ..., -0.1247, -0.1566, -0.1226]], |
|
|
device='cuda:0'), grad: tensor([[ 1.1253e-03, 1.8346e-04, 4.4370e-04, ..., 3.2568e-04, |
|
|
3.2568e-04, 3.5405e-04], |
|
|
[-1.6794e-03, -2.7108e-04, -6.5708e-04, ..., -4.8637e-04, |
|
|
-4.8971e-04, -5.3120e-04], |
|
|
[ 1.3448e-05, 1.6382e-06, 4.7870e-06, ..., 4.1053e-06, |
|
|
4.4927e-06, 4.7088e-06], |
|
|
..., |
|
|
[ 6.8322e-06, 7.2643e-07, 1.7742e-06, ..., 1.8915e-06, |
|
|
2.3209e-06, 2.2016e-06], |
|
|
[ 1.9342e-05, 3.0510e-06, 7.3537e-06, ..., 5.6028e-06, |
|
|
5.7891e-06, 6.2510e-06], |
|
|
[ 5.0974e-04, 8.1480e-05, 1.9753e-04, ..., 1.4758e-04, |
|
|
1.5008e-04, 1.6248e-04]], device='cuda:0') |
|
|
Epoch 50, bias, value: tensor([-0.0180, -0.2408, 0.0113, -0.1703, -0.0003, 0.2742, 0.1533], |
|
|
device='cuda:0'), grad: tensor([ 3.3379e-03, -4.9934e-03, 4.1991e-05, 8.9183e-06, 2.1189e-05, |
|
|
5.7757e-05, 1.5182e-03], device='cuda:0') |
|
|
351 |
|
|
0.002061073738537637 |
|
|
changing lr |
|
|
epoch 49, time 484.91, cls_loss 0.0058 cls_loss_mapping 0.0522 cls_loss_causal 0.6878 re_mapping 0.0512 re_causal 0.0531 |
|
|
Epoch 51, weight, value: tensor([[-0.0213, -0.0139, -0.0069, ..., -0.0098, 0.1006, 0.1229], |
|
|
[ 0.0383, 0.0494, 0.0416, ..., 0.0417, 0.0165, -0.0165], |
|
|
[ 0.0622, 0.0358, 0.0597, ..., 0.0082, 0.0014, 0.0190], |
|
|
..., |
|
|
[-0.1135, -0.0978, -0.0659, ..., 0.0015, 0.0158, -0.0155], |
|
|
[-0.0126, -0.0026, -0.0252, ..., 0.0928, 0.0393, -0.0006], |
|
|
[-0.0230, -0.0279, -0.0217, ..., -0.1246, -0.1563, -0.1224]], |
|
|
device='cuda:0'), grad: tensor([[ 3.5453e-04, 4.2230e-05, 8.2910e-05, ..., 1.2863e-04, |
|
|
1.9825e-04, 1.6248e-04], |
|
|
[ 4.3213e-05, 5.1521e-06, 8.0839e-06, ..., 2.7746e-05, |
|
|
2.8342e-05, 2.6956e-05], |
|
|
[ 2.9758e-05, 3.7849e-06, 5.3681e-06, ..., 1.8895e-05, |
|
|
1.9521e-05, 1.8239e-05], |
|
|
..., |
|
|
[-3.7861e-04, -3.9101e-05, -9.9599e-05, ..., -7.7963e-05, |
|
|
-2.0134e-04, -1.4389e-04], |
|
|
[-1.8530e-03, -3.1042e-04, -3.1281e-04, ..., -1.2970e-03, |
|
|
-1.1034e-03, -1.1406e-03], |
|
|
[ 1.6336e-03, 2.7657e-04, 2.7704e-04, ..., 1.1320e-03, |
|
|
9.6369e-04, 9.9659e-04]], device='cuda:0') |
|
|
Epoch 51, bias, value: tensor([-0.0185, -0.2392, 0.0103, -0.1683, -0.0005, 0.2735, 0.1519], |
|
|
device='cuda:0'), grad: tensor([ 1.2074e-03, 1.3626e-04, 9.5785e-05, 5.7364e-04, -1.3552e-03, |
|
|
-5.5962e-03, 4.9400e-03], device='cuda:0') |
|
|
351 |
|
|
0.0018825509907063344 |
|
|
changing lr |
|
|
epoch 50, time 487.79, cls_loss 0.0047 cls_loss_mapping 0.0466 cls_loss_causal 0.6943 re_mapping 0.0511 re_causal 0.0530 |
|
|
Epoch 52, weight, value: tensor([[-0.0216, -0.0139, -0.0070, ..., -0.0099, 0.1002, 0.1225], |
|
|
[ 0.0385, 0.0493, 0.0415, ..., 0.0417, 0.0165, -0.0164], |
|
|
[ 0.0622, 0.0358, 0.0596, ..., 0.0081, 0.0013, 0.0190], |
|
|
..., |
|
|
[-0.1131, -0.0975, -0.0657, ..., 0.0015, 0.0157, -0.0155], |
|
|
[-0.0127, -0.0027, -0.0252, ..., 0.0927, 0.0393, -0.0006], |
|
|
[-0.0229, -0.0279, -0.0216, ..., -0.1242, -0.1559, -0.1221]], |
|
|
device='cuda:0'), grad: tensor([[-1.2982e-04, -1.2673e-05, -4.6909e-05, ..., -6.3837e-05, |
|
|
-1.1736e-04, -1.3304e-04], |
|
|
[-2.3592e-04, -4.2081e-05, -3.9279e-05, ..., -8.7023e-05, |
|
|
-8.8811e-05, -6.4790e-05], |
|
|
[-3.4034e-05, -2.2024e-05, -2.2888e-05, ..., 6.2622e-06, |
|
|
9.6485e-06, 3.5074e-06], |
|
|
..., |
|
|
[ 2.4235e-04, 3.5316e-05, 6.6459e-05, ..., 1.0478e-04, |
|
|
1.4913e-04, 1.5426e-04], |
|
|
[-9.1434e-05, 1.8887e-06, -5.2340e-06, ..., -7.2837e-05, |
|
|
-4.9442e-05, -4.6045e-05], |
|
|
[ 1.8239e-04, 2.4110e-05, 3.0428e-05, ..., 9.0301e-05, |
|
|
7.6056e-05, 6.6817e-05]], device='cuda:0') |
|
|
Epoch 52, bias, value: tensor([-0.0198, -0.2375, 0.0106, -0.1674, -0.0003, 0.2722, 0.1516], |
|
|
device='cuda:0'), grad: tensor([-5.4979e-04, -6.8712e-04, -4.6283e-05, 1.8716e-04, 8.5592e-04, |
|
|
-3.2711e-04, 5.6744e-04], device='cuda:0') |
|
|
351 |
|
|
0.0017103063703014388 |
|
|
changing lr |
|
|
epoch 51, time 488.35, cls_loss 0.0062 cls_loss_mapping 0.0457 cls_loss_causal 0.6795 re_mapping 0.0503 re_causal 0.0522 |
|
|
Epoch 53, weight, value: tensor([[-0.0217, -0.0139, -0.0071, ..., -0.0100, 0.1000, 0.1222], |
|
|
[ 0.0387, 0.0492, 0.0415, ..., 0.0417, 0.0166, -0.0162], |
|
|
[ 0.0617, 0.0357, 0.0594, ..., 0.0078, 0.0011, 0.0187], |
|
|
..., |
|
|
[-0.1128, -0.0972, -0.0655, ..., 0.0015, 0.0156, -0.0155], |
|
|
[-0.0125, -0.0027, -0.0252, ..., 0.0927, 0.0394, -0.0004], |
|
|
[-0.0228, -0.0278, -0.0215, ..., -0.1240, -0.1556, -0.1218]], |
|
|
device='cuda:0'), grad: tensor([[-4.3184e-05, -7.6443e-06, -1.2971e-05, ..., -8.9481e-06, |
|
|
-2.2054e-05, -2.4438e-05], |
|
|
[ 3.1084e-05, 5.4389e-06, 8.3521e-06, ..., 8.5458e-06, |
|
|
1.3798e-05, 1.4573e-05], |
|
|
[ 1.3590e-04, 1.5318e-05, 2.5660e-05, ..., 6.3658e-05, |
|
|
4.5538e-05, 4.2319e-05], |
|
|
..., |
|
|
[-1.6212e-04, -1.7196e-05, -2.8029e-05, ..., -8.2672e-05, |
|
|
-5.1290e-05, -4.5180e-05], |
|
|
[ 2.1487e-05, 2.5854e-06, 4.3064e-06, ..., 9.0301e-06, |
|
|
6.7316e-06, 6.4820e-06], |
|
|
[-1.1288e-05, -1.6820e-06, -2.6543e-06, ..., -2.7064e-06, |
|
|
-1.9800e-06, -2.3786e-06]], device='cuda:0') |
|
|
Epoch 53, bias, value: tensor([-2.0178e-02, -2.3575e-01, 9.2648e-03, -1.6716e-01, -2.3270e-04, |
|
|
2.7194e-01, 1.5140e-01], device='cuda:0'), grad: tensor([-1.4365e-04, 9.9480e-05, 4.1962e-04, 8.7023e-05, -4.9305e-04, |
|
|
6.7174e-05, -3.6746e-05], device='cuda:0') |
|
|
351 |
|
|
0.0015446867550656784 |
|
|
changing lr |
|
|
epoch 52, time 486.84, cls_loss 0.0055 cls_loss_mapping 0.0471 cls_loss_causal 0.6569 re_mapping 0.0497 re_causal 0.0515 |
|
|
Epoch 54, weight, value: tensor([[-2.1269e-02, -1.3865e-02, -6.9552e-03, ..., -9.8699e-03, |
|
|
1.0012e-01, 1.2224e-01], |
|
|
[ 3.8476e-02, 4.8979e-02, 4.1318e-02, ..., 4.1490e-02, |
|
|
1.6448e-02, -1.6356e-02], |
|
|
[ 6.1428e-02, 3.5670e-02, 5.9260e-02, ..., 7.6740e-03, |
|
|
9.0968e-04, 1.8529e-02], |
|
|
..., |
|
|
[-1.1242e-01, -9.6968e-02, -6.5334e-02, ..., 1.5697e-03, |
|
|
1.5542e-02, -1.5468e-02], |
|
|
[-1.2210e-02, -2.6671e-03, -2.5152e-02, ..., 9.2897e-02, |
|
|
3.9566e-02, -1.3846e-04], |
|
|
[-2.3079e-02, -2.7743e-02, -2.1526e-02, ..., -1.2388e-01, |
|
|
-1.5544e-01, -1.2169e-01]], device='cuda:0'), grad: tensor([[-6.1512e-04, -2.2143e-05, -1.5163e-04, ..., -1.7321e-04, |
|
|
-3.0017e-04, -3.2377e-04], |
|
|
[ 3.2759e-04, 6.2287e-05, 9.6262e-05, ..., 7.9751e-05, |
|
|
8.0824e-05, 9.2447e-05], |
|
|
[ 1.1148e-03, 3.7837e-04, 4.0412e-04, ..., 3.4690e-04, |
|
|
2.2399e-04, 3.0065e-04], |
|
|
..., |
|
|
[ 6.1750e-04, 8.4043e-05, 1.8024e-04, ..., 1.6785e-04, |
|
|
2.0778e-04, 2.3592e-04], |
|
|
[ 6.5422e-04, 2.2161e-04, 2.3806e-04, ..., 1.9085e-04, |
|
|
1.0508e-04, 1.4663e-04], |
|
|
[ 2.6894e-04, 1.5521e-04, 1.3745e-04, ..., 1.1659e-04, |
|
|
3.2067e-05, 6.8426e-05]], device='cuda:0') |
|
|
Epoch 54, bias, value: tensor([-1.8857e-02, -2.3560e-01, 8.6756e-03, -1.6740e-01, 2.6472e-05, |
|
|
2.7247e-01, 1.4994e-01], device='cuda:0'), grad: tensor([-0.0022, 0.0010, 0.0029, -0.0059, 0.0020, 0.0017, 0.0005], |
|
|
device='cuda:0') |
|
|
351 |
|
|
0.001386025680863044 |
|
|
changing lr |
|
|
---------------------saving model at epoch 53---------------------------------------------------- |
|
|
epoch 53, time 490.61, cls_loss 0.0052 cls_loss_mapping 0.0430 cls_loss_causal 0.7053 re_mapping 0.0491 re_causal 0.0511 |
|
|
Epoch 55, weight, value: tensor([[-2.1294e-02, -1.3864e-02, -6.9845e-03, ..., -9.8950e-03, |
|
|
9.9935e-02, 1.2198e-01], |
|
|
[ 3.8158e-02, 4.8834e-02, 4.1167e-02, ..., 4.1333e-02, |
|
|
1.6349e-02, -1.6418e-02], |
|
|
[ 6.1134e-02, 3.5595e-02, 5.9100e-02, ..., 7.4893e-03, |
|
|
7.9670e-04, 1.8389e-02], |
|
|
..., |
|
|
[-1.1226e-01, -9.6775e-02, -6.5204e-02, ..., 1.5158e-03, |
|
|
1.5462e-02, -1.5474e-02], |
|
|
[-1.2280e-02, -2.6849e-03, -2.5158e-02, ..., 9.2897e-02, |
|
|
3.9609e-02, -5.1898e-05], |
|
|
[-2.2534e-02, -2.7644e-02, -2.1351e-02, ..., -1.2353e-01, |
|
|
-1.5505e-01, -1.2130e-01]], device='cuda:0'), grad: tensor([[ 3.6740e-04, 1.1533e-04, 1.4138e-04, ..., 7.1287e-05, |
|
|
6.3181e-05, 8.7380e-05], |
|
|
[ 1.0300e-04, 1.9923e-05, 2.7791e-05, ..., 2.7940e-05, |
|
|
3.5048e-05, 3.9577e-05], |
|
|
[-4.7874e-03, -1.3676e-03, -1.7347e-03, ..., -1.0529e-03, |
|
|
-1.1044e-03, -1.3723e-03], |
|
|
..., |
|
|
[ 2.4045e-04, 5.3853e-05, 6.8605e-05, ..., 5.7757e-05, |
|
|
6.1929e-05, 7.4923e-05], |
|
|
[ 7.4482e-04, 1.7750e-04, 2.0897e-04, ..., 1.5354e-04, |
|
|
1.3340e-04, 1.7190e-04], |
|
|
[ 1.6146e-03, 5.3740e-04, 6.7186e-04, ..., 3.2306e-04, |
|
|
3.2139e-04, 4.0817e-04]], device='cuda:0') |
|
|
Epoch 55, bias, value: tensor([-1.9001e-02, -2.3573e-01, 8.0842e-03, -1.6670e-01, -2.0496e-04, |
|
|
2.7154e-01, 1.5125e-01], device='cuda:0'), grad: tensor([ 0.0010, 0.0003, -0.0144, 0.0048, 0.0007, 0.0022, 0.0053], |
|
|
device='cuda:0') |
|
|
351 |
|
|
0.0012346426699819469 |
|
|
changing lr |
|
|
epoch 54, time 488.59, cls_loss 0.0049 cls_loss_mapping 0.0387 cls_loss_causal 0.6318 re_mapping 0.0490 re_causal 0.0508 |
|
|
Epoch 56, weight, value: tensor([[-2.1449e-02, -1.3913e-02, -7.0644e-03, ..., -9.9782e-03, |
|
|
9.9706e-02, 1.2171e-01], |
|
|
[ 3.8039e-02, 4.8727e-02, 4.1080e-02, ..., 4.1237e-02, |
|
|
1.6312e-02, -1.6407e-02], |
|
|
[ 6.1227e-02, 3.5646e-02, 5.9084e-02, ..., 7.4554e-03, |
|
|
7.6650e-04, 1.8350e-02], |
|
|
..., |
|
|
[-1.1180e-01, -9.6573e-02, -6.5017e-02, ..., 1.5741e-03, |
|
|
1.5466e-02, -1.5377e-02], |
|
|
[-1.2142e-02, -2.6935e-03, -2.5135e-02, ..., 9.2940e-02, |
|
|
3.9678e-02, 7.4000e-05], |
|
|
[-2.2755e-02, -2.7644e-02, -2.1360e-02, ..., -1.2344e-01, |
|
|
-1.5488e-01, -1.2119e-01]], device='cuda:0'), grad: tensor([[ 1.8072e-04, 2.8282e-05, 2.8521e-05, ..., 1.1289e-04, |
|
|
1.1140e-04, 1.0514e-04], |
|
|
[ 8.2701e-06, 2.1160e-06, -5.6066e-07, ..., 1.0535e-05, |
|
|
9.0897e-06, 9.0003e-06], |
|
|
[ 2.6870e-04, 4.2528e-05, 4.1068e-05, ..., 1.8346e-04, |
|
|
1.8668e-04, 1.6940e-04], |
|
|
..., |
|
|
[ 1.0824e-04, 2.4393e-05, 2.8685e-05, ..., 4.8369e-05, |
|
|
4.9680e-05, 4.8935e-05], |
|
|
[-6.2132e-04, -8.6784e-05, -7.6830e-05, ..., -4.2725e-04, |
|
|
-4.1485e-04, -3.9148e-04], |
|
|
[ 1.1188e-04, 1.4149e-05, 1.2986e-05, ..., 7.1883e-05, |
|
|
6.5327e-05, 6.6519e-05]], device='cuda:0') |
|
|
Epoch 56, bias, value: tensor([-0.0195, -0.2354, 0.0085, -0.1666, 0.0006, 0.2715, 0.1501], |
|
|
device='cuda:0'), grad: tensor([ 5.4550e-04, 2.2292e-05, 7.8392e-04, -1.4389e-04, 3.1972e-04, |
|
|
-1.8892e-03, 3.6216e-04], device='cuda:0') |
|
|
351 |
|
|
0.0010908425876598518 |
|
|
changing lr |
|
|
epoch 55, time 487.74, cls_loss 0.0051 cls_loss_mapping 0.0425 cls_loss_causal 0.6995 re_mapping 0.0486 re_causal 0.0505 |
|
|
Epoch 57, weight, value: tensor([[-2.1218e-02, -1.3897e-02, -7.0272e-03, ..., -9.9669e-03, |
|
|
9.9634e-02, 1.2159e-01], |
|
|
[ 3.7903e-02, 4.8628e-02, 4.0986e-02, ..., 4.1183e-02, |
|
|
1.6288e-02, -1.6388e-02], |
|
|
[ 6.1276e-02, 3.5676e-02, 5.9051e-02, ..., 7.4110e-03, |
|
|
7.6182e-04, 1.8332e-02], |
|
|
..., |
|
|
[-1.1156e-01, -9.6415e-02, -6.4905e-02, ..., 1.6178e-03, |
|
|
1.5434e-02, -1.5334e-02], |
|
|
[-1.2345e-02, -2.7407e-03, -2.5176e-02, ..., 9.2830e-02, |
|
|
3.9633e-02, 7.1650e-05], |
|
|
[-2.2691e-02, -2.7605e-02, -2.1297e-02, ..., -1.2326e-01, |
|
|
-1.5467e-01, -1.2102e-01]], device='cuda:0'), grad: tensor([[ 3.3617e-04, 3.0011e-05, 6.1452e-05, ..., 1.2374e-04, |
|
|
1.1009e-04, 1.2720e-04], |
|
|
[-1.7416e-04, -1.3128e-05, -3.3319e-05, ..., -8.8274e-05, |
|
|
-1.1045e-04, -1.1855e-04], |
|
|
[-4.5091e-05, -2.1130e-05, -1.8314e-05, ..., -1.2778e-05, |
|
|
-1.1884e-05, -1.6809e-05], |
|
|
..., |
|
|
[-3.4451e-04, -2.5943e-05, -5.5701e-05, ..., -9.2685e-05, |
|
|
-3.5495e-05, -5.2899e-05], |
|
|
[ 5.7518e-05, 9.0301e-06, 1.2822e-05, ..., 1.7747e-05, |
|
|
1.5348e-05, 1.8820e-05], |
|
|
[ 1.1241e-04, 1.0774e-05, 1.9923e-05, ..., 3.4451e-05, |
|
|
2.0042e-05, 2.6032e-05]], device='cuda:0') |
|
|
Epoch 57, bias, value: tensor([-0.0187, -0.2352, 0.0087, -0.1668, 0.0009, 0.2703, 0.1501], |
|
|
device='cuda:0'), grad: tensor([ 0.0011, -0.0005, -0.0001, 0.0002, -0.0012, 0.0002, 0.0004], |
|
|
device='cuda:0') |
|
|
351 |
|
|
0.000954915028125264 |
|
|
changing lr |
|
|
epoch 56, time 487.53, cls_loss 0.0046 cls_loss_mapping 0.0386 cls_loss_causal 0.6371 re_mapping 0.0485 re_causal 0.0503 |
|
|
Epoch 58, weight, value: tensor([[-2.1349e-02, -1.3977e-02, -7.1024e-03, ..., -1.0061e-02, |
|
|
9.9467e-02, 1.2139e-01], |
|
|
[ 3.7762e-02, 4.8537e-02, 4.0907e-02, ..., 4.1115e-02, |
|
|
1.6256e-02, -1.6382e-02], |
|
|
[ 6.0902e-02, 3.5573e-02, 5.8886e-02, ..., 7.3153e-03, |
|
|
6.9126e-04, 1.8243e-02], |
|
|
..., |
|
|
[-1.1138e-01, -9.6263e-02, -6.4804e-02, ..., 1.6073e-03, |
|
|
1.5345e-02, -1.5362e-02], |
|
|
[-1.2528e-02, -2.7840e-03, -2.5215e-02, ..., 9.2736e-02, |
|
|
3.9597e-02, 7.3678e-05], |
|
|
[-2.2541e-02, -2.7571e-02, -2.1249e-02, ..., -1.2309e-01, |
|
|
-1.5450e-01, -1.2087e-01]], device='cuda:0'), grad: tensor([[ 8.9228e-05, 8.5235e-06, 1.4402e-05, ..., 4.6790e-05, |
|
|
3.9130e-05, 4.3839e-05], |
|
|
[-4.0841e-04, -3.3736e-05, -6.2108e-05, ..., -1.9312e-04, |
|
|
-1.0115e-04, -1.4365e-04], |
|
|
[ 2.2307e-05, 1.9856e-06, 3.4813e-06, ..., 1.3821e-05, |
|
|
1.5780e-05, 1.5527e-05], |
|
|
..., |
|
|
[ 5.1230e-05, 4.4182e-06, 7.8455e-06, ..., 2.4691e-05, |
|
|
1.4320e-05, 1.9193e-05], |
|
|
[-8.1837e-05, -8.5905e-06, -1.2361e-05, ..., -6.3300e-05, |
|
|
-8.9049e-05, -8.0585e-05], |
|
|
[ 3.2496e-04, 2.7806e-05, 4.8935e-05, ..., 1.6892e-04, |
|
|
1.1927e-04, 1.4389e-04]], device='cuda:0') |
|
|
Epoch 58, bias, value: tensor([-0.0190, -0.2351, 0.0078, -0.1651, 0.0010, 0.2693, 0.1504], |
|
|
device='cuda:0'), grad: tensor([ 3.0637e-04, -1.2379e-03, 9.0182e-05, 1.1995e-05, 1.5855e-04, |
|
|
-3.9029e-04, 1.0614e-03], device='cuda:0') |
|
|
351 |
|
|
0.0008271337313934874 |
|
|
changing lr |
|
|
epoch 57, time 488.10, cls_loss 0.0042 cls_loss_mapping 0.0381 cls_loss_causal 0.6417 re_mapping 0.0488 re_causal 0.0507 |
|
|
Epoch 59, weight, value: tensor([[-2.1408e-02, -1.3989e-02, -7.1333e-03, ..., -1.0104e-02, |
|
|
9.9326e-02, 1.2121e-01], |
|
|
[ 3.7841e-02, 4.8474e-02, 4.0876e-02, ..., 4.1118e-02, |
|
|
1.6304e-02, -1.6304e-02], |
|
|
[ 6.1134e-02, 3.5647e-02, 5.8916e-02, ..., 7.3222e-03, |
|
|
6.9682e-04, 1.8255e-02], |
|
|
..., |
|
|
[-1.1137e-01, -9.6154e-02, -6.4747e-02, ..., 1.5936e-03, |
|
|
1.5274e-02, -1.5383e-02], |
|
|
[-1.2516e-02, -2.8023e-03, -2.5213e-02, ..., 9.2714e-02, |
|
|
3.9614e-02, 1.2627e-04], |
|
|
[-2.2560e-02, -2.7561e-02, -2.1224e-02, ..., -1.2300e-01, |
|
|
-1.5434e-01, -1.2075e-01]], device='cuda:0'), grad: tensor([[-6.8173e-06, 4.6566e-09, -1.0632e-05, ..., -4.0606e-07, |
|
|
-1.4566e-05, -1.8001e-05], |
|
|
[ 2.6083e-04, 4.6819e-05, 7.5758e-05, ..., 6.0946e-05, |
|
|
6.7711e-05, 6.5386e-05], |
|
|
[ 6.2656e-04, 6.6280e-05, 7.1764e-05, ..., 1.4627e-04, |
|
|
1.2958e-04, 1.4138e-04], |
|
|
..., |
|
|
[ 2.2799e-05, 2.8126e-06, 5.0887e-06, ..., 3.4329e-06, |
|
|
6.0424e-06, 6.2250e-06], |
|
|
[ 1.7595e-04, 2.1204e-05, 2.5839e-05, ..., 4.1038e-05, |
|
|
3.7640e-05, 3.9846e-05], |
|
|
[-1.1568e-03, -1.4627e-04, -1.7905e-04, ..., -2.6917e-04, |
|
|
-2.4295e-04, -2.5249e-04]], device='cuda:0') |
|
|
Epoch 59, bias, value: tensor([-0.0193, -0.2344, 0.0084, -0.1653, 0.0006, 0.2690, 0.1501], |
|
|
device='cuda:0'), grad: tensor([-1.3560e-05, 7.8678e-04, 1.9627e-03, 2.4247e-04, 6.6400e-05, |
|
|
5.4741e-04, -3.5896e-03], device='cuda:0') |
|
|
351 |
|
|
0.00070775603199067 |
|
|
changing lr |
|
|
epoch 58, time 484.91, cls_loss 0.0032 cls_loss_mapping 0.0359 cls_loss_causal 0.6249 re_mapping 0.0487 re_causal 0.0506 |
|
|
Epoch 60, weight, value: tensor([[-0.0212, -0.0140, -0.0071, ..., -0.0101, 0.0993, 0.1212], |
|
|
[ 0.0379, 0.0484, 0.0408, ..., 0.0411, 0.0163, -0.0163], |
|
|
[ 0.0611, 0.0356, 0.0589, ..., 0.0073, 0.0007, 0.0182], |
|
|
..., |
|
|
[-0.1116, -0.0961, -0.0648, ..., 0.0015, 0.0151, -0.0155], |
|
|
[-0.0125, -0.0028, -0.0252, ..., 0.0927, 0.0397, 0.0002], |
|
|
[-0.0226, -0.0275, -0.0212, ..., -0.1229, -0.1542, -0.1207]], |
|
|
device='cuda:0'), grad: tensor([[ 4.3243e-05, 3.6228e-06, 9.9689e-06, ..., 4.7050e-06, |
|
|
9.7156e-06, 7.1637e-06], |
|
|
[ 1.1969e-04, 9.7975e-06, 2.7657e-05, ..., 1.1198e-05, |
|
|
2.6122e-05, 1.8969e-05], |
|
|
[ 1.9163e-05, 1.2685e-06, 4.2096e-06, ..., 2.0973e-06, |
|
|
4.4741e-06, 3.3099e-06], |
|
|
..., |
|
|
[ 1.0237e-05, 1.1064e-06, 2.3302e-06, ..., 2.5518e-06, |
|
|
3.0324e-06, 2.6748e-06], |
|
|
[ 6.7830e-05, 5.7332e-06, 1.5661e-05, ..., 7.5512e-06, |
|
|
1.5438e-05, 1.1593e-05], |
|
|
[-2.8563e-04, -2.3916e-05, -6.5863e-05, ..., -3.0905e-05, |
|
|
-6.4492e-05, -4.7982e-05]], device='cuda:0') |
|
|
Epoch 60, bias, value: tensor([-0.0185, -0.2338, 0.0084, -0.1651, -0.0005, 0.2689, 0.1499], |
|
|
device='cuda:0'), grad: tensor([ 1.4329e-04, 3.9768e-04, 6.4731e-05, 8.4817e-05, 3.3200e-05, |
|
|
2.2471e-04, -9.4748e-04], device='cuda:0') |
|
|
351 |
|
|
0.0005970223407163104 |
|
|
changing lr |
|
|
epoch 59, time 492.38, cls_loss 0.0040 cls_loss_mapping 0.0384 cls_loss_causal 0.6520 re_mapping 0.0485 re_causal 0.0505 |
|
|
Epoch 61, weight, value: tensor([[-0.0212, -0.0140, -0.0071, ..., -0.0101, 0.0992, 0.1211], |
|
|
[ 0.0379, 0.0484, 0.0408, ..., 0.0411, 0.0163, -0.0162], |
|
|
[ 0.0611, 0.0357, 0.0589, ..., 0.0073, 0.0006, 0.0182], |
|
|
..., |
|
|
[-0.1115, -0.0960, -0.0647, ..., 0.0015, 0.0151, -0.0155], |
|
|
[-0.0122, -0.0028, -0.0252, ..., 0.0928, 0.0397, 0.0003], |
|
|
[-0.0227, -0.0275, -0.0212, ..., -0.1229, -0.1541, -0.1206]], |
|
|
device='cuda:0'), grad: tensor([[-2.4581e-04, -2.5958e-05, -5.0008e-05, ..., -4.5031e-05, |
|
|
-1.1921e-04, -8.9407e-05], |
|
|
[ 8.8871e-05, 5.2899e-06, 7.9200e-06, ..., 3.9667e-05, |
|
|
2.7984e-05, 2.2709e-05], |
|
|
[ 2.7013e-04, 3.2693e-05, 3.7163e-05, ..., 1.3626e-04, |
|
|
1.0562e-04, 8.9467e-05], |
|
|
..., |
|
|
[ 5.0497e-04, 4.4256e-05, 7.9632e-05, ..., 1.6642e-04, |
|
|
2.0587e-04, 1.6856e-04], |
|
|
[-3.8967e-03, -2.2078e-04, -4.4274e-04, ..., -1.4009e-03, |
|
|
-1.1187e-03, -9.2793e-04], |
|
|
[ 3.1357e-03, 1.4865e-04, 3.4857e-04, ..., 1.0357e-03, |
|
|
8.4352e-04, 6.8998e-04]], device='cuda:0') |
|
|
Epoch 61, bias, value: tensor([-0.0187, -0.2335, 0.0085, -0.1652, -0.0006, 0.2693, 0.1494], |
|
|
device='cuda:0'), grad: tensor([-0.0008, 0.0003, 0.0008, 0.0005, 0.0016, -0.0127, 0.0103], |
|
|
device='cuda:0') |
|
|
351 |
|
|
0.0004951556604879052 |
|
|
changing lr |
|
|
epoch 60, time 489.26, cls_loss 0.0038 cls_loss_mapping 0.0370 cls_loss_causal 0.6870 re_mapping 0.0484 re_causal 0.0504 |
|
|
Epoch 62, weight, value: tensor([[-0.0214, -0.0140, -0.0071, ..., -0.0102, 0.0991, 0.1210], |
|
|
[ 0.0379, 0.0483, 0.0408, ..., 0.0411, 0.0163, -0.0162], |
|
|
[ 0.0610, 0.0357, 0.0588, ..., 0.0072, 0.0006, 0.0182], |
|
|
..., |
|
|
[-0.1114, -0.0959, -0.0647, ..., 0.0015, 0.0151, -0.0155], |
|
|
[-0.0122, -0.0028, -0.0252, ..., 0.0928, 0.0397, 0.0003], |
|
|
[-0.0226, -0.0275, -0.0212, ..., -0.1228, -0.1540, -0.1205]], |
|
|
device='cuda:0'), grad: tensor([[ 2.2188e-05, 8.2105e-06, 8.0541e-06, ..., 3.5763e-06, |
|
|
9.4622e-07, 1.9614e-06], |
|
|
[-4.1723e-05, -2.1998e-06, -4.1761e-06, ..., -1.7449e-05, |
|
|
-1.4827e-05, -1.3500e-05], |
|
|
[-4.8637e-04, -1.8167e-04, -1.8668e-04, ..., -9.2447e-05, |
|
|
-6.3539e-05, -8.6546e-05], |
|
|
..., |
|
|
[ 7.5512e-06, 1.9222e-06, 2.1514e-06, ..., 2.1532e-06, |
|
|
1.8720e-06, 1.9912e-06], |
|
|
[ 1.2040e-04, 4.2289e-05, 4.3929e-05, ..., 2.5019e-05, |
|
|
1.8135e-05, 2.3142e-05], |
|
|
[ 1.1921e-04, 3.4183e-05, 3.6806e-05, ..., 2.8491e-05, |
|
|
2.1860e-05, 2.5377e-05]], device='cuda:0') |
|
|
Epoch 62, bias, value: tensor([-0.0192, -0.2331, 0.0083, -0.1652, -0.0005, 0.2692, 0.1497], |
|
|
device='cuda:0'), grad: tensor([ 5.2631e-05, -1.3030e-04, -1.1740e-03, 6.2513e-04, 2.0266e-05, |
|
|
2.9707e-04, 3.0971e-04], device='cuda:0') |
|
|
351 |
|
|
0.00040236113724274745 |
|
|
changing lr |
|
|
epoch 61, time 489.43, cls_loss 0.0045 cls_loss_mapping 0.0367 cls_loss_causal 0.6441 re_mapping 0.0480 re_causal 0.0499 |
|
|
Epoch 63, weight, value: tensor([[-0.0213, -0.0140, -0.0071, ..., -0.0101, 0.0991, 0.1209], |
|
|
[ 0.0378, 0.0483, 0.0407, ..., 0.0410, 0.0163, -0.0162], |
|
|
[ 0.0609, 0.0356, 0.0588, ..., 0.0072, 0.0006, 0.0181], |
|
|
..., |
|
|
[-0.1114, -0.0959, -0.0646, ..., 0.0015, 0.0151, -0.0155], |
|
|
[-0.0123, -0.0028, -0.0252, ..., 0.0927, 0.0397, 0.0003], |
|
|
[-0.0224, -0.0275, -0.0211, ..., -0.1227, -0.1539, -0.1204]], |
|
|
device='cuda:0'), grad: tensor([[ 1.2529e-04, 2.2218e-05, 2.8625e-05, ..., 3.4153e-05, |
|
|
2.4438e-05, 2.4959e-05], |
|
|
[ 1.5825e-05, 1.6261e-06, 1.8962e-06, ..., 8.0094e-06, |
|
|
6.0126e-06, 6.2659e-06], |
|
|
[-2.7156e-04, -8.5413e-05, -9.2268e-05, ..., -2.8670e-05, |
|
|
-1.1556e-05, -9.0450e-06], |
|
|
..., |
|
|
[-2.2966e-06, 7.2680e-06, 4.5262e-06, ..., -5.1968e-07, |
|
|
2.3246e-06, -6.3702e-07], |
|
|
[-3.3855e-05, 2.4676e-05, 1.8314e-05, ..., -9.7752e-05, |
|
|
-9.1434e-05, -9.2506e-05], |
|
|
[ 4.2379e-05, 4.2804e-06, 8.4341e-06, ..., 4.6492e-05, |
|
|
4.1276e-05, 4.2289e-05]], device='cuda:0') |
|
|
Epoch 63, bias, value: tensor([-0.0190, -0.2331, 0.0080, -0.1650, -0.0006, 0.2687, 0.1501], |
|
|
device='cuda:0'), grad: tensor([ 3.8457e-04, 5.1320e-05, -7.3528e-04, 3.6812e-04, -5.8979e-05, |
|
|
-1.7655e-04, 1.6725e-04], device='cuda:0') |
|
|
351 |
|
|
0.00031882564680131423 |
|
|
changing lr |
|
|
epoch 62, time 483.86, cls_loss 0.0042 cls_loss_mapping 0.0371 cls_loss_causal 0.6606 re_mapping 0.0480 re_causal 0.0500 |
|
|
Epoch 64, weight, value: tensor([[-0.0214, -0.0140, -0.0071, ..., -0.0102, 0.0990, 0.1208], |
|
|
[ 0.0379, 0.0483, 0.0407, ..., 0.0410, 0.0163, -0.0162], |
|
|
[ 0.0608, 0.0356, 0.0587, ..., 0.0072, 0.0006, 0.0181], |
|
|
..., |
|
|
[-0.1112, -0.0958, -0.0646, ..., 0.0016, 0.0151, -0.0154], |
|
|
[-0.0124, -0.0028, -0.0252, ..., 0.0927, 0.0397, 0.0003], |
|
|
[-0.0224, -0.0275, -0.0211, ..., -0.1227, -0.1538, -0.1203]], |
|
|
device='cuda:0'), grad: tensor([[ 2.7698e-06, 5.7667e-06, 3.1590e-06, ..., -3.6303e-06, |
|
|
-1.3135e-05, -1.1802e-05], |
|
|
[ 4.9829e-05, 7.4059e-06, 1.1750e-05, ..., 9.0599e-06, |
|
|
1.1452e-05, 1.4067e-05], |
|
|
[-1.3173e-04, -5.1230e-05, -5.5671e-05, ..., -1.5497e-05, |
|
|
-5.6326e-06, -1.2428e-05], |
|
|
..., |
|
|
[ 2.7090e-05, 5.1446e-06, 6.8285e-06, ..., 3.5875e-06, |
|
|
2.9411e-06, 4.6305e-06], |
|
|
[ 5.3972e-05, 1.4357e-05, 1.7211e-05, ..., 3.2391e-06, |
|
|
7.2457e-07, 4.1276e-06], |
|
|
[-5.3346e-05, 9.4622e-07, -2.8946e-06, ..., -4.3772e-06, |
|
|
-1.2368e-06, -6.2324e-06]], device='cuda:0') |
|
|
Epoch 64, bias, value: tensor([-1.9339e-02, -2.3265e-01, 7.7978e-03, -1.6499e-01, -7.8075e-05, |
|
|
2.6835e-01, 1.5003e-01], device='cuda:0'), grad: tensor([-2.4382e-06, 1.5533e-04, -3.2139e-04, 1.3471e-04, 8.2672e-05, |
|
|
1.5342e-04, -2.0194e-04], device='cuda:0') |
|
|
351 |
|
|
0.0002447174185242325 |
|
|
changing lr |
|
|
epoch 63, time 490.22, cls_loss 0.0036 cls_loss_mapping 0.0347 cls_loss_causal 0.6470 re_mapping 0.0480 re_causal 0.0500 |
|
|
Epoch 65, weight, value: tensor([[-0.0214, -0.0140, -0.0071, ..., -0.0102, 0.0990, 0.1208], |
|
|
[ 0.0379, 0.0483, 0.0407, ..., 0.0410, 0.0163, -0.0162], |
|
|
[ 0.0608, 0.0356, 0.0587, ..., 0.0071, 0.0006, 0.0181], |
|
|
..., |
|
|
[-0.1111, -0.0958, -0.0645, ..., 0.0016, 0.0151, -0.0154], |
|
|
[-0.0124, -0.0028, -0.0252, ..., 0.0927, 0.0397, 0.0003], |
|
|
[-0.0224, -0.0275, -0.0211, ..., -0.1226, -0.1538, -0.1203]], |
|
|
device='cuda:0'), grad: tensor([[ 5.2881e-04, 4.0114e-05, 1.0240e-04, ..., 1.3661e-04, |
|
|
1.6046e-04, 1.8704e-04], |
|
|
[-8.0919e-04, -6.5684e-05, -7.5638e-05, ..., -4.0150e-04, |
|
|
-2.9635e-04, -3.4237e-04], |
|
|
[ 7.9060e-04, 5.5760e-05, 1.4138e-04, ..., 2.0337e-04, |
|
|
2.2817e-04, 2.6584e-04], |
|
|
..., |
|
|
[-1.2274e-03, -8.4937e-05, -2.8634e-04, ..., -2.2125e-04, |
|
|
-3.6216e-04, -4.2987e-04], |
|
|
[ 3.4070e-04, 2.7388e-05, 4.6611e-05, ..., 1.2219e-04, |
|
|
1.0574e-04, 1.2124e-04], |
|
|
[ 1.5497e-04, 1.1295e-05, 2.6509e-05, ..., 1.0812e-04, |
|
|
9.7275e-05, 1.2004e-04]], device='cuda:0') |
|
|
Epoch 65, bias, value: tensor([-1.9334e-02, -2.3244e-01, 7.7977e-03, -1.6502e-01, 1.9134e-05, |
|
|
2.6818e-01, 1.4991e-01], device='cuda:0'), grad: tensor([ 0.0018, -0.0027, 0.0026, 0.0007, -0.0042, 0.0011, 0.0006], |
|
|
device='cuda:0') |
|
|
351 |
|
|
0.0001801856965207339 |
|
|
changing lr |
|
|
epoch 64, time 492.97, cls_loss 0.0040 cls_loss_mapping 0.0365 cls_loss_causal 0.6452 re_mapping 0.0477 re_causal 0.0498 |
|
|
Epoch 66, weight, value: tensor([[-0.0214, -0.0140, -0.0071, ..., -0.0102, 0.0990, 0.1208], |
|
|
[ 0.0379, 0.0482, 0.0407, ..., 0.0410, 0.0163, -0.0162], |
|
|
[ 0.0608, 0.0356, 0.0587, ..., 0.0071, 0.0006, 0.0181], |
|
|
..., |
|
|
[-0.1110, -0.0957, -0.0645, ..., 0.0016, 0.0151, -0.0154], |
|
|
[-0.0124, -0.0028, -0.0252, ..., 0.0927, 0.0397, 0.0003], |
|
|
[-0.0224, -0.0275, -0.0211, ..., -0.1226, -0.1538, -0.1203]], |
|
|
device='cuda:0'), grad: tensor([[ 4.5419e-05, 6.3926e-06, 1.3947e-05, ..., 1.1124e-05, |
|
|
1.1191e-05, 1.2450e-05], |
|
|
[-1.0870e-05, -2.7269e-06, -2.4606e-06, ..., -3.9451e-06, |
|
|
-4.3772e-06, -5.6140e-06], |
|
|
[ 4.3422e-05, 5.9567e-06, 1.4521e-05, ..., 7.3612e-06, |
|
|
5.9381e-06, 7.0743e-06], |
|
|
..., |
|
|
[ 1.1817e-05, 2.1253e-06, 4.8801e-06, ..., -5.6345e-07, |
|
|
-2.2799e-06, -1.7304e-06], |
|
|
[ 2.2992e-05, 3.1963e-06, 7.7263e-06, ..., 3.9227e-06, |
|
|
3.1628e-06, 3.7048e-06], |
|
|
[-1.1885e-04, -1.5810e-05, -4.0680e-05, ..., -1.8969e-05, |
|
|
-1.4514e-05, -1.6913e-05]], device='cuda:0') |
|
|
Epoch 66, bias, value: tensor([-1.9370e-02, -2.3244e-01, 7.9069e-03, -1.6501e-01, 9.5384e-05, |
|
|
2.6809e-01, 1.4984e-01], device='cuda:0'), grad: tensor([ 1.8620e-04, -2.0117e-05, 1.9896e-04, 2.7701e-05, 6.4075e-05, |
|
|
1.0514e-04, -5.6219e-04], device='cuda:0') |
|
|
351 |
|
|
0.000125360439090882 |
|
|
changing lr |
|
|
epoch 65, time 491.54, cls_loss 0.0036 cls_loss_mapping 0.0321 cls_loss_causal 0.6499 re_mapping 0.0478 re_causal 0.0498 |
|
|
Epoch 67, weight, value: tensor([[-0.0214, -0.0140, -0.0071, ..., -0.0102, 0.0990, 0.1208], |
|
|
[ 0.0379, 0.0482, 0.0407, ..., 0.0410, 0.0163, -0.0162], |
|
|
[ 0.0608, 0.0356, 0.0587, ..., 0.0071, 0.0005, 0.0181], |
|
|
..., |
|
|
[-0.1110, -0.0957, -0.0645, ..., 0.0016, 0.0151, -0.0154], |
|
|
[-0.0124, -0.0028, -0.0252, ..., 0.0927, 0.0397, 0.0003], |
|
|
[-0.0224, -0.0275, -0.0211, ..., -0.1226, -0.1538, -0.1203]], |
|
|
device='cuda:0'), grad: tensor([[ 7.2360e-05, 5.2638e-06, 1.7837e-05, ..., 1.5587e-05, |
|
|
2.2396e-05, 2.0936e-05], |
|
|
[ 9.2804e-05, 4.1537e-06, 2.0787e-05, ..., 2.1264e-05, |
|
|
3.0354e-05, 2.8640e-05], |
|
|
[-9.5293e-06, -6.4522e-06, -6.5379e-06, ..., -1.0915e-06, |
|
|
-2.5313e-06, -3.1143e-06], |
|
|
..., |
|
|
[ 9.6187e-06, 5.1688e-07, 2.1979e-06, ..., 1.9427e-06, |
|
|
3.2876e-06, 2.9653e-06], |
|
|
[ 7.5519e-05, 4.3809e-06, 1.7673e-05, ..., 1.7107e-05, |
|
|
2.4602e-05, 2.3276e-05], |
|
|
[-2.6488e-04, -9.6783e-06, -5.7906e-05, ..., -6.0499e-05, |
|
|
-8.6486e-05, -8.0824e-05]], device='cuda:0') |
|
|
Epoch 67, bias, value: tensor([-1.9302e-02, -2.3238e-01, 7.8917e-03, -1.6504e-01, 1.5820e-04, |
|
|
2.6802e-01, 1.4977e-01], device='cuda:0'), grad: tensor([ 2.1708e-04, 2.8634e-04, -1.5043e-05, 7.3016e-05, 2.8566e-05, |
|
|
2.3031e-04, -8.1968e-04], device='cuda:0') |
|
|
351 |
|
|
8.03520570068517e-05 |
|
|
changing lr |
|
|
epoch 66, time 488.28, cls_loss 0.0043 cls_loss_mapping 0.0363 cls_loss_causal 0.6060 re_mapping 0.0477 re_causal 0.0496 |
|
|
Epoch 68, weight, value: tensor([[-0.0214, -0.0140, -0.0071, ..., -0.0102, 0.0990, 0.1208], |
|
|
[ 0.0379, 0.0482, 0.0407, ..., 0.0410, 0.0163, -0.0162], |
|
|
[ 0.0609, 0.0356, 0.0587, ..., 0.0071, 0.0006, 0.0181], |
|
|
..., |
|
|
[-0.1110, -0.0957, -0.0645, ..., 0.0016, 0.0151, -0.0154], |
|
|
[-0.0124, -0.0028, -0.0252, ..., 0.0927, 0.0397, 0.0003], |
|
|
[-0.0225, -0.0275, -0.0211, ..., -0.1226, -0.1538, -0.1203]], |
|
|
device='cuda:0'), grad: tensor([[ 1.7738e-04, 3.6508e-05, 4.7892e-05, ..., 3.3021e-05, |
|
|
5.1409e-05, 5.8830e-05], |
|
|
[-1.1176e-04, -1.2986e-05, -2.6107e-05, ..., -6.1005e-05, |
|
|
-7.1406e-05, -7.7963e-05], |
|
|
[-1.8620e-04, -4.3780e-05, -5.1647e-05, ..., -1.6481e-05, |
|
|
-4.3839e-05, -4.8906e-05], |
|
|
..., |
|
|
[ 9.0122e-05, 1.1779e-05, 2.0489e-05, ..., 2.9355e-05, |
|
|
3.2455e-05, 3.6657e-05], |
|
|
[ 1.6510e-04, 2.1622e-05, 3.7253e-05, ..., 5.1230e-05, |
|
|
5.6177e-05, 6.3777e-05], |
|
|
[-2.3258e-04, -2.8268e-05, -5.0306e-05, ..., -5.5730e-05, |
|
|
-4.7892e-05, -5.9724e-05]], device='cuda:0') |
|
|
Epoch 68, bias, value: tensor([-1.9269e-02, -2.3232e-01, 8.0016e-03, -1.6504e-01, 1.6669e-04, |
|
|
2.6794e-01, 1.4963e-01], device='cuda:0'), grad: tensor([ 0.0005, -0.0004, -0.0005, 0.0003, 0.0003, 0.0005, -0.0007], |
|
|
device='cuda:0') |
|
|
351 |
|
|
4.5251191160326525e-05 |
|
|
changing lr |
|
|
epoch 67, time 486.30, cls_loss 0.0037 cls_loss_mapping 0.0329 cls_loss_causal 0.6237 re_mapping 0.0478 re_causal 0.0498 |
|
|
Epoch 69, weight, value: tensor([[-0.0214, -0.0140, -0.0071, ..., -0.0102, 0.0990, 0.1208], |
|
|
[ 0.0379, 0.0482, 0.0407, ..., 0.0410, 0.0163, -0.0162], |
|
|
[ 0.0609, 0.0356, 0.0587, ..., 0.0071, 0.0006, 0.0181], |
|
|
..., |
|
|
[-0.1110, -0.0957, -0.0645, ..., 0.0016, 0.0151, -0.0154], |
|
|
[-0.0124, -0.0028, -0.0252, ..., 0.0927, 0.0397, 0.0003], |
|
|
[-0.0225, -0.0275, -0.0211, ..., -0.1226, -0.1538, -0.1203]], |
|
|
device='cuda:0'), grad: tensor([[ 8.3113e-04, 7.4923e-05, 1.7071e-04, ..., 2.6250e-04, |
|
|
3.9792e-04, 3.2210e-04], |
|
|
[-9.2602e-04, -8.4460e-05, -1.9145e-04, ..., -2.9302e-04, |
|
|
-4.3583e-04, -3.5167e-04], |
|
|
[-5.4128e-06, -2.5220e-06, -2.1048e-06, ..., 1.5944e-06, |
|
|
1.8477e-06, 1.1027e-06], |
|
|
..., |
|
|
[ 2.8223e-05, 2.8014e-06, 6.0685e-06, ..., 8.5831e-06, |
|
|
1.2398e-05, 1.0081e-05], |
|
|
[ 2.3380e-05, 2.6226e-06, 5.3160e-06, ..., 7.4133e-06, |
|
|
8.4266e-06, 6.3777e-06], |
|
|
[ 4.0412e-05, 4.8652e-06, 9.2611e-06, ..., 1.1846e-05, |
|
|
1.4052e-05, 1.0982e-05]], device='cuda:0') |
|
|
Epoch 69, bias, value: tensor([-1.9275e-02, -2.3228e-01, 8.0206e-03, -1.6505e-01, 1.7925e-04, |
|
|
2.6789e-01, 1.4963e-01], device='cuda:0'), grad: tensor([ 2.4948e-03, -2.7771e-03, -1.3143e-05, 2.4006e-05, 8.4937e-05, |
|
|
6.9320e-05, 1.1951e-04], device='cuda:0') |
|
|
351 |
|
|
2.0128530023804673e-05 |
|
|
changing lr |
|
|
epoch 68, time 488.59, cls_loss 0.0038 cls_loss_mapping 0.0348 cls_loss_causal 0.6464 re_mapping 0.0477 re_causal 0.0497 |
|
|
Epoch 70, weight, value: tensor([[-0.0214, -0.0140, -0.0071, ..., -0.0102, 0.0990, 0.1208], |
|
|
[ 0.0379, 0.0482, 0.0407, ..., 0.0410, 0.0163, -0.0162], |
|
|
[ 0.0609, 0.0356, 0.0587, ..., 0.0071, 0.0006, 0.0181], |
|
|
..., |
|
|
[-0.1110, -0.0957, -0.0645, ..., 0.0016, 0.0151, -0.0154], |
|
|
[-0.0124, -0.0028, -0.0252, ..., 0.0927, 0.0397, 0.0003], |
|
|
[-0.0224, -0.0275, -0.0211, ..., -0.1226, -0.1538, -0.1203]], |
|
|
device='cuda:0'), grad: tensor([[ 2.3559e-05, 6.6012e-06, 1.9409e-06, ..., -3.4779e-05, |
|
|
-7.4029e-05, -6.5267e-05], |
|
|
[ 2.2388e-04, 2.8431e-05, 4.1455e-05, ..., 3.1710e-05, |
|
|
5.0277e-05, 5.2512e-05], |
|
|
[-5.5850e-05, -4.7028e-05, -3.0845e-05, ..., -2.1219e-05, |
|
|
1.6883e-05, 5.6773e-06], |
|
|
..., |
|
|
[ 7.6115e-05, 1.3202e-05, 1.9178e-05, ..., 2.7522e-05, |
|
|
4.1485e-05, 4.3154e-05], |
|
|
[ 4.0293e-04, 5.9634e-05, 8.0347e-05, ..., 5.4657e-05, |
|
|
7.3731e-05, 7.9453e-05], |
|
|
[-8.0967e-04, -9.9838e-05, -1.5163e-04, ..., -9.5963e-05, |
|
|
-1.4210e-04, -1.5724e-04]], device='cuda:0') |
|
|
Epoch 70, bias, value: tensor([-1.9308e-02, -2.3226e-01, 8.0159e-03, -1.6505e-01, 1.7581e-04, |
|
|
2.6787e-01, 1.4967e-01], device='cuda:0'), grad: tensor([ 5.3346e-05, 7.1144e-04, -1.0407e-04, 4.0460e-04, 2.4176e-04, |
|
|
1.2579e-03, -2.5673e-03], device='cuda:0') |
|
|
351 |
|
|
5.034667293427056e-06 |
|
|
changing lr |
|
|
epoch 69, time 490.70, cls_loss 0.0033 cls_loss_mapping 0.0336 cls_loss_causal 0.6347 re_mapping 0.0477 re_causal 0.0497 |
|
|
---------------------saving last model at epoch 69---------------------------------------------------- |
|
|
/home/yuqian_fu |
|
|
{'gpu': '0', 'svroot': '/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/saved-PACS//cartoon/CA_multiple_16fa_v2_ep70_lr0.01_cosine_base0.01_bs6_lamCa_1_lamRe1_adt4_cls1_EW2_70_rmTrue_rnTrue_str5_WithStyleAttackExp1', 'source_domain': 'cartoon', 'svpath': '/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/saved-PACS//cartoon/CA_multiple_16fa_v2_ep70_lr0.01_cosine_base0.01_bs6_lamCa_1_lamRe1_adt4_cls1_EW2_70_rmTrue_rnTrue_str5_WithStyleAttackExp1/cartoon_16factor_best_test_check.csv', 'factor_num': 16, 'epoch': 'best', 'stride': 5, 'eval_mapping': False, 'network': 'resnet18'} |
|
|
-------------------------------------loading pretrain weights---------------------------------- |
|
|
loading weight of best |
|
|
randm: False |
|
|
stride: 5 |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
columns: ['cartoon', 'art_painting', 'photo', 'sketch'] |
|
|
/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/data/PACS/cartoon_test.hdf5 torch.Size([2344, 3, 227, 227]) torch.Size([2344]) |
|
|
/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/data/PACS/art_painting_test.hdf5 torch.Size([2048, 3, 227, 227]) torch.Size([2048]) |
|
|
/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/data/PACS/photo_test.hdf5 torch.Size([1670, 3, 227, 227]) torch.Size([1670]) |
|
|
/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/data/PACS/sketch_test.hdf5 torch.Size([3929, 3, 227, 227]) torch.Size([3929]) |
|
|
cartoon art_painting photo sketch Avg |
|
|
w/o do (original x) 99.061433 54.6875 66.047904 60.066175 60.267193 |
|
|
cartoon art_painting photo sketch Avg |
|
|
do 99.146758 55.712891 72.45509 63.425808 63.864596 |
|
|
|