|
|
/home/yuqian_fu |
|
|
here1 |
|
|
here2 |
|
|
{'gpu': '0', 'data': 'art_painting', 'ntr': None, 'translate': None, 'autoaug': 'CA_multiple', 'n': 3, 'stride': 5, 'factor_num': 16, 'epochs': 70, 'nbatch': 100, 'batchsize': 6, 'lr': 0.01, 'lr_scheduler': 'cosine', 'svroot': '/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/saved-PACS//art_painting/CA_multiple_16fa_v2_ep70_lr0.01_cosine_base0.01_bs6_lamCa_1_lamRe1_adt4_cls1_EW2_70_rmTrue_rnTrue_str5_WithStyleAttackExp1', 'clsadapt': True, 'lambda_causal': 1.0, 'lambda_re': 1.0, 'randm': True, 'randn': True, 'network': 'resnet18'} |
|
|
stride: 5 |
|
|
/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/data/PACS/art_painting_train.hdf5 torch.Size([1840, 3, 227, 227]) torch.Size([1840]) |
|
|
--------------------------CA_multiple-------------------------- |
|
|
---------------------------16 factors----------------- |
|
|
randm: True |
|
|
randn: True |
|
|
n: 3 |
|
|
randm: False |
|
|
/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/data/PACS/art_painting_val.hdf5 torch.Size([208, 3, 227, 227]) torch.Size([208]) |
|
|
-------------------------------------loading pretrain weights---------------------------------- |
|
|
Epoch 1, weight, value: tensor([[ 0.0106, -0.0051, 0.0193, ..., 0.0075, 0.0158, -0.0062], |
|
|
[-0.0130, -0.0094, -0.0199, ..., 0.0148, 0.0091, 0.0089], |
|
|
[ 0.0217, 0.0123, -0.0198, ..., -0.0208, 0.0086, -0.0179], |
|
|
..., |
|
|
[-0.0085, -0.0153, 0.0125, ..., 0.0016, 0.0065, 0.0184], |
|
|
[ 0.0111, -0.0125, 0.0214, ..., -0.0080, 0.0129, -0.0186], |
|
|
[-0.0095, 0.0164, 0.0024, ..., -0.0037, 0.0123, 0.0207]], |
|
|
device='cuda:0'), grad: None |
|
|
Epoch 1, bias, value: tensor([ 0.0156, -0.0099, -0.0060, -0.0064, -0.0070, 0.0143, 0.0059], |
|
|
device='cuda:0'), grad: None |
|
|
306 |
|
|
0.01 |
|
|
changing lr |
|
|
---------------------saving model at epoch 0---------------------------------------------------- |
|
|
epoch 0, time 421.76, cls_loss 11.6044 cls_loss_mapping 1.8986 cls_loss_causal 1.9230 re_mapping 0.4058 re_causal 0.4051 |
|
|
Epoch 2, weight, value: tensor([[ 0.1878, 0.1646, 0.2100, ..., -0.0196, 0.0020, -0.0184], |
|
|
[-0.0649, -0.0708, -0.0772, ..., 0.0933, 0.0843, 0.0784], |
|
|
[-0.0386, -0.0153, -0.0629, ..., 0.0813, 0.0574, 0.0432], |
|
|
..., |
|
|
[-0.0308, -0.0403, -0.0028, ..., 0.0012, 0.0620, 0.0384], |
|
|
[ 0.0231, 0.0282, 0.0095, ..., -0.1716, -0.1842, -0.2046], |
|
|
[-0.0140, 0.0138, -0.0027, ..., -0.0242, -0.0221, 0.0152]], |
|
|
device='cuda:0'), grad: tensor([[-0.2456, -0.2186, -0.2211, ..., -0.0522, -0.0388, -0.0158], |
|
|
[-0.0894, -0.0504, -0.0532, ..., -0.0684, -0.0511, -0.0178], |
|
|
[ 0.0267, 0.0195, 0.0190, ..., 0.0144, 0.0101, 0.0034], |
|
|
..., |
|
|
[-0.0347, -0.0211, -0.0188, ..., -0.0300, -0.0179, -0.0050], |
|
|
[ 0.0645, 0.0405, 0.0429, ..., 0.0440, 0.0329, 0.0115], |
|
|
[ 0.2252, 0.1888, 0.1958, ..., 0.0678, 0.0501, 0.0192]], |
|
|
device='cuda:0') |
|
|
Epoch 2, bias, value: tensor([-0.0260, 0.0482, -0.0075, -0.0372, 0.0386, -0.0156, 0.0055], |
|
|
device='cuda:0'), grad: tensor([-0.2336, -0.2844, 0.0637, 0.0666, -0.1208, 0.2062, 0.3025], |
|
|
device='cuda:0') |
|
|
306 |
|
|
0.009994965332706574 |
|
|
changing lr |
|
|
epoch 1, time 424.69, cls_loss 2.0222 cls_loss_mapping 1.6048 cls_loss_causal 1.8042 re_mapping 0.1042 re_causal 0.1037 |
|
|
Epoch 3, weight, value: tensor([[ 0.1727, 0.1568, 0.1972, ..., -0.0276, -0.0070, -0.0236], |
|
|
[-0.1169, -0.1227, -0.1246, ..., 0.0940, 0.0914, 0.0822], |
|
|
[-0.0560, -0.0360, -0.0763, ..., 0.0755, 0.0478, 0.0340], |
|
|
..., |
|
|
[-0.0177, -0.0277, 0.0238, ..., -0.0073, 0.0621, 0.0368], |
|
|
[ 0.0245, 0.0343, 0.0103, ..., -0.1734, -0.1889, -0.2067], |
|
|
[ 0.0536, 0.0736, 0.0462, ..., -0.0229, -0.0228, 0.0129]], |
|
|
device='cuda:0'), grad: tensor([[ 4.2572e-02, 3.2379e-02, 3.3173e-02, ..., 1.3580e-02, |
|
|
9.7656e-03, 4.5853e-03], |
|
|
[ 2.6875e-03, 1.4639e-03, 1.4391e-03, ..., 1.2617e-03, |
|
|
9.5606e-04, 5.2929e-04], |
|
|
[ 1.7932e-01, 1.0278e-01, 1.1157e-01, ..., 6.3354e-02, |
|
|
4.5898e-02, 2.2125e-02], |
|
|
..., |
|
|
[-2.3206e-01, -1.4355e-01, -1.5369e-01, ..., -7.8003e-02, |
|
|
-5.5969e-02, -2.5665e-02], |
|
|
[ 7.9679e-04, 4.0364e-04, 3.8266e-04, ..., 3.4571e-04, |
|
|
2.4199e-04, 1.1581e-04], |
|
|
[ 4.0680e-02, 2.0706e-02, 2.0401e-02, ..., 1.9073e-02, |
|
|
1.4320e-02, 7.4272e-03]], device='cuda:0') |
|
|
Epoch 3, bias, value: tensor([-0.0599, 0.0352, -0.0025, -0.0539, 0.0827, -0.0219, 0.0262], |
|
|
device='cuda:0'), grad: tensor([ 0.0748, 0.0074, 0.2949, -0.1169, -0.3730, 0.0019, 0.1111], |
|
|
device='cuda:0') |
|
|
306 |
|
|
0.009979871469976196 |
|
|
changing lr |
|
|
epoch 2, time 425.75, cls_loss 1.2357 cls_loss_mapping 1.2540 cls_loss_causal 1.6276 re_mapping 0.0906 re_causal 0.0899 |
|
|
Epoch 4, weight, value: tensor([[ 0.1852, 0.1727, 0.2084, ..., -0.0183, -0.0016, -0.0191], |
|
|
[-0.1073, -0.1069, -0.1134, ..., 0.0933, 0.0911, 0.0836], |
|
|
[-0.0701, -0.0514, -0.0840, ..., 0.0735, 0.0460, 0.0309], |
|
|
..., |
|
|
[-0.0694, -0.0779, -0.0236, ..., -0.0143, 0.0575, 0.0329], |
|
|
[ 0.0138, 0.0213, -0.0054, ..., -0.1757, -0.1922, -0.2086], |
|
|
[ 0.1033, 0.1200, 0.0955, ..., -0.0222, -0.0174, 0.0177]], |
|
|
device='cuda:0'), grad: tensor([[-2.4323e-02, -1.7715e-02, -1.8951e-02, ..., -3.5667e-03, |
|
|
-2.6474e-03, -1.3180e-03], |
|
|
[ 2.2745e-04, 1.7011e-04, 1.8191e-04, ..., 2.9743e-05, |
|
|
2.2605e-05, 1.0028e-05], |
|
|
[ 2.1515e-03, 1.2884e-03, 1.3847e-03, ..., 5.4646e-04, |
|
|
3.7622e-04, 2.5415e-04], |
|
|
..., |
|
|
[ 1.3123e-02, 9.5825e-03, 1.0254e-02, ..., 1.9026e-03, |
|
|
1.4153e-03, 6.9475e-04], |
|
|
[ 3.9124e-04, 3.5381e-04, 3.7313e-04, ..., 8.3372e-06, |
|
|
1.3404e-05, 1.1101e-06], |
|
|
[ 6.6795e-03, 5.0278e-03, 5.3711e-03, ..., 8.4639e-04, |
|
|
6.4564e-04, 2.7847e-04]], device='cuda:0') |
|
|
Epoch 4, bias, value: tensor([-0.0368, 0.0249, -0.0431, -0.0514, 0.0937, -0.0193, 0.0377], |
|
|
device='cuda:0'), grad: tensor([-0.0337, 0.0003, 0.0039, 0.0023, 0.0181, 0.0003, 0.0088], |
|
|
device='cuda:0') |
|
|
306 |
|
|
0.009954748808839675 |
|
|
changing lr |
|
|
---------------------saving model at epoch 3---------------------------------------------------- |
|
|
epoch 3, time 431.13, cls_loss 0.8660 cls_loss_mapping 0.9373 cls_loss_causal 1.4483 re_mapping 0.0863 re_causal 0.0854 |
|
|
Epoch 5, weight, value: tensor([[ 0.1969, 0.1932, 0.2240, ..., -0.0106, 0.0083, -0.0084], |
|
|
[-0.1106, -0.1103, -0.1220, ..., 0.0942, 0.0883, 0.0799], |
|
|
[-0.0832, -0.0670, -0.0937, ..., 0.0688, 0.0420, 0.0268], |
|
|
..., |
|
|
[-0.0814, -0.0917, -0.0321, ..., -0.0199, 0.0522, 0.0261], |
|
|
[ 0.0065, 0.0142, -0.0115, ..., -0.1784, -0.1959, -0.2123], |
|
|
[ 0.1163, 0.1315, 0.1102, ..., -0.0206, -0.0112, 0.0235]], |
|
|
device='cuda:0'), grad: tensor([[ 5.9433e-03, 2.8019e-03, 2.9926e-03, ..., 1.8187e-03, |
|
|
1.3828e-03, 1.1148e-03], |
|
|
[-5.0812e-02, -2.6550e-02, -2.8870e-02, ..., -1.4725e-02, |
|
|
-1.0307e-02, -8.5678e-03], |
|
|
[ 4.2009e-04, 1.5187e-04, 1.5056e-04, ..., 1.4293e-04, |
|
|
1.2743e-04, 9.7096e-05], |
|
|
..., |
|
|
[ 5.1514e-02, 2.4612e-02, 2.6321e-02, ..., 1.5656e-02, |
|
|
1.1826e-02, 9.5596e-03], |
|
|
[-1.1978e-02, -2.7523e-03, -2.3022e-03, ..., -4.5547e-03, |
|
|
-4.4975e-03, -3.3283e-03], |
|
|
[ 3.6449e-03, 1.3599e-03, 1.3704e-03, ..., 1.2245e-03, |
|
|
1.0624e-03, 8.1873e-04]], device='cuda:0') |
|
|
Epoch 5, bias, value: tensor([-0.0341, 0.0254, -0.0705, -0.0229, 0.1050, -0.0339, 0.0366], |
|
|
device='cuda:0'), grad: tensor([ 0.0132, -0.0865, 0.0014, 0.0052, 0.1113, -0.0564, 0.0118], |
|
|
device='cuda:0') |
|
|
306 |
|
|
0.009919647942993149 |
|
|
changing lr |
|
|
---------------------saving model at epoch 4---------------------------------------------------- |
|
|
epoch 4, time 432.30, cls_loss 0.6247 cls_loss_mapping 0.7047 cls_loss_causal 1.2669 re_mapping 0.0851 re_causal 0.0840 |
|
|
Epoch 6, weight, value: tensor([[ 0.1957, 0.1933, 0.2247, ..., -0.0115, 0.0068, -0.0090], |
|
|
[-0.1062, -0.1071, -0.1227, ..., 0.0857, 0.0783, 0.0707], |
|
|
[-0.0818, -0.0668, -0.0912, ..., 0.0748, 0.0483, 0.0343], |
|
|
..., |
|
|
[-0.0890, -0.1005, -0.0373, ..., -0.0144, 0.0582, 0.0325], |
|
|
[-0.0086, 0.0016, -0.0259, ..., -0.1826, -0.1993, -0.2163], |
|
|
[ 0.1395, 0.1505, 0.1312, ..., -0.0195, -0.0080, 0.0255]], |
|
|
device='cuda:0'), grad: tensor([[ 0.0750, 0.0472, 0.0440, ..., 0.0164, 0.0148, 0.0158], |
|
|
[-0.0565, -0.0350, -0.0352, ..., -0.0148, -0.0146, -0.0162], |
|
|
[ 0.0156, 0.0057, 0.0061, ..., 0.0040, 0.0032, 0.0034], |
|
|
..., |
|
|
[ 0.0461, 0.0297, 0.0238, ..., 0.0068, 0.0044, 0.0037], |
|
|
[-0.0116, -0.0024, -0.0030, ..., -0.0031, -0.0021, -0.0022], |
|
|
[-0.0692, -0.0453, -0.0359, ..., -0.0096, -0.0058, -0.0047]], |
|
|
device='cuda:0') |
|
|
Epoch 6, bias, value: tensor([-0.0279, 0.0292, -0.0618, -0.0331, 0.0900, -0.0454, 0.0549], |
|
|
device='cuda:0'), grad: tensor([ 0.1162, -0.0898, 0.0411, 0.0019, 0.0696, -0.0381, -0.1008], |
|
|
device='cuda:0') |
|
|
306 |
|
|
0.009874639560909117 |
|
|
changing lr |
|
|
epoch 5, time 426.44, cls_loss 0.4380 cls_loss_mapping 0.5281 cls_loss_causal 1.2226 re_mapping 0.0861 re_causal 0.0850 |
|
|
Epoch 7, weight, value: tensor([[ 0.2098, 0.2093, 0.2438, ..., -0.0112, 0.0055, -0.0106], |
|
|
[-0.0974, -0.0969, -0.1146, ..., 0.0915, 0.0849, 0.0769], |
|
|
[-0.0816, -0.0701, -0.0943, ..., 0.0685, 0.0410, 0.0278], |
|
|
..., |
|
|
[-0.1026, -0.1140, -0.0502, ..., -0.0106, 0.0625, 0.0370], |
|
|
[-0.0079, 0.0012, -0.0266, ..., -0.1819, -0.1995, -0.2156], |
|
|
[ 0.1374, 0.1491, 0.1289, ..., -0.0233, -0.0101, 0.0223]], |
|
|
device='cuda:0'), grad: tensor([[-0.0735, -0.0327, -0.0339, ..., -0.0254, -0.0235, -0.0245], |
|
|
[ 0.0145, 0.0052, 0.0056, ..., 0.0056, 0.0052, 0.0054], |
|
|
[-0.0057, -0.0010, -0.0008, ..., -0.0023, -0.0022, -0.0022], |
|
|
..., |
|
|
[ 0.0115, 0.0060, 0.0061, ..., 0.0034, 0.0032, 0.0033], |
|
|
[ 0.0025, 0.0004, 0.0004, ..., 0.0010, 0.0009, 0.0009], |
|
|
[ 0.0484, 0.0214, 0.0222, ..., 0.0167, 0.0155, 0.0161]], |
|
|
device='cuda:0') |
|
|
Epoch 7, bias, value: tensor([-0.0215, 0.0466, -0.0830, -0.0234, 0.0787, -0.0530, 0.0614], |
|
|
device='cuda:0'), grad: tensor([-0.1722, 0.0393, -0.0172, 0.0070, 0.0224, 0.0075, 0.1133], |
|
|
device='cuda:0') |
|
|
306 |
|
|
0.009819814303479266 |
|
|
changing lr |
|
|
---------------------saving model at epoch 6---------------------------------------------------- |
|
|
epoch 6, time 427.73, cls_loss 0.3658 cls_loss_mapping 0.4601 cls_loss_causal 1.1575 re_mapping 0.0819 re_causal 0.0808 |
|
|
Epoch 8, weight, value: tensor([[ 0.2240, 0.2268, 0.2634, ..., -0.0166, 0.0008, -0.0141], |
|
|
[-0.0867, -0.0825, -0.1027, ..., 0.0913, 0.0859, 0.0786], |
|
|
[-0.0880, -0.0760, -0.0994, ..., 0.0664, 0.0399, 0.0277], |
|
|
..., |
|
|
[-0.1128, -0.1233, -0.0609, ..., -0.0075, 0.0650, 0.0393], |
|
|
[-0.0067, 0.0008, -0.0251, ..., -0.1829, -0.1997, -0.2169], |
|
|
[ 0.1252, 0.1315, 0.1129, ..., -0.0217, -0.0112, 0.0210]], |
|
|
device='cuda:0'), grad: tensor([[-1.6870e-03, -1.3332e-03, -9.8896e-04, ..., -2.5082e-04, |
|
|
-2.2459e-04, -2.6464e-04], |
|
|
[ 2.4211e-04, 6.1810e-05, 4.8459e-05, ..., 1.4257e-04, |
|
|
1.3053e-04, 1.4758e-04], |
|
|
[ 2.3880e-03, 1.4086e-03, 1.0519e-03, ..., 7.5197e-04, |
|
|
6.8378e-04, 7.8249e-04], |
|
|
..., |
|
|
[ 8.5688e-04, 1.7285e-04, 1.3876e-04, ..., 5.4407e-04, |
|
|
4.9782e-04, 5.6219e-04], |
|
|
[-1.4257e-04, -3.1739e-05, -2.5257e-06, ..., -5.1796e-05, |
|
|
-5.4002e-05, -5.2422e-05], |
|
|
[ 4.9734e-04, 1.1754e-04, 9.1851e-05, ..., 3.0231e-04, |
|
|
2.7633e-04, 3.1233e-04]], device='cuda:0') |
|
|
Epoch 8, bias, value: tensor([-0.0263, 0.0549, -0.0846, -0.0161, 0.0702, -0.0534, 0.0610], |
|
|
device='cuda:0'), grad: tensor([-0.0018, 0.0007, 0.0042, -0.0068, 0.0026, -0.0004, 0.0015], |
|
|
device='cuda:0') |
|
|
306 |
|
|
0.009755282581475767 |
|
|
changing lr |
|
|
epoch 7, time 428.13, cls_loss 0.3124 cls_loss_mapping 0.3850 cls_loss_causal 1.0907 re_mapping 0.0829 re_causal 0.0819 |
|
|
Epoch 9, weight, value: tensor([[ 0.2525, 0.2555, 0.2922, ..., -0.0049, 0.0127, -0.0029], |
|
|
[-0.0902, -0.0873, -0.1086, ..., 0.0879, 0.0829, 0.0759], |
|
|
[-0.0888, -0.0797, -0.1016, ..., 0.0671, 0.0397, 0.0283], |
|
|
..., |
|
|
[-0.1185, -0.1264, -0.0642, ..., -0.0080, 0.0637, 0.0385], |
|
|
[-0.0113, -0.0028, -0.0307, ..., -0.1846, -0.2024, -0.2191], |
|
|
[ 0.1178, 0.1233, 0.1060, ..., -0.0286, -0.0175, 0.0142]], |
|
|
device='cuda:0'), grad: tensor([[ 2.7314e-05, -1.1764e-05, -1.4231e-05, ..., 2.3901e-05, |
|
|
2.5332e-05, 2.7269e-05], |
|
|
[ 3.2067e-04, 1.0395e-04, 6.3479e-05, ..., 1.3578e-04, |
|
|
1.5271e-04, 1.6749e-04], |
|
|
[-8.8024e-04, -2.0969e-04, -1.2898e-04, ..., -3.6907e-04, |
|
|
-3.9673e-04, -4.4155e-04], |
|
|
..., |
|
|
[ 2.5272e-05, 4.2580e-06, 2.9281e-06, ..., 1.1168e-05, |
|
|
1.1407e-05, 1.2547e-05], |
|
|
[ 5.6177e-05, 1.7658e-05, 1.0453e-05, ..., 2.3872e-05, |
|
|
2.6867e-05, 2.9534e-05], |
|
|
[ 8.3685e-05, 2.6420e-05, 2.0012e-05, ..., 3.2485e-05, |
|
|
3.4392e-05, 3.7998e-05]], device='cuda:0') |
|
|
Epoch 9, bias, value: tensor([-0.0317, 0.0606, -0.0707, -0.0167, 0.0594, -0.0630, 0.0678], |
|
|
device='cuda:0'), grad: tensor([ 1.6165e-04, 1.2770e-03, -3.2120e-03, 1.1988e-03, 8.2850e-05, |
|
|
2.2674e-04, 2.6727e-04], device='cuda:0') |
|
|
306 |
|
|
0.009681174353198686 |
|
|
changing lr |
|
|
epoch 8, time 424.95, cls_loss 0.2002 cls_loss_mapping 0.3516 cls_loss_causal 1.0047 re_mapping 0.0820 re_causal 0.0811 |
|
|
Epoch 10, weight, value: tensor([[ 0.2614, 0.2600, 0.2987, ..., -0.0098, 0.0075, -0.0080], |
|
|
[-0.0923, -0.0862, -0.1080, ..., 0.0822, 0.0782, 0.0712], |
|
|
[-0.0955, -0.0838, -0.1060, ..., 0.0645, 0.0378, 0.0267], |
|
|
..., |
|
|
[-0.1192, -0.1275, -0.0653, ..., -0.0048, 0.0652, 0.0409], |
|
|
[-0.0104, -0.0028, -0.0313, ..., -0.1847, -0.2024, -0.2193], |
|
|
[ 0.1168, 0.1226, 0.1041, ..., -0.0272, -0.0155, 0.0157]], |
|
|
device='cuda:0'), grad: tensor([[ 2.3193e-03, 1.1292e-03, 1.1568e-03, ..., 1.0481e-03, |
|
|
1.2121e-03, 1.2636e-03], |
|
|
[ 2.1305e-03, 1.9920e-04, 2.6274e-04, ..., 1.1711e-03, |
|
|
1.3361e-03, 1.3800e-03], |
|
|
[-5.6152e-03, -1.1911e-03, -1.2016e-03, ..., -2.9316e-03, |
|
|
-3.5038e-03, -3.5591e-03], |
|
|
..., |
|
|
[-1.7195e-03, -1.0481e-03, -1.0672e-03, ..., -7.2527e-04, |
|
|
-8.3351e-04, -8.7643e-04], |
|
|
[-7.3552e-05, -1.5363e-05, -6.2399e-06, ..., -2.7969e-05, |
|
|
-3.7313e-05, -3.1382e-05], |
|
|
[ 4.4250e-04, 7.7963e-05, 8.1897e-05, ..., 2.3389e-04, |
|
|
2.7561e-04, 2.8086e-04]], device='cuda:0') |
|
|
Epoch 10, bias, value: tensor([-0.0325, 0.0560, -0.0762, -0.0128, 0.0681, -0.0589, 0.0619], |
|
|
device='cuda:0'), grad: tensor([ 0.0045, 0.0079, -0.0192, 0.0078, -0.0023, -0.0003, 0.0016], |
|
|
device='cuda:0') |
|
|
306 |
|
|
0.009597638862757255 |
|
|
changing lr |
|
|
---------------------saving model at epoch 9---------------------------------------------------- |
|
|
epoch 9, time 428.66, cls_loss 0.1527 cls_loss_mapping 0.2936 cls_loss_causal 0.9805 re_mapping 0.0773 re_causal 0.0766 |
|
|
Epoch 11, weight, value: tensor([[ 0.2543, 0.2539, 0.2933, ..., -0.0140, 0.0034, -0.0122], |
|
|
[-0.0843, -0.0777, -0.1006, ..., 0.0825, 0.0791, 0.0716], |
|
|
[-0.0936, -0.0842, -0.1065, ..., 0.0610, 0.0347, 0.0243], |
|
|
..., |
|
|
[-0.1203, -0.1270, -0.0658, ..., -0.0010, 0.0682, 0.0443], |
|
|
[-0.0038, 0.0005, -0.0281, ..., -0.1811, -0.1990, -0.2157], |
|
|
[ 0.1156, 0.1199, 0.1026, ..., -0.0263, -0.0146, 0.0164]], |
|
|
device='cuda:0'), grad: tensor([[ 2.4765e-02, 6.7558e-03, 6.9962e-03, ..., 1.8234e-02, |
|
|
1.8661e-02, 1.9272e-02], |
|
|
[ 4.1313e-03, 1.1272e-03, 1.1683e-03, ..., 3.0441e-03, |
|
|
3.1166e-03, 3.2177e-03], |
|
|
[ 7.8964e-03, 2.1553e-03, 2.2316e-03, ..., 5.8098e-03, |
|
|
5.9509e-03, 6.1417e-03], |
|
|
..., |
|
|
[-3.7140e-02, -1.0139e-02, -1.0498e-02, ..., -2.7344e-02, |
|
|
-2.7985e-02, -2.8900e-02], |
|
|
[ 1.7428e-04, 4.7594e-05, 4.9263e-05, ..., 1.2815e-04, |
|
|
1.3125e-04, 1.3554e-04], |
|
|
[ 8.5831e-05, 2.3320e-05, 2.4110e-05, ..., 6.3360e-05, |
|
|
6.4850e-05, 6.6936e-05]], device='cuda:0') |
|
|
Epoch 11, bias, value: tensor([-0.0377, 0.0606, -0.0670, -0.0292, 0.0683, -0.0484, 0.0588], |
|
|
device='cuda:0'), grad: tensor([ 0.0754, 0.0126, 0.0241, 0.0003, -0.1132, 0.0005, 0.0003], |
|
|
device='cuda:0') |
|
|
306 |
|
|
0.009504844339512096 |
|
|
changing lr |
|
|
epoch 10, time 432.92, cls_loss 0.1024 cls_loss_mapping 0.2664 cls_loss_causal 0.9671 re_mapping 0.0781 re_causal 0.0776 |
|
|
Epoch 12, weight, value: tensor([[ 0.2556, 0.2549, 0.2939, ..., -0.0154, 0.0019, -0.0134], |
|
|
[-0.0869, -0.0772, -0.1012, ..., 0.0757, 0.0722, 0.0644], |
|
|
[-0.0960, -0.0876, -0.1101, ..., 0.0613, 0.0361, 0.0255], |
|
|
..., |
|
|
[-0.1176, -0.1258, -0.0646, ..., 0.0041, 0.0723, 0.0490], |
|
|
[-0.0108, -0.0027, -0.0305, ..., -0.1790, -0.1969, -0.2136], |
|
|
[ 0.1190, 0.1213, 0.1047, ..., -0.0270, -0.0154, 0.0159]], |
|
|
device='cuda:0'), grad: tensor([[ 4.6939e-06, -6.5342e-06, -7.6517e-06, ..., 2.5146e-06, |
|
|
3.9227e-06, 4.4741e-06], |
|
|
[ 2.5302e-05, 8.0988e-06, 5.8897e-06, ..., 4.1500e-06, |
|
|
5.7928e-06, 6.3777e-06], |
|
|
[ 6.5279e-04, 9.4473e-05, 5.7399e-05, ..., 3.0136e-04, |
|
|
3.3832e-04, 3.9124e-04], |
|
|
..., |
|
|
[-3.4866e-03, -3.1996e-04, -1.4079e-04, ..., -1.8482e-03, |
|
|
-2.0466e-03, -2.3766e-03], |
|
|
[-6.5207e-05, -2.3693e-05, -1.9684e-05, ..., -6.7875e-06, |
|
|
-3.9041e-06, -1.0878e-06], |
|
|
[-1.3351e-04, -4.1932e-05, -2.8417e-05, ..., -1.0617e-05, |
|
|
-1.9982e-05, -2.1055e-05]], device='cuda:0') |
|
|
Epoch 12, bias, value: tensor([-0.0359, 0.0466, -0.0613, -0.0207, 0.0722, -0.0636, 0.0679], |
|
|
device='cuda:0'), grad: tensor([ 3.7521e-05, 6.2823e-05, 1.5831e-03, 7.4615e-03, -8.5754e-03, |
|
|
-2.2757e-04, -3.4595e-04], device='cuda:0') |
|
|
306 |
|
|
0.009402977659283692 |
|
|
changing lr |
|
|
epoch 11, time 428.82, cls_loss 0.1045 cls_loss_mapping 0.2426 cls_loss_causal 0.9590 re_mapping 0.0746 re_causal 0.0742 |
|
|
Epoch 13, weight, value: tensor([[ 0.2544, 0.2549, 0.2941, ..., -0.0189, -0.0027, -0.0172], |
|
|
[-0.0718, -0.0682, -0.0925, ..., 0.0745, 0.0723, 0.0643], |
|
|
[-0.0980, -0.0885, -0.1105, ..., 0.0600, 0.0352, 0.0247], |
|
|
..., |
|
|
[-0.1210, -0.1289, -0.0685, ..., 0.0058, 0.0737, 0.0503], |
|
|
[-0.0109, -0.0042, -0.0327, ..., -0.1759, -0.1932, -0.2096], |
|
|
[ 0.1122, 0.1174, 0.1006, ..., -0.0270, -0.0160, 0.0152]], |
|
|
device='cuda:0'), grad: tensor([[-3.5858e-03, -1.9474e-03, -1.8644e-03, ..., -1.1988e-03, |
|
|
-1.4086e-03, -1.4277e-03], |
|
|
[ 3.4409e-03, 1.7090e-03, 1.6346e-03, ..., 1.1806e-03, |
|
|
1.3924e-03, 1.4219e-03], |
|
|
[-1.8692e-03, -2.7037e-04, -2.5535e-04, ..., -6.8378e-04, |
|
|
-8.3160e-04, -9.0551e-04], |
|
|
..., |
|
|
[ 3.8457e-04, 1.0127e-04, 9.6619e-05, ..., 1.2803e-04, |
|
|
1.5509e-04, 1.6749e-04], |
|
|
[ 8.3780e-04, 1.4365e-04, 1.3626e-04, ..., 3.0255e-04, |
|
|
3.6740e-04, 3.9887e-04], |
|
|
[ 1.8275e-04, 4.3809e-05, 4.1515e-05, ..., 6.1333e-05, |
|
|
7.4029e-05, 8.0884e-05]], device='cuda:0') |
|
|
Epoch 13, bias, value: tensor([-0.0398, 0.0663, -0.0684, -0.0271, 0.0654, -0.0569, 0.0658], |
|
|
device='cuda:0'), grad: tensor([-0.0050, 0.0056, -0.0061, 0.0013, 0.0010, 0.0026, 0.0005], |
|
|
device='cuda:0') |
|
|
306 |
|
|
0.009292243968009333 |
|
|
changing lr |
|
|
---------------------saving model at epoch 12---------------------------------------------------- |
|
|
epoch 12, time 433.28, cls_loss 0.1435 cls_loss_mapping 0.2366 cls_loss_causal 0.9508 re_mapping 0.0761 re_causal 0.0760 |
|
|
Epoch 14, weight, value: tensor([[ 0.2648, 0.2631, 0.3023, ..., -0.0203, -0.0039, -0.0183], |
|
|
[-0.0708, -0.0647, -0.0908, ..., 0.0738, 0.0713, 0.0633], |
|
|
[-0.1016, -0.0902, -0.1106, ..., 0.0580, 0.0337, 0.0228], |
|
|
..., |
|
|
[-0.1377, -0.1389, -0.0794, ..., 0.0060, 0.0728, 0.0497], |
|
|
[-0.0093, -0.0050, -0.0338, ..., -0.1737, -0.1908, -0.2077], |
|
|
[ 0.1134, 0.1155, 0.1006, ..., -0.0270, -0.0161, 0.0155]], |
|
|
device='cuda:0'), grad: tensor([[-1.0452e-02, -4.9820e-03, -4.7569e-03, ..., -1.1320e-03, |
|
|
-1.4286e-03, -1.5659e-03], |
|
|
[ 9.8109e-05, 4.7505e-05, 4.5270e-05, ..., 1.0222e-05, |
|
|
1.2979e-05, 1.4357e-05], |
|
|
[ 4.1217e-05, 1.9357e-05, 1.8522e-05, ..., 4.6566e-06, |
|
|
5.8375e-06, 6.3404e-06], |
|
|
..., |
|
|
[ 1.2827e-03, 6.1560e-04, 5.8699e-04, ..., 1.3649e-04, |
|
|
1.7273e-04, 1.9014e-04], |
|
|
[ 6.7101e-03, 3.0994e-03, 2.9678e-03, ..., 7.8487e-04, |
|
|
9.7942e-04, 1.0557e-03], |
|
|
[ 1.6441e-03, 8.8120e-04, 8.3113e-04, ..., 1.2231e-04, |
|
|
1.6546e-04, 1.9848e-04]], device='cuda:0') |
|
|
Epoch 14, bias, value: tensor([-0.0348, 0.0573, -0.0797, -0.0097, 0.0495, -0.0497, 0.0722], |
|
|
device='cuda:0'), grad: tensor([-1.6800e-02, 1.5473e-04, 6.7532e-05, 1.0681e-03, 2.0447e-03, |
|
|
1.1192e-02, 2.2488e-03], device='cuda:0') |
|
|
306 |
|
|
0.009172866268606516 |
|
|
changing lr |
|
|
epoch 13, time 428.41, cls_loss 0.0724 cls_loss_mapping 0.2087 cls_loss_causal 0.9581 re_mapping 0.0757 re_causal 0.0758 |
|
|
Epoch 15, weight, value: tensor([[ 0.2678, 0.2653, 0.3044, ..., -0.0203, -0.0042, -0.0183], |
|
|
[-0.0688, -0.0640, -0.0901, ..., 0.0728, 0.0709, 0.0626], |
|
|
[-0.1001, -0.0903, -0.1106, ..., 0.0604, 0.0366, 0.0265], |
|
|
..., |
|
|
[-0.1351, -0.1369, -0.0781, ..., 0.0080, 0.0743, 0.0514], |
|
|
[-0.0108, -0.0065, -0.0349, ..., -0.1723, -0.1900, -0.2064], |
|
|
[ 0.1121, 0.1141, 0.0995, ..., -0.0277, -0.0169, 0.0143]], |
|
|
device='cuda:0'), grad: tensor([[ 1.1940e-02, 4.6654e-03, 3.6144e-03, ..., 3.5648e-03, |
|
|
3.6526e-03, 4.1809e-03], |
|
|
[ 7.1526e-04, 2.5606e-04, 1.9670e-04, ..., 2.4796e-04, |
|
|
2.5439e-04, 2.9278e-04], |
|
|
[ 3.0756e-04, 9.9123e-05, 7.3135e-05, ..., 1.2970e-04, |
|
|
1.3614e-04, 1.5116e-04], |
|
|
..., |
|
|
[-1.6800e-02, -6.4583e-03, -4.9934e-03, ..., -5.1765e-03, |
|
|
-5.3024e-03, -6.0768e-03], |
|
|
[ 3.9649e-04, 1.4973e-04, 1.1551e-04, ..., 1.2648e-04, |
|
|
1.2982e-04, 1.4853e-04], |
|
|
[ 3.3360e-03, 1.2503e-03, 9.6607e-04, ..., 1.0710e-03, |
|
|
1.0958e-03, 1.2627e-03]], device='cuda:0') |
|
|
Epoch 15, bias, value: tensor([-0.0334, 0.0568, -0.0657, -0.0270, 0.0542, -0.0515, 0.0717], |
|
|
device='cuda:0'), grad: tensor([ 0.0220, 0.0015, 0.0007, 0.0002, -0.0316, 0.0008, 0.0065], |
|
|
device='cuda:0') |
|
|
306 |
|
|
0.00904508497187474 |
|
|
changing lr |
|
|
epoch 14, time 427.18, cls_loss 0.0752 cls_loss_mapping 0.2063 cls_loss_causal 0.9385 re_mapping 0.0745 re_causal 0.0746 |
|
|
Epoch 16, weight, value: tensor([[ 0.2597, 0.2606, 0.2997, ..., -0.0224, -0.0069, -0.0210], |
|
|
[-0.0762, -0.0676, -0.0934, ..., 0.0694, 0.0677, 0.0592], |
|
|
[-0.0925, -0.0868, -0.1069, ..., 0.0608, 0.0379, 0.0282], |
|
|
..., |
|
|
[-0.1296, -0.1333, -0.0759, ..., 0.0107, 0.0755, 0.0534], |
|
|
[-0.0116, -0.0077, -0.0362, ..., -0.1713, -0.1888, -0.2052], |
|
|
[ 0.1154, 0.1160, 0.1024, ..., -0.0267, -0.0156, 0.0152]], |
|
|
device='cuda:0'), grad: tensor([[-1.1276e-02, -7.5569e-03, -7.1335e-03, ..., -9.6512e-04, |
|
|
-1.2064e-03, -1.4009e-03], |
|
|
[ 9.4399e-06, 6.8583e-06, 6.3330e-06, ..., 1.8068e-06, |
|
|
1.8124e-06, 1.7043e-06], |
|
|
[ 3.1776e-03, 1.9855e-03, 1.8578e-03, ..., 3.5858e-04, |
|
|
4.3797e-04, 4.9114e-04], |
|
|
..., |
|
|
[-5.6791e-04, -1.5104e-04, -1.1533e-04, ..., -1.8704e-04, |
|
|
-2.1851e-04, -2.2566e-04], |
|
|
[ 1.1760e-04, 4.9055e-05, 4.1932e-05, ..., 2.5496e-05, |
|
|
3.0220e-05, 3.1352e-05], |
|
|
[ 8.4915e-03, 5.6458e-03, 5.3215e-03, ..., 7.5531e-04, |
|
|
9.4271e-04, 1.0891e-03]], device='cuda:0') |
|
|
Epoch 16, bias, value: tensor([-0.0451, 0.0468, -0.0566, -0.0274, 0.0629, -0.0538, 0.0782], |
|
|
device='cuda:0'), grad: tensor([-1.2550e-02, 7.4729e-06, 4.0016e-03, 7.3373e-05, -1.3828e-03, |
|
|
2.2531e-04, 9.6130e-03], device='cuda:0') |
|
|
306 |
|
|
0.008909157412340152 |
|
|
changing lr |
|
|
epoch 15, time 429.27, cls_loss 0.0493 cls_loss_mapping 0.1644 cls_loss_causal 0.8087 re_mapping 0.0737 re_causal 0.0738 |
|
|
Epoch 17, weight, value: tensor([[ 0.2622, 0.2626, 0.3013, ..., -0.0197, -0.0045, -0.0183], |
|
|
[-0.0737, -0.0665, -0.0920, ..., 0.0687, 0.0676, 0.0594], |
|
|
[-0.1002, -0.0888, -0.1083, ..., 0.0566, 0.0336, 0.0238], |
|
|
..., |
|
|
[-0.1308, -0.1338, -0.0773, ..., 0.0105, 0.0745, 0.0524], |
|
|
[-0.0091, -0.0070, -0.0355, ..., -0.1685, -0.1859, -0.2023], |
|
|
[ 0.1094, 0.1124, 0.0990, ..., -0.0280, -0.0173, 0.0129]], |
|
|
device='cuda:0'), grad: tensor([[ 3.2349e-03, 9.7322e-04, 1.0643e-03, ..., 9.9182e-04, |
|
|
1.1444e-03, 1.1063e-03], |
|
|
[ 1.0931e-04, 3.7700e-05, 2.9683e-05, ..., 3.7044e-05, |
|
|
4.3064e-05, 4.2886e-05], |
|
|
[ 3.2806e-04, 1.1384e-04, 1.1349e-04, ..., 1.0669e-04, |
|
|
1.2398e-04, 1.2791e-04], |
|
|
..., |
|
|
[-9.7275e-03, -3.2368e-03, -3.2558e-03, ..., -3.1281e-03, |
|
|
-3.6716e-03, -3.7193e-03], |
|
|
[ 2.3975e-03, 7.9107e-04, 8.0633e-04, ..., 7.6532e-04, |
|
|
8.9788e-04, 9.0694e-04], |
|
|
[ 4.2582e-04, 1.5390e-04, 1.4496e-04, ..., 1.4293e-04, |
|
|
1.7035e-04, 1.7917e-04]], device='cuda:0') |
|
|
Epoch 17, bias, value: tensor([-0.0407, 0.0508, -0.0761, -0.0104, 0.0615, -0.0460, 0.0660], |
|
|
device='cuda:0'), grad: tensor([ 0.0076, 0.0002, 0.0007, 0.0070, -0.0219, 0.0054, 0.0009], |
|
|
device='cuda:0') |
|
|
306 |
|
|
0.00876535733001806 |
|
|
changing lr |
|
|
epoch 16, time 430.23, cls_loss 0.0479 cls_loss_mapping 0.1632 cls_loss_causal 0.8588 re_mapping 0.0722 re_causal 0.0726 |
|
|
Epoch 18, weight, value: tensor([[ 0.2592, 0.2595, 0.2979, ..., -0.0207, -0.0055, -0.0192], |
|
|
[-0.0728, -0.0663, -0.0919, ..., 0.0676, 0.0669, 0.0586], |
|
|
[-0.0975, -0.0859, -0.1046, ..., 0.0558, 0.0330, 0.0236], |
|
|
..., |
|
|
[-0.1267, -0.1304, -0.0751, ..., 0.0126, 0.0759, 0.0540], |
|
|
[-0.0114, -0.0086, -0.0370, ..., -0.1673, -0.1846, -0.2010], |
|
|
[ 0.1103, 0.1118, 0.0986, ..., -0.0272, -0.0167, 0.0132]], |
|
|
device='cuda:0'), grad: tensor([[ 1.6737e-04, 5.5701e-05, 5.5939e-05, ..., 5.0575e-05, |
|
|
5.4538e-05, 6.0648e-05], |
|
|
[ 4.0913e-04, 1.3793e-04, 1.3995e-04, ..., 1.1837e-04, |
|
|
1.2434e-04, 1.3864e-04], |
|
|
[-6.3133e-03, -1.0328e-03, -1.0738e-03, ..., -2.9068e-03, |
|
|
-2.9488e-03, -3.3131e-03], |
|
|
..., |
|
|
[ 5.3711e-03, 8.8596e-04, 9.2173e-04, ..., 2.4643e-03, |
|
|
2.5005e-03, 2.8095e-03], |
|
|
[ 1.8072e-04, 4.8667e-05, 4.9621e-05, ..., 6.4075e-05, |
|
|
6.6817e-05, 7.4565e-05], |
|
|
[-3.5214e-04, -2.0921e-04, -2.0885e-04, ..., -1.7747e-05, |
|
|
-3.2753e-05, -3.3498e-05]], device='cuda:0') |
|
|
Epoch 18, bias, value: tensor([-0.0402, 0.0505, -0.0748, -0.0158, 0.0623, -0.0484, 0.0714], |
|
|
device='cuda:0'), grad: tensor([ 0.0004, 0.0009, -0.0178, 0.0014, 0.0151, 0.0004, -0.0005], |
|
|
device='cuda:0') |
|
|
306 |
|
|
0.008613974319136962 |
|
|
changing lr |
|
|
epoch 17, time 429.92, cls_loss 0.0243 cls_loss_mapping 0.1391 cls_loss_causal 0.8789 re_mapping 0.0726 re_causal 0.0731 |
|
|
Epoch 19, weight, value: tensor([[ 0.2583, 0.2586, 0.2964, ..., -0.0196, -0.0048, -0.0185], |
|
|
[-0.0752, -0.0660, -0.0914, ..., 0.0652, 0.0644, 0.0560], |
|
|
[-0.0976, -0.0868, -0.1054, ..., 0.0534, 0.0304, 0.0213], |
|
|
..., |
|
|
[-0.1255, -0.1298, -0.0747, ..., 0.0132, 0.0766, 0.0549], |
|
|
[-0.0113, -0.0088, -0.0373, ..., -0.1651, -0.1825, -0.1985], |
|
|
[ 0.1100, 0.1122, 0.0997, ..., -0.0276, -0.0173, 0.0123]], |
|
|
device='cuda:0'), grad: tensor([[ 1.8403e-06, -8.6240e-07, -1.1362e-06, ..., 1.7639e-06, |
|
|
1.7975e-06, 2.0228e-06], |
|
|
[ 8.0653e-07, 2.6636e-07, 1.9558e-07, ..., 2.4587e-07, |
|
|
2.5518e-07, 2.9616e-07], |
|
|
[ 1.5311e-06, 3.5390e-07, 1.6950e-07, ..., 6.5006e-07, |
|
|
5.9977e-07, 7.3202e-07], |
|
|
..., |
|
|
[-3.5584e-05, -6.4559e-06, -2.1216e-06, ..., -1.6674e-05, |
|
|
-1.6719e-05, -1.9029e-05], |
|
|
[-7.9721e-07, -1.6205e-07, -3.1292e-07, ..., 3.2410e-07, |
|
|
2.8685e-07, 2.4214e-07], |
|
|
[ 9.9838e-07, 6.4634e-07, 5.4948e-07, ..., 1.3597e-07, |
|
|
1.3970e-07, 1.5274e-07]], device='cuda:0') |
|
|
Epoch 19, bias, value: tensor([-0.0405, 0.0421, -0.0691, -0.0113, 0.0613, -0.0455, 0.0680], |
|
|
device='cuda:0'), grad: tensor([ 8.4639e-06, 1.9073e-06, 3.9414e-06, 8.4400e-05, -9.7275e-05, |
|
|
-2.9840e-06, 1.5069e-06], device='cuda:0') |
|
|
306 |
|
|
0.008455313244934327 |
|
|
changing lr |
|
|
epoch 18, time 426.88, cls_loss 0.0267 cls_loss_mapping 0.1306 cls_loss_causal 0.8283 re_mapping 0.0700 re_causal 0.0706 |
|
|
Epoch 20, weight, value: tensor([[ 0.2555, 0.2564, 0.2938, ..., -0.0193, -0.0046, -0.0181], |
|
|
[-0.0724, -0.0644, -0.0896, ..., 0.0648, 0.0644, 0.0561], |
|
|
[-0.0933, -0.0848, -0.1034, ..., 0.0538, 0.0313, 0.0223], |
|
|
..., |
|
|
[-0.1238, -0.1286, -0.0743, ..., 0.0136, 0.0762, 0.0548], |
|
|
[-0.0132, -0.0105, -0.0388, ..., -0.1628, -0.1805, -0.1963], |
|
|
[ 0.1110, 0.1128, 0.1005, ..., -0.0283, -0.0180, 0.0112]], |
|
|
device='cuda:0'), grad: tensor([[ 2.0943e-03, 6.0558e-04, 6.5231e-04, ..., 1.0023e-03, |
|
|
9.3126e-04, 1.0624e-03], |
|
|
[ 3.7360e-04, 1.0639e-04, 1.0651e-04, ..., 1.6189e-04, |
|
|
1.5295e-04, 1.7846e-04], |
|
|
[ 3.8662e-03, 9.4938e-04, 7.3290e-04, ..., 1.3113e-03, |
|
|
1.5354e-03, 1.6270e-03], |
|
|
..., |
|
|
[-8.3771e-03, -2.2507e-03, -2.1477e-03, ..., -3.5439e-03, |
|
|
-3.6964e-03, -4.0207e-03], |
|
|
[ 4.0007e-04, 1.0943e-04, 1.0467e-04, ..., 1.6475e-04, |
|
|
1.6594e-04, 1.8454e-04], |
|
|
[ 8.8573e-05, 3.6508e-05, 7.8022e-05, ..., 1.6296e-04, |
|
|
1.9848e-04, 1.7130e-04]], device='cuda:0') |
|
|
Epoch 20, bias, value: tensor([-0.0415, 0.0456, -0.0612, -0.0222, 0.0607, -0.0479, 0.0713], |
|
|
device='cuda:0'), grad: tensor([ 0.0055, 0.0010, 0.0094, 0.0041, -0.0212, 0.0010, 0.0003], |
|
|
device='cuda:0') |
|
|
306 |
|
|
0.008289693629698565 |
|
|
changing lr |
|
|
---------------------saving model at epoch 19---------------------------------------------------- |
|
|
epoch 19, time 432.12, cls_loss 0.0267 cls_loss_mapping 0.1288 cls_loss_causal 0.8148 re_mapping 0.0707 re_causal 0.0716 |
|
|
Epoch 21, weight, value: tensor([[ 0.2600, 0.2587, 0.2960, ..., -0.0177, -0.0030, -0.0163], |
|
|
[-0.0687, -0.0632, -0.0889, ..., 0.0651, 0.0654, 0.0569], |
|
|
[-0.0958, -0.0865, -0.1049, ..., 0.0533, 0.0306, 0.0220], |
|
|
..., |
|
|
[-0.1248, -0.1282, -0.0745, ..., 0.0130, 0.0747, 0.0536], |
|
|
[-0.0146, -0.0111, -0.0389, ..., -0.1617, -0.1793, -0.1949], |
|
|
[ 0.1092, 0.1118, 0.0995, ..., -0.0295, -0.0196, 0.0092]], |
|
|
device='cuda:0'), grad: tensor([[ 1.8910e-05, 6.5379e-06, 4.4592e-06, ..., 6.7987e-06, |
|
|
7.1824e-06, 7.8008e-06], |
|
|
[-1.4128e-06, -1.0086e-06, -1.0710e-06, ..., -6.9104e-07, |
|
|
-1.0608e-06, -9.7603e-07], |
|
|
[ 4.8101e-05, 1.2361e-05, 5.9381e-06, ..., 8.7246e-06, |
|
|
5.6922e-06, 8.7470e-06], |
|
|
..., |
|
|
[-6.1803e-06, -3.6955e-06, -3.3882e-06, ..., -5.7295e-06, |
|
|
-7.3798e-06, -7.0296e-06], |
|
|
[-1.5056e-04, -3.7521e-05, -1.7092e-05, ..., -2.5272e-05, |
|
|
-1.4730e-05, -2.4661e-05], |
|
|
[ 3.8624e-05, 1.0028e-05, 4.9323e-06, ..., 6.9328e-06, |
|
|
4.5486e-06, 6.9775e-06]], device='cuda:0') |
|
|
Epoch 21, bias, value: tensor([-0.0320, 0.0540, -0.0647, -0.0240, 0.0552, -0.0512, 0.0674], |
|
|
device='cuda:0'), grad: tensor([ 4.3154e-05, -3.6228e-07, 1.2267e-04, 1.3447e-04, -1.0043e-05, |
|
|
-3.8791e-04, 9.7752e-05], device='cuda:0') |
|
|
306 |
|
|
0.00811744900929367 |
|
|
changing lr |
|
|
epoch 20, time 429.61, cls_loss 0.0334 cls_loss_mapping 0.1398 cls_loss_causal 0.8032 re_mapping 0.0706 re_causal 0.0716 |
|
|
Epoch 22, weight, value: tensor([[ 0.2576, 0.2569, 0.2942, ..., -0.0177, -0.0028, -0.0160], |
|
|
[-0.0645, -0.0590, -0.0853, ..., 0.0635, 0.0634, 0.0551], |
|
|
[-0.0925, -0.0848, -0.1032, ..., 0.0532, 0.0312, 0.0227], |
|
|
..., |
|
|
[-0.1247, -0.1288, -0.0756, ..., 0.0144, 0.0759, 0.0548], |
|
|
[-0.0171, -0.0135, -0.0409, ..., -0.1600, -0.1781, -0.1935], |
|
|
[ 0.1069, 0.1103, 0.0983, ..., -0.0300, -0.0205, 0.0080]], |
|
|
device='cuda:0'), grad: tensor([[-8.5533e-05, -6.3598e-05, -6.1393e-05, ..., -1.3635e-05, |
|
|
-1.5028e-05, -1.5661e-05], |
|
|
[-9.6858e-05, -2.0891e-05, -1.3158e-05, ..., -1.9193e-05, |
|
|
-2.6330e-05, -3.3528e-05], |
|
|
[ 1.8907e-04, 4.3601e-05, 3.0041e-05, ..., 4.0948e-05, |
|
|
4.7296e-05, 5.7191e-05], |
|
|
..., |
|
|
[ 8.9645e-05, 4.6551e-05, 4.2140e-05, ..., 1.4164e-05, |
|
|
1.6123e-05, 1.9029e-05], |
|
|
[ 3.4887e-06, 1.4324e-06, 1.4063e-06, ..., -2.2911e-07, |
|
|
7.2271e-07, 9.7789e-07], |
|
|
[-1.1152e-04, -9.6262e-06, -6.3889e-07, ..., -2.4766e-05, |
|
|
-2.5764e-05, -3.1590e-05]], device='cuda:0') |
|
|
Epoch 22, bias, value: tensor([-0.0325, 0.0521, -0.0610, -0.0267, 0.0597, -0.0502, 0.0632], |
|
|
device='cuda:0'), grad: tensor([-9.4354e-05, -2.4581e-04, 4.6301e-04, 2.9758e-05, 1.5485e-04, |
|
|
6.5416e-06, -3.1376e-04], device='cuda:0') |
|
|
306 |
|
|
0.007938926261462368 |
|
|
changing lr |
|
|
epoch 21, time 426.58, cls_loss 0.0379 cls_loss_mapping 0.1226 cls_loss_causal 0.8209 re_mapping 0.0672 re_causal 0.0683 |
|
|
Epoch 23, weight, value: tensor([[ 0.2543, 0.2548, 0.2918, ..., -0.0181, -0.0035, -0.0166], |
|
|
[-0.0610, -0.0557, -0.0815, ..., 0.0627, 0.0633, 0.0549], |
|
|
[-0.0944, -0.0856, -0.1037, ..., 0.0523, 0.0305, 0.0222], |
|
|
..., |
|
|
[-0.1257, -0.1288, -0.0764, ..., 0.0143, 0.0749, 0.0540], |
|
|
[-0.0155, -0.0136, -0.0409, ..., -0.1578, -0.1759, -0.1909], |
|
|
[ 0.1065, 0.1089, 0.0971, ..., -0.0297, -0.0205, 0.0078]], |
|
|
device='cuda:0'), grad: tensor([[-2.0206e-05, -1.4797e-05, -1.2912e-05, ..., -2.2482e-06, |
|
|
-2.2426e-06, -2.8517e-06], |
|
|
[ 3.0920e-07, 3.2224e-07, 3.0361e-07, ..., -7.0781e-08, |
|
|
-1.7323e-07, -1.7136e-07], |
|
|
[ 8.7991e-06, 3.8929e-06, 3.2075e-06, ..., 2.0005e-06, |
|
|
2.0061e-06, 2.2966e-06], |
|
|
..., |
|
|
[-2.0675e-07, 1.2089e-06, 1.0785e-06, ..., -2.2911e-06, |
|
|
-2.1961e-06, -2.2668e-06], |
|
|
[-6.8881e-06, -1.1437e-06, -6.6683e-07, ..., -1.4920e-06, |
|
|
-1.4491e-06, -1.6559e-06], |
|
|
[ 1.3739e-05, 9.4026e-06, 8.1286e-06, ..., 1.7323e-06, |
|
|
1.7434e-06, 2.1514e-06]], device='cuda:0') |
|
|
Epoch 23, bias, value: tensor([-0.0375, 0.0531, -0.0654, -0.0235, 0.0556, -0.0433, 0.0657], |
|
|
device='cuda:0'), grad: tensor([-1.7449e-05, -2.8312e-07, 1.7524e-05, 1.3448e-05, -7.6182e-06, |
|
|
-2.0295e-05, 1.4588e-05], device='cuda:0') |
|
|
306 |
|
|
0.007754484907260515 |
|
|
changing lr |
|
|
epoch 22, time 426.67, cls_loss 0.0169 cls_loss_mapping 0.1011 cls_loss_causal 0.7419 re_mapping 0.0658 re_causal 0.0668 |
|
|
Epoch 24, weight, value: tensor([[ 0.2518, 0.2533, 0.2901, ..., -0.0181, -0.0037, -0.0170], |
|
|
[-0.0619, -0.0557, -0.0812, ..., 0.0615, 0.0621, 0.0538], |
|
|
[-0.0946, -0.0862, -0.1040, ..., 0.0516, 0.0301, 0.0220], |
|
|
..., |
|
|
[-0.1201, -0.1249, -0.0734, ..., 0.0155, 0.0755, 0.0550], |
|
|
[-0.0152, -0.0134, -0.0403, ..., -0.1562, -0.1744, -0.1893], |
|
|
[ 0.1044, 0.1070, 0.0952, ..., -0.0296, -0.0204, 0.0076]], |
|
|
device='cuda:0'), grad: tensor([[ 3.6269e-05, 1.0885e-05, 5.3830e-06, ..., 2.0787e-05, |
|
|
2.2650e-05, 2.5585e-05], |
|
|
[ 7.8753e-06, 2.4289e-06, 1.2554e-06, ..., 4.4480e-06, |
|
|
4.8541e-06, 5.4799e-06], |
|
|
[ 9.0837e-05, 2.9549e-05, 1.5378e-05, ..., 5.3287e-05, |
|
|
5.8591e-05, 6.5625e-05], |
|
|
..., |
|
|
[-1.1673e-03, -3.6597e-04, -1.8895e-04, ..., -6.7091e-04, |
|
|
-7.3385e-04, -8.2588e-04], |
|
|
[ 8.7166e-04, 2.7275e-04, 1.4091e-04, ..., 5.0020e-04, |
|
|
5.4693e-04, 6.1560e-04], |
|
|
[ 4.8757e-05, 1.5154e-05, 7.8380e-06, ..., 2.7969e-05, |
|
|
3.0503e-05, 3.4362e-05]], device='cuda:0') |
|
|
Epoch 24, bias, value: tensor([-0.0378, 0.0497, -0.0652, -0.0239, 0.0594, -0.0413, 0.0638], |
|
|
device='cuda:0'), grad: tensor([ 1.0443e-04, 2.2441e-05, 2.6131e-04, 3.2115e-04, -3.3417e-03, |
|
|
2.4929e-03, 1.3912e-04], device='cuda:0') |
|
|
306 |
|
|
0.007564496387029534 |
|
|
changing lr |
|
|
epoch 23, time 429.30, cls_loss 0.0200 cls_loss_mapping 0.1184 cls_loss_causal 0.7899 re_mapping 0.0647 re_causal 0.0658 |
|
|
Epoch 25, weight, value: tensor([[ 0.2588, 0.2577, 0.2946, ..., -0.0171, -0.0030, -0.0160], |
|
|
[-0.0635, -0.0567, -0.0819, ..., 0.0609, 0.0619, 0.0534], |
|
|
[-0.0968, -0.0871, -0.1049, ..., 0.0503, 0.0291, 0.0212], |
|
|
..., |
|
|
[-0.1225, -0.1247, -0.0735, ..., 0.0152, 0.0745, 0.0544], |
|
|
[-0.0159, -0.0147, -0.0414, ..., -0.1544, -0.1725, -0.1874], |
|
|
[ 0.1051, 0.1059, 0.0938, ..., -0.0290, -0.0201, 0.0078]], |
|
|
device='cuda:0'), grad: tensor([[-7.3481e-04, -4.4537e-04, -4.3368e-04, ..., -7.9334e-05, |
|
|
-8.5056e-05, -9.8109e-05], |
|
|
[ 3.8147e-05, 1.6361e-05, 1.4454e-05, ..., 9.4771e-06, |
|
|
1.0312e-05, 1.1541e-05], |
|
|
[ 2.7347e-04, 1.6248e-04, 1.5748e-04, ..., 3.1322e-05, |
|
|
3.3647e-05, 3.8832e-05], |
|
|
..., |
|
|
[ 1.1367e-04, 6.1572e-05, 5.8204e-05, ..., 1.6898e-05, |
|
|
1.8463e-05, 2.0921e-05], |
|
|
[ 1.5259e-04, 1.1063e-04, 1.1152e-04, ..., 3.8184e-06, |
|
|
4.9807e-06, 5.8860e-06], |
|
|
[ 6.7055e-05, 4.6581e-05, 4.6998e-05, ..., 3.6433e-06, |
|
|
2.3656e-06, 3.5577e-06]], device='cuda:0') |
|
|
Epoch 25, bias, value: tensor([-0.0299, 0.0463, -0.0686, -0.0247, 0.0534, -0.0402, 0.0682], |
|
|
device='cuda:0'), grad: tensor([-9.9564e-04, 7.6473e-05, 3.8290e-04, 1.4734e-04, 1.8203e-04, |
|
|
1.3793e-04, 6.9320e-05], device='cuda:0') |
|
|
306 |
|
|
0.007369343312364995 |
|
|
changing lr |
|
|
epoch 24, time 426.07, cls_loss 0.0251 cls_loss_mapping 0.1051 cls_loss_causal 0.7912 re_mapping 0.0639 re_causal 0.0653 |
|
|
Epoch 26, weight, value: tensor([[ 0.2537, 0.2548, 0.2915, ..., -0.0177, -0.0040, -0.0170], |
|
|
[-0.0597, -0.0541, -0.0789, ..., 0.0608, 0.0620, 0.0536], |
|
|
[-0.0957, -0.0868, -0.1045, ..., 0.0499, 0.0289, 0.0212], |
|
|
..., |
|
|
[-0.1195, -0.1230, -0.0726, ..., 0.0159, 0.0747, 0.0549], |
|
|
[-0.0175, -0.0150, -0.0414, ..., -0.1536, -0.1717, -0.1865], |
|
|
[ 0.1010, 0.1035, 0.0916, ..., -0.0302, -0.0214, 0.0061]], |
|
|
device='cuda:0'), grad: tensor([[ 1.2159e-04, 5.0753e-05, 4.7743e-05, ..., 3.1084e-05, |
|
|
3.1054e-05, 3.8862e-05], |
|
|
[ 2.5213e-05, 1.1273e-05, 1.0602e-05, ..., 5.8673e-06, |
|
|
5.9754e-06, 7.4059e-06], |
|
|
[ 1.0826e-05, 5.6848e-06, 5.4725e-06, ..., 2.8498e-06, |
|
|
3.1032e-06, 3.4459e-06], |
|
|
..., |
|
|
[-3.8218e-04, -1.7190e-04, -1.6272e-04, ..., -9.3102e-05, |
|
|
-9.3400e-05, -1.1605e-04], |
|
|
[ 5.5507e-06, 4.1053e-06, 4.2394e-06, ..., 1.8217e-06, |
|
|
1.5423e-06, 1.8962e-06], |
|
|
[ 3.5197e-05, 1.7121e-05, 1.6272e-05, ..., 7.8604e-06, |
|
|
7.9349e-06, 9.8199e-06]], device='cuda:0') |
|
|
Epoch 26, bias, value: tensor([-0.0365, 0.0497, -0.0662, -0.0146, 0.0565, -0.0447, 0.0602], |
|
|
device='cuda:0'), grad: tensor([ 2.1684e-04, 4.3213e-05, 1.8209e-05, 3.1137e-04, -6.5041e-04, |
|
|
4.7274e-06, 5.6416e-05], device='cuda:0') |
|
|
306 |
|
|
0.0071694186955877925 |
|
|
changing lr |
|
|
epoch 25, time 425.98, cls_loss 0.0207 cls_loss_mapping 0.1023 cls_loss_causal 0.7798 re_mapping 0.0635 re_causal 0.0652 |
|
|
Epoch 27, weight, value: tensor([[ 0.2559, 0.2547, 0.2909, ..., -0.0169, -0.0032, -0.0158], |
|
|
[-0.0587, -0.0531, -0.0774, ..., 0.0610, 0.0621, 0.0538], |
|
|
[-0.0948, -0.0862, -0.1037, ..., 0.0492, 0.0285, 0.0209], |
|
|
..., |
|
|
[-0.1254, -0.1251, -0.0754, ..., 0.0145, 0.0725, 0.0526], |
|
|
[-0.0178, -0.0154, -0.0415, ..., -0.1522, -0.1701, -0.1848], |
|
|
[ 0.1037, 0.1041, 0.0924, ..., -0.0293, -0.0205, 0.0069]], |
|
|
device='cuda:0'), grad: tensor([[ 0.0110, 0.0050, 0.0045, ..., 0.0032, 0.0036, 0.0042], |
|
|
[ 0.0048, 0.0021, 0.0020, ..., 0.0013, 0.0015, 0.0018], |
|
|
[-0.0494, -0.0227, -0.0208, ..., -0.0142, -0.0161, -0.0184], |
|
|
..., |
|
|
[-0.0014, -0.0005, -0.0005, ..., -0.0004, -0.0004, -0.0005], |
|
|
[ 0.0221, 0.0103, 0.0094, ..., 0.0062, 0.0070, 0.0081], |
|
|
[ 0.0116, 0.0054, 0.0049, ..., 0.0033, 0.0037, 0.0043]], |
|
|
device='cuda:0') |
|
|
Epoch 27, bias, value: tensor([-0.0291, 0.0499, -0.0656, -0.0168, 0.0440, -0.0445, 0.0666], |
|
|
device='cuda:0'), grad: tensor([ 0.0222, 0.0096, -0.0975, 0.0028, -0.0031, 0.0432, 0.0229], |
|
|
device='cuda:0') |
|
|
306 |
|
|
0.0069651251582696205 |
|
|
changing lr |
|
|
epoch 26, time 430.16, cls_loss 0.0187 cls_loss_mapping 0.0969 cls_loss_causal 0.7636 re_mapping 0.0622 re_causal 0.0637 |
|
|
Epoch 28, weight, value: tensor([[ 0.2514, 0.2523, 0.2883, ..., -0.0173, -0.0038, -0.0165], |
|
|
[-0.0548, -0.0502, -0.0744, ..., 0.0596, 0.0609, 0.0527], |
|
|
[-0.0947, -0.0856, -0.1030, ..., 0.0477, 0.0272, 0.0195], |
|
|
..., |
|
|
[-0.1235, -0.1248, -0.0757, ..., 0.0164, 0.0739, 0.0545], |
|
|
[-0.0161, -0.0152, -0.0411, ..., -0.1507, -0.1689, -0.1832], |
|
|
[ 0.1032, 0.1029, 0.0916, ..., -0.0280, -0.0190, 0.0081]], |
|
|
device='cuda:0'), grad: tensor([[-6.0415e-04, -3.0947e-04, -3.3808e-04, ..., -1.3041e-04, |
|
|
-1.2791e-04, -1.2898e-04], |
|
|
[ 4.1664e-05, 2.1353e-05, 2.3395e-05, ..., 9.0152e-06, |
|
|
8.7842e-06, 8.8438e-06], |
|
|
[ 1.7655e-04, 9.0182e-05, 9.8526e-05, ..., 3.8236e-05, |
|
|
3.7462e-05, 3.7819e-05], |
|
|
..., |
|
|
[ 1.4520e-04, 7.4148e-05, 8.1003e-05, ..., 3.1382e-05, |
|
|
3.0756e-05, 3.1054e-05], |
|
|
[ 1.5426e-04, 7.8619e-05, 8.5890e-05, ..., 3.3408e-05, |
|
|
3.2693e-05, 3.3081e-05], |
|
|
[ 3.6776e-05, 1.9848e-05, 2.1681e-05, ..., 7.6070e-06, |
|
|
7.5847e-06, 7.5288e-06]], device='cuda:0') |
|
|
Epoch 28, bias, value: tensor([-0.0358, 0.0521, -0.0681, -0.0243, 0.0515, -0.0394, 0.0684], |
|
|
device='cuda:0'), grad: tensor([-9.4175e-04, 6.4969e-05, 2.7657e-04, 7.8797e-05, 2.2757e-04, |
|
|
2.4247e-04, 5.2065e-05], device='cuda:0') |
|
|
306 |
|
|
0.006756874120406716 |
|
|
changing lr |
|
|
epoch 27, time 426.89, cls_loss 0.0093 cls_loss_mapping 0.0824 cls_loss_causal 0.7365 re_mapping 0.0618 re_causal 0.0635 |
|
|
Epoch 29, weight, value: tensor([[ 0.2525, 0.2524, 0.2881, ..., -0.0166, -0.0033, -0.0158], |
|
|
[-0.0550, -0.0497, -0.0737, ..., 0.0594, 0.0608, 0.0527], |
|
|
[-0.0931, -0.0852, -0.1024, ..., 0.0472, 0.0270, 0.0195], |
|
|
..., |
|
|
[-0.1213, -0.1236, -0.0752, ..., 0.0161, 0.0732, 0.0540], |
|
|
[-0.0171, -0.0156, -0.0414, ..., -0.1496, -0.1678, -0.1820], |
|
|
[ 0.0988, 0.1004, 0.0894, ..., -0.0284, -0.0196, 0.0071]], |
|
|
device='cuda:0'), grad: tensor([[ 8.8453e-04, 5.9783e-05, 7.9036e-05, ..., 3.7479e-04, |
|
|
4.0603e-04, 4.2248e-04], |
|
|
[ 8.6021e-04, 1.4782e-04, 1.5628e-04, ..., 3.4332e-04, |
|
|
3.6836e-04, 3.8695e-04], |
|
|
[-3.3932e-03, -5.7173e-04, -6.1417e-04, ..., -1.2398e-03, |
|
|
-1.3380e-03, -1.4076e-03], |
|
|
..., |
|
|
[ 7.9203e-04, 1.7011e-04, 1.7536e-04, ..., 2.8706e-04, |
|
|
3.0875e-04, 3.2568e-04], |
|
|
[ 9.2268e-04, 1.6189e-04, 1.6904e-04, ..., 3.9434e-04, |
|
|
4.2653e-04, 4.4417e-04], |
|
|
[ 6.0749e-04, 1.1957e-04, 1.2189e-04, ..., 2.7061e-04, |
|
|
2.9230e-04, 3.0398e-04]], device='cuda:0') |
|
|
Epoch 29, bias, value: tensor([-0.0325, 0.0499, -0.0641, -0.0240, 0.0539, -0.0409, 0.0621], |
|
|
device='cuda:0'), grad: tensor([ 0.0026, 0.0023, -0.0089, -0.0022, 0.0020, 0.0025, 0.0017], |
|
|
device='cuda:0') |
|
|
306 |
|
|
0.00654508497187474 |
|
|
changing lr |
|
|
epoch 28, time 428.99, cls_loss 0.0094 cls_loss_mapping 0.0876 cls_loss_causal 0.7353 re_mapping 0.0604 re_causal 0.0621 |
|
|
Epoch 30, weight, value: tensor([[ 0.2499, 0.2511, 0.2864, ..., -0.0170, -0.0040, -0.0164], |
|
|
[-0.0549, -0.0491, -0.0729, ..., 0.0583, 0.0598, 0.0516], |
|
|
[-0.0926, -0.0852, -0.1023, ..., 0.0469, 0.0268, 0.0194], |
|
|
..., |
|
|
[-0.1186, -0.1221, -0.0742, ..., 0.0167, 0.0734, 0.0544], |
|
|
[-0.0186, -0.0162, -0.0417, ..., -0.1486, -0.1666, -0.1807], |
|
|
[ 0.0981, 0.0995, 0.0886, ..., -0.0280, -0.0192, 0.0073]], |
|
|
device='cuda:0'), grad: tensor([[-1.0252e-04, -6.0618e-05, -6.3598e-05, ..., -1.8939e-05, |
|
|
-2.0102e-05, -2.1175e-05], |
|
|
[ 1.9372e-07, 9.1270e-07, 1.0356e-06, ..., -1.5274e-07, |
|
|
-1.6391e-07, -1.3039e-07], |
|
|
[ 8.8155e-05, 4.9591e-05, 5.1677e-05, ..., 1.5959e-05, |
|
|
1.6943e-05, 1.8016e-05], |
|
|
..., |
|
|
[ 2.2665e-05, 5.7630e-06, 4.2319e-06, ..., 5.5581e-06, |
|
|
6.2585e-06, 7.0035e-06], |
|
|
[-1.6429e-06, 1.9372e-07, 3.2037e-07, ..., 1.7881e-07, |
|
|
2.0862e-07, -2.9802e-08], |
|
|
[-8.9109e-06, 3.0212e-06, 5.1446e-06, ..., -2.9318e-06, |
|
|
-3.5129e-06, -4.0941e-06]], device='cuda:0') |
|
|
Epoch 30, bias, value: tensor([-0.0365, 0.0475, -0.0623, -0.0214, 0.0576, -0.0432, 0.0627], |
|
|
device='cuda:0'), grad: tensor([-1.2743e-04, -6.9290e-07, 1.1796e-04, 3.1106e-06, 5.4836e-05, |
|
|
-1.0312e-05, -3.7611e-05], device='cuda:0') |
|
|
306 |
|
|
0.006330184227833378 |
|
|
changing lr |
|
|
---------------------saving model at epoch 29---------------------------------------------------- |
|
|
epoch 29, time 434.47, cls_loss 0.0094 cls_loss_mapping 0.0785 cls_loss_causal 0.7117 re_mapping 0.0606 re_causal 0.0626 |
|
|
Epoch 31, weight, value: tensor([[ 0.2503, 0.2510, 0.2860, ..., -0.0165, -0.0035, -0.0157], |
|
|
[-0.0539, -0.0484, -0.0720, ..., 0.0578, 0.0594, 0.0514], |
|
|
[-0.0942, -0.0859, -0.1026, ..., 0.0460, 0.0261, 0.0188], |
|
|
..., |
|
|
[-0.1188, -0.1221, -0.0747, ..., 0.0166, 0.0727, 0.0539], |
|
|
[-0.0162, -0.0152, -0.0408, ..., -0.1465, -0.1646, -0.1787], |
|
|
[ 0.0972, 0.0984, 0.0877, ..., -0.0281, -0.0194, 0.0069]], |
|
|
device='cuda:0'), grad: tensor([[-4.4346e-04, -2.6488e-04, -2.6155e-04, ..., -9.3997e-05, |
|
|
-1.1146e-04, -1.1992e-04], |
|
|
[-1.3721e-04, -4.3064e-05, -4.4137e-05, ..., -5.4270e-05, |
|
|
-5.3287e-05, -5.9724e-05], |
|
|
[ 3.0375e-04, 1.5330e-04, 1.5008e-04, ..., 8.3685e-05, |
|
|
9.2566e-05, 1.0157e-04], |
|
|
..., |
|
|
[ 1.5867e-04, 9.5129e-05, 9.6679e-05, ..., 3.1501e-05, |
|
|
3.5971e-05, 3.8385e-05], |
|
|
[ 3.0577e-05, 1.7941e-05, 1.7643e-05, ..., 7.5772e-06, |
|
|
8.9332e-06, 9.7007e-06], |
|
|
[ 5.4836e-05, 2.5928e-05, 2.5794e-05, ..., 1.6108e-05, |
|
|
1.7121e-05, 1.8835e-05]], device='cuda:0') |
|
|
Epoch 31, bias, value: tensor([-0.0348, 0.0479, -0.0655, -0.0245, 0.0561, -0.0370, 0.0620], |
|
|
device='cuda:0'), grad: tensor([-5.3549e-04, -4.0984e-04, 5.2595e-04, 6.1989e-05, 2.0742e-04, |
|
|
4.1455e-05, 1.0896e-04], device='cuda:0') |
|
|
306 |
|
|
0.006112604669781575 |
|
|
changing lr |
|
|
epoch 30, time 430.27, cls_loss 0.0094 cls_loss_mapping 0.0758 cls_loss_causal 0.7057 re_mapping 0.0586 re_causal 0.0606 |
|
|
Epoch 32, weight, value: tensor([[ 0.2509, 0.2513, 0.2860, ..., -0.0162, -0.0034, -0.0155], |
|
|
[-0.0541, -0.0483, -0.0717, ..., 0.0573, 0.0590, 0.0510], |
|
|
[-0.0944, -0.0854, -0.1020, ..., 0.0452, 0.0255, 0.0182], |
|
|
..., |
|
|
[-0.1200, -0.1226, -0.0757, ..., 0.0161, 0.0717, 0.0530], |
|
|
[-0.0130, -0.0142, -0.0394, ..., -0.1452, -0.1632, -0.1771], |
|
|
[ 0.0928, 0.0963, 0.0856, ..., -0.0284, -0.0198, 0.0063]], |
|
|
device='cuda:0'), grad: tensor([[ 1.1826e-03, 5.4789e-04, 4.1938e-04, ..., 3.5620e-04, |
|
|
4.4441e-04, 4.4823e-04], |
|
|
[-3.4008e-03, -1.2341e-03, -9.7752e-04, ..., -9.1982e-04, |
|
|
-1.2970e-03, -1.2903e-03], |
|
|
[ 1.4651e-04, 4.3333e-05, 2.9847e-05, ..., 3.4630e-05, |
|
|
6.0350e-05, 5.6565e-05], |
|
|
..., |
|
|
[ 8.8120e-04, 2.8610e-04, 2.3878e-04, ..., 1.9145e-04, |
|
|
3.2616e-04, 3.1114e-04], |
|
|
[ 3.8791e-04, 9.6798e-05, 7.8917e-05, ..., 1.2290e-04, |
|
|
1.5807e-04, 1.6606e-04], |
|
|
[ 5.0402e-04, 1.7214e-04, 1.4174e-04, ..., 1.1754e-04, |
|
|
1.8585e-04, 1.8072e-04]], device='cuda:0') |
|
|
Epoch 32, bias, value: tensor([-0.0336, 0.0461, -0.0671, -0.0183, 0.0533, -0.0309, 0.0549], |
|
|
device='cuda:0'), grad: tensor([ 0.0021, -0.0077, 0.0004, 0.0007, 0.0023, 0.0010, 0.0012], |
|
|
device='cuda:0') |
|
|
306 |
|
|
0.005892784473993186 |
|
|
changing lr |
|
|
---------------------saving model at epoch 31---------------------------------------------------- |
|
|
epoch 31, time 433.10, cls_loss 0.0076 cls_loss_mapping 0.0626 cls_loss_causal 0.6597 re_mapping 0.0570 re_causal 0.0588 |
|
|
Epoch 33, weight, value: tensor([[ 0.2517, 0.2513, 0.2859, ..., -0.0157, -0.0029, -0.0148], |
|
|
[-0.0529, -0.0478, -0.0712, ..., 0.0578, 0.0593, 0.0514], |
|
|
[-0.0946, -0.0857, -0.1020, ..., 0.0441, 0.0248, 0.0176], |
|
|
..., |
|
|
[-0.1194, -0.1216, -0.0752, ..., 0.0161, 0.0713, 0.0527], |
|
|
[-0.0172, -0.0160, -0.0411, ..., -0.1449, -0.1629, -0.1767], |
|
|
[ 0.0949, 0.0966, 0.0861, ..., -0.0278, -0.0193, 0.0065]], |
|
|
device='cuda:0'), grad: tensor([[-1.3514e-03, -7.8011e-04, -7.8249e-04, ..., -2.5344e-04, |
|
|
-2.6321e-04, -3.0375e-04], |
|
|
[ 2.7561e-04, 1.6153e-04, 1.6010e-04, ..., 5.0634e-05, |
|
|
5.1320e-05, 6.0827e-05], |
|
|
[ 2.5606e-04, 1.3661e-04, 1.3423e-04, ..., 5.1290e-05, |
|
|
5.2512e-05, 6.0558e-05], |
|
|
..., |
|
|
[ 3.1090e-04, 1.7726e-04, 1.7977e-04, ..., 5.7518e-05, |
|
|
6.1095e-05, 6.9439e-05], |
|
|
[ 5.1171e-05, 4.8816e-05, 5.0664e-05, ..., 1.1854e-05, |
|
|
1.3553e-05, 1.4424e-05], |
|
|
[ 2.8563e-04, 1.6415e-04, 1.6391e-04, ..., 5.2303e-05, |
|
|
5.2512e-05, 6.1929e-05]], device='cuda:0') |
|
|
Epoch 33, bias, value: tensor([-0.0312, 0.0491, -0.0674, -0.0199, 0.0518, -0.0381, 0.0600], |
|
|
device='cuda:0'), grad: tensor([-1.9341e-03, 3.7980e-04, 4.0054e-04, 2.6917e-04, 4.5681e-04, |
|
|
2.0683e-05, 4.0674e-04], device='cuda:0') |
|
|
306 |
|
|
0.00567116632908828 |
|
|
changing lr |
|
|
epoch 32, time 426.99, cls_loss 0.0106 cls_loss_mapping 0.0678 cls_loss_causal 0.6951 re_mapping 0.0560 re_causal 0.0581 |
|
|
Epoch 34, weight, value: tensor([[ 0.2521, 0.2514, 0.2857, ..., -0.0157, -0.0028, -0.0148], |
|
|
[-0.0508, -0.0470, -0.0703, ..., 0.0579, 0.0594, 0.0517], |
|
|
[-0.0956, -0.0860, -0.1020, ..., 0.0434, 0.0242, 0.0169], |
|
|
..., |
|
|
[-0.1187, -0.1210, -0.0750, ..., 0.0163, 0.0710, 0.0527], |
|
|
[-0.0184, -0.0165, -0.0415, ..., -0.1441, -0.1620, -0.1757], |
|
|
[ 0.0944, 0.0958, 0.0854, ..., -0.0276, -0.0192, 0.0065]], |
|
|
device='cuda:0'), grad: tensor([[ 5.7459e-04, 2.7680e-04, 2.7418e-04, ..., 5.6207e-05, |
|
|
6.7353e-05, 1.0335e-04], |
|
|
[ 1.3304e-04, 6.0856e-05, 5.9932e-05, ..., 1.6704e-05, |
|
|
1.9088e-05, 2.7254e-05], |
|
|
[-1.2275e-06, 2.3041e-06, 2.5667e-06, ..., -1.2163e-06, |
|
|
-2.1253e-06, -1.9837e-06], |
|
|
..., |
|
|
[ 2.1264e-05, 1.0088e-05, 1.0043e-05, ..., 2.2147e-06, |
|
|
2.6375e-06, 3.8743e-06], |
|
|
[ 1.0014e-04, 4.5031e-05, 4.4435e-05, ..., 1.4551e-05, |
|
|
1.6063e-05, 2.1860e-05], |
|
|
[-8.6927e-04, -4.2367e-04, -4.2057e-04, ..., -8.1062e-05, |
|
|
-9.7871e-05, -1.5199e-04]], device='cuda:0') |
|
|
Epoch 34, bias, value: tensor([-0.0304, 0.0528, -0.0694, -0.0211, 0.0513, -0.0399, 0.0607], |
|
|
device='cuda:0'), grad: tensor([ 1.0033e-03, 2.4414e-04, -6.8992e-06, 3.7402e-05, 3.8028e-05, |
|
|
1.8930e-04, -1.5049e-03], device='cuda:0') |
|
|
306 |
|
|
0.00544819654451717 |
|
|
changing lr |
|
|
epoch 33, time 427.05, cls_loss 0.0080 cls_loss_mapping 0.0678 cls_loss_causal 0.7349 re_mapping 0.0560 re_causal 0.0582 |
|
|
Epoch 35, weight, value: tensor([[ 0.2496, 0.2498, 0.2841, ..., -0.0159, -0.0032, -0.0152], |
|
|
[-0.0509, -0.0468, -0.0699, ..., 0.0572, 0.0588, 0.0512], |
|
|
[-0.0940, -0.0851, -0.1011, ..., 0.0432, 0.0242, 0.0171], |
|
|
..., |
|
|
[-0.1182, -0.1201, -0.0745, ..., 0.0161, 0.0704, 0.0523], |
|
|
[-0.0183, -0.0166, -0.0416, ..., -0.1430, -0.1609, -0.1745], |
|
|
[ 0.0933, 0.0948, 0.0846, ..., -0.0276, -0.0192, 0.0062]], |
|
|
device='cuda:0'), grad: tensor([[-2.4395e-03, -1.4315e-03, -1.4219e-03, ..., -6.6614e-04, |
|
|
-6.5660e-04, -7.2098e-04], |
|
|
[ 4.0746e-04, 1.6570e-04, 1.5414e-04, ..., 8.9586e-05, |
|
|
9.1910e-05, 1.0550e-04], |
|
|
[ 4.8685e-04, 2.4652e-04, 2.3901e-04, ..., 1.1039e-04, |
|
|
1.1253e-04, 1.2529e-04], |
|
|
..., |
|
|
[ 8.7929e-04, 5.0354e-04, 4.9829e-04, ..., 2.5129e-04, |
|
|
2.4438e-04, 2.7108e-04], |
|
|
[-3.9077e-04, 3.8408e-06, 4.0323e-05, ..., -4.2140e-05, |
|
|
-4.7147e-05, -6.9320e-05], |
|
|
[ 8.9645e-04, 4.3797e-04, 4.2105e-04, ..., 2.1875e-04, |
|
|
2.1732e-04, 2.4557e-04]], device='cuda:0') |
|
|
Epoch 35, bias, value: tensor([-0.0333, 0.0510, -0.0670, -0.0178, 0.0498, -0.0383, 0.0596], |
|
|
device='cuda:0'), grad: tensor([-0.0039, 0.0008, 0.0008, 0.0003, 0.0015, -0.0012, 0.0016], |
|
|
device='cuda:0') |
|
|
306 |
|
|
0.005224324151752577 |
|
|
changing lr |
|
|
epoch 34, time 428.33, cls_loss 0.0042 cls_loss_mapping 0.0575 cls_loss_causal 0.6963 re_mapping 0.0553 re_causal 0.0577 |
|
|
Epoch 36, weight, value: tensor([[ 0.2492, 0.2493, 0.2833, ..., -0.0158, -0.0032, -0.0151], |
|
|
[-0.0521, -0.0472, -0.0701, ..., 0.0566, 0.0582, 0.0505], |
|
|
[-0.0929, -0.0847, -0.1006, ..., 0.0430, 0.0242, 0.0171], |
|
|
..., |
|
|
[-0.1164, -0.1190, -0.0738, ..., 0.0165, 0.0704, 0.0524], |
|
|
[-0.0172, -0.0164, -0.0414, ..., -0.1417, -0.1595, -0.1728], |
|
|
[ 0.0920, 0.0938, 0.0838, ..., -0.0276, -0.0193, 0.0059]], |
|
|
device='cuda:0'), grad: tensor([[-2.1362e-04, -1.1706e-04, -1.1629e-04, ..., -5.4300e-05, |
|
|
-5.6475e-05, -5.9038e-05], |
|
|
[ 2.7761e-05, 1.4283e-05, 1.4096e-05, ..., 6.8471e-06, |
|
|
7.1414e-06, 7.5735e-06], |
|
|
[ 1.2493e-04, 6.5327e-05, 6.4611e-05, ..., 3.1233e-05, |
|
|
3.2574e-05, 3.4422e-05], |
|
|
..., |
|
|
[ 1.7971e-05, 9.3132e-06, 9.1493e-06, ..., 3.6974e-06, |
|
|
3.9116e-06, 4.1649e-06], |
|
|
[ 4.7088e-05, 2.2233e-05, 2.1875e-05, ..., 1.0654e-05, |
|
|
1.1109e-05, 1.2226e-05], |
|
|
[-1.9550e-05, -2.3656e-06, -1.5628e-06, ..., -1.5832e-06, |
|
|
-1.8850e-06, -3.2447e-06]], device='cuda:0') |
|
|
Epoch 36, bias, value: tensor([-0.0330, 0.0474, -0.0645, -0.0211, 0.0514, -0.0336, 0.0575], |
|
|
device='cuda:0'), grad: tensor([-3.4666e-04, 4.8816e-05, 2.1541e-04, 2.4825e-05, 3.2306e-05, |
|
|
9.2864e-05, -6.8188e-05], device='cuda:0') |
|
|
306 |
|
|
0.005000000000000003 |
|
|
changing lr |
|
|
epoch 35, time 428.68, cls_loss 0.0057 cls_loss_mapping 0.0564 cls_loss_causal 0.6790 re_mapping 0.0534 re_causal 0.0557 |
|
|
Epoch 37, weight, value: tensor([[ 0.2490, 0.2492, 0.2831, ..., -0.0157, -0.0032, -0.0150], |
|
|
[-0.0533, -0.0474, -0.0702, ..., 0.0559, 0.0575, 0.0498], |
|
|
[-0.0930, -0.0845, -0.1003, ..., 0.0425, 0.0239, 0.0169], |
|
|
..., |
|
|
[-0.1153, -0.1182, -0.0733, ..., 0.0167, 0.0703, 0.0524], |
|
|
[-0.0177, -0.0169, -0.0417, ..., -0.1408, -0.1588, -0.1719], |
|
|
[ 0.0928, 0.0935, 0.0835, ..., -0.0270, -0.0188, 0.0063]], |
|
|
device='cuda:0'), grad: tensor([[-1.9026e-04, -1.2153e-04, -1.2040e-04, ..., -2.5526e-05, |
|
|
-2.8968e-05, -3.4362e-05], |
|
|
[ 1.2696e-04, 4.9949e-05, 4.9144e-05, ..., 3.4660e-05, |
|
|
4.0740e-05, 4.1753e-05], |
|
|
[ 8.4221e-05, 4.2409e-05, 4.1664e-05, ..., 1.6063e-05, |
|
|
1.8761e-05, 2.1011e-05], |
|
|
..., |
|
|
[-4.0221e-04, -1.1349e-04, -1.1182e-04, ..., -1.4126e-04, |
|
|
-1.6749e-04, -1.6463e-04], |
|
|
[ 1.6600e-05, 1.8686e-05, 2.0117e-05, ..., 3.7812e-07, |
|
|
2.9206e-06, 2.1532e-06], |
|
|
[ 1.7178e-04, 6.0230e-05, 5.8681e-05, ..., 5.3525e-05, |
|
|
6.0827e-05, 6.0886e-05]], device='cuda:0') |
|
|
Epoch 37, bias, value: tensor([-0.0331, 0.0438, -0.0652, -0.0216, 0.0517, -0.0324, 0.0609], |
|
|
device='cuda:0'), grad: tensor([-2.2066e-04, 2.5630e-04, 1.3638e-04, 4.3583e-04, -9.7466e-04, |
|
|
-8.4341e-06, 3.7503e-04], device='cuda:0') |
|
|
306 |
|
|
0.004775675848247429 |
|
|
changing lr |
|
|
epoch 36, time 426.09, cls_loss 0.0056 cls_loss_mapping 0.0548 cls_loss_causal 0.6659 re_mapping 0.0529 re_causal 0.0553 |
|
|
Epoch 38, weight, value: tensor([[ 0.2494, 0.2492, 0.2830, ..., -0.0157, -0.0032, -0.0149], |
|
|
[-0.0519, -0.0468, -0.0695, ..., 0.0557, 0.0575, 0.0499], |
|
|
[-0.0936, -0.0847, -0.1003, ..., 0.0419, 0.0234, 0.0164], |
|
|
..., |
|
|
[-0.1138, -0.1172, -0.0726, ..., 0.0171, 0.0703, 0.0526], |
|
|
[-0.0186, -0.0172, -0.0419, ..., -0.1402, -0.1581, -0.1712], |
|
|
[ 0.0912, 0.0921, 0.0822, ..., -0.0268, -0.0187, 0.0063]], |
|
|
device='cuda:0'), grad: tensor([[ 3.4750e-05, 1.2450e-05, 1.1384e-05, ..., 8.5309e-06, |
|
|
9.8050e-06, 1.1593e-05], |
|
|
[-1.0271e-03, -4.1318e-04, -3.9172e-04, ..., -1.4448e-04, |
|
|
-1.7440e-04, -2.1589e-04], |
|
|
[ 5.4985e-06, 2.5854e-06, 2.5835e-06, ..., 2.0489e-08, |
|
|
1.8626e-08, 2.0862e-07], |
|
|
..., |
|
|
[-4.6492e-06, -3.4589e-06, -3.4049e-06, ..., -4.2692e-06, |
|
|
-5.1111e-06, -5.2936e-06], |
|
|
[-1.3888e-05, -8.7917e-07, -6.4634e-07, ..., -3.5372e-06, |
|
|
-2.5574e-06, -3.6340e-06], |
|
|
[ 9.9373e-04, 3.9911e-04, 3.7861e-04, ..., 1.4126e-04, |
|
|
1.6975e-04, 2.0993e-04]], device='cuda:0') |
|
|
Epoch 38, bias, value: tensor([-0.0323, 0.0466, -0.0664, -0.0230, 0.0530, -0.0339, 0.0600], |
|
|
device='cuda:0'), grad: tensor([ 7.5400e-05, -1.8406e-03, 8.1286e-06, 3.1173e-05, -1.1809e-05, |
|
|
-4.4286e-05, 1.7853e-03], device='cuda:0') |
|
|
306 |
|
|
0.004551803455482836 |
|
|
changing lr |
|
|
epoch 37, time 425.12, cls_loss 0.0061 cls_loss_mapping 0.0590 cls_loss_causal 0.6774 re_mapping 0.0525 re_causal 0.0551 |
|
|
Epoch 39, weight, value: tensor([[ 0.2483, 0.2485, 0.2821, ..., -0.0158, -0.0035, -0.0151], |
|
|
[-0.0507, -0.0462, -0.0687, ..., 0.0556, 0.0575, 0.0499], |
|
|
[-0.0933, -0.0845, -0.1000, ..., 0.0415, 0.0232, 0.0162], |
|
|
..., |
|
|
[-0.1130, -0.1164, -0.0722, ..., 0.0172, 0.0700, 0.0525], |
|
|
[-0.0192, -0.0175, -0.0421, ..., -0.1396, -0.1574, -0.1704], |
|
|
[ 0.0900, 0.0911, 0.0812, ..., -0.0268, -0.0187, 0.0061]], |
|
|
device='cuda:0'), grad: tensor([[-1.6487e-04, -1.0222e-04, -1.0294e-04, ..., -1.8775e-05, |
|
|
-1.5110e-05, -1.9088e-05], |
|
|
[ 2.0981e-05, 3.2540e-06, 2.7195e-06, ..., 5.4128e-06, |
|
|
6.1393e-06, 7.4096e-06], |
|
|
[ 1.8418e-04, 1.0049e-04, 1.0014e-04, ..., 2.5466e-05, |
|
|
2.3037e-05, 2.8580e-05], |
|
|
..., |
|
|
[ 4.7654e-05, 9.2238e-06, 7.8902e-06, ..., 1.1139e-05, |
|
|
1.2316e-05, 1.5184e-05], |
|
|
[-1.4281e-04, -1.4052e-05, -9.4771e-06, ..., -3.4928e-05, |
|
|
-3.9577e-05, -4.9472e-05], |
|
|
[ 8.9034e-06, -2.5947e-06, -2.8443e-06, ..., 2.9244e-07, |
|
|
4.1351e-07, 1.5832e-06]], device='cuda:0') |
|
|
Epoch 39, bias, value: tensor([-0.0338, 0.0484, -0.0660, -0.0216, 0.0525, -0.0344, 0.0589], |
|
|
device='cuda:0'), grad: tensor([-1.9264e-04, 6.2644e-05, 2.6774e-04, 1.4007e-04, 1.3340e-04, |
|
|
-4.5323e-04, 4.2528e-05], device='cuda:0') |
|
|
306 |
|
|
0.004328833670911726 |
|
|
changing lr |
|
|
---------------------saving model at epoch 38---------------------------------------------------- |
|
|
epoch 38, time 430.89, cls_loss 0.0048 cls_loss_mapping 0.0498 cls_loss_causal 0.6565 re_mapping 0.0502 re_causal 0.0527 |
|
|
Epoch 40, weight, value: tensor([[ 0.2478, 0.2481, 0.2815, ..., -0.0158, -0.0035, -0.0151], |
|
|
[-0.0505, -0.0459, -0.0683, ..., 0.0551, 0.0570, 0.0494], |
|
|
[-0.0923, -0.0841, -0.0996, ..., 0.0414, 0.0233, 0.0164], |
|
|
..., |
|
|
[-0.1132, -0.1161, -0.0721, ..., 0.0171, 0.0696, 0.0521], |
|
|
[-0.0200, -0.0178, -0.0422, ..., -0.1390, -0.1568, -0.1697], |
|
|
[ 0.0894, 0.0902, 0.0805, ..., -0.0267, -0.0188, 0.0059]], |
|
|
device='cuda:0'), grad: tensor([[-3.8218e-04, -2.5368e-04, -2.5129e-04, ..., -3.4750e-05, |
|
|
-4.4465e-05, -5.1945e-05], |
|
|
[ 9.3281e-05, 5.9783e-05, 5.9187e-05, ..., 1.0341e-05, |
|
|
1.3016e-05, 1.4789e-05], |
|
|
[ 1.3657e-05, 1.0043e-05, 9.9838e-06, ..., 1.3318e-06, |
|
|
1.8254e-06, 1.9278e-06], |
|
|
..., |
|
|
[ 5.6207e-05, 3.2425e-05, 3.2097e-05, ..., 8.9929e-06, |
|
|
1.1019e-05, 1.1913e-05], |
|
|
[ 4.3213e-06, 8.9258e-06, 9.3654e-06, ..., -5.5321e-07, |
|
|
8.8103e-07, 4.0978e-07], |
|
|
[ 2.1148e-04, 1.3483e-04, 1.3328e-04, ..., 2.1800e-05, |
|
|
2.6941e-05, 3.1203e-05]], device='cuda:0') |
|
|
Epoch 40, bias, value: tensor([-0.0341, 0.0476, -0.0636, -0.0197, 0.0505, -0.0361, 0.0592], |
|
|
device='cuda:0'), grad: tensor([-3.9268e-04, 1.0526e-04, 1.1511e-05, -2.8744e-05, 7.9453e-05, |
|
|
-1.2942e-05, 2.3806e-04], device='cuda:0') |
|
|
306 |
|
|
0.0041072155260068206 |
|
|
changing lr |
|
|
epoch 39, time 431.70, cls_loss 0.0051 cls_loss_mapping 0.0535 cls_loss_causal 0.7021 re_mapping 0.0495 re_causal 0.0520 |
|
|
Epoch 41, weight, value: tensor([[ 0.2513, 0.2500, 0.2832, ..., -0.0149, -0.0026, -0.0140], |
|
|
[-0.0511, -0.0459, -0.0682, ..., 0.0546, 0.0565, 0.0489], |
|
|
[-0.0920, -0.0841, -0.0995, ..., 0.0412, 0.0232, 0.0164], |
|
|
..., |
|
|
[-0.1122, -0.1155, -0.0718, ..., 0.0172, 0.0693, 0.0519], |
|
|
[-0.0206, -0.0180, -0.0424, ..., -0.1384, -0.1561, -0.1690], |
|
|
[ 0.0860, 0.0879, 0.0783, ..., -0.0271, -0.0193, 0.0051]], |
|
|
device='cuda:0'), grad: tensor([[-9.6977e-05, -7.2122e-05, -7.0632e-05, ..., -4.6343e-06, |
|
|
-6.8955e-06, -6.5528e-06], |
|
|
[ 7.1883e-05, 1.8924e-05, 1.8641e-05, ..., 2.4423e-05, |
|
|
2.8446e-05, 3.0786e-05], |
|
|
[-1.3530e-04, -5.0068e-06, -4.9546e-06, ..., -5.1588e-05, |
|
|
-5.1260e-05, -5.7846e-05], |
|
|
..., |
|
|
[ 1.7941e-04, 4.3571e-05, 4.2886e-05, ..., 5.6475e-05, |
|
|
6.2644e-05, 6.8307e-05], |
|
|
[-4.5598e-05, 6.7055e-08, 9.5367e-07, ..., -1.6466e-05, |
|
|
-2.1920e-05, -2.2978e-05], |
|
|
[-3.1441e-05, 5.8487e-07, -5.1036e-07, ..., -2.8342e-05, |
|
|
-3.4481e-05, -3.7223e-05]], device='cuda:0') |
|
|
Epoch 41, bias, value: tensor([-0.0283, 0.0453, -0.0618, -0.0206, 0.0508, -0.0370, 0.0554], |
|
|
device='cuda:0'), grad: tensor([-7.4744e-05, 1.7619e-04, -4.0269e-04, 1.4830e-04, 4.4298e-04, |
|
|
-2.0444e-04, -8.5533e-05], device='cuda:0') |
|
|
306 |
|
|
0.0038873953302184317 |
|
|
changing lr |
|
|
epoch 40, time 429.47, cls_loss 0.0052 cls_loss_mapping 0.0468 cls_loss_causal 0.6742 re_mapping 0.0483 re_causal 0.0508 |
|
|
Epoch 42, weight, value: tensor([[ 0.2490, 0.2490, 0.2821, ..., -0.0153, -0.0030, -0.0145], |
|
|
[-0.0508, -0.0456, -0.0678, ..., 0.0542, 0.0562, 0.0486], |
|
|
[-0.0907, -0.0838, -0.0991, ..., 0.0412, 0.0233, 0.0167], |
|
|
..., |
|
|
[-0.1121, -0.1149, -0.0715, ..., 0.0172, 0.0689, 0.0516], |
|
|
[-0.0202, -0.0181, -0.0423, ..., -0.1376, -0.1552, -0.1681], |
|
|
[ 0.0861, 0.0873, 0.0777, ..., -0.0269, -0.0191, 0.0052]], |
|
|
device='cuda:0'), grad: tensor([[ 7.0524e-04, 2.4354e-04, 2.1636e-04, ..., 9.5367e-05, |
|
|
1.0043e-04, 1.0616e-04], |
|
|
[ 8.6203e-06, -1.9744e-06, -2.6431e-06, ..., -6.4969e-06, |
|
|
-6.2473e-06, -7.8455e-06], |
|
|
[ 1.5199e-04, 4.8757e-05, 4.3005e-05, ..., 3.3259e-05, |
|
|
3.6716e-05, 3.8743e-05], |
|
|
..., |
|
|
[ 9.5427e-05, 3.2485e-05, 2.9102e-05, ..., 1.6078e-05, |
|
|
1.7956e-05, 1.9237e-05], |
|
|
[ 7.1466e-05, 2.4527e-05, 2.2009e-05, ..., 1.1332e-05, |
|
|
1.2673e-05, 1.3590e-05], |
|
|
[-9.4128e-04, -3.2496e-04, -2.8896e-04, ..., -1.0651e-04, |
|
|
-1.1295e-04, -1.1897e-04]], device='cuda:0') |
|
|
Epoch 42, bias, value: tensor([-0.0322, 0.0448, -0.0585, -0.0213, 0.0490, -0.0356, 0.0576], |
|
|
device='cuda:0'), grad: tensor([ 1.4572e-03, 3.4839e-05, 3.1948e-04, -2.0826e-04, 1.9515e-04, |
|
|
1.4567e-04, -1.9464e-03], device='cuda:0') |
|
|
306 |
|
|
0.003669815772166629 |
|
|
changing lr |
|
|
epoch 41, time 427.88, cls_loss 0.0065 cls_loss_mapping 0.0534 cls_loss_causal 0.6753 re_mapping 0.0489 re_causal 0.0515 |
|
|
Epoch 43, weight, value: tensor([[ 0.2492, 0.2492, 0.2821, ..., -0.0153, -0.0032, -0.0146], |
|
|
[-0.0510, -0.0455, -0.0676, ..., 0.0537, 0.0557, 0.0482], |
|
|
[-0.0909, -0.0837, -0.0989, ..., 0.0408, 0.0231, 0.0165], |
|
|
..., |
|
|
[-0.1110, -0.1143, -0.0712, ..., 0.0175, 0.0690, 0.0519], |
|
|
[-0.0209, -0.0183, -0.0424, ..., -0.1370, -0.1546, -0.1674], |
|
|
[ 0.0852, 0.0864, 0.0768, ..., -0.0269, -0.0192, 0.0050]], |
|
|
device='cuda:0'), grad: tensor([[ 1.2779e-04, 4.0859e-05, 3.5703e-05, ..., 3.8385e-05, |
|
|
4.5091e-05, 5.1558e-05], |
|
|
[ 7.3612e-06, 2.3488e-06, 2.0191e-06, ..., 2.3954e-06, |
|
|
2.9132e-06, 3.2187e-06], |
|
|
[-6.0797e-05, -1.8999e-05, -1.8507e-05, ..., -4.4629e-06, |
|
|
-9.8720e-07, -7.0706e-06], |
|
|
..., |
|
|
[-1.0753e-04, -3.7402e-05, -3.1054e-05, ..., -4.6730e-05, |
|
|
-5.9545e-05, -6.1870e-05], |
|
|
[-4.8988e-06, 8.9221e-07, 1.3467e-06, ..., 1.0617e-07, |
|
|
-4.2282e-07, -1.7136e-07], |
|
|
[ 1.0051e-05, 3.7905e-06, 3.2373e-06, ..., 1.7025e-06, |
|
|
2.3823e-06, 2.7604e-06]], device='cuda:0') |
|
|
Epoch 43, bias, value: tensor([-0.0323, 0.0438, -0.0593, -0.0198, 0.0506, -0.0368, 0.0577], |
|
|
device='cuda:0'), grad: tensor([ 2.9945e-04, 1.6332e-05, -1.7369e-04, 6.4611e-05, -2.1088e-04, |
|
|
-1.8358e-05, 2.2486e-05], device='cuda:0') |
|
|
306 |
|
|
0.0034549150281252667 |
|
|
changing lr |
|
|
epoch 42, time 431.83, cls_loss 0.0043 cls_loss_mapping 0.0461 cls_loss_causal 0.6401 re_mapping 0.0486 re_causal 0.0514 |
|
|
Epoch 44, weight, value: tensor([[ 0.2495, 0.2492, 0.2820, ..., -0.0153, -0.0032, -0.0145], |
|
|
[-0.0501, -0.0451, -0.0671, ..., 0.0536, 0.0556, 0.0481], |
|
|
[-0.0910, -0.0837, -0.0989, ..., 0.0405, 0.0229, 0.0163], |
|
|
..., |
|
|
[-0.1111, -0.1140, -0.0711, ..., 0.0175, 0.0687, 0.0517], |
|
|
[-0.0213, -0.0185, -0.0426, ..., -0.1364, -0.1540, -0.1667], |
|
|
[ 0.0847, 0.0858, 0.0763, ..., -0.0268, -0.0192, 0.0050]], |
|
|
device='cuda:0'), grad: tensor([[-1.4976e-05, -1.8388e-05, -1.7866e-05, ..., 1.8664e-06, |
|
|
2.2594e-06, 2.4643e-06], |
|
|
[ 1.0014e-04, 9.9018e-06, 6.0499e-06, ..., 3.9488e-05, |
|
|
4.5002e-05, 4.9442e-05], |
|
|
[ 2.6536e-04, 2.3797e-05, 1.4089e-05, ..., 1.0502e-04, |
|
|
1.2076e-04, 1.3196e-04], |
|
|
..., |
|
|
[-6.3515e-04, -4.6462e-05, -2.4155e-05, ..., -2.5392e-04, |
|
|
-2.9373e-04, -3.2043e-04], |
|
|
[ 5.5671e-05, 5.0776e-06, 3.2317e-06, ..., 2.2560e-05, |
|
|
2.5466e-05, 2.7850e-05], |
|
|
[ 1.9002e-04, 2.3529e-05, 1.6689e-05, ..., 7.2062e-05, |
|
|
8.2552e-05, 9.0301e-05]], device='cuda:0') |
|
|
Epoch 44, bias, value: tensor([-0.0314, 0.0445, -0.0594, -0.0199, 0.0491, -0.0369, 0.0577], |
|
|
device='cuda:0'), grad: tensor([ 1.7658e-05, 3.0422e-04, 8.1301e-04, 1.1760e-04, -1.9779e-03, |
|
|
1.6999e-04, 5.5742e-04], device='cuda:0') |
|
|
306 |
|
|
0.0032431258795932905 |
|
|
changing lr |
|
|
epoch 43, time 427.82, cls_loss 0.0039 cls_loss_mapping 0.0450 cls_loss_causal 0.6249 re_mapping 0.0474 re_causal 0.0502 |
|
|
Epoch 45, weight, value: tensor([[ 0.2480, 0.2484, 0.2811, ..., -0.0155, -0.0035, -0.0148], |
|
|
[-0.0493, -0.0446, -0.0666, ..., 0.0534, 0.0555, 0.0480], |
|
|
[-0.0911, -0.0836, -0.0987, ..., 0.0401, 0.0227, 0.0161], |
|
|
..., |
|
|
[-0.1105, -0.1136, -0.0709, ..., 0.0176, 0.0687, 0.0517], |
|
|
[-0.0208, -0.0185, -0.0425, ..., -0.1358, -0.1533, -0.1659], |
|
|
[ 0.0842, 0.0853, 0.0759, ..., -0.0268, -0.0191, 0.0049]], |
|
|
device='cuda:0'), grad: tensor([[ 3.8862e-04, 1.0622e-04, 1.0890e-04, ..., 1.5783e-04, |
|
|
1.7059e-04, 1.8311e-04], |
|
|
[ 3.5614e-05, 1.0513e-05, 8.5682e-06, ..., 1.9521e-05, |
|
|
2.2337e-05, 2.3574e-05], |
|
|
[-2.2268e-04, -3.7193e-05, -4.0859e-05, ..., -7.3195e-05, |
|
|
-7.8142e-05, -9.1970e-05], |
|
|
..., |
|
|
[-3.6907e-04, -1.2648e-04, -1.1647e-04, ..., -1.8442e-04, |
|
|
-2.0373e-04, -2.0933e-04], |
|
|
[ 4.9770e-05, 1.2524e-05, 1.2390e-05, ..., 2.1741e-05, |
|
|
2.4214e-05, 2.6256e-05], |
|
|
[ 3.8534e-05, 1.1489e-05, 9.8050e-06, ..., 1.9610e-05, |
|
|
2.1860e-05, 2.2933e-05]], device='cuda:0') |
|
|
Epoch 45, bias, value: tensor([-0.0337, 0.0451, -0.0600, -0.0192, 0.0491, -0.0350, 0.0576], |
|
|
device='cuda:0'), grad: tensor([ 8.6308e-04, 7.6950e-05, -5.8699e-04, 1.7869e-04, -7.2908e-04, |
|
|
1.1402e-04, 8.3148e-05], device='cuda:0') |
|
|
306 |
|
|
0.0030348748417303863 |
|
|
changing lr |
|
|
epoch 44, time 426.53, cls_loss 0.0031 cls_loss_mapping 0.0422 cls_loss_causal 0.6510 re_mapping 0.0467 re_causal 0.0495 |
|
|
Epoch 46, weight, value: tensor([[ 0.2471, 0.2479, 0.2805, ..., -0.0157, -0.0037, -0.0151], |
|
|
[-0.0492, -0.0445, -0.0663, ..., 0.0532, 0.0554, 0.0479], |
|
|
[-0.0904, -0.0834, -0.0984, ..., 0.0401, 0.0227, 0.0162], |
|
|
..., |
|
|
[-0.1096, -0.1130, -0.0704, ..., 0.0179, 0.0687, 0.0519], |
|
|
[-0.0199, -0.0182, -0.0422, ..., -0.1351, -0.1525, -0.1650], |
|
|
[ 0.0826, 0.0844, 0.0751, ..., -0.0269, -0.0193, 0.0046]], |
|
|
device='cuda:0'), grad: tensor([[ 2.2233e-05, 4.0531e-06, 3.6173e-06, ..., 7.3127e-06, |
|
|
8.8513e-06, 9.5591e-06], |
|
|
[ 2.4855e-05, 4.6380e-06, 4.7497e-06, ..., 1.1235e-05, |
|
|
1.2487e-05, 1.3188e-05], |
|
|
[-1.9088e-05, -3.9786e-06, -2.7493e-06, ..., 3.8594e-06, |
|
|
1.6009e-06, -4.5635e-07], |
|
|
..., |
|
|
[ 7.0989e-05, 8.3148e-06, 9.3728e-06, ..., 3.5614e-05, |
|
|
3.9279e-05, 4.1932e-05], |
|
|
[ 1.1116e-04, 1.2904e-05, 1.5192e-05, ..., 6.3360e-05, |
|
|
6.8307e-05, 7.0989e-05], |
|
|
[-3.1441e-05, -1.5251e-05, -1.2942e-05, ..., -6.5006e-07, |
|
|
-3.1181e-06, -3.5092e-06]], device='cuda:0') |
|
|
Epoch 46, bias, value: tensor([-0.0350, 0.0450, -0.0580, -0.0206, 0.0497, -0.0327, 0.0553], |
|
|
device='cuda:0'), grad: tensor([ 6.0409e-05, 6.4909e-05, -5.6982e-05, -5.0735e-04, 1.9813e-04, |
|
|
3.0398e-04, -6.2764e-05], device='cuda:0') |
|
|
306 |
|
|
0.0028305813044122124 |
|
|
changing lr |
|
|
epoch 45, time 428.43, cls_loss 0.0033 cls_loss_mapping 0.0372 cls_loss_causal 0.6389 re_mapping 0.0461 re_causal 0.0489 |
|
|
Epoch 47, weight, value: tensor([[ 0.2468, 0.2476, 0.2801, ..., -0.0156, -0.0037, -0.0151], |
|
|
[-0.0488, -0.0443, -0.0660, ..., 0.0530, 0.0552, 0.0478], |
|
|
[-0.0904, -0.0833, -0.0983, ..., 0.0398, 0.0225, 0.0160], |
|
|
..., |
|
|
[-0.1086, -0.1124, -0.0699, ..., 0.0182, 0.0688, 0.0521], |
|
|
[-0.0199, -0.0184, -0.0423, ..., -0.1346, -0.1520, -0.1645], |
|
|
[ 0.0815, 0.0837, 0.0744, ..., -0.0270, -0.0195, 0.0043]], |
|
|
device='cuda:0'), grad: tensor([[ 2.9951e-05, -2.6792e-05, -3.1501e-05, ..., 8.2031e-06, |
|
|
8.5086e-06, 1.1154e-05], |
|
|
[-1.4961e-04, -6.3956e-05, -6.0052e-05, ..., -5.1737e-05, |
|
|
-6.2943e-05, -6.9201e-05], |
|
|
[-9.4604e-04, -1.0973e-04, -5.1677e-05, ..., -1.7416e-04, |
|
|
-1.8346e-04, -2.0885e-04], |
|
|
..., |
|
|
[ 3.8338e-04, 8.6546e-05, 6.5982e-05, ..., 9.0778e-05, |
|
|
1.0258e-04, 1.1402e-04], |
|
|
[ 4.9114e-04, 6.0260e-05, 3.3230e-05, ..., 8.3089e-05, |
|
|
8.5413e-05, 9.7692e-05], |
|
|
[ 1.1992e-04, 3.7611e-05, 3.2216e-05, ..., 2.8685e-05, |
|
|
3.3110e-05, 3.6567e-05]], device='cuda:0') |
|
|
Epoch 47, bias, value: tensor([-0.0350, 0.0456, -0.0582, -0.0206, 0.0506, -0.0320, 0.0536], |
|
|
device='cuda:0'), grad: tensor([ 0.0002, -0.0003, -0.0027, 0.0002, 0.0010, 0.0014, 0.0003], |
|
|
device='cuda:0') |
|
|
306 |
|
|
0.0026306566876350096 |
|
|
changing lr |
|
|
epoch 46, time 470.73, cls_loss 0.0037 cls_loss_mapping 0.0379 cls_loss_causal 0.6401 re_mapping 0.0456 re_causal 0.0483 |
|
|
Epoch 48, weight, value: tensor([[ 0.2481, 0.2483, 0.2807, ..., -0.0153, -0.0034, -0.0146], |
|
|
[-0.0491, -0.0443, -0.0660, ..., 0.0528, 0.0550, 0.0475], |
|
|
[-0.0905, -0.0833, -0.0982, ..., 0.0396, 0.0224, 0.0159], |
|
|
..., |
|
|
[-0.1091, -0.1123, -0.0700, ..., 0.0180, 0.0684, 0.0517], |
|
|
[-0.0202, -0.0186, -0.0425, ..., -0.1343, -0.1516, -0.1641], |
|
|
[ 0.0814, 0.0831, 0.0739, ..., -0.0267, -0.0193, 0.0045]], |
|
|
device='cuda:0'), grad: tensor([[-6.1356e-06, -1.0031e-04, -1.0329e-04, ..., -3.7737e-06, |
|
|
4.6641e-06, -4.1816e-07], |
|
|
[ 1.4558e-05, 8.7991e-06, 8.2701e-06, ..., 1.1362e-07, |
|
|
1.1306e-06, 1.6019e-06], |
|
|
[-2.8872e-04, -2.8953e-05, -2.7329e-05, ..., -4.5776e-05, |
|
|
-5.9903e-05, -6.5863e-05], |
|
|
..., |
|
|
[ 1.9741e-04, 9.6142e-05, 9.7275e-05, ..., 3.3945e-05, |
|
|
3.6329e-05, 4.4256e-05], |
|
|
[ 5.7846e-05, 1.5557e-05, 1.5691e-05, ..., 1.1273e-05, |
|
|
1.2152e-05, 1.4022e-05], |
|
|
[ 1.4775e-05, 4.5337e-06, 5.0813e-06, ..., 3.1181e-06, |
|
|
4.2319e-06, 4.7013e-06]], device='cuda:0') |
|
|
Epoch 48, bias, value: tensor([-0.0331, 0.0446, -0.0586, -0.0211, 0.0488, -0.0323, 0.0554], |
|
|
device='cuda:0'), grad: tensor([ 2.4533e-04, 1.8641e-05, -8.0538e-04, 1.8939e-05, 3.4881e-04, |
|
|
1.3614e-04, 3.7193e-05], device='cuda:0') |
|
|
306 |
|
|
0.0024355036129704724 |
|
|
changing lr |
|
|
epoch 47, time 429.21, cls_loss 0.0029 cls_loss_mapping 0.0359 cls_loss_causal 0.6213 re_mapping 0.0454 re_causal 0.0483 |
|
|
Epoch 49, weight, value: tensor([[ 0.2480, 0.2482, 0.2805, ..., -0.0153, -0.0034, -0.0146], |
|
|
[-0.0491, -0.0442, -0.0658, ..., 0.0525, 0.0547, 0.0473], |
|
|
[-0.0902, -0.0832, -0.0981, ..., 0.0395, 0.0223, 0.0158], |
|
|
..., |
|
|
[-0.1089, -0.1120, -0.0699, ..., 0.0180, 0.0682, 0.0516], |
|
|
[-0.0203, -0.0188, -0.0426, ..., -0.1339, -0.1512, -0.1637], |
|
|
[ 0.0814, 0.0828, 0.0736, ..., -0.0265, -0.0191, 0.0047]], |
|
|
device='cuda:0'), grad: tensor([[-2.2233e-04, -1.4102e-04, -1.4091e-04, ..., -3.0786e-05, |
|
|
-3.1620e-05, -3.5584e-05], |
|
|
[ 7.0989e-05, 1.6347e-05, 1.2673e-05, ..., 1.3053e-05, |
|
|
1.4298e-05, 1.8820e-05], |
|
|
[ 9.7334e-05, 4.0352e-05, 3.7849e-05, ..., 1.6138e-05, |
|
|
1.6898e-05, 2.0996e-05], |
|
|
..., |
|
|
[ 1.5485e-04, 8.2076e-05, 8.0466e-05, ..., 2.3916e-05, |
|
|
2.5123e-05, 2.9176e-05], |
|
|
[-1.0288e-04, -8.0243e-06, 7.8324e-07, ..., -2.0772e-05, |
|
|
-2.2560e-05, -3.1799e-05], |
|
|
[-3.6329e-05, 1.5283e-06, 2.0489e-06, ..., -9.4473e-06, |
|
|
-1.0073e-05, -1.0923e-05]], device='cuda:0') |
|
|
Epoch 49, bias, value: tensor([-0.0331, 0.0438, -0.0579, -0.0215, 0.0480, -0.0320, 0.0564], |
|
|
device='cuda:0'), grad: tensor([-0.0002, 0.0002, 0.0002, 0.0001, 0.0002, -0.0004, -0.0001], |
|
|
device='cuda:0') |
|
|
306 |
|
|
0.00224551509273949 |
|
|
changing lr |
|
|
epoch 48, time 427.84, cls_loss 0.0030 cls_loss_mapping 0.0376 cls_loss_causal 0.6181 re_mapping 0.0457 re_causal 0.0488 |
|
|
Epoch 50, weight, value: tensor([[ 0.2472, 0.2477, 0.2799, ..., -0.0154, -0.0036, -0.0148], |
|
|
[-0.0493, -0.0442, -0.0657, ..., 0.0523, 0.0545, 0.0471], |
|
|
[-0.0901, -0.0832, -0.0980, ..., 0.0394, 0.0223, 0.0158], |
|
|
..., |
|
|
[-0.1086, -0.1115, -0.0695, ..., 0.0177, 0.0678, 0.0513], |
|
|
[-0.0209, -0.0190, -0.0428, ..., -0.1336, -0.1509, -0.1634], |
|
|
[ 0.0821, 0.0828, 0.0736, ..., -0.0262, -0.0188, 0.0050]], |
|
|
device='cuda:0'), grad: tensor([[ 1.4400e-04, 4.0770e-05, 3.5912e-05, ..., 4.9591e-05, |
|
|
5.7667e-05, 5.9694e-05], |
|
|
[-2.9826e-04, -7.5936e-05, -6.6042e-05, ..., -1.1307e-04, |
|
|
-1.3459e-04, -1.3673e-04], |
|
|
[ 1.9684e-05, 1.6894e-06, 8.9593e-07, ..., 1.1876e-05, |
|
|
1.4775e-05, 1.4365e-05], |
|
|
..., |
|
|
[ 2.1636e-05, 5.9158e-06, 5.1521e-06, ..., 7.8231e-06, |
|
|
9.0152e-06, 9.3728e-06], |
|
|
[ 7.5512e-06, -1.0384e-06, -7.2550e-07, ..., 5.0589e-06, |
|
|
8.4639e-06, 7.0296e-06], |
|
|
[ 9.3520e-05, 2.5243e-05, 2.1860e-05, ..., 3.4422e-05, |
|
|
3.9756e-05, 4.1187e-05]], device='cuda:0') |
|
|
Epoch 50, bias, value: tensor([-0.0340, 0.0430, -0.0577, -0.0202, 0.0473, -0.0331, 0.0585], |
|
|
device='cuda:0'), grad: tensor([ 3.0828e-04, -6.5708e-04, 5.1737e-05, 2.4900e-05, 4.6939e-05, |
|
|
2.0772e-05, 2.0397e-04], device='cuda:0') |
|
|
306 |
|
|
0.002061073738537637 |
|
|
changing lr |
|
|
epoch 49, time 427.43, cls_loss 0.0027 cls_loss_mapping 0.0358 cls_loss_causal 0.6541 re_mapping 0.0448 re_causal 0.0479 |
|
|
Epoch 51, weight, value: tensor([[ 0.2476, 0.2479, 0.2800, ..., -0.0152, -0.0035, -0.0146], |
|
|
[-0.0483, -0.0439, -0.0654, ..., 0.0525, 0.0547, 0.0474], |
|
|
[-0.0902, -0.0832, -0.0979, ..., 0.0392, 0.0221, 0.0157], |
|
|
..., |
|
|
[-0.1085, -0.1113, -0.0694, ..., 0.0177, 0.0677, 0.0511], |
|
|
[-0.0210, -0.0191, -0.0428, ..., -0.1333, -0.1506, -0.1629], |
|
|
[ 0.0808, 0.0820, 0.0729, ..., -0.0264, -0.0190, 0.0046]], |
|
|
device='cuda:0'), grad: tensor([[-7.3761e-06, -6.8903e-05, -6.1393e-05, ..., 3.7342e-05, |
|
|
4.6521e-05, 4.6879e-05], |
|
|
[-4.7827e-04, -1.3089e-04, -1.4806e-04, ..., -1.6093e-04, |
|
|
-1.8084e-04, -1.7262e-04], |
|
|
[ 9.6321e-05, 4.4078e-05, 4.3571e-05, ..., 2.0713e-05, |
|
|
2.2292e-05, 2.2203e-05], |
|
|
..., |
|
|
[ 1.0103e-04, 4.2081e-05, 4.1813e-05, ..., 2.4498e-05, |
|
|
2.7284e-05, 2.7090e-05], |
|
|
[-7.2420e-05, -5.0105e-06, -3.0901e-06, ..., -3.3885e-05, |
|
|
-3.8385e-05, -3.9697e-05], |
|
|
[ 3.3355e-04, 1.0943e-04, 1.1837e-04, ..., 1.0401e-04, |
|
|
1.1396e-04, 1.0717e-04]], device='cuda:0') |
|
|
Epoch 51, bias, value: tensor([-0.0331, 0.0450, -0.0578, -0.0209, 0.0469, -0.0330, 0.0567], |
|
|
device='cuda:0'), grad: tensor([ 1.2612e-04, -9.6321e-04, 1.7142e-04, 6.3956e-05, 1.8120e-04, |
|
|
-2.7323e-04, 6.9284e-04], device='cuda:0') |
|
|
306 |
|
|
0.0018825509907063344 |
|
|
changing lr |
|
|
epoch 50, time 426.95, cls_loss 0.0033 cls_loss_mapping 0.0344 cls_loss_causal 0.6313 re_mapping 0.0445 re_causal 0.0478 |
|
|
Epoch 52, weight, value: tensor([[ 0.2476, 0.2479, 0.2798, ..., -0.0152, -0.0035, -0.0146], |
|
|
[-0.0481, -0.0439, -0.0653, ..., 0.0525, 0.0548, 0.0474], |
|
|
[-0.0902, -0.0832, -0.0979, ..., 0.0390, 0.0220, 0.0156], |
|
|
..., |
|
|
[-0.1074, -0.1107, -0.0689, ..., 0.0179, 0.0678, 0.0512], |
|
|
[-0.0213, -0.0192, -0.0429, ..., -0.1330, -0.1503, -0.1626], |
|
|
[ 0.0802, 0.0816, 0.0724, ..., -0.0264, -0.0191, 0.0045]], |
|
|
device='cuda:0'), grad: tensor([[ 1.1891e-04, 3.1561e-05, 2.2739e-05, ..., 3.7551e-05, |
|
|
2.8744e-05, 3.4332e-05], |
|
|
[ 2.6679e-04, 6.5207e-05, 4.4525e-05, ..., 7.9095e-05, |
|
|
5.8681e-05, 7.3135e-05], |
|
|
[ 7.6234e-05, 1.8701e-05, 1.0729e-05, ..., 2.2352e-05, |
|
|
1.4529e-05, 1.7613e-05], |
|
|
..., |
|
|
[ 3.4004e-05, 5.2080e-06, 1.2452e-06, ..., 9.4548e-06, |
|
|
6.0275e-06, 6.8285e-06], |
|
|
[-1.1311e-03, -2.8086e-04, -1.7083e-04, ..., -3.4928e-04, |
|
|
-2.4164e-04, -2.8658e-04], |
|
|
[ 3.1185e-04, 7.9513e-05, 4.2439e-05, ..., 1.0026e-04, |
|
|
6.4373e-05, 7.1526e-05]], device='cuda:0') |
|
|
Epoch 52, bias, value: tensor([-0.0330, 0.0449, -0.0578, -0.0217, 0.0485, -0.0332, 0.0560], |
|
|
device='cuda:0'), grad: tensor([ 0.0003, 0.0008, 0.0002, 0.0009, 0.0001, -0.0033, 0.0009], |
|
|
device='cuda:0') |
|
|
306 |
|
|
0.0017103063703014388 |
|
|
changing lr |
|
|
epoch 51, time 429.18, cls_loss 0.0033 cls_loss_mapping 0.0337 cls_loss_causal 0.6361 re_mapping 0.0436 re_causal 0.0467 |
|
|
Epoch 53, weight, value: tensor([[ 0.2471, 0.2476, 0.2795, ..., -0.0152, -0.0035, -0.0147], |
|
|
[-0.0487, -0.0439, -0.0653, ..., 0.0522, 0.0545, 0.0471], |
|
|
[-0.0902, -0.0831, -0.0978, ..., 0.0388, 0.0219, 0.0155], |
|
|
..., |
|
|
[-0.1067, -0.1102, -0.0686, ..., 0.0180, 0.0678, 0.0513], |
|
|
[-0.0216, -0.0193, -0.0429, ..., -0.1328, -0.1500, -0.1623], |
|
|
[ 0.0808, 0.0815, 0.0723, ..., -0.0261, -0.0189, 0.0047]], |
|
|
device='cuda:0'), grad: tensor([[-2.8858e-03, -1.5879e-03, -1.5993e-03, ..., -4.1032e-04, |
|
|
-5.5933e-04, -5.7220e-04], |
|
|
[ 2.7132e-04, 1.4031e-04, 1.3936e-04, ..., 5.5403e-05, |
|
|
6.7294e-05, 6.7949e-05], |
|
|
[ 7.6151e-04, 4.0030e-04, 4.0221e-04, ..., 1.1492e-04, |
|
|
1.5497e-04, 1.5962e-04], |
|
|
..., |
|
|
[ 1.3828e-03, 7.1383e-04, 7.1812e-04, ..., 1.9825e-04, |
|
|
2.7800e-04, 2.9016e-04], |
|
|
[-2.2149e-04, 8.4192e-06, 1.9401e-05, ..., -1.2338e-04, |
|
|
-1.3256e-04, -1.3959e-04], |
|
|
[ 5.5552e-04, 2.9540e-04, 2.9325e-04, ..., 1.1337e-04, |
|
|
1.3423e-04, 1.3423e-04]], device='cuda:0') |
|
|
Epoch 53, bias, value: tensor([-0.0338, 0.0429, -0.0577, -0.0219, 0.0494, -0.0337, 0.0586], |
|
|
device='cuda:0'), grad: tensor([-0.0040, 0.0004, 0.0011, 0.0004, 0.0021, -0.0009, 0.0009], |
|
|
device='cuda:0') |
|
|
306 |
|
|
0.0015446867550656784 |
|
|
changing lr |
|
|
epoch 52, time 430.64, cls_loss 0.0035 cls_loss_mapping 0.0352 cls_loss_causal 0.6378 re_mapping 0.0435 re_causal 0.0466 |
|
|
Epoch 54, weight, value: tensor([[ 0.2474, 0.2477, 0.2796, ..., -0.0150, -0.0034, -0.0145], |
|
|
[-0.0488, -0.0439, -0.0652, ..., 0.0520, 0.0543, 0.0469], |
|
|
[-0.0901, -0.0832, -0.0979, ..., 0.0387, 0.0218, 0.0155], |
|
|
..., |
|
|
[-0.1063, -0.1099, -0.0683, ..., 0.0181, 0.0678, 0.0514], |
|
|
[-0.0212, -0.0193, -0.0429, ..., -0.1324, -0.1496, -0.1619], |
|
|
[ 0.0799, 0.0810, 0.0718, ..., -0.0262, -0.0191, 0.0045]], |
|
|
device='cuda:0'), grad: tensor([[ 4.4417e-04, 1.6558e-04, 1.5855e-04, ..., 2.1517e-04, |
|
|
2.1636e-04, 2.3675e-04], |
|
|
[ 9.7871e-05, 3.1441e-05, 2.6315e-05, ..., 5.4896e-05, |
|
|
5.1409e-05, 5.7578e-05], |
|
|
[ 5.0247e-05, 2.3901e-05, 2.0757e-05, ..., 3.8058e-05, |
|
|
3.3408e-05, 3.6001e-05], |
|
|
..., |
|
|
[-2.1362e-04, -1.0002e-04, -1.0782e-04, ..., -8.4937e-05, |
|
|
-9.6142e-05, -1.0109e-04], |
|
|
[ 1.8752e-04, 5.8442e-05, 4.9949e-05, ..., 9.8109e-05, |
|
|
9.3877e-05, 1.0526e-04], |
|
|
[ 1.3046e-03, 4.1962e-04, 3.4881e-04, ..., 7.3671e-04, |
|
|
6.8760e-04, 7.7057e-04]], device='cuda:0') |
|
|
Epoch 54, bias, value: tensor([-0.0333, 0.0422, -0.0572, -0.0223, 0.0495, -0.0324, 0.0572], |
|
|
device='cuda:0'), grad: tensor([ 0.0009, 0.0002, 0.0001, -0.0045, -0.0004, 0.0004, 0.0031], |
|
|
device='cuda:0') |
|
|
306 |
|
|
0.001386025680863044 |
|
|
changing lr |
|
|
epoch 53, time 427.28, cls_loss 0.0021 cls_loss_mapping 0.0327 cls_loss_causal 0.5944 re_mapping 0.0434 re_causal 0.0465 |
|
|
Epoch 55, weight, value: tensor([[ 0.2475, 0.2476, 0.2795, ..., -0.0149, -0.0033, -0.0144], |
|
|
[-0.0489, -0.0439, -0.0652, ..., 0.0518, 0.0542, 0.0468], |
|
|
[-0.0900, -0.0831, -0.0978, ..., 0.0387, 0.0218, 0.0155], |
|
|
..., |
|
|
[-0.1068, -0.1099, -0.0684, ..., 0.0180, 0.0676, 0.0511], |
|
|
[-0.0210, -0.0193, -0.0429, ..., -0.1321, -0.1493, -0.1615], |
|
|
[ 0.0799, 0.0808, 0.0717, ..., -0.0261, -0.0190, 0.0045]], |
|
|
device='cuda:0'), grad: tensor([[ 3.1680e-05, 1.2154e-06, -2.8219e-07, ..., 1.5661e-05, |
|
|
1.7688e-05, 1.8716e-05], |
|
|
[ 2.3949e-04, 2.4959e-05, 1.8969e-05, ..., 1.3852e-04, |
|
|
1.4877e-04, 1.5247e-04], |
|
|
[ 1.6904e-04, 3.0249e-05, 1.9073e-05, ..., 8.3625e-05, |
|
|
9.3639e-05, 9.9063e-05], |
|
|
..., |
|
|
[ 3.6716e-04, 5.6565e-05, 3.5137e-05, ..., 1.9920e-04, |
|
|
2.1935e-04, 2.2936e-04], |
|
|
[ 4.8161e-05, 3.6024e-06, -6.8396e-06, ..., 5.0902e-05, |
|
|
5.4926e-05, 5.8174e-05], |
|
|
[ 2.0003e-04, 3.6567e-05, 2.5868e-05, ..., 9.4771e-05, |
|
|
1.0562e-04, 1.1134e-04]], device='cuda:0') |
|
|
Epoch 55, bias, value: tensor([-0.0327, 0.0418, -0.0569, -0.0223, 0.0481, -0.0316, 0.0574], |
|
|
device='cuda:0'), grad: tensor([ 9.8288e-05, 6.6566e-04, 4.4632e-04, -2.8362e-03, 9.7656e-04, |
|
|
1.2803e-04, 5.2500e-04], device='cuda:0') |
|
|
306 |
|
|
0.0012346426699819469 |
|
|
changing lr |
|
|
epoch 54, time 429.05, cls_loss 0.0017 cls_loss_mapping 0.0296 cls_loss_causal 0.6024 re_mapping 0.0432 re_causal 0.0464 |
|
|
Epoch 56, weight, value: tensor([[ 0.2472, 0.2475, 0.2793, ..., -0.0149, -0.0033, -0.0144], |
|
|
[-0.0489, -0.0439, -0.0651, ..., 0.0517, 0.0541, 0.0467], |
|
|
[-0.0899, -0.0831, -0.0977, ..., 0.0386, 0.0218, 0.0155], |
|
|
..., |
|
|
[-0.1066, -0.1098, -0.0684, ..., 0.0180, 0.0675, 0.0511], |
|
|
[-0.0209, -0.0193, -0.0429, ..., -0.1319, -0.1491, -0.1613], |
|
|
[ 0.0798, 0.0807, 0.0716, ..., -0.0261, -0.0190, 0.0045]], |
|
|
device='cuda:0'), grad: tensor([[-3.3474e-03, -2.1229e-03, -2.0885e-03, ..., -2.0981e-04, |
|
|
-3.1662e-04, -3.3784e-04], |
|
|
[ 2.4605e-04, 1.4138e-04, 1.3793e-04, ..., 3.1292e-05, |
|
|
3.9667e-05, 4.2975e-05], |
|
|
[ 1.2693e-03, 7.1526e-04, 6.9714e-04, ..., 1.6069e-04, |
|
|
2.0373e-04, 2.1863e-04], |
|
|
..., |
|
|
[ 6.1178e-04, 3.5381e-04, 3.4547e-04, ..., 7.1764e-05, |
|
|
9.2328e-05, 9.9242e-05], |
|
|
[ 6.0844e-04, 2.5725e-04, 2.4486e-04, ..., 1.1837e-04, |
|
|
1.4007e-04, 1.4651e-04], |
|
|
[ 1.3056e-03, 7.2861e-04, 7.1001e-04, ..., 1.7309e-04, |
|
|
2.1768e-04, 2.3365e-04]], device='cuda:0') |
|
|
Epoch 56, bias, value: tensor([-0.0329, 0.0416, -0.0569, -0.0224, 0.0480, -0.0312, 0.0576], |
|
|
device='cuda:0'), grad: tensor([-0.0040, 0.0004, 0.0019, -0.0025, 0.0009, 0.0013, 0.0020], |
|
|
device='cuda:0') |
|
|
306 |
|
|
0.0010908425876598518 |
|
|
changing lr |
|
|
epoch 55, time 426.80, cls_loss 0.0022 cls_loss_mapping 0.0329 cls_loss_causal 0.6184 re_mapping 0.0427 re_causal 0.0458 |
|
|
Epoch 57, weight, value: tensor([[ 0.2475, 0.2476, 0.2794, ..., -0.0149, -0.0033, -0.0143], |
|
|
[-0.0490, -0.0439, -0.0651, ..., 0.0516, 0.0539, 0.0466], |
|
|
[-0.0899, -0.0830, -0.0976, ..., 0.0385, 0.0217, 0.0154], |
|
|
..., |
|
|
[-0.1065, -0.1096, -0.0683, ..., 0.0180, 0.0674, 0.0511], |
|
|
[-0.0207, -0.0193, -0.0429, ..., -0.1316, -0.1488, -0.1610], |
|
|
[ 0.0791, 0.0803, 0.0713, ..., -0.0262, -0.0191, 0.0043]], |
|
|
device='cuda:0'), grad: tensor([[ 4.2391e-04, 1.2326e-04, 1.0896e-04, ..., 1.5497e-04, |
|
|
1.8597e-04, 2.0492e-04], |
|
|
[-1.5807e-04, -5.4806e-05, -4.8727e-05, ..., -3.1233e-05, |
|
|
-3.6001e-05, -4.9263e-05], |
|
|
[ 2.2995e-04, 7.9930e-05, 6.9261e-05, ..., 8.5711e-05, |
|
|
1.0091e-04, 9.5487e-05], |
|
|
..., |
|
|
[-5.3787e-04, -1.6129e-04, -1.4102e-04, ..., -2.2840e-04, |
|
|
-2.7609e-04, -2.7633e-04], |
|
|
[ 2.2441e-05, 1.0282e-05, 1.1332e-05, ..., 7.6592e-06, |
|
|
1.0222e-05, 1.0461e-05], |
|
|
[ 9.7454e-06, 8.0746e-07, -2.0936e-06, ..., 9.2760e-06, |
|
|
1.2159e-05, 1.1526e-05]], device='cuda:0') |
|
|
Epoch 57, bias, value: tensor([-0.0323, 0.0411, -0.0570, -0.0218, 0.0477, -0.0305, 0.0565], |
|
|
device='cuda:0'), grad: tensor([ 1.1177e-03, -3.0947e-04, 5.4264e-04, 2.3723e-05, -1.4601e-03, |
|
|
4.5985e-05, 4.0382e-05], device='cuda:0') |
|
|
306 |
|
|
0.000954915028125264 |
|
|
changing lr |
|
|
epoch 56, time 428.67, cls_loss 0.0014 cls_loss_mapping 0.0284 cls_loss_causal 0.5831 re_mapping 0.0422 re_causal 0.0451 |
|
|
Epoch 58, weight, value: tensor([[ 0.2476, 0.2476, 0.2793, ..., -0.0148, -0.0033, -0.0143], |
|
|
[-0.0488, -0.0438, -0.0650, ..., 0.0515, 0.0539, 0.0466], |
|
|
[-0.0900, -0.0830, -0.0976, ..., 0.0384, 0.0216, 0.0153], |
|
|
..., |
|
|
[-0.1065, -0.1095, -0.0683, ..., 0.0180, 0.0674, 0.0510], |
|
|
[-0.0208, -0.0194, -0.0429, ..., -0.1315, -0.1486, -0.1608], |
|
|
[ 0.0788, 0.0800, 0.0710, ..., -0.0262, -0.0191, 0.0043]], |
|
|
device='cuda:0'), grad: tensor([[ 1.0282e-05, -2.3227e-06, -4.9472e-06, ..., 5.3830e-06, |
|
|
5.2825e-06, 6.8247e-06], |
|
|
[-1.4150e-04, -3.4779e-05, -2.1845e-05, ..., -4.9174e-05, |
|
|
-5.9396e-05, -6.2883e-05], |
|
|
[ 5.2303e-05, 1.5765e-05, 1.3009e-05, ..., 1.6302e-05, |
|
|
1.9863e-05, 2.0280e-05], |
|
|
..., |
|
|
[ 2.0757e-05, 5.6326e-06, 4.7274e-06, ..., 6.5118e-06, |
|
|
7.2829e-06, 7.7114e-06], |
|
|
[-4.5560e-06, 1.7434e-06, 8.5402e-07, ..., -4.2468e-06, |
|
|
-4.0457e-06, -3.2615e-06], |
|
|
[ 5.4955e-05, 1.2606e-05, 7.1153e-06, ..., 2.2396e-05, |
|
|
2.7567e-05, 2.8089e-05]], device='cuda:0') |
|
|
Epoch 58, bias, value: tensor([-0.0320, 0.0412, -0.0571, -0.0216, 0.0475, -0.0304, 0.0562], |
|
|
device='cuda:0'), grad: tensor([ 5.3912e-05, -3.7479e-04, 1.2082e-04, 2.0117e-05, 5.1171e-05, |
|
|
-1.5542e-05, 1.4389e-04], device='cuda:0') |
|
|
306 |
|
|
0.0008271337313934874 |
|
|
changing lr |
|
|
epoch 57, time 423.71, cls_loss 0.0016 cls_loss_mapping 0.0269 cls_loss_causal 0.5705 re_mapping 0.0421 re_causal 0.0451 |
|
|
Epoch 59, weight, value: tensor([[ 0.2474, 0.2474, 0.2791, ..., -0.0149, -0.0033, -0.0143], |
|
|
[-0.0487, -0.0437, -0.0649, ..., 0.0515, 0.0538, 0.0465], |
|
|
[-0.0899, -0.0830, -0.0975, ..., 0.0383, 0.0216, 0.0153], |
|
|
..., |
|
|
[-0.1063, -0.1094, -0.0682, ..., 0.0181, 0.0674, 0.0510], |
|
|
[-0.0207, -0.0194, -0.0429, ..., -0.1313, -0.1485, -0.1606], |
|
|
[ 0.0785, 0.0799, 0.0709, ..., -0.0262, -0.0192, 0.0042]], |
|
|
device='cuda:0'), grad: tensor([[-9.9182e-05, -6.8128e-05, -6.8247e-05, ..., -9.6485e-06, |
|
|
-9.5665e-06, -1.1303e-05], |
|
|
[-1.1706e-04, -3.6120e-05, -3.3170e-05, ..., -3.1769e-05, |
|
|
-3.9518e-05, -4.3839e-05], |
|
|
[ 1.0276e-04, 3.3408e-05, 3.0905e-05, ..., 3.1918e-05, |
|
|
3.6389e-05, 3.8296e-05], |
|
|
..., |
|
|
[-8.3864e-05, 3.0044e-06, 7.2084e-06, ..., -4.6194e-05, |
|
|
-5.0426e-05, -5.1051e-05], |
|
|
[ 8.7619e-05, 2.3827e-05, 2.1413e-05, ..., 3.0056e-05, |
|
|
3.3259e-05, 3.5048e-05], |
|
|
[ 8.0526e-05, 3.5167e-05, 3.3796e-05, ..., 1.6659e-05, |
|
|
1.9476e-05, 2.1666e-05]], device='cuda:0') |
|
|
Epoch 59, bias, value: tensor([-0.0322, 0.0413, -0.0569, -0.0217, 0.0476, -0.0303, 0.0558], |
|
|
device='cuda:0'), grad: tensor([-1.0520e-04, -2.8086e-04, 2.3329e-04, 6.8605e-05, -2.8443e-04, |
|
|
2.1255e-04, 1.5569e-04], device='cuda:0') |
|
|
306 |
|
|
0.00070775603199067 |
|
|
changing lr |
|
|
epoch 58, time 421.70, cls_loss 0.0019 cls_loss_mapping 0.0311 cls_loss_causal 0.5978 re_mapping 0.0416 re_causal 0.0446 |
|
|
Epoch 60, weight, value: tensor([[ 0.2471, 0.2473, 0.2789, ..., -0.0149, -0.0034, -0.0144], |
|
|
[-0.0483, -0.0436, -0.0647, ..., 0.0515, 0.0539, 0.0466], |
|
|
[-0.0900, -0.0830, -0.0975, ..., 0.0382, 0.0215, 0.0152], |
|
|
..., |
|
|
[-0.1060, -0.1092, -0.0680, ..., 0.0181, 0.0674, 0.0510], |
|
|
[-0.0210, -0.0194, -0.0429, ..., -0.1313, -0.1484, -0.1605], |
|
|
[ 0.0782, 0.0797, 0.0707, ..., -0.0263, -0.0192, 0.0042]], |
|
|
device='cuda:0'), grad: tensor([[ 9.6798e-05, 3.5226e-05, 3.2395e-05, ..., 2.2635e-05, |
|
|
2.4319e-05, 2.8834e-05], |
|
|
[-1.2958e-04, -2.9504e-05, -2.5034e-05, ..., -5.7399e-05, |
|
|
-6.4015e-05, -7.1168e-05], |
|
|
[ 2.7716e-05, 7.5512e-06, 6.8322e-06, ..., 8.3372e-06, |
|
|
8.8662e-06, 1.0163e-05], |
|
|
..., |
|
|
[ 1.3828e-05, 3.9861e-06, 3.5781e-06, ..., 7.3947e-06, |
|
|
5.8748e-06, 6.3442e-06], |
|
|
[-9.2566e-05, -2.2456e-05, -2.1636e-05, ..., -2.6435e-05, |
|
|
-1.8954e-05, -2.1636e-05], |
|
|
[-9.8124e-06, -1.6779e-05, -1.5572e-05, ..., 9.1493e-06, |
|
|
7.4469e-06, 6.7241e-06]], device='cuda:0') |
|
|
Epoch 60, bias, value: tensor([-0.0326, 0.0420, -0.0573, -0.0211, 0.0478, -0.0307, 0.0556], |
|
|
device='cuda:0'), grad: tensor([ 2.0659e-04, -3.7932e-04, 6.7472e-05, 2.5487e-04, 4.3511e-05, |
|
|
-2.1946e-04, 2.6584e-05], device='cuda:0') |
|
|
306 |
|
|
0.0005970223407163104 |
|
|
changing lr |
|
|
epoch 59, time 419.85, cls_loss 0.0019 cls_loss_mapping 0.0281 cls_loss_causal 0.6170 re_mapping 0.0414 re_causal 0.0446 |
|
|
Epoch 61, weight, value: tensor([[ 0.2471, 0.2472, 0.2789, ..., -0.0149, -0.0034, -0.0144], |
|
|
[-0.0482, -0.0435, -0.0646, ..., 0.0515, 0.0538, 0.0465], |
|
|
[-0.0900, -0.0830, -0.0975, ..., 0.0382, 0.0215, 0.0152], |
|
|
..., |
|
|
[-0.1060, -0.1092, -0.0680, ..., 0.0181, 0.0673, 0.0510], |
|
|
[-0.0211, -0.0195, -0.0430, ..., -0.1312, -0.1483, -0.1604], |
|
|
[ 0.0781, 0.0796, 0.0706, ..., -0.0262, -0.0192, 0.0041]], |
|
|
device='cuda:0'), grad: tensor([[ 3.0851e-04, 8.7559e-05, 8.5950e-05, ..., 8.6188e-05, |
|
|
9.1970e-05, 9.9599e-05], |
|
|
[ 9.4950e-05, 2.9132e-05, 2.7582e-05, ..., 3.2783e-05, |
|
|
3.2723e-05, 3.5316e-05], |
|
|
[ 2.3806e-04, 6.7055e-05, 6.4909e-05, ..., 6.5923e-05, |
|
|
6.9439e-05, 7.5936e-05], |
|
|
..., |
|
|
[-6.3276e-04, -1.7273e-04, -1.7107e-04, ..., -1.5140e-04, |
|
|
-1.6677e-04, -1.8263e-04], |
|
|
[ 4.5270e-05, 1.0677e-05, 1.3418e-05, ..., 2.3127e-05, |
|
|
2.5794e-05, 2.4602e-05], |
|
|
[ 1.1426e-04, 3.2634e-05, 3.1173e-05, ..., 3.8356e-05, |
|
|
4.0770e-05, 4.3154e-05]], device='cuda:0') |
|
|
Epoch 61, bias, value: tensor([-0.0325, 0.0420, -0.0571, -0.0209, 0.0476, -0.0309, 0.0554], |
|
|
device='cuda:0'), grad: tensor([ 6.9857e-04, 2.1350e-04, 5.4169e-04, -3.6597e-04, -1.4334e-03, |
|
|
8.5652e-05, 2.5988e-04], device='cuda:0') |
|
|
306 |
|
|
0.0004951556604879052 |
|
|
changing lr |
|
|
epoch 60, time 418.06, cls_loss 0.0022 cls_loss_mapping 0.0259 cls_loss_causal 0.6041 re_mapping 0.0412 re_causal 0.0443 |
|
|
Epoch 62, weight, value: tensor([[ 0.2470, 0.2472, 0.2788, ..., -0.0149, -0.0034, -0.0144], |
|
|
[-0.0483, -0.0435, -0.0646, ..., 0.0514, 0.0538, 0.0465], |
|
|
[-0.0900, -0.0830, -0.0975, ..., 0.0382, 0.0215, 0.0152], |
|
|
..., |
|
|
[-0.1060, -0.1091, -0.0680, ..., 0.0181, 0.0673, 0.0510], |
|
|
[-0.0210, -0.0195, -0.0430, ..., -0.1311, -0.1482, -0.1603], |
|
|
[ 0.0780, 0.0795, 0.0705, ..., -0.0262, -0.0192, 0.0041]], |
|
|
device='cuda:0'), grad: tensor([[-1.7226e-04, -1.2827e-04, -1.2863e-04, ..., -2.8118e-05, |
|
|
-2.7418e-05, -3.1918e-05], |
|
|
[ 7.6115e-05, 2.7344e-05, 2.4825e-05, ..., 2.2009e-05, |
|
|
1.9506e-05, 2.3991e-05], |
|
|
[-1.7416e-04, -4.3243e-05, -3.6567e-05, ..., -5.5730e-05, |
|
|
-6.6042e-05, -7.8082e-05], |
|
|
..., |
|
|
[ 1.4949e-04, 5.6535e-05, 5.2422e-05, ..., 3.9339e-05, |
|
|
4.3750e-05, 5.2392e-05], |
|
|
[ 3.4302e-05, 1.2308e-05, 1.1645e-05, ..., -4.5598e-06, |
|
|
4.0010e-06, 5.4576e-06], |
|
|
[ 2.7195e-05, 5.8621e-05, 6.1750e-05, ..., 8.6352e-06, |
|
|
8.2105e-06, 5.7817e-06]], device='cuda:0') |
|
|
Epoch 62, bias, value: tensor([-0.0326, 0.0417, -0.0571, -0.0207, 0.0475, -0.0307, 0.0555], |
|
|
device='cuda:0'), grad: tensor([-1.1986e-04, 1.8978e-04, -5.1594e-04, 1.7297e-04, 3.4547e-04, |
|
|
1.6943e-05, -8.9169e-05], device='cuda:0') |
|
|
306 |
|
|
0.00040236113724274745 |
|
|
changing lr |
|
|
epoch 61, time 417.60, cls_loss 0.0014 cls_loss_mapping 0.0258 cls_loss_causal 0.5725 re_mapping 0.0409 re_causal 0.0440 |
|
|
Epoch 63, weight, value: tensor([[ 0.2470, 0.2471, 0.2787, ..., -0.0149, -0.0034, -0.0144], |
|
|
[-0.0483, -0.0435, -0.0645, ..., 0.0514, 0.0538, 0.0465], |
|
|
[-0.0899, -0.0830, -0.0975, ..., 0.0381, 0.0214, 0.0152], |
|
|
..., |
|
|
[-0.1060, -0.1090, -0.0679, ..., 0.0181, 0.0672, 0.0509], |
|
|
[-0.0210, -0.0195, -0.0430, ..., -0.1310, -0.1482, -0.1602], |
|
|
[ 0.0780, 0.0794, 0.0704, ..., -0.0262, -0.0192, 0.0041]], |
|
|
device='cuda:0'), grad: tensor([[-3.8743e-06, -7.7784e-06, -8.5309e-06, ..., 9.4250e-07, |
|
|
1.3597e-06, 1.5721e-06], |
|
|
[ 1.4007e-05, 4.0494e-06, 3.5055e-06, ..., 4.4778e-06, |
|
|
4.9956e-06, 5.0962e-06], |
|
|
[-1.5117e-05, -8.3074e-07, 8.9593e-07, ..., -2.8815e-06, |
|
|
-3.7625e-06, -4.4890e-06], |
|
|
..., |
|
|
[ 3.1926e-06, 2.3656e-07, -2.8312e-07, ..., 2.5332e-07, |
|
|
3.2783e-07, 6.7800e-07], |
|
|
[-1.6674e-05, -3.1460e-06, -2.5742e-06, ..., -6.2101e-06, |
|
|
-7.0110e-06, -7.0184e-06], |
|
|
[ 1.7866e-05, 6.9812e-06, 6.5118e-06, ..., 4.3884e-06, |
|
|
4.9882e-06, 5.0478e-06]], device='cuda:0') |
|
|
Epoch 63, bias, value: tensor([-0.0326, 0.0417, -0.0570, -0.0206, 0.0473, -0.0306, 0.0555], |
|
|
device='cuda:0'), grad: tensor([ 9.9242e-06, 3.3975e-05, -4.4495e-05, 3.4831e-07, 9.3728e-06, |
|
|
-4.6074e-05, 3.7163e-05], device='cuda:0') |
|
|
306 |
|
|
0.00031882564680131423 |
|
|
changing lr |
|
|
epoch 62, time 418.99, cls_loss 0.0020 cls_loss_mapping 0.0323 cls_loss_causal 0.5851 re_mapping 0.0406 re_causal 0.0436 |
|
|
Epoch 64, weight, value: tensor([[ 0.2469, 0.2471, 0.2787, ..., -0.0149, -0.0034, -0.0144], |
|
|
[-0.0482, -0.0434, -0.0645, ..., 0.0514, 0.0538, 0.0465], |
|
|
[-0.0899, -0.0829, -0.0975, ..., 0.0381, 0.0214, 0.0152], |
|
|
..., |
|
|
[-0.1060, -0.1090, -0.0679, ..., 0.0181, 0.0672, 0.0509], |
|
|
[-0.0210, -0.0195, -0.0430, ..., -0.1310, -0.1481, -0.1602], |
|
|
[ 0.0778, 0.0793, 0.0704, ..., -0.0262, -0.0192, 0.0041]], |
|
|
device='cuda:0'), grad: tensor([[ 1.2884e-03, 1.9336e-04, 1.0115e-04, ..., 3.2020e-04, |
|
|
3.5286e-04, 3.9339e-04], |
|
|
[ 6.8521e-04, 1.0777e-04, 5.8651e-05, ..., 1.6892e-04, |
|
|
1.8585e-04, 2.0766e-04], |
|
|
[ 1.1377e-03, 1.6427e-04, 7.5102e-05, ..., 2.8133e-04, |
|
|
3.1304e-04, 3.5000e-04], |
|
|
..., |
|
|
[ 2.3975e-03, 3.2902e-04, 1.3816e-04, ..., 5.9986e-04, |
|
|
6.6805e-04, 7.4720e-04], |
|
|
[-8.9111e-03, -1.2150e-03, -5.0020e-04, ..., -2.2259e-03, |
|
|
-2.4834e-03, -2.7771e-03], |
|
|
[ 3.9291e-04, 1.2249e-05, -3.6687e-05, ..., 1.0353e-04, |
|
|
1.2201e-04, 1.3936e-04]], device='cuda:0') |
|
|
Epoch 64, bias, value: tensor([-0.0327, 0.0418, -0.0569, -0.0207, 0.0473, -0.0304, 0.0553], |
|
|
device='cuda:0'), grad: tensor([ 0.0043, 0.0023, 0.0039, 0.0104, 0.0083, -0.0308, 0.0017], |
|
|
device='cuda:0') |
|
|
306 |
|
|
0.0002447174185242325 |
|
|
changing lr |
|
|
epoch 63, time 414.73, cls_loss 0.0017 cls_loss_mapping 0.0244 cls_loss_causal 0.6037 re_mapping 0.0406 re_causal 0.0438 |
|
|
Epoch 65, weight, value: tensor([[ 0.2470, 0.2471, 0.2787, ..., -0.0149, -0.0034, -0.0144], |
|
|
[-0.0482, -0.0434, -0.0645, ..., 0.0514, 0.0538, 0.0465], |
|
|
[-0.0899, -0.0829, -0.0975, ..., 0.0381, 0.0214, 0.0152], |
|
|
..., |
|
|
[-0.1059, -0.1090, -0.0679, ..., 0.0181, 0.0672, 0.0509], |
|
|
[-0.0209, -0.0195, -0.0429, ..., -0.1309, -0.1480, -0.1601], |
|
|
[ 0.0777, 0.0792, 0.0703, ..., -0.0262, -0.0192, 0.0041]], |
|
|
device='cuda:0'), grad: tensor([[-4.4964e-06, -1.0490e-05, -1.0796e-05, ..., 1.2871e-06, |
|
|
1.4622e-06, 2.0340e-06], |
|
|
[ 7.4387e-05, 1.5885e-05, 1.3448e-05, ..., 1.7881e-05, |
|
|
1.8552e-05, 2.2933e-05], |
|
|
[ 9.9063e-05, 2.2203e-05, 1.9148e-05, ..., 2.7061e-05, |
|
|
2.4825e-05, 3.1203e-05], |
|
|
..., |
|
|
[ 2.1532e-05, 5.4277e-06, 4.3362e-06, ..., -1.9260e-06, |
|
|
-1.6876e-06, -8.2701e-07], |
|
|
[ 1.1361e-04, 2.1651e-05, 1.7896e-05, ..., 3.0607e-05, |
|
|
2.9683e-05, 3.7044e-05], |
|
|
[-7.2420e-05, -2.1517e-05, -1.9461e-05, ..., -1.2890e-05, |
|
|
-1.8656e-05, -2.1428e-05]], device='cuda:0') |
|
|
Epoch 65, bias, value: tensor([-0.0326, 0.0418, -0.0570, -0.0208, 0.0472, -0.0302, 0.0552], |
|
|
device='cuda:0'), grad: tensor([ 2.3663e-05, 1.9288e-04, 2.5082e-04, -6.5517e-04, 5.3644e-05, |
|
|
3.0255e-04, -1.6916e-04], device='cuda:0') |
|
|
306 |
|
|
0.0001801856965207339 |
|
|
changing lr |
|
|
epoch 64, time 413.22, cls_loss 0.0016 cls_loss_mapping 0.0248 cls_loss_causal 0.6161 re_mapping 0.0405 re_causal 0.0437 |
|
|
Epoch 66, weight, value: tensor([[ 0.2470, 0.2471, 0.2787, ..., -0.0149, -0.0034, -0.0144], |
|
|
[-0.0481, -0.0434, -0.0645, ..., 0.0513, 0.0538, 0.0465], |
|
|
[-0.0898, -0.0829, -0.0974, ..., 0.0381, 0.0214, 0.0152], |
|
|
..., |
|
|
[-0.1059, -0.1090, -0.0679, ..., 0.0180, 0.0672, 0.0509], |
|
|
[-0.0209, -0.0195, -0.0430, ..., -0.1309, -0.1480, -0.1601], |
|
|
[ 0.0777, 0.0792, 0.0703, ..., -0.0262, -0.0192, 0.0041]], |
|
|
device='cuda:0'), grad: tensor([[-7.0512e-05, -4.7058e-05, -5.0157e-05, ..., -8.2478e-06, |
|
|
-8.2850e-06, -9.5665e-06], |
|
|
[ 2.0787e-05, 9.5218e-06, 9.8273e-06, ..., 3.1665e-06, |
|
|
3.2280e-06, 4.0270e-06], |
|
|
[-4.8801e-06, 1.4380e-06, 2.3991e-06, ..., -1.5441e-06, |
|
|
-1.7043e-06, -3.0026e-06], |
|
|
..., |
|
|
[ 7.8917e-05, 3.0667e-05, 3.0696e-05, ..., 1.4104e-05, |
|
|
1.4886e-05, 1.8507e-05], |
|
|
[ 4.3333e-05, 1.1683e-05, 1.1191e-05, ..., 9.6634e-06, |
|
|
9.5069e-06, 1.1683e-05], |
|
|
[-9.7632e-05, -2.0340e-05, -1.8418e-05, ..., -2.1979e-05, |
|
|
-2.2665e-05, -2.7910e-05]], device='cuda:0') |
|
|
Epoch 66, bias, value: tensor([-0.0326, 0.0419, -0.0568, -0.0208, 0.0471, -0.0302, 0.0551], |
|
|
device='cuda:0'), grad: tensor([-8.1718e-05, 3.6746e-05, -2.0251e-05, 5.1826e-05, 1.5628e-04, |
|
|
1.0252e-04, -2.4605e-04], device='cuda:0') |
|
|
306 |
|
|
0.000125360439090882 |
|
|
changing lr |
|
|
epoch 65, time 415.02, cls_loss 0.0015 cls_loss_mapping 0.0254 cls_loss_causal 0.5788 re_mapping 0.0404 re_causal 0.0434 |
|
|
Epoch 67, weight, value: tensor([[ 0.2470, 0.2471, 0.2787, ..., -0.0149, -0.0034, -0.0144], |
|
|
[-0.0481, -0.0434, -0.0645, ..., 0.0513, 0.0537, 0.0465], |
|
|
[-0.0898, -0.0829, -0.0974, ..., 0.0381, 0.0214, 0.0152], |
|
|
..., |
|
|
[-0.1060, -0.1090, -0.0679, ..., 0.0180, 0.0672, 0.0509], |
|
|
[-0.0209, -0.0195, -0.0430, ..., -0.1309, -0.1480, -0.1600], |
|
|
[ 0.0777, 0.0792, 0.0702, ..., -0.0262, -0.0192, 0.0041]], |
|
|
device='cuda:0'), grad: tensor([[ 4.1676e-04, 1.7440e-04, 1.6701e-04, ..., 7.9632e-05, |
|
|
8.6963e-05, 9.7990e-05], |
|
|
[ 1.0794e-04, 4.8518e-05, 4.6700e-05, ..., 2.0698e-05, |
|
|
2.1353e-05, 2.3842e-05], |
|
|
[ 1.4484e-04, 6.3241e-05, 6.0648e-05, ..., 2.8014e-05, |
|
|
3.1024e-05, 3.4124e-05], |
|
|
..., |
|
|
[-2.1820e-03, -9.4128e-04, -9.0265e-04, ..., -4.2129e-04, |
|
|
-4.6349e-04, -5.1355e-04], |
|
|
[ 8.1873e-04, 3.5405e-04, 3.3951e-04, ..., 1.5831e-04, |
|
|
1.7440e-04, 1.9288e-04], |
|
|
[ 6.5470e-04, 2.8372e-04, 2.7227e-04, ..., 1.2708e-04, |
|
|
1.4138e-04, 1.5545e-04]], device='cuda:0') |
|
|
Epoch 67, bias, value: tensor([-0.0325, 0.0418, -0.0568, -0.0208, 0.0470, -0.0302, 0.0551], |
|
|
device='cuda:0'), grad: tensor([ 9.2602e-04, 2.3615e-04, 3.1257e-04, 8.7500e-05, -4.7569e-03, |
|
|
1.7815e-03, 1.4172e-03], device='cuda:0') |
|
|
306 |
|
|
8.03520570068517e-05 |
|
|
changing lr |
|
|
epoch 66, time 411.65, cls_loss 0.0014 cls_loss_mapping 0.0245 cls_loss_causal 0.5859 re_mapping 0.0403 re_causal 0.0433 |
|
|
Epoch 68, weight, value: tensor([[ 0.2470, 0.2471, 0.2787, ..., -0.0149, -0.0034, -0.0143], |
|
|
[-0.0482, -0.0434, -0.0645, ..., 0.0513, 0.0537, 0.0464], |
|
|
[-0.0898, -0.0829, -0.0974, ..., 0.0381, 0.0214, 0.0152], |
|
|
..., |
|
|
[-0.1060, -0.1089, -0.0679, ..., 0.0180, 0.0671, 0.0509], |
|
|
[-0.0209, -0.0195, -0.0430, ..., -0.1309, -0.1480, -0.1600], |
|
|
[ 0.0776, 0.0792, 0.0702, ..., -0.0262, -0.0192, 0.0041]], |
|
|
device='cuda:0'), grad: tensor([[ 6.9320e-05, 2.5973e-05, 2.5213e-05, ..., 1.3977e-05, |
|
|
1.4402e-05, 1.9848e-05], |
|
|
[ 9.1970e-05, 1.9833e-05, 1.6674e-05, ..., 5.2363e-05, |
|
|
5.4508e-05, 6.0260e-05], |
|
|
[ 1.2094e-04, 4.5180e-05, 4.3631e-05, ..., 2.4304e-05, |
|
|
2.4974e-05, 3.4660e-05], |
|
|
..., |
|
|
[ 1.0721e-05, 3.2503e-06, 2.9113e-06, ..., 4.1090e-06, |
|
|
4.3735e-06, 5.1148e-06], |
|
|
[-1.9260e-06, 8.3819e-08, 8.9779e-07, ..., 8.9593e-07, |
|
|
7.9162e-07, 1.0133e-06], |
|
|
[-2.2829e-04, -8.7917e-05, -8.5890e-05, ..., -4.2439e-05, |
|
|
-4.3660e-05, -6.2108e-05]], device='cuda:0') |
|
|
Epoch 68, bias, value: tensor([-0.0325, 0.0418, -0.0568, -0.0208, 0.0470, -0.0302, 0.0551], |
|
|
device='cuda:0'), grad: tensor([ 1.3232e-04, 2.3067e-04, 2.3127e-04, -1.8311e-04, 2.3663e-05, |
|
|
-9.8050e-06, -4.2439e-04], device='cuda:0') |
|
|
306 |
|
|
4.5251191160326525e-05 |
|
|
changing lr |
|
|
epoch 67, time 411.01, cls_loss 0.0014 cls_loss_mapping 0.0238 cls_loss_causal 0.6263 re_mapping 0.0403 re_causal 0.0434 |
|
|
Epoch 69, weight, value: tensor([[ 0.2470, 0.2471, 0.2787, ..., -0.0149, -0.0034, -0.0143], |
|
|
[-0.0482, -0.0434, -0.0645, ..., 0.0513, 0.0537, 0.0464], |
|
|
[-0.0898, -0.0829, -0.0974, ..., 0.0381, 0.0214, 0.0152], |
|
|
..., |
|
|
[-0.1060, -0.1089, -0.0679, ..., 0.0180, 0.0671, 0.0509], |
|
|
[-0.0209, -0.0195, -0.0430, ..., -0.1309, -0.1479, -0.1600], |
|
|
[ 0.0776, 0.0792, 0.0702, ..., -0.0262, -0.0192, 0.0041]], |
|
|
device='cuda:0'), grad: tensor([[-1.0309e-03, -6.2466e-04, -6.1512e-04, ..., -9.6560e-05, |
|
|
-1.5414e-04, -1.9073e-04], |
|
|
[ 5.1165e-04, 2.3520e-04, 2.2519e-04, ..., 1.5378e-04, |
|
|
1.6558e-04, 1.8859e-04], |
|
|
[ 3.4738e-04, 1.6952e-04, 1.6379e-04, ..., 7.6950e-05, |
|
|
8.8632e-05, 1.0359e-04], |
|
|
..., |
|
|
[-6.6471e-04, -1.8024e-04, -1.5926e-04, ..., -3.7694e-04, |
|
|
-3.6836e-04, -4.0412e-04], |
|
|
[ 2.2173e-04, 7.6950e-05, 7.1943e-05, ..., 7.9215e-05, |
|
|
8.2910e-05, 9.3460e-05], |
|
|
[ 4.4799e-04, 2.6631e-04, 2.6083e-04, ..., 8.7559e-05, |
|
|
1.0908e-04, 1.2445e-04]], device='cuda:0') |
|
|
Epoch 69, bias, value: tensor([-0.0325, 0.0418, -0.0568, -0.0208, 0.0470, -0.0301, 0.0551], |
|
|
device='cuda:0'), grad: tensor([-0.0012, 0.0010, 0.0006, 0.0004, -0.0018, 0.0005, 0.0005], |
|
|
device='cuda:0') |
|
|
306 |
|
|
2.0128530023804673e-05 |
|
|
changing lr |
|
|
epoch 68, time 410.85, cls_loss 0.0015 cls_loss_mapping 0.0246 cls_loss_causal 0.5761 re_mapping 0.0403 re_causal 0.0433 |
|
|
Epoch 70, weight, value: tensor([[ 0.2470, 0.2471, 0.2787, ..., -0.0149, -0.0034, -0.0143], |
|
|
[-0.0482, -0.0434, -0.0645, ..., 0.0513, 0.0537, 0.0464], |
|
|
[-0.0898, -0.0829, -0.0974, ..., 0.0381, 0.0214, 0.0152], |
|
|
..., |
|
|
[-0.1059, -0.1089, -0.0679, ..., 0.0180, 0.0671, 0.0509], |
|
|
[-0.0209, -0.0195, -0.0430, ..., -0.1309, -0.1479, -0.1600], |
|
|
[ 0.0776, 0.0791, 0.0702, ..., -0.0262, -0.0192, 0.0041]], |
|
|
device='cuda:0'), grad: tensor([[ 3.0696e-05, 8.4490e-06, 8.9705e-06, ..., 7.9796e-06, |
|
|
8.0392e-06, 9.2313e-06], |
|
|
[ 2.1577e-05, 5.4277e-06, 5.2042e-06, ..., 7.7486e-06, |
|
|
7.9274e-06, 8.6129e-06], |
|
|
[ 1.7853e-06, 1.0766e-06, 1.6764e-08, ..., 1.8803e-06, |
|
|
2.0973e-06, 1.9260e-06], |
|
|
..., |
|
|
[ 7.2084e-06, 2.0005e-06, 1.8878e-06, ..., 1.7714e-06, |
|
|
1.9129e-06, 2.2277e-06], |
|
|
[-7.9811e-05, -2.2769e-05, -2.2128e-05, ..., -2.4080e-05, |
|
|
-2.4781e-05, -2.7478e-05], |
|
|
[ 1.5169e-05, 3.6657e-06, 3.6526e-06, ..., 5.2936e-06, |
|
|
5.4389e-06, 5.9977e-06]], device='cuda:0') |
|
|
Epoch 70, bias, value: tensor([-0.0325, 0.0418, -0.0568, -0.0208, 0.0470, -0.0301, 0.0551], |
|
|
device='cuda:0'), grad: tensor([ 8.1778e-05, 6.2466e-05, 9.5889e-06, 1.1146e-05, 2.0608e-05, |
|
|
-2.3401e-04, 4.8488e-05], device='cuda:0') |
|
|
306 |
|
|
5.034667293427056e-06 |
|
|
changing lr |
|
|
epoch 69, time 414.33, cls_loss 0.0018 cls_loss_mapping 0.0269 cls_loss_causal 0.6085 re_mapping 0.0402 re_causal 0.0433 |
|
|
---------------------saving last model at epoch 69---------------------------------------------------- |
|
|
/home/yuqian_fu |
|
|
{'gpu': '0', 'svroot': '/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/saved-PACS//art_painting/CA_multiple_16fa_v2_ep70_lr0.01_cosine_base0.01_bs6_lamCa_1_lamRe1_adt4_cls1_EW2_70_rmTrue_rnTrue_str5_WithStyleAttackExp1', 'source_domain': 'art_painting', 'svpath': '/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/saved-PACS//art_painting/CA_multiple_16fa_v2_ep70_lr0.01_cosine_base0.01_bs6_lamCa_1_lamRe1_adt4_cls1_EW2_70_rmTrue_rnTrue_str5_WithStyleAttackExp1/art_painting_16factor_best_test_check.csv', 'factor_num': 16, 'epoch': 'best', 'stride': 5, 'eval_mapping': False, 'network': 'resnet18'} |
|
|
-------------------------------------loading pretrain weights---------------------------------- |
|
|
loading weight of best |
|
|
randm: False |
|
|
stride: 5 |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
columns: ['art_painting', 'cartoon', 'photo', 'sketch'] |
|
|
/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/data/PACS/art_painting_test.hdf5 torch.Size([2048, 3, 227, 227]) torch.Size([2048]) |
|
|
/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/data/PACS/cartoon_test.hdf5 torch.Size([2344, 3, 227, 227]) torch.Size([2344]) |
|
|
/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/data/PACS/photo_test.hdf5 torch.Size([1670, 3, 227, 227]) torch.Size([1670]) |
|
|
/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/data/PACS/sketch_test.hdf5 torch.Size([3929, 3, 227, 227]) torch.Size([3929]) |
|
|
art_painting cartoon photo sketch Avg |
|
|
w/o do (original x) 99.023438 69.795222 90.778443 73.199287 77.924317 |
|
|
art_painting cartoon photo sketch Avg |
|
|
do 99.023438 72.1843 92.035928 72.410283 78.876837 |
|
|
|