|
|
/home/yuqian_fu |
|
|
here1 |
|
|
here2 |
|
|
{'gpu': '0', 'data': 'photo', 'ntr': None, 'translate': None, 'autoaug': 'CA_multiple', 'n': 3, 'stride': 5, 'factor_num': 16, 'epochs': 70, 'nbatch': 100, 'batchsize': 6, 'lr': 0.01, 'lr_scheduler': 'cosine', 'svroot': '/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/saved-PACS//photo/CA_multiple_16fa_v2_ep70_lr0.01_cosine_base0.01_bs6_lamCa_1_lamRe1_adt4_cls1_EW2_70_rmTrue_rnTrue_str5_WithStyleAttackExp1', 'clsadapt': True, 'lambda_causal': 1.0, 'lambda_re': 1.0, 'randm': True, 'randn': True, 'network': 'resnet18'} |
|
|
stride: 5 |
|
|
/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/data/PACS/photo_train.hdf5 torch.Size([1499, 3, 227, 227]) torch.Size([1499]) |
|
|
--------------------------CA_multiple-------------------------- |
|
|
---------------------------16 factors----------------- |
|
|
randm: True |
|
|
randn: True |
|
|
n: 3 |
|
|
randm: False |
|
|
/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/data/PACS/photo_val.hdf5 torch.Size([171, 3, 227, 227]) torch.Size([171]) |
|
|
-------------------------------------loading pretrain weights---------------------------------- |
|
|
Epoch 1, weight, value: tensor([[ 1.3781e-02, -2.1813e-02, -1.7643e-02, ..., -3.4786e-03, |
|
|
7.0747e-03, 1.6051e-02], |
|
|
[ 1.6921e-02, 1.6796e-02, 3.9765e-03, ..., 6.6386e-03, |
|
|
2.4196e-03, 9.7383e-03], |
|
|
[ 1.8894e-03, -6.7549e-03, 2.1032e-02, ..., -1.9916e-02, |
|
|
-1.0781e-02, 1.7924e-02], |
|
|
..., |
|
|
[ 4.6711e-04, -7.8218e-03, 8.8535e-03, ..., -1.7695e-02, |
|
|
-1.9742e-02, -5.5186e-03], |
|
|
[-1.4236e-02, 1.5676e-02, -6.0038e-03, ..., 1.4177e-02, |
|
|
1.0025e-02, -3.0311e-03], |
|
|
[-1.4947e-02, 1.6332e-02, 1.3555e-02, ..., 1.0778e-02, |
|
|
9.8178e-05, -1.3844e-02]], device='cuda:0'), grad: None |
|
|
Epoch 1, bias, value: tensor([ 0.0165, -0.0171, 0.0209, -0.0155, 0.0020, -0.0164, 0.0005], |
|
|
device='cuda:0'), grad: None |
|
|
249 |
|
|
0.01 |
|
|
changing lr |
|
|
---------------------saving model at epoch 0---------------------------------------------------- |
|
|
epoch 0, time 348.21, cls_loss 9.8684 cls_loss_mapping 1.6022 cls_loss_causal 1.9054 re_mapping 1.0159 re_causal 1.0189 |
|
|
Epoch 2, weight, value: tensor([[ 0.0774, 0.0641, 0.0080, ..., -0.0354, -0.0471, -0.0399], |
|
|
[-0.0260, -0.0458, -0.0257, ..., -0.0312, -0.0105, -0.0277], |
|
|
[-0.0029, -0.0232, 0.0263, ..., -0.0459, -0.0319, -0.0043], |
|
|
..., |
|
|
[ 0.0248, 0.0389, 0.0264, ..., 0.0109, -0.0060, 0.0109], |
|
|
[-0.0430, -0.0148, -0.0446, ..., -0.0595, -0.0504, -0.0497], |
|
|
[ 0.0564, 0.0909, 0.0901, ..., 0.0745, 0.0518, 0.0574]], |
|
|
device='cuda:0'), grad: tensor([[-1.0231e-02, -3.4180e-03, -1.1459e-02, ..., 7.9918e-04, |
|
|
-4.7612e-04, -1.6251e-03], |
|
|
[ 1.5697e-03, 1.3151e-03, 1.7748e-03, ..., 1.2712e-03, |
|
|
1.7195e-03, 1.3046e-03], |
|
|
[ 7.8049e-03, 2.6760e-03, 9.8877e-03, ..., 4.4975e-03, |
|
|
4.5662e-03, 3.6087e-03], |
|
|
..., |
|
|
[ 2.2011e-03, 1.6279e-03, 2.6894e-03, ..., 2.0332e-03, |
|
|
2.4071e-03, 1.8253e-03], |
|
|
[ 1.2942e-05, 4.3437e-06, 1.5251e-05, ..., 2.3097e-06, |
|
|
3.3155e-06, 3.5837e-06], |
|
|
[-3.1967e-03, -2.8076e-03, -3.6583e-03, ..., -2.7180e-03, |
|
|
-3.6221e-03, -2.7809e-03]], device='cuda:0') |
|
|
Epoch 2, bias, value: tensor([ 0.0328, -0.0842, 0.2049, 0.0465, 0.0064, -0.1213, -0.0942], |
|
|
device='cuda:0'), grad: tensor([-0.0866, 0.0029, 0.1262, -0.0544, 0.0152, 0.0001, -0.0035], |
|
|
device='cuda:0') |
|
|
249 |
|
|
0.009994965332706574 |
|
|
changing lr |
|
|
---------------------saving model at epoch 1---------------------------------------------------- |
|
|
epoch 1, time 347.28, cls_loss 3.2468 cls_loss_mapping 1.3975 cls_loss_causal 1.8708 re_mapping 0.7421 re_causal 0.7474 |
|
|
Epoch 3, weight, value: tensor([[ 0.0785, 0.0747, 0.0011, ..., -0.0510, -0.0911, -0.0519], |
|
|
[-0.0322, -0.0574, -0.0140, ..., -0.0477, -0.0259, -0.0421], |
|
|
[-0.0056, -0.0181, 0.0213, ..., -0.0507, -0.0635, -0.0161], |
|
|
..., |
|
|
[-0.0080, 0.0068, -0.0124, ..., -0.0106, -0.0226, -0.0205], |
|
|
[-0.0517, -0.0187, -0.0551, ..., -0.0376, -0.0179, -0.0239], |
|
|
[ 0.0674, 0.1059, 0.1026, ..., 0.1048, 0.0980, 0.0961]], |
|
|
device='cuda:0'), grad: tensor([[ 3.9101e-05, 3.8743e-05, 1.3423e-04, ..., 1.6487e-04, |
|
|
1.4436e-04, 1.5497e-04], |
|
|
[ 1.8263e-03, 1.4849e-03, 3.7289e-03, ..., 2.7828e-03, |
|
|
2.1725e-03, 2.8553e-03], |
|
|
[ 4.3368e-04, 9.7847e-04, 3.6945e-03, ..., 7.3967e-03, |
|
|
7.4463e-03, 7.1487e-03], |
|
|
..., |
|
|
[-2.4109e-03, -1.7910e-03, -5.5389e-03, ..., -3.6659e-03, |
|
|
-2.2316e-03, -3.3226e-03], |
|
|
[ 1.4687e-04, 1.4591e-04, 7.2622e-04, ..., 9.2077e-04, |
|
|
7.4911e-04, 8.0156e-04], |
|
|
[ 3.0994e-06, 2.9132e-06, 1.0602e-05, ..., 1.2219e-05, |
|
|
1.0259e-05, 1.1235e-05]], device='cuda:0') |
|
|
Epoch 3, bias, value: tensor([-0.0016, -0.0449, 0.2193, 0.0254, 0.0386, -0.1846, -0.0613], |
|
|
device='cuda:0'), grad: tensor([ 0.0033, 0.0715, 0.1316, -0.1288, -0.0958, 0.0180, 0.0003], |
|
|
device='cuda:0') |
|
|
249 |
|
|
0.009979871469976196 |
|
|
changing lr |
|
|
epoch 2, time 346.20, cls_loss 3.2564 cls_loss_mapping 1.4641 cls_loss_causal 1.8684 re_mapping 0.6795 re_causal 0.6891 |
|
|
Epoch 4, weight, value: tensor([[ 0.1029, 0.0873, 0.0093, ..., -0.0784, -0.1080, -0.0719], |
|
|
[-0.0437, -0.0644, -0.0257, ..., -0.0425, -0.0232, -0.0373], |
|
|
[-0.0310, -0.0245, -0.0142, ..., -0.0414, -0.0979, -0.0245], |
|
|
..., |
|
|
[-0.0239, -0.0071, -0.0055, ..., -0.0321, -0.0321, -0.0325], |
|
|
[-0.0364, -0.0142, -0.0472, ..., -0.0315, -0.0031, -0.0247], |
|
|
[ 0.0723, 0.1110, 0.0884, ..., 0.1055, 0.1089, 0.1103]], |
|
|
device='cuda:0'), grad: tensor([[-7.9393e-04, -1.0939e-03, -4.1733e-03, ..., -1.1044e-03, |
|
|
-8.3113e-04, -1.1721e-03], |
|
|
[ 2.8372e-04, 3.9291e-04, 2.0332e-03, ..., 5.7697e-04, |
|
|
4.7493e-04, 6.3467e-04], |
|
|
[ 1.4377e-04, 1.9968e-04, 1.2112e-03, ..., 3.5334e-04, |
|
|
3.0017e-04, 3.9363e-04], |
|
|
..., |
|
|
[ 4.0245e-04, 5.5695e-04, 2.8667e-03, ..., 8.1253e-04, |
|
|
6.6805e-04, 8.9312e-04], |
|
|
[ 6.9797e-05, 9.6500e-05, 5.6171e-04, ..., 1.6296e-04, |
|
|
1.3769e-04, 1.8096e-04], |
|
|
[-1.5214e-05, 1.3635e-06, 7.7248e-05, ..., 1.8284e-05, |
|
|
-2.4140e-06, 2.1279e-05]], device='cuda:0') |
|
|
Epoch 4, bias, value: tensor([-0.0196, 0.1142, 0.2746, 0.0413, -0.0684, -0.2400, -0.1112], |
|
|
device='cuda:0'), grad: tensor([-0.0831, 0.0550, 0.0363, -0.1049, 0.0771, 0.0164, 0.0032], |
|
|
device='cuda:0') |
|
|
249 |
|
|
0.009954748808839675 |
|
|
changing lr |
|
|
epoch 3, time 349.42, cls_loss 2.7298 cls_loss_mapping 1.5867 cls_loss_causal 2.0432 re_mapping 0.5943 re_causal 0.6005 |
|
|
Epoch 5, weight, value: tensor([[ 0.1081, 0.0900, 0.0151, ..., -0.0868, -0.1245, -0.0882], |
|
|
[-0.0605, -0.0741, -0.0279, ..., -0.0441, -0.0276, -0.0324], |
|
|
[-0.0019, -0.0298, 0.0355, ..., -0.0761, -0.1008, -0.0539], |
|
|
..., |
|
|
[-0.0150, -0.0192, -0.0049, ..., -0.0336, -0.0156, -0.0245], |
|
|
[-0.0533, -0.0477, -0.0658, ..., -0.0337, -0.0129, -0.0378], |
|
|
[ 0.1000, 0.1785, 0.0972, ..., 0.1097, 0.1032, 0.1183]], |
|
|
device='cuda:0'), grad: tensor([[-1.2827e-03, -1.2982e-04, -2.0485e-03, ..., -5.8632e-03, |
|
|
-4.2610e-03, -4.8866e-03], |
|
|
[ 7.7248e-04, 4.5151e-05, 1.0691e-03, ..., 3.7918e-03, |
|
|
3.3112e-03, 3.3703e-03], |
|
|
[ 1.0872e-04, 1.4193e-05, 1.6677e-04, ..., 4.7517e-04, |
|
|
3.9291e-04, 4.0770e-04], |
|
|
..., |
|
|
[ 5.5408e-04, 1.0329e-04, 8.8072e-04, ..., 2.1973e-03, |
|
|
1.8730e-03, 1.8682e-03], |
|
|
[ 1.1146e-05, 1.7807e-06, 1.7256e-05, ..., 4.6402e-05, |
|
|
3.9756e-05, 3.9905e-05], |
|
|
[ 1.4134e-05, 2.3544e-06, 1.9610e-05, ..., 5.8502e-05, |
|
|
6.0886e-05, 5.3585e-05]], device='cuda:0') |
|
|
Epoch 5, bias, value: tensor([ 0.0686, 0.2019, 0.1705, 0.0255, 0.1121, -0.3466, -0.2412], |
|
|
device='cuda:0'), grad: tensor([-0.1044, 0.0511, 0.0085, -0.0029, 0.0458, 0.0009, 0.0010], |
|
|
device='cuda:0') |
|
|
249 |
|
|
0.009919647942993149 |
|
|
changing lr |
|
|
epoch 4, time 346.66, cls_loss 3.5903 cls_loss_mapping 1.6275 cls_loss_causal 2.0572 re_mapping 0.5120 re_causal 0.5143 |
|
|
Epoch 6, weight, value: tensor([[ 0.0966, 0.1068, 0.0187, ..., -0.0904, -0.1378, -0.0916], |
|
|
[-0.0324, -0.0771, 0.0030, ..., -0.0663, -0.0303, -0.0512], |
|
|
[ 0.0144, -0.0241, 0.0409, ..., -0.0741, -0.0996, -0.0597], |
|
|
..., |
|
|
[-0.0197, -0.0390, -0.0097, ..., -0.0399, -0.0355, -0.0279], |
|
|
[-0.0444, -0.0515, -0.0625, ..., -0.0262, 0.0110, -0.0326], |
|
|
[ 0.0648, 0.2060, 0.0403, ..., 0.1140, 0.0686, 0.1289]], |
|
|
device='cuda:0'), grad: tensor([[ 3.0975e-03, 6.9201e-05, 4.6806e-03, ..., 4.5848e-04, |
|
|
1.0166e-03, 3.1519e-04], |
|
|
[ 1.7853e-02, 2.6822e-04, 2.4368e-02, ..., 1.7824e-03, |
|
|
5.9357e-03, 9.9754e-04], |
|
|
[-3.5362e-03, -1.0973e-04, -6.5269e-03, ..., -1.0843e-03, |
|
|
3.8576e-04, 3.3927e-04], |
|
|
..., |
|
|
[-1.7624e-02, -7.7367e-05, -1.9379e-02, ..., 6.7770e-05, |
|
|
-6.5308e-03, 1.6078e-05], |
|
|
[-3.2139e-03, -2.4211e-04, -8.5297e-03, ..., -1.7824e-03, |
|
|
-1.9932e-03, -2.1458e-03], |
|
|
[ 1.1911e-03, 2.1294e-05, 1.6680e-03, ..., 1.2493e-04, |
|
|
4.1246e-04, 9.4652e-05]], device='cuda:0') |
|
|
Epoch 6, bias, value: tensor([ 0.0241, 0.2085, 0.0226, 0.1522, 0.1168, -0.1597, -0.3738], |
|
|
device='cuda:0'), grad: tensor([ 0.0638, 0.2766, -0.0494, 0.0591, -0.0909, -0.2791, 0.0196], |
|
|
device='cuda:0') |
|
|
249 |
|
|
0.009874639560909117 |
|
|
changing lr |
|
|
epoch 5, time 347.35, cls_loss 3.8974 cls_loss_mapping 1.7399 cls_loss_causal 2.0707 re_mapping 0.4239 re_causal 0.4300 |
|
|
Epoch 7, weight, value: tensor([[ 0.0807, 0.1011, 0.0078, ..., -0.0864, -0.1360, -0.0869], |
|
|
[-0.0310, -0.0750, 0.0037, ..., -0.0671, -0.0333, -0.0504], |
|
|
[ 0.0193, -0.0341, 0.0472, ..., -0.1011, -0.0952, -0.0832], |
|
|
..., |
|
|
[-0.0081, -0.0604, 0.0070, ..., -0.0446, -0.0309, -0.0428], |
|
|
[-0.0455, -0.0510, -0.0497, ..., -0.0187, 0.0106, -0.0391], |
|
|
[ 0.0654, 0.2450, 0.0348, ..., 0.1289, 0.0681, 0.1527]], |
|
|
device='cuda:0'), grad: tensor([[ 2.0275e-03, 4.0388e-04, 5.3253e-03, ..., 4.7417e-03, |
|
|
5.2872e-03, 4.8218e-03], |
|
|
[-2.3365e-03, -5.3227e-05, -5.9280e-03, ..., -1.7691e-03, |
|
|
-9.7513e-04, -1.3580e-03], |
|
|
[ 9.0122e-04, 1.6940e-04, 2.2984e-03, ..., 1.9093e-03, |
|
|
2.0332e-03, 1.8892e-03], |
|
|
..., |
|
|
[ 1.4031e-04, 2.3878e-04, 8.2350e-04, ..., -4.3373e-03, |
|
|
-4.6272e-03, -4.6959e-03], |
|
|
[-1.5945e-03, -9.1267e-04, -4.8599e-03, ..., -2.5234e-03, |
|
|
-3.9253e-03, -2.6951e-03], |
|
|
[ 3.6925e-05, 6.7800e-06, 9.6202e-05, ..., 7.4565e-05, |
|
|
8.3208e-05, 7.4804e-05]], device='cuda:0') |
|
|
Epoch 7, bias, value: tensor([-0.0282, 0.2684, 0.0592, 0.0523, 0.3811, -0.0482, -0.6935], |
|
|
device='cuda:0'), grad: tensor([ 0.1420, -0.1486, 0.0656, 0.0565, 0.0802, -0.1981, 0.0025], |
|
|
device='cuda:0') |
|
|
249 |
|
|
0.009819814303479266 |
|
|
changing lr |
|
|
epoch 6, time 348.88, cls_loss 2.1611 cls_loss_mapping 1.5626 cls_loss_causal 2.0639 re_mapping 0.4027 re_causal 0.4165 |
|
|
Epoch 8, weight, value: tensor([[ 0.0680, 0.0850, -0.0052, ..., -0.0881, -0.1487, -0.0975], |
|
|
[-0.0428, -0.0737, 0.0020, ..., -0.0586, -0.0247, -0.0390], |
|
|
[ 0.0264, -0.0334, 0.0473, ..., -0.1011, -0.1059, -0.0839], |
|
|
..., |
|
|
[-0.0076, -0.0591, 0.0107, ..., -0.0418, -0.0316, -0.0460], |
|
|
[-0.0439, -0.0489, -0.0498, ..., -0.0385, -0.0017, -0.0530], |
|
|
[ 0.0812, 0.2544, 0.0455, ..., 0.1305, 0.0698, 0.1522]], |
|
|
device='cuda:0'), grad: tensor([[ 5.3692e-04, 2.7537e-05, 2.0752e-03, ..., -8.0633e-04, |
|
|
-4.9686e-04, -8.4734e-04], |
|
|
[-4.8578e-05, 6.2227e-05, -8.9824e-05, ..., 7.8249e-04, |
|
|
6.9737e-06, 7.3862e-04], |
|
|
[ 3.7956e-04, 1.3542e-04, 1.1082e-03, ..., 8.0729e-04, |
|
|
7.6675e-04, 7.0953e-04], |
|
|
..., |
|
|
[-6.8426e-04, -2.0921e-05, -2.8839e-03, ..., -1.2426e-03, |
|
|
-8.3065e-04, -1.3056e-03], |
|
|
[-5.6648e-04, -3.2020e-04, -1.4095e-03, ..., -2.0278e-04, |
|
|
-7.4446e-05, 1.4007e-04], |
|
|
[ 1.3745e-04, 3.2336e-05, 4.5538e-04, ..., 2.6965e-04, |
|
|
2.5702e-04, 2.5105e-04]], device='cuda:0') |
|
|
Epoch 8, bias, value: tensor([ 0.1659, 0.0559, 0.0831, -0.0525, 0.2321, 0.0564, -0.5498], |
|
|
device='cuda:0'), grad: tensor([ 0.0200, 0.0316, 0.0316, 0.0189, -0.0889, -0.0259, 0.0126], |
|
|
device='cuda:0') |
|
|
249 |
|
|
0.009755282581475767 |
|
|
changing lr |
|
|
epoch 7, time 347.51, cls_loss 1.8054 cls_loss_mapping 1.4719 cls_loss_causal 1.9194 re_mapping 0.3440 re_causal 0.3578 |
|
|
Epoch 9, weight, value: tensor([[ 0.0677, 0.0792, 0.0004, ..., -0.0899, -0.1448, -0.1001], |
|
|
[-0.0454, -0.0682, -0.0072, ..., -0.0556, -0.0266, -0.0361], |
|
|
[ 0.0299, -0.0330, 0.0590, ..., -0.0957, -0.0870, -0.0778], |
|
|
..., |
|
|
[-0.0150, -0.0534, 0.0009, ..., -0.0312, -0.0406, -0.0365], |
|
|
[-0.0395, -0.0464, -0.0477, ..., -0.0495, -0.0057, -0.0608], |
|
|
[ 0.0828, 0.2522, 0.0470, ..., 0.1284, 0.0701, 0.1498]], |
|
|
device='cuda:0'), grad: tensor([[ 9.4354e-05, 2.9802e-08, 5.6028e-04, ..., 2.4009e-04, |
|
|
2.0373e-04, 1.9014e-04], |
|
|
[ 3.6687e-05, 0.0000e+00, 8.0585e-05, ..., 9.3341e-05, |
|
|
6.4671e-05, 7.3910e-05], |
|
|
[ 8.9228e-05, 0.0000e+00, 5.1832e-04, ..., 2.2709e-04, |
|
|
1.9145e-04, 1.7989e-04], |
|
|
..., |
|
|
[ 1.3673e-04, 0.0000e+00, 8.4925e-04, ..., 3.4785e-04, |
|
|
2.9922e-04, 2.7561e-04], |
|
|
[-4.1175e-04, 0.0000e+00, -2.3327e-03, ..., -1.0481e-03, |
|
|
-8.7738e-04, -8.2970e-04], |
|
|
[ 1.2487e-05, -2.9802e-08, 7.4208e-05, ..., 3.1859e-05, |
|
|
2.6986e-05, 2.5228e-05]], device='cuda:0') |
|
|
Epoch 9, bias, value: tensor([ 0.2562, 0.0744, 0.0247, -0.0490, 0.3360, -0.0729, -0.5780], |
|
|
device='cuda:0'), grad: tensor([ 0.0390, -0.0083, 0.0350, 0.0175, 0.0630, -0.1512, 0.0052], |
|
|
device='cuda:0') |
|
|
249 |
|
|
0.009681174353198686 |
|
|
changing lr |
|
|
epoch 8, time 344.34, cls_loss 1.1558 cls_loss_mapping 1.3434 cls_loss_causal 1.8095 re_mapping 0.3223 re_causal 0.3378 |
|
|
Epoch 10, weight, value: tensor([[ 0.0637, 0.0797, -0.0172, ..., -0.0936, -0.1668, -0.1064], |
|
|
[-0.0445, -0.0665, -0.0026, ..., -0.0505, -0.0229, -0.0326], |
|
|
[ 0.0233, -0.0360, 0.0487, ..., -0.1026, -0.0940, -0.0814], |
|
|
..., |
|
|
[-0.0145, -0.0573, 0.0077, ..., -0.0440, -0.0493, -0.0489], |
|
|
[-0.0382, -0.0424, -0.0457, ..., -0.0357, 0.0062, -0.0461], |
|
|
[ 0.0888, 0.2514, 0.0565, ..., 0.1306, 0.0868, 0.1515]], |
|
|
device='cuda:0'), grad: tensor([[-1.7223e-03, 3.1531e-05, -6.3858e-03, ..., -2.5387e-03, |
|
|
-4.1237e-03, -2.4738e-03], |
|
|
[ 3.2926e-04, 4.4525e-05, 6.4516e-04, ..., -1.1349e-03, |
|
|
-1.8263e-03, -1.7786e-03], |
|
|
[ 6.6805e-04, 3.7193e-05, 2.1992e-03, ..., 1.1168e-03, |
|
|
1.1148e-03, 6.0797e-04], |
|
|
..., |
|
|
[-7.1383e-04, -2.6226e-04, -6.6233e-04, ..., -3.1734e-04, |
|
|
3.4962e-03, 3.0727e-03], |
|
|
[ 7.5817e-04, 8.6546e-05, 2.1133e-03, ..., 1.5717e-03, |
|
|
4.4703e-04, 1.1426e-04], |
|
|
[ 1.2231e-04, 9.2536e-06, 3.8886e-04, ..., 2.2089e-04, |
|
|
1.8620e-04, 1.0014e-04]], device='cuda:0') |
|
|
Epoch 10, bias, value: tensor([ 0.1999, 0.0502, -0.0396, 0.0345, 0.3870, -0.0554, -0.5850], |
|
|
device='cuda:0'), grad: tensor([-0.1238, 0.0103, 0.0461, 0.0386, -0.0331, 0.0534, 0.0084], |
|
|
device='cuda:0') |
|
|
249 |
|
|
0.009597638862757255 |
|
|
changing lr |
|
|
epoch 9, time 346.82, cls_loss 1.0152 cls_loss_mapping 1.2447 cls_loss_causal 1.7866 re_mapping 0.2933 re_causal 0.2910 |
|
|
Epoch 11, weight, value: tensor([[ 6.1647e-02, 7.1985e-02, -6.6718e-03, ..., -8.4501e-02, |
|
|
-1.5132e-01, -9.8548e-02], |
|
|
[-4.4163e-02, -6.3831e-02, -2.0746e-05, ..., -5.3221e-02, |
|
|
-2.4077e-02, -3.4561e-02], |
|
|
[ 2.2785e-02, -3.1338e-02, 4.7170e-02, ..., -1.0755e-01, |
|
|
-1.0055e-01, -8.6465e-02], |
|
|
..., |
|
|
[-1.1486e-02, -5.7438e-02, 8.9108e-03, ..., -4.2754e-02, |
|
|
-4.6038e-02, -4.9284e-02], |
|
|
[-3.6297e-02, -4.0168e-02, -5.0431e-02, ..., -3.7683e-02, |
|
|
1.5160e-04, -4.6720e-02], |
|
|
[ 8.4497e-02, 2.4673e-01, 4.9078e-02, ..., 1.2770e-01, |
|
|
8.1287e-02, 1.4858e-01]], device='cuda:0'), grad: tensor([[-7.1973e-06, -1.4484e-05, -4.7159e-04, ..., -8.3542e-04, |
|
|
-3.4523e-04, -3.3140e-04], |
|
|
[ 5.9009e-05, 1.4454e-06, 4.6563e-04, ..., 1.5724e-04, |
|
|
2.6941e-04, 1.0693e-04], |
|
|
[ 7.9334e-05, 3.0696e-06, 6.5899e-04, ..., 2.7585e-04, |
|
|
3.8648e-04, 1.6868e-04], |
|
|
..., |
|
|
[-2.1803e-04, 7.0184e-06, -1.3599e-03, ..., 1.3018e-04, |
|
|
-7.1812e-04, -1.1444e-04], |
|
|
[ 2.7567e-05, 1.3560e-06, 2.3830e-04, ..., 1.1498e-04, |
|
|
1.4412e-04, 6.7711e-05], |
|
|
[ 1.1414e-05, 3.4273e-07, 9.2506e-05, ..., 3.5554e-05, |
|
|
5.5403e-05, 2.3544e-05]], device='cuda:0') |
|
|
Epoch 11, bias, value: tensor([ 0.3634, -0.0167, -0.1066, 0.0363, 0.4038, -0.0840, -0.6044], |
|
|
device='cuda:0'), grad: tensor([-0.0597, 0.0193, 0.0304, 0.0157, -0.0216, 0.0118, 0.0040], |
|
|
device='cuda:0') |
|
|
249 |
|
|
0.009504844339512096 |
|
|
changing lr |
|
|
---------------------saving model at epoch 10---------------------------------------------------- |
|
|
epoch 10, time 346.95, cls_loss 0.7606 cls_loss_mapping 1.2027 cls_loss_causal 1.7721 re_mapping 0.2745 re_causal 0.2954 |
|
|
Epoch 12, weight, value: tensor([[ 0.0608, 0.0733, -0.0066, ..., -0.0808, -0.1450, -0.0952], |
|
|
[-0.0455, -0.0656, -0.0015, ..., -0.0516, -0.0244, -0.0344], |
|
|
[ 0.0261, -0.0287, 0.0484, ..., -0.1044, -0.0978, -0.0841], |
|
|
..., |
|
|
[-0.0148, -0.0566, 0.0036, ..., -0.0439, -0.0561, -0.0477], |
|
|
[-0.0346, -0.0400, -0.0446, ..., -0.0398, 0.0050, -0.0485], |
|
|
[ 0.0834, 0.2440, 0.0476, ..., 0.1261, 0.0795, 0.1467]], |
|
|
device='cuda:0'), grad: tensor([[ 2.9125e-03, 2.1839e-03, 1.1055e-02, ..., 5.8022e-03, |
|
|
1.0353e-02, 4.9248e-03], |
|
|
[ 6.7568e-04, 3.6812e-04, 2.3785e-03, ..., 8.6689e-04, |
|
|
1.7233e-03, 8.5974e-04], |
|
|
[ 2.8276e-04, 1.4174e-04, 1.0948e-03, ..., 5.7745e-04, |
|
|
9.5844e-04, 4.5371e-04], |
|
|
..., |
|
|
[-4.9820e-03, -3.3283e-03, -1.8692e-02, ..., -9.2621e-03, |
|
|
-1.6571e-02, -7.9346e-03], |
|
|
[ 2.0456e-04, 1.1301e-04, 7.2622e-04, ..., 2.7800e-04, |
|
|
5.3978e-04, 2.6870e-04], |
|
|
[ 5.4210e-05, 3.0264e-05, 2.0385e-04, ..., 9.9778e-05, |
|
|
1.7345e-04, 8.3029e-05]], device='cuda:0') |
|
|
Epoch 12, bias, value: tensor([ 0.2281, 0.0016, -0.0989, 0.1680, 0.3630, -0.0823, -0.5875], |
|
|
device='cuda:0'), grad: tensor([ 0.1564, 0.0358, 0.0146, 0.0443, -0.2646, 0.0108, 0.0028], |
|
|
device='cuda:0') |
|
|
249 |
|
|
0.009402977659283692 |
|
|
changing lr |
|
|
epoch 11, time 348.26, cls_loss 0.8984 cls_loss_mapping 1.2257 cls_loss_causal 1.7599 re_mapping 0.2440 re_causal 0.2828 |
|
|
Epoch 13, weight, value: tensor([[ 0.0547, 0.0676, -0.0145, ..., -0.0803, -0.1415, -0.0967], |
|
|
[-0.0441, -0.0585, -0.0022, ..., -0.0485, -0.0291, -0.0319], |
|
|
[ 0.0192, -0.0320, 0.0357, ..., -0.1065, -0.1117, -0.0865], |
|
|
..., |
|
|
[-0.0133, -0.0545, 0.0126, ..., -0.0470, -0.0490, -0.0475], |
|
|
[-0.0264, -0.0354, -0.0327, ..., -0.0376, 0.0110, -0.0460], |
|
|
[ 0.0860, 0.2429, 0.0513, ..., 0.1244, 0.0811, 0.1448]], |
|
|
device='cuda:0'), grad: tensor([[ 8.0948e-03, 4.7188e-03, 2.4841e-02, ..., 6.6223e-03, |
|
|
2.3666e-02, 5.8098e-03], |
|
|
[-1.4984e-02, -7.6065e-03, -5.0842e-02, ..., -1.6510e-02, |
|
|
-5.0995e-02, -1.5388e-02], |
|
|
[ 3.6883e-04, 9.5189e-05, 1.7767e-03, ..., 6.6376e-04, |
|
|
1.8787e-03, 6.5994e-04], |
|
|
..., |
|
|
[ 4.9591e-03, 2.2087e-03, 1.6663e-02, ..., 5.1651e-03, |
|
|
1.6617e-02, 4.8523e-03], |
|
|
[-4.7541e-04, 1.4246e-04, -1.5488e-03, ..., 4.7541e-04, |
|
|
-9.6083e-04, 4.7874e-04], |
|
|
[ 5.4026e-04, 1.0842e-04, 2.3575e-03, ..., 7.8869e-04, |
|
|
2.4357e-03, 7.8535e-04]], device='cuda:0') |
|
|
Epoch 13, bias, value: tensor([ 0.2046, 0.0119, -0.1103, 0.1370, 0.3917, -0.0697, -0.5732], |
|
|
device='cuda:0'), grad: tensor([ 0.1096, -0.2756, 0.0134, 0.0499, 0.0839, 0.0035, 0.0153], |
|
|
device='cuda:0') |
|
|
249 |
|
|
0.009292243968009333 |
|
|
changing lr |
|
|
epoch 12, time 346.66, cls_loss 0.8350 cls_loss_mapping 1.2619 cls_loss_causal 1.8637 re_mapping 0.2095 re_causal 0.2569 |
|
|
Epoch 14, weight, value: tensor([[ 0.0415, 0.0553, -0.0194, ..., -0.0846, -0.1425, -0.1021], |
|
|
[-0.0380, -0.0485, -0.0086, ..., -0.0332, -0.0208, -0.0135], |
|
|
[ 0.0273, -0.0239, 0.0513, ..., -0.1055, -0.0992, -0.0865], |
|
|
..., |
|
|
[-0.0156, -0.0573, 0.0202, ..., -0.0573, -0.0553, -0.0609], |
|
|
[-0.0235, -0.0368, -0.0384, ..., -0.0439, -0.0034, -0.0513], |
|
|
[ 0.0841, 0.2409, 0.0416, ..., 0.1204, 0.0700, 0.1413]], |
|
|
device='cuda:0'), grad: tensor([[ 5.5084e-03, 3.4237e-03, 2.1042e-02, ..., 5.4054e-03, |
|
|
2.2049e-02, 3.4885e-03], |
|
|
[ 2.6642e-02, 8.3694e-03, 8.7708e-02, ..., 2.2125e-02, |
|
|
9.0027e-02, 1.5335e-02], |
|
|
[-2.5024e-03, -1.3599e-03, -6.5117e-03, ..., -8.6164e-04, |
|
|
-6.2943e-03, -9.2697e-04], |
|
|
..., |
|
|
[-2.9495e-02, -1.0429e-02, -1.0193e-01, ..., -2.6520e-02, |
|
|
-1.0535e-01, -1.7776e-02], |
|
|
[-4.4012e-04, -1.0741e-04, -1.1673e-03, ..., -3.8886e-04, |
|
|
-1.3380e-03, -3.0255e-04], |
|
|
[ 2.0817e-05, 5.5730e-06, 5.6982e-05, ..., 1.8135e-05, |
|
|
6.4135e-05, 1.3933e-05]], device='cuda:0') |
|
|
Epoch 14, bias, value: tensor([ 0.3738, -0.1402, -0.0957, 0.1669, 0.4431, -0.1405, -0.6153], |
|
|
device='cuda:0'), grad: tensor([ 7.6782e-02, 2.4097e-01, -1.0109e-02, 2.5806e-03, -3.0664e-01, |
|
|
-3.6335e-03, 1.7524e-04], device='cuda:0') |
|
|
249 |
|
|
0.009172866268606516 |
|
|
changing lr |
|
|
epoch 13, time 350.62, cls_loss 0.6228 cls_loss_mapping 1.0631 cls_loss_causal 1.7189 re_mapping 0.2041 re_causal 0.2671 |
|
|
Epoch 15, weight, value: tensor([[ 0.0415, 0.0524, -0.0151, ..., -0.0856, -0.1345, -0.1026], |
|
|
[-0.0382, -0.0479, -0.0106, ..., -0.0319, -0.0248, -0.0121], |
|
|
[ 0.0226, -0.0257, 0.0433, ..., -0.1051, -0.1057, -0.0868], |
|
|
..., |
|
|
[-0.0088, -0.0529, 0.0357, ..., -0.0457, -0.0336, -0.0496], |
|
|
[-0.0274, -0.0357, -0.0453, ..., -0.0479, -0.0161, -0.0550], |
|
|
[ 0.0828, 0.2384, 0.0393, ..., 0.1179, 0.0668, 0.1387]], |
|
|
device='cuda:0'), grad: tensor([[ 1.0033e-03, 3.1304e-04, 4.0283e-03, ..., 3.7632e-03, |
|
|
6.0463e-03, 3.6373e-03], |
|
|
[ 4.2419e-03, 8.4877e-04, 2.1423e-02, ..., 8.8348e-03, |
|
|
1.9821e-02, 5.6648e-03], |
|
|
[ 1.6034e-04, 7.2122e-05, 6.4850e-04, ..., 5.4169e-04, |
|
|
8.7976e-04, 5.1785e-04], |
|
|
..., |
|
|
[-5.4092e-03, -7.6914e-04, -2.6001e-02, ..., -1.4404e-02, |
|
|
-2.8580e-02, -1.1177e-02], |
|
|
[-8.4305e-04, -7.4387e-04, -3.4904e-03, ..., -1.8578e-03, |
|
|
-3.1853e-03, -1.6642e-03], |
|
|
[ 1.1188e-04, 4.2140e-05, 4.5061e-04, ..., 4.0054e-04, |
|
|
6.4659e-04, 3.8576e-04]], device='cuda:0') |
|
|
Epoch 15, bias, value: tensor([ 0.3647, -0.0599, -0.0534, 0.0527, 0.4296, -0.1354, -0.6058], |
|
|
device='cuda:0'), grad: tensor([ 0.0326, 0.1567, 0.0054, 0.0238, -0.1915, -0.0308, 0.0037], |
|
|
device='cuda:0') |
|
|
249 |
|
|
0.00904508497187474 |
|
|
changing lr |
|
|
epoch 14, time 345.66, cls_loss 0.4806 cls_loss_mapping 0.9650 cls_loss_causal 1.6150 re_mapping 0.1894 re_causal 0.2416 |
|
|
Epoch 16, weight, value: tensor([[ 0.0310, 0.0436, -0.0326, ..., -0.0893, -0.1463, -0.1070], |
|
|
[-0.0336, -0.0450, -0.0082, ..., -0.0312, -0.0237, -0.0113], |
|
|
[ 0.0246, -0.0232, 0.0485, ..., -0.1049, -0.0998, -0.0867], |
|
|
..., |
|
|
[-0.0121, -0.0519, 0.0299, ..., -0.0440, -0.0457, -0.0475], |
|
|
[-0.0270, -0.0393, -0.0407, ..., -0.0581, -0.0167, -0.0640], |
|
|
[ 0.0877, 0.2420, 0.0476, ..., 0.1297, 0.0832, 0.1521]], |
|
|
device='cuda:0'), grad: tensor([[-2.5749e-03, -2.3155e-03, -2.6566e-02, ..., -1.3832e-02, |
|
|
-3.0853e-02, -1.3977e-02], |
|
|
[ 1.1644e-03, 1.1311e-03, 1.2085e-02, ..., 7.2594e-03, |
|
|
1.5259e-02, 7.4463e-03], |
|
|
[ 1.4566e-05, 1.0222e-05, 8.0287e-05, ..., 4.5151e-05, |
|
|
8.7559e-05, 4.4107e-05], |
|
|
..., |
|
|
[ 1.1015e-03, 9.5701e-04, 1.2695e-02, ..., 5.4970e-03, |
|
|
1.3535e-02, 5.4626e-03], |
|
|
[ 1.5274e-06, 1.0803e-06, 7.8306e-06, ..., 5.0291e-06, |
|
|
8.9854e-06, 4.9099e-06], |
|
|
[ 1.0610e-04, 7.4029e-05, 5.2881e-04, ..., 3.1948e-04, |
|
|
5.9080e-04, 3.1304e-04]], device='cuda:0') |
|
|
Epoch 16, bias, value: tensor([ 0.3305, -0.0721, -0.0274, 0.0394, 0.4979, -0.1767, -0.5990], |
|
|
device='cuda:0'), grad: tensor([-2.5024e-01, 1.2463e-01, 6.4611e-04, 1.0529e-02, 1.1017e-01, |
|
|
6.5565e-05, 4.2992e-03], device='cuda:0') |
|
|
249 |
|
|
0.008909157412340152 |
|
|
changing lr |
|
|
epoch 15, time 350.72, cls_loss 0.4993 cls_loss_mapping 0.9868 cls_loss_causal 1.6746 re_mapping 0.1866 re_causal 0.2383 |
|
|
Epoch 17, weight, value: tensor([[ 0.0314, 0.0435, -0.0261, ..., -0.0824, -0.1354, -0.1003], |
|
|
[-0.0328, -0.0435, -0.0109, ..., -0.0273, -0.0238, -0.0068], |
|
|
[ 0.0247, -0.0233, 0.0477, ..., -0.1042, -0.1016, -0.0861], |
|
|
..., |
|
|
[-0.0157, -0.0528, 0.0222, ..., -0.0466, -0.0491, -0.0507], |
|
|
[-0.0192, -0.0351, -0.0167, ..., -0.0552, 0.0035, -0.0613], |
|
|
[ 0.0835, 0.2381, 0.0352, ..., 0.1240, 0.0684, 0.1463]], |
|
|
device='cuda:0'), grad: tensor([[-2.4104e-04, -2.0754e-04, -8.3399e-04, ..., -1.1355e-04, |
|
|
-6.2180e-04, -2.6512e-04], |
|
|
[ 2.5183e-05, 2.0832e-05, 1.2672e-04, ..., 7.1526e-05, |
|
|
1.3828e-04, 7.4565e-05], |
|
|
[ 5.3018e-05, 3.4392e-05, 4.8375e-04, ..., 4.2129e-04, |
|
|
6.4516e-04, 3.6454e-04], |
|
|
..., |
|
|
[ 1.0633e-04, 1.0949e-04, -1.2743e-04, ..., -6.1083e-04, |
|
|
-5.7030e-04, -3.9434e-04], |
|
|
[ 7.9200e-06, 6.3106e-06, 4.0919e-05, ..., 2.1562e-05, |
|
|
4.3303e-05, 2.2501e-05], |
|
|
[ 1.1288e-05, 8.6278e-06, 6.8188e-05, ..., 4.3750e-05, |
|
|
7.8440e-05, 4.2111e-05]], device='cuda:0') |
|
|
Epoch 17, bias, value: tensor([ 0.4034, -0.0693, -0.0067, -0.0020, 0.4321, -0.1292, -0.6354], |
|
|
device='cuda:0'), grad: tensor([-0.0068, 0.0015, 0.0075, 0.0033, -0.0069, 0.0005, 0.0009], |
|
|
device='cuda:0') |
|
|
249 |
|
|
0.00876535733001806 |
|
|
changing lr |
|
|
---------------------saving model at epoch 16---------------------------------------------------- |
|
|
epoch 16, time 349.48, cls_loss 0.4967 cls_loss_mapping 0.9364 cls_loss_causal 1.6148 re_mapping 0.1727 re_causal 0.2309 |
|
|
Epoch 18, weight, value: tensor([[ 0.0292, 0.0414, -0.0274, ..., -0.0836, -0.1345, -0.1007], |
|
|
[-0.0355, -0.0440, -0.0144, ..., -0.0241, -0.0254, -0.0043], |
|
|
[ 0.0262, -0.0213, 0.0525, ..., -0.0980, -0.0898, -0.0800], |
|
|
..., |
|
|
[-0.0131, -0.0515, 0.0331, ..., -0.0464, -0.0410, -0.0511], |
|
|
[-0.0188, -0.0325, -0.0242, ..., -0.0596, -0.0079, -0.0654], |
|
|
[ 0.0825, 0.2355, 0.0323, ..., 0.1215, 0.0647, 0.1435]], |
|
|
device='cuda:0'), grad: tensor([[-1.1539e-04, 5.9366e-04, -4.9591e-03, ..., 2.3193e-03, |
|
|
-3.9482e-03, 1.3437e-03], |
|
|
[ 1.4591e-04, 1.9825e-04, 1.5459e-03, ..., 2.2554e-04, |
|
|
1.3132e-03, 2.3484e-04], |
|
|
[ 6.3598e-05, 2.2009e-05, 8.8978e-04, ..., 1.7858e-04, |
|
|
8.3733e-04, 1.6987e-04], |
|
|
..., |
|
|
[-5.5456e-04, -8.7023e-04, -4.5128e-03, ..., -3.4504e-03, |
|
|
-4.6577e-03, -2.6188e-03], |
|
|
[ 2.9516e-04, 2.9728e-05, 4.5509e-03, ..., 4.1389e-04, |
|
|
4.1580e-03, 5.2357e-04], |
|
|
[ 1.5050e-05, 3.7774e-06, 2.1791e-04, ..., 3.6508e-05, |
|
|
2.0361e-04, 3.6627e-05]], device='cuda:0') |
|
|
Epoch 18, bias, value: tensor([ 0.4269, 0.0323, -0.0444, -0.0081, 0.4233, -0.1873, -0.6500], |
|
|
device='cuda:0'), grad: tensor([-0.0424, 0.0184, 0.0097, 0.0242, -0.0605, 0.0482, 0.0024], |
|
|
device='cuda:0') |
|
|
249 |
|
|
0.008613974319136962 |
|
|
changing lr |
|
|
epoch 17, time 349.89, cls_loss 0.4311 cls_loss_mapping 0.8407 cls_loss_causal 1.5558 re_mapping 0.1541 re_causal 0.2379 |
|
|
Epoch 19, weight, value: tensor([[ 0.0282, 0.0388, -0.0318, ..., -0.0847, -0.1366, -0.1021], |
|
|
[-0.0396, -0.0462, -0.0243, ..., -0.0225, -0.0308, -0.0026], |
|
|
[ 0.0270, -0.0203, 0.0469, ..., -0.1002, -0.0983, -0.0844], |
|
|
..., |
|
|
[-0.0153, -0.0498, 0.0313, ..., -0.0474, -0.0472, -0.0527], |
|
|
[-0.0148, -0.0298, -0.0123, ..., -0.0554, 0.0059, -0.0606], |
|
|
[ 0.0825, 0.2338, 0.0333, ..., 0.1194, 0.0642, 0.1420]], |
|
|
device='cuda:0'), grad: tensor([[ 0.0000e+00, 0.0000e+00, 2.8238e-06, ..., 3.3155e-06, |
|
|
5.3868e-06, 3.4422e-06], |
|
|
[ 0.0000e+00, 0.0000e+00, -5.2392e-05, ..., -6.1572e-05, |
|
|
-1.0008e-04, -6.3956e-05], |
|
|
[-0.0000e+00, -0.0000e+00, 4.7684e-07, ..., 5.7369e-07, |
|
|
9.2387e-07, 5.9605e-07], |
|
|
..., |
|
|
[ 0.0000e+00, 0.0000e+00, 4.4107e-05, ..., 5.1796e-05, |
|
|
8.4221e-05, 5.3823e-05], |
|
|
[ 0.0000e+00, 0.0000e+00, 2.0340e-06, ..., 2.3842e-06, |
|
|
3.8743e-06, 2.4736e-06], |
|
|
[ 0.0000e+00, 0.0000e+00, 9.5367e-07, ..., 1.1250e-06, |
|
|
1.8254e-06, 1.1697e-06]], device='cuda:0') |
|
|
Epoch 19, bias, value: tensor([ 0.3902, -0.0056, -0.0767, -0.0025, 0.4757, -0.1395, -0.6489], |
|
|
device='cuda:0'), grad: tensor([ 7.1049e-05, -1.3189e-03, 1.2085e-05, 4.8608e-05, 1.1101e-03, |
|
|
5.1111e-05, 2.4080e-05], device='cuda:0') |
|
|
249 |
|
|
0.008455313244934327 |
|
|
changing lr |
|
|
epoch 18, time 349.75, cls_loss 0.2844 cls_loss_mapping 0.8367 cls_loss_causal 1.5332 re_mapping 0.1430 re_causal 0.2201 |
|
|
Epoch 20, weight, value: tensor([[ 0.0257, 0.0360, -0.0373, ..., -0.0822, -0.1377, -0.0995], |
|
|
[-0.0379, -0.0446, -0.0158, ..., -0.0208, -0.0206, -0.0008], |
|
|
[ 0.0269, -0.0205, 0.0475, ..., -0.1011, -0.0969, -0.0853], |
|
|
..., |
|
|
[-0.0122, -0.0477, 0.0364, ..., -0.0440, -0.0426, -0.0496], |
|
|
[-0.0130, -0.0280, -0.0092, ..., -0.0557, 0.0070, -0.0598], |
|
|
[ 0.0787, 0.2307, 0.0253, ..., 0.1169, 0.0551, 0.1387]], |
|
|
device='cuda:0'), grad: tensor([[ 0.0000e+00, 0.0000e+00, 3.0565e-04, ..., 6.5416e-06, |
|
|
1.8394e-04, 6.3702e-06], |
|
|
[ 7.4506e-09, 7.4506e-09, -9.8419e-04, ..., -1.6794e-05, |
|
|
-5.8889e-04, -1.6853e-05], |
|
|
[ 7.4506e-09, 7.4506e-09, 2.3067e-05, ..., 1.3933e-06, |
|
|
1.4633e-05, 1.2591e-06], |
|
|
..., |
|
|
[ 1.4901e-08, 7.4506e-09, 6.2418e-04, ..., 1.3180e-05, |
|
|
3.7551e-04, 1.2860e-05], |
|
|
[-2.9802e-08, -2.2352e-08, 1.2152e-05, ..., 1.8477e-06, |
|
|
8.6427e-06, 1.6168e-06], |
|
|
[ 0.0000e+00, 0.0000e+00, 5.0813e-06, ..., 1.1027e-06, |
|
|
3.8818e-06, 9.6112e-07]], device='cuda:0') |
|
|
Epoch 20, bias, value: tensor([ 0.4344, 0.0165, -0.0767, -0.0469, 0.4770, -0.1534, -0.6582], |
|
|
device='cuda:0'), grad: tensor([ 0.0161, -0.0517, 0.0012, 0.0008, 0.0328, 0.0006, 0.0003], |
|
|
device='cuda:0') |
|
|
249 |
|
|
0.008289693629698565 |
|
|
changing lr |
|
|
epoch 19, time 345.25, cls_loss 0.4219 cls_loss_mapping 0.7627 cls_loss_causal 1.4024 re_mapping 0.1221 re_causal 0.1921 |
|
|
Epoch 21, weight, value: tensor([[ 0.0246, 0.0335, -0.0398, ..., -0.0794, -0.1339, -0.0970], |
|
|
[-0.0375, -0.0443, -0.0158, ..., -0.0173, -0.0177, 0.0023], |
|
|
[ 0.0239, -0.0216, 0.0428, ..., -0.1032, -0.1000, -0.0871], |
|
|
..., |
|
|
[-0.0065, -0.0431, 0.0462, ..., -0.0464, -0.0408, -0.0515], |
|
|
[-0.0135, -0.0270, -0.0098, ..., -0.0579, 0.0038, -0.0615], |
|
|
[ 0.0774, 0.2280, 0.0237, ..., 0.1151, 0.0532, 0.1367]], |
|
|
device='cuda:0'), grad: tensor([[ 3.1769e-05, -1.3721e-04, -6.9380e-05, ..., 1.2875e-04, |
|
|
-1.4472e-04, 1.0723e-04], |
|
|
[-6.3133e-04, -3.1090e-04, -3.1681e-03, ..., -6.6280e-04, |
|
|
-3.2272e-03, -6.7329e-04], |
|
|
[ 1.6415e-04, 7.4446e-05, 1.1692e-03, ..., 2.9182e-04, |
|
|
1.1387e-03, 2.0289e-04], |
|
|
..., |
|
|
[ 2.4462e-04, 2.8396e-04, 3.3855e-04, ..., -3.3498e-04, |
|
|
5.1117e-04, -5.7593e-06], |
|
|
[ 1.1869e-05, 5.7518e-06, 7.9215e-05, ..., 1.9401e-05, |
|
|
7.8201e-05, 1.4484e-05], |
|
|
[ 2.7493e-06, 1.6987e-06, 2.1398e-05, ..., 7.3612e-06, |
|
|
2.2352e-05, 5.3942e-06]], device='cuda:0') |
|
|
Epoch 21, bias, value: tensor([ 0.3833, 0.0472, -0.0680, -0.0074, 0.4733, -0.1728, -0.6628], |
|
|
device='cuda:0'), grad: tensor([-0.0019, -0.0153, 0.0072, 0.0116, -0.0022, 0.0005, 0.0001], |
|
|
device='cuda:0') |
|
|
249 |
|
|
0.00811744900929367 |
|
|
changing lr |
|
|
epoch 20, time 346.79, cls_loss 0.2886 cls_loss_mapping 0.6949 cls_loss_causal 1.3418 re_mapping 0.1059 re_causal 0.1685 |
|
|
Epoch 22, weight, value: tensor([[ 0.0233, 0.0313, -0.0416, ..., -0.0790, -0.1330, -0.0969], |
|
|
[-0.0378, -0.0435, -0.0128, ..., -0.0185, -0.0118, 0.0010], |
|
|
[ 0.0220, -0.0224, 0.0384, ..., -0.1022, -0.1030, -0.0863], |
|
|
..., |
|
|
[-0.0045, -0.0410, 0.0448, ..., -0.0452, -0.0463, -0.0504], |
|
|
[-0.0108, -0.0260, -0.0016, ..., -0.0557, 0.0125, -0.0590], |
|
|
[ 0.0770, 0.2260, 0.0238, ..., 0.1138, 0.0528, 0.1352]], |
|
|
device='cuda:0'), grad: tensor([[-1.5383e-03, -1.8444e-03, -2.1469e-02, ..., -1.0399e-02, |
|
|
-2.5879e-02, -9.4986e-03], |
|
|
[ 2.7609e-04, 3.4833e-04, 4.5853e-03, ..., 2.2602e-03, |
|
|
5.6686e-03, 2.0962e-03], |
|
|
[ 7.6443e-06, 6.0983e-06, 1.0163e-04, ..., 5.5999e-05, |
|
|
1.0902e-04, 4.1693e-05], |
|
|
..., |
|
|
[ 1.1730e-03, 1.4095e-03, 1.6357e-02, ..., 7.8964e-03, |
|
|
1.9699e-02, 7.2136e-03], |
|
|
[ 3.9965e-05, 3.9756e-05, 2.0468e-04, ..., 8.4817e-05, |
|
|
1.8144e-04, 6.2823e-05], |
|
|
[ 1.2055e-05, 1.1958e-05, 6.2764e-05, ..., 2.6688e-05, |
|
|
5.6267e-05, 1.9848e-05]], device='cuda:0') |
|
|
Epoch 22, bias, value: tensor([ 0.3787, 0.0097, -0.0376, 0.0187, 0.4699, -0.1906, -0.6559], |
|
|
device='cuda:0'), grad: tensor([-0.2013, 0.0428, 0.0012, 0.0016, 0.1532, 0.0020, 0.0006], |
|
|
device='cuda:0') |
|
|
249 |
|
|
0.007938926261462368 |
|
|
changing lr |
|
|
epoch 21, time 346.47, cls_loss 0.2398 cls_loss_mapping 0.5935 cls_loss_causal 1.1991 re_mapping 0.0881 re_causal 0.1494 |
|
|
Epoch 23, weight, value: tensor([[ 0.0253, 0.0321, -0.0354, ..., -0.0819, -0.1262, -0.0989], |
|
|
[-0.0420, -0.0463, -0.0222, ..., -0.0196, -0.0175, -0.0004], |
|
|
[ 0.0206, -0.0235, 0.0366, ..., -0.1000, -0.1013, -0.0838], |
|
|
..., |
|
|
[-0.0007, -0.0374, 0.0553, ..., -0.0389, -0.0385, -0.0454], |
|
|
[-0.0104, -0.0250, -0.0011, ..., -0.0555, 0.0127, -0.0587], |
|
|
[ 0.0764, 0.2240, 0.0234, ..., 0.1123, 0.0518, 0.1337]], |
|
|
device='cuda:0'), grad: tensor([[-5.4389e-05, -7.5281e-05, 5.6076e-04, ..., 1.3840e-04, |
|
|
6.6090e-04, 8.7142e-05], |
|
|
[ 2.1443e-05, 2.7061e-05, 3.8767e-04, ..., 8.2433e-05, |
|
|
3.6097e-04, 7.1824e-05], |
|
|
[ 5.1558e-06, 3.9563e-06, 4.6968e-05, ..., 9.5740e-06, |
|
|
4.2051e-05, 7.6294e-06], |
|
|
..., |
|
|
[ 2.5108e-05, 4.2379e-05, -1.0281e-03, ..., -2.3723e-04, |
|
|
-1.0948e-03, -1.7190e-04], |
|
|
[ 7.1526e-07, 4.7684e-07, 8.8960e-06, ..., 1.8738e-06, |
|
|
8.3223e-06, 1.4454e-06], |
|
|
[ 5.3644e-07, 4.7684e-07, 3.5278e-06, ..., 6.8918e-07, |
|
|
2.9393e-06, 5.9232e-07]], device='cuda:0') |
|
|
Epoch 23, bias, value: tensor([ 0.3388, -0.0143, -0.0064, 0.0110, 0.5262, -0.2086, -0.6536], |
|
|
device='cuda:0'), grad: tensor([ 4.0855e-03, 3.0880e-03, 3.2306e-04, 1.3936e-04, -7.7248e-03, |
|
|
6.1810e-05, 2.4408e-05], device='cuda:0') |
|
|
249 |
|
|
0.007754484907260515 |
|
|
changing lr |
|
|
epoch 22, time 343.89, cls_loss 0.2083 cls_loss_mapping 0.5953 cls_loss_causal 1.3084 re_mapping 0.0874 re_causal 0.1513 |
|
|
Epoch 24, weight, value: tensor([[ 0.0259, 0.0315, -0.0369, ..., -0.0825, -0.1337, -0.1016], |
|
|
[-0.0418, -0.0450, -0.0224, ..., -0.0180, -0.0164, 0.0007], |
|
|
[ 0.0223, -0.0211, 0.0504, ..., -0.0949, -0.0819, -0.0779], |
|
|
..., |
|
|
[-0.0006, -0.0368, 0.0494, ..., -0.0380, -0.0434, -0.0445], |
|
|
[-0.0110, -0.0256, -0.0048, ..., -0.0579, 0.0065, -0.0612], |
|
|
[ 0.0736, 0.2195, 0.0208, ..., 0.1090, 0.0525, 0.1319]], |
|
|
device='cuda:0'), grad: tensor([[-9.3132e-08, -1.9372e-07, -6.5938e-07, ..., -2.3469e-07, |
|
|
-7.3388e-07, -2.3097e-07], |
|
|
[ 7.4506e-09, 1.4901e-08, 4.8429e-08, ..., 1.8626e-08, |
|
|
5.5879e-08, 1.8626e-08], |
|
|
[ 1.1176e-08, 7.4506e-09, 5.2154e-08, ..., 2.2352e-08, |
|
|
7.0781e-08, 2.6077e-08], |
|
|
..., |
|
|
[ 8.5682e-08, 1.7509e-07, 6.0350e-07, ..., 2.1607e-07, |
|
|
6.7055e-07, 2.1234e-07], |
|
|
[-1.1176e-08, -7.4506e-09, -5.2154e-08, ..., -1.8626e-08, |
|
|
-6.7055e-08, -2.6077e-08], |
|
|
[ 0.0000e+00, 0.0000e+00, 0.0000e+00, ..., 0.0000e+00, |
|
|
0.0000e+00, 0.0000e+00]], device='cuda:0') |
|
|
Epoch 24, bias, value: tensor([ 0.3312, -0.0065, 0.0246, 0.0131, 0.5065, -0.2245, -0.6513], |
|
|
device='cuda:0'), grad: tensor([-2.1271e-06, 1.5646e-07, 1.3411e-07, 0.0000e+00, 1.9483e-06, |
|
|
-1.1921e-07, 0.0000e+00], device='cuda:0') |
|
|
249 |
|
|
0.007564496387029534 |
|
|
changing lr |
|
|
---------------------saving model at epoch 23---------------------------------------------------- |
|
|
epoch 23, time 350.19, cls_loss 0.2486 cls_loss_mapping 0.5770 cls_loss_causal 1.2395 re_mapping 0.0702 re_causal 0.1356 |
|
|
Epoch 25, weight, value: tensor([[ 0.0267, 0.0314, -0.0344, ..., -0.0778, -0.1292, -0.0977], |
|
|
[-0.0408, -0.0440, -0.0236, ..., -0.0215, -0.0203, -0.0027], |
|
|
[ 0.0218, -0.0228, 0.0440, ..., -0.0951, -0.0880, -0.0785], |
|
|
..., |
|
|
[-0.0041, -0.0360, 0.0495, ..., -0.0334, -0.0376, -0.0394], |
|
|
[-0.0122, -0.0253, -0.0072, ..., -0.0591, 0.0014, -0.0627], |
|
|
[ 0.0729, 0.2174, 0.0201, ..., 0.1076, 0.0512, 0.1303]], |
|
|
device='cuda:0'), grad: tensor([[ 2.5064e-05, 6.3181e-06, 5.1546e-04, ..., 1.9991e-04, |
|
|
5.3024e-04, 2.0480e-04], |
|
|
[ 5.9462e-04, 3.4310e-06, 1.1429e-02, ..., 1.4524e-03, |
|
|
8.9111e-03, 1.6298e-03], |
|
|
[ 1.6734e-05, 6.5640e-06, 2.9111e-04, ..., 1.4937e-04, |
|
|
3.1781e-04, 1.5056e-04], |
|
|
..., |
|
|
[-6.4468e-04, -1.8567e-05, -1.2383e-02, ..., -1.8587e-03, |
|
|
-9.9106e-03, -2.0428e-03], |
|
|
[ 3.2559e-06, 7.9721e-07, 5.6028e-05, ..., 2.0042e-05, |
|
|
5.3853e-05, 2.0519e-05], |
|
|
[ 1.3672e-06, 3.1665e-07, 2.4319e-05, ..., 8.2403e-06, |
|
|
2.2992e-05, 8.4564e-06]], device='cuda:0') |
|
|
Epoch 25, bias, value: tensor([ 0.3551, -0.0608, 0.0083, -0.0027, 0.5596, -0.2145, -0.6520], |
|
|
device='cuda:0'), grad: tensor([ 0.0057, 0.1221, 0.0036, 0.0008, -0.1331, 0.0007, 0.0003], |
|
|
device='cuda:0') |
|
|
249 |
|
|
0.007369343312364995 |
|
|
changing lr |
|
|
---------------------saving model at epoch 24---------------------------------------------------- |
|
|
epoch 24, time 349.95, cls_loss 0.1266 cls_loss_mapping 0.4742 cls_loss_causal 1.1183 re_mapping 0.0595 re_causal 0.1259 |
|
|
Epoch 26, weight, value: tensor([[ 2.6065e-02, 3.1333e-02, -3.2591e-02, ..., -7.8665e-02, |
|
|
-1.2607e-01, -9.7870e-02], |
|
|
[-4.1028e-02, -4.3888e-02, -2.8106e-02, ..., -2.2541e-02, |
|
|
-2.3828e-02, -3.8757e-03], |
|
|
[ 2.0624e-02, -2.3310e-02, 3.7576e-02, ..., -9.6356e-02, |
|
|
-9.4617e-02, -8.0039e-02], |
|
|
..., |
|
|
[-3.9162e-03, -3.5971e-02, 4.8947e-02, ..., -3.0996e-02, |
|
|
-3.8739e-02, -3.7762e-02], |
|
|
[-1.0823e-02, -2.4368e-02, -2.1525e-04, ..., -5.7820e-02, |
|
|
8.0542e-03, -6.0702e-02], |
|
|
[ 7.2154e-02, 2.1539e-01, 1.9567e-02, ..., 1.0641e-01, |
|
|
5.0375e-02, 1.2895e-01]], device='cuda:0'), grad: tensor([[ 8.8072e-04, 5.1594e-04, 4.9973e-03, ..., 1.3981e-03, |
|
|
6.2180e-03, 2.2316e-03], |
|
|
[-1.3784e-07, -5.2899e-07, -1.6801e-06, ..., 4.0233e-07, |
|
|
-1.3262e-06, 4.5076e-07], |
|
|
[ 1.2982e-04, 7.1049e-05, 7.2861e-04, ..., 1.9443e-04, |
|
|
9.0170e-04, 3.2187e-04], |
|
|
..., |
|
|
[-1.8549e-03, -1.0757e-03, -1.0483e-02, ..., -2.9087e-03, |
|
|
-1.3023e-02, -4.6730e-03], |
|
|
[ 6.5744e-05, 3.9637e-05, 3.7694e-04, ..., 1.0800e-04, |
|
|
4.7064e-04, 1.6940e-04], |
|
|
[ 1.5274e-07, 1.1176e-07, 1.0990e-06, ..., 3.8370e-07, |
|
|
1.4305e-06, 5.2527e-07]], device='cuda:0') |
|
|
Epoch 26, bias, value: tensor([ 0.3469, -0.1051, -0.0202, 0.0171, 0.5930, -0.1889, -0.6496], |
|
|
device='cuda:0'), grad: tensor([ 9.0256e-03, -6.0312e-06, 1.2798e-03, 7.8125e-03, -1.8814e-02, |
|
|
6.9237e-04, 2.4401e-06], device='cuda:0') |
|
|
249 |
|
|
0.0071694186955877925 |
|
|
changing lr |
|
|
epoch 25, time 346.90, cls_loss 0.1057 cls_loss_mapping 0.4313 cls_loss_causal 1.1527 re_mapping 0.0601 re_causal 0.1291 |
|
|
Epoch 27, weight, value: tensor([[ 0.0240, 0.0297, -0.0393, ..., -0.0773, -0.1310, -0.0966], |
|
|
[-0.0409, -0.0436, -0.0290, ..., -0.0234, -0.0260, -0.0050], |
|
|
[ 0.0212, -0.0224, 0.0399, ..., -0.0959, -0.0926, -0.0799], |
|
|
..., |
|
|
[-0.0022, -0.0348, 0.0558, ..., -0.0309, -0.0303, -0.0371], |
|
|
[-0.0113, -0.0246, -0.0006, ..., -0.0579, 0.0076, -0.0603], |
|
|
[ 0.0715, 0.2134, 0.0192, ..., 0.1053, 0.0496, 0.1276]], |
|
|
device='cuda:0'), grad: tensor([[ 4.4703e-08, 6.7055e-08, 8.5682e-07, ..., 2.0489e-07, |
|
|
6.3702e-07, 2.1979e-07], |
|
|
[-2.1234e-07, -9.6112e-07, -1.2949e-05, ..., -8.9407e-08, |
|
|
-6.5267e-06, -3.5390e-07], |
|
|
[ 9.3132e-08, 9.6858e-08, 9.4250e-07, ..., 6.2585e-07, |
|
|
1.1213e-06, 6.5938e-07], |
|
|
..., |
|
|
[ 2.7940e-07, 9.5367e-07, 1.2428e-05, ..., 7.0408e-07, |
|
|
6.9104e-06, 9.6858e-07], |
|
|
[ 1.2256e-06, 1.0133e-06, 8.2999e-06, ..., 8.7470e-06, |
|
|
1.3277e-05, 9.1121e-06], |
|
|
[ 1.0058e-07, 8.1956e-08, 6.8918e-07, ..., 7.0781e-07, |
|
|
1.0803e-06, 7.3761e-07]], device='cuda:0') |
|
|
Epoch 27, bias, value: tensor([ 0.3509, -0.1067, -0.0124, 0.0109, 0.5675, -0.1701, -0.6468], |
|
|
device='cuda:0'), grad: tensor([ 1.0535e-05, -1.7989e-04, 9.7156e-06, -8.5473e-05, 1.6975e-04, |
|
|
6.9559e-05, 5.8375e-06], device='cuda:0') |
|
|
249 |
|
|
0.0069651251582696205 |
|
|
changing lr |
|
|
---------------------saving model at epoch 26---------------------------------------------------- |
|
|
epoch 26, time 350.28, cls_loss 0.1538 cls_loss_mapping 0.4250 cls_loss_causal 1.0776 re_mapping 0.0632 re_causal 0.1255 |
|
|
Epoch 28, weight, value: tensor([[ 0.0238, 0.0302, -0.0388, ..., -0.0751, -0.1280, -0.0947], |
|
|
[-0.0399, -0.0428, -0.0256, ..., -0.0245, -0.0237, -0.0060], |
|
|
[ 0.0207, -0.0227, 0.0378, ..., -0.0960, -0.0952, -0.0803], |
|
|
..., |
|
|
[-0.0025, -0.0350, 0.0536, ..., -0.0307, -0.0333, -0.0368], |
|
|
[-0.0113, -0.0245, -0.0016, ..., -0.0580, 0.0047, -0.0603], |
|
|
[ 0.0711, 0.2119, 0.0192, ..., 0.1043, 0.0492, 0.1265]], |
|
|
device='cuda:0'), grad: tensor([[ 4.2057e-04, 4.4632e-04, 1.9722e-03, ..., 1.6460e-03, |
|
|
2.2316e-03, 1.5688e-03], |
|
|
[-2.1400e-03, -2.7809e-03, -1.2062e-02, ..., -2.9259e-03, |
|
|
-8.4991e-03, -3.2768e-03], |
|
|
[ 1.3411e-04, 1.1194e-04, 5.8365e-04, ..., 5.7316e-04, |
|
|
7.2050e-04, 5.3406e-04], |
|
|
..., |
|
|
[ 1.4725e-03, 2.1286e-03, 9.0027e-03, ..., 2.7800e-04, |
|
|
4.9706e-03, 7.7105e-04], |
|
|
[ 3.4392e-05, 3.2216e-05, 1.6570e-04, ..., 1.0234e-04, |
|
|
1.6093e-04, 9.8109e-05], |
|
|
[ 1.6198e-05, 1.4491e-05, 7.2718e-05, ..., 6.4850e-05, |
|
|
8.5056e-05, 6.0856e-05]], device='cuda:0') |
|
|
Epoch 28, bias, value: tensor([ 0.3468, -0.1148, -0.0048, 0.0030, 0.5668, -0.1701, -0.6335], |
|
|
device='cuda:0'), grad: tensor([ 0.0176, -0.1155, 0.0043, 0.0018, 0.0899, 0.0013, 0.0006], |
|
|
device='cuda:0') |
|
|
249 |
|
|
0.006756874120406716 |
|
|
changing lr |
|
|
epoch 27, time 347.40, cls_loss 0.1100 cls_loss_mapping 0.3891 cls_loss_causal 1.0223 re_mapping 0.0498 re_causal 0.1108 |
|
|
Epoch 29, weight, value: tensor([[ 0.0251, 0.0313, -0.0354, ..., -0.0745, -0.1234, -0.0937], |
|
|
[-0.0389, -0.0416, -0.0225, ..., -0.0243, -0.0201, -0.0057], |
|
|
[ 0.0203, -0.0227, 0.0378, ..., -0.0959, -0.0940, -0.0799], |
|
|
..., |
|
|
[-0.0031, -0.0347, 0.0522, ..., -0.0271, -0.0322, -0.0340], |
|
|
[-0.0118, -0.0255, -0.0050, ..., -0.0594, -0.0014, -0.0617], |
|
|
[ 0.0705, 0.2101, 0.0188, ..., 0.1032, 0.0484, 0.1253]], |
|
|
device='cuda:0'), grad: tensor([[ 9.3341e-05, 2.8419e-04, -1.9245e-03, ..., -8.4925e-04, |
|
|
-1.9855e-03, -7.6866e-04], |
|
|
[ 8.4639e-06, 6.5193e-07, 1.0949e-04, ..., 4.6611e-05, |
|
|
1.0580e-04, 4.3750e-05], |
|
|
[-4.2582e-04, -4.3154e-04, -9.9754e-04, ..., -3.8195e-04, |
|
|
-7.8344e-04, -4.0030e-04], |
|
|
..., |
|
|
[ 3.0875e-04, 1.4150e-04, 2.6627e-03, ..., 1.1206e-03, |
|
|
2.5196e-03, 1.0653e-03], |
|
|
[ 7.2010e-06, 3.9637e-06, 5.4628e-05, ..., 2.2888e-05, |
|
|
5.1260e-05, 2.1860e-05], |
|
|
[ 2.4959e-06, 8.6799e-07, 2.4691e-05, ..., 1.0438e-05, |
|
|
2.3559e-05, 9.8720e-06]], device='cuda:0') |
|
|
Epoch 29, bias, value: tensor([ 0.3258, -0.1182, -0.0050, -0.0046, 0.5885, -0.1621, -0.6312], |
|
|
device='cuda:0'), grad: tensor([-0.0291, 0.0014, -0.0080, 0.0009, 0.0337, 0.0007, 0.0003], |
|
|
device='cuda:0') |
|
|
249 |
|
|
0.00654508497187474 |
|
|
changing lr |
|
|
---------------------saving model at epoch 28---------------------------------------------------- |
|
|
epoch 28, time 350.09, cls_loss 0.0846 cls_loss_mapping 0.3395 cls_loss_causal 1.0291 re_mapping 0.0492 re_causal 0.0999 |
|
|
Epoch 30, weight, value: tensor([[ 0.0245, 0.0303, -0.0354, ..., -0.0729, -0.1203, -0.0921], |
|
|
[-0.0388, -0.0409, -0.0223, ..., -0.0253, -0.0204, -0.0068], |
|
|
[ 0.0205, -0.0223, 0.0394, ..., -0.0949, -0.0916, -0.0788], |
|
|
..., |
|
|
[-0.0039, -0.0353, 0.0485, ..., -0.0274, -0.0367, -0.0344], |
|
|
[-0.0116, -0.0252, -0.0048, ..., -0.0589, -0.0014, -0.0611], |
|
|
[ 0.0707, 0.2090, 0.0198, ..., 0.1023, 0.0487, 0.1243]], |
|
|
device='cuda:0'), grad: tensor([[ 1.7681e-03, 4.8485e-03, 2.3468e-02, ..., 1.2390e-02, |
|
|
3.2959e-02, 1.2566e-02], |
|
|
[ 7.0073e-06, 1.9208e-05, 9.2983e-05, ..., 4.9084e-05, |
|
|
1.3053e-04, 4.9770e-05], |
|
|
[ 2.7776e-05, 7.6175e-05, 3.6860e-04, ..., 1.9467e-04, |
|
|
5.1785e-04, 1.9753e-04], |
|
|
..., |
|
|
[-1.8711e-03, -5.1308e-03, -2.4826e-02, ..., -1.3115e-02, |
|
|
-3.4882e-02, -1.3306e-02], |
|
|
[ 6.5006e-07, 1.8254e-06, 8.8066e-06, ..., 4.6603e-06, |
|
|
1.2383e-05, 4.7237e-06], |
|
|
[ 8.1956e-08, 2.2165e-07, 1.0766e-06, ..., 5.6811e-07, |
|
|
1.5125e-06, 5.7742e-07]], device='cuda:0') |
|
|
Epoch 30, bias, value: tensor([ 0.3038, -0.1232, 0.0082, -0.0091, 0.5983, -0.1606, -0.6240], |
|
|
device='cuda:0'), grad: tensor([ 7.3120e-02, 2.8968e-04, 1.1492e-03, 2.8114e-03, -7.7393e-02, |
|
|
2.7552e-05, 3.3565e-06], device='cuda:0') |
|
|
249 |
|
|
0.006330184227833378 |
|
|
changing lr |
|
|
epoch 29, time 347.50, cls_loss 0.0666 cls_loss_mapping 0.3109 cls_loss_causal 1.0087 re_mapping 0.0451 re_causal 0.1122 |
|
|
Epoch 31, weight, value: tensor([[ 0.0241, 0.0298, -0.0359, ..., -0.0724, -0.1209, -0.0915], |
|
|
[-0.0386, -0.0409, -0.0225, ..., -0.0254, -0.0200, -0.0071], |
|
|
[ 0.0206, -0.0218, 0.0393, ..., -0.0944, -0.0911, -0.0786], |
|
|
..., |
|
|
[-0.0044, -0.0350, 0.0488, ..., -0.0263, -0.0352, -0.0333], |
|
|
[-0.0110, -0.0247, -0.0016, ..., -0.0572, 0.0027, -0.0592], |
|
|
[ 0.0708, 0.2078, 0.0205, ..., 0.1016, 0.0490, 0.1235]], |
|
|
device='cuda:0'), grad: tensor([[ 2.9802e-08, 3.3528e-07, 2.7847e-06, ..., 2.5500e-06, |
|
|
4.0308e-06, 2.2575e-06], |
|
|
[ 8.9407e-08, 4.4703e-08, 4.9360e-07, ..., 1.6578e-07, |
|
|
4.1164e-07, 1.8440e-07], |
|
|
[ 6.7055e-08, 6.3330e-08, 6.0722e-07, ..., 3.5018e-07, |
|
|
6.5938e-07, 3.3528e-07], |
|
|
..., |
|
|
[ 1.2107e-07, -2.7753e-07, -2.0042e-06, ..., -2.3581e-06, |
|
|
-3.4366e-06, -1.9912e-06], |
|
|
[-3.6135e-07, -2.0303e-07, -2.2780e-06, ..., -8.9034e-07, |
|
|
-2.0526e-06, -9.7975e-07], |
|
|
[ 4.6566e-08, 2.7940e-08, 2.9802e-07, ..., 1.2666e-07, |
|
|
2.7753e-07, 1.3597e-07]], device='cuda:0') |
|
|
Epoch 31, bias, value: tensor([ 0.3060, -0.1185, 0.0095, -0.0269, 0.5951, -0.1523, -0.6192], |
|
|
device='cuda:0'), grad: tensor([ 5.0515e-05, 3.3751e-06, 6.9812e-06, 1.1921e-06, -4.6074e-05, |
|
|
-1.8597e-05, 2.5593e-06], device='cuda:0') |
|
|
249 |
|
|
0.006112604669781575 |
|
|
changing lr |
|
|
epoch 30, time 348.46, cls_loss 0.0678 cls_loss_mapping 0.3041 cls_loss_causal 1.0036 re_mapping 0.0422 re_causal 0.0991 |
|
|
Epoch 32, weight, value: tensor([[ 0.0240, 0.0295, -0.0348, ..., -0.0705, -0.1179, -0.0899], |
|
|
[-0.0389, -0.0411, -0.0259, ..., -0.0262, -0.0234, -0.0080], |
|
|
[ 0.0193, -0.0227, 0.0352, ..., -0.0947, -0.0946, -0.0793], |
|
|
..., |
|
|
[-0.0031, -0.0331, 0.0529, ..., -0.0269, -0.0323, -0.0332], |
|
|
[-0.0111, -0.0248, -0.0027, ..., -0.0571, 0.0015, -0.0591], |
|
|
[ 0.0704, 0.2064, 0.0204, ..., 0.1008, 0.0485, 0.1226]], |
|
|
device='cuda:0'), grad: tensor([[ 5.2303e-06, 1.8626e-09, 7.0512e-05, ..., 1.5959e-05, |
|
|
4.7475e-05, 1.5870e-05], |
|
|
[ 1.6205e-07, 3.7253e-09, 2.1849e-06, ..., 4.0792e-07, |
|
|
1.4659e-06, 4.2841e-07], |
|
|
[ 3.4273e-07, 0.0000e+00, 4.6194e-06, ..., 1.0468e-06, |
|
|
3.1106e-06, 1.0412e-06], |
|
|
..., |
|
|
[-6.1430e-06, -5.5879e-09, -8.2850e-05, ..., -1.8671e-05, |
|
|
-5.5790e-05, -1.8582e-05], |
|
|
[ 6.1467e-08, 0.0000e+00, 8.2888e-07, ..., 1.8813e-07, |
|
|
5.5879e-07, 1.8626e-07], |
|
|
[ 1.0431e-07, 0.0000e+00, 1.4119e-06, ..., 3.2037e-07, |
|
|
9.5181e-07, 3.1851e-07]], device='cuda:0') |
|
|
Epoch 32, bias, value: tensor([ 0.2986, -0.1310, 0.0074, -0.0110, 0.5969, -0.1544, -0.6129], |
|
|
device='cuda:0'), grad: tensor([ 8.8167e-04, 2.6435e-05, 5.7757e-05, 4.0114e-05, -1.0347e-03, |
|
|
1.0364e-05, 1.7658e-05], device='cuda:0') |
|
|
249 |
|
|
0.005892784473993186 |
|
|
changing lr |
|
|
epoch 31, time 347.30, cls_loss 0.0762 cls_loss_mapping 0.2870 cls_loss_causal 0.9118 re_mapping 0.0449 re_causal 0.0952 |
|
|
Epoch 33, weight, value: tensor([[ 0.0234, 0.0290, -0.0348, ..., -0.0716, -0.1188, -0.0907], |
|
|
[-0.0386, -0.0408, -0.0268, ..., -0.0260, -0.0243, -0.0082], |
|
|
[ 0.0210, -0.0213, 0.0404, ..., -0.0924, -0.0888, -0.0774], |
|
|
..., |
|
|
[-0.0047, -0.0339, 0.0466, ..., -0.0284, -0.0371, -0.0344], |
|
|
[-0.0111, -0.0246, -0.0016, ..., -0.0565, 0.0029, -0.0583], |
|
|
[ 0.0698, 0.2048, 0.0199, ..., 0.0998, 0.0478, 0.1215]], |
|
|
device='cuda:0'), grad: tensor([[ 1.2469e-04, 3.1799e-05, 1.4935e-03, ..., 7.4530e-04, |
|
|
1.5926e-03, 7.6580e-04], |
|
|
[ 1.1921e-04, 3.4094e-05, 1.4572e-03, ..., 7.0524e-04, |
|
|
1.5268e-03, 7.2336e-04], |
|
|
[ 1.2279e-04, 3.2246e-05, 1.4772e-03, ..., 7.3195e-04, |
|
|
1.5697e-03, 7.5197e-04], |
|
|
..., |
|
|
[-4.2677e-04, -1.1748e-04, -5.1804e-03, ..., -2.5349e-03, |
|
|
-5.4626e-03, -2.6016e-03], |
|
|
[ 2.8372e-05, 9.4175e-06, 3.5691e-04, ..., 1.6570e-04, |
|
|
3.6502e-04, 1.6952e-04], |
|
|
[ 9.0078e-06, 2.6394e-06, 1.1051e-04, ..., 5.3346e-05, |
|
|
1.1563e-04, 5.4717e-05]], device='cuda:0') |
|
|
Epoch 33, bias, value: tensor([ 0.3002, -0.1302, 0.0179, 0.0061, 0.5638, -0.1528, -0.6115], |
|
|
device='cuda:0'), grad: tensor([ 0.0169, 0.0172, 0.0169, 0.0035, -0.0601, 0.0044, 0.0013], |
|
|
device='cuda:0') |
|
|
249 |
|
|
0.00567116632908828 |
|
|
changing lr |
|
|
epoch 32, time 344.88, cls_loss 0.0540 cls_loss_mapping 0.2908 cls_loss_causal 0.9360 re_mapping 0.0471 re_causal 0.0968 |
|
|
Epoch 34, weight, value: tensor([[ 0.0228, 0.0284, -0.0369, ..., -0.0712, -0.1194, -0.0903], |
|
|
[-0.0384, -0.0407, -0.0253, ..., -0.0263, -0.0227, -0.0082], |
|
|
[ 0.0209, -0.0210, 0.0396, ..., -0.0922, -0.0895, -0.0773], |
|
|
..., |
|
|
[-0.0041, -0.0327, 0.0500, ..., -0.0258, -0.0327, -0.0321], |
|
|
[-0.0109, -0.0245, -0.0009, ..., -0.0557, 0.0040, -0.0574], |
|
|
[ 0.0694, 0.2034, 0.0198, ..., 0.0990, 0.0474, 0.1206]], |
|
|
device='cuda:0'), grad: tensor([[-2.0489e-07, -1.2107e-07, -1.1250e-06, ..., -2.6077e-07, |
|
|
-7.8417e-07, -2.5891e-07], |
|
|
[ 2.7940e-08, 2.0489e-08, 1.2480e-07, ..., 3.3528e-08, |
|
|
8.9407e-08, 3.3528e-08], |
|
|
[-1.8999e-07, -1.6950e-07, -4.6194e-07, ..., -1.8440e-07, |
|
|
-3.6508e-07, -1.8626e-07], |
|
|
..., |
|
|
[ 3.1851e-07, 2.3097e-07, 1.3150e-06, ..., 3.6135e-07, |
|
|
9.4995e-07, 3.6322e-07], |
|
|
[ 2.4214e-08, 2.0489e-08, 7.0781e-08, ..., 2.4214e-08, |
|
|
5.4017e-08, 2.4214e-08], |
|
|
[ 1.6764e-08, 1.4901e-08, 5.5879e-08, ..., 1.8626e-08, |
|
|
4.2841e-08, 1.8626e-08]], device='cuda:0') |
|
|
Epoch 34, bias, value: tensor([ 0.2845, -0.1277, 0.0171, -0.0108, 0.5882, -0.1503, -0.6074], |
|
|
device='cuda:0'), grad: tensor([-8.8513e-06, 9.9279e-07, -3.8035e-06, 1.6205e-07, 1.0483e-05, |
|
|
5.7369e-07, 4.5635e-07], device='cuda:0') |
|
|
249 |
|
|
0.00544819654451717 |
|
|
changing lr |
|
|
epoch 33, time 347.79, cls_loss 0.0434 cls_loss_mapping 0.2676 cls_loss_causal 0.8887 re_mapping 0.0472 re_causal 0.0963 |
|
|
Epoch 35, weight, value: tensor([[ 0.0221, 0.0274, -0.0383, ..., -0.0715, -0.1193, -0.0896], |
|
|
[-0.0379, -0.0402, -0.0244, ..., -0.0261, -0.0218, -0.0081], |
|
|
[ 0.0206, -0.0210, 0.0386, ..., -0.0917, -0.0898, -0.0771], |
|
|
..., |
|
|
[-0.0036, -0.0317, 0.0507, ..., -0.0256, -0.0323, -0.0328], |
|
|
[-0.0106, -0.0242, 0.0007, ..., -0.0543, 0.0056, -0.0559], |
|
|
[ 0.0689, 0.2020, 0.0196, ..., 0.0982, 0.0469, 0.1197]], |
|
|
device='cuda:0'), grad: tensor([[ 3.2410e-07, 2.4214e-07, 5.4874e-06, ..., 4.0196e-06, |
|
|
6.5789e-06, 2.8349e-06], |
|
|
[ 1.8626e-09, 1.8626e-09, 2.6077e-08, ..., 1.6764e-08, |
|
|
2.7940e-08, 1.1176e-08], |
|
|
[ 3.3528e-08, 2.4214e-08, 5.6624e-07, ..., 4.1351e-07, |
|
|
6.7614e-07, 2.9057e-07], |
|
|
..., |
|
|
[-3.6322e-07, -2.7195e-07, -6.1877e-06, ..., -4.5486e-06, |
|
|
-7.4320e-06, -3.2056e-06], |
|
|
[-3.7253e-09, -0.0000e+00, -1.8626e-09, ..., 1.6764e-08, |
|
|
1.4901e-08, 1.1176e-08], |
|
|
[ 3.7253e-09, 1.8626e-09, 4.4703e-08, ..., 3.1665e-08, |
|
|
5.2154e-08, 2.2352e-08]], device='cuda:0') |
|
|
Epoch 35, bias, value: tensor([ 0.2687, -0.1285, 0.0204, -0.0102, 0.5901, -0.1419, -0.6049], |
|
|
device='cuda:0'), grad: tensor([ 5.6118e-05, 2.5332e-07, 5.7742e-06, 5.8673e-07, -6.3360e-05, |
|
|
7.8231e-08, 4.5076e-07], device='cuda:0') |
|
|
249 |
|
|
0.005224324151752577 |
|
|
changing lr |
|
|
---------------------saving model at epoch 34---------------------------------------------------- |
|
|
epoch 34, time 349.39, cls_loss 0.0430 cls_loss_mapping 0.2367 cls_loss_causal 0.8582 re_mapping 0.0450 re_causal 0.0985 |
|
|
Epoch 36, weight, value: tensor([[ 0.0217, 0.0271, -0.0392, ..., -0.0714, -0.1198, -0.0895], |
|
|
[-0.0401, -0.0425, -0.0308, ..., -0.0291, -0.0304, -0.0152], |
|
|
[ 0.0203, -0.0210, 0.0371, ..., -0.0920, -0.0909, -0.0774], |
|
|
..., |
|
|
[-0.0033, -0.0313, 0.0507, ..., -0.0255, -0.0322, -0.0326], |
|
|
[-0.0104, -0.0240, 0.0017, ..., -0.0535, 0.0067, -0.0551], |
|
|
[ 0.0708, 0.2032, 0.0263, ..., 0.1006, 0.0557, 0.1260]], |
|
|
device='cuda:0'), grad: tensor([[ 2.0303e-07, 3.7253e-09, 3.2932e-06, ..., 1.4752e-06, |
|
|
3.1497e-06, 9.6299e-07], |
|
|
[ 0.0000e+00, 0.0000e+00, 9.3132e-09, ..., 3.7253e-09, |
|
|
7.4506e-09, 1.8626e-09], |
|
|
[ 5.5879e-09, -1.6764e-08, 3.4459e-07, ..., 1.5274e-07, |
|
|
3.3528e-07, 9.6858e-08], |
|
|
..., |
|
|
[-2.3469e-07, -3.7253e-09, -3.8296e-06, ..., -1.7155e-06, |
|
|
-3.6620e-06, -1.1194e-06], |
|
|
[ 1.8626e-08, 1.4901e-08, 8.7544e-08, ..., 4.0978e-08, |
|
|
7.8231e-08, 2.9802e-08], |
|
|
[ 3.7253e-09, 0.0000e+00, 3.7253e-08, ..., 1.6764e-08, |
|
|
3.5390e-08, 1.1176e-08]], device='cuda:0') |
|
|
Epoch 36, bias, value: tensor([ 0.2641, -0.1327, 0.0109, -0.0064, 0.5864, -0.1347, -0.5939], |
|
|
device='cuda:0'), grad: tensor([ 2.8789e-05, 7.4506e-08, 3.0044e-06, 4.7125e-07, -3.3468e-05, |
|
|
7.7486e-07, 3.1851e-07], device='cuda:0') |
|
|
249 |
|
|
0.005000000000000003 |
|
|
changing lr |
|
|
epoch 35, time 348.58, cls_loss 0.0310 cls_loss_mapping 0.2276 cls_loss_causal 0.8888 re_mapping 0.0457 re_causal 0.1002 |
|
|
Epoch 37, weight, value: tensor([[ 0.0220, 0.0273, -0.0368, ..., -0.0704, -0.1163, -0.0880], |
|
|
[-0.0400, -0.0422, -0.0303, ..., -0.0289, -0.0301, -0.0155], |
|
|
[ 0.0201, -0.0209, 0.0377, ..., -0.0911, -0.0898, -0.0768], |
|
|
..., |
|
|
[-0.0037, -0.0315, 0.0476, ..., -0.0260, -0.0347, -0.0332], |
|
|
[-0.0104, -0.0239, 0.0009, ..., -0.0536, 0.0057, -0.0551], |
|
|
[ 0.0705, 0.2020, 0.0262, ..., 0.1000, 0.0555, 0.1256]], |
|
|
device='cuda:0'), grad: tensor([[ 3.0566e-06, 4.5821e-06, 3.4451e-05, ..., 3.8564e-05, |
|
|
5.6267e-05, 3.7909e-05], |
|
|
[-8.5682e-08, -3.3528e-08, 6.5193e-08, ..., 5.1409e-07, |
|
|
5.1036e-07, 4.9546e-07], |
|
|
[ 5.9605e-08, 1.0431e-07, 9.4622e-07, ..., 1.1232e-06, |
|
|
1.6093e-06, 1.1008e-06], |
|
|
..., |
|
|
[ 5.9232e-06, 8.9034e-06, 6.6996e-05, ..., 7.5161e-05, |
|
|
1.0961e-04, 7.3850e-05], |
|
|
[ 3.1758e-06, 2.3935e-06, 1.6183e-05, ..., 1.4700e-05, |
|
|
2.1026e-05, 1.3165e-05], |
|
|
[ 7.0781e-07, 7.9162e-07, 5.7667e-06, ..., 6.0536e-06, |
|
|
8.8066e-06, 5.8040e-06]], device='cuda:0') |
|
|
Epoch 37, bias, value: tensor([ 0.2656, -0.1274, 0.0195, -0.0015, 0.5698, -0.1398, -0.5924], |
|
|
device='cuda:0'), grad: tensor([ 3.1233e-04, 7.2457e-07, 8.8885e-06, -1.0853e-03, 6.0797e-04, |
|
|
1.0645e-04, 4.7445e-05], device='cuda:0') |
|
|
249 |
|
|
0.004775675848247429 |
|
|
changing lr |
|
|
---------------------saving model at epoch 36---------------------------------------------------- |
|
|
epoch 36, time 353.79, cls_loss 0.0258 cls_loss_mapping 0.2135 cls_loss_causal 0.8457 re_mapping 0.0440 re_causal 0.0889 |
|
|
Epoch 38, weight, value: tensor([[ 0.0216, 0.0268, -0.0387, ..., -0.0706, -0.1180, -0.0882], |
|
|
[-0.0392, -0.0414, -0.0269, ..., -0.0278, -0.0259, -0.0145], |
|
|
[ 0.0201, -0.0205, 0.0382, ..., -0.0908, -0.0885, -0.0765], |
|
|
..., |
|
|
[-0.0037, -0.0314, 0.0478, ..., -0.0250, -0.0340, -0.0322], |
|
|
[-0.0105, -0.0239, -0.0002, ..., -0.0537, 0.0045, -0.0552], |
|
|
[ 0.0700, 0.2008, 0.0257, ..., 0.0992, 0.0547, 0.1247]], |
|
|
device='cuda:0'), grad: tensor([[ 1.7226e-04, 2.0072e-05, 1.4858e-03, ..., 5.2261e-04, |
|
|
1.4391e-03, 5.3930e-04], |
|
|
[ 3.0667e-05, 3.6322e-06, 2.6464e-04, ..., 9.3102e-05, |
|
|
2.5630e-04, 9.6083e-05], |
|
|
[ 5.8323e-05, 6.7316e-06, 5.0306e-04, ..., 1.7703e-04, |
|
|
4.8757e-04, 1.8275e-04], |
|
|
..., |
|
|
[ 1.9684e-03, 2.2709e-04, 1.6953e-02, ..., 5.9662e-03, |
|
|
1.6418e-02, 6.1569e-03], |
|
|
[-2.4223e-03, -2.7990e-04, -2.0889e-02, ..., -7.3471e-03, |
|
|
-2.0233e-02, -7.5836e-03], |
|
|
[ 2.8446e-05, 3.3136e-06, 2.4533e-04, ..., 8.6308e-05, |
|
|
2.3758e-04, 8.9049e-05]], device='cuda:0') |
|
|
Epoch 38, bias, value: tensor([ 0.2553, -0.1169, 0.0185, -0.0010, 0.5754, -0.1466, -0.5910], |
|
|
device='cuda:0'), grad: tensor([ 0.0071, 0.0013, 0.0024, 0.0068, 0.0804, -0.0991, 0.0012], |
|
|
device='cuda:0') |
|
|
249 |
|
|
0.004551803455482836 |
|
|
changing lr |
|
|
epoch 37, time 345.70, cls_loss 0.0392 cls_loss_mapping 0.2031 cls_loss_causal 0.7827 re_mapping 0.0426 re_causal 0.0847 |
|
|
Epoch 39, weight, value: tensor([[ 0.0215, 0.0267, -0.0387, ..., -0.0700, -0.1176, -0.0875], |
|
|
[-0.0390, -0.0412, -0.0273, ..., -0.0277, -0.0263, -0.0145], |
|
|
[ 0.0200, -0.0202, 0.0388, ..., -0.0900, -0.0872, -0.0757], |
|
|
..., |
|
|
[-0.0041, -0.0317, 0.0457, ..., -0.0263, -0.0372, -0.0337], |
|
|
[-0.0103, -0.0238, 0.0006, ..., -0.0531, 0.0052, -0.0546], |
|
|
[ 0.0696, 0.1996, 0.0253, ..., 0.0986, 0.0543, 0.1239]], |
|
|
device='cuda:0'), grad: tensor([[-5.1069e-04, -5.7316e-04, -1.7967e-03, ..., -6.3467e-04, |
|
|
-1.9684e-03, -6.0940e-04], |
|
|
[-2.6338e-06, -2.0098e-06, -2.4334e-05, ..., -3.8091e-06, |
|
|
-2.3484e-05, -4.0159e-06], |
|
|
[ 3.3885e-05, 1.1325e-04, 6.0272e-04, ..., 9.6262e-05, |
|
|
7.1335e-04, 1.5843e-04], |
|
|
..., |
|
|
[ 4.6396e-04, 4.4703e-04, 1.1797e-03, ..., 5.2500e-04, |
|
|
1.2379e-03, 4.4084e-04], |
|
|
[-1.1455e-07, -2.5146e-08, -4.9360e-07, ..., -8.6613e-08, |
|
|
-5.4948e-07, -1.2945e-07], |
|
|
[ 1.5736e-05, 1.5169e-05, 3.9428e-05, ..., 1.7777e-05, |
|
|
4.1455e-05, 1.4901e-05]], device='cuda:0') |
|
|
Epoch 39, bias, value: tensor([ 0.2524, -0.1184, 0.0241, 0.0039, 0.5624, -0.1418, -0.5888], |
|
|
device='cuda:0'), grad: tensor([-5.0392e-03, -7.8619e-05, 2.1935e-03, 2.6729e-06, 2.8324e-03, |
|
|
-8.0094e-07, 9.3877e-05], device='cuda:0') |
|
|
249 |
|
|
0.004328833670911726 |
|
|
changing lr |
|
|
---------------------saving model at epoch 38---------------------------------------------------- |
|
|
epoch 38, time 348.97, cls_loss 0.0203 cls_loss_mapping 0.1966 cls_loss_causal 0.8061 re_mapping 0.0423 re_causal 0.0850 |
|
|
Epoch 40, weight, value: tensor([[ 0.0215, 0.0266, -0.0386, ..., -0.0695, -0.1169, -0.0868], |
|
|
[-0.0386, -0.0406, -0.0256, ..., -0.0275, -0.0248, -0.0143], |
|
|
[ 0.0196, -0.0203, 0.0365, ..., -0.0902, -0.0895, -0.0764], |
|
|
..., |
|
|
[-0.0039, -0.0316, 0.0467, ..., -0.0254, -0.0348, -0.0325], |
|
|
[-0.0105, -0.0238, -0.0011, ..., -0.0535, 0.0031, -0.0552], |
|
|
[ 0.0690, 0.1984, 0.0247, ..., 0.0979, 0.0535, 0.1230]], |
|
|
device='cuda:0'), grad: tensor([[-7.0781e-08, -1.9558e-08, -9.7137e-07, ..., -4.9174e-07, |
|
|
-1.0999e-06, -4.7404e-07], |
|
|
[ 1.1176e-08, 9.3132e-10, 9.7789e-08, ..., 2.5146e-08, |
|
|
1.0058e-07, 2.6077e-08], |
|
|
[ 1.3039e-08, 2.7940e-09, 1.3132e-07, ..., 4.0978e-08, |
|
|
1.3690e-07, 4.0978e-08], |
|
|
..., |
|
|
[ 7.5437e-08, 1.6764e-08, 8.1304e-07, ..., 2.9150e-07, |
|
|
8.6334e-07, 2.8964e-07], |
|
|
[-1.0058e-07, -1.3970e-08, -7.5065e-07, ..., -4.4703e-08, |
|
|
-6.9104e-07, -6.6124e-08], |
|
|
[ 3.4459e-08, 5.5879e-09, 3.0361e-07, ..., 6.6124e-08, |
|
|
3.0268e-07, 6.9849e-08]], device='cuda:0') |
|
|
Epoch 40, bias, value: tensor([ 0.2518, -0.1152, 0.0227, 0.0056, 0.5624, -0.1456, -0.5879], |
|
|
device='cuda:0'), grad: tensor([-6.8396e-06, 5.5786e-07, 8.0466e-07, 2.2724e-06, 5.1595e-06, |
|
|
-3.6880e-06, 1.7229e-06], device='cuda:0') |
|
|
249 |
|
|
0.0041072155260068206 |
|
|
changing lr |
|
|
epoch 39, time 348.75, cls_loss 0.0249 cls_loss_mapping 0.1840 cls_loss_causal 0.8273 re_mapping 0.0398 re_causal 0.0809 |
|
|
Epoch 41, weight, value: tensor([[ 0.0218, 0.0268, -0.0371, ..., -0.0690, -0.1148, -0.0860], |
|
|
[-0.0383, -0.0403, -0.0250, ..., -0.0273, -0.0241, -0.0142], |
|
|
[ 0.0195, -0.0203, 0.0359, ..., -0.0899, -0.0896, -0.0763], |
|
|
..., |
|
|
[-0.0044, -0.0319, 0.0448, ..., -0.0254, -0.0368, -0.0328], |
|
|
[-0.0102, -0.0233, 0.0007, ..., -0.0526, 0.0053, -0.0540], |
|
|
[ 0.0687, 0.1974, 0.0243, ..., 0.0973, 0.0529, 0.1223]], |
|
|
device='cuda:0'), grad: tensor([[2.2352e-06, 6.1840e-07, 1.8418e-05, ..., 1.3851e-05, 2.5854e-05, |
|
|
1.2666e-05], |
|
|
[2.2817e-07, 7.8231e-08, 1.3914e-06, ..., 8.5030e-07, 1.7053e-06, |
|
|
8.2236e-07], |
|
|
[1.6764e-05, 5.5805e-06, 7.7784e-05, ..., 3.8773e-05, 8.3685e-05, |
|
|
3.9458e-05], |
|
|
..., |
|
|
[8.1360e-06, 2.7195e-06, 3.8117e-05, ..., 1.9133e-05, 4.1217e-05, |
|
|
1.9446e-05], |
|
|
[4.0419e-06, 1.3923e-06, 2.1443e-05, ..., 1.1943e-05, 2.4766e-05, |
|
|
1.1817e-05], |
|
|
[3.3919e-06, 1.2238e-06, 1.7583e-05, ..., 9.4399e-06, 1.9893e-05, |
|
|
9.4622e-06]], device='cuda:0') |
|
|
Epoch 41, bias, value: tensor([ 2.4937e-01, -1.1424e-01, 2.2541e-02, 4.9674e-04, 5.6182e-01, |
|
|
-1.4016e-01, -5.8597e-01], device='cuda:0'), grad: tensor([ 1.0884e-04, 8.0839e-06, 3.7861e-04, -8.8835e-04, 1.8752e-04, |
|
|
1.1289e-04, 9.2149e-05], device='cuda:0') |
|
|
249 |
|
|
0.0038873953302184317 |
|
|
changing lr |
|
|
epoch 40, time 347.58, cls_loss 0.0184 cls_loss_mapping 0.1822 cls_loss_causal 0.8379 re_mapping 0.0405 re_causal 0.0762 |
|
|
Epoch 42, weight, value: tensor([[ 0.0217, 0.0267, -0.0374, ..., -0.0689, -0.1149, -0.0858], |
|
|
[-0.0381, -0.0401, -0.0244, ..., -0.0270, -0.0234, -0.0139], |
|
|
[ 0.0194, -0.0202, 0.0359, ..., -0.0894, -0.0891, -0.0759], |
|
|
..., |
|
|
[-0.0035, -0.0310, 0.0476, ..., -0.0241, -0.0331, -0.0314], |
|
|
[-0.0109, -0.0238, -0.0021, ..., -0.0534, 0.0022, -0.0548], |
|
|
[ 0.0683, 0.1964, 0.0239, ..., 0.0967, 0.0524, 0.1216]], |
|
|
device='cuda:0'), grad: tensor([[ 9.3132e-10, 9.3132e-10, 1.3970e-08, ..., 4.6566e-09, |
|
|
1.3970e-08, 4.6566e-09], |
|
|
[-1.8626e-09, -2.7940e-09, -2.7008e-08, ..., -8.3819e-09, |
|
|
-2.6077e-08, -8.3819e-09], |
|
|
[ 0.0000e+00, 0.0000e+00, 2.7940e-09, ..., 9.3132e-10, |
|
|
3.7253e-09, 9.3132e-10], |
|
|
..., |
|
|
[ 1.2107e-08, 5.5879e-09, 9.8720e-08, ..., 3.0734e-08, |
|
|
1.0617e-07, 4.3772e-08], |
|
|
[-1.5832e-08, -6.5193e-09, -1.2200e-07, ..., -3.8184e-08, |
|
|
-1.3225e-07, -5.5879e-08], |
|
|
[ 1.8626e-09, 9.3132e-10, 1.7695e-08, ..., 5.5879e-09, |
|
|
1.9558e-08, 8.3819e-09]], device='cuda:0') |
|
|
Epoch 42, bias, value: tensor([ 2.4822e-01, -1.1041e-01, 2.3572e-02, -2.5686e-04, 5.6161e-01, |
|
|
-1.4442e-01, -5.8441e-01], device='cuda:0'), grad: tensor([ 1.2759e-07, -2.8498e-07, 1.4901e-08, 7.6368e-08, 5.7649e-07, |
|
|
-5.9418e-07, 9.1270e-08], device='cuda:0') |
|
|
249 |
|
|
0.003669815772166629 |
|
|
changing lr |
|
|
epoch 41, time 349.76, cls_loss 0.0273 cls_loss_mapping 0.1802 cls_loss_causal 0.8285 re_mapping 0.0382 re_causal 0.0830 |
|
|
Epoch 43, weight, value: tensor([[ 0.0215, 0.0264, -0.0379, ..., -0.0689, -0.1151, -0.0857], |
|
|
[-0.0373, -0.0394, -0.0207, ..., -0.0266, -0.0203, -0.0134], |
|
|
[ 0.0193, -0.0201, 0.0359, ..., -0.0890, -0.0885, -0.0755], |
|
|
..., |
|
|
[-0.0041, -0.0313, 0.0442, ..., -0.0241, -0.0355, -0.0316], |
|
|
[-0.0105, -0.0235, 0.0002, ..., -0.0525, 0.0048, -0.0537], |
|
|
[ 0.0679, 0.1955, 0.0237, ..., 0.0962, 0.0519, 0.1210]], |
|
|
device='cuda:0'), grad: tensor([[ 1.7416e-07, 8.6613e-08, 3.6266e-06, ..., 8.9873e-07, |
|
|
3.3863e-06, 7.4785e-07], |
|
|
[-5.5879e-09, -0.0000e+00, -8.6613e-08, ..., -4.6566e-09, |
|
|
-6.0536e-08, -2.7940e-09], |
|
|
[ 3.7253e-09, 9.3132e-10, 1.0710e-07, ..., 2.6077e-08, |
|
|
1.1269e-07, 3.3528e-08], |
|
|
..., |
|
|
[-1.6857e-07, -8.7544e-08, -3.5223e-06, ..., -8.9034e-07, |
|
|
-3.2913e-06, -7.2736e-07], |
|
|
[-5.5879e-09, 9.3132e-10, -2.1793e-07, ..., -5.3085e-08, |
|
|
-2.4587e-07, -8.1956e-08], |
|
|
[ 2.7940e-09, 0.0000e+00, 8.1025e-08, ..., 1.9558e-08, |
|
|
8.7544e-08, 2.7940e-08]], device='cuda:0') |
|
|
Epoch 43, bias, value: tensor([ 0.2411, -0.1017, 0.0264, -0.0056, 0.5517, -0.1354, -0.5827], |
|
|
device='cuda:0'), grad: tensor([ 2.3931e-05, -6.6031e-07, 6.8638e-07, 8.9407e-08, -2.3156e-05, |
|
|
-1.3653e-06, 5.1130e-07], device='cuda:0') |
|
|
249 |
|
|
0.0034549150281252667 |
|
|
changing lr |
|
|
epoch 42, time 349.43, cls_loss 0.0260 cls_loss_mapping 0.1887 cls_loss_causal 0.7472 re_mapping 0.0344 re_causal 0.0714 |
|
|
Epoch 44, weight, value: tensor([[ 0.0212, 0.0261, -0.0394, ..., -0.0695, -0.1166, -0.0861], |
|
|
[-0.0372, -0.0394, -0.0214, ..., -0.0270, -0.0215, -0.0139], |
|
|
[ 0.0194, -0.0199, 0.0365, ..., -0.0884, -0.0874, -0.0750], |
|
|
..., |
|
|
[-0.0035, -0.0308, 0.0474, ..., -0.0223, -0.0309, -0.0298], |
|
|
[-0.0106, -0.0235, -0.0005, ..., -0.0525, 0.0040, -0.0537], |
|
|
[ 0.0676, 0.1946, 0.0233, ..., 0.0957, 0.0514, 0.1203]], |
|
|
device='cuda:0'), grad: tensor([[-1.3959e-04, -9.1136e-05, -1.1940e-03, ..., -1.2386e-04, |
|
|
-1.0395e-03, -1.3590e-04], |
|
|
[ 1.1966e-05, 1.1869e-05, 1.3447e-04, ..., 1.7852e-05, |
|
|
1.3018e-04, 1.9625e-05], |
|
|
[ 2.1867e-06, 1.9129e-06, 2.2575e-05, ..., 2.8014e-06, |
|
|
2.1234e-05, 3.0827e-06], |
|
|
..., |
|
|
[ 1.2386e-04, 7.6175e-05, 1.0223e-03, ..., 1.0151e-04, |
|
|
8.7500e-04, 1.1128e-04], |
|
|
[ 5.2806e-07, 4.9081e-07, 5.6736e-06, ..., 7.2829e-07, |
|
|
5.4091e-06, 8.0094e-07], |
|
|
[ 8.0373e-07, 5.4482e-07, 7.0147e-06, ..., 7.4971e-07, |
|
|
6.1691e-06, 8.2050e-07]], device='cuda:0') |
|
|
Epoch 44, bias, value: tensor([ 2.2749e-01, -1.0423e-01, 3.1431e-02, 1.9333e-04, 5.5837e-01, |
|
|
-1.3793e-01, -5.8138e-01], device='cuda:0'), grad: tensor([-5.3062e-03, 6.4611e-04, 1.0616e-04, 1.3657e-05, 4.4861e-03, |
|
|
2.6956e-05, 3.1412e-05], device='cuda:0') |
|
|
249 |
|
|
0.0032431258795932905 |
|
|
changing lr |
|
|
epoch 43, time 347.80, cls_loss 0.0220 cls_loss_mapping 0.1586 cls_loss_causal 0.7720 re_mapping 0.0327 re_causal 0.0697 |
|
|
Epoch 45, weight, value: tensor([[ 0.0212, 0.0260, -0.0388, ..., -0.0691, -0.1155, -0.0856], |
|
|
[-0.0370, -0.0391, -0.0209, ..., -0.0267, -0.0210, -0.0137], |
|
|
[ 0.0193, -0.0198, 0.0367, ..., -0.0879, -0.0868, -0.0746], |
|
|
..., |
|
|
[-0.0036, -0.0307, 0.0466, ..., -0.0223, -0.0314, -0.0298], |
|
|
[-0.0106, -0.0234, -0.0010, ..., -0.0529, 0.0031, -0.0540], |
|
|
[ 0.0673, 0.1938, 0.0231, ..., 0.0952, 0.0510, 0.1198]], |
|
|
device='cuda:0'), grad: tensor([[ 4.1910e-08, 6.5193e-09, 2.7753e-07, ..., 1.2107e-08, |
|
|
2.2911e-07, 4.1910e-08], |
|
|
[ 6.3330e-08, 9.3132e-09, 4.2841e-07, ..., 1.8626e-08, |
|
|
3.5483e-07, 6.6124e-08], |
|
|
[ 7.0781e-08, 7.4506e-09, 4.8056e-07, ..., 1.6764e-08, |
|
|
3.9861e-07, 7.2643e-08], |
|
|
..., |
|
|
[ 1.8440e-07, 2.7008e-08, 1.2359e-06, ..., 5.4017e-08, |
|
|
1.0226e-06, 1.8999e-07], |
|
|
[-6.0163e-07, -8.5682e-08, -4.0382e-06, ..., -1.6857e-07, |
|
|
-3.3397e-06, -6.1560e-07], |
|
|
[ 1.5274e-07, 2.2352e-08, 1.0235e-06, ..., 4.5635e-08, |
|
|
8.4843e-07, 1.5832e-07]], device='cuda:0') |
|
|
Epoch 45, bias, value: tensor([ 0.2288, -0.1013, 0.0338, 0.0020, 0.5523, -0.1420, -0.5797], |
|
|
device='cuda:0'), grad: tensor([ 1.2945e-06, 2.0079e-06, 2.1942e-06, 2.7083e-06, 5.8040e-06, |
|
|
-1.8835e-05, 4.7907e-06], device='cuda:0') |
|
|
249 |
|
|
0.0030348748417303863 |
|
|
changing lr |
|
|
epoch 44, time 348.82, cls_loss 0.0162 cls_loss_mapping 0.1627 cls_loss_causal 0.7833 re_mapping 0.0324 re_causal 0.0694 |
|
|
Epoch 46, weight, value: tensor([[ 0.0214, 0.0261, -0.0369, ..., -0.0681, -0.1128, -0.0845], |
|
|
[-0.0369, -0.0390, -0.0210, ..., -0.0267, -0.0211, -0.0138], |
|
|
[ 0.0192, -0.0197, 0.0366, ..., -0.0876, -0.0864, -0.0743], |
|
|
..., |
|
|
[-0.0037, -0.0307, 0.0451, ..., -0.0229, -0.0332, -0.0304], |
|
|
[-0.0105, -0.0233, -0.0010, ..., -0.0527, 0.0031, -0.0538], |
|
|
[ 0.0670, 0.1930, 0.0228, ..., 0.0948, 0.0506, 0.1193]], |
|
|
device='cuda:0'), grad: tensor([[-4.6976e-06, -1.3476e-06, -2.8268e-05, ..., -5.4240e-06, |
|
|
-2.3216e-05, -6.0126e-06], |
|
|
[ 9.7230e-07, 2.8871e-07, 5.8413e-06, ..., 1.1381e-06, |
|
|
4.8243e-06, 1.2657e-06], |
|
|
[ 1.6019e-07, 6.9384e-08, 9.8627e-07, ..., 2.4773e-07, |
|
|
8.8522e-07, 2.7753e-07], |
|
|
..., |
|
|
[ 3.2540e-06, 8.7917e-07, 1.9431e-05, ..., 3.5278e-06, |
|
|
1.5780e-05, 3.9414e-06], |
|
|
[-8.3353e-08, -5.3085e-08, -3.2783e-07, ..., -4.0978e-08, |
|
|
-3.4273e-07, -1.0384e-07], |
|
|
[ 3.0082e-07, 1.1595e-07, 1.7677e-06, ..., 3.8277e-07, |
|
|
1.5311e-06, 4.4005e-07]], device='cuda:0') |
|
|
Epoch 46, bias, value: tensor([ 0.2330, -0.1023, 0.0352, 0.0026, 0.5458, -0.1421, -0.5782], |
|
|
device='cuda:0'), grad: tensor([-1.0890e-04, 2.2680e-05, 4.4368e-06, 2.9523e-06, 7.2658e-05, |
|
|
-9.5367e-07, 7.2941e-06], device='cuda:0') |
|
|
249 |
|
|
0.0028305813044122124 |
|
|
changing lr |
|
|
epoch 45, time 347.20, cls_loss 0.0141 cls_loss_mapping 0.1489 cls_loss_causal 0.7867 re_mapping 0.0319 re_causal 0.0649 |
|
|
Epoch 47, weight, value: tensor([[ 0.0213, 0.0260, -0.0365, ..., -0.0677, -0.1122, -0.0841], |
|
|
[-0.0368, -0.0389, -0.0212, ..., -0.0267, -0.0213, -0.0138], |
|
|
[ 0.0191, -0.0197, 0.0361, ..., -0.0874, -0.0865, -0.0742], |
|
|
..., |
|
|
[-0.0037, -0.0305, 0.0450, ..., -0.0228, -0.0331, -0.0303], |
|
|
[-0.0104, -0.0231, -0.0005, ..., -0.0524, 0.0036, -0.0535], |
|
|
[ 0.0668, 0.1923, 0.0226, ..., 0.0944, 0.0503, 0.1188]], |
|
|
device='cuda:0'), grad: tensor([[ 8.6725e-06, 5.0217e-06, 4.3958e-05, ..., 5.4464e-06, |
|
|
3.4899e-05, 6.7502e-06], |
|
|
[ 7.4506e-07, 4.2841e-07, 3.7719e-06, ..., 4.8149e-07, |
|
|
3.0138e-06, 5.9186e-07], |
|
|
[ 1.7090e-07, 9.6858e-08, 8.6566e-07, ..., 1.2107e-07, |
|
|
7.0408e-07, 1.4529e-07], |
|
|
..., |
|
|
[-9.7156e-06, -5.6326e-06, -4.9174e-05, ..., -5.9083e-06, |
|
|
-3.8832e-05, -7.3835e-06], |
|
|
[-1.1781e-07, -2.3283e-08, -5.2340e-07, ..., -1.5181e-07, |
|
|
-5.5414e-07, -1.5413e-07], |
|
|
[ 1.7742e-07, 7.4971e-08, 8.8243e-07, ..., 2.3749e-07, |
|
|
8.6799e-07, 2.5053e-07]], device='cuda:0') |
|
|
Epoch 47, bias, value: tensor([ 0.2331, -0.1031, 0.0329, 0.0037, 0.5448, -0.1405, -0.5769], |
|
|
device='cuda:0'), grad: tensor([ 1.4853e-04, 1.2763e-05, 2.9393e-06, 2.2631e-07, -1.6594e-04, |
|
|
-1.7229e-06, 3.0603e-06], device='cuda:0') |
|
|
249 |
|
|
0.0026306566876350096 |
|
|
changing lr |
|
|
epoch 46, time 351.40, cls_loss 0.0152 cls_loss_mapping 0.1497 cls_loss_causal 0.7486 re_mapping 0.0335 re_causal 0.0688 |
|
|
Epoch 48, weight, value: tensor([[ 0.0212, 0.0259, -0.0366, ..., -0.0673, -0.1120, -0.0837], |
|
|
[-0.0368, -0.0388, -0.0214, ..., -0.0268, -0.0216, -0.0139], |
|
|
[ 0.0191, -0.0196, 0.0359, ..., -0.0873, -0.0864, -0.0741], |
|
|
..., |
|
|
[-0.0035, -0.0303, 0.0454, ..., -0.0227, -0.0324, -0.0301], |
|
|
[-0.0103, -0.0231, -0.0005, ..., -0.0522, 0.0037, -0.0533], |
|
|
[ 0.0665, 0.1917, 0.0224, ..., 0.0940, 0.0500, 0.1183]], |
|
|
device='cuda:0'), grad: tensor([[-3.7253e-08, -2.8871e-08, -4.2934e-07, ..., -1.4808e-07, |
|
|
-5.2014e-07, -1.5087e-07], |
|
|
[ 3.4925e-08, 2.1886e-08, 3.6834e-07, ..., 1.2806e-07, |
|
|
4.2701e-07, 1.3644e-07], |
|
|
[ 6.6590e-08, 2.9802e-08, 4.0792e-07, ..., 1.6857e-07, |
|
|
4.2701e-07, 1.7509e-07], |
|
|
..., |
|
|
[-6.9849e-08, -3.3993e-08, -4.3446e-07, ..., -2.0536e-07, |
|
|
-4.4284e-07, -2.0489e-07], |
|
|
[-2.5611e-08, 1.3970e-09, -1.7043e-07, ..., -7.9162e-09, |
|
|
-1.7602e-07, -4.1910e-08], |
|
|
[ 1.8161e-08, 4.6566e-09, 1.5087e-07, ..., 3.3993e-08, |
|
|
1.6671e-07, 4.8429e-08]], device='cuda:0') |
|
|
Epoch 48, bias, value: tensor([ 0.2353, -0.1035, 0.0320, 0.0043, 0.5411, -0.1397, -0.5755], |
|
|
device='cuda:0'), grad: tensor([-2.6878e-06, 2.3711e-06, 2.2128e-06, 5.7975e-07, -2.6841e-06, |
|
|
-5.8161e-07, 7.8138e-07], device='cuda:0') |
|
|
249 |
|
|
0.0024355036129704724 |
|
|
changing lr |
|
|
epoch 47, time 348.26, cls_loss 0.0167 cls_loss_mapping 0.1378 cls_loss_causal 0.7175 re_mapping 0.0324 re_causal 0.0644 |
|
|
Epoch 49, weight, value: tensor([[ 0.0212, 0.0258, -0.0364, ..., -0.0671, -0.1116, -0.0834], |
|
|
[-0.0367, -0.0387, -0.0215, ..., -0.0267, -0.0217, -0.0139], |
|
|
[ 0.0190, -0.0195, 0.0362, ..., -0.0869, -0.0856, -0.0737], |
|
|
..., |
|
|
[-0.0036, -0.0302, 0.0452, ..., -0.0227, -0.0324, -0.0301], |
|
|
[-0.0103, -0.0230, -0.0005, ..., -0.0521, 0.0036, -0.0532], |
|
|
[ 0.0663, 0.1911, 0.0223, ..., 0.0937, 0.0498, 0.1180]], |
|
|
device='cuda:0'), grad: tensor([[ 1.5041e-07, 1.8626e-08, 2.8182e-06, ..., 2.5146e-07, |
|
|
2.3022e-06, 2.9523e-07], |
|
|
[-4.1649e-06, -3.5809e-07, -8.1778e-05, ..., -5.4538e-06, |
|
|
-6.5923e-05, -7.3649e-06], |
|
|
[-3.5390e-07, -2.3004e-07, -1.1837e-06, ..., -6.8778e-07, |
|
|
-1.3318e-06, -6.9663e-07], |
|
|
..., |
|
|
[ 4.3176e-06, 5.4855e-07, 7.9632e-05, ..., 5.8115e-06, |
|
|
6.4492e-05, 7.6815e-06], |
|
|
[ 1.8626e-08, 1.0710e-08, 1.4761e-07, ..., 3.2596e-08, |
|
|
1.3225e-07, 3.3993e-08], |
|
|
[ 1.1176e-08, 6.0536e-09, 1.0571e-07, ..., 1.8626e-08, |
|
|
9.1735e-08, 2.0023e-08]], device='cuda:0') |
|
|
Epoch 49, bias, value: tensor([ 0.2346, -0.1037, 0.0338, 0.0039, 0.5387, -0.1391, -0.5740], |
|
|
device='cuda:0'), grad: tensor([ 2.5690e-05, -7.4625e-04, -8.5533e-06, 1.1157e-06, 7.2432e-04, |
|
|
1.2517e-06, 9.1363e-07], device='cuda:0') |
|
|
249 |
|
|
0.00224551509273949 |
|
|
changing lr |
|
|
epoch 48, time 349.54, cls_loss 0.0157 cls_loss_mapping 0.1420 cls_loss_causal 0.6908 re_mapping 0.0317 re_causal 0.0606 |
|
|
Epoch 50, weight, value: tensor([[ 0.0211, 0.0257, -0.0366, ..., -0.0671, -0.1117, -0.0833], |
|
|
[-0.0365, -0.0386, -0.0207, ..., -0.0266, -0.0210, -0.0138], |
|
|
[ 0.0189, -0.0195, 0.0356, ..., -0.0868, -0.0859, -0.0738], |
|
|
..., |
|
|
[-0.0037, -0.0303, 0.0446, ..., -0.0229, -0.0328, -0.0302], |
|
|
[-0.0102, -0.0229, -0.0003, ..., -0.0517, 0.0038, -0.0527], |
|
|
[ 0.0661, 0.1906, 0.0221, ..., 0.0934, 0.0496, 0.1176]], |
|
|
device='cuda:0'), grad: tensor([[ 9.1374e-05, 2.3050e-07, 1.6365e-03, ..., 1.0624e-03, |
|
|
2.1915e-03, 1.0624e-03], |
|
|
[ 1.9059e-05, 2.4680e-08, 3.4165e-04, ..., 2.2185e-04, |
|
|
4.5753e-04, 2.2185e-04], |
|
|
[ 1.7571e-04, 4.6566e-09, 3.1509e-03, ..., 2.0466e-03, |
|
|
4.2229e-03, 2.0466e-03], |
|
|
..., |
|
|
[ 3.8236e-05, -2.6356e-07, 6.8855e-04, ..., 4.4870e-04, |
|
|
9.2459e-04, 4.4870e-04], |
|
|
[ 2.2948e-05, 0.0000e+00, 4.1151e-04, ..., 2.6751e-04, |
|
|
5.5122e-04, 2.6751e-04], |
|
|
[ 1.2010e-05, 2.3283e-09, 2.1529e-04, ..., 1.3983e-04, |
|
|
2.8849e-04, 1.3983e-04]], device='cuda:0') |
|
|
Epoch 50, bias, value: tensor([ 0.2324, -0.1012, 0.0313, 0.0066, 0.5357, -0.1379, -0.5729], |
|
|
device='cuda:0'), grad: tensor([ 0.0201, 0.0042, 0.0387, -0.0791, 0.0085, 0.0051, 0.0026], |
|
|
device='cuda:0') |
|
|
249 |
|
|
0.002061073738537637 |
|
|
changing lr |
|
|
epoch 49, time 349.14, cls_loss 0.0116 cls_loss_mapping 0.1371 cls_loss_causal 0.6570 re_mapping 0.0313 re_causal 0.0632 |
|
|
Epoch 51, weight, value: tensor([[ 0.0212, 0.0257, -0.0357, ..., -0.0667, -0.1105, -0.0829], |
|
|
[-0.0363, -0.0384, -0.0197, ..., -0.0265, -0.0200, -0.0137], |
|
|
[ 0.0188, -0.0195, 0.0353, ..., -0.0867, -0.0859, -0.0736], |
|
|
..., |
|
|
[-0.0039, -0.0303, 0.0433, ..., -0.0230, -0.0339, -0.0304], |
|
|
[-0.0103, -0.0229, -0.0008, ..., -0.0518, 0.0032, -0.0528], |
|
|
[ 0.0659, 0.1901, 0.0220, ..., 0.0932, 0.0494, 0.1173]], |
|
|
device='cuda:0'), grad: tensor([[ 2.1094e-07, 1.3877e-07, 1.2722e-06, ..., 5.4948e-07, |
|
|
1.3569e-06, 4.4703e-07], |
|
|
[ 4.6566e-10, -4.6566e-09, -1.8626e-08, ..., 3.8650e-08, |
|
|
1.1642e-08, 1.9092e-08], |
|
|
[ 5.5879e-09, 2.3283e-09, 3.3993e-08, ..., 2.4680e-08, |
|
|
4.3306e-08, 1.9558e-08], |
|
|
..., |
|
|
[-1.2573e-07, -1.0757e-07, -7.4692e-07, ..., -1.7555e-07, |
|
|
-6.9290e-07, -1.4016e-07], |
|
|
[ 4.6082e-06, 1.4938e-06, 2.7388e-05, ..., 2.1830e-05, |
|
|
3.6180e-05, 1.7256e-05], |
|
|
[ 1.1269e-07, 3.7253e-08, 6.7614e-07, ..., 5.3132e-07, |
|
|
8.8988e-07, 4.2561e-07]], device='cuda:0') |
|
|
Epoch 51, bias, value: tensor([ 0.2339, -0.0990, 0.0304, 0.0086, 0.5305, -0.1386, -0.5717], |
|
|
device='cuda:0'), grad: tensor([ 5.8785e-06, -1.5087e-07, 1.8068e-07, -1.5676e-04, -2.9448e-06, |
|
|
1.5032e-04, 3.7197e-06], device='cuda:0') |
|
|
249 |
|
|
0.0018825509907063344 |
|
|
changing lr |
|
|
epoch 50, time 347.08, cls_loss 0.0089 cls_loss_mapping 0.1230 cls_loss_causal 0.6582 re_mapping 0.0311 re_causal 0.0606 |
|
|
Epoch 52, weight, value: tensor([[ 0.0211, 0.0257, -0.0356, ..., -0.0665, -0.1102, -0.0827], |
|
|
[-0.0362, -0.0383, -0.0197, ..., -0.0264, -0.0200, -0.0137], |
|
|
[ 0.0188, -0.0195, 0.0351, ..., -0.0865, -0.0858, -0.0735], |
|
|
..., |
|
|
[-0.0038, -0.0302, 0.0433, ..., -0.0230, -0.0337, -0.0303], |
|
|
[-0.0103, -0.0229, -0.0008, ..., -0.0516, 0.0032, -0.0526], |
|
|
[ 0.0658, 0.1896, 0.0219, ..., 0.0929, 0.0492, 0.1170]], |
|
|
device='cuda:0'), grad: tensor([[ 5.9232e-06, 1.7181e-05, 1.1671e-04, ..., 4.6760e-05, |
|
|
1.3542e-04, 5.2303e-05], |
|
|
[ 3.1404e-06, 8.9407e-06, 6.0827e-05, ..., 2.4393e-05, |
|
|
7.0512e-05, 2.7254e-05], |
|
|
[-1.4760e-05, -4.3690e-05, -2.9564e-04, ..., -1.1837e-04, |
|
|
-3.4332e-04, -1.3256e-04], |
|
|
..., |
|
|
[ 2.0899e-06, 5.6662e-06, 3.8713e-05, ..., 1.5587e-05, |
|
|
4.4852e-05, 1.7360e-05], |
|
|
[ 3.9935e-06, 8.7619e-06, 6.1870e-05, ..., 2.5079e-05, |
|
|
7.0989e-05, 2.7627e-05], |
|
|
[ 1.7686e-06, 3.5763e-06, 2.5600e-05, ..., 1.0423e-05, |
|
|
2.9281e-05, 1.1422e-05]], device='cuda:0') |
|
|
Epoch 52, bias, value: tensor([ 0.2331, -0.0991, 0.0303, 0.0088, 0.5297, -0.1382, -0.5705], |
|
|
device='cuda:0'), grad: tensor([ 9.2030e-04, 4.7851e-04, -2.3384e-03, -1.7226e-05, 3.0231e-04, |
|
|
4.6611e-04, 1.9002e-04], device='cuda:0') |
|
|
249 |
|
|
0.0017103063703014388 |
|
|
changing lr |
|
|
epoch 51, time 350.62, cls_loss 0.0133 cls_loss_mapping 0.1291 cls_loss_causal 0.6928 re_mapping 0.0306 re_causal 0.0583 |
|
|
Epoch 53, weight, value: tensor([[ 0.0209, 0.0255, -0.0364, ..., -0.0666, -0.1109, -0.0828], |
|
|
[-0.0361, -0.0383, -0.0199, ..., -0.0264, -0.0202, -0.0137], |
|
|
[ 0.0189, -0.0192, 0.0361, ..., -0.0861, -0.0845, -0.0731], |
|
|
..., |
|
|
[-0.0038, -0.0301, 0.0433, ..., -0.0228, -0.0335, -0.0301], |
|
|
[-0.0102, -0.0228, -0.0007, ..., -0.0515, 0.0033, -0.0525], |
|
|
[ 0.0656, 0.1892, 0.0218, ..., 0.0927, 0.0490, 0.1167]], |
|
|
device='cuda:0'), grad: tensor([[ 5.5879e-09, 1.8626e-09, 1.2247e-07, ..., 1.2107e-08, |
|
|
9.7323e-08, 9.7789e-09], |
|
|
[-1.0245e-08, -2.7940e-09, -2.2631e-07, ..., -2.4680e-08, |
|
|
-1.8207e-07, -1.9558e-08], |
|
|
[ 9.3132e-10, 4.6566e-10, 1.0710e-08, ..., 9.3132e-10, |
|
|
8.8476e-09, 1.3970e-09], |
|
|
..., |
|
|
[ 1.4901e-08, 4.6566e-09, 2.0303e-07, ..., 1.9092e-08, |
|
|
1.6438e-07, 2.3283e-08], |
|
|
[-1.4435e-08, -4.6566e-09, -1.4110e-07, ..., -9.7789e-09, |
|
|
-1.1316e-07, -1.9092e-08], |
|
|
[ 2.3283e-09, 9.3132e-10, 2.3749e-08, ..., 1.8626e-09, |
|
|
1.9092e-08, 3.2596e-09]], device='cuda:0') |
|
|
Epoch 53, bias, value: tensor([ 0.2295, -0.0996, 0.0344, 0.0079, 0.5294, -0.1378, -0.5696], |
|
|
device='cuda:0'), grad: tensor([ 8.1165e-07, -1.5292e-06, 4.8894e-08, 4.0513e-08, 1.1073e-06, |
|
|
-5.8394e-07, 1.0803e-07], device='cuda:0') |
|
|
249 |
|
|
0.0015446867550656784 |
|
|
changing lr |
|
|
epoch 52, time 348.17, cls_loss 0.0145 cls_loss_mapping 0.1320 cls_loss_causal 0.7023 re_mapping 0.0281 re_causal 0.0541 |
|
|
Epoch 54, weight, value: tensor([[ 0.0208, 0.0253, -0.0370, ..., -0.0667, -0.1116, -0.0828], |
|
|
[-0.0361, -0.0382, -0.0199, ..., -0.0264, -0.0202, -0.0137], |
|
|
[ 0.0189, -0.0191, 0.0367, ..., -0.0856, -0.0834, -0.0727], |
|
|
..., |
|
|
[-0.0038, -0.0300, 0.0433, ..., -0.0229, -0.0335, -0.0302], |
|
|
[-0.0102, -0.0228, -0.0007, ..., -0.0514, 0.0033, -0.0524], |
|
|
[ 0.0655, 0.1888, 0.0217, ..., 0.0925, 0.0489, 0.1165]], |
|
|
device='cuda:0'), grad: tensor([[ 3.2522e-06, 1.6252e-06, 1.9357e-05, ..., 1.2005e-06, |
|
|
1.5438e-05, 1.0198e-06], |
|
|
[ 4.3004e-07, 1.4878e-07, 2.4345e-06, ..., 1.5367e-07, |
|
|
1.9297e-06, 1.2107e-07], |
|
|
[ 3.3993e-07, 1.6950e-07, 2.0228e-06, ..., 1.2713e-07, |
|
|
1.6158e-06, 1.0803e-07], |
|
|
..., |
|
|
[-4.1947e-06, -2.0079e-06, -2.4796e-05, ..., -1.5376e-06, |
|
|
-1.9774e-05, -1.2927e-06], |
|
|
[ 8.4285e-08, 2.9569e-08, 4.8243e-07, ..., 5.0990e-08, |
|
|
4.0536e-07, 4.4703e-08], |
|
|
[ 3.1665e-08, 1.3039e-08, 1.8463e-07, ..., 1.7928e-08, |
|
|
1.5344e-07, 1.5832e-08]], device='cuda:0') |
|
|
Epoch 54, bias, value: tensor([ 0.2269, -0.0993, 0.0366, 0.0082, 0.5280, -0.1374, -0.5688], |
|
|
device='cuda:0'), grad: tensor([ 5.9217e-05, 7.1488e-06, 6.1877e-06, 7.3621e-07, -7.5459e-05, |
|
|
1.4594e-06, 5.6159e-07], device='cuda:0') |
|
|
249 |
|
|
0.001386025680863044 |
|
|
changing lr |
|
|
epoch 53, time 348.79, cls_loss 0.0133 cls_loss_mapping 0.1284 cls_loss_causal 0.7003 re_mapping 0.0296 re_causal 0.0593 |
|
|
Epoch 55, weight, value: tensor([[ 0.0208, 0.0252, -0.0373, ..., -0.0668, -0.1118, -0.0828], |
|
|
[-0.0360, -0.0381, -0.0197, ..., -0.0263, -0.0200, -0.0136], |
|
|
[ 0.0189, -0.0190, 0.0370, ..., -0.0854, -0.0829, -0.0725], |
|
|
..., |
|
|
[-0.0038, -0.0300, 0.0432, ..., -0.0228, -0.0334, -0.0301], |
|
|
[-0.0102, -0.0228, -0.0010, ..., -0.0515, 0.0028, -0.0524], |
|
|
[ 0.0654, 0.1885, 0.0217, ..., 0.0923, 0.0488, 0.1162]], |
|
|
device='cuda:0'), grad: tensor([[-3.7253e-09, -3.4925e-09, -1.8626e-08, ..., -5.1223e-09, |
|
|
-2.2585e-08, -5.1223e-09], |
|
|
[-7.6834e-09, -4.6566e-09, -5.3551e-08, ..., 2.3283e-09, |
|
|
-5.0291e-08, -3.0268e-09], |
|
|
[ 5.1688e-08, 2.7008e-08, 2.6380e-07, ..., 1.2666e-07, |
|
|
3.8091e-07, 1.4249e-07], |
|
|
..., |
|
|
[ 1.4435e-08, 8.8476e-09, 9.0804e-08, ..., 8.8476e-09, |
|
|
9.7556e-08, 1.7462e-08], |
|
|
[-1.3737e-08, -6.9849e-09, -8.3586e-08, ..., -1.1409e-08, |
|
|
-9.4064e-08, -2.3516e-08], |
|
|
[ 1.1642e-08, 6.5193e-09, 6.9384e-08, ..., 2.3516e-08, |
|
|
9.2434e-08, 3.0734e-08]], device='cuda:0') |
|
|
Epoch 55, bias, value: tensor([ 0.2246, -0.0989, 0.0374, 0.0097, 0.5275, -0.1382, -0.5680], |
|
|
device='cuda:0'), grad: tensor([-7.2876e-08, -1.2759e-07, 6.0722e-07, -5.7183e-07, 2.2934e-07, |
|
|
-1.8883e-07, 1.6927e-07], device='cuda:0') |
|
|
249 |
|
|
0.0012346426699819469 |
|
|
changing lr |
|
|
epoch 54, time 350.60, cls_loss 0.0113 cls_loss_mapping 0.1171 cls_loss_causal 0.6751 re_mapping 0.0298 re_causal 0.0561 |
|
|
Epoch 56, weight, value: tensor([[ 0.0207, 0.0252, -0.0373, ..., -0.0667, -0.1117, -0.0827], |
|
|
[-0.0360, -0.0381, -0.0198, ..., -0.0263, -0.0201, -0.0137], |
|
|
[ 0.0189, -0.0189, 0.0372, ..., -0.0851, -0.0823, -0.0722], |
|
|
..., |
|
|
[-0.0037, -0.0299, 0.0433, ..., -0.0228, -0.0333, -0.0301], |
|
|
[-0.0102, -0.0228, -0.0010, ..., -0.0514, 0.0028, -0.0524], |
|
|
[ 0.0653, 0.1882, 0.0216, ..., 0.0922, 0.0486, 0.1161]], |
|
|
device='cuda:0'), grad: tensor([[-1.0012e-08, -6.0536e-09, -3.8184e-08, ..., -4.1910e-09, |
|
|
-2.0023e-08, -4.8894e-09], |
|
|
[-1.8626e-09, 0.0000e+00, -2.3516e-08, ..., 0.0000e+00, |
|
|
-1.7462e-08, -4.6566e-10], |
|
|
[ 4.8894e-09, 2.7940e-09, 1.8394e-08, ..., 2.0955e-09, |
|
|
9.7789e-09, 2.3283e-09], |
|
|
..., |
|
|
[ 4.6566e-09, 1.8626e-09, 3.2596e-08, ..., 1.3970e-09, |
|
|
2.1420e-08, 1.8626e-09], |
|
|
[ 4.6566e-10, 2.3283e-10, 1.8626e-09, ..., 2.3283e-10, |
|
|
1.1642e-09, 4.6566e-10], |
|
|
[ 1.3970e-09, 9.3132e-10, 5.5879e-09, ..., 6.9849e-10, |
|
|
3.0268e-09, 6.9849e-10]], device='cuda:0') |
|
|
Epoch 56, bias, value: tensor([ 0.2242, -0.0992, 0.0382, 0.0095, 0.5265, -0.1378, -0.5674], |
|
|
device='cuda:0'), grad: tensor([-3.2573e-07, -1.0384e-07, 1.5576e-07, 3.0268e-09, 1.9046e-07, |
|
|
1.3504e-08, 4.7730e-08], device='cuda:0') |
|
|
249 |
|
|
0.0010908425876598518 |
|
|
changing lr |
|
|
epoch 55, time 347.23, cls_loss 0.0155 cls_loss_mapping 0.1176 cls_loss_causal 0.7039 re_mapping 0.0292 re_causal 0.0602 |
|
|
Epoch 57, weight, value: tensor([[ 0.0207, 0.0251, -0.0374, ..., -0.0666, -0.1116, -0.0826], |
|
|
[-0.0359, -0.0380, -0.0198, ..., -0.0263, -0.0201, -0.0137], |
|
|
[ 0.0188, -0.0189, 0.0371, ..., -0.0850, -0.0822, -0.0722], |
|
|
..., |
|
|
[-0.0037, -0.0298, 0.0435, ..., -0.0227, -0.0330, -0.0300], |
|
|
[-0.0106, -0.0229, -0.0022, ..., -0.0516, 0.0016, -0.0526], |
|
|
[ 0.0652, 0.1880, 0.0215, ..., 0.0920, 0.0486, 0.1159]], |
|
|
device='cuda:0'), grad: tensor([[-3.4750e-05, -3.1918e-05, -1.2326e-04, ..., -5.0396e-05, |
|
|
-1.2255e-04, -5.0694e-05], |
|
|
[ 6.5193e-08, 1.5763e-07, -2.6054e-07, ..., 1.1874e-07, |
|
|
-1.5344e-07, 9.9652e-08], |
|
|
[ 3.3677e-06, 3.0696e-06, 1.2785e-05, ..., 5.1595e-06, |
|
|
1.2890e-05, 5.2527e-06], |
|
|
..., |
|
|
[ 2.9519e-05, 2.7031e-05, 1.0395e-04, ..., 4.2289e-05, |
|
|
1.0300e-04, 4.2528e-05], |
|
|
[ 1.1106e-07, 1.3853e-07, -4.8429e-08, ..., 1.5693e-07, |
|
|
-1.3364e-07, -2.3283e-10], |
|
|
[ 1.0636e-06, 9.4343e-07, 4.4405e-06, ..., 1.7015e-06, |
|
|
4.5374e-06, 1.8021e-06]], device='cuda:0') |
|
|
Epoch 57, bias, value: tensor([ 0.2236, -0.0991, 0.0380, 0.0114, 0.5264, -0.1394, -0.5667], |
|
|
device='cuda:0'), grad: tensor([-7.5626e-04, -9.3132e-10, 7.8321e-05, 1.4409e-05, 6.3515e-04, |
|
|
1.3635e-06, 2.6494e-05], device='cuda:0') |
|
|
249 |
|
|
0.000954915028125264 |
|
|
changing lr |
|
|
epoch 56, time 347.41, cls_loss 0.0119 cls_loss_mapping 0.1096 cls_loss_causal 0.6021 re_mapping 0.0282 re_causal 0.0535 |
|
|
Epoch 58, weight, value: tensor([[ 0.0207, 0.0251, -0.0369, ..., -0.0663, -0.1110, -0.0823], |
|
|
[-0.0359, -0.0380, -0.0198, ..., -0.0263, -0.0202, -0.0137], |
|
|
[ 0.0188, -0.0189, 0.0372, ..., -0.0849, -0.0819, -0.0721], |
|
|
..., |
|
|
[-0.0037, -0.0298, 0.0430, ..., -0.0228, -0.0334, -0.0301], |
|
|
[-0.0105, -0.0229, -0.0022, ..., -0.0515, 0.0016, -0.0525], |
|
|
[ 0.0651, 0.1877, 0.0215, ..., 0.0919, 0.0484, 0.1157]], |
|
|
device='cuda:0'), grad: tensor([[ 6.9849e-10, 6.9849e-10, 2.5611e-09, ..., 2.3283e-10, |
|
|
2.0955e-09, 2.3283e-10], |
|
|
[ 2.0955e-09, 2.3283e-09, 7.6834e-09, ..., 6.9849e-10, |
|
|
6.5193e-09, 9.3132e-10], |
|
|
[-1.6531e-08, -1.6997e-08, -5.6112e-08, ..., -4.6566e-09, |
|
|
-4.7032e-08, -6.5193e-09], |
|
|
..., |
|
|
[ 1.8626e-09, 1.8626e-09, 6.5193e-09, ..., 6.9849e-10, |
|
|
5.5879e-09, 9.3132e-10], |
|
|
[ 7.2177e-09, 7.4506e-09, 2.4680e-08, ..., 2.0955e-09, |
|
|
2.0722e-08, 3.0268e-09], |
|
|
[ 6.9849e-10, 6.9849e-10, 2.7940e-09, ..., 2.3283e-10, |
|
|
2.5611e-09, 4.6566e-10]], device='cuda:0') |
|
|
Epoch 58, bias, value: tensor([ 0.2244, -0.0993, 0.0384, 0.0119, 0.5244, -0.1393, -0.5663], |
|
|
device='cuda:0'), grad: tensor([ 5.3551e-09, 1.6298e-08, -1.1828e-07, 3.1665e-08, 1.4901e-08, |
|
|
5.2154e-08, 6.7521e-09], device='cuda:0') |
|
|
249 |
|
|
0.0008271337313934874 |
|
|
changing lr |
|
|
epoch 57, time 376.49, cls_loss 0.0116 cls_loss_mapping 0.1136 cls_loss_causal 0.6296 re_mapping 0.0275 re_causal 0.0539 |
|
|
Epoch 59, weight, value: tensor([[ 0.0208, 0.0253, -0.0363, ..., -0.0659, -0.1101, -0.0819], |
|
|
[-0.0360, -0.0381, -0.0201, ..., -0.0264, -0.0205, -0.0138], |
|
|
[ 0.0188, -0.0189, 0.0371, ..., -0.0849, -0.0819, -0.0720], |
|
|
..., |
|
|
[-0.0037, -0.0298, 0.0429, ..., -0.0228, -0.0335, -0.0301], |
|
|
[-0.0105, -0.0228, -0.0022, ..., -0.0515, 0.0016, -0.0524], |
|
|
[ 0.0650, 0.1875, 0.0214, ..., 0.0918, 0.0483, 0.1155]], |
|
|
device='cuda:0'), grad: tensor([[ 2.8219e-07, 3.6857e-07, 2.9244e-06, ..., -5.0757e-07, |
|
|
2.0470e-06, -4.2375e-07], |
|
|
[-8.3912e-07, -7.2038e-07, -7.1563e-06, ..., -8.2701e-07, |
|
|
-6.3777e-06, -9.5321e-07], |
|
|
[-2.9076e-06, -2.4326e-06, -1.1869e-05, ..., -3.7178e-06, |
|
|
-1.2442e-05, -4.3623e-06], |
|
|
..., |
|
|
[ 2.9034e-07, 2.2585e-07, 2.2016e-06, ..., 9.3365e-07, |
|
|
2.5388e-06, 9.7509e-07], |
|
|
[ 2.8107e-06, 2.2501e-06, 1.1228e-05, ..., 2.5090e-06, |
|
|
1.0677e-05, 3.0845e-06], |
|
|
[ 1.0035e-07, 6.4261e-08, 6.9477e-07, ..., 3.0361e-07, |
|
|
7.8604e-07, 3.1339e-07]], device='cuda:0') |
|
|
Epoch 59, bias, value: tensor([ 0.2261, -0.1007, 0.0381, 0.0114, 0.5241, -0.1388, -0.5660], |
|
|
device='cuda:0'), grad: tensor([ 1.1176e-05, -3.3289e-05, -3.5226e-05, 9.2983e-06, 1.0848e-05, |
|
|
3.3617e-05, 3.5930e-06], device='cuda:0') |
|
|
249 |
|
|
0.00070775603199067 |
|
|
changing lr |
|
|
epoch 58, time 348.35, cls_loss 0.0117 cls_loss_mapping 0.1144 cls_loss_causal 0.6328 re_mapping 0.0285 re_causal 0.0533 |
|
|
Epoch 60, weight, value: tensor([[ 0.0208, 0.0253, -0.0363, ..., -0.0659, -0.1100, -0.0818], |
|
|
[-0.0360, -0.0380, -0.0203, ..., -0.0264, -0.0206, -0.0138], |
|
|
[ 0.0187, -0.0189, 0.0369, ..., -0.0849, -0.0820, -0.0720], |
|
|
..., |
|
|
[-0.0037, -0.0298, 0.0431, ..., -0.0228, -0.0332, -0.0300], |
|
|
[-0.0106, -0.0228, -0.0023, ..., -0.0515, 0.0015, -0.0524], |
|
|
[ 0.0650, 0.1874, 0.0213, ..., 0.0917, 0.0482, 0.1154]], |
|
|
device='cuda:0'), grad: tensor([[ 1.0012e-08, 7.2177e-09, 2.1653e-08, ..., 3.2596e-09, |
|
|
1.8394e-08, 4.8894e-09], |
|
|
[ 6.9849e-10, 0.0000e+00, 1.1874e-08, ..., 2.7940e-09, |
|
|
1.2806e-08, 3.7253e-09], |
|
|
[ 1.8626e-08, 0.0000e+00, 2.6240e-07, ..., 9.2434e-08, |
|
|
3.1875e-07, 1.0966e-07], |
|
|
..., |
|
|
[ 3.0268e-09, 0.0000e+00, 5.2387e-08, ..., 1.4668e-08, |
|
|
5.8906e-08, 1.8394e-08], |
|
|
[-2.4447e-08, 0.0000e+00, -3.6880e-07, ..., -1.1991e-07, |
|
|
-4.3539e-07, -1.4459e-07], |
|
|
[-9.3132e-09, -7.9162e-09, 3.7253e-09, ..., 3.9581e-09, |
|
|
1.1176e-08, 4.4238e-09]], device='cuda:0') |
|
|
Epoch 60, bias, value: tensor([ 0.2262, -0.1010, 0.0375, 0.0116, 0.5244, -0.1389, -0.5656], |
|
|
device='cuda:0'), grad: tensor([ 5.0524e-08, 7.7067e-08, 1.6429e-06, 9.4064e-08, 3.3434e-07, |
|
|
-2.3246e-06, 1.1711e-07], device='cuda:0') |
|
|
249 |
|
|
0.0005970223407163104 |
|
|
changing lr |
|
|
epoch 59, time 345.73, cls_loss 0.0119 cls_loss_mapping 0.1103 cls_loss_causal 0.6765 re_mapping 0.0283 re_causal 0.0535 |
|
|
Epoch 61, weight, value: tensor([[ 0.0208, 0.0252, -0.0365, ..., -0.0659, -0.1102, -0.0818], |
|
|
[-0.0359, -0.0380, -0.0201, ..., -0.0263, -0.0205, -0.0138], |
|
|
[ 0.0187, -0.0189, 0.0370, ..., -0.0848, -0.0819, -0.0719], |
|
|
..., |
|
|
[-0.0036, -0.0297, 0.0436, ..., -0.0226, -0.0326, -0.0298], |
|
|
[-0.0105, -0.0228, -0.0023, ..., -0.0515, 0.0014, -0.0524], |
|
|
[ 0.0649, 0.1872, 0.0213, ..., 0.0916, 0.0481, 0.1153]], |
|
|
device='cuda:0'), grad: tensor([[ 4.6253e-05, 6.1572e-05, 5.0497e-04, ..., 2.6250e-04, |
|
|
5.8270e-04, 1.9765e-04], |
|
|
[ 4.1835e-06, 3.6862e-06, 2.5585e-05, ..., 4.8950e-06, |
|
|
2.0474e-05, 4.6268e-06], |
|
|
[ 2.0102e-05, 1.8865e-05, 1.2565e-04, ..., 3.6746e-05, |
|
|
1.1319e-04, 3.0190e-05], |
|
|
..., |
|
|
[-9.2268e-05, -1.0306e-04, -7.8583e-04, ..., -3.2687e-04, |
|
|
-8.1778e-04, -2.5463e-04], |
|
|
[ 1.0476e-05, 8.9481e-06, 6.1095e-05, ..., 9.4324e-06, |
|
|
4.6521e-05, 9.6112e-06], |
|
|
[ 7.7114e-06, 6.6534e-06, 4.5508e-05, ..., 7.6964e-06, |
|
|
3.5346e-05, 7.5698e-06]], device='cuda:0') |
|
|
Epoch 61, bias, value: tensor([ 0.2248, -0.1002, 0.0378, 0.0107, 0.5253, -0.1390, -0.5653], |
|
|
device='cuda:0'), grad: tensor([ 0.0040, 0.0001, 0.0008, 0.0001, -0.0057, 0.0003, 0.0003], |
|
|
device='cuda:0') |
|
|
249 |
|
|
0.0004951556604879052 |
|
|
changing lr |
|
|
epoch 60, time 352.12, cls_loss 0.0114 cls_loss_mapping 0.1141 cls_loss_causal 0.6381 re_mapping 0.0275 re_causal 0.0531 |
|
|
Epoch 62, weight, value: tensor([[ 0.0208, 0.0252, -0.0365, ..., -0.0659, -0.1102, -0.0818], |
|
|
[-0.0359, -0.0379, -0.0199, ..., -0.0263, -0.0203, -0.0137], |
|
|
[ 0.0187, -0.0188, 0.0369, ..., -0.0848, -0.0819, -0.0719], |
|
|
..., |
|
|
[-0.0036, -0.0297, 0.0433, ..., -0.0226, -0.0328, -0.0299], |
|
|
[-0.0105, -0.0228, -0.0023, ..., -0.0514, 0.0014, -0.0524], |
|
|
[ 0.0649, 0.1871, 0.0213, ..., 0.0915, 0.0481, 0.1153]], |
|
|
device='cuda:0'), grad: tensor([[ 1.0245e-08, 3.7253e-09, 1.2573e-07, ..., 2.3283e-09, |
|
|
8.9407e-08, 8.8476e-09], |
|
|
[-1.2107e-08, -4.1910e-09, -1.4901e-07, ..., -1.3970e-09, |
|
|
-1.0338e-07, -8.8476e-09], |
|
|
[ 4.6566e-10, 0.0000e+00, 6.0536e-09, ..., 1.8626e-09, |
|
|
6.5193e-09, 1.8626e-09], |
|
|
..., |
|
|
[ 2.7940e-09, 9.3132e-10, 3.4459e-08, ..., 1.8626e-09, |
|
|
2.6543e-08, 3.7253e-09], |
|
|
[-3.7253e-09, 0.0000e+00, -4.6566e-08, ..., -1.4435e-08, |
|
|
-5.3085e-08, -1.5832e-08], |
|
|
[ 9.3132e-10, 0.0000e+00, 1.4435e-08, ..., 4.1910e-09, |
|
|
1.6298e-08, 4.6566e-09]], device='cuda:0') |
|
|
Epoch 62, bias, value: tensor([ 0.2244, -0.0993, 0.0373, 0.0113, 0.5244, -0.1389, -0.5651], |
|
|
device='cuda:0'), grad: tensor([ 1.2144e-06, -1.4575e-06, 4.0513e-08, 1.2666e-07, 3.1944e-07, |
|
|
-2.9523e-07, 9.4064e-08], device='cuda:0') |
|
|
249 |
|
|
0.00040236113724274745 |
|
|
changing lr |
|
|
epoch 61, time 346.83, cls_loss 0.0095 cls_loss_mapping 0.1049 cls_loss_causal 0.6761 re_mapping 0.0273 re_causal 0.0569 |
|
|
Epoch 63, weight, value: tensor([[ 0.0208, 0.0252, -0.0365, ..., -0.0659, -0.1101, -0.0817], |
|
|
[-0.0359, -0.0379, -0.0199, ..., -0.0262, -0.0203, -0.0137], |
|
|
[ 0.0187, -0.0188, 0.0369, ..., -0.0847, -0.0819, -0.0719], |
|
|
..., |
|
|
[-0.0036, -0.0297, 0.0434, ..., -0.0226, -0.0328, -0.0298], |
|
|
[-0.0105, -0.0228, -0.0023, ..., -0.0514, 0.0014, -0.0523], |
|
|
[ 0.0648, 0.1870, 0.0212, ..., 0.0915, 0.0481, 0.1152]], |
|
|
device='cuda:0'), grad: tensor([[ 3.4785e-07, 5.5879e-08, 3.0063e-06, ..., 2.0433e-06, |
|
|
4.2580e-06, 1.8263e-06], |
|
|
[ 6.0536e-09, -3.2596e-09, 4.8429e-08, ..., 5.4948e-08, |
|
|
9.0804e-08, 4.7497e-08], |
|
|
[ 2.3749e-08, 1.3970e-09, 2.2352e-07, ..., 1.5460e-07, |
|
|
3.2084e-07, 1.3690e-07], |
|
|
..., |
|
|
[-4.3260e-07, -6.4727e-08, -3.7365e-06, ..., -2.5649e-06, |
|
|
-5.3197e-06, -2.2911e-06], |
|
|
[ 1.6764e-08, 3.2596e-09, 1.4296e-07, ..., 9.6858e-08, |
|
|
2.0163e-07, 8.6613e-08], |
|
|
[ 1.5832e-08, 3.2596e-09, 1.3271e-07, ..., 8.9407e-08, |
|
|
1.8626e-07, 8.0094e-08]], device='cuda:0') |
|
|
Epoch 63, bias, value: tensor([ 0.2242, -0.0994, 0.0373, 0.0114, 0.5243, -0.1388, -0.5648], |
|
|
device='cuda:0'), grad: tensor([ 1.7613e-05, 2.6822e-07, 1.3281e-06, 1.0775e-06, -2.1890e-05, |
|
|
8.3307e-07, 7.7114e-07], device='cuda:0') |
|
|
249 |
|
|
0.00031882564680131423 |
|
|
changing lr |
|
|
epoch 62, time 350.05, cls_loss 0.0106 cls_loss_mapping 0.1023 cls_loss_causal 0.6294 re_mapping 0.0283 re_causal 0.0536 |
|
|
Epoch 64, weight, value: tensor([[ 0.0208, 0.0252, -0.0364, ..., -0.0658, -0.1100, -0.0816], |
|
|
[-0.0359, -0.0379, -0.0199, ..., -0.0262, -0.0203, -0.0137], |
|
|
[ 0.0187, -0.0188, 0.0369, ..., -0.0847, -0.0818, -0.0719], |
|
|
..., |
|
|
[-0.0036, -0.0297, 0.0433, ..., -0.0226, -0.0328, -0.0299], |
|
|
[-0.0105, -0.0228, -0.0023, ..., -0.0514, 0.0014, -0.0523], |
|
|
[ 0.0648, 0.1869, 0.0212, ..., 0.0914, 0.0480, 0.1152]], |
|
|
device='cuda:0'), grad: tensor([[ 6.7167e-06, 5.4836e-06, 5.4657e-05, ..., 2.2575e-05, |
|
|
6.0171e-05, 2.3410e-05], |
|
|
[-4.5478e-05, -3.7491e-05, -3.7265e-04, ..., -1.5616e-04, |
|
|
-4.1270e-04, -1.6129e-04], |
|
|
[-3.0193e-06, -2.0340e-06, -2.1368e-05, ..., -5.6773e-06, |
|
|
-1.9714e-05, -6.5938e-06], |
|
|
..., |
|
|
[ 3.9160e-05, 3.2187e-05, 3.1996e-04, ..., 1.3328e-04, |
|
|
3.5357e-04, 1.3781e-04], |
|
|
[ 1.5711e-06, 1.0915e-06, 1.1586e-05, ..., 3.5428e-06, |
|
|
1.1206e-05, 3.9823e-06], |
|
|
[ 3.3574e-07, 2.4680e-07, 2.5537e-06, ..., 8.7265e-07, |
|
|
2.5854e-06, 9.4902e-07]], device='cuda:0') |
|
|
Epoch 64, bias, value: tensor([ 0.2244, -0.0992, 0.0373, 0.0111, 0.5240, -0.1387, -0.5647], |
|
|
device='cuda:0'), grad: tensor([ 3.9625e-04, -2.6951e-03, -1.6081e-04, 3.8087e-05, 2.3155e-03, |
|
|
8.8155e-05, 1.9044e-05], device='cuda:0') |
|
|
249 |
|
|
0.0002447174185242325 |
|
|
changing lr |
|
|
epoch 63, time 349.59, cls_loss 0.0104 cls_loss_mapping 0.1057 cls_loss_causal 0.6085 re_mapping 0.0285 re_causal 0.0531 |
|
|
Epoch 65, weight, value: tensor([[ 0.0208, 0.0252, -0.0364, ..., -0.0658, -0.1100, -0.0816], |
|
|
[-0.0358, -0.0379, -0.0199, ..., -0.0262, -0.0202, -0.0137], |
|
|
[ 0.0187, -0.0188, 0.0369, ..., -0.0847, -0.0817, -0.0718], |
|
|
..., |
|
|
[-0.0036, -0.0297, 0.0432, ..., -0.0226, -0.0329, -0.0299], |
|
|
[-0.0105, -0.0228, -0.0022, ..., -0.0514, 0.0015, -0.0523], |
|
|
[ 0.0648, 0.1869, 0.0212, ..., 0.0914, 0.0480, 0.1151]], |
|
|
device='cuda:0'), grad: tensor([[-2.6748e-06, -2.0582e-06, -1.2316e-05, ..., -4.2096e-06, |
|
|
-1.1683e-05, -3.3285e-06], |
|
|
[ 5.5740e-07, 4.2049e-07, 2.5891e-06, ..., 9.0618e-07, |
|
|
2.4661e-06, 7.1293e-07], |
|
|
[ 8.3819e-09, 7.4506e-09, 3.5390e-08, ..., 9.3132e-09, |
|
|
3.1665e-08, 7.4506e-09], |
|
|
..., |
|
|
[ 2.0694e-06, 1.5972e-06, 9.5293e-06, ..., 3.2485e-06, |
|
|
9.0301e-06, 2.5686e-06], |
|
|
[ 1.3970e-09, 9.3132e-10, 5.5879e-09, ..., 1.8626e-09, |
|
|
5.1223e-09, 1.3970e-09], |
|
|
[ 1.6298e-08, 1.5832e-08, 6.5193e-08, ..., 1.3504e-08, |
|
|
5.7276e-08, 1.2107e-08]], device='cuda:0') |
|
|
Epoch 65, bias, value: tensor([ 0.2243, -0.0992, 0.0374, 0.0111, 0.5236, -0.1383, -0.5645], |
|
|
device='cuda:0'), grad: tensor([-3.7402e-05, 7.9349e-06, 9.6392e-08, 2.9430e-07, 2.8893e-05, |
|
|
1.6764e-08, 1.6717e-07], device='cuda:0') |
|
|
249 |
|
|
0.0001801856965207339 |
|
|
changing lr |
|
|
epoch 64, time 344.76, cls_loss 0.0114 cls_loss_mapping 0.1071 cls_loss_causal 0.6610 re_mapping 0.0270 re_causal 0.0537 |
|
|
Epoch 66, weight, value: tensor([[ 0.0208, 0.0252, -0.0364, ..., -0.0658, -0.1099, -0.0816], |
|
|
[-0.0358, -0.0379, -0.0199, ..., -0.0262, -0.0203, -0.0137], |
|
|
[ 0.0187, -0.0188, 0.0369, ..., -0.0846, -0.0817, -0.0718], |
|
|
..., |
|
|
[-0.0036, -0.0297, 0.0432, ..., -0.0226, -0.0329, -0.0299], |
|
|
[-0.0105, -0.0228, -0.0022, ..., -0.0513, 0.0015, -0.0523], |
|
|
[ 0.0648, 0.1868, 0.0212, ..., 0.0914, 0.0480, 0.1151]], |
|
|
device='cuda:0'), grad: tensor([[-9.0804e-07, -9.2201e-08, -9.3803e-06, ..., -6.5751e-06, |
|
|
-1.2361e-05, -6.6906e-06], |
|
|
[ 1.5413e-07, 3.9581e-08, 1.1614e-06, ..., 7.3295e-07, |
|
|
1.4473e-06, 7.4739e-07], |
|
|
[-1.1558e-06, -5.9744e-07, -3.2261e-06, ..., -6.3749e-07, |
|
|
-2.5984e-06, -6.8406e-07], |
|
|
..., |
|
|
[ 5.5227e-07, 1.1967e-07, 4.5411e-06, ..., 2.9691e-06, |
|
|
5.7630e-06, 3.0249e-06], |
|
|
[ 5.0338e-07, 2.4168e-07, 1.7425e-06, ..., 5.7649e-07, |
|
|
1.6401e-06, 6.0070e-07], |
|
|
[ 3.6415e-07, 1.2945e-07, 2.0899e-06, ..., 1.1530e-06, |
|
|
2.4363e-06, 1.1791e-06]], device='cuda:0') |
|
|
Epoch 66, bias, value: tensor([ 0.2242, -0.0993, 0.0375, 0.0111, 0.5233, -0.1382, -0.5644], |
|
|
device='cuda:0'), grad: tensor([-7.1764e-05, 8.9332e-06, -2.5719e-05, 2.3767e-05, 3.4869e-05, |
|
|
1.3739e-05, 1.6198e-05], device='cuda:0') |
|
|
249 |
|
|
0.000125360439090882 |
|
|
changing lr |
|
|
epoch 65, time 347.47, cls_loss 0.0074 cls_loss_mapping 0.1005 cls_loss_causal 0.6283 re_mapping 0.0267 re_causal 0.0555 |
|
|
Epoch 67, weight, value: tensor([[ 0.0208, 0.0252, -0.0363, ..., -0.0658, -0.1099, -0.0816], |
|
|
[-0.0358, -0.0379, -0.0199, ..., -0.0262, -0.0203, -0.0137], |
|
|
[ 0.0187, -0.0188, 0.0369, ..., -0.0846, -0.0817, -0.0718], |
|
|
..., |
|
|
[-0.0036, -0.0297, 0.0432, ..., -0.0226, -0.0329, -0.0299], |
|
|
[-0.0105, -0.0228, -0.0022, ..., -0.0513, 0.0015, -0.0523], |
|
|
[ 0.0648, 0.1868, 0.0212, ..., 0.0914, 0.0480, 0.1151]], |
|
|
device='cuda:0'), grad: tensor([[-8.9873e-08, -8.8476e-08, -5.7323e-07, ..., -3.3528e-08, |
|
|
-5.2247e-07, -5.7742e-08], |
|
|
[-1.3970e-08, -2.0955e-08, -9.2667e-08, ..., -1.9558e-08, |
|
|
-8.4750e-08, -1.8626e-08], |
|
|
[ 3.7253e-09, 2.7940e-09, 2.0489e-08, ..., 1.3970e-09, |
|
|
1.7695e-08, 2.3283e-09], |
|
|
..., |
|
|
[ 9.4529e-08, 9.4529e-08, 5.8860e-07, ..., 4.7032e-08, |
|
|
5.3411e-07, 6.8918e-08], |
|
|
[-1.9092e-08, -2.7940e-09, -6.6590e-08, ..., -2.7940e-09, |
|
|
-4.8894e-08, -8.8476e-09], |
|
|
[ 2.2817e-08, 1.3039e-08, 1.1083e-07, ..., 9.3132e-09, |
|
|
9.4995e-08, 1.5367e-08]], device='cuda:0') |
|
|
Epoch 67, bias, value: tensor([ 0.2242, -0.0993, 0.0374, 0.0111, 0.5233, -0.1382, -0.5644], |
|
|
device='cuda:0'), grad: tensor([-2.8703e-06, -4.4191e-07, 8.5682e-08, 8.1025e-08, 2.8610e-06, |
|
|
-9.1735e-08, 4.0047e-07], device='cuda:0') |
|
|
249 |
|
|
8.03520570068517e-05 |
|
|
changing lr |
|
|
epoch 66, time 350.79, cls_loss 0.0102 cls_loss_mapping 0.1035 cls_loss_causal 0.6503 re_mapping 0.0282 re_causal 0.0544 |
|
|
Epoch 68, weight, value: tensor([[ 0.0208, 0.0252, -0.0363, ..., -0.0657, -0.1098, -0.0816], |
|
|
[-0.0358, -0.0379, -0.0199, ..., -0.0262, -0.0203, -0.0137], |
|
|
[ 0.0187, -0.0188, 0.0369, ..., -0.0846, -0.0817, -0.0718], |
|
|
..., |
|
|
[-0.0036, -0.0297, 0.0432, ..., -0.0226, -0.0330, -0.0299], |
|
|
[-0.0105, -0.0228, -0.0022, ..., -0.0513, 0.0015, -0.0523], |
|
|
[ 0.0647, 0.1868, 0.0212, ..., 0.0914, 0.0480, 0.1151]], |
|
|
device='cuda:0'), grad: tensor([[ 2.3283e-09, -1.6578e-07, 3.4040e-07, ..., 8.8708e-07, |
|
|
1.9390e-06, 1.2936e-06], |
|
|
[ 5.5656e-06, 2.6505e-06, 3.2008e-05, ..., 6.8583e-06, |
|
|
3.1292e-05, 9.4473e-06], |
|
|
[ 1.8822e-06, -2.0154e-06, -5.2452e-06, ..., -6.8061e-06, |
|
|
-1.0513e-05, -6.3330e-06], |
|
|
..., |
|
|
[ 7.3723e-06, 3.7421e-06, 4.2647e-05, ..., 8.7246e-06, |
|
|
4.0591e-05, 1.1817e-05], |
|
|
[-2.2918e-05, -8.0541e-06, -1.1629e-04, ..., -1.9357e-05, |
|
|
-1.0854e-04, -2.9698e-05], |
|
|
[ 6.1579e-06, 2.8070e-06, 3.4779e-05, ..., 7.3351e-06, |
|
|
3.3885e-05, 1.0177e-05]], device='cuda:0') |
|
|
Epoch 68, bias, value: tensor([ 0.2243, -0.0993, 0.0374, 0.0110, 0.5232, -0.1381, -0.5643], |
|
|
device='cuda:0'), grad: tensor([ 4.2133e-06, 1.1146e-04, -1.1331e-04, 4.3392e-05, 1.4925e-04, |
|
|
-3.1281e-04, 1.1742e-04], device='cuda:0') |
|
|
249 |
|
|
4.5251191160326525e-05 |
|
|
changing lr |
|
|
epoch 67, time 348.82, cls_loss 0.0106 cls_loss_mapping 0.1088 cls_loss_causal 0.6864 re_mapping 0.0277 re_causal 0.0531 |
|
|
Epoch 69, weight, value: tensor([[ 0.0208, 0.0252, -0.0362, ..., -0.0657, -0.1098, -0.0816], |
|
|
[-0.0358, -0.0379, -0.0199, ..., -0.0262, -0.0203, -0.0137], |
|
|
[ 0.0187, -0.0188, 0.0369, ..., -0.0846, -0.0817, -0.0718], |
|
|
..., |
|
|
[-0.0037, -0.0297, 0.0431, ..., -0.0227, -0.0330, -0.0299], |
|
|
[-0.0105, -0.0228, -0.0022, ..., -0.0513, 0.0015, -0.0523], |
|
|
[ 0.0647, 0.1868, 0.0212, ..., 0.0914, 0.0480, 0.1150]], |
|
|
device='cuda:0'), grad: tensor([[-1.3970e-09, -1.8626e-09, -8.8476e-09, ..., -2.3283e-09, |
|
|
-8.3819e-09, -2.3283e-09], |
|
|
[ 2.7940e-09, 1.3970e-09, 9.3132e-09, ..., 1.8626e-09, |
|
|
7.4506e-09, 2.3283e-09], |
|
|
[ 6.0536e-09, 2.7940e-09, 2.1886e-08, ..., 3.7253e-09, |
|
|
1.7695e-08, 5.1223e-09], |
|
|
..., |
|
|
[ 1.9558e-08, 9.7789e-09, 7.5437e-08, ..., 1.3970e-08, |
|
|
6.1933e-08, 1.7229e-08], |
|
|
[-2.1560e-07, -9.8720e-08, -7.9814e-07, ..., -1.4249e-07, |
|
|
-6.4913e-07, -1.8161e-07], |
|
|
[ 3.6322e-08, 1.6764e-08, 1.3597e-07, ..., 2.4214e-08, |
|
|
1.1083e-07, 3.1199e-08]], device='cuda:0') |
|
|
Epoch 69, bias, value: tensor([ 0.2244, -0.0993, 0.0374, 0.0110, 0.5230, -0.1381, -0.5643], |
|
|
device='cuda:0'), grad: tensor([-7.5437e-08, 1.5367e-08, 6.5193e-08, 1.6261e-06, 2.5705e-07, |
|
|
-2.2799e-06, 3.9907e-07], device='cuda:0') |
|
|
249 |
|
|
2.0128530023804673e-05 |
|
|
changing lr |
|
|
epoch 68, time 348.35, cls_loss 0.0081 cls_loss_mapping 0.0988 cls_loss_causal 0.6166 re_mapping 0.0275 re_causal 0.0561 |
|
|
Epoch 70, weight, value: tensor([[ 0.0208, 0.0252, -0.0362, ..., -0.0657, -0.1098, -0.0816], |
|
|
[-0.0358, -0.0379, -0.0199, ..., -0.0262, -0.0203, -0.0137], |
|
|
[ 0.0187, -0.0188, 0.0369, ..., -0.0846, -0.0817, -0.0718], |
|
|
..., |
|
|
[-0.0036, -0.0297, 0.0431, ..., -0.0226, -0.0330, -0.0299], |
|
|
[-0.0105, -0.0228, -0.0022, ..., -0.0513, 0.0015, -0.0523], |
|
|
[ 0.0647, 0.1868, 0.0212, ..., 0.0914, 0.0480, 0.1150]], |
|
|
device='cuda:0'), grad: tensor([[ 1.6321e-07, 1.0757e-07, 8.1025e-07, ..., 8.5915e-08, |
|
|
8.3540e-07, 1.4552e-07], |
|
|
[-1.7928e-07, -8.5915e-08, -8.9034e-07, ..., -8.5915e-08, |
|
|
-9.1083e-07, -1.5227e-07], |
|
|
[ 6.4261e-08, 6.8452e-08, 3.1642e-07, ..., 4.1211e-08, |
|
|
3.3132e-07, 6.4494e-08], |
|
|
..., |
|
|
[-6.8918e-08, -9.8720e-08, -3.4203e-07, ..., -4.8662e-08, |
|
|
-3.6345e-07, -7.3109e-08], |
|
|
[ 1.4435e-08, 5.8208e-09, 7.5903e-08, ..., 4.6566e-09, |
|
|
7.7533e-08, 1.0012e-08], |
|
|
[ 4.1910e-09, 3.0268e-09, 1.9558e-08, ..., 3.0268e-09, |
|
|
2.0023e-08, 4.4238e-09]], device='cuda:0') |
|
|
Epoch 70, bias, value: tensor([ 0.2244, -0.0993, 0.0374, 0.0110, 0.5230, -0.1381, -0.5643], |
|
|
device='cuda:0'), grad: tensor([ 1.9502e-06, -2.0340e-06, 8.4704e-07, 5.7044e-08, -1.0040e-06, |
|
|
1.7532e-07, 4.5868e-08], device='cuda:0') |
|
|
249 |
|
|
5.034667293427056e-06 |
|
|
changing lr |
|
|
epoch 69, time 350.06, cls_loss 0.0124 cls_loss_mapping 0.1044 cls_loss_causal 0.6741 re_mapping 0.0288 re_causal 0.0534 |
|
|
---------------------saving last model at epoch 69---------------------------------------------------- |
|
|
/home/yuqian_fu |
|
|
{'gpu': '0', 'svroot': '/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/saved-PACS//photo/CA_multiple_16fa_v2_ep70_lr0.01_cosine_base0.01_bs6_lamCa_1_lamRe1_adt4_cls1_EW2_70_rmTrue_rnTrue_str5_WithStyleAttackExp1', 'source_domain': 'photo', 'svpath': '/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/saved-PACS//photo/CA_multiple_16fa_v2_ep70_lr0.01_cosine_base0.01_bs6_lamCa_1_lamRe1_adt4_cls1_EW2_70_rmTrue_rnTrue_str5_WithStyleAttackExp1/photo_16factor_best_test_check.csv', 'factor_num': 16, 'epoch': 'best', 'stride': 5, 'eval_mapping': False, 'network': 'resnet18'} |
|
|
-------------------------------------loading pretrain weights---------------------------------- |
|
|
loading weight of best |
|
|
randm: False |
|
|
stride: 5 |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
loading weight of best |
|
|
columns: ['photo', 'art_painting', 'cartoon', 'sketch'] |
|
|
/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/data/PACS/photo_test.hdf5 torch.Size([1670, 3, 227, 227]) torch.Size([1670]) |
|
|
/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/data/PACS/art_painting_test.hdf5 torch.Size([2048, 3, 227, 227]) torch.Size([2048]) |
|
|
/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/data/PACS/cartoon_test.hdf5 torch.Size([2344, 3, 227, 227]) torch.Size([2344]) |
|
|
/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/data/PACS/sketch_test.hdf5 torch.Size([3929, 3, 227, 227]) torch.Size([3929]) |
|
|
photo art_painting cartoon sketch Avg |
|
|
w/o do (original x) 99.461078 55.761719 49.616041 61.28786 55.555206 |
|
|
photo art_painting cartoon sketch Avg |
|
|
do 98.802395 57.617188 50.0 60.269789 55.962325 |
|
|
|