|
|
/home/yuqian_fu |
|
|
here1 |
|
|
here2 |
|
|
{'gpu': '0', 'data': 'sketch', 'ntr': None, 'translate': None, 'autoaug': 'CA_multiple', 'n': 3, 'stride': 5, 'factor_num': 16, 'epochs': 70, 'nbatch': 100, 'batchsize': 6, 'lr': 0.01, 'lr_scheduler': 'cosine', 'svroot': '/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/saved-PACS//sketch/CA_multiple_16fa_v2_ep70_lr0.01_cosine_base0.01_bs6_lamCa_1_lamRe1_adt4_cls1_EW2_70_rmTrue_rnTrue_str5_WithStyleAttackExp1', 'clsadapt': True, 'lambda_causal': 1.0, 'lambda_re': 1.0, 'randm': True, 'randn': True, 'network': 'resnet18'} |
|
|
stride: 5 |
|
|
/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/data/PACS/sketch_train.hdf5 torch.Size([3531, 3, 227, 227]) torch.Size([3531]) |
|
|
--------------------------CA_multiple-------------------------- |
|
|
---------------------------16 factors----------------- |
|
|
randm: True |
|
|
randn: True |
|
|
n: 3 |
|
|
randm: False |
|
|
/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/data/PACS/sketch_val.hdf5 torch.Size([398, 3, 227, 227]) torch.Size([398]) |
|
|
-------------------------------------loading pretrain weights---------------------------------- |
|
|
Epoch 1, weight, value: tensor([[-0.0208, -0.0220, 0.0164, ..., 0.0019, 0.0060, 0.0055], |
|
|
[ 0.0100, 0.0072, 0.0083, ..., -0.0045, -0.0209, -0.0166], |
|
|
[ 0.0103, 0.0061, 0.0040, ..., -0.0083, -0.0102, 0.0187], |
|
|
..., |
|
|
[ 0.0074, 0.0004, -0.0085, ..., -0.0055, 0.0045, 0.0045], |
|
|
[ 0.0123, -0.0002, -0.0053, ..., 0.0055, -0.0196, -0.0166], |
|
|
[-0.0206, 0.0139, 0.0016, ..., -0.0135, 0.0219, -0.0163]], |
|
|
device='cuda:0'), grad: None |
|
|
Epoch 1, bias, value: tensor([-0.0178, 0.0114, 0.0058, -0.0179, 0.0187, 0.0121, -0.0002], |
|
|
device='cuda:0'), grad: None |
|
|
588 |
|
|
0.01 |
|
|
changing lr |
|
|
---------------------saving model at epoch 0---------------------------------------------------- |
|
|
epoch 0, time 808.00, cls_loss 7.4242 cls_loss_mapping 1.5280 cls_loss_causal 1.6920 re_mapping 0.2361 re_causal 0.2355 |
|
|
Epoch 2, weight, value: tensor([[-0.0031, -0.0020, 0.0128, ..., 0.0158, 0.0300, 0.0269], |
|
|
[ 0.1318, 0.1076, 0.0753, ..., -0.0284, -0.0293, -0.0291], |
|
|
[-0.0311, -0.0348, 0.0314, ..., 0.0709, 0.0060, 0.0512], |
|
|
..., |
|
|
[-0.1643, -0.1202, -0.1104, ..., 0.0602, 0.0656, 0.0368], |
|
|
[ 0.1364, 0.0869, 0.0833, ..., -0.0678, -0.0868, -0.0781], |
|
|
[-0.0035, 0.0195, -0.0150, ..., -0.0955, -0.0366, -0.0639]], |
|
|
device='cuda:0'), grad: tensor([[ 1.4246e-01, 4.9591e-02, 4.4464e-02, ..., 4.6326e-02, |
|
|
4.4983e-02, 1.6815e-02], |
|
|
[-4.0009e-02, -1.2909e-02, -7.5607e-03, ..., -1.5297e-02, |
|
|
-1.6342e-02, -6.8398e-03], |
|
|
[-9.7168e-02, -3.3356e-02, -3.5645e-02, ..., -3.4088e-02, |
|
|
-2.8824e-02, -9.9335e-03], |
|
|
..., |
|
|
[-3.7323e-02, -1.3939e-02, -1.0986e-02, ..., -6.9771e-03, |
|
|
-8.4991e-03, -2.9697e-03], |
|
|
[ 1.8387e-03, 6.6328e-04, 5.3406e-04, ..., 6.3944e-04, |
|
|
6.3848e-04, 2.6083e-04], |
|
|
[ 1.7262e-04, 5.7131e-05, 5.0813e-05, ..., 5.4508e-05, |
|
|
4.8101e-05, 1.6570e-05]], device='cuda:0') |
|
|
Epoch 2, bias, value: tensor([-0.0211, -0.0330, 0.0198, 0.0471, -0.0429, 0.0361, 0.0054], |
|
|
device='cuda:0'), grad: tensor([ 0.3125, -0.1070, -0.2090, 0.0684, -0.0692, 0.0041, 0.0004], |
|
|
device='cuda:0') |
|
|
588 |
|
|
0.009994965332706574 |
|
|
changing lr |
|
|
---------------------saving model at epoch 1---------------------------------------------------- |
|
|
epoch 1, time 814.99, cls_loss 0.9246 cls_loss_mapping 0.9319 cls_loss_causal 1.3704 re_mapping 0.0927 re_causal 0.0920 |
|
|
Epoch 3, weight, value: tensor([[ 0.0241, 0.0225, 0.0199, ..., 0.0375, 0.0633, 0.0597], |
|
|
[ 0.0934, 0.0721, 0.0373, ..., -0.0349, -0.0409, -0.0381], |
|
|
[-0.0102, -0.0210, 0.0503, ..., 0.0576, -0.0119, 0.0319], |
|
|
..., |
|
|
[-0.1723, -0.1231, -0.1008, ..., 0.0570, 0.0629, 0.0307], |
|
|
[ 0.1450, 0.0950, 0.0944, ..., -0.0800, -0.0971, -0.0869], |
|
|
[-0.0020, 0.0219, -0.0180, ..., -0.0981, -0.0372, -0.0616]], |
|
|
device='cuda:0'), grad: tensor([[-4.3732e-02, -1.3214e-02, -7.5569e-03, ..., -2.2293e-02, |
|
|
-2.5269e-02, -1.8829e-02], |
|
|
[ 4.2152e-03, 1.6422e-03, 1.0777e-03, ..., 1.3571e-03, |
|
|
1.5516e-03, 1.1444e-03], |
|
|
[ 1.9958e-02, 5.8060e-03, 3.2330e-03, ..., 1.0559e-02, |
|
|
1.1971e-02, 8.9111e-03], |
|
|
..., |
|
|
[ 1.5152e-02, 4.4861e-03, 2.5291e-03, ..., 8.0414e-03, |
|
|
9.1019e-03, 6.8016e-03], |
|
|
[ 2.3627e-04, 6.9618e-05, 3.9160e-05, ..., 1.2517e-04, |
|
|
1.4174e-04, 1.0562e-04], |
|
|
[ 2.0713e-05, 4.4778e-06, 2.8610e-06, ..., 1.4633e-05, |
|
|
1.6212e-05, 1.1973e-05]], device='cuda:0') |
|
|
Epoch 3, bias, value: tensor([ 0.0072, -0.0282, 0.0243, 0.0413, -0.0554, 0.0151, 0.0064], |
|
|
device='cuda:0'), grad: tensor([-1.1499e-01, 9.4757e-03, 5.3375e-02, 1.1086e-02, 4.0375e-02, |
|
|
6.2943e-04, 6.2585e-05], device='cuda:0') |
|
|
588 |
|
|
0.009979871469976196 |
|
|
changing lr |
|
|
---------------------saving model at epoch 2---------------------------------------------------- |
|
|
epoch 2, time 817.05, cls_loss 0.5564 cls_loss_mapping 0.5713 cls_loss_causal 1.1146 re_mapping 0.0838 re_causal 0.0829 |
|
|
Epoch 4, weight, value: tensor([[ 0.0254, 0.0233, 0.0175, ..., 0.0372, 0.0729, 0.0651], |
|
|
[ 0.0889, 0.0631, 0.0286, ..., -0.0262, -0.0341, -0.0294], |
|
|
[-0.0140, -0.0197, 0.0500, ..., 0.0424, -0.0304, 0.0150], |
|
|
..., |
|
|
[-0.1693, -0.1204, -0.0955, ..., 0.0552, 0.0656, 0.0328], |
|
|
[ 0.1554, 0.1025, 0.1018, ..., -0.0833, -0.1015, -0.0903], |
|
|
[-0.0044, 0.0212, -0.0197, ..., -0.0954, -0.0348, -0.0581]], |
|
|
device='cuda:0'), grad: tensor([[-9.0551e-04, -1.7595e-04, -1.8907e-04, ..., -2.0266e-04, |
|
|
-2.7013e-04, -2.0099e-04], |
|
|
[ 8.4281e-05, 1.9148e-05, 1.9908e-05, ..., 1.7017e-05, |
|
|
2.3603e-05, 1.7703e-05], |
|
|
[ 5.8413e-05, 1.0587e-05, 1.1586e-05, ..., 1.3597e-05, |
|
|
1.7911e-05, 1.3277e-05], |
|
|
..., |
|
|
[ 6.3276e-04, 1.1927e-04, 1.2910e-04, ..., 1.4400e-04, |
|
|
1.9073e-04, 1.4174e-04], |
|
|
[ 1.1668e-05, 2.5332e-06, 2.6636e-06, ..., 2.4624e-06, |
|
|
3.3677e-06, 2.5183e-06], |
|
|
[ 3.7141e-06, 7.7859e-07, 8.2701e-07, ..., 7.9721e-07, |
|
|
1.0841e-06, 8.0839e-07]], device='cuda:0') |
|
|
Epoch 4, bias, value: tensor([ 0.0101, -0.0139, 0.0060, 0.0420, -0.0526, 0.0196, -0.0008], |
|
|
device='cuda:0'), grad: tensor([-2.4834e-03, 2.2125e-04, 1.6344e-04, 3.0804e-04, 1.7481e-03, |
|
|
3.1203e-05, 1.0014e-05], device='cuda:0') |
|
|
588 |
|
|
0.009954748808839675 |
|
|
changing lr |
|
|
---------------------saving model at epoch 3---------------------------------------------------- |
|
|
epoch 3, time 816.59, cls_loss 0.4075 cls_loss_mapping 0.3747 cls_loss_causal 0.9471 re_mapping 0.0793 re_causal 0.0785 |
|
|
Epoch 5, weight, value: tensor([[ 0.0587, 0.0530, 0.0429, ..., 0.0455, 0.0862, 0.0758], |
|
|
[ 0.0638, 0.0423, 0.0081, ..., -0.0265, -0.0333, -0.0300], |
|
|
[-0.0198, -0.0299, 0.0373, ..., 0.0311, -0.0395, 0.0039], |
|
|
..., |
|
|
[-0.1696, -0.1202, -0.0889, ..., 0.0488, 0.0549, 0.0276], |
|
|
[ 0.1541, 0.1041, 0.1031, ..., -0.0847, -0.1032, -0.0916], |
|
|
[ 0.0019, 0.0266, -0.0159, ..., -0.0918, -0.0314, -0.0543]], |
|
|
device='cuda:0'), grad: tensor([[-0.0616, -0.0126, -0.0163, ..., -0.0148, -0.0252, -0.0245], |
|
|
[ 0.0148, 0.0030, 0.0038, ..., 0.0036, 0.0058, 0.0057], |
|
|
[ 0.0296, 0.0063, 0.0077, ..., 0.0072, 0.0109, 0.0107], |
|
|
..., |
|
|
[ 0.0063, 0.0008, 0.0018, ..., 0.0015, 0.0047, 0.0045], |
|
|
[ 0.0024, 0.0005, 0.0006, ..., 0.0006, 0.0010, 0.0010], |
|
|
[ 0.0011, 0.0002, 0.0003, ..., 0.0003, 0.0004, 0.0004]], |
|
|
device='cuda:0') |
|
|
Epoch 5, bias, value: tensor([ 0.0308, -0.0303, 0.0234, 0.0388, -0.0666, 0.0088, 0.0053], |
|
|
device='cuda:0'), grad: tensor([-0.1545, 0.0386, 0.0775, 0.0186, 0.0105, 0.0064, 0.0030], |
|
|
device='cuda:0') |
|
|
588 |
|
|
0.009919647942993149 |
|
|
changing lr |
|
|
epoch 4, time 813.70, cls_loss 0.3134 cls_loss_mapping 0.2826 cls_loss_causal 0.8661 re_mapping 0.0756 re_causal 0.0750 |
|
|
Epoch 6, weight, value: tensor([[ 0.0543, 0.0522, 0.0399, ..., 0.0406, 0.0876, 0.0772], |
|
|
[ 0.0528, 0.0340, 0.0004, ..., -0.0232, -0.0285, -0.0257], |
|
|
[-0.0175, -0.0272, 0.0388, ..., 0.0211, -0.0511, -0.0094], |
|
|
..., |
|
|
[-0.1690, -0.1242, -0.0903, ..., 0.0546, 0.0608, 0.0338], |
|
|
[ 0.1551, 0.1075, 0.1070, ..., -0.0853, -0.1042, -0.0924], |
|
|
[ 0.0127, 0.0348, -0.0096, ..., -0.0896, -0.0303, -0.0522]], |
|
|
device='cuda:0'), grad: tensor([[ 2.3499e-03, 1.0033e-03, 9.9850e-04, ..., 1.0929e-03, |
|
|
1.1692e-03, 1.2064e-03], |
|
|
[-2.9125e-03, -1.2484e-03, -1.2417e-03, ..., -1.3580e-03, |
|
|
-1.4515e-03, -1.4982e-03], |
|
|
[-1.9181e-04, -7.5772e-06, -1.7881e-05, ..., -2.9355e-05, |
|
|
-5.0336e-05, -5.4300e-05], |
|
|
..., |
|
|
[ 9.0778e-05, 1.0014e-05, 1.3940e-05, ..., 1.8895e-05, |
|
|
2.7582e-05, 2.9400e-05], |
|
|
[ 2.1681e-06, 8.0466e-07, 7.9907e-07, ..., 8.4005e-07, |
|
|
9.4809e-07, 9.8534e-07], |
|
|
[ 2.9460e-05, 1.1928e-05, 1.1489e-05, ..., 1.1235e-05, |
|
|
1.2793e-05, 1.3262e-05]], device='cuda:0') |
|
|
Epoch 6, bias, value: tensor([ 2.3044e-02, -3.1049e-02, 1.9318e-02, 4.3537e-02, -6.0893e-02, |
|
|
-9.9219e-05, 1.5851e-02], device='cuda:0'), grad: tensor([ 4.4403e-03, -5.4855e-03, -5.7840e-04, 1.3084e-03, 2.5487e-04, |
|
|
4.4256e-06, 5.7220e-05], device='cuda:0') |
|
|
588 |
|
|
0.009874639560909117 |
|
|
changing lr |
|
|
epoch 5, time 816.31, cls_loss 0.2576 cls_loss_mapping 0.2196 cls_loss_causal 0.7947 re_mapping 0.0733 re_causal 0.0730 |
|
|
Epoch 7, weight, value: tensor([[ 0.0654, 0.0580, 0.0449, ..., 0.0393, 0.0916, 0.0802], |
|
|
[ 0.0489, 0.0304, -0.0033, ..., -0.0207, -0.0244, -0.0211], |
|
|
[-0.0205, -0.0309, 0.0346, ..., 0.0150, -0.0583, -0.0180], |
|
|
..., |
|
|
[-0.1776, -0.1272, -0.0928, ..., 0.0570, 0.0628, 0.0360], |
|
|
[ 0.1572, 0.1097, 0.1087, ..., -0.0837, -0.1025, -0.0906], |
|
|
[ 0.0154, 0.0374, -0.0072, ..., -0.0894, -0.0337, -0.0543]], |
|
|
device='cuda:0'), grad: tensor([[ 2.8763e-03, 1.4992e-03, 1.5965e-03, ..., 4.7517e-04, |
|
|
7.0667e-04, 7.4673e-04], |
|
|
[-2.7885e-03, -1.4744e-03, -1.5659e-03, ..., -4.5371e-04, |
|
|
-6.7568e-04, -7.1335e-04], |
|
|
[-4.0770e-05, -1.8165e-05, -1.9968e-05, ..., -1.0021e-05, |
|
|
-1.5117e-05, -1.5944e-05], |
|
|
..., |
|
|
[-4.1306e-05, -2.6934e-06, -8.1509e-06, ..., -1.0774e-05, |
|
|
-1.5028e-05, -1.6659e-05], |
|
|
[ 1.3821e-06, 5.4017e-07, 5.9605e-07, ..., 2.9802e-07, |
|
|
4.5076e-07, 4.8056e-07], |
|
|
[-8.7321e-06, -5.0887e-06, -4.1500e-06, ..., -3.6508e-07, |
|
|
-9.2760e-07, -1.0021e-06]], device='cuda:0') |
|
|
Epoch 7, bias, value: tensor([ 0.0437, -0.0316, 0.0258, 0.0446, -0.0881, 0.0026, 0.0124], |
|
|
device='cuda:0'), grad: tensor([ 4.6577e-03, -4.4403e-03, -8.0109e-05, -2.6338e-06, -1.3149e-04, |
|
|
2.9169e-06, -1.2100e-05], device='cuda:0') |
|
|
588 |
|
|
0.009819814303479266 |
|
|
changing lr |
|
|
epoch 6, time 818.25, cls_loss 0.1873 cls_loss_mapping 0.1654 cls_loss_causal 0.7497 re_mapping 0.0694 re_causal 0.0692 |
|
|
Epoch 8, weight, value: tensor([[ 0.0525, 0.0526, 0.0366, ..., 0.0360, 0.0902, 0.0794], |
|
|
[ 0.0595, 0.0406, 0.0080, ..., -0.0211, -0.0242, -0.0207], |
|
|
[-0.0158, -0.0296, 0.0351, ..., 0.0148, -0.0563, -0.0175], |
|
|
..., |
|
|
[-0.1736, -0.1281, -0.0912, ..., 0.0596, 0.0646, 0.0378], |
|
|
[ 0.1559, 0.1095, 0.1088, ..., -0.0824, -0.1009, -0.0893], |
|
|
[ 0.0144, 0.0344, -0.0106, ..., -0.0869, -0.0334, -0.0533]], |
|
|
device='cuda:0'), grad: tensor([[-0.0395, -0.0124, -0.0165, ..., -0.0090, -0.0090, -0.0092], |
|
|
[ 0.0177, 0.0055, 0.0073, ..., 0.0041, 0.0041, 0.0042], |
|
|
[ 0.0131, 0.0041, 0.0054, ..., 0.0029, 0.0029, 0.0030], |
|
|
..., |
|
|
[ 0.0060, 0.0019, 0.0027, ..., 0.0015, 0.0015, 0.0015], |
|
|
[ 0.0008, 0.0002, 0.0003, ..., 0.0002, 0.0002, 0.0002], |
|
|
[ 0.0016, 0.0005, 0.0006, ..., 0.0004, 0.0004, 0.0004]], |
|
|
device='cuda:0') |
|
|
Epoch 8, bias, value: tensor([ 0.0272, -0.0320, 0.0361, 0.0317, -0.0745, 0.0015, 0.0192], |
|
|
device='cuda:0'), grad: tensor([-0.0837, 0.0378, 0.0279, 0.0006, 0.0122, 0.0018, 0.0034], |
|
|
device='cuda:0') |
|
|
588 |
|
|
0.009755282581475767 |
|
|
changing lr |
|
|
---------------------saving model at epoch 7---------------------------------------------------- |
|
|
epoch 7, time 819.90, cls_loss 0.1601 cls_loss_mapping 0.1472 cls_loss_causal 0.7095 re_mapping 0.0678 re_causal 0.0680 |
|
|
Epoch 9, weight, value: tensor([[ 0.0682, 0.0640, 0.0488, ..., 0.0328, 0.0870, 0.0772], |
|
|
[ 0.0578, 0.0391, 0.0079, ..., -0.0195, -0.0213, -0.0182], |
|
|
[-0.0168, -0.0309, 0.0323, ..., 0.0155, -0.0549, -0.0170], |
|
|
..., |
|
|
[-0.1874, -0.1370, -0.0998, ..., 0.0593, 0.0642, 0.0377], |
|
|
[ 0.1556, 0.1098, 0.1089, ..., -0.0811, -0.0990, -0.0878], |
|
|
[ 0.0138, 0.0337, -0.0127, ..., -0.0853, -0.0336, -0.0529]], |
|
|
device='cuda:0'), grad: tensor([[ 2.3198e-04, 4.4197e-05, 4.7863e-05, ..., 8.1241e-05, |
|
|
7.5579e-05, 8.2672e-05], |
|
|
[ 1.6642e-04, 2.4870e-05, 2.9132e-05, ..., 3.4600e-05, |
|
|
3.7372e-05, 3.7581e-05], |
|
|
[-8.1396e-04, -1.6522e-04, -1.7905e-04, ..., -2.9278e-04, |
|
|
-2.5034e-04, -2.8634e-04], |
|
|
..., |
|
|
[ 2.6441e-04, 6.7830e-05, 7.0870e-05, ..., 1.3185e-04, |
|
|
9.2685e-05, 1.1873e-04], |
|
|
[ 6.4850e-05, 1.1407e-05, 1.2659e-05, ..., 1.8835e-05, |
|
|
1.8939e-05, 1.9878e-05], |
|
|
[ 6.4135e-05, 1.2577e-05, 1.3813e-05, ..., 1.9103e-05, |
|
|
1.8761e-05, 2.0012e-05]], device='cuda:0') |
|
|
Epoch 9, bias, value: tensor([ 0.0404, -0.0317, 0.0415, 0.0336, -0.0923, 0.0013, 0.0160], |
|
|
device='cuda:0'), grad: tensor([ 5.8842e-04, 5.0735e-04, -1.9464e-03, 5.7876e-05, 4.4751e-04, |
|
|
1.7881e-04, 1.6797e-04], device='cuda:0') |
|
|
588 |
|
|
0.009681174353198686 |
|
|
changing lr |
|
|
---------------------saving model at epoch 8---------------------------------------------------- |
|
|
epoch 8, time 816.90, cls_loss 0.0972 cls_loss_mapping 0.1247 cls_loss_causal 0.6614 re_mapping 0.0653 re_causal 0.0658 |
|
|
Epoch 10, weight, value: tensor([[ 0.0649, 0.0594, 0.0458, ..., 0.0358, 0.0910, 0.0805], |
|
|
[ 0.0608, 0.0405, 0.0098, ..., -0.0244, -0.0258, -0.0222], |
|
|
[-0.0181, -0.0319, 0.0302, ..., 0.0187, -0.0479, -0.0123], |
|
|
..., |
|
|
[-0.1769, -0.1286, -0.0939, ..., 0.0563, 0.0585, 0.0340], |
|
|
[ 0.1486, 0.1067, 0.1055, ..., -0.0797, -0.0975, -0.0865], |
|
|
[ 0.0117, 0.0320, -0.0136, ..., -0.0836, -0.0337, -0.0524]], |
|
|
device='cuda:0'), grad: tensor([[ 1.3912e-04, 1.3083e-05, 3.1382e-05, ..., 1.8790e-05, |
|
|
2.7537e-05, 2.7105e-05], |
|
|
[ 1.4760e-05, 3.1404e-06, 4.3884e-06, ..., 3.0473e-06, |
|
|
4.1239e-06, 3.8967e-06], |
|
|
[-2.3866e-04, -2.5272e-05, -5.5760e-05, ..., -3.4124e-05, |
|
|
-4.9949e-05, -4.8786e-05], |
|
|
..., |
|
|
[ 6.2108e-05, 7.3090e-06, 1.5132e-05, ..., 8.9258e-06, |
|
|
1.3202e-05, 1.2845e-05], |
|
|
[ 3.4384e-06, 5.6624e-07, 8.8289e-07, ..., 4.7311e-07, |
|
|
6.9290e-07, 6.6683e-07], |
|
|
[ 1.0289e-05, 1.9372e-07, 2.1122e-06, ..., 1.8664e-06, |
|
|
2.6971e-06, 2.6338e-06]], device='cuda:0') |
|
|
Epoch 10, bias, value: tensor([ 0.0402, -0.0282, 0.0481, 0.0325, -0.0892, -0.0080, 0.0130], |
|
|
device='cuda:0'), grad: tensor([ 3.9959e-04, 3.6925e-05, -6.7711e-04, 2.5898e-05, 1.7405e-04, |
|
|
9.0674e-06, 3.2365e-05], device='cuda:0') |
|
|
588 |
|
|
0.009597638862757255 |
|
|
changing lr |
|
|
---------------------saving model at epoch 9---------------------------------------------------- |
|
|
epoch 9, time 819.66, cls_loss 0.0605 cls_loss_mapping 0.0894 cls_loss_causal 0.6396 re_mapping 0.0631 re_causal 0.0640 |
|
|
Epoch 11, weight, value: tensor([[ 0.0652, 0.0586, 0.0463, ..., 0.0348, 0.0899, 0.0799], |
|
|
[ 0.0594, 0.0395, 0.0094, ..., -0.0237, -0.0246, -0.0211], |
|
|
[-0.0221, -0.0334, 0.0274, ..., 0.0183, -0.0476, -0.0128], |
|
|
..., |
|
|
[-0.1754, -0.1277, -0.0936, ..., 0.0539, 0.0566, 0.0324], |
|
|
[ 0.1493, 0.1067, 0.1054, ..., -0.0776, -0.0948, -0.0839], |
|
|
[ 0.0100, 0.0321, -0.0137, ..., -0.0817, -0.0339, -0.0521]], |
|
|
device='cuda:0'), grad: tensor([[ 4.1342e-04, 6.2287e-05, 5.8174e-05, ..., 1.6308e-04, |
|
|
2.1768e-04, 2.0707e-04], |
|
|
[ 5.8487e-06, 1.2424e-06, 1.2908e-06, ..., 1.1362e-06, |
|
|
1.2964e-06, 1.3020e-06], |
|
|
[-1.8632e-04, -3.6508e-05, -4.0621e-05, ..., -2.4974e-05, |
|
|
-3.2604e-05, -3.2872e-05], |
|
|
..., |
|
|
[-3.2640e-04, -4.5002e-05, -3.9011e-05, ..., -1.5128e-04, |
|
|
-2.0230e-04, -1.9169e-04], |
|
|
[ 1.6093e-05, 3.6769e-06, 3.7700e-06, ..., 2.2780e-06, |
|
|
3.0100e-06, 3.0380e-06], |
|
|
[-4.5374e-06, -2.0377e-06, -1.6317e-06, ..., -6.0350e-07, |
|
|
-9.1456e-07, -9.5554e-07]], device='cuda:0') |
|
|
Epoch 11, bias, value: tensor([ 0.0427, -0.0284, 0.0403, 0.0371, -0.0876, -0.0022, 0.0062], |
|
|
device='cuda:0'), grad: tensor([ 1.0977e-03, 1.4700e-05, -4.6778e-04, 2.0730e-04, -8.7881e-04, |
|
|
3.9816e-05, -1.0781e-05], device='cuda:0') |
|
|
588 |
|
|
0.009504844339512096 |
|
|
changing lr |
|
|
epoch 10, time 821.27, cls_loss 0.0561 cls_loss_mapping 0.0788 cls_loss_causal 0.6070 re_mapping 0.0611 re_causal 0.0623 |
|
|
Epoch 12, weight, value: tensor([[ 0.0616, 0.0580, 0.0463, ..., 0.0337, 0.0886, 0.0786], |
|
|
[ 0.0605, 0.0394, 0.0097, ..., -0.0220, -0.0226, -0.0193], |
|
|
[-0.0253, -0.0349, 0.0243, ..., 0.0168, -0.0476, -0.0138], |
|
|
..., |
|
|
[-0.1707, -0.1254, -0.0913, ..., 0.0518, 0.0543, 0.0307], |
|
|
[ 0.1450, 0.1049, 0.1035, ..., -0.0758, -0.0927, -0.0822], |
|
|
[ 0.0142, 0.0328, -0.0124, ..., -0.0793, -0.0332, -0.0505]], |
|
|
device='cuda:0'), grad: tensor([[-1.8275e-04, -1.1110e-04, -1.1367e-04, ..., -4.3213e-05, |
|
|
-6.9678e-05, -6.4313e-05], |
|
|
[ 1.5283e-04, 8.8334e-05, 9.0718e-05, ..., 3.4511e-05, |
|
|
5.3734e-05, 5.0426e-05], |
|
|
[-3.7491e-05, -1.5181e-06, -3.3714e-06, ..., -3.9898e-06, |
|
|
-3.8147e-06, -6.1728e-06], |
|
|
..., |
|
|
[ 1.9655e-05, 6.2659e-06, 6.9514e-06, ..., 2.5034e-06, |
|
|
3.4645e-06, 3.9563e-06], |
|
|
[-3.6597e-05, -1.8775e-05, -1.9938e-05, ..., -9.4250e-07, |
|
|
-1.6708e-06, -2.5108e-06], |
|
|
[ 5.4181e-05, 2.5660e-05, 2.7269e-05, ..., 5.6326e-06, |
|
|
9.1419e-06, 9.7007e-06]], device='cuda:0') |
|
|
Epoch 12, bias, value: tensor([ 0.0346, -0.0208, 0.0361, 0.0345, -0.0834, -0.0068, 0.0138], |
|
|
device='cuda:0'), grad: tensor([-2.5177e-04, 2.2542e-04, -1.1235e-04, 6.1154e-05, 4.2081e-05, |
|
|
-5.0873e-05, 8.6904e-05], device='cuda:0') |
|
|
588 |
|
|
0.009402977659283692 |
|
|
changing lr |
|
|
---------------------saving model at epoch 11---------------------------------------------------- |
|
|
epoch 11, time 821.31, cls_loss 0.0570 cls_loss_mapping 0.0703 cls_loss_causal 0.5692 re_mapping 0.0586 re_causal 0.0600 |
|
|
Epoch 13, weight, value: tensor([[ 0.0644, 0.0581, 0.0467, ..., 0.0352, 0.0904, 0.0803], |
|
|
[ 0.0619, 0.0420, 0.0125, ..., -0.0235, -0.0242, -0.0208], |
|
|
[-0.0252, -0.0338, 0.0236, ..., 0.0161, -0.0469, -0.0139], |
|
|
..., |
|
|
[-0.1647, -0.1242, -0.0901, ..., 0.0510, 0.0538, 0.0308], |
|
|
[ 0.1384, 0.1016, 0.1000, ..., -0.0744, -0.0911, -0.0808], |
|
|
[ 0.0093, 0.0302, -0.0140, ..., -0.0776, -0.0332, -0.0500]], |
|
|
device='cuda:0'), grad: tensor([[ 8.0392e-06, -8.7544e-08, 2.9430e-07, ..., 4.3623e-06, |
|
|
5.4911e-06, 5.5321e-06], |
|
|
[ 1.2279e-05, 9.0711e-07, 1.6913e-06, ..., 7.1265e-06, |
|
|
8.9929e-06, 8.9481e-06], |
|
|
[-3.6621e-04, -2.8491e-05, -4.9472e-05, ..., -2.0599e-04, |
|
|
-2.6464e-04, -2.6321e-04], |
|
|
..., |
|
|
[ 3.5000e-04, 3.4899e-05, 5.5104e-05, ..., 1.8859e-04, |
|
|
2.4211e-04, 2.4068e-04], |
|
|
[-2.7314e-05, -1.5214e-05, -1.6838e-05, ..., -3.7253e-07, |
|
|
-7.0594e-07, -7.6555e-07], |
|
|
[ 1.8537e-05, 7.1526e-06, 8.2403e-06, ..., 3.8929e-06, |
|
|
5.1036e-06, 5.0962e-06]], device='cuda:0') |
|
|
Epoch 13, bias, value: tensor([ 0.0464, -0.0207, 0.0333, 0.0331, -0.0736, -0.0147, 0.0041], |
|
|
device='cuda:0'), grad: tensor([ 3.1501e-05, 4.5478e-05, -1.3533e-03, 1.9565e-05, 1.2522e-03, |
|
|
-3.3081e-05, 3.8832e-05], device='cuda:0') |
|
|
588 |
|
|
0.009292243968009333 |
|
|
changing lr |
|
|
---------------------saving model at epoch 12---------------------------------------------------- |
|
|
epoch 12, time 825.67, cls_loss 0.0419 cls_loss_mapping 0.0680 cls_loss_causal 0.5785 re_mapping 0.0564 re_causal 0.0583 |
|
|
Epoch 14, weight, value: tensor([[ 0.0640, 0.0587, 0.0481, ..., 0.0346, 0.0896, 0.0796], |
|
|
[ 0.0618, 0.0423, 0.0138, ..., -0.0229, -0.0235, -0.0202], |
|
|
[-0.0253, -0.0344, 0.0214, ..., 0.0166, -0.0444, -0.0121], |
|
|
..., |
|
|
[-0.1670, -0.1243, -0.0911, ..., 0.0478, 0.0499, 0.0273], |
|
|
[ 0.1387, 0.0999, 0.0983, ..., -0.0718, -0.0881, -0.0780], |
|
|
[ 0.0112, 0.0304, -0.0132, ..., -0.0752, -0.0325, -0.0486]], |
|
|
device='cuda:0'), grad: tensor([[ 2.0409e-03, 9.6655e-04, 1.0223e-03, ..., 8.1360e-05, |
|
|
2.6798e-04, 2.6321e-04], |
|
|
[ 1.8731e-05, 3.2540e-06, 3.3826e-06, ..., 1.0967e-05, |
|
|
1.3873e-05, 1.2882e-05], |
|
|
[-3.7594e-03, -1.2169e-03, -1.4153e-03, ..., -4.4370e-04, |
|
|
-8.7452e-04, -8.0872e-04], |
|
|
..., |
|
|
[ 1.4982e-03, 2.1255e-04, 3.4595e-04, ..., 2.7776e-04, |
|
|
4.8876e-04, 4.3702e-04], |
|
|
[ 9.2164e-06, 2.5686e-06, 2.9355e-06, ..., 1.6699e-06, |
|
|
2.7679e-06, 2.5965e-06], |
|
|
[ 5.2750e-05, 9.8124e-06, 1.0490e-05, ..., 2.8387e-05, |
|
|
3.6836e-05, 3.4153e-05]], device='cuda:0') |
|
|
Epoch 14, bias, value: tensor([ 0.0433, -0.0196, 0.0364, 0.0285, -0.0805, -0.0071, 0.0064], |
|
|
device='cuda:0'), grad: tensor([ 3.7994e-03, 5.9754e-05, -8.5068e-03, 3.9625e-04, 4.0627e-03, |
|
|
2.2739e-05, 1.6463e-04], device='cuda:0') |
|
|
588 |
|
|
0.009172866268606516 |
|
|
changing lr |
|
|
epoch 13, time 817.11, cls_loss 0.0311 cls_loss_mapping 0.0553 cls_loss_causal 0.5720 re_mapping 0.0535 re_causal 0.0557 |
|
|
Epoch 15, weight, value: tensor([[ 0.0683, 0.0621, 0.0518, ..., 0.0324, 0.0866, 0.0768], |
|
|
[ 0.0620, 0.0419, 0.0141, ..., -0.0221, -0.0225, -0.0192], |
|
|
[-0.0247, -0.0339, 0.0208, ..., 0.0148, -0.0450, -0.0135], |
|
|
..., |
|
|
[-0.1660, -0.1239, -0.0918, ..., 0.0477, 0.0502, 0.0282], |
|
|
[ 0.1343, 0.0975, 0.0958, ..., -0.0702, -0.0863, -0.0765], |
|
|
[ 0.0075, 0.0282, -0.0144, ..., -0.0735, -0.0320, -0.0477]], |
|
|
device='cuda:0'), grad: tensor([[ 3.0651e-03, 5.9986e-04, 5.0879e-04, ..., 5.8985e-04, |
|
|
8.3113e-04, 7.6103e-04], |
|
|
[ 4.5419e-04, 8.9407e-05, 8.0764e-05, ..., 8.5473e-05, |
|
|
1.2624e-04, 1.1367e-04], |
|
|
[-8.7509e-03, -1.7099e-03, -1.4582e-03, ..., -1.6956e-03, |
|
|
-2.3994e-03, -2.1935e-03], |
|
|
..., |
|
|
[ 2.7809e-03, 5.3740e-04, 4.3988e-04, ..., 5.4169e-04, |
|
|
7.4339e-04, 6.8808e-04], |
|
|
[ 1.7011e-04, 3.3975e-05, 2.9579e-05, ..., 3.2157e-05, |
|
|
4.6343e-05, 4.2140e-05], |
|
|
[ 4.3726e-04, 8.6069e-05, 7.3731e-05, ..., 8.3447e-05, |
|
|
1.1867e-04, 1.0842e-04]], device='cuda:0') |
|
|
Epoch 15, bias, value: tensor([ 4.2851e-02, -1.4903e-02, 3.4590e-02, 3.1522e-02, -7.6456e-02, |
|
|
-1.0301e-02, 1.8977e-05], device='cuda:0'), grad: tensor([ 0.0080, 0.0012, -0.0229, 0.0049, 0.0072, 0.0004, 0.0011], |
|
|
device='cuda:0') |
|
|
588 |
|
|
0.00904508497187474 |
|
|
changing lr |
|
|
---------------------saving model at epoch 14---------------------------------------------------- |
|
|
epoch 14, time 826.21, cls_loss 0.0221 cls_loss_mapping 0.0496 cls_loss_causal 0.5465 re_mapping 0.0512 re_causal 0.0539 |
|
|
Epoch 16, weight, value: tensor([[ 0.0712, 0.0653, 0.0555, ..., 0.0318, 0.0849, 0.0754], |
|
|
[ 0.0595, 0.0405, 0.0135, ..., -0.0222, -0.0225, -0.0193], |
|
|
[-0.0267, -0.0348, 0.0188, ..., 0.0135, -0.0448, -0.0141], |
|
|
..., |
|
|
[-0.1610, -0.1225, -0.0914, ..., 0.0478, 0.0508, 0.0292], |
|
|
[ 0.1298, 0.0948, 0.0930, ..., -0.0687, -0.0847, -0.0750], |
|
|
[ 0.0085, 0.0278, -0.0139, ..., -0.0716, -0.0310, -0.0463]], |
|
|
device='cuda:0'), grad: tensor([[ 1.7147e-03, 3.0088e-04, 3.9959e-04, ..., 4.1032e-04, |
|
|
6.5947e-04, 6.2037e-04], |
|
|
[ 1.4715e-05, 4.4033e-06, 4.8503e-06, ..., 2.9411e-06, |
|
|
4.1239e-06, 4.0755e-06], |
|
|
[-2.1495e-06, 1.5786e-06, 2.2314e-06, ..., -4.2655e-06, |
|
|
-2.8461e-06, -3.6471e-06], |
|
|
..., |
|
|
[-1.7195e-03, -2.9278e-04, -3.9339e-04, ..., -4.1509e-04, |
|
|
-6.6853e-04, -6.2752e-04], |
|
|
[ 2.4036e-05, 1.1437e-05, 1.1727e-05, ..., 2.2370e-06, |
|
|
3.2187e-06, 3.4068e-06], |
|
|
[-5.2631e-05, -2.8491e-05, -2.8670e-05, ..., -3.0771e-06, |
|
|
-4.1500e-06, -4.8578e-06]], device='cuda:0') |
|
|
Epoch 16, bias, value: tensor([ 0.0440, -0.0177, 0.0296, 0.0272, -0.0665, -0.0132, 0.0038], |
|
|
device='cuda:0'), grad: tensor([ 4.1809e-03, 3.2425e-05, -1.2070e-05, 5.0902e-05, -4.2076e-03, |
|
|
4.3541e-05, -8.7917e-05], device='cuda:0') |
|
|
588 |
|
|
0.008909157412340152 |
|
|
changing lr |
|
|
epoch 15, time 813.34, cls_loss 0.0266 cls_loss_mapping 0.0508 cls_loss_causal 0.5462 re_mapping 0.0484 re_causal 0.0513 |
|
|
Epoch 17, weight, value: tensor([[ 0.0715, 0.0649, 0.0553, ..., 0.0318, 0.0838, 0.0750], |
|
|
[ 0.0587, 0.0413, 0.0148, ..., -0.0222, -0.0224, -0.0192], |
|
|
[-0.0274, -0.0342, 0.0180, ..., 0.0116, -0.0450, -0.0154], |
|
|
..., |
|
|
[-0.1618, -0.1227, -0.0923, ..., 0.0472, 0.0501, 0.0288], |
|
|
[ 0.1293, 0.0936, 0.0919, ..., -0.0669, -0.0824, -0.0730], |
|
|
[ 0.0088, 0.0271, -0.0136, ..., -0.0700, -0.0305, -0.0454]], |
|
|
device='cuda:0'), grad: tensor([[ 8.0541e-06, 3.9004e-06, 2.7288e-06, ..., 2.1141e-06, |
|
|
1.3644e-06, 1.4128e-06], |
|
|
[-5.5343e-05, -2.2486e-05, -2.2516e-05, ..., -4.5486e-06, |
|
|
-6.7763e-06, -7.1824e-06], |
|
|
[ 1.2174e-05, 2.0973e-06, 3.1088e-06, ..., 2.5313e-06, |
|
|
3.2149e-06, 3.2037e-06], |
|
|
..., |
|
|
[ 4.1649e-06, 2.9355e-06, 3.1441e-06, ..., -4.7032e-07, |
|
|
-3.0920e-07, -2.5053e-07], |
|
|
[ 5.8673e-06, 1.9930e-06, 2.1309e-06, ..., 5.8115e-07, |
|
|
9.0804e-07, 9.4622e-07], |
|
|
[ 2.0936e-05, 7.8082e-06, 8.0466e-06, ..., 1.8356e-06, |
|
|
2.7772e-06, 2.9653e-06]], device='cuda:0') |
|
|
Epoch 17, bias, value: tensor([ 0.0460, -0.0192, 0.0244, 0.0282, -0.0692, -0.0082, 0.0049], |
|
|
device='cuda:0'), grad: tensor([ 1.7703e-05, -1.0329e-04, 3.1352e-05, 2.3097e-06, -1.1222e-06, |
|
|
1.2226e-05, 4.0591e-05], device='cuda:0') |
|
|
588 |
|
|
0.00876535733001806 |
|
|
changing lr |
|
|
---------------------saving model at epoch 16---------------------------------------------------- |
|
|
epoch 16, time 816.99, cls_loss 0.0211 cls_loss_mapping 0.0483 cls_loss_causal 0.5618 re_mapping 0.0473 re_causal 0.0506 |
|
|
Epoch 18, weight, value: tensor([[ 0.0775, 0.0697, 0.0607, ..., 0.0303, 0.0809, 0.0726], |
|
|
[ 0.0547, 0.0376, 0.0118, ..., -0.0215, -0.0212, -0.0183], |
|
|
[-0.0281, -0.0353, 0.0158, ..., 0.0125, -0.0429, -0.0142], |
|
|
..., |
|
|
[-0.1592, -0.1212, -0.0916, ..., 0.0467, 0.0500, 0.0292], |
|
|
[ 0.1258, 0.0916, 0.0898, ..., -0.0655, -0.0808, -0.0716], |
|
|
[ 0.0085, 0.0268, -0.0133, ..., -0.0684, -0.0301, -0.0447]], |
|
|
device='cuda:0'), grad: tensor([[ 1.4365e-04, 5.3078e-05, 5.5641e-05, ..., 8.5086e-06, |
|
|
1.4983e-05, 1.8373e-05], |
|
|
[ 4.0591e-05, 1.3851e-05, 1.5192e-05, ..., 4.5113e-06, |
|
|
6.4410e-06, 7.2829e-06], |
|
|
[ 1.2779e-04, 3.4571e-05, 3.9786e-05, ..., 2.5526e-05, |
|
|
2.9683e-05, 3.1978e-05], |
|
|
..., |
|
|
[ 1.4191e-03, 5.2643e-04, 5.5504e-04, ..., 8.7798e-05, |
|
|
1.5748e-04, 1.9002e-04], |
|
|
[-5.7945e-03, -2.1477e-03, -2.2564e-03, ..., -3.5357e-04, |
|
|
-6.3562e-04, -7.6866e-04], |
|
|
[ 3.9864e-03, 1.4734e-03, 1.5469e-03, ..., 2.4414e-04, |
|
|
4.3726e-04, 5.2881e-04]], device='cuda:0') |
|
|
Epoch 18, bias, value: tensor([ 0.0497, -0.0176, 0.0255, 0.0224, -0.0677, -0.0103, 0.0047], |
|
|
device='cuda:0'), grad: tensor([ 2.6631e-04, 7.9513e-05, 2.7847e-04, 1.0753e-04, 2.6379e-03, |
|
|
-1.0796e-02, 7.4348e-03], device='cuda:0') |
|
|
588 |
|
|
0.008613974319136962 |
|
|
changing lr |
|
|
epoch 17, time 818.76, cls_loss 0.0146 cls_loss_mapping 0.0373 cls_loss_causal 0.5409 re_mapping 0.0453 re_causal 0.0490 |
|
|
Epoch 19, weight, value: tensor([[ 0.0752, 0.0691, 0.0600, ..., 0.0290, 0.0787, 0.0707], |
|
|
[ 0.0547, 0.0370, 0.0118, ..., -0.0211, -0.0202, -0.0175], |
|
|
[-0.0266, -0.0350, 0.0150, ..., 0.0123, -0.0417, -0.0137], |
|
|
..., |
|
|
[-0.1582, -0.1196, -0.0907, ..., 0.0456, 0.0486, 0.0283], |
|
|
[ 0.1242, 0.0905, 0.0887, ..., -0.0641, -0.0790, -0.0700], |
|
|
[ 0.0071, 0.0258, -0.0134, ..., -0.0670, -0.0298, -0.0441]], |
|
|
device='cuda:0'), grad: tensor([[-1.0096e-06, -1.2126e-06, -1.1018e-06, ..., 3.4366e-07, |
|
|
2.8312e-07, 1.9837e-07], |
|
|
[ 7.3351e-06, 5.9605e-07, 1.2675e-06, ..., 3.3565e-06, |
|
|
4.1500e-06, 4.0606e-06], |
|
|
[-5.8636e-06, 4.5262e-07, -1.9465e-07, ..., -4.3362e-06, |
|
|
-5.1446e-06, -5.1446e-06], |
|
|
..., |
|
|
[ 3.3751e-06, 1.1064e-06, 1.2377e-06, ..., 1.1427e-06, |
|
|
1.4827e-06, 1.4231e-06], |
|
|
[ 3.0696e-06, 1.0030e-06, 1.0589e-06, ..., 3.2783e-07, |
|
|
5.2527e-07, 5.0385e-07], |
|
|
[-3.3528e-06, -1.4165e-06, -1.3765e-06, ..., 1.2759e-07, |
|
|
-2.5146e-08, 1.8626e-09]], device='cuda:0') |
|
|
Epoch 19, bias, value: tensor([ 0.0439, -0.0119, 0.0294, 0.0249, -0.0726, -0.0098, 0.0027], |
|
|
device='cuda:0'), grad: tensor([ 5.2713e-07, 1.9491e-05, -1.6972e-05, -8.5086e-06, 4.9025e-06, |
|
|
5.8562e-06, -5.3048e-06], device='cuda:0') |
|
|
588 |
|
|
0.008455313244934327 |
|
|
changing lr |
|
|
---------------------saving model at epoch 18---------------------------------------------------- |
|
|
epoch 18, time 814.80, cls_loss 0.0140 cls_loss_mapping 0.0346 cls_loss_causal 0.5277 re_mapping 0.0424 re_causal 0.0464 |
|
|
Epoch 20, weight, value: tensor([[ 0.0781, 0.0697, 0.0611, ..., 0.0294, 0.0788, 0.0709], |
|
|
[ 0.0527, 0.0364, 0.0118, ..., -0.0212, -0.0206, -0.0179], |
|
|
[-0.0272, -0.0350, 0.0138, ..., 0.0120, -0.0409, -0.0136], |
|
|
..., |
|
|
[-0.1548, -0.1181, -0.0897, ..., 0.0448, 0.0481, 0.0283], |
|
|
[ 0.1220, 0.0894, 0.0875, ..., -0.0628, -0.0776, -0.0687], |
|
|
[ 0.0063, 0.0250, -0.0135, ..., -0.0657, -0.0294, -0.0434]], |
|
|
device='cuda:0'), grad: tensor([[ 4.7296e-05, 1.2435e-05, 1.3180e-05, ..., 8.9556e-06, |
|
|
1.2085e-05, 1.2808e-05], |
|
|
[ 2.1249e-05, 3.6787e-06, 5.5023e-06, ..., 7.8008e-06, |
|
|
8.6352e-06, 9.1121e-06], |
|
|
[-9.3341e-05, -2.8506e-05, -2.8655e-05, ..., -1.3091e-05, |
|
|
-1.9684e-05, -2.0817e-05], |
|
|
..., |
|
|
[ 2.8729e-05, 9.0674e-06, 9.4250e-06, ..., 3.9823e-06, |
|
|
4.9956e-06, 5.6550e-06], |
|
|
[ 8.0541e-06, 2.3283e-06, 2.3935e-06, ..., 1.2824e-06, |
|
|
1.7788e-06, 1.9064e-06], |
|
|
[ 5.2862e-06, 1.3188e-06, 1.4612e-06, ..., 1.1791e-06, |
|
|
1.5628e-06, 1.6280e-06]], device='cuda:0') |
|
|
Epoch 20, bias, value: tensor([ 0.0500, -0.0137, 0.0276, 0.0195, -0.0668, -0.0109, 0.0007], |
|
|
device='cuda:0'), grad: tensor([ 1.1319e-04, 5.6565e-05, -2.1183e-04, -5.3734e-05, 6.3956e-05, |
|
|
1.8641e-05, 1.2890e-05], device='cuda:0') |
|
|
588 |
|
|
0.008289693629698565 |
|
|
changing lr |
|
|
epoch 19, time 817.70, cls_loss 0.0107 cls_loss_mapping 0.0303 cls_loss_causal 0.4927 re_mapping 0.0397 re_causal 0.0443 |
|
|
Epoch 21, weight, value: tensor([[ 0.0791, 0.0704, 0.0623, ..., 0.0290, 0.0777, 0.0699], |
|
|
[ 0.0500, 0.0352, 0.0110, ..., -0.0210, -0.0206, -0.0179], |
|
|
[-0.0248, -0.0341, 0.0136, ..., 0.0118, -0.0396, -0.0130], |
|
|
..., |
|
|
[-0.1541, -0.1172, -0.0894, ..., 0.0437, 0.0465, 0.0273], |
|
|
[ 0.1196, 0.0874, 0.0854, ..., -0.0615, -0.0760, -0.0673], |
|
|
[ 0.0064, 0.0247, -0.0130, ..., -0.0642, -0.0286, -0.0423]], |
|
|
device='cuda:0'), grad: tensor([[-3.9160e-05, -1.7390e-05, -2.0787e-05, ..., -1.6809e-05, |
|
|
-2.4855e-05, -2.4080e-05], |
|
|
[ 3.4142e-04, 8.4937e-05, 9.1851e-05, ..., 4.8280e-05, |
|
|
8.7559e-05, 8.2612e-05], |
|
|
[-1.4208e-05, 3.8259e-06, 3.5446e-06, ..., -7.3127e-06, |
|
|
-8.6874e-06, -8.7619e-06], |
|
|
..., |
|
|
[ 1.3396e-05, 3.8035e-06, 4.4145e-06, ..., 3.3602e-06, |
|
|
5.0776e-06, 4.9211e-06], |
|
|
[-2.9411e-06, -3.5223e-06, -4.0568e-06, ..., 6.3144e-07, |
|
|
1.1791e-06, 1.0673e-06], |
|
|
[-3.4285e-04, -7.9155e-05, -8.3864e-05, ..., -4.0293e-05, |
|
|
-7.7784e-05, -7.2777e-05]], device='cuda:0') |
|
|
Epoch 21, bias, value: tensor([ 0.0515, -0.0168, 0.0326, 0.0178, -0.0700, -0.0094, 0.0005], |
|
|
device='cuda:0'), grad: tensor([-7.2360e-05, 8.0204e-04, -5.6326e-05, 1.1629e-04, 3.0488e-05, |
|
|
1.3709e-06, -8.2064e-04], device='cuda:0') |
|
|
588 |
|
|
0.00811744900929367 |
|
|
changing lr |
|
|
---------------------saving model at epoch 20---------------------------------------------------- |
|
|
epoch 20, time 819.64, cls_loss 0.0094 cls_loss_mapping 0.0289 cls_loss_causal 0.4921 re_mapping 0.0375 re_causal 0.0426 |
|
|
Epoch 22, weight, value: tensor([[ 0.0755, 0.0697, 0.0617, ..., 0.0272, 0.0746, 0.0672], |
|
|
[ 0.0486, 0.0338, 0.0103, ..., -0.0204, -0.0197, -0.0172], |
|
|
[-0.0257, -0.0337, 0.0127, ..., 0.0108, -0.0398, -0.0138], |
|
|
..., |
|
|
[-0.1503, -0.1157, -0.0884, ..., 0.0436, 0.0470, 0.0282], |
|
|
[ 0.1188, 0.0868, 0.0848, ..., -0.0600, -0.0743, -0.0659], |
|
|
[ 0.0069, 0.0244, -0.0126, ..., -0.0629, -0.0281, -0.0414]], |
|
|
device='cuda:0'), grad: tensor([[-1.3173e-04, -6.0469e-05, -6.1989e-05, ..., -9.6112e-06, |
|
|
-2.5392e-05, -2.3678e-05], |
|
|
[-4.2051e-05, -3.4384e-06, -4.9137e-06, ..., -1.7628e-05, |
|
|
-2.1622e-05, -2.1964e-05], |
|
|
[ 2.7984e-05, 1.1958e-05, 1.2219e-05, ..., 3.5465e-06, |
|
|
6.8322e-06, 6.5304e-06], |
|
|
..., |
|
|
[ 9.0480e-05, 3.0965e-05, 3.3319e-05, ..., 1.5631e-05, |
|
|
2.5705e-05, 2.5153e-05], |
|
|
[ 1.5885e-05, 5.9344e-06, 6.1505e-06, ..., 2.3320e-06, |
|
|
4.1239e-06, 3.9600e-06], |
|
|
[ 3.3468e-05, 1.1489e-05, 1.2018e-05, ..., 5.7481e-06, |
|
|
9.4101e-06, 9.1642e-06]], device='cuda:0') |
|
|
Epoch 22, bias, value: tensor([ 0.0416, -0.0155, 0.0282, 0.0205, -0.0621, -0.0081, 0.0015], |
|
|
device='cuda:0'), grad: tensor([-2.2805e-04, -1.2910e-04, 5.2780e-05, 7.2978e-06, 1.9133e-04, |
|
|
3.2693e-05, 7.3075e-05], device='cuda:0') |
|
|
588 |
|
|
0.007938926261462368 |
|
|
changing lr |
|
|
epoch 21, time 817.08, cls_loss 0.0062 cls_loss_mapping 0.0266 cls_loss_causal 0.4959 re_mapping 0.0337 re_causal 0.0396 |
|
|
Epoch 23, weight, value: tensor([[ 0.0774, 0.0699, 0.0624, ..., 0.0271, 0.0741, 0.0668], |
|
|
[ 0.0476, 0.0338, 0.0106, ..., -0.0205, -0.0197, -0.0172], |
|
|
[-0.0258, -0.0335, 0.0118, ..., 0.0105, -0.0391, -0.0137], |
|
|
..., |
|
|
[-0.1503, -0.1148, -0.0882, ..., 0.0425, 0.0455, 0.0272], |
|
|
[ 0.1166, 0.0855, 0.0835, ..., -0.0589, -0.0731, -0.0647], |
|
|
[ 0.0065, 0.0236, -0.0126, ..., -0.0617, -0.0276, -0.0406]], |
|
|
device='cuda:0'), grad: tensor([[-2.8193e-05, -1.7360e-05, -2.0295e-05, ..., 4.8876e-06, |
|
|
2.8685e-07, 0.0000e+00], |
|
|
[ 3.0443e-05, 5.6103e-06, 8.6799e-06, ..., 1.1146e-05, |
|
|
1.4178e-05, 1.3806e-05], |
|
|
[ 5.9158e-06, 1.6000e-06, 2.3283e-06, ..., 3.2876e-06, |
|
|
3.8818e-06, 3.7644e-06], |
|
|
..., |
|
|
[ 2.7075e-05, 8.2180e-06, 1.0900e-05, ..., 4.9397e-06, |
|
|
8.1584e-06, 8.0317e-06], |
|
|
[ 1.0602e-05, 2.5760e-06, 3.6433e-06, ..., 3.0547e-06, |
|
|
4.2059e-06, 4.1127e-06], |
|
|
[ 1.5199e-05, 4.4219e-06, 5.9418e-06, ..., 3.2336e-06, |
|
|
5.0031e-06, 4.9137e-06]], device='cuda:0') |
|
|
Epoch 23, bias, value: tensor([ 0.0455, -0.0160, 0.0273, 0.0216, -0.0653, -0.0090, 0.0019], |
|
|
device='cuda:0'), grad: tensor([-3.4958e-05, 7.6354e-05, 1.3001e-05, -1.7130e-04, 5.8532e-05, |
|
|
2.4796e-05, 3.3438e-05], device='cuda:0') |
|
|
588 |
|
|
0.007754484907260515 |
|
|
changing lr |
|
|
epoch 22, time 813.28, cls_loss 0.0129 cls_loss_mapping 0.0309 cls_loss_causal 0.4926 re_mapping 0.0328 re_causal 0.0391 |
|
|
Epoch 24, weight, value: tensor([[ 0.0758, 0.0697, 0.0622, ..., 0.0260, 0.0721, 0.0649], |
|
|
[ 0.0471, 0.0335, 0.0109, ..., -0.0200, -0.0191, -0.0167], |
|
|
[-0.0247, -0.0333, 0.0110, ..., 0.0097, -0.0386, -0.0138], |
|
|
..., |
|
|
[-0.1486, -0.1136, -0.0874, ..., 0.0423, 0.0453, 0.0273], |
|
|
[ 0.1150, 0.0843, 0.0823, ..., -0.0578, -0.0717, -0.0635], |
|
|
[ 0.0080, 0.0234, -0.0121, ..., -0.0597, -0.0263, -0.0389]], |
|
|
device='cuda:0'), grad: tensor([[-2.2335e-03, -7.6771e-04, -8.5497e-04, ..., -2.6250e-04, |
|
|
-6.4802e-04, -6.1846e-04], |
|
|
[ 5.7936e-04, 2.0504e-04, 2.0194e-04, ..., 7.2360e-05, |
|
|
1.7726e-04, 1.7560e-04], |
|
|
[ 1.0309e-03, 3.1829e-04, 3.8576e-04, ..., 1.1569e-04, |
|
|
2.8825e-04, 2.7323e-04], |
|
|
..., |
|
|
[ 5.9754e-05, 5.4240e-05, 6.3658e-05, ..., 6.6273e-06, |
|
|
1.5646e-05, 7.1041e-06], |
|
|
[ 7.4506e-05, 2.3946e-05, 2.7284e-05, ..., 8.6576e-06, |
|
|
2.1428e-05, 2.0623e-05], |
|
|
[ 3.7026e-04, 1.2684e-04, 1.2946e-04, ..., 4.5449e-05, |
|
|
1.1164e-04, 1.1009e-04]], device='cuda:0') |
|
|
Epoch 24, bias, value: tensor([ 0.0413, -0.0152, 0.0281, 0.0178, -0.0638, -0.0087, 0.0063], |
|
|
device='cuda:0'), grad: tensor([-4.5090e-03, 1.1473e-03, 2.1763e-03, 2.4605e-04, 3.8803e-05, |
|
|
1.5461e-04, 7.4577e-04], device='cuda:0') |
|
|
588 |
|
|
0.007564496387029534 |
|
|
changing lr |
|
|
epoch 23, time 838.63, cls_loss 0.0068 cls_loss_mapping 0.0268 cls_loss_causal 0.4974 re_mapping 0.0309 re_causal 0.0379 |
|
|
Epoch 25, weight, value: tensor([[ 0.0772, 0.0701, 0.0627, ..., 0.0253, 0.0711, 0.0641], |
|
|
[ 0.0474, 0.0329, 0.0109, ..., -0.0192, -0.0180, -0.0156], |
|
|
[-0.0240, -0.0325, 0.0112, ..., 0.0092, -0.0379, -0.0136], |
|
|
..., |
|
|
[-0.1461, -0.1120, -0.0863, ..., 0.0420, 0.0448, 0.0272], |
|
|
[ 0.1120, 0.0828, 0.0807, ..., -0.0569, -0.0707, -0.0627], |
|
|
[ 0.0052, 0.0222, -0.0129, ..., -0.0591, -0.0268, -0.0390]], |
|
|
device='cuda:0'), grad: tensor([[-5.1880e-04, -1.9383e-04, -1.9336e-04, ..., -4.0913e-04, |
|
|
-5.2977e-04, -5.5504e-04], |
|
|
[ 2.3937e-04, 4.2588e-05, 5.4538e-05, ..., 1.3030e-04, |
|
|
1.8048e-04, 1.8859e-04], |
|
|
[ 6.7043e-04, 5.6118e-05, 1.1438e-04, ..., 1.1480e-04, |
|
|
2.1446e-04, 2.1780e-04], |
|
|
..., |
|
|
[-7.0429e-04, 3.7283e-05, -5.2094e-05, ..., 6.7055e-05, |
|
|
-1.4283e-05, -4.5113e-06], |
|
|
[ 8.7440e-05, 1.3962e-05, 1.9893e-05, ..., 2.1994e-05, |
|
|
3.5554e-05, 3.6418e-05], |
|
|
[ 1.3733e-04, 2.6837e-05, 3.4779e-05, ..., 4.2856e-05, |
|
|
6.5029e-05, 6.6936e-05]], device='cuda:0') |
|
|
Epoch 25, bias, value: tensor([ 0.0444, -0.0117, 0.0283, 0.0172, -0.0612, -0.0112, -0.0002], |
|
|
device='cuda:0'), grad: tensor([-0.0020, 0.0009, 0.0019, 0.0003, -0.0017, 0.0002, 0.0004], |
|
|
device='cuda:0') |
|
|
588 |
|
|
0.007369343312364995 |
|
|
changing lr |
|
|
epoch 24, time 812.60, cls_loss 0.0083 cls_loss_mapping 0.0233 cls_loss_causal 0.4944 re_mapping 0.0281 re_causal 0.0354 |
|
|
Epoch 26, weight, value: tensor([[ 0.0793, 0.0713, 0.0640, ..., 0.0250, 0.0705, 0.0636], |
|
|
[ 0.0455, 0.0320, 0.0104, ..., -0.0190, -0.0178, -0.0154], |
|
|
[-0.0262, -0.0326, 0.0101, ..., 0.0075, -0.0389, -0.0151], |
|
|
..., |
|
|
[-0.1435, -0.1111, -0.0858, ..., 0.0423, 0.0453, 0.0279], |
|
|
[ 0.1096, 0.0815, 0.0795, ..., -0.0560, -0.0697, -0.0618], |
|
|
[ 0.0047, 0.0217, -0.0128, ..., -0.0581, -0.0266, -0.0386]], |
|
|
device='cuda:0'), grad: tensor([[-1.9336e-04, -8.9705e-05, -1.1009e-04, ..., -4.8935e-05, |
|
|
-6.3360e-05, -5.6565e-05], |
|
|
[ 1.6642e-04, 6.4254e-05, 7.9572e-05, ..., 4.3362e-05, |
|
|
5.6803e-05, 5.0813e-05], |
|
|
[-5.6922e-06, -6.2399e-08, -2.3283e-07, ..., -2.2445e-07, |
|
|
-1.1520e-06, -1.0673e-06], |
|
|
..., |
|
|
[ 2.4468e-05, 1.0535e-05, 1.3068e-05, ..., 6.4000e-06, |
|
|
8.0764e-06, 7.2680e-06], |
|
|
[ 2.3529e-05, 9.8720e-06, 1.1981e-05, ..., 5.3756e-06, |
|
|
7.2233e-06, 6.5342e-06], |
|
|
[-4.8280e-06, -1.2685e-06, -8.4285e-07, ..., 8.8289e-07, |
|
|
5.6252e-07, 1.9465e-07]], device='cuda:0') |
|
|
Epoch 26, bias, value: tensor([ 0.0483, -0.0136, 0.0214, 0.0208, -0.0566, -0.0131, -0.0017], |
|
|
device='cuda:0'), grad: tensor([-5.5456e-04, 4.6062e-04, -7.7561e-06, -2.8417e-05, 6.7294e-05, |
|
|
6.2883e-05, -7.5717e-07], device='cuda:0') |
|
|
588 |
|
|
0.0071694186955877925 |
|
|
changing lr |
|
|
epoch 25, time 815.09, cls_loss 0.0045 cls_loss_mapping 0.0209 cls_loss_causal 0.4697 re_mapping 0.0255 re_causal 0.0334 |
|
|
Epoch 27, weight, value: tensor([[ 0.0799, 0.0712, 0.0642, ..., 0.0250, 0.0700, 0.0632], |
|
|
[ 0.0423, 0.0301, 0.0090, ..., -0.0189, -0.0177, -0.0154], |
|
|
[-0.0227, -0.0313, 0.0106, ..., 0.0076, -0.0376, -0.0143], |
|
|
..., |
|
|
[-0.1438, -0.1101, -0.0854, ..., 0.0410, 0.0437, 0.0268], |
|
|
[ 0.1093, 0.0812, 0.0792, ..., -0.0550, -0.0684, -0.0607], |
|
|
[ 0.0045, 0.0212, -0.0128, ..., -0.0570, -0.0263, -0.0380]], |
|
|
device='cuda:0'), grad: tensor([[ 7.5102e-04, 2.4223e-04, 2.5082e-04, ..., 8.1003e-05, |
|
|
1.7059e-04, 1.6427e-04], |
|
|
[-2.1534e-03, -4.9210e-04, -5.3501e-04, ..., -4.0579e-04, |
|
|
-7.5388e-04, -7.0095e-04], |
|
|
[ 6.5565e-04, 1.3733e-04, 1.5342e-04, ..., 1.4067e-04, |
|
|
2.4140e-04, 2.2197e-04], |
|
|
..., |
|
|
[ 4.6134e-04, 1.2887e-04, 1.3399e-04, ..., 6.0886e-05, |
|
|
1.3816e-04, 1.3185e-04], |
|
|
[ 1.4603e-04, 4.7207e-05, 4.9174e-05, ..., 1.7181e-05, |
|
|
3.4273e-05, 3.2783e-05], |
|
|
[-2.2185e-04, -1.3363e-04, -1.3316e-04, ..., 1.8224e-05, |
|
|
2.9132e-05, 2.1964e-05]], device='cuda:0') |
|
|
Epoch 27, bias, value: tensor([ 0.0511, -0.0171, 0.0282, 0.0184, -0.0614, -0.0114, -0.0023], |
|
|
device='cuda:0'), grad: tensor([ 0.0015, -0.0049, 0.0015, 0.0008, 0.0010, 0.0003, -0.0003], |
|
|
device='cuda:0') |
|
|
588 |
|
|
0.0069651251582696205 |
|
|
changing lr |
|
|
epoch 26, time 811.56, cls_loss 0.0048 cls_loss_mapping 0.0252 cls_loss_causal 0.4728 re_mapping 0.0231 re_causal 0.0307 |
|
|
Epoch 28, weight, value: tensor([[ 0.0781, 0.0708, 0.0640, ..., 0.0244, 0.0687, 0.0620], |
|
|
[ 0.0419, 0.0297, 0.0090, ..., -0.0184, -0.0171, -0.0149], |
|
|
[-0.0219, -0.0308, 0.0105, ..., 0.0071, -0.0371, -0.0143], |
|
|
..., |
|
|
[-0.1406, -0.1084, -0.0842, ..., 0.0407, 0.0434, 0.0268], |
|
|
[ 0.1079, 0.0803, 0.0783, ..., -0.0541, -0.0674, -0.0597], |
|
|
[ 0.0038, 0.0204, -0.0132, ..., -0.0560, -0.0258, -0.0374]], |
|
|
device='cuda:0'), grad: tensor([[ 2.9993e-04, 5.0426e-05, 7.1168e-05, ..., 6.6698e-05, |
|
|
1.1802e-04, 1.0926e-04], |
|
|
[ 6.8903e-05, 8.4564e-06, 1.6898e-05, ..., 2.1636e-05, |
|
|
3.6865e-05, 3.4124e-05], |
|
|
[-2.8992e-04, -4.8310e-05, -7.9393e-05, ..., -7.9989e-05, |
|
|
-1.5891e-04, -1.5199e-04], |
|
|
..., |
|
|
[-1.2755e-04, -2.4274e-05, -2.2009e-05, ..., -1.2144e-05, |
|
|
-2.3648e-05, -2.0996e-05], |
|
|
[ 3.1620e-05, 5.1260e-06, 8.0764e-06, ..., 8.0988e-06, |
|
|
1.3381e-05, 1.2286e-05], |
|
|
[ 5.0277e-05, 7.9572e-06, 1.3076e-05, ..., 1.3307e-05, |
|
|
2.3946e-05, 2.2426e-05]], device='cuda:0') |
|
|
Epoch 28, bias, value: tensor([ 0.0468, -0.0160, 0.0291, 0.0164, -0.0566, -0.0112, -0.0030], |
|
|
device='cuda:0'), grad: tensor([ 8.6927e-04, 2.4223e-04, -1.1597e-03, 3.6865e-05, -2.4700e-04, |
|
|
9.1076e-05, 1.6832e-04], device='cuda:0') |
|
|
588 |
|
|
0.006756874120406716 |
|
|
changing lr |
|
|
epoch 27, time 812.10, cls_loss 0.0076 cls_loss_mapping 0.0221 cls_loss_causal 0.4726 re_mapping 0.0227 re_causal 0.0314 |
|
|
Epoch 29, weight, value: tensor([[ 0.0767, 0.0703, 0.0636, ..., 0.0237, 0.0673, 0.0607], |
|
|
[ 0.0415, 0.0293, 0.0090, ..., -0.0181, -0.0167, -0.0145], |
|
|
[-0.0233, -0.0309, 0.0096, ..., 0.0065, -0.0370, -0.0145], |
|
|
..., |
|
|
[-0.1376, -0.1069, -0.0831, ..., 0.0406, 0.0437, 0.0272], |
|
|
[ 0.1074, 0.0792, 0.0773, ..., -0.0532, -0.0663, -0.0588], |
|
|
[ 0.0036, 0.0202, -0.0129, ..., -0.0553, -0.0257, -0.0370]], |
|
|
device='cuda:0'), grad: tensor([[ 4.3094e-05, 5.2899e-06, 8.3968e-06, ..., 3.5278e-06, |
|
|
6.4224e-06, 6.6943e-06], |
|
|
[ 1.5283e-04, 2.0862e-05, 3.1590e-05, ..., 1.8626e-05, |
|
|
2.8476e-05, 2.9594e-05], |
|
|
[ 3.7730e-05, 5.0962e-06, 7.6368e-06, ..., 5.5395e-06, |
|
|
8.8438e-06, 8.7544e-06], |
|
|
..., |
|
|
[-1.3523e-06, 7.4506e-08, 1.4901e-07, ..., -4.0233e-06, |
|
|
-7.0371e-06, -5.8599e-06], |
|
|
[ 4.2766e-05, 6.2548e-06, 9.1791e-06, ..., 2.9728e-06, |
|
|
5.7705e-06, 6.0350e-06], |
|
|
[ 9.9599e-05, 1.4491e-05, 2.1368e-05, ..., 5.8301e-06, |
|
|
1.1764e-05, 1.2614e-05]], device='cuda:0') |
|
|
Epoch 29, bias, value: tensor([ 0.0437, -0.0154, 0.0251, 0.0160, -0.0519, -0.0080, -0.0043], |
|
|
device='cuda:0'), grad: tensor([ 1.0687e-04, 3.7146e-04, 9.4533e-05, -9.0218e-04, -1.3970e-05, |
|
|
1.0341e-04, 2.3937e-04], device='cuda:0') |
|
|
588 |
|
|
0.00654508497187474 |
|
|
changing lr |
|
|
epoch 28, time 813.77, cls_loss 0.0030 cls_loss_mapping 0.0186 cls_loss_causal 0.4803 re_mapping 0.0193 re_causal 0.0285 |
|
|
Epoch 30, weight, value: tensor([[ 0.0781, 0.0712, 0.0646, ..., 0.0232, 0.0664, 0.0599], |
|
|
[ 0.0420, 0.0290, 0.0091, ..., -0.0173, -0.0157, -0.0134], |
|
|
[-0.0223, -0.0306, 0.0091, ..., 0.0059, -0.0366, -0.0144], |
|
|
..., |
|
|
[-0.1383, -0.1062, -0.0829, ..., 0.0398, 0.0426, 0.0264], |
|
|
[ 0.1040, 0.0778, 0.0758, ..., -0.0526, -0.0658, -0.0584], |
|
|
[ 0.0040, 0.0198, -0.0128, ..., -0.0544, -0.0254, -0.0364]], |
|
|
device='cuda:0'), grad: tensor([[-4.5508e-05, -2.1264e-05, -2.1949e-05, ..., -2.3674e-06, |
|
|
-5.2936e-06, -4.8615e-06], |
|
|
[-2.9489e-05, -6.8396e-06, -6.5006e-06, ..., -4.3102e-06, |
|
|
-1.1213e-05, -1.0550e-05], |
|
|
[-2.6766e-06, -2.5257e-06, -2.3134e-06, ..., 2.0117e-07, |
|
|
1.0058e-07, -2.6077e-08], |
|
|
..., |
|
|
[ 3.0696e-05, 1.1295e-05, 1.2062e-05, ..., 3.9376e-06, |
|
|
6.7912e-06, 6.2697e-06], |
|
|
[ 1.0431e-05, 3.5726e-06, 3.8259e-06, ..., 1.7378e-06, |
|
|
2.6803e-06, 2.4978e-06], |
|
|
[ 4.9114e-05, 1.3009e-05, 1.3947e-05, ..., 9.2536e-06, |
|
|
1.6272e-05, 1.5147e-05]], device='cuda:0') |
|
|
Epoch 30, bias, value: tensor([ 0.0453, -0.0113, 0.0267, 0.0166, -0.0566, -0.0127, -0.0029], |
|
|
device='cuda:0'), grad: tensor([-7.7009e-05, -7.4744e-05, -4.2170e-06, -4.6015e-05, 6.2525e-05, |
|
|
2.2516e-05, 1.1718e-04], device='cuda:0') |
|
|
588 |
|
|
0.006330184227833378 |
|
|
changing lr |
|
|
epoch 29, time 812.07, cls_loss 0.0059 cls_loss_mapping 0.0181 cls_loss_causal 0.4874 re_mapping 0.0183 re_causal 0.0279 |
|
|
Epoch 31, weight, value: tensor([[ 0.0769, 0.0709, 0.0645, ..., 0.0224, 0.0651, 0.0587], |
|
|
[ 0.0414, 0.0288, 0.0094, ..., -0.0171, -0.0156, -0.0134], |
|
|
[-0.0222, -0.0306, 0.0084, ..., 0.0058, -0.0357, -0.0140], |
|
|
..., |
|
|
[-0.1354, -0.1048, -0.0819, ..., 0.0395, 0.0425, 0.0266], |
|
|
[ 0.1018, 0.0766, 0.0746, ..., -0.0519, -0.0650, -0.0577], |
|
|
[ 0.0035, 0.0195, -0.0127, ..., -0.0537, -0.0253, -0.0361]], |
|
|
device='cuda:0'), grad: tensor([[ 3.9139e-03, 5.4932e-04, 3.8695e-04, ..., 3.5524e-04, |
|
|
7.6008e-04, 8.3113e-04], |
|
|
[-7.1466e-05, -4.0025e-05, -4.5657e-05, ..., -2.9542e-06, |
|
|
-7.9945e-06, -5.8375e-06], |
|
|
[ 1.5774e-03, 2.2268e-04, 1.6809e-04, ..., 1.9121e-04, |
|
|
3.5977e-04, 3.7146e-04], |
|
|
..., |
|
|
[-5.7487e-03, -7.9298e-04, -5.6410e-04, ..., -5.7840e-04, |
|
|
-1.1806e-03, -1.2665e-03], |
|
|
[ 4.4435e-05, 7.5325e-06, 6.5975e-06, ..., 4.9621e-06, |
|
|
9.7528e-06, 9.9167e-06], |
|
|
[ 1.0777e-04, 2.2024e-05, 2.0474e-05, ..., 1.0796e-05, |
|
|
2.1920e-05, 2.2098e-05]], device='cuda:0') |
|
|
Epoch 31, bias, value: tensor([ 0.0428, -0.0124, 0.0279, 0.0186, -0.0526, -0.0146, -0.0046], |
|
|
device='cuda:0'), grad: tensor([ 1.0109e-02, -8.3566e-05, 4.1885e-03, 4.4632e-04, -1.5038e-02, |
|
|
1.1331e-04, 2.6035e-04], device='cuda:0') |
|
|
588 |
|
|
0.006112604669781575 |
|
|
changing lr |
|
|
epoch 30, time 810.53, cls_loss 0.0028 cls_loss_mapping 0.0157 cls_loss_causal 0.4478 re_mapping 0.0176 re_causal 0.0272 |
|
|
Epoch 32, weight, value: tensor([[ 0.0778, 0.0712, 0.0649, ..., 0.0224, 0.0645, 0.0582], |
|
|
[ 0.0394, 0.0280, 0.0090, ..., -0.0171, -0.0157, -0.0135], |
|
|
[-0.0223, -0.0304, 0.0079, ..., 0.0056, -0.0352, -0.0139], |
|
|
..., |
|
|
[-0.1338, -0.1039, -0.0814, ..., 0.0390, 0.0420, 0.0264], |
|
|
[ 0.1008, 0.0758, 0.0739, ..., -0.0511, -0.0640, -0.0568], |
|
|
[ 0.0047, 0.0196, -0.0122, ..., -0.0528, -0.0248, -0.0354]], |
|
|
device='cuda:0'), grad: tensor([[ 1.5440e-03, 2.1601e-04, 3.2449e-04, ..., 5.2166e-04, |
|
|
7.2908e-04, 7.3481e-04], |
|
|
[ 8.5890e-05, 3.0577e-05, 2.9802e-05, ..., 9.7379e-06, |
|
|
1.2994e-05, 1.3396e-05], |
|
|
[ 1.4953e-05, 3.4757e-06, 2.5313e-06, ..., 4.3176e-06, |
|
|
5.6326e-06, 5.4464e-06], |
|
|
..., |
|
|
[-1.4734e-03, -1.8299e-04, -2.9349e-04, ..., -5.2118e-04, |
|
|
-7.2861e-04, -7.3338e-04], |
|
|
[ 5.3227e-05, 2.0131e-05, 2.0579e-05, ..., 4.8876e-06, |
|
|
7.8529e-06, 7.9647e-06], |
|
|
[-1.9717e-04, -8.7500e-05, -8.7082e-05, ..., -3.6918e-06, |
|
|
-1.1519e-05, -1.1921e-05]], device='cuda:0') |
|
|
Epoch 32, bias, value: tensor([ 0.0444, -0.0149, 0.0270, 0.0153, -0.0508, -0.0138, -0.0022], |
|
|
device='cuda:0'), grad: tensor([ 4.4670e-03, 1.7548e-04, 4.0948e-05, -9.1374e-05, -4.3602e-03, |
|
|
1.0496e-04, -3.3760e-04], device='cuda:0') |
|
|
588 |
|
|
0.005892784473993186 |
|
|
changing lr |
|
|
epoch 31, time 813.01, cls_loss 0.0045 cls_loss_mapping 0.0195 cls_loss_causal 0.4568 re_mapping 0.0153 re_causal 0.0249 |
|
|
Epoch 33, weight, value: tensor([[ 0.0805, 0.0723, 0.0664, ..., 0.0227, 0.0647, 0.0585], |
|
|
[ 0.0396, 0.0276, 0.0089, ..., -0.0171, -0.0156, -0.0135], |
|
|
[-0.0233, -0.0305, 0.0070, ..., 0.0051, -0.0351, -0.0142], |
|
|
..., |
|
|
[-0.1332, -0.1033, -0.0812, ..., 0.0383, 0.0413, 0.0259], |
|
|
[ 0.0997, 0.0750, 0.0731, ..., -0.0504, -0.0631, -0.0561], |
|
|
[ 0.0031, 0.0189, -0.0125, ..., -0.0522, -0.0249, -0.0353]], |
|
|
device='cuda:0'), grad: tensor([[-1.4362e-03, -3.2687e-04, -3.6669e-04, ..., -3.9530e-04, |
|
|
-6.0272e-04, -6.3896e-04], |
|
|
[ 8.1539e-05, 1.5169e-05, 1.7136e-05, ..., 1.8433e-05, |
|
|
3.4511e-05, 3.5882e-05], |
|
|
[ 2.3198e-04, 6.6042e-05, 6.8784e-05, ..., 7.9215e-05, |
|
|
1.0860e-04, 1.1557e-04], |
|
|
..., |
|
|
[ 2.8539e-04, 5.8323e-05, 6.8307e-05, ..., 6.3658e-05, |
|
|
1.0639e-04, 1.1194e-04], |
|
|
[ 3.8475e-05, 8.2403e-06, 9.5218e-06, ..., 7.7859e-06, |
|
|
1.3344e-05, 1.3851e-05], |
|
|
[ 1.0186e-04, 2.2292e-05, 2.5555e-05, ..., 2.1353e-05, |
|
|
3.5822e-05, 3.7313e-05]], device='cuda:0') |
|
|
Epoch 33, bias, value: tensor([ 0.0503, -0.0127, 0.0242, 0.0137, -0.0515, -0.0136, -0.0056], |
|
|
device='cuda:0'), grad: tensor([-3.3913e-03, 1.8573e-04, 5.0020e-04, 1.6565e-03, 7.0381e-04, |
|
|
9.3997e-05, 2.4843e-04], device='cuda:0') |
|
|
588 |
|
|
0.00567116632908828 |
|
|
changing lr |
|
|
epoch 32, time 813.20, cls_loss 0.0044 cls_loss_mapping 0.0188 cls_loss_causal 0.4588 re_mapping 0.0156 re_causal 0.0258 |
|
|
Epoch 34, weight, value: tensor([[ 0.0806, 0.0727, 0.0669, ..., 0.0224, 0.0639, 0.0578], |
|
|
[ 0.0380, 0.0269, 0.0086, ..., -0.0171, -0.0157, -0.0136], |
|
|
[-0.0225, -0.0304, 0.0066, ..., 0.0050, -0.0343, -0.0138], |
|
|
..., |
|
|
[-0.1327, -0.1026, -0.0809, ..., 0.0376, 0.0405, 0.0254], |
|
|
[ 0.0984, 0.0742, 0.0722, ..., -0.0497, -0.0624, -0.0554], |
|
|
[ 0.0033, 0.0186, -0.0124, ..., -0.0514, -0.0246, -0.0348]], |
|
|
device='cuda:0'), grad: tensor([[ 7.3204e-03, 2.9449e-03, 3.4027e-03, ..., 8.6594e-04, |
|
|
1.3466e-03, 1.3685e-03], |
|
|
[-8.4152e-03, -3.3875e-03, -3.9139e-03, ..., -9.9945e-04, |
|
|
-1.5469e-03, -1.5726e-03], |
|
|
[ 2.3198e-04, 7.9691e-05, 9.4295e-05, ..., 2.4244e-05, |
|
|
4.5002e-05, 4.4882e-05], |
|
|
..., |
|
|
[ 2.0623e-05, 3.5644e-05, 3.6538e-05, ..., 1.2584e-05, |
|
|
2.5295e-06, 4.0717e-06], |
|
|
[ 2.9469e-04, 1.1790e-04, 1.3626e-04, ..., 3.4988e-05, |
|
|
5.4449e-05, 5.5283e-05], |
|
|
[ 2.4796e-04, 9.5606e-05, 1.1057e-04, ..., 2.9609e-05, |
|
|
4.6521e-05, 4.6968e-05]], device='cuda:0') |
|
|
Epoch 34, bias, value: tensor([ 0.0493, -0.0150, 0.0264, 0.0159, -0.0530, -0.0139, -0.0048], |
|
|
device='cuda:0'), grad: tensor([ 1.3618e-02, -1.5640e-02, 4.8470e-04, 5.6648e-04, -6.4909e-05, |
|
|
5.5075e-04, 4.7946e-04], device='cuda:0') |
|
|
588 |
|
|
0.00544819654451717 |
|
|
changing lr |
|
|
epoch 33, time 818.78, cls_loss 0.0023 cls_loss_mapping 0.0129 cls_loss_causal 0.4522 re_mapping 0.0148 re_causal 0.0254 |
|
|
Epoch 35, weight, value: tensor([[ 0.0796, 0.0724, 0.0668, ..., 0.0219, 0.0628, 0.0569], |
|
|
[ 0.0381, 0.0267, 0.0087, ..., -0.0168, -0.0154, -0.0133], |
|
|
[-0.0228, -0.0304, 0.0060, ..., 0.0050, -0.0337, -0.0136], |
|
|
..., |
|
|
[-0.1316, -0.1018, -0.0804, ..., 0.0372, 0.0401, 0.0253], |
|
|
[ 0.0989, 0.0742, 0.0721, ..., -0.0490, -0.0614, -0.0546], |
|
|
[ 0.0030, 0.0181, -0.0125, ..., -0.0507, -0.0242, -0.0343]], |
|
|
device='cuda:0'), grad: tensor([[ 2.1183e-04, 5.8502e-05, 6.9916e-05, ..., 1.9699e-05, |
|
|
5.2303e-05, 4.9174e-05], |
|
|
[-8.1635e-04, -2.5177e-04, -3.0231e-04, ..., -5.6922e-05, |
|
|
-1.7893e-04, -1.6677e-04], |
|
|
[ 1.2612e-04, 2.9176e-05, 3.7014e-05, ..., 2.0146e-05, |
|
|
3.7521e-05, 3.6120e-05], |
|
|
..., |
|
|
[-2.9594e-05, 7.0855e-06, 7.8827e-06, ..., -1.1660e-05, |
|
|
-1.6466e-05, -1.6406e-05], |
|
|
[ 7.7128e-05, 2.2039e-05, 2.6911e-05, ..., 6.6794e-06, |
|
|
1.7837e-05, 1.6809e-05], |
|
|
[ 3.9768e-04, 1.2183e-04, 1.4615e-04, ..., 2.8759e-05, |
|
|
8.8274e-05, 8.2433e-05]], device='cuda:0') |
|
|
Epoch 35, bias, value: tensor([ 0.0468, -0.0135, 0.0256, 0.0136, -0.0524, -0.0108, -0.0047], |
|
|
device='cuda:0'), grad: tensor([ 4.9591e-04, -1.8606e-03, 3.1376e-04, 7.2539e-05, -1.0914e-04, |
|
|
1.8167e-04, 9.0694e-04], device='cuda:0') |
|
|
588 |
|
|
0.005224324151752577 |
|
|
changing lr |
|
|
epoch 34, time 809.73, cls_loss 0.0030 cls_loss_mapping 0.0131 cls_loss_causal 0.4321 re_mapping 0.0133 re_causal 0.0234 |
|
|
Epoch 36, weight, value: tensor([[ 0.0792, 0.0726, 0.0671, ..., 0.0215, 0.0620, 0.0561], |
|
|
[ 0.0384, 0.0266, 0.0090, ..., -0.0166, -0.0150, -0.0129], |
|
|
[-0.0234, -0.0303, 0.0054, ..., 0.0045, -0.0336, -0.0138], |
|
|
..., |
|
|
[-0.1299, -0.1010, -0.0799, ..., 0.0368, 0.0399, 0.0252], |
|
|
[ 0.0972, 0.0733, 0.0713, ..., -0.0484, -0.0608, -0.0540], |
|
|
[ 0.0034, 0.0177, -0.0125, ..., -0.0499, -0.0239, -0.0338]], |
|
|
device='cuda:0'), grad: tensor([[-3.4839e-05, -1.7911e-05, -1.8388e-05, ..., -9.1717e-06, |
|
|
-1.5274e-05, -1.3106e-05], |
|
|
[-4.1604e-05, -1.1638e-05, -1.3761e-05, ..., -7.8678e-06, |
|
|
-1.2323e-05, -1.2226e-05], |
|
|
[-2.9266e-05, -5.5470e-06, -6.1207e-06, ..., -4.1202e-06, |
|
|
-8.7023e-06, -8.1137e-06], |
|
|
..., |
|
|
[ 3.9935e-05, 1.6361e-05, 1.7539e-05, ..., 9.4622e-06, |
|
|
1.5900e-05, 1.4067e-05], |
|
|
[ 9.3356e-06, 2.7418e-06, 3.0492e-06, ..., 1.7481e-06, |
|
|
2.9411e-06, 2.8014e-06], |
|
|
[ 2.9460e-05, 9.2387e-06, 1.0267e-05, ..., 5.3309e-06, |
|
|
9.0525e-06, 8.5756e-06]], device='cuda:0') |
|
|
Epoch 36, bias, value: tensor([ 0.0453, -0.0114, 0.0233, 0.0128, -0.0499, -0.0123, -0.0032], |
|
|
device='cuda:0'), grad: tensor([-7.5638e-05, -9.5069e-05, -7.8142e-05, 6.7353e-05, 8.9824e-05, |
|
|
2.2471e-05, 6.9141e-05], device='cuda:0') |
|
|
588 |
|
|
0.005000000000000003 |
|
|
changing lr |
|
|
epoch 35, time 808.54, cls_loss 0.0037 cls_loss_mapping 0.0152 cls_loss_causal 0.4379 re_mapping 0.0126 re_causal 0.0233 |
|
|
Epoch 37, weight, value: tensor([[ 0.0787, 0.0727, 0.0673, ..., 0.0212, 0.0613, 0.0555], |
|
|
[ 0.0376, 0.0263, 0.0089, ..., -0.0166, -0.0151, -0.0130], |
|
|
[-0.0220, -0.0300, 0.0053, ..., 0.0042, -0.0331, -0.0135], |
|
|
..., |
|
|
[-0.1292, -0.1005, -0.0797, ..., 0.0367, 0.0396, 0.0252], |
|
|
[ 0.0957, 0.0724, 0.0704, ..., -0.0479, -0.0602, -0.0536], |
|
|
[ 0.0029, 0.0175, -0.0124, ..., -0.0495, -0.0241, -0.0338]], |
|
|
device='cuda:0'), grad: tensor([[ 1.1081e-04, 9.8497e-06, 2.6390e-05, ..., 5.8085e-05, |
|
|
7.8619e-05, 7.5281e-05], |
|
|
[ 2.9993e-04, 3.1650e-05, 7.5817e-05, ..., 1.5366e-04, |
|
|
2.0933e-04, 2.0015e-04], |
|
|
[ 6.7055e-05, 7.0333e-06, 1.7270e-05, ..., 3.8087e-05, |
|
|
5.1022e-05, 4.8697e-05], |
|
|
..., |
|
|
[ 3.2514e-05, 4.0419e-06, 8.8885e-06, ..., 1.5900e-05, |
|
|
2.1756e-05, 2.0742e-05], |
|
|
[ 2.3872e-05, 2.6692e-06, 6.0759e-06, ..., 1.1623e-05, |
|
|
1.6004e-05, 1.5318e-05], |
|
|
[ 9.1270e-06, -7.8883e-07, 1.5451e-06, ..., 7.1563e-06, |
|
|
9.1866e-06, 8.8215e-06]], device='cuda:0') |
|
|
Epoch 37, bias, value: tensor([ 0.0436, -0.0122, 0.0267, 0.0142, -0.0493, -0.0133, -0.0051], |
|
|
device='cuda:0'), grad: tensor([ 3.1781e-04, 8.4877e-04, 1.8942e-04, -1.5469e-03, 9.0361e-05, |
|
|
6.7234e-05, 3.1173e-05], device='cuda:0') |
|
|
588 |
|
|
0.004775675848247429 |
|
|
changing lr |
|
|
epoch 36, time 818.63, cls_loss 0.0022 cls_loss_mapping 0.0121 cls_loss_causal 0.4217 re_mapping 0.0124 re_causal 0.0234 |
|
|
Epoch 38, weight, value: tensor([[ 0.0800, 0.0731, 0.0679, ..., 0.0211, 0.0610, 0.0552], |
|
|
[ 0.0368, 0.0258, 0.0087, ..., -0.0164, -0.0150, -0.0129], |
|
|
[-0.0223, -0.0300, 0.0047, ..., 0.0039, -0.0329, -0.0136], |
|
|
..., |
|
|
[-0.1282, -0.0996, -0.0792, ..., 0.0364, 0.0393, 0.0251], |
|
|
[ 0.0949, 0.0719, 0.0699, ..., -0.0473, -0.0595, -0.0529], |
|
|
[ 0.0025, 0.0170, -0.0125, ..., -0.0490, -0.0238, -0.0334]], |
|
|
device='cuda:0'), grad: tensor([[-7.7605e-05, -4.7684e-05, -4.8161e-05, ..., -6.0908e-07, |
|
|
-3.0603e-06, -4.1500e-06], |
|
|
[ 4.1366e-05, 2.1577e-05, 2.2545e-05, ..., 2.4587e-06, |
|
|
4.7572e-06, 4.7386e-06], |
|
|
[-4.3094e-05, -7.4506e-06, -1.1407e-05, ..., -1.2629e-05, |
|
|
-1.9446e-05, -1.6555e-05], |
|
|
..., |
|
|
[ 9.5442e-06, 3.5148e-06, 4.1686e-06, ..., 1.9278e-06, |
|
|
2.9113e-06, 2.4792e-06], |
|
|
[ 1.3210e-05, 6.4522e-06, 6.8471e-06, ..., 8.5309e-07, |
|
|
1.6466e-06, 1.6224e-06], |
|
|
[ 2.2411e-05, 8.8289e-06, 1.0006e-05, ..., 2.2650e-06, |
|
|
4.0941e-06, 3.7327e-06]], device='cuda:0') |
|
|
Epoch 38, bias, value: tensor([ 0.0465, -0.0125, 0.0255, 0.0122, -0.0490, -0.0131, -0.0052], |
|
|
device='cuda:0'), grad: tensor([-8.8215e-05, 6.2108e-05, -1.2684e-04, 6.6817e-05, 2.1145e-05, |
|
|
2.1160e-05, 4.3809e-05], device='cuda:0') |
|
|
588 |
|
|
0.004551803455482836 |
|
|
changing lr |
|
|
epoch 37, time 812.52, cls_loss 0.0024 cls_loss_mapping 0.0128 cls_loss_causal 0.4274 re_mapping 0.0113 re_causal 0.0217 |
|
|
Epoch 39, weight, value: tensor([[ 0.0801, 0.0732, 0.0682, ..., 0.0209, 0.0604, 0.0547], |
|
|
[ 0.0360, 0.0253, 0.0084, ..., -0.0163, -0.0149, -0.0129], |
|
|
[-0.0223, -0.0298, 0.0044, ..., 0.0036, -0.0327, -0.0137], |
|
|
..., |
|
|
[-0.1261, -0.0988, -0.0787, ..., 0.0363, 0.0394, 0.0253], |
|
|
[ 0.0938, 0.0713, 0.0693, ..., -0.0468, -0.0590, -0.0524], |
|
|
[ 0.0012, 0.0164, -0.0128, ..., -0.0486, -0.0240, -0.0334]], |
|
|
device='cuda:0'), grad: tensor([[ 1.3208e-03, 2.7132e-04, 2.7966e-04, ..., 2.1148e-04, |
|
|
3.4261e-04, 3.1471e-04], |
|
|
[ 4.4847e-04, 6.2883e-05, 8.6010e-05, ..., 1.6391e-04, |
|
|
2.0814e-04, 1.9217e-04], |
|
|
[-2.8467e-04, -5.8651e-05, -8.5235e-05, ..., -9.0823e-06, |
|
|
-5.7757e-05, -7.6950e-05], |
|
|
..., |
|
|
[-1.1253e-03, -2.3699e-04, -2.0397e-04, ..., -1.2493e-04, |
|
|
-2.4390e-04, -2.0444e-04], |
|
|
[ 1.0937e-04, 1.8716e-05, 2.3216e-05, ..., 2.8864e-05, |
|
|
4.0889e-05, 3.8743e-05], |
|
|
[ 3.8415e-05, -2.9765e-06, 2.0452e-06, ..., 1.7777e-05, |
|
|
2.2992e-05, 2.1994e-05]], device='cuda:0') |
|
|
Epoch 39, bias, value: tensor([ 0.0464, -0.0130, 0.0248, 0.0131, -0.0451, -0.0139, -0.0080], |
|
|
device='cuda:0'), grad: tensor([ 0.0036, 0.0013, -0.0009, -0.0013, -0.0031, 0.0003, 0.0002], |
|
|
device='cuda:0') |
|
|
588 |
|
|
0.004328833670911726 |
|
|
changing lr |
|
|
epoch 38, time 816.59, cls_loss 0.0036 cls_loss_mapping 0.0166 cls_loss_causal 0.4544 re_mapping 0.0109 re_causal 0.0217 |
|
|
Epoch 40, weight, value: tensor([[ 0.0799, 0.0733, 0.0683, ..., 0.0205, 0.0596, 0.0540], |
|
|
[ 0.0372, 0.0255, 0.0089, ..., -0.0160, -0.0144, -0.0124], |
|
|
[-0.0221, -0.0297, 0.0042, ..., 0.0036, -0.0322, -0.0135], |
|
|
..., |
|
|
[-0.1258, -0.0986, -0.0787, ..., 0.0360, 0.0391, 0.0252], |
|
|
[ 0.0927, 0.0707, 0.0687, ..., -0.0464, -0.0585, -0.0520], |
|
|
[ 0.0008, 0.0161, -0.0128, ..., -0.0483, -0.0240, -0.0333]], |
|
|
device='cuda:0'), grad: tensor([[ 9.0182e-05, 7.5512e-06, 4.5821e-06, ..., 1.2167e-05, |
|
|
2.6450e-05, 2.8521e-05], |
|
|
[ 5.2541e-05, 8.3819e-06, 1.0535e-05, ..., 1.2502e-05, |
|
|
2.0817e-05, 1.9684e-05], |
|
|
[-3.5405e-04, -8.3625e-05, -8.7738e-05, ..., -1.2740e-05, |
|
|
-4.4912e-05, -5.0306e-05], |
|
|
..., |
|
|
[ 9.5487e-05, 4.2170e-05, 3.9607e-05, ..., -3.0175e-05, |
|
|
-3.2872e-05, -2.7359e-05], |
|
|
[ 2.5019e-05, 5.5432e-06, 7.0781e-06, ..., 4.3400e-06, |
|
|
7.1600e-06, 6.7949e-06], |
|
|
[ 4.0144e-05, 8.8736e-06, 1.1116e-05, ..., 5.5581e-06, |
|
|
9.4995e-06, 9.3132e-06]], device='cuda:0') |
|
|
Epoch 40, bias, value: tensor([ 0.0456, -0.0094, 0.0255, 0.0118, -0.0453, -0.0149, -0.0090], |
|
|
device='cuda:0'), grad: tensor([ 2.6417e-04, 1.5450e-04, -8.4782e-04, 1.2887e-04, 1.3530e-04, |
|
|
6.4671e-05, 1.0115e-04], device='cuda:0') |
|
|
588 |
|
|
0.0041072155260068206 |
|
|
changing lr |
|
|
epoch 39, time 810.16, cls_loss 0.0023 cls_loss_mapping 0.0103 cls_loss_causal 0.4366 re_mapping 0.0109 re_causal 0.0218 |
|
|
Epoch 41, weight, value: tensor([[ 0.0813, 0.0739, 0.0691, ..., 0.0206, 0.0597, 0.0541], |
|
|
[ 0.0351, 0.0247, 0.0081, ..., -0.0162, -0.0148, -0.0127], |
|
|
[-0.0220, -0.0297, 0.0037, ..., 0.0036, -0.0318, -0.0133], |
|
|
..., |
|
|
[-0.1254, -0.0982, -0.0785, ..., 0.0355, 0.0385, 0.0248], |
|
|
[ 0.0919, 0.0703, 0.0683, ..., -0.0460, -0.0580, -0.0516], |
|
|
[ 0.0014, 0.0162, -0.0125, ..., -0.0478, -0.0238, -0.0330]], |
|
|
device='cuda:0'), grad: tensor([[ 3.0786e-05, 6.2510e-06, 6.6385e-06, ..., 6.4000e-06, |
|
|
1.0177e-05, 9.1493e-06], |
|
|
[ 5.1297e-06, 7.0129e-07, 9.4157e-07, ..., 2.5406e-06, |
|
|
3.5912e-06, 3.3006e-06], |
|
|
[-1.6153e-04, -3.6359e-05, -4.1485e-05, ..., -3.7223e-05, |
|
|
-5.7846e-05, -5.2512e-05], |
|
|
..., |
|
|
[ 4.1604e-05, 9.7454e-06, 1.1414e-05, ..., 4.6752e-06, |
|
|
9.4995e-06, 8.5384e-06], |
|
|
[ 5.7332e-06, 1.5143e-06, 1.7388e-06, ..., 9.4809e-07, |
|
|
1.5926e-06, 1.4519e-06], |
|
|
[ 1.7926e-05, 4.1164e-06, 4.8168e-06, ..., 3.5632e-06, |
|
|
5.8189e-06, 5.2750e-06]], device='cuda:0') |
|
|
Epoch 41, bias, value: tensor([ 0.0489, -0.0132, 0.0257, 0.0117, -0.0461, -0.0150, -0.0077], |
|
|
device='cuda:0'), grad: tensor([ 8.4579e-05, 1.6034e-05, -4.2963e-04, 1.6403e-04, 1.0425e-04, |
|
|
1.4126e-05, 4.6611e-05], device='cuda:0') |
|
|
588 |
|
|
0.0038873953302184317 |
|
|
changing lr |
|
|
epoch 40, time 817.72, cls_loss 0.0031 cls_loss_mapping 0.0110 cls_loss_causal 0.4158 re_mapping 0.0106 re_causal 0.0213 |
|
|
Epoch 42, weight, value: tensor([[ 0.0811, 0.0739, 0.0692, ..., 0.0203, 0.0591, 0.0535], |
|
|
[ 0.0352, 0.0245, 0.0082, ..., -0.0161, -0.0146, -0.0125], |
|
|
[-0.0223, -0.0296, 0.0034, ..., 0.0033, -0.0318, -0.0134], |
|
|
..., |
|
|
[-0.1249, -0.0977, -0.0782, ..., 0.0354, 0.0383, 0.0248], |
|
|
[ 0.0923, 0.0700, 0.0680, ..., -0.0454, -0.0573, -0.0509], |
|
|
[ 0.0008, 0.0158, -0.0126, ..., -0.0474, -0.0237, -0.0328]], |
|
|
device='cuda:0'), grad: tensor([[-5.3596e-04, -1.5938e-04, -1.5640e-04, ..., -1.0812e-04, |
|
|
-1.7965e-04, -1.6940e-04], |
|
|
[ 2.6524e-05, 8.6874e-06, 8.8885e-06, ..., 5.1670e-06, |
|
|
8.2627e-06, 7.8455e-06], |
|
|
[ 3.5048e-04, 1.0979e-04, 1.0973e-04, ..., 6.7711e-05, |
|
|
1.1122e-04, 1.0532e-04], |
|
|
..., |
|
|
[ 5.9277e-05, 2.0459e-05, 2.0370e-05, ..., 9.4101e-06, |
|
|
1.5780e-05, 1.4931e-05], |
|
|
[-2.8157e-04, -1.4019e-04, -1.5092e-04, ..., -6.7241e-06, |
|
|
-9.8944e-06, -1.2219e-05], |
|
|
[ 2.2864e-04, 1.0979e-04, 1.1772e-04, ..., 8.9556e-06, |
|
|
1.3918e-05, 1.5289e-05]], device='cuda:0') |
|
|
Epoch 42, bias, value: tensor([ 0.0479, -0.0116, 0.0243, 0.0106, -0.0460, -0.0122, -0.0089], |
|
|
device='cuda:0'), grad: tensor([-1.2474e-03, 5.8174e-05, 7.9107e-04, 3.3116e-04, 1.2589e-04, |
|
|
-4.0770e-04, 3.4833e-04], device='cuda:0') |
|
|
588 |
|
|
0.003669815772166629 |
|
|
changing lr |
|
|
epoch 41, time 817.28, cls_loss 0.0020 cls_loss_mapping 0.0085 cls_loss_causal 0.4236 re_mapping 0.0098 re_causal 0.0200 |
|
|
Epoch 43, weight, value: tensor([[ 0.0810, 0.0739, 0.0693, ..., 0.0202, 0.0586, 0.0532], |
|
|
[ 0.0355, 0.0243, 0.0082, ..., -0.0159, -0.0144, -0.0124], |
|
|
[-0.0226, -0.0296, 0.0030, ..., 0.0031, -0.0316, -0.0135], |
|
|
..., |
|
|
[-0.1245, -0.0972, -0.0780, ..., 0.0350, 0.0379, 0.0244], |
|
|
[ 0.0916, 0.0696, 0.0677, ..., -0.0450, -0.0569, -0.0506], |
|
|
[ 0.0011, 0.0156, -0.0125, ..., -0.0469, -0.0234, -0.0324]], |
|
|
device='cuda:0'), grad: tensor([[ 5.7995e-05, -2.0489e-07, 3.4031e-06, ..., 3.5316e-05, |
|
|
2.9922e-05, 3.0935e-05], |
|
|
[ 1.4651e-04, 1.6674e-05, 2.3216e-05, ..., 5.4210e-05, |
|
|
5.6028e-05, 5.6356e-05], |
|
|
[ 2.0862e-04, 3.3289e-05, 4.1366e-05, ..., 6.0529e-05, |
|
|
7.0274e-05, 6.9916e-05], |
|
|
..., |
|
|
[ 1.3411e-04, 1.4596e-05, 2.4140e-05, ..., 5.3406e-05, |
|
|
5.9366e-05, 6.0856e-05], |
|
|
[ 5.0902e-05, 5.4762e-06, 8.8289e-06, ..., 2.0519e-05, |
|
|
2.2277e-05, 2.2873e-05], |
|
|
[-3.6061e-05, -1.9625e-05, -1.4506e-05, ..., 1.0744e-05, |
|
|
6.2138e-06, 7.1488e-06]], device='cuda:0') |
|
|
Epoch 43, bias, value: tensor([ 0.0477, -0.0095, 0.0231, 0.0105, -0.0466, -0.0127, -0.0083], |
|
|
device='cuda:0'), grad: tensor([ 1.6332e-04, 4.1175e-04, 5.6458e-04, -1.6212e-03, 3.8433e-04, |
|
|
1.4436e-04, -4.7505e-05], device='cuda:0') |
|
|
588 |
|
|
0.0034549150281252667 |
|
|
changing lr |
|
|
epoch 42, time 812.58, cls_loss 0.0022 cls_loss_mapping 0.0107 cls_loss_causal 0.4164 re_mapping 0.0095 re_causal 0.0194 |
|
|
Epoch 44, weight, value: tensor([[ 0.0800, 0.0737, 0.0691, ..., 0.0197, 0.0578, 0.0524], |
|
|
[ 0.0340, 0.0236, 0.0077, ..., -0.0159, -0.0144, -0.0124], |
|
|
[-0.0221, -0.0294, 0.0029, ..., 0.0031, -0.0312, -0.0132], |
|
|
..., |
|
|
[-0.1233, -0.0966, -0.0776, ..., 0.0348, 0.0378, 0.0244], |
|
|
[ 0.0914, 0.0695, 0.0676, ..., -0.0447, -0.0565, -0.0502], |
|
|
[ 0.0014, 0.0155, -0.0124, ..., -0.0465, -0.0231, -0.0320]], |
|
|
device='cuda:0'), grad: tensor([[-3.1042e-04, -1.0139e-04, -1.1402e-04, ..., -6.8963e-05, |
|
|
-1.1438e-04, -1.0639e-04], |
|
|
[ 5.1528e-05, 1.2212e-05, 1.5438e-05, ..., 1.2323e-05, |
|
|
2.0087e-05, 1.9476e-05], |
|
|
[ 3.5435e-05, 9.5963e-06, 1.1921e-05, ..., 7.0035e-06, |
|
|
1.2219e-05, 1.1384e-05], |
|
|
..., |
|
|
[ 4.0025e-05, 1.4938e-05, 1.6630e-05, ..., 5.5954e-06, |
|
|
1.2115e-05, 1.0990e-05], |
|
|
[-9.4846e-06, -7.3239e-06, -6.9775e-06, ..., 7.0781e-07, |
|
|
1.2163e-06, 1.0170e-06], |
|
|
[ 8.6725e-05, 4.3094e-05, 4.1217e-05, ..., 2.2948e-05, |
|
|
3.1769e-05, 2.8774e-05]], device='cuda:0') |
|
|
Epoch 44, bias, value: tensor([ 0.0451, -0.0115, 0.0242, 0.0110, -0.0447, -0.0125, -0.0075], |
|
|
device='cuda:0'), grad: tensor([-7.4959e-04, 1.3137e-04, 9.0301e-05, 2.6941e-04, 9.0837e-05, |
|
|
-4.8429e-06, 1.7333e-04], device='cuda:0') |
|
|
588 |
|
|
0.0032431258795932905 |
|
|
changing lr |
|
|
epoch 43, time 804.92, cls_loss 0.0019 cls_loss_mapping 0.0109 cls_loss_causal 0.4262 re_mapping 0.0091 re_causal 0.0198 |
|
|
Epoch 45, weight, value: tensor([[ 0.0801, 0.0737, 0.0692, ..., 0.0195, 0.0574, 0.0520], |
|
|
[ 0.0328, 0.0230, 0.0073, ..., -0.0160, -0.0145, -0.0125], |
|
|
[-0.0228, -0.0296, 0.0025, ..., 0.0029, -0.0311, -0.0133], |
|
|
..., |
|
|
[-0.1231, -0.0964, -0.0774, ..., 0.0345, 0.0374, 0.0242], |
|
|
[ 0.0925, 0.0699, 0.0679, ..., -0.0442, -0.0558, -0.0496], |
|
|
[ 0.0019, 0.0154, -0.0123, ..., -0.0459, -0.0228, -0.0316]], |
|
|
device='cuda:0'), grad: tensor([[-1.1873e-04, -5.6744e-05, -5.7548e-05, ..., -4.7311e-06, |
|
|
-1.7673e-05, -1.6883e-05], |
|
|
[-4.3958e-05, -7.3984e-06, -9.5293e-06, ..., -1.9416e-05, |
|
|
-2.5347e-05, -2.5064e-05], |
|
|
[ 6.7830e-05, 2.6092e-05, 2.6479e-05, ..., 5.3681e-06, |
|
|
1.3813e-05, 1.3068e-05], |
|
|
..., |
|
|
[ 5.3883e-05, 2.4945e-05, 2.5496e-05, ..., 8.3223e-06, |
|
|
1.2457e-05, 1.2666e-05], |
|
|
[ 2.9638e-05, 1.0327e-05, 1.0647e-05, ..., 4.1239e-06, |
|
|
7.9423e-06, 7.6257e-06], |
|
|
[-1.5184e-05, -6.3851e-06, -4.7423e-06, ..., 1.8915e-06, |
|
|
8.3167e-07, 1.0170e-06]], device='cuda:0') |
|
|
Epoch 45, bias, value: tensor([ 0.0453, -0.0129, 0.0222, 0.0099, -0.0452, -0.0095, -0.0056], |
|
|
device='cuda:0'), grad: tensor([-2.0373e-04, -1.1629e-04, 1.4174e-04, 5.9426e-05, 8.1062e-05, |
|
|
6.4790e-05, -2.6986e-05], device='cuda:0') |
|
|
588 |
|
|
0.0030348748417303863 |
|
|
changing lr |
|
|
epoch 44, time 804.95, cls_loss 0.0013 cls_loss_mapping 0.0102 cls_loss_causal 0.4315 re_mapping 0.0091 re_causal 0.0199 |
|
|
Epoch 46, weight, value: tensor([[ 0.0796, 0.0736, 0.0691, ..., 0.0193, 0.0569, 0.0516], |
|
|
[ 0.0330, 0.0230, 0.0074, ..., -0.0159, -0.0143, -0.0123], |
|
|
[-0.0223, -0.0294, 0.0024, ..., 0.0028, -0.0309, -0.0132], |
|
|
..., |
|
|
[-0.1218, -0.0958, -0.0770, ..., 0.0343, 0.0374, 0.0242], |
|
|
[ 0.0917, 0.0694, 0.0674, ..., -0.0440, -0.0556, -0.0494], |
|
|
[ 0.0009, 0.0151, -0.0125, ..., -0.0457, -0.0228, -0.0316]], |
|
|
device='cuda:0'), grad: tensor([[ 8.4102e-05, 1.2226e-05, 1.5602e-05, ..., 2.7627e-05, |
|
|
3.4750e-05, 3.2485e-05], |
|
|
[ 4.2558e-05, 6.7316e-06, 8.8438e-06, ..., 1.4171e-05, |
|
|
1.5810e-05, 1.5028e-05], |
|
|
[-5.0277e-05, -7.6517e-06, -8.5086e-06, ..., -2.5943e-05, |
|
|
-3.3945e-05, -3.1084e-05], |
|
|
..., |
|
|
[-1.1601e-05, -1.3327e-06, -7.1526e-07, ..., -3.8464e-07, |
|
|
-3.6620e-06, -3.3043e-06], |
|
|
[ 2.0102e-05, 3.0212e-06, 3.9376e-06, ..., 6.6571e-06, |
|
|
8.0541e-06, 7.5586e-06], |
|
|
[ 3.5107e-05, 6.1132e-06, 7.5251e-06, ..., 1.1280e-05, |
|
|
1.3128e-05, 1.2472e-05]], device='cuda:0') |
|
|
Epoch 46, bias, value: tensor([ 0.0443, -0.0117, 0.0232, 0.0096, -0.0433, -0.0102, -0.0078], |
|
|
device='cuda:0'), grad: tensor([ 2.4033e-04, 1.1915e-04, -2.0254e-04, -2.9182e-04, -2.3723e-05, |
|
|
5.7220e-05, 1.0157e-04], device='cuda:0') |
|
|
588 |
|
|
0.0028305813044122124 |
|
|
changing lr |
|
|
---------------------saving model at epoch 45---------------------------------------------------- |
|
|
epoch 45, time 802.96, cls_loss 0.0019 cls_loss_mapping 0.0101 cls_loss_causal 0.4210 re_mapping 0.0088 re_causal 0.0195 |
|
|
Epoch 47, weight, value: tensor([[ 0.0792, 0.0736, 0.0691, ..., 0.0191, 0.0565, 0.0512], |
|
|
[ 0.0332, 0.0229, 0.0075, ..., -0.0157, -0.0141, -0.0122], |
|
|
[-0.0215, -0.0292, 0.0023, ..., 0.0029, -0.0303, -0.0129], |
|
|
..., |
|
|
[-0.1216, -0.0955, -0.0769, ..., 0.0340, 0.0371, 0.0240], |
|
|
[ 0.0911, 0.0690, 0.0670, ..., -0.0437, -0.0553, -0.0491], |
|
|
[ 0.0004, 0.0148, -0.0126, ..., -0.0455, -0.0229, -0.0315]], |
|
|
device='cuda:0'), grad: tensor([[-1.8418e-05, -7.2941e-06, -5.7556e-06, ..., 1.8589e-06, |
|
|
5.3830e-07, -3.7253e-08], |
|
|
[-7.3433e-05, -2.5347e-05, -2.9951e-05, ..., -9.6112e-06, |
|
|
-1.9461e-05, -1.8835e-05], |
|
|
[ 3.6359e-05, 8.6427e-06, 9.7007e-06, ..., 1.0766e-05, |
|
|
1.2040e-05, 1.2040e-05], |
|
|
..., |
|
|
[ 5.8889e-05, 1.8105e-05, 2.0280e-05, ..., 9.3952e-06, |
|
|
1.4931e-05, 1.5140e-05], |
|
|
[ 1.7866e-05, 5.5432e-06, 6.1654e-06, ..., 3.0100e-06, |
|
|
4.7572e-06, 4.6492e-06], |
|
|
[ 2.5898e-05, 7.6815e-06, 8.3670e-06, ..., 5.1856e-06, |
|
|
7.3612e-06, 7.0445e-06]], device='cuda:0') |
|
|
Epoch 47, bias, value: tensor([ 0.0431, -0.0104, 0.0253, 0.0092, -0.0437, -0.0105, -0.0089], |
|
|
device='cuda:0'), grad: tensor([-3.0026e-05, -1.9670e-04, 8.9884e-05, -1.2034e-04, 1.3995e-04, |
|
|
4.6521e-05, 7.1287e-05], device='cuda:0') |
|
|
588 |
|
|
0.0026306566876350096 |
|
|
changing lr |
|
|
epoch 46, time 796.28, cls_loss 0.0014 cls_loss_mapping 0.0082 cls_loss_causal 0.4007 re_mapping 0.0084 re_causal 0.0181 |
|
|
Epoch 48, weight, value: tensor([[ 0.0800, 0.0739, 0.0695, ..., 0.0190, 0.0563, 0.0510], |
|
|
[ 0.0325, 0.0226, 0.0074, ..., -0.0158, -0.0142, -0.0122], |
|
|
[-0.0215, -0.0291, 0.0021, ..., 0.0028, -0.0302, -0.0128], |
|
|
..., |
|
|
[-0.1211, -0.0952, -0.0767, ..., 0.0339, 0.0370, 0.0240], |
|
|
[ 0.0904, 0.0686, 0.0666, ..., -0.0434, -0.0550, -0.0489], |
|
|
[ 0.0002, 0.0146, -0.0126, ..., -0.0452, -0.0228, -0.0314]], |
|
|
device='cuda:0'), grad: tensor([[-9.3728e-06, -8.9779e-06, -1.1541e-05, ..., -3.2373e-06, |
|
|
-2.8387e-06, -2.4326e-06], |
|
|
[-2.7701e-05, -5.1409e-06, -5.3905e-06, ..., -1.6578e-06, |
|
|
-6.9477e-06, -6.6012e-06], |
|
|
[-2.6628e-05, -4.9174e-06, -5.9009e-06, ..., -5.7742e-07, |
|
|
-5.8301e-07, -1.8738e-06], |
|
|
..., |
|
|
[ 1.3180e-05, 5.2154e-06, 5.7928e-06, ..., 8.4750e-07, |
|
|
-2.2538e-07, -2.4959e-07], |
|
|
[ 1.4670e-05, 4.6529e-06, 5.8711e-06, ..., 2.4792e-06, |
|
|
3.8445e-06, 4.0159e-06], |
|
|
[ 2.3812e-05, 6.0014e-06, 7.0669e-06, ..., 2.8089e-06, |
|
|
5.7369e-06, 5.8338e-06]], device='cuda:0') |
|
|
Epoch 48, bias, value: tensor([ 0.0447, -0.0115, 0.0252, 0.0092, -0.0433, -0.0108, -0.0094], |
|
|
device='cuda:0'), grad: tensor([ 3.0175e-06, -8.2195e-05, -6.7174e-05, 3.0354e-05, 2.5079e-05, |
|
|
3.1263e-05, 5.9664e-05], device='cuda:0') |
|
|
588 |
|
|
0.0024355036129704724 |
|
|
changing lr |
|
|
epoch 47, time 792.94, cls_loss 0.0016 cls_loss_mapping 0.0101 cls_loss_causal 0.4051 re_mapping 0.0084 re_causal 0.0183 |
|
|
Epoch 49, weight, value: tensor([[ 0.0804, 0.0740, 0.0697, ..., 0.0189, 0.0560, 0.0508], |
|
|
[ 0.0324, 0.0225, 0.0073, ..., -0.0157, -0.0141, -0.0122], |
|
|
[-0.0214, -0.0291, 0.0019, ..., 0.0027, -0.0300, -0.0128], |
|
|
..., |
|
|
[-0.1210, -0.0949, -0.0766, ..., 0.0338, 0.0368, 0.0239], |
|
|
[ 0.0896, 0.0683, 0.0662, ..., -0.0432, -0.0548, -0.0487], |
|
|
[ 0.0002, 0.0144, -0.0126, ..., -0.0449, -0.0227, -0.0312]], |
|
|
device='cuda:0'), grad: tensor([[-4.8965e-05, -4.8161e-05, -3.5852e-05, ..., 9.9897e-05, |
|
|
1.1665e-04, 1.0639e-04], |
|
|
[ 3.0613e-04, 4.1783e-05, 6.2168e-05, ..., 1.5008e-04, |
|
|
1.8573e-04, 1.8001e-04], |
|
|
[ 1.8191e-04, 2.0936e-05, 4.0591e-05, ..., 1.1367e-04, |
|
|
1.3721e-04, 1.3256e-04], |
|
|
..., |
|
|
[-3.9673e-04, -1.5929e-05, -6.0380e-05, ..., -3.6240e-04, |
|
|
-4.4727e-04, -4.2868e-04], |
|
|
[ 8.0705e-05, 1.0327e-05, 1.5706e-05, ..., 3.7074e-05, |
|
|
4.5300e-05, 4.4018e-05], |
|
|
[ 7.8142e-05, 1.2510e-05, 1.6153e-05, ..., 2.5809e-05, |
|
|
3.1680e-05, 3.1292e-05]], device='cuda:0') |
|
|
Epoch 49, bias, value: tensor([ 0.0453, -0.0111, 0.0253, 0.0092, -0.0439, -0.0116, -0.0092], |
|
|
device='cuda:0'), grad: tensor([ 1.4640e-05, 8.7214e-04, 5.3501e-04, -5.8937e-04, -1.2760e-03, |
|
|
2.3127e-04, 2.1350e-04], device='cuda:0') |
|
|
588 |
|
|
0.00224551509273949 |
|
|
changing lr |
|
|
epoch 48, time 794.79, cls_loss 0.0011 cls_loss_mapping 0.0088 cls_loss_causal 0.3953 re_mapping 0.0084 re_causal 0.0184 |
|
|
Epoch 50, weight, value: tensor([[ 0.0810, 0.0742, 0.0699, ..., 0.0188, 0.0559, 0.0507], |
|
|
[ 0.0323, 0.0223, 0.0073, ..., -0.0156, -0.0140, -0.0120], |
|
|
[-0.0219, -0.0292, 0.0016, ..., 0.0026, -0.0300, -0.0129], |
|
|
..., |
|
|
[-0.1206, -0.0946, -0.0764, ..., 0.0336, 0.0366, 0.0238], |
|
|
[ 0.0894, 0.0681, 0.0661, ..., -0.0430, -0.0545, -0.0484], |
|
|
[-0.0002, 0.0142, -0.0127, ..., -0.0447, -0.0226, -0.0312]], |
|
|
device='cuda:0'), grad: tensor([[ 3.0398e-04, 5.4359e-05, 5.4806e-05, ..., 4.3452e-05, |
|
|
8.6904e-05, 8.8394e-05], |
|
|
[ 1.2064e-04, 2.2903e-05, 2.6226e-05, ..., 3.2097e-05, |
|
|
4.6730e-05, 4.1187e-05], |
|
|
[ 9.8765e-05, 1.6436e-05, 2.0295e-05, ..., 2.4199e-05, |
|
|
3.8147e-05, 3.3438e-05], |
|
|
..., |
|
|
[-5.0640e-04, -8.4877e-05, -8.9824e-05, ..., -8.7798e-05, |
|
|
-1.6844e-04, -1.5855e-04], |
|
|
[ 4.8965e-05, 8.0839e-06, 9.2164e-06, ..., 1.0423e-05, |
|
|
1.8060e-05, 1.6093e-05], |
|
|
[ 2.6956e-05, 4.8652e-06, 5.8636e-06, ..., 6.7279e-06, |
|
|
9.9689e-06, 8.8587e-06]], device='cuda:0') |
|
|
Epoch 50, bias, value: tensor([ 0.0467, -0.0107, 0.0239, 0.0093, -0.0439, -0.0116, -0.0097], |
|
|
device='cuda:0'), grad: tensor([ 8.0776e-04, 3.3808e-04, 2.8300e-04, -2.2209e-04, -1.4238e-03, |
|
|
1.4186e-04, 7.4983e-05], device='cuda:0') |
|
|
588 |
|
|
0.002061073738537637 |
|
|
changing lr |
|
|
epoch 49, time 791.10, cls_loss 0.0014 cls_loss_mapping 0.0094 cls_loss_causal 0.4036 re_mapping 0.0082 re_causal 0.0180 |
|
|
Epoch 51, weight, value: tensor([[ 8.0744e-02, 7.4140e-02, 6.9905e-02, ..., 1.8685e-02, |
|
|
5.5529e-02, 5.0404e-02], |
|
|
[ 3.1746e-02, 2.2061e-02, 7.1410e-03, ..., -1.5593e-02, |
|
|
-1.3971e-02, -1.2023e-02], |
|
|
[-2.2164e-02, -2.9223e-02, 1.3682e-03, ..., 2.5026e-03, |
|
|
-2.9860e-02, -1.2878e-02], |
|
|
..., |
|
|
[-1.1973e-01, -9.4191e-02, -7.6101e-02, ..., 3.3470e-02, |
|
|
3.6480e-02, 2.3775e-02], |
|
|
[ 8.9263e-02, 6.8041e-02, 6.5975e-02, ..., -4.2811e-02, |
|
|
-5.4277e-02, -4.8199e-02], |
|
|
[-3.9018e-05, 1.4104e-02, -1.2632e-02, ..., -4.4504e-02, |
|
|
-2.2524e-02, -3.0979e-02]], device='cuda:0'), grad: tensor([[ 6.5947e-04, 1.4544e-04, 1.5628e-04, ..., 1.0252e-04, |
|
|
1.5032e-04, 1.4842e-04], |
|
|
[-2.4295e-04, -9.3803e-06, -1.9312e-05, ..., -1.9222e-05, |
|
|
-4.0501e-05, -4.5538e-05], |
|
|
[-2.1343e-03, -4.5037e-04, -4.8351e-04, ..., -4.4036e-04, |
|
|
-6.3276e-04, -5.9366e-04], |
|
|
..., |
|
|
[ 8.5306e-04, 1.6356e-04, 1.7834e-04, ..., 1.7262e-04, |
|
|
2.5058e-04, 2.3615e-04], |
|
|
[ 2.0075e-04, 3.8862e-05, 4.3094e-05, ..., 3.9220e-05, |
|
|
5.7250e-05, 5.3853e-05], |
|
|
[ 3.0589e-04, 5.1260e-05, 5.7101e-05, ..., 6.2466e-05, |
|
|
9.1553e-05, 8.6486e-05]], device='cuda:0') |
|
|
Epoch 51, bias, value: tensor([ 0.0460, -0.0112, 0.0234, 0.0090, -0.0426, -0.0114, -0.0092], |
|
|
device='cuda:0'), grad: tensor([ 0.0016, -0.0008, -0.0052, 0.0009, 0.0022, 0.0005, 0.0008], |
|
|
device='cuda:0') |
|
|
588 |
|
|
0.0018825509907063344 |
|
|
changing lr |
|
|
epoch 50, time 795.94, cls_loss 0.0011 cls_loss_mapping 0.0068 cls_loss_causal 0.4129 re_mapping 0.0080 re_causal 0.0179 |
|
|
Epoch 52, weight, value: tensor([[ 0.0813, 0.0744, 0.0702, ..., 0.0186, 0.0554, 0.0503], |
|
|
[ 0.0316, 0.0219, 0.0071, ..., -0.0156, -0.0139, -0.0120], |
|
|
[-0.0220, -0.0292, 0.0012, ..., 0.0025, -0.0297, -0.0128], |
|
|
..., |
|
|
[-0.1198, -0.0940, -0.0760, ..., 0.0332, 0.0362, 0.0235], |
|
|
[ 0.0887, 0.0678, 0.0657, ..., -0.0427, -0.0541, -0.0481], |
|
|
[-0.0004, 0.0139, -0.0127, ..., -0.0444, -0.0225, -0.0309]], |
|
|
device='cuda:0'), grad: tensor([[ 1.1241e-04, 1.8403e-05, 2.2218e-05, ..., 9.9093e-06, |
|
|
1.7971e-05, 1.4402e-05], |
|
|
[ 6.7770e-05, 1.4529e-05, 1.5959e-05, ..., 8.6650e-06, |
|
|
1.3560e-05, 1.1742e-05], |
|
|
[-4.6802e-04, -1.0204e-04, -1.1444e-04, ..., -4.8071e-05, |
|
|
-8.4698e-05, -7.0930e-05], |
|
|
..., |
|
|
[ 1.3340e-04, 3.3885e-05, 3.7134e-05, ..., 2.2039e-05, |
|
|
3.2812e-05, 2.8864e-05], |
|
|
[ 3.4034e-05, 7.4208e-06, 8.2627e-06, ..., 3.6862e-06, |
|
|
6.2957e-06, 5.3234e-06], |
|
|
[ 7.0393e-05, 1.4640e-05, 1.6451e-05, ..., 6.6310e-06, |
|
|
1.1899e-05, 9.8571e-06]], device='cuda:0') |
|
|
Epoch 52, bias, value: tensor([ 0.0470, -0.0109, 0.0237, 0.0099, -0.0437, -0.0121, -0.0100], |
|
|
device='cuda:0'), grad: tensor([ 2.9802e-04, 1.7095e-04, -1.1902e-03, 1.1951e-04, 3.3593e-04, |
|
|
8.6248e-05, 1.7893e-04], device='cuda:0') |
|
|
588 |
|
|
0.0017103063703014388 |
|
|
changing lr |
|
|
epoch 51, time 792.04, cls_loss 0.0011 cls_loss_mapping 0.0071 cls_loss_causal 0.3982 re_mapping 0.0078 re_causal 0.0171 |
|
|
Epoch 53, weight, value: tensor([[ 0.0812, 0.0743, 0.0702, ..., 0.0185, 0.0551, 0.0501], |
|
|
[ 0.0315, 0.0219, 0.0071, ..., -0.0155, -0.0139, -0.0119], |
|
|
[-0.0223, -0.0292, 0.0011, ..., 0.0024, -0.0296, -0.0128], |
|
|
..., |
|
|
[-0.1192, -0.0937, -0.0758, ..., 0.0331, 0.0361, 0.0235], |
|
|
[ 0.0883, 0.0675, 0.0655, ..., -0.0425, -0.0539, -0.0479], |
|
|
[-0.0003, 0.0138, -0.0127, ..., -0.0441, -0.0224, -0.0308]], |
|
|
device='cuda:0'), grad: tensor([[ 1.5008e-04, 5.9873e-05, 6.0588e-05, ..., 1.9684e-05, |
|
|
2.8446e-05, 2.8625e-05], |
|
|
[ 7.9155e-05, 2.2858e-05, 2.1115e-05, ..., 1.0148e-05, |
|
|
1.1407e-05, 1.2696e-05], |
|
|
[ 2.7150e-05, 3.9190e-05, 3.9279e-05, ..., -2.3127e-05, |
|
|
-2.2963e-05, -2.4423e-05], |
|
|
..., |
|
|
[ 4.2111e-05, 2.3544e-05, 2.2352e-05, ..., -1.0498e-05, |
|
|
-1.4298e-05, -1.1802e-05], |
|
|
[-1.4181e-03, -6.0606e-04, -6.0272e-04, ..., -9.5427e-05, |
|
|
-1.4818e-04, -1.5831e-04], |
|
|
[ 6.4373e-04, 2.7061e-04, 2.6965e-04, ..., 4.9829e-05, |
|
|
7.5281e-05, 7.9691e-05]], device='cuda:0') |
|
|
Epoch 53, bias, value: tensor([ 0.0467, -0.0108, 0.0230, 0.0096, -0.0428, -0.0121, -0.0098], |
|
|
device='cuda:0'), grad: tensor([ 2.8634e-04, 1.4770e-04, -2.5302e-05, 8.5211e-04, 1.3009e-05, |
|
|
-2.3708e-03, 1.0977e-03], device='cuda:0') |
|
|
588 |
|
|
0.0015446867550656784 |
|
|
changing lr |
|
|
epoch 52, time 795.64, cls_loss 0.0012 cls_loss_mapping 0.0080 cls_loss_causal 0.4011 re_mapping 0.0074 re_causal 0.0164 |
|
|
Epoch 54, weight, value: tensor([[ 0.0809, 0.0743, 0.0701, ..., 0.0183, 0.0548, 0.0498], |
|
|
[ 0.0313, 0.0217, 0.0071, ..., -0.0155, -0.0138, -0.0119], |
|
|
[-0.0224, -0.0292, 0.0009, ..., 0.0023, -0.0295, -0.0128], |
|
|
..., |
|
|
[-0.1189, -0.0935, -0.0757, ..., 0.0330, 0.0360, 0.0235], |
|
|
[ 0.0875, 0.0672, 0.0651, ..., -0.0424, -0.0538, -0.0478], |
|
|
[ 0.0007, 0.0141, -0.0123, ..., -0.0439, -0.0222, -0.0305]], |
|
|
device='cuda:0'), grad: tensor([[ 5.2261e-04, 1.0097e-04, 1.3101e-04, ..., 6.6817e-05, |
|
|
1.1981e-04, 1.1861e-04], |
|
|
[-1.7338e-03, -2.5797e-04, -3.6550e-04, ..., -4.3845e-04, |
|
|
-7.2861e-04, -6.7663e-04], |
|
|
[ 2.9945e-04, 2.9072e-05, 5.4806e-05, ..., 1.1230e-04, |
|
|
1.8954e-04, 1.6987e-04], |
|
|
..., |
|
|
[ 2.6298e-04, 4.7296e-05, 6.0678e-05, ..., 4.7565e-05, |
|
|
7.9811e-05, 7.6592e-05], |
|
|
[ 6.3241e-05, 1.1340e-05, 1.4648e-05, ..., 1.3143e-05, |
|
|
2.2039e-05, 2.0698e-05], |
|
|
[ 3.9071e-05, 6.1700e-07, 3.9712e-06, ..., 1.5177e-05, |
|
|
2.3156e-05, 2.1845e-05]], device='cuda:0') |
|
|
Epoch 54, bias, value: tensor([ 0.0460, -0.0107, 0.0226, 0.0094, -0.0424, -0.0133, -0.0077], |
|
|
device='cuda:0'), grad: tensor([ 0.0013, -0.0054, 0.0011, 0.0019, 0.0007, 0.0002, 0.0001], |
|
|
device='cuda:0') |
|
|
588 |
|
|
0.001386025680863044 |
|
|
changing lr |
|
|
epoch 53, time 793.18, cls_loss 0.0015 cls_loss_mapping 0.0074 cls_loss_causal 0.4159 re_mapping 0.0073 re_causal 0.0167 |
|
|
Epoch 55, weight, value: tensor([[ 0.0808, 0.0742, 0.0700, ..., 0.0183, 0.0547, 0.0497], |
|
|
[ 0.0314, 0.0217, 0.0072, ..., -0.0154, -0.0137, -0.0118], |
|
|
[-0.0221, -0.0291, 0.0009, ..., 0.0024, -0.0293, -0.0127], |
|
|
..., |
|
|
[-0.1188, -0.0934, -0.0757, ..., 0.0329, 0.0359, 0.0234], |
|
|
[ 0.0875, 0.0670, 0.0650, ..., -0.0422, -0.0536, -0.0476], |
|
|
[ 0.0005, 0.0141, -0.0122, ..., -0.0438, -0.0222, -0.0305]], |
|
|
device='cuda:0'), grad: tensor([[ 9.7215e-05, 3.1769e-05, 3.2514e-05, ..., 6.7428e-06, |
|
|
1.2338e-05, 1.2912e-05], |
|
|
[-1.1349e-04, -5.8800e-05, -5.8651e-05, ..., -9.7454e-06, |
|
|
-1.0319e-05, -1.2718e-05], |
|
|
[ 3.9130e-05, 6.7838e-06, 8.2403e-06, ..., 3.8221e-06, |
|
|
1.0595e-05, 9.8199e-06], |
|
|
..., |
|
|
[-1.5461e-04, -1.5020e-05, -2.1026e-05, ..., -1.6153e-05, |
|
|
-4.4107e-05, -4.0203e-05], |
|
|
[ 4.2558e-05, 7.0110e-06, 8.5458e-06, ..., 4.8019e-06, |
|
|
1.1727e-05, 1.0826e-05], |
|
|
[ 5.9634e-05, 1.9446e-05, 2.0742e-05, ..., 6.4783e-06, |
|
|
1.2368e-05, 1.2167e-05]], device='cuda:0') |
|
|
Epoch 55, bias, value: tensor([ 0.0458, -0.0104, 0.0234, 0.0088, -0.0427, -0.0127, -0.0083], |
|
|
device='cuda:0'), grad: tensor([ 1.7858e-04, -1.4818e-04, 1.0729e-04, 6.5923e-05, -4.4537e-04, |
|
|
1.1533e-04, 1.2648e-04], device='cuda:0') |
|
|
588 |
|
|
0.0012346426699819469 |
|
|
changing lr |
|
|
epoch 54, time 790.30, cls_loss 0.0012 cls_loss_mapping 0.0074 cls_loss_causal 0.4004 re_mapping 0.0072 re_causal 0.0161 |
|
|
Epoch 56, weight, value: tensor([[ 0.0806, 0.0741, 0.0700, ..., 0.0182, 0.0546, 0.0496], |
|
|
[ 0.0316, 0.0217, 0.0073, ..., -0.0154, -0.0137, -0.0117], |
|
|
[-0.0220, -0.0291, 0.0008, ..., 0.0023, -0.0292, -0.0126], |
|
|
..., |
|
|
[-0.1187, -0.0933, -0.0756, ..., 0.0328, 0.0357, 0.0233], |
|
|
[ 0.0872, 0.0669, 0.0649, ..., -0.0421, -0.0535, -0.0475], |
|
|
[ 0.0006, 0.0140, -0.0122, ..., -0.0436, -0.0221, -0.0304]], |
|
|
device='cuda:0'), grad: tensor([[ 1.4591e-04, 3.2336e-05, 3.3647e-05, ..., 1.9282e-05, |
|
|
3.5226e-05, 3.2991e-05], |
|
|
[-2.3139e-04, -5.2631e-05, -5.8651e-05, ..., -1.4164e-05, |
|
|
-5.5045e-05, -5.0008e-05], |
|
|
[ 1.2636e-04, 2.8759e-05, 2.8357e-05, ..., 2.2590e-05, |
|
|
2.8297e-05, 2.6703e-05], |
|
|
..., |
|
|
[ 5.3108e-05, 1.2226e-05, 1.3150e-05, ..., 4.5523e-06, |
|
|
1.2390e-05, 1.1265e-05], |
|
|
[ 3.8326e-05, 8.5011e-06, 9.1940e-06, ..., 3.8184e-06, |
|
|
9.3132e-06, 8.6427e-06], |
|
|
[ 6.7830e-05, 1.5043e-05, 1.6212e-05, ..., 7.2420e-06, |
|
|
1.6958e-05, 1.5751e-05]], device='cuda:0') |
|
|
Epoch 56, bias, value: tensor([ 0.0456, -0.0096, 0.0236, 0.0082, -0.0428, -0.0130, -0.0080], |
|
|
device='cuda:0'), grad: tensor([ 0.0004, -0.0008, 0.0003, -0.0005, 0.0002, 0.0001, 0.0002], |
|
|
device='cuda:0') |
|
|
588 |
|
|
0.0010908425876598518 |
|
|
changing lr |
|
|
epoch 55, time 791.04, cls_loss 0.0013 cls_loss_mapping 0.0064 cls_loss_causal 0.4006 re_mapping 0.0070 re_causal 0.0157 |
|
|
Epoch 57, weight, value: tensor([[ 0.0808, 0.0742, 0.0701, ..., 0.0182, 0.0545, 0.0495], |
|
|
[ 0.0315, 0.0217, 0.0073, ..., -0.0154, -0.0136, -0.0117], |
|
|
[-0.0221, -0.0291, 0.0007, ..., 0.0023, -0.0292, -0.0127], |
|
|
..., |
|
|
[-0.1184, -0.0931, -0.0755, ..., 0.0327, 0.0357, 0.0233], |
|
|
[ 0.0870, 0.0668, 0.0647, ..., -0.0420, -0.0534, -0.0474], |
|
|
[ 0.0003, 0.0139, -0.0123, ..., -0.0435, -0.0222, -0.0304]], |
|
|
device='cuda:0'), grad: tensor([[-1.2696e-05, -2.6584e-05, -2.5883e-05, ..., 2.2784e-05, |
|
|
1.7941e-05, 1.9476e-05], |
|
|
[ 3.1024e-05, -3.7383e-06, -1.1204e-06, ..., 2.1696e-05, |
|
|
2.3231e-05, 2.0608e-05], |
|
|
[ 7.4089e-05, 8.7768e-06, 1.0565e-05, ..., 2.9594e-05, |
|
|
3.3528e-05, 3.1263e-05], |
|
|
..., |
|
|
[ 1.1605e-04, 3.8505e-05, 4.2140e-05, ..., 2.1189e-05, |
|
|
3.2544e-05, 2.7776e-05], |
|
|
[ 2.7940e-05, 4.8168e-06, 5.5768e-06, ..., 9.3654e-06, |
|
|
1.1064e-05, 1.0468e-05], |
|
|
[ 4.4197e-05, 7.8306e-06, 8.4937e-06, ..., 1.2912e-05, |
|
|
1.5251e-05, 1.4573e-05]], device='cuda:0') |
|
|
Epoch 57, bias, value: tensor([ 0.0457, -0.0097, 0.0232, 0.0087, -0.0423, -0.0132, -0.0086], |
|
|
device='cuda:0'), grad: tensor([ 2.4304e-05, 1.1349e-04, 2.1267e-04, -8.1396e-04, 2.6894e-04, |
|
|
7.5221e-05, 1.1951e-04], device='cuda:0') |
|
|
588 |
|
|
0.000954915028125264 |
|
|
changing lr |
|
|
epoch 56, time 792.00, cls_loss 0.0014 cls_loss_mapping 0.0058 cls_loss_causal 0.3947 re_mapping 0.0070 re_causal 0.0153 |
|
|
Epoch 58, weight, value: tensor([[ 0.0812, 0.0743, 0.0703, ..., 0.0182, 0.0545, 0.0495], |
|
|
[ 0.0314, 0.0216, 0.0072, ..., -0.0153, -0.0136, -0.0117], |
|
|
[-0.0222, -0.0291, 0.0006, ..., 0.0023, -0.0291, -0.0126], |
|
|
..., |
|
|
[-0.1183, -0.0930, -0.0755, ..., 0.0326, 0.0356, 0.0232], |
|
|
[ 0.0867, 0.0667, 0.0646, ..., -0.0420, -0.0533, -0.0474], |
|
|
[ 0.0002, 0.0138, -0.0123, ..., -0.0435, -0.0221, -0.0303]], |
|
|
device='cuda:0'), grad: tensor([[ 2.2018e-04, 4.3094e-05, 4.2945e-05, ..., 3.9011e-05, |
|
|
6.7592e-05, 6.3181e-05], |
|
|
[ 8.5890e-05, 1.3866e-05, 1.4685e-05, ..., 2.1249e-05, |
|
|
3.2306e-05, 3.1620e-05], |
|
|
[-4.9263e-05, -8.6352e-06, -7.9125e-06, ..., -1.0364e-05, |
|
|
-1.6361e-05, -1.6630e-05], |
|
|
..., |
|
|
[-2.1291e-04, -1.9759e-05, -2.5615e-05, ..., -6.2048e-05, |
|
|
-1.0628e-04, -9.5904e-05], |
|
|
[ 2.1315e-04, 7.1287e-05, 6.4909e-05, ..., 1.3031e-05, |
|
|
1.5482e-05, 2.2084e-05], |
|
|
[-3.4785e-04, -1.2279e-04, -1.1086e-04, ..., -1.4894e-05, |
|
|
-1.4380e-05, -2.6733e-05]], device='cuda:0') |
|
|
Epoch 58, bias, value: tensor([ 0.0465, -0.0097, 0.0230, 0.0087, -0.0424, -0.0135, -0.0087], |
|
|
device='cuda:0'), grad: tensor([ 0.0006, 0.0002, -0.0001, 0.0002, -0.0007, 0.0004, -0.0007], |
|
|
device='cuda:0') |
|
|
588 |
|
|
0.0008271337313934874 |
|
|
changing lr |
|
|
epoch 57, time 785.47, cls_loss 0.0014 cls_loss_mapping 0.0066 cls_loss_causal 0.3959 re_mapping 0.0070 re_causal 0.0155 |
|
|
Epoch 59, weight, value: tensor([[ 8.1379e-02, 7.4439e-02, 7.0417e-02, ..., 1.8152e-02, |
|
|
5.4398e-02, 4.9417e-02], |
|
|
[ 3.1237e-02, 2.1533e-02, 7.2049e-03, ..., -1.5339e-02, |
|
|
-1.3612e-02, -1.1715e-02], |
|
|
[-2.2203e-02, -2.9107e-02, 5.2069e-04, ..., 2.2342e-03, |
|
|
-2.9068e-02, -1.2622e-02], |
|
|
..., |
|
|
[-1.1797e-01, -9.2928e-02, -7.5440e-02, ..., 3.2604e-02, |
|
|
3.5572e-02, 2.3227e-02], |
|
|
[ 8.6427e-02, 6.6554e-02, 6.4479e-02, ..., -4.1890e-02, |
|
|
-5.3248e-02, -4.7302e-02], |
|
|
[ 3.9185e-05, 1.3749e-02, -1.2354e-02, ..., -4.3392e-02, |
|
|
-2.2147e-02, -3.0321e-02]], device='cuda:0'), grad: tensor([[-1.1235e-04, -3.5435e-05, -3.2008e-05, ..., -6.7614e-06, |
|
|
-2.2739e-05, -2.2516e-05], |
|
|
[ 2.6822e-07, -1.0356e-05, -1.0908e-05, ..., 5.3830e-06, |
|
|
3.4962e-06, 6.2659e-06], |
|
|
[ 2.7850e-05, 5.6587e-06, 7.6592e-06, ..., 5.3123e-06, |
|
|
9.2909e-06, 9.0823e-06], |
|
|
..., |
|
|
[ 8.6054e-07, 1.7315e-05, 9.5293e-06, ..., -1.4305e-05, |
|
|
-1.3761e-05, -1.5251e-05], |
|
|
[ 2.3812e-05, 5.7258e-06, 6.6236e-06, ..., 3.6899e-06, |
|
|
7.1824e-06, 6.9141e-06], |
|
|
[ 2.9907e-05, 8.1360e-06, 9.1270e-06, ..., 3.8054e-06, |
|
|
8.0764e-06, 7.7859e-06]], device='cuda:0') |
|
|
Epoch 59, bias, value: tensor([ 0.0468, -0.0098, 0.0229, 0.0086, -0.0417, -0.0137, -0.0092], |
|
|
device='cuda:0'), grad: tensor([-2.5082e-04, 2.2411e-05, 7.2360e-05, 6.9439e-05, -4.3809e-05, |
|
|
5.9098e-05, 7.1585e-05], device='cuda:0') |
|
|
588 |
|
|
0.00070775603199067 |
|
|
changing lr |
|
|
epoch 58, time 790.13, cls_loss 0.0015 cls_loss_mapping 0.0079 cls_loss_causal 0.3930 re_mapping 0.0070 re_causal 0.0153 |
|
|
Epoch 60, weight, value: tensor([[ 0.0813, 0.0744, 0.0704, ..., 0.0181, 0.0543, 0.0493], |
|
|
[ 0.0312, 0.0215, 0.0072, ..., -0.0153, -0.0136, -0.0117], |
|
|
[-0.0220, -0.0291, 0.0005, ..., 0.0022, -0.0290, -0.0126], |
|
|
..., |
|
|
[-0.1179, -0.0928, -0.0754, ..., 0.0325, 0.0355, 0.0232], |
|
|
[ 0.0865, 0.0665, 0.0644, ..., -0.0418, -0.0531, -0.0472], |
|
|
[-0.0002, 0.0137, -0.0124, ..., -0.0433, -0.0222, -0.0303]], |
|
|
device='cuda:0'), grad: tensor([[-1.3614e-04, -4.7654e-05, -4.9800e-05, ..., -9.0376e-06, |
|
|
-2.8431e-05, -3.0428e-05], |
|
|
[ 2.2963e-05, -1.4622e-07, 5.2713e-07, ..., 7.8231e-06, |
|
|
9.9093e-06, 9.8273e-06], |
|
|
[ 2.0003e-04, 4.9949e-05, 5.4389e-05, ..., 3.1143e-05, |
|
|
4.8310e-05, 4.9323e-05], |
|
|
..., |
|
|
[-3.0428e-05, 3.3360e-06, 2.5891e-06, ..., -6.1467e-06, |
|
|
-7.1004e-06, -7.6443e-06], |
|
|
[ 1.8820e-05, 4.6901e-06, 5.1148e-06, ..., 3.0342e-06, |
|
|
4.5076e-06, 4.5747e-06], |
|
|
[ 3.3736e-05, 9.0003e-06, 9.7901e-06, ..., 5.9679e-06, |
|
|
8.0764e-06, 8.0466e-06]], device='cuda:0') |
|
|
Epoch 60, bias, value: tensor([ 0.0465, -0.0098, 0.0233, 0.0084, -0.0418, -0.0132, -0.0096], |
|
|
device='cuda:0'), grad: tensor([-2.8539e-04, 7.4804e-05, 4.8685e-04, -2.9206e-04, -1.1086e-04, |
|
|
4.5896e-05, 8.0407e-05], device='cuda:0') |
|
|
588 |
|
|
0.0005970223407163104 |
|
|
changing lr |
|
|
epoch 59, time 787.39, cls_loss 0.0013 cls_loss_mapping 0.0065 cls_loss_causal 0.4018 re_mapping 0.0070 re_causal 0.0152 |
|
|
Epoch 61, weight, value: tensor([[ 0.0813, 0.0745, 0.0705, ..., 0.0181, 0.0542, 0.0493], |
|
|
[ 0.0313, 0.0215, 0.0072, ..., -0.0153, -0.0135, -0.0116], |
|
|
[-0.0220, -0.0291, 0.0005, ..., 0.0022, -0.0289, -0.0125], |
|
|
..., |
|
|
[-0.1179, -0.0928, -0.0754, ..., 0.0324, 0.0354, 0.0231], |
|
|
[ 0.0865, 0.0664, 0.0644, ..., -0.0417, -0.0530, -0.0471], |
|
|
[-0.0003, 0.0136, -0.0124, ..., -0.0433, -0.0222, -0.0303]], |
|
|
device='cuda:0'), grad: tensor([[ 6.6161e-05, 2.5108e-05, 2.2292e-05, ..., -2.0247e-06, |
|
|
2.7250e-06, 2.9895e-06], |
|
|
[ 1.3210e-05, 1.6913e-05, 1.7300e-05, ..., -6.8434e-06, |
|
|
-1.0744e-05, -8.6725e-06], |
|
|
[ 5.3674e-05, 3.2604e-05, 3.2365e-05, ..., 5.1893e-06, |
|
|
6.1728e-06, 6.3814e-06], |
|
|
..., |
|
|
[-1.4462e-05, 5.9456e-06, 9.0003e-06, ..., 2.1271e-06, |
|
|
-2.9467e-06, -3.6284e-06], |
|
|
[-7.7200e-04, -3.9530e-04, -4.0317e-04, ..., -4.7147e-05, |
|
|
-6.9857e-05, -7.2420e-05], |
|
|
[ 4.9162e-04, 2.4366e-04, 2.4891e-04, ..., 3.4362e-05, |
|
|
5.1647e-05, 5.2601e-05]], device='cuda:0') |
|
|
Epoch 61, bias, value: tensor([ 0.0466, -0.0095, 0.0235, 0.0083, -0.0421, -0.0130, -0.0101], |
|
|
device='cuda:0'), grad: tensor([ 1.3614e-04, -1.2711e-05, 6.5029e-05, 2.9325e-04, -7.4446e-05, |
|
|
-1.1940e-03, 7.8773e-04], device='cuda:0') |
|
|
588 |
|
|
0.0004951556604879052 |
|
|
changing lr |
|
|
epoch 60, time 786.59, cls_loss 0.0013 cls_loss_mapping 0.0053 cls_loss_causal 0.4019 re_mapping 0.0070 re_causal 0.0155 |
|
|
Epoch 62, weight, value: tensor([[ 0.0815, 0.0745, 0.0705, ..., 0.0181, 0.0542, 0.0493], |
|
|
[ 0.0313, 0.0215, 0.0073, ..., -0.0153, -0.0135, -0.0116], |
|
|
[-0.0220, -0.0291, 0.0004, ..., 0.0022, -0.0289, -0.0125], |
|
|
..., |
|
|
[-0.1179, -0.0927, -0.0754, ..., 0.0324, 0.0353, 0.0230], |
|
|
[ 0.0863, 0.0664, 0.0643, ..., -0.0417, -0.0530, -0.0471], |
|
|
[-0.0004, 0.0135, -0.0125, ..., -0.0433, -0.0222, -0.0303]], |
|
|
device='cuda:0'), grad: tensor([[-1.4246e-04, -1.9640e-05, -3.0264e-05, ..., -6.9857e-05, |
|
|
-1.2910e-04, -1.1581e-04], |
|
|
[-1.0908e-05, -6.3218e-06, -5.1782e-06, ..., 2.3730e-06, |
|
|
3.9525e-06, 3.3639e-06], |
|
|
[ 2.5630e-05, 3.5446e-06, 5.0627e-06, ..., 1.0781e-05, |
|
|
2.0429e-05, 1.8463e-05], |
|
|
..., |
|
|
[ 3.6359e-05, 7.1079e-06, 9.3430e-06, ..., 1.7747e-05, |
|
|
3.2604e-05, 2.9057e-05], |
|
|
[ 1.3679e-05, 2.3674e-06, 3.1535e-06, ..., 5.2191e-06, |
|
|
9.7379e-06, 8.8066e-06], |
|
|
[ 2.7865e-05, 5.3830e-06, 6.8247e-06, ..., 1.0662e-05, |
|
|
1.9848e-05, 1.7881e-05]], device='cuda:0') |
|
|
Epoch 62, bias, value: tensor([ 0.0468, -0.0092, 0.0235, 0.0086, -0.0424, -0.0132, -0.0102], |
|
|
device='cuda:0'), grad: tensor([-5.7840e-04, 9.1270e-07, 9.4593e-05, 1.9586e-04, 1.4210e-04, |
|
|
4.8250e-05, 9.6798e-05], device='cuda:0') |
|
|
588 |
|
|
0.00040236113724274745 |
|
|
changing lr |
|
|
---------------------saving model at epoch 61---------------------------------------------------- |
|
|
epoch 61, time 798.02, cls_loss 0.0014 cls_loss_mapping 0.0060 cls_loss_causal 0.3726 re_mapping 0.0069 re_causal 0.0147 |
|
|
Epoch 63, weight, value: tensor([[ 0.0815, 0.0745, 0.0706, ..., 0.0181, 0.0542, 0.0492], |
|
|
[ 0.0313, 0.0215, 0.0073, ..., -0.0153, -0.0135, -0.0116], |
|
|
[-0.0219, -0.0290, 0.0004, ..., 0.0022, -0.0288, -0.0125], |
|
|
..., |
|
|
[-0.1179, -0.0927, -0.0753, ..., 0.0323, 0.0353, 0.0230], |
|
|
[ 0.0863, 0.0663, 0.0642, ..., -0.0416, -0.0529, -0.0470], |
|
|
[-0.0005, 0.0135, -0.0125, ..., -0.0432, -0.0222, -0.0303]], |
|
|
device='cuda:0'), grad: tensor([[-4.3929e-05, -3.4690e-05, -4.0025e-05, ..., -3.5614e-06, |
|
|
-5.4650e-06, -4.4927e-06], |
|
|
[ 5.9128e-05, 2.5079e-05, 2.8685e-05, ..., 1.2778e-05, |
|
|
1.7628e-05, 1.6913e-05], |
|
|
[-8.0585e-05, -1.8463e-05, -1.8775e-05, ..., -5.4836e-05, |
|
|
-6.4671e-05, -6.2823e-05], |
|
|
..., |
|
|
[-6.9857e-05, -3.1013e-06, -4.7386e-06, ..., 1.1753e-06, |
|
|
-6.4522e-06, -6.5900e-06], |
|
|
[ 2.0623e-05, 6.4410e-06, 7.3016e-06, ..., 4.1425e-06, |
|
|
6.3218e-06, 6.1989e-06], |
|
|
[ 3.7521e-05, 1.0610e-05, 1.1824e-05, ..., 1.3597e-05, |
|
|
1.7479e-05, 1.6838e-05]], device='cuda:0') |
|
|
Epoch 63, bias, value: tensor([ 0.0468, -0.0093, 0.0235, 0.0086, -0.0424, -0.0130, -0.0104], |
|
|
device='cuda:0'), grad: tensor([-1.0625e-05, 1.1456e-04, -2.4128e-04, 2.0921e-04, -2.1255e-04, |
|
|
4.9680e-05, 9.0718e-05], device='cuda:0') |
|
|
588 |
|
|
0.00031882564680131423 |
|
|
changing lr |
|
|
epoch 62, time 791.56, cls_loss 0.0014 cls_loss_mapping 0.0056 cls_loss_causal 0.3860 re_mapping 0.0069 re_causal 0.0145 |
|
|
Epoch 64, weight, value: tensor([[ 0.0815, 0.0746, 0.0706, ..., 0.0180, 0.0541, 0.0492], |
|
|
[ 0.0313, 0.0215, 0.0073, ..., -0.0153, -0.0135, -0.0116], |
|
|
[-0.0220, -0.0290, 0.0004, ..., 0.0022, -0.0288, -0.0125], |
|
|
..., |
|
|
[-0.1178, -0.0926, -0.0753, ..., 0.0323, 0.0353, 0.0230], |
|
|
[ 0.0862, 0.0663, 0.0642, ..., -0.0416, -0.0529, -0.0470], |
|
|
[-0.0006, 0.0135, -0.0125, ..., -0.0432, -0.0222, -0.0303]], |
|
|
device='cuda:0'), grad: tensor([[ 4.8518e-04, 1.4091e-04, 1.4400e-04, ..., 5.1767e-05, |
|
|
1.0324e-04, 1.0949e-04], |
|
|
[-1.6241e-03, -4.9543e-04, -4.9257e-04, ..., -1.2589e-04, |
|
|
-2.7442e-04, -3.0184e-04], |
|
|
[-9.9719e-05, -6.7130e-06, -1.5736e-05, ..., -2.8729e-05, |
|
|
-4.7058e-05, -3.9220e-05], |
|
|
..., |
|
|
[ 1.5032e-04, 4.4167e-05, 4.5151e-05, ..., 1.7181e-05, |
|
|
3.1292e-05, 3.0518e-05], |
|
|
[ 7.5758e-05, 2.1458e-05, 2.2143e-05, ..., 7.0743e-06, |
|
|
1.4804e-05, 1.5438e-05], |
|
|
[ 2.1255e-04, 7.0274e-05, 7.1526e-05, ..., 1.7896e-05, |
|
|
3.7909e-05, 3.9816e-05]], device='cuda:0') |
|
|
Epoch 64, bias, value: tensor([ 0.0468, -0.0092, 0.0235, 0.0086, -0.0423, -0.0131, -0.0105], |
|
|
device='cuda:0'), grad: tensor([ 0.0011, -0.0037, -0.0003, 0.0019, 0.0003, 0.0002, 0.0005], |
|
|
device='cuda:0') |
|
|
588 |
|
|
0.0002447174185242325 |
|
|
changing lr |
|
|
epoch 63, time 792.66, cls_loss 0.0014 cls_loss_mapping 0.0055 cls_loss_causal 0.3893 re_mapping 0.0069 re_causal 0.0150 |
|
|
Epoch 65, weight, value: tensor([[ 0.0815, 0.0746, 0.0706, ..., 0.0180, 0.0541, 0.0491], |
|
|
[ 0.0312, 0.0214, 0.0073, ..., -0.0153, -0.0135, -0.0116], |
|
|
[-0.0220, -0.0290, 0.0003, ..., 0.0022, -0.0288, -0.0125], |
|
|
..., |
|
|
[-0.1177, -0.0926, -0.0753, ..., 0.0323, 0.0352, 0.0230], |
|
|
[ 0.0862, 0.0662, 0.0642, ..., -0.0416, -0.0529, -0.0470], |
|
|
[-0.0006, 0.0135, -0.0125, ..., -0.0432, -0.0222, -0.0303]], |
|
|
device='cuda:0'), grad: tensor([[ 8.9228e-05, 1.2606e-05, 1.9163e-05, ..., 2.3171e-05, |
|
|
3.9995e-05, 3.6895e-05], |
|
|
[ 2.4647e-05, 4.6715e-06, 5.2266e-06, ..., 1.1235e-05, |
|
|
1.3925e-05, 1.3247e-05], |
|
|
[-1.8505e-06, -1.0058e-06, -1.2983e-06, ..., 3.5521e-06, |
|
|
4.2692e-06, 3.7104e-06], |
|
|
..., |
|
|
[-1.0407e-04, -1.2234e-05, -2.0415e-05, ..., -3.8534e-05, |
|
|
-6.0707e-05, -5.6356e-05], |
|
|
[ 2.6360e-05, 9.4771e-06, 9.5293e-06, ..., 3.1702e-06, |
|
|
6.7726e-06, 6.0052e-06], |
|
|
[-2.6450e-05, -1.1444e-05, -1.0967e-05, ..., 1.0896e-07, |
|
|
-3.2596e-06, -2.4661e-06]], device='cuda:0') |
|
|
Epoch 65, bias, value: tensor([ 0.0467, -0.0093, 0.0235, 0.0087, -0.0422, -0.0130, -0.0106], |
|
|
device='cuda:0'), grad: tensor([ 2.6202e-04, 7.3969e-05, 4.8243e-06, -1.3702e-05, -3.3689e-04, |
|
|
5.6893e-05, -4.6700e-05], device='cuda:0') |
|
|
588 |
|
|
0.0001801856965207339 |
|
|
changing lr |
|
|
epoch 64, time 789.94, cls_loss 0.0014 cls_loss_mapping 0.0060 cls_loss_causal 0.3751 re_mapping 0.0069 re_causal 0.0144 |
|
|
Epoch 66, weight, value: tensor([[ 0.0815, 0.0746, 0.0706, ..., 0.0180, 0.0541, 0.0491], |
|
|
[ 0.0312, 0.0214, 0.0072, ..., -0.0152, -0.0135, -0.0116], |
|
|
[-0.0220, -0.0290, 0.0003, ..., 0.0022, -0.0288, -0.0125], |
|
|
..., |
|
|
[-0.1177, -0.0926, -0.0753, ..., 0.0323, 0.0352, 0.0230], |
|
|
[ 0.0862, 0.0662, 0.0642, ..., -0.0415, -0.0528, -0.0469], |
|
|
[-0.0006, 0.0135, -0.0125, ..., -0.0432, -0.0222, -0.0303]], |
|
|
device='cuda:0'), grad: tensor([[ 2.0289e-04, 4.7714e-05, 4.6968e-05, ..., 2.2978e-05, |
|
|
5.6028e-05, 5.4181e-05], |
|
|
[-1.6952e-04, -6.3956e-05, -7.1347e-05, ..., -3.0875e-05, |
|
|
-5.2601e-05, -5.0157e-05], |
|
|
[-2.3925e-04, -3.6091e-05, -2.8566e-05, ..., -1.7762e-05, |
|
|
-6.2704e-05, -6.1035e-05], |
|
|
..., |
|
|
[ 5.8234e-05, 1.5043e-05, 1.5035e-05, ..., 6.8173e-06, |
|
|
1.5691e-05, 1.5207e-05], |
|
|
[ 3.3915e-05, 7.8082e-06, 7.6219e-06, ..., 3.8110e-06, |
|
|
9.4622e-06, 9.1419e-06], |
|
|
[ 4.7415e-05, 1.1221e-05, 1.1034e-05, ..., 5.9307e-06, |
|
|
1.4342e-05, 1.3761e-05]], device='cuda:0') |
|
|
Epoch 66, bias, value: tensor([ 0.0466, -0.0093, 0.0235, 0.0087, -0.0421, -0.0130, -0.0106], |
|
|
device='cuda:0'), grad: tensor([ 5.1594e-04, -3.5167e-04, -6.7663e-04, 1.6046e-04, 1.4317e-04, |
|
|
8.6904e-05, 1.2153e-04], device='cuda:0') |
|
|
588 |
|
|
0.000125360439090882 |
|
|
changing lr |
|
|
epoch 65, time 792.08, cls_loss 0.0013 cls_loss_mapping 0.0047 cls_loss_causal 0.4086 re_mapping 0.0070 re_causal 0.0149 |
|
|
Epoch 67, weight, value: tensor([[ 0.0815, 0.0746, 0.0706, ..., 0.0180, 0.0541, 0.0491], |
|
|
[ 0.0312, 0.0214, 0.0072, ..., -0.0152, -0.0135, -0.0116], |
|
|
[-0.0220, -0.0290, 0.0003, ..., 0.0022, -0.0288, -0.0125], |
|
|
..., |
|
|
[-0.1176, -0.0926, -0.0752, ..., 0.0323, 0.0352, 0.0230], |
|
|
[ 0.0861, 0.0662, 0.0641, ..., -0.0415, -0.0528, -0.0469], |
|
|
[-0.0006, 0.0134, -0.0125, ..., -0.0432, -0.0222, -0.0303]], |
|
|
device='cuda:0'), grad: tensor([[ 8.9169e-05, 1.9014e-05, 2.5526e-05, ..., 8.1286e-06, |
|
|
2.1189e-05, 2.0862e-05], |
|
|
[-7.4327e-05, -1.5467e-05, -2.2471e-05, ..., -1.4305e-06, |
|
|
-1.2703e-05, -1.2293e-05], |
|
|
[ 6.7241e-06, 6.5006e-07, 7.9162e-07, ..., -1.1548e-06, |
|
|
-6.4261e-07, -3.6508e-07], |
|
|
..., |
|
|
[-1.8179e-05, 2.3656e-07, -1.3821e-06, ..., -5.5097e-06, |
|
|
-9.0227e-06, -9.2089e-06], |
|
|
[ 1.1787e-05, 3.2131e-06, 3.6694e-06, ..., 1.5590e-06, |
|
|
3.1739e-06, 3.1292e-06], |
|
|
[-2.1517e-05, -9.9093e-06, -8.9705e-06, ..., -1.8366e-06, |
|
|
-3.9525e-06, -4.0568e-06]], device='cuda:0') |
|
|
Epoch 67, bias, value: tensor([ 0.0467, -0.0093, 0.0235, 0.0086, -0.0421, -0.0130, -0.0106], |
|
|
device='cuda:0'), grad: tensor([ 2.2435e-04, -1.8561e-04, 1.4521e-05, 1.4886e-05, -6.1214e-05, |
|
|
2.7463e-05, -3.4809e-05], device='cuda:0') |
|
|
588 |
|
|
8.03520570068517e-05 |
|
|
changing lr |
|
|
epoch 66, time 791.60, cls_loss 0.0012 cls_loss_mapping 0.0048 cls_loss_causal 0.3824 re_mapping 0.0069 re_causal 0.0144 |
|
|
Epoch 68, weight, value: tensor([[ 0.0815, 0.0746, 0.0706, ..., 0.0180, 0.0540, 0.0491], |
|
|
[ 0.0312, 0.0214, 0.0072, ..., -0.0152, -0.0135, -0.0116], |
|
|
[-0.0220, -0.0290, 0.0003, ..., 0.0022, -0.0288, -0.0125], |
|
|
..., |
|
|
[-0.1176, -0.0926, -0.0752, ..., 0.0323, 0.0352, 0.0230], |
|
|
[ 0.0861, 0.0662, 0.0641, ..., -0.0415, -0.0528, -0.0469], |
|
|
[-0.0006, 0.0134, -0.0125, ..., -0.0432, -0.0222, -0.0303]], |
|
|
device='cuda:0'), grad: tensor([[-3.4499e-04, -1.0622e-04, -1.1998e-04, ..., -5.3972e-05, |
|
|
-8.1837e-05, -8.7082e-05], |
|
|
[ 3.6597e-05, 1.3076e-05, 1.3515e-05, ..., -1.1846e-05, |
|
|
-9.7007e-06, -7.8529e-06], |
|
|
[ 2.3949e-04, 6.9976e-05, 7.8917e-05, ..., 4.0054e-05, |
|
|
6.0946e-05, 6.2346e-05], |
|
|
..., |
|
|
[ 8.7678e-05, 2.6032e-05, 2.8893e-05, ..., 1.6496e-05, |
|
|
2.5570e-05, 2.4974e-05], |
|
|
[ 6.9022e-05, 2.1338e-05, 2.3648e-05, ..., 9.4622e-06, |
|
|
1.6078e-05, 1.6108e-05], |
|
|
[-8.6308e-05, -3.0845e-05, -3.1888e-05, ..., 2.0768e-06, |
|
|
-9.3579e-06, -7.0520e-06]], device='cuda:0') |
|
|
Epoch 68, bias, value: tensor([ 0.0467, -0.0093, 0.0235, 0.0086, -0.0420, -0.0130, -0.0106], |
|
|
device='cuda:0'), grad: tensor([-7.2241e-04, 5.9903e-05, 5.2357e-04, -2.9445e-05, 1.9670e-04, |
|
|
1.4806e-04, -1.7786e-04], device='cuda:0') |
|
|
588 |
|
|
4.5251191160326525e-05 |
|
|
changing lr |
|
|
epoch 67, time 790.33, cls_loss 0.0011 cls_loss_mapping 0.0054 cls_loss_causal 0.4041 re_mapping 0.0069 re_causal 0.0150 |
|
|
Epoch 69, weight, value: tensor([[ 0.0815, 0.0746, 0.0706, ..., 0.0180, 0.0540, 0.0491], |
|
|
[ 0.0312, 0.0214, 0.0072, ..., -0.0152, -0.0135, -0.0116], |
|
|
[-0.0220, -0.0290, 0.0003, ..., 0.0022, -0.0288, -0.0125], |
|
|
..., |
|
|
[-0.1176, -0.0925, -0.0752, ..., 0.0323, 0.0352, 0.0230], |
|
|
[ 0.0861, 0.0662, 0.0641, ..., -0.0415, -0.0528, -0.0469], |
|
|
[-0.0006, 0.0134, -0.0125, ..., -0.0432, -0.0222, -0.0303]], |
|
|
device='cuda:0'), grad: tensor([[ 3.5071e-04, 1.1247e-04, 1.2410e-04, ..., 2.8133e-05, |
|
|
6.1333e-05, 5.7489e-05], |
|
|
[-6.8474e-04, -2.2268e-04, -2.4223e-04, ..., -5.7817e-05, |
|
|
-1.1975e-04, -1.1617e-04], |
|
|
[-9.4235e-05, -2.9683e-05, -3.3438e-05, ..., -1.4827e-05, |
|
|
-2.4989e-05, -2.4036e-05], |
|
|
..., |
|
|
[ 1.2064e-04, 3.9995e-05, 4.2647e-05, ..., 1.1832e-05, |
|
|
2.3708e-05, 2.3514e-05], |
|
|
[ 5.0396e-05, 1.6794e-05, 1.8194e-05, ..., 4.7646e-06, |
|
|
9.4473e-06, 9.3356e-06], |
|
|
[ 1.3649e-04, 4.4614e-05, 4.8399e-05, ..., 1.2234e-05, |
|
|
2.4438e-05, 2.3961e-05]], device='cuda:0') |
|
|
Epoch 69, bias, value: tensor([ 0.0467, -0.0093, 0.0234, 0.0086, -0.0420, -0.0131, -0.0106], |
|
|
device='cuda:0'), grad: tensor([ 0.0008, -0.0015, -0.0002, 0.0003, 0.0003, 0.0001, 0.0003], |
|
|
device='cuda:0') |
|
|
588 |
|
|
2.0128530023804673e-05 |
|
|
changing lr |
|
|
epoch 68, time 787.75, cls_loss 0.0012 cls_loss_mapping 0.0053 cls_loss_causal 0.3859 re_mapping 0.0069 re_causal 0.0147 |
|
|
Epoch 70, weight, value: tensor([[ 0.0815, 0.0746, 0.0706, ..., 0.0180, 0.0540, 0.0491], |
|
|
[ 0.0312, 0.0214, 0.0072, ..., -0.0152, -0.0135, -0.0116], |
|
|
[-0.0220, -0.0290, 0.0003, ..., 0.0022, -0.0288, -0.0125], |
|
|
..., |
|
|
[-0.1176, -0.0925, -0.0752, ..., 0.0323, 0.0352, 0.0230], |
|
|
[ 0.0861, 0.0662, 0.0641, ..., -0.0415, -0.0528, -0.0469], |
|
|
[-0.0006, 0.0134, -0.0125, ..., -0.0431, -0.0222, -0.0303]], |
|
|
device='cuda:0'), grad: tensor([[-1.6153e-04, -5.3585e-05, -5.9724e-05, ..., -3.2395e-05, |
|
|
-5.6535e-05, -5.7220e-05], |
|
|
[ 8.6248e-05, 2.2650e-05, 2.6375e-05, ..., 2.1592e-05, |
|
|
3.2693e-05, 3.2932e-05], |
|
|
[ 4.7624e-05, 1.4886e-05, 1.6645e-05, ..., 1.0297e-05, |
|
|
1.6779e-05, 1.7002e-05], |
|
|
..., |
|
|
[ 4.1991e-05, 1.4745e-05, 1.6540e-05, ..., 7.4059e-06, |
|
|
1.2279e-05, 1.2465e-05], |
|
|
[-8.6069e-05, -4.3333e-05, -4.7565e-05, ..., -1.9930e-06, |
|
|
-4.6790e-06, -6.3106e-06], |
|
|
[ 7.8261e-05, 3.6418e-05, 4.0323e-05, ..., 4.9211e-06, |
|
|
8.8066e-06, 9.9540e-06]], device='cuda:0') |
|
|
Epoch 70, bias, value: tensor([ 0.0467, -0.0093, 0.0234, 0.0086, -0.0420, -0.0131, -0.0106], |
|
|
device='cuda:0'), grad: tensor([-3.4833e-04, 1.9848e-04, 1.0222e-04, -4.1157e-05, 8.4817e-05, |
|
|
-1.1569e-04, 1.1945e-04], device='cuda:0') |
|
|
588 |
|
|
5.034667293427056e-06 |
|
|
changing lr |
|
|
epoch 69, time 799.21, cls_loss 0.0015 cls_loss_mapping 0.0062 cls_loss_causal 0.3843 re_mapping 0.0069 re_causal 0.0145 |
|
|
---------------------saving last model at epoch 69---------------------------------------------------- |
|
|
/home/yuqian_fu |
|
|
{'gpu': '0', 'svroot': '/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/saved-PACS//sketch/CA_multiple_16fa_v2_ep70_lr0.01_cosine_base0.01_bs6_lamCa_1_lamRe1_adt4_cls1_EW2_70_rmTrue_rnTrue_str5_WithStyleAttackExp1', 'source_domain': 'sketch', 'svpath': '/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/saved-PACS//sketch/CA_multiple_16fa_v2_ep70_lr0.01_cosine_base0.01_bs6_lamCa_1_lamRe1_adt4_cls1_EW2_70_rmTrue_rnTrue_str5_WithStyleAttackExp1/sketch_16factor_last_test_check.csv', 'factor_num': 16, 'epoch': 'last', 'stride': 5, 'eval_mapping': False, 'network': 'resnet18'} |
|
|
-------------------------------------loading pretrain weights---------------------------------- |
|
|
loading weight of last |
|
|
randm: False |
|
|
stride: 5 |
|
|
loading weight of last |
|
|
loading weight of last |
|
|
loading weight of last |
|
|
loading weight of last |
|
|
loading weight of last |
|
|
loading weight of last |
|
|
loading weight of last |
|
|
loading weight of last |
|
|
loading weight of last |
|
|
loading weight of last |
|
|
loading weight of last |
|
|
loading weight of last |
|
|
loading weight of last |
|
|
loading weight of last |
|
|
loading weight of last |
|
|
loading weight of last |
|
|
loading weight of last |
|
|
columns: ['sketch', 'art_painting', 'cartoon', 'photo'] |
|
|
/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/data/PACS/sketch_test.hdf5 torch.Size([3929, 3, 227, 227]) torch.Size([3929]) |
|
|
/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/data/PACS/art_painting_test.hdf5 torch.Size([2048, 3, 227, 227]) torch.Size([2048]) |
|
|
/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/data/PACS/cartoon_test.hdf5 torch.Size([2344, 3, 227, 227]) torch.Size([2344]) |
|
|
/data/work-gcp-europe-west4-a/yuqian_fu/datasets/SingleSourceDG/data/PACS/photo_test.hdf5 torch.Size([1670, 3, 227, 227]) torch.Size([1670]) |
|
|
sketch art_painting cartoon photo Avg |
|
|
w/o do (original x) 99.338254 43.212891 56.186007 46.467066 48.621988 |
|
|
sketch art_painting cartoon photo Avg |
|
|
do 99.28735 46.630859 60.025597 52.035928 52.897462 |
|
|
|