Jinawei commited on
Commit
0beadfd
·
1 Parent(s): 538db6a

Upload 8 files

Browse files
config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/home.local/jianwei/workspace/archive/SparseOptimizer/output/Layer_7_12_Hid_160_768_Head_10_12_IMRatio_3.5",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "embedding_size": 160,
9
+ "finetuning_task": "stsb",
10
+ "gradient_checkpointing": false,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 160,
14
+ "id2label": {
15
+ "0": "LABEL_0"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 560,
19
+ "label2id": {
20
+ "LABEL_0": 0
21
+ },
22
+ "layer_norm_eps": 1e-12,
23
+ "max_position_embeddings": 512,
24
+ "model_type": "bert",
25
+ "num_attention_heads": 10,
26
+ "num_hidden_layers": 7,
27
+ "output_intermediate": true,
28
+ "output_past": true,
29
+ "pad_token_id": 0,
30
+ "position_embedding_type": "absolute",
31
+ "problem_type": "regression",
32
+ "torch_dtype": "float32",
33
+ "transformers_version": "4.17.0",
34
+ "type_vocab_size": 2,
35
+ "use_cache": true,
36
+ "vocab_size": 30522
37
+ }
log_bs32_lr3e-05_20221124_035004_897265.txt ADDED
@@ -0,0 +1,1294 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ------------> log file ==runs2/stsb/OUTPUT_ID/log_bs32_lr3e-05_20221124_035004_897265.txt
2
+ Namespace(aug_train=False, data_dir='/home.local/jianwei/datasets/nlp/glue_data/STS-B', do_eval=False, early_stop=False, early_stop_metric='accuracy', eval_step=120, gradient_accumulation_steps=1, learning_rate=3e-05, local_rank=0, lr_scheduler_type=<SchedulerType.CONSTANT_WITH_WARMUP: 'constant_with_warmup'>, max_length=128, max_train_steps=None, model_name_or_path='/home.local/jianwei/workspace/archive/SparseOptimizer/output/Layer_7_12_Hid_160_768_Head_10_12_IMRatio_3.5', num_train_epochs=30, num_warmup_steps=0, output_dir='runs2/stsb/OUTPUT_ID', pad_to_max_length=False, per_device_eval_batch_size=32, per_device_train_batch_size=32, print_step=5, save_last=False, seed=None, task_name='stsb', train_file=None, use_slow_tokenizer=False, validation_file=None, weight_decay=0.0)
3
+ Distributed environment: NO
4
+ Num processes: 1
5
+ Process index: 0
6
+ Local process index: 0
7
+ Device: cuda
8
+ Mixed precision type: fp16
9
+
10
+ Sample 4674 of the training set: (tensor([ 101, 10079, 3629, 3102, 2048, 12632, 2336, 102, 10079, 4894,
11
+ 8563, 2340, 12632, 2336, 102, 0, 0, 0, 0, 0,
12
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
15
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
19
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
20
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
21
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
22
+ 0, 0, 0, 0, 0, 0, 0, 0]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
23
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
24
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
25
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
26
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
27
+ 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
28
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
29
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
30
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
31
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
32
+ 0, 0, 0, 0, 0, 0, 0, 0]), tensor(3.)).
33
+ Sample 112 of the training set: (tensor([ 101, 1037, 2879, 2003, 9361, 1037, 21854, 11563, 1012, 102,
34
+ 1037, 2879, 2003, 2559, 2012, 1037, 8094, 1012, 102, 0,
35
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
36
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
37
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
38
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
39
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
40
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
41
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
42
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
43
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
45
+ 0, 0, 0, 0, 0, 0, 0, 0]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
46
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
47
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
48
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
50
+ 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
51
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
52
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
53
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
54
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
55
+ 0, 0, 0, 0, 0, 0, 0, 0]), tensor(3.8000)).
56
+ Sample 4529 of the training set: (tensor([ 101, 3725, 3844, 2015, 2091, 4264, 2004, 3586, 6240, 9446,
57
+ 6561, 2605, 102, 7327, 7767, 1005, 4340, 1005, 2000, 10663,
58
+ 3586, 4168, 4017, 9446, 102, 0, 0, 0, 0, 0,
59
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
60
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
61
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
62
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
63
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
64
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
65
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
66
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
67
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
68
+ 0, 0, 0, 0, 0, 0, 0, 0]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
69
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
70
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
71
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
72
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
73
+ 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
74
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
75
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
76
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
77
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
78
+ 0, 0, 0, 0, 0, 0, 0, 0]), tensor(3.)).
79
+ ***** Running training *****
80
+ Num examples = 5749
81
+ Num Epochs = 30
82
+ Instantaneous batch size per device = 32
83
+ Total train batch size (w. parallel, distributed & accumulation) = 32
84
+ Gradient Accumulation steps = 1
85
+ Total optimization steps = 5400
86
+ 000005/005400, loss: 10.919802, avg_loss: 10.361012
87
+ 000010/005400, loss: 10.181771, avg_loss: 9.698319
88
+ 000015/005400, loss: 7.925676, avg_loss: 9.470798
89
+ 000020/005400, loss: 9.417774, avg_loss: 9.406657
90
+ 000025/005400, loss: 11.084503, avg_loss: 9.472598
91
+ 000030/005400, loss: 7.033692, avg_loss: 9.447541
92
+ 000035/005400, loss: 9.298050, avg_loss: 9.386552
93
+ 000040/005400, loss: 8.342388, avg_loss: 9.386284
94
+ 000045/005400, loss: 9.821406, avg_loss: 9.483932
95
+ 000050/005400, loss: 9.257509, avg_loss: 9.586469
96
+ 000055/005400, loss: 8.752683, avg_loss: 9.634466
97
+ 000060/005400, loss: 6.560993, avg_loss: 9.559579
98
+ 000065/005400, loss: 9.872775, avg_loss: 9.555094
99
+ 000070/005400, loss: 9.549786, avg_loss: 9.570707
100
+ 000075/005400, loss: 9.400767, avg_loss: 9.534591
101
+ 000080/005400, loss: 9.152719, avg_loss: 9.532532
102
+ 000085/005400, loss: 10.023327, avg_loss: 9.548860
103
+ 000090/005400, loss: 8.150848, avg_loss: 9.539588
104
+ 000095/005400, loss: 8.193304, avg_loss: 9.481100
105
+ 000100/005400, loss: 8.688814, avg_loss: 9.431278
106
+ 000105/005400, loss: 9.266927, avg_loss: 9.390692
107
+ 000110/005400, loss: 7.621550, avg_loss: 9.346891
108
+ 000115/005400, loss: 6.959364, avg_loss: 9.281569
109
+ 000120/005400, loss: 9.679270, avg_loss: 9.291935
110
+ 000125/005400, loss: 8.002371, avg_loss: 9.251488
111
+ 000130/005400, loss: 8.983469, avg_loss: 9.210261
112
+ 000135/005400, loss: 7.914767, avg_loss: 9.138828
113
+ 000140/005400, loss: 9.368698, avg_loss: 9.096162
114
+ 000145/005400, loss: 7.681985, avg_loss: 9.041475
115
+ 000150/005400, loss: 7.530379, avg_loss: 8.976686
116
+ 000155/005400, loss: 9.263411, avg_loss: 8.941676
117
+ 000160/005400, loss: 7.710734, avg_loss: 8.895261
118
+ 000165/005400, loss: 8.456438, avg_loss: 8.831314
119
+ 000170/005400, loss: 6.155419, avg_loss: 8.772081
120
+ 000175/005400, loss: 8.032525, avg_loss: 8.692823
121
+ 000180/005400, loss: 4.489757, avg_loss: 8.613270
122
+ ***** Running dev evaluation *****
123
+ Num examples = 1500
124
+ Instantaneous batch size per device = 32
125
+ epoch 0, step 180/5400: {'pearson': 0.21495300918671972, 'spearmanr': 0.18778433070729544}
126
+ 000185/005400, loss: 7.743507, avg_loss: 8.577872
127
+ 000190/005400, loss: 6.030101, avg_loss: 8.510907
128
+ 000195/005400, loss: 3.536020, avg_loss: 8.447573
129
+ 000200/005400, loss: 6.482443, avg_loss: 8.383874
130
+ 000205/005400, loss: 7.495704, avg_loss: 8.331849
131
+ 000210/005400, loss: 7.830889, avg_loss: 8.285714
132
+ 000215/005400, loss: 7.473868, avg_loss: 8.219836
133
+ 000220/005400, loss: 6.685350, avg_loss: 8.164707
134
+ 000225/005400, loss: 4.961877, avg_loss: 8.111880
135
+ 000230/005400, loss: 5.369789, avg_loss: 8.067034
136
+ 000235/005400, loss: 4.157079, avg_loss: 8.007677
137
+ 000240/005400, loss: 6.113519, avg_loss: 7.971732
138
+ 000245/005400, loss: 4.965279, avg_loss: 7.912654
139
+ 000250/005400, loss: 3.810572, avg_loss: 7.868684
140
+ 000255/005400, loss: 5.212838, avg_loss: 7.827637
141
+ 000260/005400, loss: 5.060454, avg_loss: 7.780213
142
+ 000265/005400, loss: 3.830095, avg_loss: 7.720051
143
+ 000270/005400, loss: 5.186792, avg_loss: 7.666411
144
+ 000275/005400, loss: 5.622235, avg_loss: 7.623420
145
+ 000280/005400, loss: 4.717276, avg_loss: 7.579897
146
+ 000285/005400, loss: 4.819950, avg_loss: 7.529627
147
+ 000290/005400, loss: 5.464397, avg_loss: 7.489963
148
+ 000295/005400, loss: 5.470286, avg_loss: 7.442070
149
+ 000300/005400, loss: 3.843780, avg_loss: 7.396591
150
+ 000305/005400, loss: 3.396843, avg_loss: 7.349362
151
+ 000310/005400, loss: 4.573213, avg_loss: 7.293336
152
+ 000315/005400, loss: 4.345067, avg_loss: 7.247148
153
+ 000320/005400, loss: 4.538530, avg_loss: 7.205413
154
+ 000325/005400, loss: 3.374168, avg_loss: 7.165096
155
+ 000330/005400, loss: 3.680195, avg_loss: 7.118695
156
+ 000335/005400, loss: 3.798603, avg_loss: 7.071580
157
+ 000340/005400, loss: 4.418723, avg_loss: 7.028340
158
+ 000345/005400, loss: 2.651713, avg_loss: 6.979162
159
+ 000350/005400, loss: 4.138247, avg_loss: 6.931522
160
+ 000355/005400, loss: 4.034257, avg_loss: 6.891976
161
+ 000360/005400, loss: 3.947625, avg_loss: 6.853448
162
+ ***** Running dev evaluation *****
163
+ Num examples = 1500
164
+ Instantaneous batch size per device = 32
165
+ epoch 1, step 360/5400: {'pearson': 0.1693196142024497, 'spearmanr': 0.1520939753827761}
166
+ 000365/005400, loss: 4.066084, avg_loss: 6.813658
167
+ 000370/005400, loss: 2.446641, avg_loss: 6.765847
168
+ 000375/005400, loss: 3.652923, avg_loss: 6.724291
169
+ 000380/005400, loss: 2.925441, avg_loss: 6.683929
170
+ 000385/005400, loss: 3.510277, avg_loss: 6.641427
171
+ 000390/005400, loss: 3.712820, avg_loss: 6.597568
172
+ 000395/005400, loss: 2.864999, avg_loss: 6.559540
173
+ 000400/005400, loss: 2.363536, avg_loss: 6.515924
174
+ 000405/005400, loss: 3.202157, avg_loss: 6.472551
175
+ 000410/005400, loss: 2.507275, avg_loss: 6.427726
176
+ 000415/005400, loss: 2.655454, avg_loss: 6.383974
177
+ 000420/005400, loss: 3.361968, avg_loss: 6.343995
178
+ 000425/005400, loss: 2.212350, avg_loss: 6.302114
179
+ 000430/005400, loss: 2.654854, avg_loss: 6.260254
180
+ 000435/005400, loss: 2.469006, avg_loss: 6.219799
181
+ 000440/005400, loss: 2.423651, avg_loss: 6.179832
182
+ 000445/005400, loss: 1.999993, avg_loss: 6.138898
183
+ 000450/005400, loss: 3.104252, avg_loss: 6.101511
184
+ 000455/005400, loss: 2.722913, avg_loss: 6.065750
185
+ 000460/005400, loss: 2.014916, avg_loss: 6.027135
186
+ 000465/005400, loss: 2.255650, avg_loss: 5.989807
187
+ 000470/005400, loss: 2.582577, avg_loss: 5.953727
188
+ 000475/005400, loss: 2.268125, avg_loss: 5.922364
189
+ 000480/005400, loss: 2.132411, avg_loss: 5.883134
190
+ 000485/005400, loss: 2.092988, avg_loss: 5.850251
191
+ 000490/005400, loss: 2.469923, avg_loss: 5.816648
192
+ 000495/005400, loss: 1.907046, avg_loss: 5.776493
193
+ 000500/005400, loss: 2.188262, avg_loss: 5.740701
194
+ 000505/005400, loss: 1.522663, avg_loss: 5.703157
195
+ 000510/005400, loss: 1.982296, avg_loss: 5.667968
196
+ 000515/005400, loss: 2.409446, avg_loss: 5.635783
197
+ 000520/005400, loss: 1.887568, avg_loss: 5.603417
198
+ 000525/005400, loss: 2.210217, avg_loss: 5.572377
199
+ 000530/005400, loss: 2.381753, avg_loss: 5.541968
200
+ 000535/005400, loss: 2.081358, avg_loss: 5.511043
201
+ 000540/005400, loss: 2.770565, avg_loss: 5.483432
202
+ ***** Running dev evaluation *****
203
+ Num examples = 1500
204
+ Instantaneous batch size per device = 32
205
+ epoch 2, step 540/5400: {'pearson': 0.5585231671416229, 'spearmanr': 0.5968823171253705}
206
+ 000545/005400, loss: 2.294569, avg_loss: 5.453700
207
+ 000550/005400, loss: 1.879893, avg_loss: 5.425492
208
+ 000555/005400, loss: 2.054521, avg_loss: 5.396323
209
+ 000560/005400, loss: 2.426673, avg_loss: 5.367306
210
+ 000565/005400, loss: 1.785937, avg_loss: 5.339570
211
+ 000570/005400, loss: 2.125966, avg_loss: 5.312624
212
+ 000575/005400, loss: 2.204447, avg_loss: 5.285309
213
+ 000580/005400, loss: 1.977976, avg_loss: 5.257472
214
+ 000585/005400, loss: 1.667451, avg_loss: 5.227795
215
+ 000590/005400, loss: 2.013373, avg_loss: 5.196637
216
+ 000595/005400, loss: 1.661575, avg_loss: 5.165030
217
+ 000600/005400, loss: 1.761523, avg_loss: 5.134732
218
+ 000605/005400, loss: 1.165827, avg_loss: 5.104335
219
+ 000610/005400, loss: 1.423938, avg_loss: 5.073547
220
+ 000615/005400, loss: 1.275937, avg_loss: 5.045039
221
+ 000620/005400, loss: 1.456807, avg_loss: 5.016568
222
+ 000625/005400, loss: 1.428447, avg_loss: 4.988611
223
+ 000630/005400, loss: 1.340862, avg_loss: 4.960019
224
+ 000635/005400, loss: 1.158772, avg_loss: 4.931690
225
+ 000640/005400, loss: 1.279753, avg_loss: 4.903349
226
+ 000645/005400, loss: 1.495990, avg_loss: 4.875799
227
+ 000650/005400, loss: 1.418819, avg_loss: 4.847696
228
+ 000655/005400, loss: 1.233781, avg_loss: 4.819313
229
+ 000660/005400, loss: 0.825644, avg_loss: 4.790388
230
+ 000665/005400, loss: 1.236975, avg_loss: 4.763828
231
+ 000670/005400, loss: 1.427844, avg_loss: 4.737722
232
+ 000675/005400, loss: 1.194959, avg_loss: 4.710323
233
+ 000680/005400, loss: 1.298458, avg_loss: 4.683432
234
+ 000685/005400, loss: 1.250220, avg_loss: 4.658527
235
+ 000690/005400, loss: 1.528629, avg_loss: 4.632631
236
+ 000695/005400, loss: 0.912524, avg_loss: 4.608270
237
+ 000700/005400, loss: 0.927178, avg_loss: 4.583745
238
+ 000705/005400, loss: 1.425212, avg_loss: 4.560665
239
+ 000710/005400, loss: 1.385559, avg_loss: 4.537204
240
+ 000715/005400, loss: 1.303016, avg_loss: 4.512879
241
+ 000720/005400, loss: 1.559370, avg_loss: 4.489179
242
+ ***** Running dev evaluation *****
243
+ Num examples = 1500
244
+ Instantaneous batch size per device = 32
245
+ epoch 3, step 720/5400: {'pearson': 0.7538161883822286, 'spearmanr': 0.7339178388810693}
246
+ 000725/005400, loss: 1.238668, avg_loss: 4.465238
247
+ 000730/005400, loss: 0.980286, avg_loss: 4.441211
248
+ 000735/005400, loss: 1.036360, avg_loss: 4.417511
249
+ 000740/005400, loss: 0.733856, avg_loss: 4.392772
250
+ 000745/005400, loss: 0.911818, avg_loss: 4.369401
251
+ 000750/005400, loss: 0.923820, avg_loss: 4.346304
252
+ 000755/005400, loss: 1.057140, avg_loss: 4.322930
253
+ 000760/005400, loss: 0.671852, avg_loss: 4.300414
254
+ 000765/005400, loss: 1.206062, avg_loss: 4.278674
255
+ 000770/005400, loss: 1.131734, avg_loss: 4.256801
256
+ 000775/005400, loss: 0.727703, avg_loss: 4.235108
257
+ 000780/005400, loss: 0.807117, avg_loss: 4.213659
258
+ 000785/005400, loss: 0.656183, avg_loss: 4.193212
259
+ 000790/005400, loss: 1.307819, avg_loss: 4.173065
260
+ 000795/005400, loss: 1.318512, avg_loss: 4.152132
261
+ 000800/005400, loss: 0.923019, avg_loss: 4.130066
262
+ 000805/005400, loss: 0.358175, avg_loss: 4.109530
263
+ 000810/005400, loss: 0.568605, avg_loss: 4.090200
264
+ 000815/005400, loss: 0.538159, avg_loss: 4.070320
265
+ 000820/005400, loss: 0.791279, avg_loss: 4.050106
266
+ 000825/005400, loss: 0.646954, avg_loss: 4.029779
267
+ 000830/005400, loss: 0.696995, avg_loss: 4.010299
268
+ 000835/005400, loss: 0.851315, avg_loss: 3.991444
269
+ 000840/005400, loss: 0.953209, avg_loss: 3.972254
270
+ 000845/005400, loss: 0.639867, avg_loss: 3.953383
271
+ 000850/005400, loss: 0.828691, avg_loss: 3.934936
272
+ 000855/005400, loss: 0.851312, avg_loss: 3.916689
273
+ 000860/005400, loss: 0.913527, avg_loss: 3.898366
274
+ 000865/005400, loss: 0.769578, avg_loss: 3.880448
275
+ 000870/005400, loss: 0.780589, avg_loss: 3.862448
276
+ 000875/005400, loss: 0.973308, avg_loss: 3.845167
277
+ 000880/005400, loss: 0.829930, avg_loss: 3.827655
278
+ 000885/005400, loss: 0.766140, avg_loss: 3.810577
279
+ 000890/005400, loss: 0.563716, avg_loss: 3.793318
280
+ 000895/005400, loss: 1.047082, avg_loss: 3.776392
281
+ 000900/005400, loss: 0.866974, avg_loss: 3.759473
282
+ ***** Running dev evaluation *****
283
+ Num examples = 1500
284
+ Instantaneous batch size per device = 32
285
+ epoch 4, step 900/5400: {'pearson': 0.8037387020413668, 'spearmanr': 0.8107612065966875}
286
+ 000905/005400, loss: 0.766670, avg_loss: 3.742716
287
+ 000910/005400, loss: 0.644619, avg_loss: 3.725242
288
+ 000915/005400, loss: 0.794441, avg_loss: 3.709127
289
+ 000920/005400, loss: 0.583735, avg_loss: 3.693077
290
+ 000925/005400, loss: 0.467945, avg_loss: 3.676897
291
+ 000930/005400, loss: 0.635556, avg_loss: 3.661061
292
+ 000935/005400, loss: 0.546880, avg_loss: 3.644815
293
+ 000940/005400, loss: 0.442663, avg_loss: 3.628318
294
+ 000945/005400, loss: 0.683668, avg_loss: 3.612901
295
+ 000950/005400, loss: 0.656306, avg_loss: 3.597124
296
+ 000955/005400, loss: 0.710459, avg_loss: 3.582143
297
+ 000960/005400, loss: 0.503140, avg_loss: 3.567162
298
+ 000965/005400, loss: 0.659339, avg_loss: 3.552504
299
+ 000970/005400, loss: 0.707433, avg_loss: 3.537561
300
+ 000975/005400, loss: 0.965483, avg_loss: 3.523352
301
+ 000980/005400, loss: 0.855915, avg_loss: 3.508989
302
+ 000985/005400, loss: 0.649465, avg_loss: 3.494453
303
+ 000990/005400, loss: 0.513151, avg_loss: 3.480050
304
+ 000995/005400, loss: 0.907288, avg_loss: 3.465789
305
+ 001000/005400, loss: 0.461537, avg_loss: 3.451433
306
+ 001005/005400, loss: 0.496157, avg_loss: 3.437648
307
+ 001010/005400, loss: 0.989706, avg_loss: 3.424380
308
+ 001015/005400, loss: 0.754088, avg_loss: 3.410539
309
+ 001020/005400, loss: 0.731938, avg_loss: 3.396362
310
+ 001025/005400, loss: 0.844449, avg_loss: 3.382560
311
+ 001030/005400, loss: 0.346046, avg_loss: 3.368838
312
+ 001035/005400, loss: 0.518788, avg_loss: 3.355767
313
+ 001040/005400, loss: 0.714191, avg_loss: 3.342353
314
+ 001045/005400, loss: 0.800863, avg_loss: 3.329591
315
+ 001050/005400, loss: 0.538331, avg_loss: 3.316277
316
+ 001055/005400, loss: 0.645015, avg_loss: 3.303465
317
+ 001060/005400, loss: 0.451743, avg_loss: 3.290423
318
+ 001065/005400, loss: 0.482815, avg_loss: 3.277498
319
+ 001070/005400, loss: 0.428583, avg_loss: 3.264993
320
+ 001075/005400, loss: 0.905002, avg_loss: 3.253403
321
+ 001080/005400, loss: 0.423076, avg_loss: 3.241331
322
+ ***** Running dev evaluation *****
323
+ Num examples = 1500
324
+ Instantaneous batch size per device = 32
325
+ epoch 5, step 1080/5400: {'pearson': 0.8115941618503355, 'spearmanr': 0.8282434089896973}
326
+ 001085/005400, loss: 0.671325, avg_loss: 3.228906
327
+ 001090/005400, loss: 0.510681, avg_loss: 3.216880
328
+ 001095/005400, loss: 0.503297, avg_loss: 3.204282
329
+ 001100/005400, loss: 0.476207, avg_loss: 3.192340
330
+ 001105/005400, loss: 0.287384, avg_loss: 3.180409
331
+ 001110/005400, loss: 0.838371, avg_loss: 3.168479
332
+ 001115/005400, loss: 0.561100, avg_loss: 3.157175
333
+ 001120/005400, loss: 0.461640, avg_loss: 3.145364
334
+ 001125/005400, loss: 0.672549, avg_loss: 3.133893
335
+ 001130/005400, loss: 0.443830, avg_loss: 3.122226
336
+ 001135/005400, loss: 0.465307, avg_loss: 3.110867
337
+ 001140/005400, loss: 0.763562, avg_loss: 3.099965
338
+ 001145/005400, loss: 0.561359, avg_loss: 3.088965
339
+ 001150/005400, loss: 0.411171, avg_loss: 3.077866
340
+ 001155/005400, loss: 0.406792, avg_loss: 3.066446
341
+ 001160/005400, loss: 0.503313, avg_loss: 3.055675
342
+ 001165/005400, loss: 0.475825, avg_loss: 3.045274
343
+ 001170/005400, loss: 0.584800, avg_loss: 3.034442
344
+ 001175/005400, loss: 0.465069, avg_loss: 3.023891
345
+ 001180/005400, loss: 0.494697, avg_loss: 3.013498
346
+ 001185/005400, loss: 0.544740, avg_loss: 3.003423
347
+ 001190/005400, loss: 0.406965, avg_loss: 2.992763
348
+ 001195/005400, loss: 0.268987, avg_loss: 2.982255
349
+ 001200/005400, loss: 0.495571, avg_loss: 2.972160
350
+ 001205/005400, loss: 0.538762, avg_loss: 2.961849
351
+ 001210/005400, loss: 0.478300, avg_loss: 2.952011
352
+ 001215/005400, loss: 0.338071, avg_loss: 2.942186
353
+ 001220/005400, loss: 0.505288, avg_loss: 2.932113
354
+ 001225/005400, loss: 0.570436, avg_loss: 2.922221
355
+ 001230/005400, loss: 0.523959, avg_loss: 2.912277
356
+ 001235/005400, loss: 0.491099, avg_loss: 2.902272
357
+ 001240/005400, loss: 0.559447, avg_loss: 2.892549
358
+ 001245/005400, loss: 0.753339, avg_loss: 2.883637
359
+ 001250/005400, loss: 0.602193, avg_loss: 2.874184
360
+ 001255/005400, loss: 0.302810, avg_loss: 2.864446
361
+ 001260/005400, loss: 0.636528, avg_loss: 2.855367
362
+ ***** Running dev evaluation *****
363
+ Num examples = 1500
364
+ Instantaneous batch size per device = 32
365
+ epoch 6, step 1260/5400: {'pearson': 0.8244669741341696, 'spearmanr': 0.8347289521968146}
366
+ 001265/005400, loss: 0.576356, avg_loss: 2.845884
367
+ 001270/005400, loss: 0.356003, avg_loss: 2.836981
368
+ 001275/005400, loss: 0.282959, avg_loss: 2.827679
369
+ 001280/005400, loss: 0.471389, avg_loss: 2.818289
370
+ 001285/005400, loss: 0.291599, avg_loss: 2.809166
371
+ 001290/005400, loss: 0.309215, avg_loss: 2.799846
372
+ 001295/005400, loss: 0.440720, avg_loss: 2.790764
373
+ 001300/005400, loss: 0.452717, avg_loss: 2.781574
374
+ 001305/005400, loss: 0.379403, avg_loss: 2.772831
375
+ 001310/005400, loss: 0.740967, avg_loss: 2.764373
376
+ 001315/005400, loss: 0.554469, avg_loss: 2.755583
377
+ 001320/005400, loss: 0.422943, avg_loss: 2.747635
378
+ 001325/005400, loss: 0.613703, avg_loss: 2.739164
379
+ 001330/005400, loss: 0.333465, avg_loss: 2.730182
380
+ 001335/005400, loss: 0.531835, avg_loss: 2.721662
381
+ 001340/005400, loss: 0.447510, avg_loss: 2.713335
382
+ 001345/005400, loss: 0.487799, avg_loss: 2.705467
383
+ 001350/005400, loss: 0.629011, avg_loss: 2.697427
384
+ 001355/005400, loss: 0.316717, avg_loss: 2.688931
385
+ 001360/005400, loss: 0.483824, avg_loss: 2.680822
386
+ 001365/005400, loss: 0.420798, avg_loss: 2.672428
387
+ 001370/005400, loss: 0.312988, avg_loss: 2.664160
388
+ 001375/005400, loss: 0.253772, avg_loss: 2.655796
389
+ 001380/005400, loss: 0.507312, avg_loss: 2.648081
390
+ 001385/005400, loss: 0.423927, avg_loss: 2.640514
391
+ 001390/005400, loss: 0.488432, avg_loss: 2.632712
392
+ 001395/005400, loss: 0.496802, avg_loss: 2.624703
393
+ 001400/005400, loss: 0.411566, avg_loss: 2.617226
394
+ 001405/005400, loss: 0.620914, avg_loss: 2.609520
395
+ 001410/005400, loss: 0.529554, avg_loss: 2.602186
396
+ 001415/005400, loss: 0.377586, avg_loss: 2.594550
397
+ 001420/005400, loss: 0.537113, avg_loss: 2.587398
398
+ 001425/005400, loss: 0.502925, avg_loss: 2.579730
399
+ 001430/005400, loss: 0.501363, avg_loss: 2.572518
400
+ 001435/005400, loss: 0.523148, avg_loss: 2.564881
401
+ 001440/005400, loss: 0.283889, avg_loss: 2.557591
402
+ ***** Running dev evaluation *****
403
+ Num examples = 1500
404
+ Instantaneous batch size per device = 32
405
+ epoch 7, step 1440/5400: {'pearson': 0.8356315632016451, 'spearmanr': 0.8428067774651329}
406
+ 001445/005400, loss: 0.283461, avg_loss: 2.549994
407
+ 001450/005400, loss: 0.473319, avg_loss: 2.542806
408
+ 001455/005400, loss: 0.465852, avg_loss: 2.535222
409
+ 001460/005400, loss: 0.452470, avg_loss: 2.528097
410
+ 001465/005400, loss: 0.528226, avg_loss: 2.521023
411
+ 001470/005400, loss: 0.372980, avg_loss: 2.513948
412
+ 001475/005400, loss: 0.580186, avg_loss: 2.507289
413
+ 001480/005400, loss: 0.250609, avg_loss: 2.500083
414
+ 001485/005400, loss: 0.373619, avg_loss: 2.492741
415
+ 001490/005400, loss: 0.313954, avg_loss: 2.485812
416
+ 001495/005400, loss: 0.421009, avg_loss: 2.478890
417
+ 001500/005400, loss: 0.417312, avg_loss: 2.472097
418
+ 001505/005400, loss: 0.419549, avg_loss: 2.465457
419
+ 001510/005400, loss: 0.567841, avg_loss: 2.458859
420
+ 001515/005400, loss: 0.221651, avg_loss: 2.452159
421
+ 001520/005400, loss: 0.323677, avg_loss: 2.445824
422
+ 001525/005400, loss: 0.563059, avg_loss: 2.439246
423
+ 001530/005400, loss: 0.273469, avg_loss: 2.432506
424
+ 001535/005400, loss: 0.230308, avg_loss: 2.425987
425
+ 001540/005400, loss: 0.275917, avg_loss: 2.419360
426
+ 001545/005400, loss: 0.490302, avg_loss: 2.412818
427
+ 001550/005400, loss: 0.171527, avg_loss: 2.406091
428
+ 001555/005400, loss: 0.499564, avg_loss: 2.399561
429
+ 001560/005400, loss: 0.583477, avg_loss: 2.393275
430
+ 001565/005400, loss: 0.422795, avg_loss: 2.387004
431
+ 001570/005400, loss: 0.356273, avg_loss: 2.380570
432
+ 001575/005400, loss: 0.442116, avg_loss: 2.374079
433
+ 001580/005400, loss: 0.380964, avg_loss: 2.367966
434
+ 001585/005400, loss: 0.454051, avg_loss: 2.361857
435
+ 001590/005400, loss: 0.292075, avg_loss: 2.355417
436
+ 001595/005400, loss: 0.433962, avg_loss: 2.349358
437
+ 001600/005400, loss: 0.253748, avg_loss: 2.343178
438
+ 001605/005400, loss: 0.277990, avg_loss: 2.337058
439
+ 001610/005400, loss: 0.658840, avg_loss: 2.331389
440
+ 001615/005400, loss: 0.284291, avg_loss: 2.325417
441
+ 001620/005400, loss: 0.347131, avg_loss: 2.319557
442
+ ***** Running dev evaluation *****
443
+ Num examples = 1500
444
+ Instantaneous batch size per device = 32
445
+ epoch 8, step 1620/5400: {'pearson': 0.840875635131036, 'spearmanr': 0.8391187190190564}
446
+ 001625/005400, loss: 0.468040, avg_loss: 2.313795
447
+ 001630/005400, loss: 0.377569, avg_loss: 2.307781
448
+ 001635/005400, loss: 0.373161, avg_loss: 2.301947
449
+ 001640/005400, loss: 0.542144, avg_loss: 2.296237
450
+ 001645/005400, loss: 0.394721, avg_loss: 2.290366
451
+ 001650/005400, loss: 0.313285, avg_loss: 2.284578
452
+ 001655/005400, loss: 0.458701, avg_loss: 2.278912
453
+ 001660/005400, loss: 0.294037, avg_loss: 2.273092
454
+ 001665/005400, loss: 0.288020, avg_loss: 2.267503
455
+ 001670/005400, loss: 0.372206, avg_loss: 2.261890
456
+ 001675/005400, loss: 0.439113, avg_loss: 2.256269
457
+ 001680/005400, loss: 0.265594, avg_loss: 2.250567
458
+ 001685/005400, loss: 0.307823, avg_loss: 2.244832
459
+ 001690/005400, loss: 0.214900, avg_loss: 2.239233
460
+ 001695/005400, loss: 0.430367, avg_loss: 2.234019
461
+ 001700/005400, loss: 0.428587, avg_loss: 2.228347
462
+ 001705/005400, loss: 0.466478, avg_loss: 2.223007
463
+ 001710/005400, loss: 0.406999, avg_loss: 2.217425
464
+ 001715/005400, loss: 0.249302, avg_loss: 2.211718
465
+ 001720/005400, loss: 0.449824, avg_loss: 2.206581
466
+ 001725/005400, loss: 0.200499, avg_loss: 2.201121
467
+ 001730/005400, loss: 0.528394, avg_loss: 2.196022
468
+ 001735/005400, loss: 0.420790, avg_loss: 2.190833
469
+ 001740/005400, loss: 0.393591, avg_loss: 2.185567
470
+ 001745/005400, loss: 0.292256, avg_loss: 2.180424
471
+ 001750/005400, loss: 0.401385, avg_loss: 2.175266
472
+ 001755/005400, loss: 0.294124, avg_loss: 2.169960
473
+ 001760/005400, loss: 0.363119, avg_loss: 2.164699
474
+ 001765/005400, loss: 0.390154, avg_loss: 2.159830
475
+ 001770/005400, loss: 0.313013, avg_loss: 2.154815
476
+ 001775/005400, loss: 0.308711, avg_loss: 2.149686
477
+ 001780/005400, loss: 0.483320, avg_loss: 2.144812
478
+ 001785/005400, loss: 0.379410, avg_loss: 2.139796
479
+ 001790/005400, loss: 0.422236, avg_loss: 2.134915
480
+ 001795/005400, loss: 0.511399, avg_loss: 2.130093
481
+ 001800/005400, loss: 0.423039, avg_loss: 2.125146
482
+ ***** Running dev evaluation *****
483
+ Num examples = 1500
484
+ Instantaneous batch size per device = 32
485
+ epoch 9, step 1800/5400: {'pearson': 0.8342714757320445, 'spearmanr': 0.8376185602281018}
486
+ 001805/005400, loss: 0.486487, avg_loss: 2.120132
487
+ 001810/005400, loss: 0.270155, avg_loss: 2.115208
488
+ 001815/005400, loss: 0.227492, avg_loss: 2.110093
489
+ 001820/005400, loss: 0.346458, avg_loss: 2.105187
490
+ 001825/005400, loss: 0.426929, avg_loss: 2.100322
491
+ 001830/005400, loss: 0.117478, avg_loss: 2.095436
492
+ 001835/005400, loss: 0.279193, avg_loss: 2.090488
493
+ 001840/005400, loss: 0.387577, avg_loss: 2.085845
494
+ 001845/005400, loss: 0.250648, avg_loss: 2.081071
495
+ 001850/005400, loss: 0.303584, avg_loss: 2.076289
496
+ 001855/005400, loss: 0.405041, avg_loss: 2.071732
497
+ 001860/005400, loss: 0.166183, avg_loss: 2.066910
498
+ 001865/005400, loss: 0.319343, avg_loss: 2.062141
499
+ 001870/005400, loss: 0.317750, avg_loss: 2.057461
500
+ 001875/005400, loss: 0.315497, avg_loss: 2.052864
501
+ 001880/005400, loss: 0.338883, avg_loss: 2.048301
502
+ 001885/005400, loss: 0.322422, avg_loss: 2.043658
503
+ 001890/005400, loss: 0.136494, avg_loss: 2.038912
504
+ 001895/005400, loss: 0.384212, avg_loss: 2.034422
505
+ 001900/005400, loss: 0.386642, avg_loss: 2.029817
506
+ 001905/005400, loss: 0.336843, avg_loss: 2.025262
507
+ 001910/005400, loss: 0.378603, avg_loss: 2.020888
508
+ 001915/005400, loss: 0.244922, avg_loss: 2.016456
509
+ 001920/005400, loss: 0.388475, avg_loss: 2.012008
510
+ 001925/005400, loss: 0.275199, avg_loss: 2.007573
511
+ 001930/005400, loss: 0.286381, avg_loss: 2.003031
512
+ 001935/005400, loss: 0.408020, avg_loss: 1.998613
513
+ 001940/005400, loss: 0.296814, avg_loss: 1.994459
514
+ 001945/005400, loss: 0.221215, avg_loss: 1.990418
515
+ 001950/005400, loss: 0.386474, avg_loss: 1.986272
516
+ 001955/005400, loss: 0.186999, avg_loss: 1.981974
517
+ 001960/005400, loss: 0.353515, avg_loss: 1.977982
518
+ 001965/005400, loss: 0.220710, avg_loss: 1.973756
519
+ 001970/005400, loss: 0.522696, avg_loss: 1.969660
520
+ 001975/005400, loss: 0.318528, avg_loss: 1.965668
521
+ 001980/005400, loss: 0.256884, avg_loss: 1.961408
522
+ ***** Running dev evaluation *****
523
+ Num examples = 1500
524
+ Instantaneous batch size per device = 32
525
+ epoch 10, step 1980/5400: {'pearson': 0.8390370712384592, 'spearmanr': 0.8380421225427299}
526
+ 001985/005400, loss: 0.339906, avg_loss: 1.957260
527
+ 001990/005400, loss: 0.177573, avg_loss: 1.953061
528
+ 001995/005400, loss: 0.434594, avg_loss: 1.949158
529
+ 002000/005400, loss: 0.394058, avg_loss: 1.945127
530
+ 002005/005400, loss: 0.284734, avg_loss: 1.941044
531
+ 002010/005400, loss: 0.441842, avg_loss: 1.937158
532
+ 002015/005400, loss: 0.370813, avg_loss: 1.933077
533
+ 002020/005400, loss: 0.231465, avg_loss: 1.929090
534
+ 002025/005400, loss: 0.401823, avg_loss: 1.925187
535
+ 002030/005400, loss: 0.417580, avg_loss: 1.921148
536
+ 002035/005400, loss: 0.233858, avg_loss: 1.917078
537
+ 002040/005400, loss: 0.179666, avg_loss: 1.913157
538
+ 002045/005400, loss: 0.260741, avg_loss: 1.909101
539
+ 002050/005400, loss: 0.221551, avg_loss: 1.905037
540
+ 002055/005400, loss: 0.234906, avg_loss: 1.901112
541
+ 002060/005400, loss: 0.170529, avg_loss: 1.897019
542
+ 002065/005400, loss: 0.246520, avg_loss: 1.893189
543
+ 002070/005400, loss: 0.221311, avg_loss: 1.889234
544
+ 002075/005400, loss: 0.181704, avg_loss: 1.885389
545
+ 002080/005400, loss: 0.418144, avg_loss: 1.881511
546
+ 002085/005400, loss: 0.207121, avg_loss: 1.877616
547
+ 002090/005400, loss: 0.250038, avg_loss: 1.873798
548
+ 002095/005400, loss: 0.266151, avg_loss: 1.869941
549
+ 002100/005400, loss: 0.329553, avg_loss: 1.866257
550
+ 002105/005400, loss: 0.316394, avg_loss: 1.862574
551
+ 002110/005400, loss: 0.202054, avg_loss: 1.858893
552
+ 002115/005400, loss: 0.558679, avg_loss: 1.855374
553
+ 002120/005400, loss: 0.305135, avg_loss: 1.851792
554
+ 002125/005400, loss: 0.306204, avg_loss: 1.848025
555
+ 002130/005400, loss: 0.354196, avg_loss: 1.844382
556
+ 002135/005400, loss: 0.513295, avg_loss: 1.840886
557
+ 002140/005400, loss: 0.338046, avg_loss: 1.837288
558
+ 002145/005400, loss: 0.233815, avg_loss: 1.833621
559
+ 002150/005400, loss: 0.303081, avg_loss: 1.830035
560
+ 002155/005400, loss: 0.217688, avg_loss: 1.826318
561
+ 002160/005400, loss: 0.223059, avg_loss: 1.822730
562
+ ***** Running dev evaluation *****
563
+ Num examples = 1500
564
+ Instantaneous batch size per device = 32
565
+ epoch 11, step 2160/5400: {'pearson': 0.8434982902424131, 'spearmanr': 0.8445651086908786}
566
+ 002165/005400, loss: 0.237432, avg_loss: 1.819061
567
+ 002170/005400, loss: 0.283776, avg_loss: 1.815507
568
+ 002175/005400, loss: 0.309928, avg_loss: 1.811960
569
+ 002180/005400, loss: 0.256525, avg_loss: 1.808401
570
+ 002185/005400, loss: 0.282268, avg_loss: 1.804922
571
+ 002190/005400, loss: 0.277528, avg_loss: 1.801368
572
+ 002195/005400, loss: 0.345856, avg_loss: 1.797885
573
+ 002200/005400, loss: 0.393328, avg_loss: 1.794652
574
+ 002205/005400, loss: 0.224377, avg_loss: 1.791248
575
+ 002210/005400, loss: 0.219291, avg_loss: 1.787713
576
+ 002215/005400, loss: 0.147671, avg_loss: 1.784197
577
+ 002220/005400, loss: 0.339344, avg_loss: 1.780853
578
+ 002225/005400, loss: 0.219361, avg_loss: 1.777467
579
+ 002230/005400, loss: 0.280020, avg_loss: 1.774036
580
+ 002235/005400, loss: 0.261592, avg_loss: 1.770745
581
+ 002240/005400, loss: 0.293255, avg_loss: 1.767543
582
+ 002245/005400, loss: 0.260899, avg_loss: 1.764155
583
+ 002250/005400, loss: 0.251379, avg_loss: 1.760734
584
+ 002255/005400, loss: 0.180517, avg_loss: 1.757394
585
+ 002260/005400, loss: 0.237342, avg_loss: 1.754018
586
+ 002265/005400, loss: 0.348091, avg_loss: 1.750775
587
+ 002270/005400, loss: 0.169205, avg_loss: 1.747420
588
+ 002275/005400, loss: 0.308270, avg_loss: 1.744165
589
+ 002280/005400, loss: 0.265926, avg_loss: 1.740912
590
+ 002285/005400, loss: 0.269741, avg_loss: 1.737594
591
+ 002290/005400, loss: 0.368088, avg_loss: 1.734481
592
+ 002295/005400, loss: 0.288817, avg_loss: 1.731510
593
+ 002300/005400, loss: 0.151223, avg_loss: 1.728326
594
+ 002305/005400, loss: 0.314602, avg_loss: 1.725295
595
+ 002310/005400, loss: 0.204679, avg_loss: 1.722112
596
+ 002315/005400, loss: 0.288287, avg_loss: 1.718930
597
+ 002320/005400, loss: 0.245926, avg_loss: 1.715852
598
+ 002325/005400, loss: 0.204663, avg_loss: 1.712662
599
+ 002330/005400, loss: 0.215070, avg_loss: 1.709556
600
+ 002335/005400, loss: 0.190882, avg_loss: 1.706442
601
+ 002340/005400, loss: 0.224660, avg_loss: 1.703429
602
+ ***** Running dev evaluation *****
603
+ Num examples = 1500
604
+ Instantaneous batch size per device = 32
605
+ epoch 12, step 2340/5400: {'pearson': 0.8415414818553372, 'spearmanr': 0.8425621296013649}
606
+ 002345/005400, loss: 0.207369, avg_loss: 1.700278
607
+ 002350/005400, loss: 0.261497, avg_loss: 1.697250
608
+ 002355/005400, loss: 0.230280, avg_loss: 1.694103
609
+ 002360/005400, loss: 0.262285, avg_loss: 1.690920
610
+ 002365/005400, loss: 0.151266, avg_loss: 1.687904
611
+ 002370/005400, loss: 0.269719, avg_loss: 1.684892
612
+ 002375/005400, loss: 0.354083, avg_loss: 1.681934
613
+ 002380/005400, loss: 0.237291, avg_loss: 1.678996
614
+ 002385/005400, loss: 0.186130, avg_loss: 1.676010
615
+ 002390/005400, loss: 0.260663, avg_loss: 1.673000
616
+ 002395/005400, loss: 0.203245, avg_loss: 1.669989
617
+ 002400/005400, loss: 0.309466, avg_loss: 1.667078
618
+ 002405/005400, loss: 0.167727, avg_loss: 1.664065
619
+ 002410/005400, loss: 0.180444, avg_loss: 1.661110
620
+ 002415/005400, loss: 0.205075, avg_loss: 1.658129
621
+ 002420/005400, loss: 0.251971, avg_loss: 1.655157
622
+ 002425/005400, loss: 0.503691, avg_loss: 1.652340
623
+ 002430/005400, loss: 0.361796, avg_loss: 1.649719
624
+ 002435/005400, loss: 0.220655, avg_loss: 1.646866
625
+ 002440/005400, loss: 0.364590, avg_loss: 1.644123
626
+ 002445/005400, loss: 0.387156, avg_loss: 1.641263
627
+ 002450/005400, loss: 0.321079, avg_loss: 1.638517
628
+ 002455/005400, loss: 0.165761, avg_loss: 1.635770
629
+ 002460/005400, loss: 0.270390, avg_loss: 1.632963
630
+ 002465/005400, loss: 0.202102, avg_loss: 1.630213
631
+ 002470/005400, loss: 0.162662, avg_loss: 1.627334
632
+ 002475/005400, loss: 0.141903, avg_loss: 1.624407
633
+ 002480/005400, loss: 0.130965, avg_loss: 1.621656
634
+ 002485/005400, loss: 0.185001, avg_loss: 1.618813
635
+ 002490/005400, loss: 0.237992, avg_loss: 1.616033
636
+ 002495/005400, loss: 0.158510, avg_loss: 1.613217
637
+ 002500/005400, loss: 0.259753, avg_loss: 1.610477
638
+ 002505/005400, loss: 0.108687, avg_loss: 1.607702
639
+ 002510/005400, loss: 0.179495, avg_loss: 1.604972
640
+ 002515/005400, loss: 0.267883, avg_loss: 1.602195
641
+ 002520/005400, loss: 0.205575, avg_loss: 1.599474
642
+ ***** Running dev evaluation *****
643
+ Num examples = 1500
644
+ Instantaneous batch size per device = 32
645
+ epoch 13, step 2520/5400: {'pearson': 0.8425599117367437, 'spearmanr': 0.8414850205786223}
646
+ 002525/005400, loss: 0.199653, avg_loss: 1.596711
647
+ 002530/005400, loss: 0.201341, avg_loss: 1.593993
648
+ 002535/005400, loss: 0.203724, avg_loss: 1.591213
649
+ 002540/005400, loss: 0.254623, avg_loss: 1.588562
650
+ 002545/005400, loss: 0.369073, avg_loss: 1.585980
651
+ 002550/005400, loss: 0.106891, avg_loss: 1.583355
652
+ 002555/005400, loss: 0.136818, avg_loss: 1.580702
653
+ 002560/005400, loss: 0.231878, avg_loss: 1.577973
654
+ 002565/005400, loss: 0.156474, avg_loss: 1.575269
655
+ 002570/005400, loss: 0.236511, avg_loss: 1.572622
656
+ 002575/005400, loss: 0.257811, avg_loss: 1.570007
657
+ 002580/005400, loss: 0.468576, avg_loss: 1.567428
658
+ 002585/005400, loss: 0.163139, avg_loss: 1.564778
659
+ 002590/005400, loss: 0.436930, avg_loss: 1.562216
660
+ 002595/005400, loss: 0.196596, avg_loss: 1.559604
661
+ 002600/005400, loss: 0.232763, avg_loss: 1.557100
662
+ 002605/005400, loss: 0.164102, avg_loss: 1.554545
663
+ 002610/005400, loss: 0.258984, avg_loss: 1.551967
664
+ 002615/005400, loss: 0.188581, avg_loss: 1.549408
665
+ 002620/005400, loss: 0.215384, avg_loss: 1.546768
666
+ 002625/005400, loss: 0.165978, avg_loss: 1.544174
667
+ 002630/005400, loss: 0.254275, avg_loss: 1.541621
668
+ 002635/005400, loss: 0.260447, avg_loss: 1.539074
669
+ 002640/005400, loss: 0.257019, avg_loss: 1.536569
670
+ 002645/005400, loss: 0.304152, avg_loss: 1.534171
671
+ 002650/005400, loss: 0.172311, avg_loss: 1.531694
672
+ 002655/005400, loss: 0.217652, avg_loss: 1.529228
673
+ 002660/005400, loss: 0.431580, avg_loss: 1.526855
674
+ 002665/005400, loss: 0.342930, avg_loss: 1.524416
675
+ 002670/005400, loss: 0.281481, avg_loss: 1.521985
676
+ 002675/005400, loss: 0.115055, avg_loss: 1.519483
677
+ 002680/005400, loss: 0.190243, avg_loss: 1.517189
678
+ 002685/005400, loss: 0.173296, avg_loss: 1.514757
679
+ 002690/005400, loss: 0.374071, avg_loss: 1.512512
680
+ 002695/005400, loss: 0.322947, avg_loss: 1.510223
681
+ 002700/005400, loss: 0.133452, avg_loss: 1.507823
682
+ ***** Running dev evaluation *****
683
+ Num examples = 1500
684
+ Instantaneous batch size per device = 32
685
+ epoch 14, step 2700/5400: {'pearson': 0.8428262938537643, 'spearmanr': 0.8418967117492774}
686
+ 002705/005400, loss: 0.165615, avg_loss: 1.505401
687
+ 002710/005400, loss: 0.191277, avg_loss: 1.503067
688
+ 002715/005400, loss: 0.186724, avg_loss: 1.500670
689
+ 002720/005400, loss: 0.166687, avg_loss: 1.498308
690
+ 002725/005400, loss: 0.173368, avg_loss: 1.495943
691
+ 002730/005400, loss: 0.182292, avg_loss: 1.493571
692
+ 002735/005400, loss: 0.094817, avg_loss: 1.491132
693
+ 002740/005400, loss: 0.151966, avg_loss: 1.488704
694
+ 002745/005400, loss: 0.118933, avg_loss: 1.486331
695
+ 002750/005400, loss: 0.150439, avg_loss: 1.484025
696
+ 002755/005400, loss: 0.220458, avg_loss: 1.481835
697
+ 002760/005400, loss: 0.165892, avg_loss: 1.479519
698
+ 002765/005400, loss: 0.226839, avg_loss: 1.477155
699
+ 002770/005400, loss: 0.181736, avg_loss: 1.474844
700
+ 002775/005400, loss: 0.103294, avg_loss: 1.472493
701
+ 002780/005400, loss: 0.152098, avg_loss: 1.470169
702
+ 002785/005400, loss: 0.210727, avg_loss: 1.467948
703
+ 002790/005400, loss: 0.218008, avg_loss: 1.465678
704
+ 002795/005400, loss: 0.303881, avg_loss: 1.463492
705
+ 002800/005400, loss: 0.149363, avg_loss: 1.461267
706
+ 002805/005400, loss: 0.278521, avg_loss: 1.459031
707
+ 002810/005400, loss: 0.177459, avg_loss: 1.456765
708
+ 002815/005400, loss: 0.147072, avg_loss: 1.454549
709
+ 002820/005400, loss: 0.154193, avg_loss: 1.452240
710
+ 002825/005400, loss: 0.118995, avg_loss: 1.450022
711
+ 002830/005400, loss: 0.306946, avg_loss: 1.447801
712
+ 002835/005400, loss: 0.203090, avg_loss: 1.445593
713
+ 002840/005400, loss: 0.196348, avg_loss: 1.443464
714
+ 002845/005400, loss: 0.113525, avg_loss: 1.441222
715
+ 002850/005400, loss: 0.305031, avg_loss: 1.439138
716
+ 002855/005400, loss: 0.179518, avg_loss: 1.436929
717
+ 002860/005400, loss: 0.317867, avg_loss: 1.434791
718
+ 002865/005400, loss: 0.244391, avg_loss: 1.432654
719
+ 002870/005400, loss: 0.201873, avg_loss: 1.430598
720
+ 002875/005400, loss: 0.332513, avg_loss: 1.428486
721
+ 002880/005400, loss: 0.174545, avg_loss: 1.426279
722
+ ***** Running dev evaluation *****
723
+ Num examples = 1500
724
+ Instantaneous batch size per device = 32
725
+ epoch 15, step 2880/5400: {'pearson': 0.8465462185651544, 'spearmanr': 0.8451574856196069}
726
+ 002885/005400, loss: 0.140292, avg_loss: 1.424148
727
+ 002890/005400, loss: 0.180590, avg_loss: 1.422041
728
+ 002895/005400, loss: 0.276235, avg_loss: 1.419968
729
+ 002900/005400, loss: 0.079708, avg_loss: 1.417818
730
+ 002905/005400, loss: 0.178860, avg_loss: 1.415680
731
+ 002910/005400, loss: 0.191974, avg_loss: 1.413542
732
+ 002915/005400, loss: 0.160231, avg_loss: 1.411524
733
+ 002920/005400, loss: 0.179065, avg_loss: 1.409382
734
+ 002925/005400, loss: 0.261529, avg_loss: 1.407299
735
+ 002930/005400, loss: 0.196875, avg_loss: 1.405278
736
+ 002935/005400, loss: 0.172792, avg_loss: 1.403193
737
+ 002940/005400, loss: 0.132129, avg_loss: 1.401091
738
+ 002945/005400, loss: 0.143233, avg_loss: 1.398991
739
+ 002950/005400, loss: 0.098005, avg_loss: 1.396972
740
+ 002955/005400, loss: 0.216378, avg_loss: 1.394936
741
+ 002960/005400, loss: 0.168641, avg_loss: 1.392847
742
+ 002965/005400, loss: 0.200968, avg_loss: 1.390786
743
+ 002970/005400, loss: 0.125896, avg_loss: 1.388788
744
+ 002975/005400, loss: 0.244486, avg_loss: 1.386788
745
+ 002980/005400, loss: 0.157024, avg_loss: 1.384753
746
+ 002985/005400, loss: 0.131733, avg_loss: 1.382739
747
+ 002990/005400, loss: 0.180723, avg_loss: 1.380701
748
+ 002995/005400, loss: 0.213533, avg_loss: 1.378717
749
+ 003000/005400, loss: 0.149431, avg_loss: 1.376713
750
+ 003005/005400, loss: 0.145573, avg_loss: 1.374738
751
+ 003010/005400, loss: 0.142425, avg_loss: 1.372738
752
+ 003015/005400, loss: 0.273710, avg_loss: 1.370737
753
+ 003020/005400, loss: 0.164532, avg_loss: 1.368793
754
+ 003025/005400, loss: 0.354658, avg_loss: 1.366944
755
+ 003030/005400, loss: 0.162812, avg_loss: 1.365036
756
+ 003035/005400, loss: 0.225085, avg_loss: 1.363150
757
+ 003040/005400, loss: 0.237793, avg_loss: 1.361249
758
+ 003045/005400, loss: 0.175477, avg_loss: 1.359304
759
+ 003050/005400, loss: 0.220884, avg_loss: 1.357379
760
+ 003055/005400, loss: 0.116397, avg_loss: 1.355440
761
+ 003060/005400, loss: 0.180262, avg_loss: 1.353549
762
+ ***** Running dev evaluation *****
763
+ Num examples = 1500
764
+ Instantaneous batch size per device = 32
765
+ epoch 16, step 3060/5400: {'pearson': 0.8475945534372652, 'spearmanr': 0.8462737598699491}
766
+ 003065/005400, loss: 0.208348, avg_loss: 1.351671
767
+ 003070/005400, loss: 0.162787, avg_loss: 1.349760
768
+ 003075/005400, loss: 0.204459, avg_loss: 1.347873
769
+ 003080/005400, loss: 0.243172, avg_loss: 1.346001
770
+ 003085/005400, loss: 0.105318, avg_loss: 1.344059
771
+ 003090/005400, loss: 0.143131, avg_loss: 1.342206
772
+ 003095/005400, loss: 0.170449, avg_loss: 1.340305
773
+ 003100/005400, loss: 0.208828, avg_loss: 1.338421
774
+ 003105/005400, loss: 0.186506, avg_loss: 1.336552
775
+ 003110/005400, loss: 0.138573, avg_loss: 1.334692
776
+ 003115/005400, loss: 0.199446, avg_loss: 1.332886
777
+ 003120/005400, loss: 0.178179, avg_loss: 1.331061
778
+ 003125/005400, loss: 0.158329, avg_loss: 1.329155
779
+ 003130/005400, loss: 0.132958, avg_loss: 1.327291
780
+ 003135/005400, loss: 0.117738, avg_loss: 1.325437
781
+ 003140/005400, loss: 0.187024, avg_loss: 1.323590
782
+ 003145/005400, loss: 0.285563, avg_loss: 1.321791
783
+ 003150/005400, loss: 0.126655, avg_loss: 1.320009
784
+ 003155/005400, loss: 0.246144, avg_loss: 1.318180
785
+ 003160/005400, loss: 0.222086, avg_loss: 1.316403
786
+ 003165/005400, loss: 0.088263, avg_loss: 1.314602
787
+ 003170/005400, loss: 0.159250, avg_loss: 1.312750
788
+ 003175/005400, loss: 0.232737, avg_loss: 1.311048
789
+ 003180/005400, loss: 0.150258, avg_loss: 1.309249
790
+ 003185/005400, loss: 0.149525, avg_loss: 1.307465
791
+ 003190/005400, loss: 0.175701, avg_loss: 1.305661
792
+ 003195/005400, loss: 0.224868, avg_loss: 1.303942
793
+ 003200/005400, loss: 0.151383, avg_loss: 1.302172
794
+ 003205/005400, loss: 0.216179, avg_loss: 1.300442
795
+ 003210/005400, loss: 0.197382, avg_loss: 1.298647
796
+ 003215/005400, loss: 0.174374, avg_loss: 1.296861
797
+ 003220/005400, loss: 0.146824, avg_loss: 1.295138
798
+ 003225/005400, loss: 0.172476, avg_loss: 1.293391
799
+ 003230/005400, loss: 0.180328, avg_loss: 1.291636
800
+ 003235/005400, loss: 0.219937, avg_loss: 1.289898
801
+ 003240/005400, loss: 0.152960, avg_loss: 1.288163
802
+ ***** Running dev evaluation *****
803
+ Num examples = 1500
804
+ Instantaneous batch size per device = 32
805
+ epoch 17, step 3240/5400: {'pearson': 0.8504202206275068, 'spearmanr': 0.8473922892792047}
806
+ 003245/005400, loss: 0.171524, avg_loss: 1.286479
807
+ 003250/005400, loss: 0.116338, avg_loss: 1.284752
808
+ 003255/005400, loss: 0.086406, avg_loss: 1.283072
809
+ 003260/005400, loss: 0.150628, avg_loss: 1.281353
810
+ 003265/005400, loss: 0.139414, avg_loss: 1.279617
811
+ 003270/005400, loss: 0.193610, avg_loss: 1.277939
812
+ 003275/005400, loss: 0.235554, avg_loss: 1.276249
813
+ 003280/005400, loss: 0.166258, avg_loss: 1.274573
814
+ 003285/005400, loss: 0.263752, avg_loss: 1.272963
815
+ 003290/005400, loss: 0.303736, avg_loss: 1.271314
816
+ 003295/005400, loss: 0.119213, avg_loss: 1.269609
817
+ 003300/005400, loss: 0.132104, avg_loss: 1.267901
818
+ 003305/005400, loss: 0.143845, avg_loss: 1.266212
819
+ 003310/005400, loss: 0.115098, avg_loss: 1.264532
820
+ 003315/005400, loss: 0.288430, avg_loss: 1.262901
821
+ 003320/005400, loss: 0.173986, avg_loss: 1.261220
822
+ 003325/005400, loss: 0.120085, avg_loss: 1.259552
823
+ 003330/005400, loss: 0.248743, avg_loss: 1.257920
824
+ 003335/005400, loss: 0.139627, avg_loss: 1.256220
825
+ 003340/005400, loss: 0.147467, avg_loss: 1.254561
826
+ 003345/005400, loss: 0.142301, avg_loss: 1.252920
827
+ 003350/005400, loss: 0.156088, avg_loss: 1.251271
828
+ 003355/005400, loss: 0.151669, avg_loss: 1.249613
829
+ 003360/005400, loss: 0.214872, avg_loss: 1.248012
830
+ 003365/005400, loss: 0.198525, avg_loss: 1.246435
831
+ 003370/005400, loss: 0.088710, avg_loss: 1.244759
832
+ 003375/005400, loss: 0.120682, avg_loss: 1.243157
833
+ 003380/005400, loss: 0.180583, avg_loss: 1.241588
834
+ 003385/005400, loss: 0.228067, avg_loss: 1.240034
835
+ 003390/005400, loss: 0.126767, avg_loss: 1.238442
836
+ 003395/005400, loss: 0.125910, avg_loss: 1.236902
837
+ 003400/005400, loss: 0.139716, avg_loss: 1.235308
838
+ 003405/005400, loss: 0.080612, avg_loss: 1.233692
839
+ 003410/005400, loss: 0.212925, avg_loss: 1.232123
840
+ 003415/005400, loss: 0.131897, avg_loss: 1.230545
841
+ 003420/005400, loss: 0.205202, avg_loss: 1.228983
842
+ ***** Running dev evaluation *****
843
+ Num examples = 1500
844
+ Instantaneous batch size per device = 32
845
+ epoch 18, step 3420/5400: {'pearson': 0.8498450703665391, 'spearmanr': 0.8479951774929629}
846
+ 003425/005400, loss: 0.122507, avg_loss: 1.227378
847
+ 003430/005400, loss: 0.250203, avg_loss: 1.225830
848
+ 003435/005400, loss: 0.173522, avg_loss: 1.224317
849
+ 003440/005400, loss: 0.087732, avg_loss: 1.222770
850
+ 003445/005400, loss: 0.154733, avg_loss: 1.221229
851
+ 003450/005400, loss: 0.217941, avg_loss: 1.219678
852
+ 003455/005400, loss: 0.137303, avg_loss: 1.218170
853
+ 003460/005400, loss: 0.112234, avg_loss: 1.216591
854
+ 003465/005400, loss: 0.150905, avg_loss: 1.215047
855
+ 003470/005400, loss: 0.158825, avg_loss: 1.213517
856
+ 003475/005400, loss: 0.173023, avg_loss: 1.212032
857
+ 003480/005400, loss: 0.178021, avg_loss: 1.210536
858
+ 003485/005400, loss: 0.247019, avg_loss: 1.209080
859
+ 003490/005400, loss: 0.072551, avg_loss: 1.207569
860
+ 003495/005400, loss: 0.162839, avg_loss: 1.206022
861
+ 003500/005400, loss: 0.189042, avg_loss: 1.204516
862
+ 003505/005400, loss: 0.173782, avg_loss: 1.203007
863
+ 003510/005400, loss: 0.138777, avg_loss: 1.201515
864
+ 003515/005400, loss: 0.177656, avg_loss: 1.200013
865
+ 003520/005400, loss: 0.103750, avg_loss: 1.198508
866
+ 003525/005400, loss: 0.169574, avg_loss: 1.197020
867
+ 003530/005400, loss: 0.119396, avg_loss: 1.195545
868
+ 003535/005400, loss: 0.264826, avg_loss: 1.194100
869
+ 003540/005400, loss: 0.098011, avg_loss: 1.192637
870
+ 003545/005400, loss: 0.088810, avg_loss: 1.191125
871
+ 003550/005400, loss: 0.107876, avg_loss: 1.189654
872
+ 003555/005400, loss: 0.157520, avg_loss: 1.188192
873
+ 003560/005400, loss: 0.176217, avg_loss: 1.186812
874
+ 003565/005400, loss: 0.111337, avg_loss: 1.185342
875
+ 003570/005400, loss: 0.166201, avg_loss: 1.183889
876
+ 003575/005400, loss: 0.171814, avg_loss: 1.182409
877
+ 003580/005400, loss: 0.112979, avg_loss: 1.181004
878
+ 003585/005400, loss: 0.119157, avg_loss: 1.179598
879
+ 003590/005400, loss: 0.114437, avg_loss: 1.178189
880
+ 003595/005400, loss: 0.155447, avg_loss: 1.176771
881
+ 003600/005400, loss: 0.157078, avg_loss: 1.175317
882
+ ***** Running dev evaluation *****
883
+ Num examples = 1500
884
+ Instantaneous batch size per device = 32
885
+ epoch 19, step 3600/5400: {'pearson': 0.8482436057295935, 'spearmanr': 0.8472426908693901}
886
+ 003605/005400, loss: 0.154441, avg_loss: 1.173877
887
+ 003610/005400, loss: 0.100947, avg_loss: 1.172478
888
+ 003615/005400, loss: 0.125365, avg_loss: 1.171029
889
+ 003620/005400, loss: 0.106434, avg_loss: 1.169605
890
+ 003625/005400, loss: 0.130245, avg_loss: 1.168211
891
+ 003630/005400, loss: 0.134600, avg_loss: 1.166787
892
+ 003635/005400, loss: 0.266648, avg_loss: 1.165400
893
+ 003640/005400, loss: 0.144939, avg_loss: 1.164021
894
+ 003645/005400, loss: 0.106222, avg_loss: 1.162577
895
+ 003650/005400, loss: 0.117357, avg_loss: 1.161193
896
+ 003655/005400, loss: 0.202359, avg_loss: 1.159805
897
+ 003660/005400, loss: 0.166776, avg_loss: 1.158439
898
+ 003665/005400, loss: 0.107025, avg_loss: 1.157045
899
+ 003670/005400, loss: 0.143284, avg_loss: 1.155661
900
+ 003675/005400, loss: 0.198224, avg_loss: 1.154297
901
+ 003680/005400, loss: 0.280506, avg_loss: 1.152964
902
+ 003685/005400, loss: 0.130698, avg_loss: 1.151564
903
+ 003690/005400, loss: 0.129304, avg_loss: 1.150198
904
+ 003695/005400, loss: 0.137243, avg_loss: 1.148803
905
+ 003700/005400, loss: 0.097097, avg_loss: 1.147449
906
+ 003705/005400, loss: 0.144787, avg_loss: 1.146119
907
+ 003710/005400, loss: 0.127824, avg_loss: 1.144796
908
+ 003715/005400, loss: 0.176846, avg_loss: 1.143457
909
+ 003720/005400, loss: 0.100565, avg_loss: 1.142128
910
+ 003725/005400, loss: 0.080043, avg_loss: 1.140760
911
+ 003730/005400, loss: 0.125706, avg_loss: 1.139474
912
+ 003735/005400, loss: 0.117341, avg_loss: 1.138159
913
+ 003740/005400, loss: 0.158067, avg_loss: 1.136843
914
+ 003745/005400, loss: 0.151995, avg_loss: 1.135553
915
+ 003750/005400, loss: 0.277281, avg_loss: 1.134297
916
+ 003755/005400, loss: 0.133230, avg_loss: 1.132962
917
+ 003760/005400, loss: 0.186799, avg_loss: 1.131718
918
+ 003765/005400, loss: 0.205163, avg_loss: 1.130425
919
+ 003770/005400, loss: 0.157280, avg_loss: 1.129118
920
+ 003775/005400, loss: 0.250720, avg_loss: 1.127838
921
+ 003780/005400, loss: 0.138770, avg_loss: 1.126563
922
+ ***** Running dev evaluation *****
923
+ Num examples = 1500
924
+ Instantaneous batch size per device = 32
925
+ epoch 20, step 3780/5400: {'pearson': 0.8516633883376111, 'spearmanr': 0.848796837026541}
926
+ 003785/005400, loss: 0.280053, avg_loss: 1.125306
927
+ 003790/005400, loss: 0.119360, avg_loss: 1.124036
928
+ 003795/005400, loss: 0.150453, avg_loss: 1.122750
929
+ 003800/005400, loss: 0.150021, avg_loss: 1.121459
930
+ 003805/005400, loss: 0.077378, avg_loss: 1.120170
931
+ 003810/005400, loss: 0.148403, avg_loss: 1.118906
932
+ 003815/005400, loss: 0.178699, avg_loss: 1.117645
933
+ 003820/005400, loss: 0.149582, avg_loss: 1.116330
934
+ 003825/005400, loss: 0.128546, avg_loss: 1.115013
935
+ 003830/005400, loss: 0.268229, avg_loss: 1.113776
936
+ 003835/005400, loss: 0.195517, avg_loss: 1.112531
937
+ 003840/005400, loss: 0.208493, avg_loss: 1.111268
938
+ 003845/005400, loss: 0.193140, avg_loss: 1.110029
939
+ 003850/005400, loss: 0.088294, avg_loss: 1.108804
940
+ 003855/005400, loss: 0.149382, avg_loss: 1.107547
941
+ 003860/005400, loss: 0.198664, avg_loss: 1.106283
942
+ 003865/005400, loss: 0.126898, avg_loss: 1.104976
943
+ 003870/005400, loss: 0.129632, avg_loss: 1.103741
944
+ 003875/005400, loss: 0.123535, avg_loss: 1.102518
945
+ 003880/005400, loss: 0.165960, avg_loss: 1.101254
946
+ 003885/005400, loss: 0.138942, avg_loss: 1.100020
947
+ 003890/005400, loss: 0.128230, avg_loss: 1.098769
948
+ 003895/005400, loss: 0.104971, avg_loss: 1.097568
949
+ 003900/005400, loss: 0.085618, avg_loss: 1.096347
950
+ 003905/005400, loss: 0.126211, avg_loss: 1.095101
951
+ 003910/005400, loss: 0.172208, avg_loss: 1.093876
952
+ 003915/005400, loss: 0.134293, avg_loss: 1.092629
953
+ 003920/005400, loss: 0.130413, avg_loss: 1.091402
954
+ 003925/005400, loss: 0.126139, avg_loss: 1.090250
955
+ 003930/005400, loss: 0.133957, avg_loss: 1.089023
956
+ 003935/005400, loss: 0.235973, avg_loss: 1.087812
957
+ 003940/005400, loss: 0.145638, avg_loss: 1.086616
958
+ 003945/005400, loss: 0.101992, avg_loss: 1.085413
959
+ 003950/005400, loss: 0.126402, avg_loss: 1.084220
960
+ 003955/005400, loss: 0.117492, avg_loss: 1.083047
961
+ 003960/005400, loss: 0.130239, avg_loss: 1.081839
962
+ ***** Running dev evaluation *****
963
+ Num examples = 1500
964
+ Instantaneous batch size per device = 32
965
+ epoch 21, step 3960/5400: {'pearson': 0.847780716202824, 'spearmanr': 0.8469865580881132}
966
+ 003965/005400, loss: 0.064479, avg_loss: 1.080611
967
+ 003970/005400, loss: 0.171006, avg_loss: 1.079406
968
+ 003975/005400, loss: 0.085861, avg_loss: 1.078165
969
+ 003980/005400, loss: 0.095522, avg_loss: 1.076947
970
+ 003985/005400, loss: 0.130852, avg_loss: 1.075774
971
+ 003990/005400, loss: 0.134866, avg_loss: 1.074596
972
+ 003995/005400, loss: 0.074542, avg_loss: 1.073402
973
+ 004000/005400, loss: 0.116856, avg_loss: 1.072228
974
+ 004005/005400, loss: 0.105077, avg_loss: 1.071026
975
+ 004010/005400, loss: 0.125664, avg_loss: 1.069845
976
+ 004015/005400, loss: 0.103024, avg_loss: 1.068720
977
+ 004020/005400, loss: 0.128571, avg_loss: 1.067519
978
+ 004025/005400, loss: 0.112454, avg_loss: 1.066343
979
+ 004030/005400, loss: 0.150399, avg_loss: 1.065202
980
+ 004035/005400, loss: 0.073474, avg_loss: 1.064020
981
+ 004040/005400, loss: 0.209692, avg_loss: 1.062895
982
+ 004045/005400, loss: 0.126410, avg_loss: 1.061723
983
+ 004050/005400, loss: 0.168801, avg_loss: 1.060549
984
+ 004055/005400, loss: 0.159003, avg_loss: 1.059423
985
+ 004060/005400, loss: 0.153256, avg_loss: 1.058304
986
+ 004065/005400, loss: 0.139507, avg_loss: 1.057153
987
+ 004070/005400, loss: 0.196264, avg_loss: 1.056009
988
+ 004075/005400, loss: 0.153815, avg_loss: 1.054860
989
+ 004080/005400, loss: 0.182606, avg_loss: 1.053734
990
+ 004085/005400, loss: 0.093651, avg_loss: 1.052585
991
+ 004090/005400, loss: 0.138306, avg_loss: 1.051489
992
+ 004095/005400, loss: 0.125193, avg_loss: 1.050385
993
+ 004100/005400, loss: 0.086516, avg_loss: 1.049260
994
+ 004105/005400, loss: 0.120107, avg_loss: 1.048157
995
+ 004110/005400, loss: 0.246864, avg_loss: 1.047057
996
+ 004115/005400, loss: 0.120596, avg_loss: 1.045902
997
+ 004120/005400, loss: 0.121840, avg_loss: 1.044833
998
+ 004125/005400, loss: 0.141377, avg_loss: 1.043755
999
+ 004130/005400, loss: 0.130236, avg_loss: 1.042661
1000
+ 004135/005400, loss: 0.077593, avg_loss: 1.041535
1001
+ 004140/005400, loss: 0.096709, avg_loss: 1.040430
1002
+ ***** Running dev evaluation *****
1003
+ Num examples = 1500
1004
+ Instantaneous batch size per device = 32
1005
+ epoch 22, step 4140/5400: {'pearson': 0.8516815294443599, 'spearmanr': 0.8481674736867748}
1006
+ 004145/005400, loss: 0.107884, avg_loss: 1.039325
1007
+ 004150/005400, loss: 0.163758, avg_loss: 1.038213
1008
+ 004155/005400, loss: 0.107559, avg_loss: 1.037071
1009
+ 004160/005400, loss: 0.221394, avg_loss: 1.035994
1010
+ 004165/005400, loss: 0.095282, avg_loss: 1.034890
1011
+ 004170/005400, loss: 0.115735, avg_loss: 1.033791
1012
+ 004175/005400, loss: 0.120850, avg_loss: 1.032702
1013
+ 004180/005400, loss: 0.148173, avg_loss: 1.031638
1014
+ 004185/005400, loss: 0.150222, avg_loss: 1.030549
1015
+ 004190/005400, loss: 0.167705, avg_loss: 1.029474
1016
+ 004195/005400, loss: 0.080327, avg_loss: 1.028393
1017
+ 004200/005400, loss: 0.163523, avg_loss: 1.027316
1018
+ 004205/005400, loss: 0.091747, avg_loss: 1.026259
1019
+ 004210/005400, loss: 0.146581, avg_loss: 1.025186
1020
+ 004215/005400, loss: 0.138113, avg_loss: 1.024116
1021
+ 004220/005400, loss: 0.126675, avg_loss: 1.023037
1022
+ 004225/005400, loss: 0.174576, avg_loss: 1.021997
1023
+ 004230/005400, loss: 0.192664, avg_loss: 1.020943
1024
+ 004235/005400, loss: 0.075478, avg_loss: 1.019836
1025
+ 004240/005400, loss: 0.152823, avg_loss: 1.018803
1026
+ 004245/005400, loss: 0.116004, avg_loss: 1.017760
1027
+ 004250/005400, loss: 0.151843, avg_loss: 1.016687
1028
+ 004255/005400, loss: 0.198972, avg_loss: 1.015644
1029
+ 004260/005400, loss: 0.158850, avg_loss: 1.014584
1030
+ 004265/005400, loss: 0.140898, avg_loss: 1.013570
1031
+ 004270/005400, loss: 0.102441, avg_loss: 1.012552
1032
+ 004275/005400, loss: 0.116065, avg_loss: 1.011494
1033
+ 004280/005400, loss: 0.093895, avg_loss: 1.010467
1034
+ 004285/005400, loss: 0.091400, avg_loss: 1.009428
1035
+ 004290/005400, loss: 0.135847, avg_loss: 1.008452
1036
+ 004295/005400, loss: 0.131350, avg_loss: 1.007404
1037
+ 004300/005400, loss: 0.086305, avg_loss: 1.006382
1038
+ 004305/005400, loss: 0.149123, avg_loss: 1.005382
1039
+ 004310/005400, loss: 0.077175, avg_loss: 1.004378
1040
+ 004315/005400, loss: 0.130131, avg_loss: 1.003319
1041
+ 004320/005400, loss: 0.081299, avg_loss: 1.002324
1042
+ ***** Running dev evaluation *****
1043
+ Num examples = 1500
1044
+ Instantaneous batch size per device = 32
1045
+ epoch 23, step 4320/5400: {'pearson': 0.8516975249826064, 'spearmanr': 0.848650349577711}
1046
+ 004325/005400, loss: 0.162849, avg_loss: 1.001306
1047
+ 004330/005400, loss: 0.106408, avg_loss: 1.000275
1048
+ 004335/005400, loss: 0.112816, avg_loss: 0.999257
1049
+ 004340/005400, loss: 0.117222, avg_loss: 0.998237
1050
+ 004345/005400, loss: 0.163939, avg_loss: 0.997231
1051
+ 004350/005400, loss: 0.132185, avg_loss: 0.996227
1052
+ 004355/005400, loss: 0.120796, avg_loss: 0.995204
1053
+ 004360/005400, loss: 0.110429, avg_loss: 0.994174
1054
+ 004365/005400, loss: 0.176232, avg_loss: 0.993202
1055
+ 004370/005400, loss: 0.108074, avg_loss: 0.992212
1056
+ 004375/005400, loss: 0.172169, avg_loss: 0.991219
1057
+ 004380/005400, loss: 0.115517, avg_loss: 0.990262
1058
+ 004385/005400, loss: 0.121775, avg_loss: 0.989272
1059
+ 004390/005400, loss: 0.126275, avg_loss: 0.988271
1060
+ 004395/005400, loss: 0.107515, avg_loss: 0.987280
1061
+ 004400/005400, loss: 0.086793, avg_loss: 0.986292
1062
+ 004405/005400, loss: 0.124593, avg_loss: 0.985330
1063
+ 004410/005400, loss: 0.132320, avg_loss: 0.984378
1064
+ 004415/005400, loss: 0.167460, avg_loss: 0.983421
1065
+ 004420/005400, loss: 0.143033, avg_loss: 0.982441
1066
+ 004425/005400, loss: 0.123850, avg_loss: 0.981486
1067
+ 004430/005400, loss: 0.095938, avg_loss: 0.980500
1068
+ 004435/005400, loss: 0.135725, avg_loss: 0.979532
1069
+ 004440/005400, loss: 0.081924, avg_loss: 0.978545
1070
+ 004445/005400, loss: 0.122266, avg_loss: 0.977577
1071
+ 004450/005400, loss: 0.101690, avg_loss: 0.976582
1072
+ 004455/005400, loss: 0.159788, avg_loss: 0.975642
1073
+ 004460/005400, loss: 0.173950, avg_loss: 0.974689
1074
+ 004465/005400, loss: 0.140697, avg_loss: 0.973738
1075
+ 004470/005400, loss: 0.118950, avg_loss: 0.972768
1076
+ 004475/005400, loss: 0.090251, avg_loss: 0.971827
1077
+ 004480/005400, loss: 0.142421, avg_loss: 0.970929
1078
+ 004485/005400, loss: 0.093378, avg_loss: 0.969989
1079
+ 004490/005400, loss: 0.132023, avg_loss: 0.969045
1080
+ 004495/005400, loss: 0.177814, avg_loss: 0.968117
1081
+ 004500/005400, loss: 0.098508, avg_loss: 0.967177
1082
+ ***** Running dev evaluation *****
1083
+ Num examples = 1500
1084
+ Instantaneous batch size per device = 32
1085
+ epoch 24, step 4500/5400: {'pearson': 0.8521285961729241, 'spearmanr': 0.8483649029133034}
1086
+ 004505/005400, loss: 0.110328, avg_loss: 0.966241
1087
+ 004510/005400, loss: 0.102069, avg_loss: 0.965301
1088
+ 004515/005400, loss: 0.162850, avg_loss: 0.964387
1089
+ 004520/005400, loss: 0.151388, avg_loss: 0.963471
1090
+ 004525/005400, loss: 0.087982, avg_loss: 0.962517
1091
+ 004530/005400, loss: 0.107709, avg_loss: 0.961596
1092
+ 004535/005400, loss: 0.093900, avg_loss: 0.960650
1093
+ 004540/005400, loss: 0.087082, avg_loss: 0.959718
1094
+ 004545/005400, loss: 0.125253, avg_loss: 0.958788
1095
+ 004550/005400, loss: 0.091174, avg_loss: 0.957888
1096
+ 004555/005400, loss: 0.064617, avg_loss: 0.956960
1097
+ 004560/005400, loss: 0.079881, avg_loss: 0.956014
1098
+ 004565/005400, loss: 0.103206, avg_loss: 0.955083
1099
+ 004570/005400, loss: 0.143321, avg_loss: 0.954173
1100
+ 004575/005400, loss: 0.149977, avg_loss: 0.953271
1101
+ 004580/005400, loss: 0.092622, avg_loss: 0.952352
1102
+ 004585/005400, loss: 0.126467, avg_loss: 0.951438
1103
+ 004590/005400, loss: 0.094483, avg_loss: 0.950528
1104
+ 004595/005400, loss: 0.107402, avg_loss: 0.949638
1105
+ 004600/005400, loss: 0.082824, avg_loss: 0.948713
1106
+ 004605/005400, loss: 0.195657, avg_loss: 0.947822
1107
+ 004610/005400, loss: 0.124441, avg_loss: 0.946935
1108
+ 004615/005400, loss: 0.121700, avg_loss: 0.946019
1109
+ 004620/005400, loss: 0.148236, avg_loss: 0.945115
1110
+ 004625/005400, loss: 0.140154, avg_loss: 0.944207
1111
+ 004630/005400, loss: 0.165997, avg_loss: 0.943339
1112
+ 004635/005400, loss: 0.098995, avg_loss: 0.942421
1113
+ 004640/005400, loss: 0.120260, avg_loss: 0.941555
1114
+ 004645/005400, loss: 0.125061, avg_loss: 0.940660
1115
+ 004650/005400, loss: 0.112413, avg_loss: 0.939759
1116
+ 004655/005400, loss: 0.104798, avg_loss: 0.938884
1117
+ 004660/005400, loss: 0.105972, avg_loss: 0.937982
1118
+ 004665/005400, loss: 0.137016, avg_loss: 0.937103
1119
+ 004670/005400, loss: 0.086489, avg_loss: 0.936211
1120
+ 004675/005400, loss: 0.130223, avg_loss: 0.935320
1121
+ 004680/005400, loss: 0.067240, avg_loss: 0.934422
1122
+ ***** Running dev evaluation *****
1123
+ Num examples = 1500
1124
+ Instantaneous batch size per device = 32
1125
+ epoch 25, step 4680/5400: {'pearson': 0.8527350758782244, 'spearmanr': 0.8507346588341773}
1126
+ 004685/005400, loss: 0.153279, avg_loss: 0.933544
1127
+ 004690/005400, loss: 0.084691, avg_loss: 0.932667
1128
+ 004695/005400, loss: 0.127423, avg_loss: 0.931791
1129
+ 004700/005400, loss: 0.123418, avg_loss: 0.930916
1130
+ 004705/005400, loss: 0.102127, avg_loss: 0.930041
1131
+ 004710/005400, loss: 0.094543, avg_loss: 0.929162
1132
+ 004715/005400, loss: 0.112821, avg_loss: 0.928298
1133
+ 004720/005400, loss: 0.094509, avg_loss: 0.927409
1134
+ 004725/005400, loss: 0.148458, avg_loss: 0.926568
1135
+ 004730/005400, loss: 0.084626, avg_loss: 0.925698
1136
+ 004735/005400, loss: 0.074723, avg_loss: 0.924825
1137
+ 004740/005400, loss: 0.101060, avg_loss: 0.923955
1138
+ 004745/005400, loss: 0.202314, avg_loss: 0.923109
1139
+ 004750/005400, loss: 0.055448, avg_loss: 0.922249
1140
+ 004755/005400, loss: 0.179162, avg_loss: 0.921398
1141
+ 004760/005400, loss: 0.109892, avg_loss: 0.920548
1142
+ 004765/005400, loss: 0.128364, avg_loss: 0.919729
1143
+ 004770/005400, loss: 0.064578, avg_loss: 0.918864
1144
+ 004775/005400, loss: 0.119190, avg_loss: 0.918013
1145
+ 004780/005400, loss: 0.106971, avg_loss: 0.917168
1146
+ 004785/005400, loss: 0.103674, avg_loss: 0.916293
1147
+ 004790/005400, loss: 0.086906, avg_loss: 0.915437
1148
+ 004795/005400, loss: 0.078381, avg_loss: 0.914585
1149
+ 004800/005400, loss: 0.075235, avg_loss: 0.913733
1150
+ 004805/005400, loss: 0.089770, avg_loss: 0.912904
1151
+ 004810/005400, loss: 0.104229, avg_loss: 0.912068
1152
+ 004815/005400, loss: 0.100126, avg_loss: 0.911218
1153
+ 004820/005400, loss: 0.118553, avg_loss: 0.910386
1154
+ 004825/005400, loss: 0.143384, avg_loss: 0.909569
1155
+ 004830/005400, loss: 0.102282, avg_loss: 0.908771
1156
+ 004835/005400, loss: 0.148549, avg_loss: 0.907961
1157
+ 004840/005400, loss: 0.182026, avg_loss: 0.907128
1158
+ 004845/005400, loss: 0.190450, avg_loss: 0.906318
1159
+ 004850/005400, loss: 0.206217, avg_loss: 0.905508
1160
+ 004855/005400, loss: 0.064875, avg_loss: 0.904663
1161
+ 004860/005400, loss: 0.099118, avg_loss: 0.903843
1162
+ ***** Running dev evaluation *****
1163
+ Num examples = 1500
1164
+ Instantaneous batch size per device = 32
1165
+ epoch 26, step 4860/5400: {'pearson': 0.8501907223027365, 'spearmanr': 0.8489084429386828}
1166
+ 004865/005400, loss: 0.155720, avg_loss: 0.903033
1167
+ 004870/005400, loss: 0.123857, avg_loss: 0.902210
1168
+ 004875/005400, loss: 0.106955, avg_loss: 0.901405
1169
+ 004880/005400, loss: 0.141843, avg_loss: 0.900608
1170
+ 004885/005400, loss: 0.101737, avg_loss: 0.899809
1171
+ 004890/005400, loss: 0.159319, avg_loss: 0.899006
1172
+ 004895/005400, loss: 0.095495, avg_loss: 0.898173
1173
+ 004900/005400, loss: 0.134695, avg_loss: 0.897373
1174
+ 004905/005400, loss: 0.062803, avg_loss: 0.896564
1175
+ 004910/005400, loss: 0.132602, avg_loss: 0.895749
1176
+ 004915/005400, loss: 0.117661, avg_loss: 0.894927
1177
+ 004920/005400, loss: 0.134668, avg_loss: 0.894128
1178
+ 004925/005400, loss: 0.089291, avg_loss: 0.893325
1179
+ 004930/005400, loss: 0.116079, avg_loss: 0.892549
1180
+ 004935/005400, loss: 0.092115, avg_loss: 0.891750
1181
+ 004940/005400, loss: 0.132650, avg_loss: 0.890975
1182
+ 004945/005400, loss: 0.062088, avg_loss: 0.890193
1183
+ 004950/005400, loss: 0.062359, avg_loss: 0.889396
1184
+ 004955/005400, loss: 0.086961, avg_loss: 0.888640
1185
+ 004960/005400, loss: 0.155230, avg_loss: 0.887873
1186
+ 004965/005400, loss: 0.110812, avg_loss: 0.887072
1187
+ 004970/005400, loss: 0.068260, avg_loss: 0.886263
1188
+ 004975/005400, loss: 0.156115, avg_loss: 0.885500
1189
+ 004980/005400, loss: 0.124095, avg_loss: 0.884712
1190
+ 004985/005400, loss: 0.126226, avg_loss: 0.883916
1191
+ 004990/005400, loss: 0.083915, avg_loss: 0.883123
1192
+ 004995/005400, loss: 0.083612, avg_loss: 0.882345
1193
+ 005000/005400, loss: 0.129824, avg_loss: 0.881565
1194
+ 005005/005400, loss: 0.131232, avg_loss: 0.880788
1195
+ 005010/005400, loss: 0.122785, avg_loss: 0.879998
1196
+ 005015/005400, loss: 0.103774, avg_loss: 0.879201
1197
+ 005020/005400, loss: 0.090597, avg_loss: 0.878425
1198
+ 005025/005400, loss: 0.084932, avg_loss: 0.877641
1199
+ 005030/005400, loss: 0.107362, avg_loss: 0.876879
1200
+ 005035/005400, loss: 0.127814, avg_loss: 0.876147
1201
+ 005040/005400, loss: 0.170438, avg_loss: 0.875395
1202
+ ***** Running dev evaluation *****
1203
+ Num examples = 1500
1204
+ Instantaneous batch size per device = 32
1205
+ epoch 27, step 5040/5400: {'pearson': 0.8554061134436448, 'spearmanr': 0.8524378109427393}
1206
+ 005045/005400, loss: 0.076105, avg_loss: 0.874619
1207
+ 005050/005400, loss: 0.103730, avg_loss: 0.873847
1208
+ 005055/005400, loss: 0.064459, avg_loss: 0.873081
1209
+ 005060/005400, loss: 0.112303, avg_loss: 0.872322
1210
+ 005065/005400, loss: 0.071940, avg_loss: 0.871558
1211
+ 005070/005400, loss: 0.088598, avg_loss: 0.870804
1212
+ 005075/005400, loss: 0.090358, avg_loss: 0.870038
1213
+ 005080/005400, loss: 0.068572, avg_loss: 0.869276
1214
+ 005085/005400, loss: 0.068552, avg_loss: 0.868498
1215
+ 005090/005400, loss: 0.140135, avg_loss: 0.867759
1216
+ 005095/005400, loss: 0.076569, avg_loss: 0.866984
1217
+ 005100/005400, loss: 0.098298, avg_loss: 0.866226
1218
+ 005105/005400, loss: 0.108042, avg_loss: 0.865460
1219
+ 005110/005400, loss: 0.072785, avg_loss: 0.864705
1220
+ 005115/005400, loss: 0.155046, avg_loss: 0.863964
1221
+ 005120/005400, loss: 0.225429, avg_loss: 0.863240
1222
+ 005125/005400, loss: 0.089123, avg_loss: 0.862489
1223
+ 005130/005400, loss: 0.062908, avg_loss: 0.861729
1224
+ 005135/005400, loss: 0.050400, avg_loss: 0.860973
1225
+ 005140/005400, loss: 0.051159, avg_loss: 0.860225
1226
+ 005145/005400, loss: 0.092017, avg_loss: 0.859488
1227
+ 005150/005400, loss: 0.119527, avg_loss: 0.858754
1228
+ 005155/005400, loss: 0.089991, avg_loss: 0.858025
1229
+ 005160/005400, loss: 0.093003, avg_loss: 0.857288
1230
+ 005165/005400, loss: 0.096349, avg_loss: 0.856544
1231
+ 005170/005400, loss: 0.070699, avg_loss: 0.855824
1232
+ 005175/005400, loss: 0.061141, avg_loss: 0.855089
1233
+ 005180/005400, loss: 0.117543, avg_loss: 0.854368
1234
+ 005185/005400, loss: 0.070801, avg_loss: 0.853643
1235
+ 005190/005400, loss: 0.110298, avg_loss: 0.852924
1236
+ 005195/005400, loss: 0.114286, avg_loss: 0.852209
1237
+ 005200/005400, loss: 0.129566, avg_loss: 0.851483
1238
+ 005205/005400, loss: 0.101796, avg_loss: 0.850774
1239
+ 005210/005400, loss: 0.125870, avg_loss: 0.850059
1240
+ 005215/005400, loss: 0.049415, avg_loss: 0.849329
1241
+ 005220/005400, loss: 0.107189, avg_loss: 0.848612
1242
+ ***** Running dev evaluation *****
1243
+ Num examples = 1500
1244
+ Instantaneous batch size per device = 32
1245
+ epoch 28, step 5220/5400: {'pearson': 0.8540466796613693, 'spearmanr': 0.850937622804088}
1246
+ 005225/005400, loss: 0.080942, avg_loss: 0.847872
1247
+ 005230/005400, loss: 0.102161, avg_loss: 0.847163
1248
+ 005235/005400, loss: 0.082529, avg_loss: 0.846440
1249
+ 005240/005400, loss: 0.105809, avg_loss: 0.845731
1250
+ 005245/005400, loss: 0.117093, avg_loss: 0.845006
1251
+ 005250/005400, loss: 0.106933, avg_loss: 0.844305
1252
+ 005255/005400, loss: 0.074675, avg_loss: 0.843584
1253
+ 005260/005400, loss: 0.102407, avg_loss: 0.842881
1254
+ 005265/005400, loss: 0.148522, avg_loss: 0.842175
1255
+ 005270/005400, loss: 0.087407, avg_loss: 0.841448
1256
+ 005275/005400, loss: 0.098112, avg_loss: 0.840739
1257
+ 005280/005400, loss: 0.092396, avg_loss: 0.840018
1258
+ 005285/005400, loss: 0.062919, avg_loss: 0.839300
1259
+ 005290/005400, loss: 0.132550, avg_loss: 0.838632
1260
+ 005295/005400, loss: 0.145091, avg_loss: 0.837944
1261
+ 005300/005400, loss: 0.118631, avg_loss: 0.837256
1262
+ 005305/005400, loss: 0.056487, avg_loss: 0.836545
1263
+ 005310/005400, loss: 0.103461, avg_loss: 0.835856
1264
+ 005315/005400, loss: 0.112280, avg_loss: 0.835183
1265
+ 005320/005400, loss: 0.037065, avg_loss: 0.834495
1266
+ 005325/005400, loss: 0.102541, avg_loss: 0.833812
1267
+ 005330/005400, loss: 0.052560, avg_loss: 0.833121
1268
+ 005335/005400, loss: 0.118150, avg_loss: 0.832437
1269
+ 005340/005400, loss: 0.093599, avg_loss: 0.831748
1270
+ 005345/005400, loss: 0.057692, avg_loss: 0.831051
1271
+ 005350/005400, loss: 0.083881, avg_loss: 0.830350
1272
+ 005355/005400, loss: 0.092801, avg_loss: 0.829662
1273
+ 005360/005400, loss: 0.109509, avg_loss: 0.828983
1274
+ 005365/005400, loss: 0.126566, avg_loss: 0.828295
1275
+ 005370/005400, loss: 0.090441, avg_loss: 0.827624
1276
+ 005375/005400, loss: 0.098362, avg_loss: 0.826956
1277
+ 005380/005400, loss: 0.086417, avg_loss: 0.826275
1278
+ 005385/005400, loss: 0.090084, avg_loss: 0.825580
1279
+ 005390/005400, loss: 0.089639, avg_loss: 0.824919
1280
+ 005395/005400, loss: 0.112607, avg_loss: 0.824232
1281
+ 005400/005400, loss: 0.079185, avg_loss: 0.823571
1282
+ ***** Running dev evaluation *****
1283
+ Num examples = 1500
1284
+ Instantaneous batch size per device = 32
1285
+ epoch 29, step 5400/5400: {'pearson': 0.8466150052031443, 'spearmanr': 0.845214209063919}
1286
+ ***** Running train evaluation *****
1287
+ Num examples = 5749
1288
+ Instantaneous batch size per device = 32
1289
+ Train Dataset Result: {'pearson': 0.9873720770623174, 'spearmanr': 0.987580466183614}
1290
+ ***** Running dev evaluation *****
1291
+ Num examples = 1500
1292
+ Instantaneous batch size per device = 32
1293
+ Dev Dataset Result: {'pearson': 0.8466150052031443, 'spearmanr': 0.845214209063919}
1294
+ Training time 0:04:24
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd2392d68ba15911e69c429109e06a5b13b3cae51981bd4cb24f0a2677f8603a
3
+ size 34298509
result.txt ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {'pearson': 0.21495300918671972, 'spearmanr': 0.18778433070729544}
2
+ {'pearson': 0.1693196142024497, 'spearmanr': 0.1520939753827761}
3
+ {'pearson': 0.5585231671416229, 'spearmanr': 0.5968823171253705}
4
+ {'pearson': 0.7538161883822286, 'spearmanr': 0.7339178388810693}
5
+ {'pearson': 0.8037387020413668, 'spearmanr': 0.8107612065966875}
6
+ {'pearson': 0.8115941618503355, 'spearmanr': 0.8282434089896973}
7
+ {'pearson': 0.8244669741341696, 'spearmanr': 0.8347289521968146}
8
+ {'pearson': 0.8356315632016451, 'spearmanr': 0.8428067774651329}
9
+ {'pearson': 0.840875635131036, 'spearmanr': 0.8391187190190564}
10
+ {'pearson': 0.8342714757320445, 'spearmanr': 0.8376185602281018}
11
+ {'pearson': 0.8390370712384592, 'spearmanr': 0.8380421225427299}
12
+ {'pearson': 0.8434982902424131, 'spearmanr': 0.8445651086908786}
13
+ {'pearson': 0.8415414818553372, 'spearmanr': 0.8425621296013649}
14
+ {'pearson': 0.8425599117367437, 'spearmanr': 0.8414850205786223}
15
+ {'pearson': 0.8428262938537643, 'spearmanr': 0.8418967117492774}
16
+ {'pearson': 0.8465462185651544, 'spearmanr': 0.8451574856196069}
17
+ {'pearson': 0.8475945534372652, 'spearmanr': 0.8462737598699491}
18
+ {'pearson': 0.8504202206275068, 'spearmanr': 0.8473922892792047}
19
+ {'pearson': 0.8498450703665391, 'spearmanr': 0.8479951774929629}
20
+ {'pearson': 0.8482436057295935, 'spearmanr': 0.8472426908693901}
21
+ {'pearson': 0.8516633883376111, 'spearmanr': 0.848796837026541}
22
+ {'pearson': 0.847780716202824, 'spearmanr': 0.8469865580881132}
23
+ {'pearson': 0.8516815294443599, 'spearmanr': 0.8481674736867748}
24
+ {'pearson': 0.8516975249826064, 'spearmanr': 0.848650349577711}
25
+ {'pearson': 0.8521285961729241, 'spearmanr': 0.8483649029133034}
26
+ {'pearson': 0.8527350758782244, 'spearmanr': 0.8507346588341773}
27
+ {'pearson': 0.8501907223027365, 'spearmanr': 0.8489084429386828}
28
+ {'pearson': 0.8554061134436448, 'spearmanr': 0.8524378109427393}
29
+ {'pearson': 0.8540466796613693, 'spearmanr': 0.850937622804088}
30
+ {'pearson': 0.8466150052031443, 'spearmanr': 0.845214209063919}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "do_basic_tokenize": true, "model_max_length": 512, "name_or_path": "/home.local/jianwei/workspace/archive/SparseOptimizer/output/Layer_7_12_Hid_160_768_Head_10_12_IMRatio_3.5", "never_split": null, "special_tokens_map_file": "/home.local/jianwei/.cache/huggingface/transformers/b680d52711d2451bbd6c6b1700365d6d731977c1357ae86bd7227f61145d3be2.dd8bd9bfd3664b530ea4e645105f557769387b3da9f79bdb55ed556bdd80611d", "tokenizer_class": "BertTokenizer"}
vocab.txt ADDED
The diff for this file is too large to render. See raw diff