Upload 8 files
Browse files- config.json +37 -0
- log_bs32_lr3e-05_20221124_035004_897265.txt +1294 -0
- pytorch_model.bin +3 -0
- result.txt +30 -0
- special_tokens_map.json +1 -0
- tokenizer.json +0 -0
- tokenizer_config.json +1 -0
- vocab.txt +0 -0
config.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "/home.local/jianwei/workspace/archive/SparseOptimizer/output/Layer_7_12_Hid_160_768_Head_10_12_IMRatio_3.5",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"BertForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"embedding_size": 160,
|
| 9 |
+
"finetuning_task": "stsb",
|
| 10 |
+
"gradient_checkpointing": false,
|
| 11 |
+
"hidden_act": "gelu",
|
| 12 |
+
"hidden_dropout_prob": 0.1,
|
| 13 |
+
"hidden_size": 160,
|
| 14 |
+
"id2label": {
|
| 15 |
+
"0": "LABEL_0"
|
| 16 |
+
},
|
| 17 |
+
"initializer_range": 0.02,
|
| 18 |
+
"intermediate_size": 560,
|
| 19 |
+
"label2id": {
|
| 20 |
+
"LABEL_0": 0
|
| 21 |
+
},
|
| 22 |
+
"layer_norm_eps": 1e-12,
|
| 23 |
+
"max_position_embeddings": 512,
|
| 24 |
+
"model_type": "bert",
|
| 25 |
+
"num_attention_heads": 10,
|
| 26 |
+
"num_hidden_layers": 7,
|
| 27 |
+
"output_intermediate": true,
|
| 28 |
+
"output_past": true,
|
| 29 |
+
"pad_token_id": 0,
|
| 30 |
+
"position_embedding_type": "absolute",
|
| 31 |
+
"problem_type": "regression",
|
| 32 |
+
"torch_dtype": "float32",
|
| 33 |
+
"transformers_version": "4.17.0",
|
| 34 |
+
"type_vocab_size": 2,
|
| 35 |
+
"use_cache": true,
|
| 36 |
+
"vocab_size": 30522
|
| 37 |
+
}
|
log_bs32_lr3e-05_20221124_035004_897265.txt
ADDED
|
@@ -0,0 +1,1294 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
------------> log file ==runs2/stsb/OUTPUT_ID/log_bs32_lr3e-05_20221124_035004_897265.txt
|
| 2 |
+
Namespace(aug_train=False, data_dir='/home.local/jianwei/datasets/nlp/glue_data/STS-B', do_eval=False, early_stop=False, early_stop_metric='accuracy', eval_step=120, gradient_accumulation_steps=1, learning_rate=3e-05, local_rank=0, lr_scheduler_type=<SchedulerType.CONSTANT_WITH_WARMUP: 'constant_with_warmup'>, max_length=128, max_train_steps=None, model_name_or_path='/home.local/jianwei/workspace/archive/SparseOptimizer/output/Layer_7_12_Hid_160_768_Head_10_12_IMRatio_3.5', num_train_epochs=30, num_warmup_steps=0, output_dir='runs2/stsb/OUTPUT_ID', pad_to_max_length=False, per_device_eval_batch_size=32, per_device_train_batch_size=32, print_step=5, save_last=False, seed=None, task_name='stsb', train_file=None, use_slow_tokenizer=False, validation_file=None, weight_decay=0.0)
|
| 3 |
+
Distributed environment: NO
|
| 4 |
+
Num processes: 1
|
| 5 |
+
Process index: 0
|
| 6 |
+
Local process index: 0
|
| 7 |
+
Device: cuda
|
| 8 |
+
Mixed precision type: fp16
|
| 9 |
+
|
| 10 |
+
Sample 4674 of the training set: (tensor([ 101, 10079, 3629, 3102, 2048, 12632, 2336, 102, 10079, 4894,
|
| 11 |
+
8563, 2340, 12632, 2336, 102, 0, 0, 0, 0, 0,
|
| 12 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 13 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 14 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 15 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 16 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 17 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 18 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 19 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 20 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 21 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 22 |
+
0, 0, 0, 0, 0, 0, 0, 0]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 23 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 24 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 25 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 26 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 27 |
+
0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 28 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 29 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 30 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 31 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 32 |
+
0, 0, 0, 0, 0, 0, 0, 0]), tensor(3.)).
|
| 33 |
+
Sample 112 of the training set: (tensor([ 101, 1037, 2879, 2003, 9361, 1037, 21854, 11563, 1012, 102,
|
| 34 |
+
1037, 2879, 2003, 2559, 2012, 1037, 8094, 1012, 102, 0,
|
| 35 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 36 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 37 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 38 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 39 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 40 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 41 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 42 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 43 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 44 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 45 |
+
0, 0, 0, 0, 0, 0, 0, 0]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
|
| 46 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 47 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 48 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 49 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 50 |
+
0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
|
| 51 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 52 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 53 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 54 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 55 |
+
0, 0, 0, 0, 0, 0, 0, 0]), tensor(3.8000)).
|
| 56 |
+
Sample 4529 of the training set: (tensor([ 101, 3725, 3844, 2015, 2091, 4264, 2004, 3586, 6240, 9446,
|
| 57 |
+
6561, 2605, 102, 7327, 7767, 1005, 4340, 1005, 2000, 10663,
|
| 58 |
+
3586, 4168, 4017, 9446, 102, 0, 0, 0, 0, 0,
|
| 59 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 60 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 61 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 62 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 63 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 64 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 65 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 66 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 67 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 68 |
+
0, 0, 0, 0, 0, 0, 0, 0]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
| 69 |
+
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 70 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 71 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 72 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 73 |
+
0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
| 74 |
+
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 75 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 76 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 77 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 78 |
+
0, 0, 0, 0, 0, 0, 0, 0]), tensor(3.)).
|
| 79 |
+
***** Running training *****
|
| 80 |
+
Num examples = 5749
|
| 81 |
+
Num Epochs = 30
|
| 82 |
+
Instantaneous batch size per device = 32
|
| 83 |
+
Total train batch size (w. parallel, distributed & accumulation) = 32
|
| 84 |
+
Gradient Accumulation steps = 1
|
| 85 |
+
Total optimization steps = 5400
|
| 86 |
+
000005/005400, loss: 10.919802, avg_loss: 10.361012
|
| 87 |
+
000010/005400, loss: 10.181771, avg_loss: 9.698319
|
| 88 |
+
000015/005400, loss: 7.925676, avg_loss: 9.470798
|
| 89 |
+
000020/005400, loss: 9.417774, avg_loss: 9.406657
|
| 90 |
+
000025/005400, loss: 11.084503, avg_loss: 9.472598
|
| 91 |
+
000030/005400, loss: 7.033692, avg_loss: 9.447541
|
| 92 |
+
000035/005400, loss: 9.298050, avg_loss: 9.386552
|
| 93 |
+
000040/005400, loss: 8.342388, avg_loss: 9.386284
|
| 94 |
+
000045/005400, loss: 9.821406, avg_loss: 9.483932
|
| 95 |
+
000050/005400, loss: 9.257509, avg_loss: 9.586469
|
| 96 |
+
000055/005400, loss: 8.752683, avg_loss: 9.634466
|
| 97 |
+
000060/005400, loss: 6.560993, avg_loss: 9.559579
|
| 98 |
+
000065/005400, loss: 9.872775, avg_loss: 9.555094
|
| 99 |
+
000070/005400, loss: 9.549786, avg_loss: 9.570707
|
| 100 |
+
000075/005400, loss: 9.400767, avg_loss: 9.534591
|
| 101 |
+
000080/005400, loss: 9.152719, avg_loss: 9.532532
|
| 102 |
+
000085/005400, loss: 10.023327, avg_loss: 9.548860
|
| 103 |
+
000090/005400, loss: 8.150848, avg_loss: 9.539588
|
| 104 |
+
000095/005400, loss: 8.193304, avg_loss: 9.481100
|
| 105 |
+
000100/005400, loss: 8.688814, avg_loss: 9.431278
|
| 106 |
+
000105/005400, loss: 9.266927, avg_loss: 9.390692
|
| 107 |
+
000110/005400, loss: 7.621550, avg_loss: 9.346891
|
| 108 |
+
000115/005400, loss: 6.959364, avg_loss: 9.281569
|
| 109 |
+
000120/005400, loss: 9.679270, avg_loss: 9.291935
|
| 110 |
+
000125/005400, loss: 8.002371, avg_loss: 9.251488
|
| 111 |
+
000130/005400, loss: 8.983469, avg_loss: 9.210261
|
| 112 |
+
000135/005400, loss: 7.914767, avg_loss: 9.138828
|
| 113 |
+
000140/005400, loss: 9.368698, avg_loss: 9.096162
|
| 114 |
+
000145/005400, loss: 7.681985, avg_loss: 9.041475
|
| 115 |
+
000150/005400, loss: 7.530379, avg_loss: 8.976686
|
| 116 |
+
000155/005400, loss: 9.263411, avg_loss: 8.941676
|
| 117 |
+
000160/005400, loss: 7.710734, avg_loss: 8.895261
|
| 118 |
+
000165/005400, loss: 8.456438, avg_loss: 8.831314
|
| 119 |
+
000170/005400, loss: 6.155419, avg_loss: 8.772081
|
| 120 |
+
000175/005400, loss: 8.032525, avg_loss: 8.692823
|
| 121 |
+
000180/005400, loss: 4.489757, avg_loss: 8.613270
|
| 122 |
+
***** Running dev evaluation *****
|
| 123 |
+
Num examples = 1500
|
| 124 |
+
Instantaneous batch size per device = 32
|
| 125 |
+
epoch 0, step 180/5400: {'pearson': 0.21495300918671972, 'spearmanr': 0.18778433070729544}
|
| 126 |
+
000185/005400, loss: 7.743507, avg_loss: 8.577872
|
| 127 |
+
000190/005400, loss: 6.030101, avg_loss: 8.510907
|
| 128 |
+
000195/005400, loss: 3.536020, avg_loss: 8.447573
|
| 129 |
+
000200/005400, loss: 6.482443, avg_loss: 8.383874
|
| 130 |
+
000205/005400, loss: 7.495704, avg_loss: 8.331849
|
| 131 |
+
000210/005400, loss: 7.830889, avg_loss: 8.285714
|
| 132 |
+
000215/005400, loss: 7.473868, avg_loss: 8.219836
|
| 133 |
+
000220/005400, loss: 6.685350, avg_loss: 8.164707
|
| 134 |
+
000225/005400, loss: 4.961877, avg_loss: 8.111880
|
| 135 |
+
000230/005400, loss: 5.369789, avg_loss: 8.067034
|
| 136 |
+
000235/005400, loss: 4.157079, avg_loss: 8.007677
|
| 137 |
+
000240/005400, loss: 6.113519, avg_loss: 7.971732
|
| 138 |
+
000245/005400, loss: 4.965279, avg_loss: 7.912654
|
| 139 |
+
000250/005400, loss: 3.810572, avg_loss: 7.868684
|
| 140 |
+
000255/005400, loss: 5.212838, avg_loss: 7.827637
|
| 141 |
+
000260/005400, loss: 5.060454, avg_loss: 7.780213
|
| 142 |
+
000265/005400, loss: 3.830095, avg_loss: 7.720051
|
| 143 |
+
000270/005400, loss: 5.186792, avg_loss: 7.666411
|
| 144 |
+
000275/005400, loss: 5.622235, avg_loss: 7.623420
|
| 145 |
+
000280/005400, loss: 4.717276, avg_loss: 7.579897
|
| 146 |
+
000285/005400, loss: 4.819950, avg_loss: 7.529627
|
| 147 |
+
000290/005400, loss: 5.464397, avg_loss: 7.489963
|
| 148 |
+
000295/005400, loss: 5.470286, avg_loss: 7.442070
|
| 149 |
+
000300/005400, loss: 3.843780, avg_loss: 7.396591
|
| 150 |
+
000305/005400, loss: 3.396843, avg_loss: 7.349362
|
| 151 |
+
000310/005400, loss: 4.573213, avg_loss: 7.293336
|
| 152 |
+
000315/005400, loss: 4.345067, avg_loss: 7.247148
|
| 153 |
+
000320/005400, loss: 4.538530, avg_loss: 7.205413
|
| 154 |
+
000325/005400, loss: 3.374168, avg_loss: 7.165096
|
| 155 |
+
000330/005400, loss: 3.680195, avg_loss: 7.118695
|
| 156 |
+
000335/005400, loss: 3.798603, avg_loss: 7.071580
|
| 157 |
+
000340/005400, loss: 4.418723, avg_loss: 7.028340
|
| 158 |
+
000345/005400, loss: 2.651713, avg_loss: 6.979162
|
| 159 |
+
000350/005400, loss: 4.138247, avg_loss: 6.931522
|
| 160 |
+
000355/005400, loss: 4.034257, avg_loss: 6.891976
|
| 161 |
+
000360/005400, loss: 3.947625, avg_loss: 6.853448
|
| 162 |
+
***** Running dev evaluation *****
|
| 163 |
+
Num examples = 1500
|
| 164 |
+
Instantaneous batch size per device = 32
|
| 165 |
+
epoch 1, step 360/5400: {'pearson': 0.1693196142024497, 'spearmanr': 0.1520939753827761}
|
| 166 |
+
000365/005400, loss: 4.066084, avg_loss: 6.813658
|
| 167 |
+
000370/005400, loss: 2.446641, avg_loss: 6.765847
|
| 168 |
+
000375/005400, loss: 3.652923, avg_loss: 6.724291
|
| 169 |
+
000380/005400, loss: 2.925441, avg_loss: 6.683929
|
| 170 |
+
000385/005400, loss: 3.510277, avg_loss: 6.641427
|
| 171 |
+
000390/005400, loss: 3.712820, avg_loss: 6.597568
|
| 172 |
+
000395/005400, loss: 2.864999, avg_loss: 6.559540
|
| 173 |
+
000400/005400, loss: 2.363536, avg_loss: 6.515924
|
| 174 |
+
000405/005400, loss: 3.202157, avg_loss: 6.472551
|
| 175 |
+
000410/005400, loss: 2.507275, avg_loss: 6.427726
|
| 176 |
+
000415/005400, loss: 2.655454, avg_loss: 6.383974
|
| 177 |
+
000420/005400, loss: 3.361968, avg_loss: 6.343995
|
| 178 |
+
000425/005400, loss: 2.212350, avg_loss: 6.302114
|
| 179 |
+
000430/005400, loss: 2.654854, avg_loss: 6.260254
|
| 180 |
+
000435/005400, loss: 2.469006, avg_loss: 6.219799
|
| 181 |
+
000440/005400, loss: 2.423651, avg_loss: 6.179832
|
| 182 |
+
000445/005400, loss: 1.999993, avg_loss: 6.138898
|
| 183 |
+
000450/005400, loss: 3.104252, avg_loss: 6.101511
|
| 184 |
+
000455/005400, loss: 2.722913, avg_loss: 6.065750
|
| 185 |
+
000460/005400, loss: 2.014916, avg_loss: 6.027135
|
| 186 |
+
000465/005400, loss: 2.255650, avg_loss: 5.989807
|
| 187 |
+
000470/005400, loss: 2.582577, avg_loss: 5.953727
|
| 188 |
+
000475/005400, loss: 2.268125, avg_loss: 5.922364
|
| 189 |
+
000480/005400, loss: 2.132411, avg_loss: 5.883134
|
| 190 |
+
000485/005400, loss: 2.092988, avg_loss: 5.850251
|
| 191 |
+
000490/005400, loss: 2.469923, avg_loss: 5.816648
|
| 192 |
+
000495/005400, loss: 1.907046, avg_loss: 5.776493
|
| 193 |
+
000500/005400, loss: 2.188262, avg_loss: 5.740701
|
| 194 |
+
000505/005400, loss: 1.522663, avg_loss: 5.703157
|
| 195 |
+
000510/005400, loss: 1.982296, avg_loss: 5.667968
|
| 196 |
+
000515/005400, loss: 2.409446, avg_loss: 5.635783
|
| 197 |
+
000520/005400, loss: 1.887568, avg_loss: 5.603417
|
| 198 |
+
000525/005400, loss: 2.210217, avg_loss: 5.572377
|
| 199 |
+
000530/005400, loss: 2.381753, avg_loss: 5.541968
|
| 200 |
+
000535/005400, loss: 2.081358, avg_loss: 5.511043
|
| 201 |
+
000540/005400, loss: 2.770565, avg_loss: 5.483432
|
| 202 |
+
***** Running dev evaluation *****
|
| 203 |
+
Num examples = 1500
|
| 204 |
+
Instantaneous batch size per device = 32
|
| 205 |
+
epoch 2, step 540/5400: {'pearson': 0.5585231671416229, 'spearmanr': 0.5968823171253705}
|
| 206 |
+
000545/005400, loss: 2.294569, avg_loss: 5.453700
|
| 207 |
+
000550/005400, loss: 1.879893, avg_loss: 5.425492
|
| 208 |
+
000555/005400, loss: 2.054521, avg_loss: 5.396323
|
| 209 |
+
000560/005400, loss: 2.426673, avg_loss: 5.367306
|
| 210 |
+
000565/005400, loss: 1.785937, avg_loss: 5.339570
|
| 211 |
+
000570/005400, loss: 2.125966, avg_loss: 5.312624
|
| 212 |
+
000575/005400, loss: 2.204447, avg_loss: 5.285309
|
| 213 |
+
000580/005400, loss: 1.977976, avg_loss: 5.257472
|
| 214 |
+
000585/005400, loss: 1.667451, avg_loss: 5.227795
|
| 215 |
+
000590/005400, loss: 2.013373, avg_loss: 5.196637
|
| 216 |
+
000595/005400, loss: 1.661575, avg_loss: 5.165030
|
| 217 |
+
000600/005400, loss: 1.761523, avg_loss: 5.134732
|
| 218 |
+
000605/005400, loss: 1.165827, avg_loss: 5.104335
|
| 219 |
+
000610/005400, loss: 1.423938, avg_loss: 5.073547
|
| 220 |
+
000615/005400, loss: 1.275937, avg_loss: 5.045039
|
| 221 |
+
000620/005400, loss: 1.456807, avg_loss: 5.016568
|
| 222 |
+
000625/005400, loss: 1.428447, avg_loss: 4.988611
|
| 223 |
+
000630/005400, loss: 1.340862, avg_loss: 4.960019
|
| 224 |
+
000635/005400, loss: 1.158772, avg_loss: 4.931690
|
| 225 |
+
000640/005400, loss: 1.279753, avg_loss: 4.903349
|
| 226 |
+
000645/005400, loss: 1.495990, avg_loss: 4.875799
|
| 227 |
+
000650/005400, loss: 1.418819, avg_loss: 4.847696
|
| 228 |
+
000655/005400, loss: 1.233781, avg_loss: 4.819313
|
| 229 |
+
000660/005400, loss: 0.825644, avg_loss: 4.790388
|
| 230 |
+
000665/005400, loss: 1.236975, avg_loss: 4.763828
|
| 231 |
+
000670/005400, loss: 1.427844, avg_loss: 4.737722
|
| 232 |
+
000675/005400, loss: 1.194959, avg_loss: 4.710323
|
| 233 |
+
000680/005400, loss: 1.298458, avg_loss: 4.683432
|
| 234 |
+
000685/005400, loss: 1.250220, avg_loss: 4.658527
|
| 235 |
+
000690/005400, loss: 1.528629, avg_loss: 4.632631
|
| 236 |
+
000695/005400, loss: 0.912524, avg_loss: 4.608270
|
| 237 |
+
000700/005400, loss: 0.927178, avg_loss: 4.583745
|
| 238 |
+
000705/005400, loss: 1.425212, avg_loss: 4.560665
|
| 239 |
+
000710/005400, loss: 1.385559, avg_loss: 4.537204
|
| 240 |
+
000715/005400, loss: 1.303016, avg_loss: 4.512879
|
| 241 |
+
000720/005400, loss: 1.559370, avg_loss: 4.489179
|
| 242 |
+
***** Running dev evaluation *****
|
| 243 |
+
Num examples = 1500
|
| 244 |
+
Instantaneous batch size per device = 32
|
| 245 |
+
epoch 3, step 720/5400: {'pearson': 0.7538161883822286, 'spearmanr': 0.7339178388810693}
|
| 246 |
+
000725/005400, loss: 1.238668, avg_loss: 4.465238
|
| 247 |
+
000730/005400, loss: 0.980286, avg_loss: 4.441211
|
| 248 |
+
000735/005400, loss: 1.036360, avg_loss: 4.417511
|
| 249 |
+
000740/005400, loss: 0.733856, avg_loss: 4.392772
|
| 250 |
+
000745/005400, loss: 0.911818, avg_loss: 4.369401
|
| 251 |
+
000750/005400, loss: 0.923820, avg_loss: 4.346304
|
| 252 |
+
000755/005400, loss: 1.057140, avg_loss: 4.322930
|
| 253 |
+
000760/005400, loss: 0.671852, avg_loss: 4.300414
|
| 254 |
+
000765/005400, loss: 1.206062, avg_loss: 4.278674
|
| 255 |
+
000770/005400, loss: 1.131734, avg_loss: 4.256801
|
| 256 |
+
000775/005400, loss: 0.727703, avg_loss: 4.235108
|
| 257 |
+
000780/005400, loss: 0.807117, avg_loss: 4.213659
|
| 258 |
+
000785/005400, loss: 0.656183, avg_loss: 4.193212
|
| 259 |
+
000790/005400, loss: 1.307819, avg_loss: 4.173065
|
| 260 |
+
000795/005400, loss: 1.318512, avg_loss: 4.152132
|
| 261 |
+
000800/005400, loss: 0.923019, avg_loss: 4.130066
|
| 262 |
+
000805/005400, loss: 0.358175, avg_loss: 4.109530
|
| 263 |
+
000810/005400, loss: 0.568605, avg_loss: 4.090200
|
| 264 |
+
000815/005400, loss: 0.538159, avg_loss: 4.070320
|
| 265 |
+
000820/005400, loss: 0.791279, avg_loss: 4.050106
|
| 266 |
+
000825/005400, loss: 0.646954, avg_loss: 4.029779
|
| 267 |
+
000830/005400, loss: 0.696995, avg_loss: 4.010299
|
| 268 |
+
000835/005400, loss: 0.851315, avg_loss: 3.991444
|
| 269 |
+
000840/005400, loss: 0.953209, avg_loss: 3.972254
|
| 270 |
+
000845/005400, loss: 0.639867, avg_loss: 3.953383
|
| 271 |
+
000850/005400, loss: 0.828691, avg_loss: 3.934936
|
| 272 |
+
000855/005400, loss: 0.851312, avg_loss: 3.916689
|
| 273 |
+
000860/005400, loss: 0.913527, avg_loss: 3.898366
|
| 274 |
+
000865/005400, loss: 0.769578, avg_loss: 3.880448
|
| 275 |
+
000870/005400, loss: 0.780589, avg_loss: 3.862448
|
| 276 |
+
000875/005400, loss: 0.973308, avg_loss: 3.845167
|
| 277 |
+
000880/005400, loss: 0.829930, avg_loss: 3.827655
|
| 278 |
+
000885/005400, loss: 0.766140, avg_loss: 3.810577
|
| 279 |
+
000890/005400, loss: 0.563716, avg_loss: 3.793318
|
| 280 |
+
000895/005400, loss: 1.047082, avg_loss: 3.776392
|
| 281 |
+
000900/005400, loss: 0.866974, avg_loss: 3.759473
|
| 282 |
+
***** Running dev evaluation *****
|
| 283 |
+
Num examples = 1500
|
| 284 |
+
Instantaneous batch size per device = 32
|
| 285 |
+
epoch 4, step 900/5400: {'pearson': 0.8037387020413668, 'spearmanr': 0.8107612065966875}
|
| 286 |
+
000905/005400, loss: 0.766670, avg_loss: 3.742716
|
| 287 |
+
000910/005400, loss: 0.644619, avg_loss: 3.725242
|
| 288 |
+
000915/005400, loss: 0.794441, avg_loss: 3.709127
|
| 289 |
+
000920/005400, loss: 0.583735, avg_loss: 3.693077
|
| 290 |
+
000925/005400, loss: 0.467945, avg_loss: 3.676897
|
| 291 |
+
000930/005400, loss: 0.635556, avg_loss: 3.661061
|
| 292 |
+
000935/005400, loss: 0.546880, avg_loss: 3.644815
|
| 293 |
+
000940/005400, loss: 0.442663, avg_loss: 3.628318
|
| 294 |
+
000945/005400, loss: 0.683668, avg_loss: 3.612901
|
| 295 |
+
000950/005400, loss: 0.656306, avg_loss: 3.597124
|
| 296 |
+
000955/005400, loss: 0.710459, avg_loss: 3.582143
|
| 297 |
+
000960/005400, loss: 0.503140, avg_loss: 3.567162
|
| 298 |
+
000965/005400, loss: 0.659339, avg_loss: 3.552504
|
| 299 |
+
000970/005400, loss: 0.707433, avg_loss: 3.537561
|
| 300 |
+
000975/005400, loss: 0.965483, avg_loss: 3.523352
|
| 301 |
+
000980/005400, loss: 0.855915, avg_loss: 3.508989
|
| 302 |
+
000985/005400, loss: 0.649465, avg_loss: 3.494453
|
| 303 |
+
000990/005400, loss: 0.513151, avg_loss: 3.480050
|
| 304 |
+
000995/005400, loss: 0.907288, avg_loss: 3.465789
|
| 305 |
+
001000/005400, loss: 0.461537, avg_loss: 3.451433
|
| 306 |
+
001005/005400, loss: 0.496157, avg_loss: 3.437648
|
| 307 |
+
001010/005400, loss: 0.989706, avg_loss: 3.424380
|
| 308 |
+
001015/005400, loss: 0.754088, avg_loss: 3.410539
|
| 309 |
+
001020/005400, loss: 0.731938, avg_loss: 3.396362
|
| 310 |
+
001025/005400, loss: 0.844449, avg_loss: 3.382560
|
| 311 |
+
001030/005400, loss: 0.346046, avg_loss: 3.368838
|
| 312 |
+
001035/005400, loss: 0.518788, avg_loss: 3.355767
|
| 313 |
+
001040/005400, loss: 0.714191, avg_loss: 3.342353
|
| 314 |
+
001045/005400, loss: 0.800863, avg_loss: 3.329591
|
| 315 |
+
001050/005400, loss: 0.538331, avg_loss: 3.316277
|
| 316 |
+
001055/005400, loss: 0.645015, avg_loss: 3.303465
|
| 317 |
+
001060/005400, loss: 0.451743, avg_loss: 3.290423
|
| 318 |
+
001065/005400, loss: 0.482815, avg_loss: 3.277498
|
| 319 |
+
001070/005400, loss: 0.428583, avg_loss: 3.264993
|
| 320 |
+
001075/005400, loss: 0.905002, avg_loss: 3.253403
|
| 321 |
+
001080/005400, loss: 0.423076, avg_loss: 3.241331
|
| 322 |
+
***** Running dev evaluation *****
|
| 323 |
+
Num examples = 1500
|
| 324 |
+
Instantaneous batch size per device = 32
|
| 325 |
+
epoch 5, step 1080/5400: {'pearson': 0.8115941618503355, 'spearmanr': 0.8282434089896973}
|
| 326 |
+
001085/005400, loss: 0.671325, avg_loss: 3.228906
|
| 327 |
+
001090/005400, loss: 0.510681, avg_loss: 3.216880
|
| 328 |
+
001095/005400, loss: 0.503297, avg_loss: 3.204282
|
| 329 |
+
001100/005400, loss: 0.476207, avg_loss: 3.192340
|
| 330 |
+
001105/005400, loss: 0.287384, avg_loss: 3.180409
|
| 331 |
+
001110/005400, loss: 0.838371, avg_loss: 3.168479
|
| 332 |
+
001115/005400, loss: 0.561100, avg_loss: 3.157175
|
| 333 |
+
001120/005400, loss: 0.461640, avg_loss: 3.145364
|
| 334 |
+
001125/005400, loss: 0.672549, avg_loss: 3.133893
|
| 335 |
+
001130/005400, loss: 0.443830, avg_loss: 3.122226
|
| 336 |
+
001135/005400, loss: 0.465307, avg_loss: 3.110867
|
| 337 |
+
001140/005400, loss: 0.763562, avg_loss: 3.099965
|
| 338 |
+
001145/005400, loss: 0.561359, avg_loss: 3.088965
|
| 339 |
+
001150/005400, loss: 0.411171, avg_loss: 3.077866
|
| 340 |
+
001155/005400, loss: 0.406792, avg_loss: 3.066446
|
| 341 |
+
001160/005400, loss: 0.503313, avg_loss: 3.055675
|
| 342 |
+
001165/005400, loss: 0.475825, avg_loss: 3.045274
|
| 343 |
+
001170/005400, loss: 0.584800, avg_loss: 3.034442
|
| 344 |
+
001175/005400, loss: 0.465069, avg_loss: 3.023891
|
| 345 |
+
001180/005400, loss: 0.494697, avg_loss: 3.013498
|
| 346 |
+
001185/005400, loss: 0.544740, avg_loss: 3.003423
|
| 347 |
+
001190/005400, loss: 0.406965, avg_loss: 2.992763
|
| 348 |
+
001195/005400, loss: 0.268987, avg_loss: 2.982255
|
| 349 |
+
001200/005400, loss: 0.495571, avg_loss: 2.972160
|
| 350 |
+
001205/005400, loss: 0.538762, avg_loss: 2.961849
|
| 351 |
+
001210/005400, loss: 0.478300, avg_loss: 2.952011
|
| 352 |
+
001215/005400, loss: 0.338071, avg_loss: 2.942186
|
| 353 |
+
001220/005400, loss: 0.505288, avg_loss: 2.932113
|
| 354 |
+
001225/005400, loss: 0.570436, avg_loss: 2.922221
|
| 355 |
+
001230/005400, loss: 0.523959, avg_loss: 2.912277
|
| 356 |
+
001235/005400, loss: 0.491099, avg_loss: 2.902272
|
| 357 |
+
001240/005400, loss: 0.559447, avg_loss: 2.892549
|
| 358 |
+
001245/005400, loss: 0.753339, avg_loss: 2.883637
|
| 359 |
+
001250/005400, loss: 0.602193, avg_loss: 2.874184
|
| 360 |
+
001255/005400, loss: 0.302810, avg_loss: 2.864446
|
| 361 |
+
001260/005400, loss: 0.636528, avg_loss: 2.855367
|
| 362 |
+
***** Running dev evaluation *****
|
| 363 |
+
Num examples = 1500
|
| 364 |
+
Instantaneous batch size per device = 32
|
| 365 |
+
epoch 6, step 1260/5400: {'pearson': 0.8244669741341696, 'spearmanr': 0.8347289521968146}
|
| 366 |
+
001265/005400, loss: 0.576356, avg_loss: 2.845884
|
| 367 |
+
001270/005400, loss: 0.356003, avg_loss: 2.836981
|
| 368 |
+
001275/005400, loss: 0.282959, avg_loss: 2.827679
|
| 369 |
+
001280/005400, loss: 0.471389, avg_loss: 2.818289
|
| 370 |
+
001285/005400, loss: 0.291599, avg_loss: 2.809166
|
| 371 |
+
001290/005400, loss: 0.309215, avg_loss: 2.799846
|
| 372 |
+
001295/005400, loss: 0.440720, avg_loss: 2.790764
|
| 373 |
+
001300/005400, loss: 0.452717, avg_loss: 2.781574
|
| 374 |
+
001305/005400, loss: 0.379403, avg_loss: 2.772831
|
| 375 |
+
001310/005400, loss: 0.740967, avg_loss: 2.764373
|
| 376 |
+
001315/005400, loss: 0.554469, avg_loss: 2.755583
|
| 377 |
+
001320/005400, loss: 0.422943, avg_loss: 2.747635
|
| 378 |
+
001325/005400, loss: 0.613703, avg_loss: 2.739164
|
| 379 |
+
001330/005400, loss: 0.333465, avg_loss: 2.730182
|
| 380 |
+
001335/005400, loss: 0.531835, avg_loss: 2.721662
|
| 381 |
+
001340/005400, loss: 0.447510, avg_loss: 2.713335
|
| 382 |
+
001345/005400, loss: 0.487799, avg_loss: 2.705467
|
| 383 |
+
001350/005400, loss: 0.629011, avg_loss: 2.697427
|
| 384 |
+
001355/005400, loss: 0.316717, avg_loss: 2.688931
|
| 385 |
+
001360/005400, loss: 0.483824, avg_loss: 2.680822
|
| 386 |
+
001365/005400, loss: 0.420798, avg_loss: 2.672428
|
| 387 |
+
001370/005400, loss: 0.312988, avg_loss: 2.664160
|
| 388 |
+
001375/005400, loss: 0.253772, avg_loss: 2.655796
|
| 389 |
+
001380/005400, loss: 0.507312, avg_loss: 2.648081
|
| 390 |
+
001385/005400, loss: 0.423927, avg_loss: 2.640514
|
| 391 |
+
001390/005400, loss: 0.488432, avg_loss: 2.632712
|
| 392 |
+
001395/005400, loss: 0.496802, avg_loss: 2.624703
|
| 393 |
+
001400/005400, loss: 0.411566, avg_loss: 2.617226
|
| 394 |
+
001405/005400, loss: 0.620914, avg_loss: 2.609520
|
| 395 |
+
001410/005400, loss: 0.529554, avg_loss: 2.602186
|
| 396 |
+
001415/005400, loss: 0.377586, avg_loss: 2.594550
|
| 397 |
+
001420/005400, loss: 0.537113, avg_loss: 2.587398
|
| 398 |
+
001425/005400, loss: 0.502925, avg_loss: 2.579730
|
| 399 |
+
001430/005400, loss: 0.501363, avg_loss: 2.572518
|
| 400 |
+
001435/005400, loss: 0.523148, avg_loss: 2.564881
|
| 401 |
+
001440/005400, loss: 0.283889, avg_loss: 2.557591
|
| 402 |
+
***** Running dev evaluation *****
|
| 403 |
+
Num examples = 1500
|
| 404 |
+
Instantaneous batch size per device = 32
|
| 405 |
+
epoch 7, step 1440/5400: {'pearson': 0.8356315632016451, 'spearmanr': 0.8428067774651329}
|
| 406 |
+
001445/005400, loss: 0.283461, avg_loss: 2.549994
|
| 407 |
+
001450/005400, loss: 0.473319, avg_loss: 2.542806
|
| 408 |
+
001455/005400, loss: 0.465852, avg_loss: 2.535222
|
| 409 |
+
001460/005400, loss: 0.452470, avg_loss: 2.528097
|
| 410 |
+
001465/005400, loss: 0.528226, avg_loss: 2.521023
|
| 411 |
+
001470/005400, loss: 0.372980, avg_loss: 2.513948
|
| 412 |
+
001475/005400, loss: 0.580186, avg_loss: 2.507289
|
| 413 |
+
001480/005400, loss: 0.250609, avg_loss: 2.500083
|
| 414 |
+
001485/005400, loss: 0.373619, avg_loss: 2.492741
|
| 415 |
+
001490/005400, loss: 0.313954, avg_loss: 2.485812
|
| 416 |
+
001495/005400, loss: 0.421009, avg_loss: 2.478890
|
| 417 |
+
001500/005400, loss: 0.417312, avg_loss: 2.472097
|
| 418 |
+
001505/005400, loss: 0.419549, avg_loss: 2.465457
|
| 419 |
+
001510/005400, loss: 0.567841, avg_loss: 2.458859
|
| 420 |
+
001515/005400, loss: 0.221651, avg_loss: 2.452159
|
| 421 |
+
001520/005400, loss: 0.323677, avg_loss: 2.445824
|
| 422 |
+
001525/005400, loss: 0.563059, avg_loss: 2.439246
|
| 423 |
+
001530/005400, loss: 0.273469, avg_loss: 2.432506
|
| 424 |
+
001535/005400, loss: 0.230308, avg_loss: 2.425987
|
| 425 |
+
001540/005400, loss: 0.275917, avg_loss: 2.419360
|
| 426 |
+
001545/005400, loss: 0.490302, avg_loss: 2.412818
|
| 427 |
+
001550/005400, loss: 0.171527, avg_loss: 2.406091
|
| 428 |
+
001555/005400, loss: 0.499564, avg_loss: 2.399561
|
| 429 |
+
001560/005400, loss: 0.583477, avg_loss: 2.393275
|
| 430 |
+
001565/005400, loss: 0.422795, avg_loss: 2.387004
|
| 431 |
+
001570/005400, loss: 0.356273, avg_loss: 2.380570
|
| 432 |
+
001575/005400, loss: 0.442116, avg_loss: 2.374079
|
| 433 |
+
001580/005400, loss: 0.380964, avg_loss: 2.367966
|
| 434 |
+
001585/005400, loss: 0.454051, avg_loss: 2.361857
|
| 435 |
+
001590/005400, loss: 0.292075, avg_loss: 2.355417
|
| 436 |
+
001595/005400, loss: 0.433962, avg_loss: 2.349358
|
| 437 |
+
001600/005400, loss: 0.253748, avg_loss: 2.343178
|
| 438 |
+
001605/005400, loss: 0.277990, avg_loss: 2.337058
|
| 439 |
+
001610/005400, loss: 0.658840, avg_loss: 2.331389
|
| 440 |
+
001615/005400, loss: 0.284291, avg_loss: 2.325417
|
| 441 |
+
001620/005400, loss: 0.347131, avg_loss: 2.319557
|
| 442 |
+
***** Running dev evaluation *****
|
| 443 |
+
Num examples = 1500
|
| 444 |
+
Instantaneous batch size per device = 32
|
| 445 |
+
epoch 8, step 1620/5400: {'pearson': 0.840875635131036, 'spearmanr': 0.8391187190190564}
|
| 446 |
+
001625/005400, loss: 0.468040, avg_loss: 2.313795
|
| 447 |
+
001630/005400, loss: 0.377569, avg_loss: 2.307781
|
| 448 |
+
001635/005400, loss: 0.373161, avg_loss: 2.301947
|
| 449 |
+
001640/005400, loss: 0.542144, avg_loss: 2.296237
|
| 450 |
+
001645/005400, loss: 0.394721, avg_loss: 2.290366
|
| 451 |
+
001650/005400, loss: 0.313285, avg_loss: 2.284578
|
| 452 |
+
001655/005400, loss: 0.458701, avg_loss: 2.278912
|
| 453 |
+
001660/005400, loss: 0.294037, avg_loss: 2.273092
|
| 454 |
+
001665/005400, loss: 0.288020, avg_loss: 2.267503
|
| 455 |
+
001670/005400, loss: 0.372206, avg_loss: 2.261890
|
| 456 |
+
001675/005400, loss: 0.439113, avg_loss: 2.256269
|
| 457 |
+
001680/005400, loss: 0.265594, avg_loss: 2.250567
|
| 458 |
+
001685/005400, loss: 0.307823, avg_loss: 2.244832
|
| 459 |
+
001690/005400, loss: 0.214900, avg_loss: 2.239233
|
| 460 |
+
001695/005400, loss: 0.430367, avg_loss: 2.234019
|
| 461 |
+
001700/005400, loss: 0.428587, avg_loss: 2.228347
|
| 462 |
+
001705/005400, loss: 0.466478, avg_loss: 2.223007
|
| 463 |
+
001710/005400, loss: 0.406999, avg_loss: 2.217425
|
| 464 |
+
001715/005400, loss: 0.249302, avg_loss: 2.211718
|
| 465 |
+
001720/005400, loss: 0.449824, avg_loss: 2.206581
|
| 466 |
+
001725/005400, loss: 0.200499, avg_loss: 2.201121
|
| 467 |
+
001730/005400, loss: 0.528394, avg_loss: 2.196022
|
| 468 |
+
001735/005400, loss: 0.420790, avg_loss: 2.190833
|
| 469 |
+
001740/005400, loss: 0.393591, avg_loss: 2.185567
|
| 470 |
+
001745/005400, loss: 0.292256, avg_loss: 2.180424
|
| 471 |
+
001750/005400, loss: 0.401385, avg_loss: 2.175266
|
| 472 |
+
001755/005400, loss: 0.294124, avg_loss: 2.169960
|
| 473 |
+
001760/005400, loss: 0.363119, avg_loss: 2.164699
|
| 474 |
+
001765/005400, loss: 0.390154, avg_loss: 2.159830
|
| 475 |
+
001770/005400, loss: 0.313013, avg_loss: 2.154815
|
| 476 |
+
001775/005400, loss: 0.308711, avg_loss: 2.149686
|
| 477 |
+
001780/005400, loss: 0.483320, avg_loss: 2.144812
|
| 478 |
+
001785/005400, loss: 0.379410, avg_loss: 2.139796
|
| 479 |
+
001790/005400, loss: 0.422236, avg_loss: 2.134915
|
| 480 |
+
001795/005400, loss: 0.511399, avg_loss: 2.130093
|
| 481 |
+
001800/005400, loss: 0.423039, avg_loss: 2.125146
|
| 482 |
+
***** Running dev evaluation *****
|
| 483 |
+
Num examples = 1500
|
| 484 |
+
Instantaneous batch size per device = 32
|
| 485 |
+
epoch 9, step 1800/5400: {'pearson': 0.8342714757320445, 'spearmanr': 0.8376185602281018}
|
| 486 |
+
001805/005400, loss: 0.486487, avg_loss: 2.120132
|
| 487 |
+
001810/005400, loss: 0.270155, avg_loss: 2.115208
|
| 488 |
+
001815/005400, loss: 0.227492, avg_loss: 2.110093
|
| 489 |
+
001820/005400, loss: 0.346458, avg_loss: 2.105187
|
| 490 |
+
001825/005400, loss: 0.426929, avg_loss: 2.100322
|
| 491 |
+
001830/005400, loss: 0.117478, avg_loss: 2.095436
|
| 492 |
+
001835/005400, loss: 0.279193, avg_loss: 2.090488
|
| 493 |
+
001840/005400, loss: 0.387577, avg_loss: 2.085845
|
| 494 |
+
001845/005400, loss: 0.250648, avg_loss: 2.081071
|
| 495 |
+
001850/005400, loss: 0.303584, avg_loss: 2.076289
|
| 496 |
+
001855/005400, loss: 0.405041, avg_loss: 2.071732
|
| 497 |
+
001860/005400, loss: 0.166183, avg_loss: 2.066910
|
| 498 |
+
001865/005400, loss: 0.319343, avg_loss: 2.062141
|
| 499 |
+
001870/005400, loss: 0.317750, avg_loss: 2.057461
|
| 500 |
+
001875/005400, loss: 0.315497, avg_loss: 2.052864
|
| 501 |
+
001880/005400, loss: 0.338883, avg_loss: 2.048301
|
| 502 |
+
001885/005400, loss: 0.322422, avg_loss: 2.043658
|
| 503 |
+
001890/005400, loss: 0.136494, avg_loss: 2.038912
|
| 504 |
+
001895/005400, loss: 0.384212, avg_loss: 2.034422
|
| 505 |
+
001900/005400, loss: 0.386642, avg_loss: 2.029817
|
| 506 |
+
001905/005400, loss: 0.336843, avg_loss: 2.025262
|
| 507 |
+
001910/005400, loss: 0.378603, avg_loss: 2.020888
|
| 508 |
+
001915/005400, loss: 0.244922, avg_loss: 2.016456
|
| 509 |
+
001920/005400, loss: 0.388475, avg_loss: 2.012008
|
| 510 |
+
001925/005400, loss: 0.275199, avg_loss: 2.007573
|
| 511 |
+
001930/005400, loss: 0.286381, avg_loss: 2.003031
|
| 512 |
+
001935/005400, loss: 0.408020, avg_loss: 1.998613
|
| 513 |
+
001940/005400, loss: 0.296814, avg_loss: 1.994459
|
| 514 |
+
001945/005400, loss: 0.221215, avg_loss: 1.990418
|
| 515 |
+
001950/005400, loss: 0.386474, avg_loss: 1.986272
|
| 516 |
+
001955/005400, loss: 0.186999, avg_loss: 1.981974
|
| 517 |
+
001960/005400, loss: 0.353515, avg_loss: 1.977982
|
| 518 |
+
001965/005400, loss: 0.220710, avg_loss: 1.973756
|
| 519 |
+
001970/005400, loss: 0.522696, avg_loss: 1.969660
|
| 520 |
+
001975/005400, loss: 0.318528, avg_loss: 1.965668
|
| 521 |
+
001980/005400, loss: 0.256884, avg_loss: 1.961408
|
| 522 |
+
***** Running dev evaluation *****
|
| 523 |
+
Num examples = 1500
|
| 524 |
+
Instantaneous batch size per device = 32
|
| 525 |
+
epoch 10, step 1980/5400: {'pearson': 0.8390370712384592, 'spearmanr': 0.8380421225427299}
|
| 526 |
+
001985/005400, loss: 0.339906, avg_loss: 1.957260
|
| 527 |
+
001990/005400, loss: 0.177573, avg_loss: 1.953061
|
| 528 |
+
001995/005400, loss: 0.434594, avg_loss: 1.949158
|
| 529 |
+
002000/005400, loss: 0.394058, avg_loss: 1.945127
|
| 530 |
+
002005/005400, loss: 0.284734, avg_loss: 1.941044
|
| 531 |
+
002010/005400, loss: 0.441842, avg_loss: 1.937158
|
| 532 |
+
002015/005400, loss: 0.370813, avg_loss: 1.933077
|
| 533 |
+
002020/005400, loss: 0.231465, avg_loss: 1.929090
|
| 534 |
+
002025/005400, loss: 0.401823, avg_loss: 1.925187
|
| 535 |
+
002030/005400, loss: 0.417580, avg_loss: 1.921148
|
| 536 |
+
002035/005400, loss: 0.233858, avg_loss: 1.917078
|
| 537 |
+
002040/005400, loss: 0.179666, avg_loss: 1.913157
|
| 538 |
+
002045/005400, loss: 0.260741, avg_loss: 1.909101
|
| 539 |
+
002050/005400, loss: 0.221551, avg_loss: 1.905037
|
| 540 |
+
002055/005400, loss: 0.234906, avg_loss: 1.901112
|
| 541 |
+
002060/005400, loss: 0.170529, avg_loss: 1.897019
|
| 542 |
+
002065/005400, loss: 0.246520, avg_loss: 1.893189
|
| 543 |
+
002070/005400, loss: 0.221311, avg_loss: 1.889234
|
| 544 |
+
002075/005400, loss: 0.181704, avg_loss: 1.885389
|
| 545 |
+
002080/005400, loss: 0.418144, avg_loss: 1.881511
|
| 546 |
+
002085/005400, loss: 0.207121, avg_loss: 1.877616
|
| 547 |
+
002090/005400, loss: 0.250038, avg_loss: 1.873798
|
| 548 |
+
002095/005400, loss: 0.266151, avg_loss: 1.869941
|
| 549 |
+
002100/005400, loss: 0.329553, avg_loss: 1.866257
|
| 550 |
+
002105/005400, loss: 0.316394, avg_loss: 1.862574
|
| 551 |
+
002110/005400, loss: 0.202054, avg_loss: 1.858893
|
| 552 |
+
002115/005400, loss: 0.558679, avg_loss: 1.855374
|
| 553 |
+
002120/005400, loss: 0.305135, avg_loss: 1.851792
|
| 554 |
+
002125/005400, loss: 0.306204, avg_loss: 1.848025
|
| 555 |
+
002130/005400, loss: 0.354196, avg_loss: 1.844382
|
| 556 |
+
002135/005400, loss: 0.513295, avg_loss: 1.840886
|
| 557 |
+
002140/005400, loss: 0.338046, avg_loss: 1.837288
|
| 558 |
+
002145/005400, loss: 0.233815, avg_loss: 1.833621
|
| 559 |
+
002150/005400, loss: 0.303081, avg_loss: 1.830035
|
| 560 |
+
002155/005400, loss: 0.217688, avg_loss: 1.826318
|
| 561 |
+
002160/005400, loss: 0.223059, avg_loss: 1.822730
|
| 562 |
+
***** Running dev evaluation *****
|
| 563 |
+
Num examples = 1500
|
| 564 |
+
Instantaneous batch size per device = 32
|
| 565 |
+
epoch 11, step 2160/5400: {'pearson': 0.8434982902424131, 'spearmanr': 0.8445651086908786}
|
| 566 |
+
002165/005400, loss: 0.237432, avg_loss: 1.819061
|
| 567 |
+
002170/005400, loss: 0.283776, avg_loss: 1.815507
|
| 568 |
+
002175/005400, loss: 0.309928, avg_loss: 1.811960
|
| 569 |
+
002180/005400, loss: 0.256525, avg_loss: 1.808401
|
| 570 |
+
002185/005400, loss: 0.282268, avg_loss: 1.804922
|
| 571 |
+
002190/005400, loss: 0.277528, avg_loss: 1.801368
|
| 572 |
+
002195/005400, loss: 0.345856, avg_loss: 1.797885
|
| 573 |
+
002200/005400, loss: 0.393328, avg_loss: 1.794652
|
| 574 |
+
002205/005400, loss: 0.224377, avg_loss: 1.791248
|
| 575 |
+
002210/005400, loss: 0.219291, avg_loss: 1.787713
|
| 576 |
+
002215/005400, loss: 0.147671, avg_loss: 1.784197
|
| 577 |
+
002220/005400, loss: 0.339344, avg_loss: 1.780853
|
| 578 |
+
002225/005400, loss: 0.219361, avg_loss: 1.777467
|
| 579 |
+
002230/005400, loss: 0.280020, avg_loss: 1.774036
|
| 580 |
+
002235/005400, loss: 0.261592, avg_loss: 1.770745
|
| 581 |
+
002240/005400, loss: 0.293255, avg_loss: 1.767543
|
| 582 |
+
002245/005400, loss: 0.260899, avg_loss: 1.764155
|
| 583 |
+
002250/005400, loss: 0.251379, avg_loss: 1.760734
|
| 584 |
+
002255/005400, loss: 0.180517, avg_loss: 1.757394
|
| 585 |
+
002260/005400, loss: 0.237342, avg_loss: 1.754018
|
| 586 |
+
002265/005400, loss: 0.348091, avg_loss: 1.750775
|
| 587 |
+
002270/005400, loss: 0.169205, avg_loss: 1.747420
|
| 588 |
+
002275/005400, loss: 0.308270, avg_loss: 1.744165
|
| 589 |
+
002280/005400, loss: 0.265926, avg_loss: 1.740912
|
| 590 |
+
002285/005400, loss: 0.269741, avg_loss: 1.737594
|
| 591 |
+
002290/005400, loss: 0.368088, avg_loss: 1.734481
|
| 592 |
+
002295/005400, loss: 0.288817, avg_loss: 1.731510
|
| 593 |
+
002300/005400, loss: 0.151223, avg_loss: 1.728326
|
| 594 |
+
002305/005400, loss: 0.314602, avg_loss: 1.725295
|
| 595 |
+
002310/005400, loss: 0.204679, avg_loss: 1.722112
|
| 596 |
+
002315/005400, loss: 0.288287, avg_loss: 1.718930
|
| 597 |
+
002320/005400, loss: 0.245926, avg_loss: 1.715852
|
| 598 |
+
002325/005400, loss: 0.204663, avg_loss: 1.712662
|
| 599 |
+
002330/005400, loss: 0.215070, avg_loss: 1.709556
|
| 600 |
+
002335/005400, loss: 0.190882, avg_loss: 1.706442
|
| 601 |
+
002340/005400, loss: 0.224660, avg_loss: 1.703429
|
| 602 |
+
***** Running dev evaluation *****
|
| 603 |
+
Num examples = 1500
|
| 604 |
+
Instantaneous batch size per device = 32
|
| 605 |
+
epoch 12, step 2340/5400: {'pearson': 0.8415414818553372, 'spearmanr': 0.8425621296013649}
|
| 606 |
+
002345/005400, loss: 0.207369, avg_loss: 1.700278
|
| 607 |
+
002350/005400, loss: 0.261497, avg_loss: 1.697250
|
| 608 |
+
002355/005400, loss: 0.230280, avg_loss: 1.694103
|
| 609 |
+
002360/005400, loss: 0.262285, avg_loss: 1.690920
|
| 610 |
+
002365/005400, loss: 0.151266, avg_loss: 1.687904
|
| 611 |
+
002370/005400, loss: 0.269719, avg_loss: 1.684892
|
| 612 |
+
002375/005400, loss: 0.354083, avg_loss: 1.681934
|
| 613 |
+
002380/005400, loss: 0.237291, avg_loss: 1.678996
|
| 614 |
+
002385/005400, loss: 0.186130, avg_loss: 1.676010
|
| 615 |
+
002390/005400, loss: 0.260663, avg_loss: 1.673000
|
| 616 |
+
002395/005400, loss: 0.203245, avg_loss: 1.669989
|
| 617 |
+
002400/005400, loss: 0.309466, avg_loss: 1.667078
|
| 618 |
+
002405/005400, loss: 0.167727, avg_loss: 1.664065
|
| 619 |
+
002410/005400, loss: 0.180444, avg_loss: 1.661110
|
| 620 |
+
002415/005400, loss: 0.205075, avg_loss: 1.658129
|
| 621 |
+
002420/005400, loss: 0.251971, avg_loss: 1.655157
|
| 622 |
+
002425/005400, loss: 0.503691, avg_loss: 1.652340
|
| 623 |
+
002430/005400, loss: 0.361796, avg_loss: 1.649719
|
| 624 |
+
002435/005400, loss: 0.220655, avg_loss: 1.646866
|
| 625 |
+
002440/005400, loss: 0.364590, avg_loss: 1.644123
|
| 626 |
+
002445/005400, loss: 0.387156, avg_loss: 1.641263
|
| 627 |
+
002450/005400, loss: 0.321079, avg_loss: 1.638517
|
| 628 |
+
002455/005400, loss: 0.165761, avg_loss: 1.635770
|
| 629 |
+
002460/005400, loss: 0.270390, avg_loss: 1.632963
|
| 630 |
+
002465/005400, loss: 0.202102, avg_loss: 1.630213
|
| 631 |
+
002470/005400, loss: 0.162662, avg_loss: 1.627334
|
| 632 |
+
002475/005400, loss: 0.141903, avg_loss: 1.624407
|
| 633 |
+
002480/005400, loss: 0.130965, avg_loss: 1.621656
|
| 634 |
+
002485/005400, loss: 0.185001, avg_loss: 1.618813
|
| 635 |
+
002490/005400, loss: 0.237992, avg_loss: 1.616033
|
| 636 |
+
002495/005400, loss: 0.158510, avg_loss: 1.613217
|
| 637 |
+
002500/005400, loss: 0.259753, avg_loss: 1.610477
|
| 638 |
+
002505/005400, loss: 0.108687, avg_loss: 1.607702
|
| 639 |
+
002510/005400, loss: 0.179495, avg_loss: 1.604972
|
| 640 |
+
002515/005400, loss: 0.267883, avg_loss: 1.602195
|
| 641 |
+
002520/005400, loss: 0.205575, avg_loss: 1.599474
|
| 642 |
+
***** Running dev evaluation *****
|
| 643 |
+
Num examples = 1500
|
| 644 |
+
Instantaneous batch size per device = 32
|
| 645 |
+
epoch 13, step 2520/5400: {'pearson': 0.8425599117367437, 'spearmanr': 0.8414850205786223}
|
| 646 |
+
002525/005400, loss: 0.199653, avg_loss: 1.596711
|
| 647 |
+
002530/005400, loss: 0.201341, avg_loss: 1.593993
|
| 648 |
+
002535/005400, loss: 0.203724, avg_loss: 1.591213
|
| 649 |
+
002540/005400, loss: 0.254623, avg_loss: 1.588562
|
| 650 |
+
002545/005400, loss: 0.369073, avg_loss: 1.585980
|
| 651 |
+
002550/005400, loss: 0.106891, avg_loss: 1.583355
|
| 652 |
+
002555/005400, loss: 0.136818, avg_loss: 1.580702
|
| 653 |
+
002560/005400, loss: 0.231878, avg_loss: 1.577973
|
| 654 |
+
002565/005400, loss: 0.156474, avg_loss: 1.575269
|
| 655 |
+
002570/005400, loss: 0.236511, avg_loss: 1.572622
|
| 656 |
+
002575/005400, loss: 0.257811, avg_loss: 1.570007
|
| 657 |
+
002580/005400, loss: 0.468576, avg_loss: 1.567428
|
| 658 |
+
002585/005400, loss: 0.163139, avg_loss: 1.564778
|
| 659 |
+
002590/005400, loss: 0.436930, avg_loss: 1.562216
|
| 660 |
+
002595/005400, loss: 0.196596, avg_loss: 1.559604
|
| 661 |
+
002600/005400, loss: 0.232763, avg_loss: 1.557100
|
| 662 |
+
002605/005400, loss: 0.164102, avg_loss: 1.554545
|
| 663 |
+
002610/005400, loss: 0.258984, avg_loss: 1.551967
|
| 664 |
+
002615/005400, loss: 0.188581, avg_loss: 1.549408
|
| 665 |
+
002620/005400, loss: 0.215384, avg_loss: 1.546768
|
| 666 |
+
002625/005400, loss: 0.165978, avg_loss: 1.544174
|
| 667 |
+
002630/005400, loss: 0.254275, avg_loss: 1.541621
|
| 668 |
+
002635/005400, loss: 0.260447, avg_loss: 1.539074
|
| 669 |
+
002640/005400, loss: 0.257019, avg_loss: 1.536569
|
| 670 |
+
002645/005400, loss: 0.304152, avg_loss: 1.534171
|
| 671 |
+
002650/005400, loss: 0.172311, avg_loss: 1.531694
|
| 672 |
+
002655/005400, loss: 0.217652, avg_loss: 1.529228
|
| 673 |
+
002660/005400, loss: 0.431580, avg_loss: 1.526855
|
| 674 |
+
002665/005400, loss: 0.342930, avg_loss: 1.524416
|
| 675 |
+
002670/005400, loss: 0.281481, avg_loss: 1.521985
|
| 676 |
+
002675/005400, loss: 0.115055, avg_loss: 1.519483
|
| 677 |
+
002680/005400, loss: 0.190243, avg_loss: 1.517189
|
| 678 |
+
002685/005400, loss: 0.173296, avg_loss: 1.514757
|
| 679 |
+
002690/005400, loss: 0.374071, avg_loss: 1.512512
|
| 680 |
+
002695/005400, loss: 0.322947, avg_loss: 1.510223
|
| 681 |
+
002700/005400, loss: 0.133452, avg_loss: 1.507823
|
| 682 |
+
***** Running dev evaluation *****
|
| 683 |
+
Num examples = 1500
|
| 684 |
+
Instantaneous batch size per device = 32
|
| 685 |
+
epoch 14, step 2700/5400: {'pearson': 0.8428262938537643, 'spearmanr': 0.8418967117492774}
|
| 686 |
+
002705/005400, loss: 0.165615, avg_loss: 1.505401
|
| 687 |
+
002710/005400, loss: 0.191277, avg_loss: 1.503067
|
| 688 |
+
002715/005400, loss: 0.186724, avg_loss: 1.500670
|
| 689 |
+
002720/005400, loss: 0.166687, avg_loss: 1.498308
|
| 690 |
+
002725/005400, loss: 0.173368, avg_loss: 1.495943
|
| 691 |
+
002730/005400, loss: 0.182292, avg_loss: 1.493571
|
| 692 |
+
002735/005400, loss: 0.094817, avg_loss: 1.491132
|
| 693 |
+
002740/005400, loss: 0.151966, avg_loss: 1.488704
|
| 694 |
+
002745/005400, loss: 0.118933, avg_loss: 1.486331
|
| 695 |
+
002750/005400, loss: 0.150439, avg_loss: 1.484025
|
| 696 |
+
002755/005400, loss: 0.220458, avg_loss: 1.481835
|
| 697 |
+
002760/005400, loss: 0.165892, avg_loss: 1.479519
|
| 698 |
+
002765/005400, loss: 0.226839, avg_loss: 1.477155
|
| 699 |
+
002770/005400, loss: 0.181736, avg_loss: 1.474844
|
| 700 |
+
002775/005400, loss: 0.103294, avg_loss: 1.472493
|
| 701 |
+
002780/005400, loss: 0.152098, avg_loss: 1.470169
|
| 702 |
+
002785/005400, loss: 0.210727, avg_loss: 1.467948
|
| 703 |
+
002790/005400, loss: 0.218008, avg_loss: 1.465678
|
| 704 |
+
002795/005400, loss: 0.303881, avg_loss: 1.463492
|
| 705 |
+
002800/005400, loss: 0.149363, avg_loss: 1.461267
|
| 706 |
+
002805/005400, loss: 0.278521, avg_loss: 1.459031
|
| 707 |
+
002810/005400, loss: 0.177459, avg_loss: 1.456765
|
| 708 |
+
002815/005400, loss: 0.147072, avg_loss: 1.454549
|
| 709 |
+
002820/005400, loss: 0.154193, avg_loss: 1.452240
|
| 710 |
+
002825/005400, loss: 0.118995, avg_loss: 1.450022
|
| 711 |
+
002830/005400, loss: 0.306946, avg_loss: 1.447801
|
| 712 |
+
002835/005400, loss: 0.203090, avg_loss: 1.445593
|
| 713 |
+
002840/005400, loss: 0.196348, avg_loss: 1.443464
|
| 714 |
+
002845/005400, loss: 0.113525, avg_loss: 1.441222
|
| 715 |
+
002850/005400, loss: 0.305031, avg_loss: 1.439138
|
| 716 |
+
002855/005400, loss: 0.179518, avg_loss: 1.436929
|
| 717 |
+
002860/005400, loss: 0.317867, avg_loss: 1.434791
|
| 718 |
+
002865/005400, loss: 0.244391, avg_loss: 1.432654
|
| 719 |
+
002870/005400, loss: 0.201873, avg_loss: 1.430598
|
| 720 |
+
002875/005400, loss: 0.332513, avg_loss: 1.428486
|
| 721 |
+
002880/005400, loss: 0.174545, avg_loss: 1.426279
|
| 722 |
+
***** Running dev evaluation *****
|
| 723 |
+
Num examples = 1500
|
| 724 |
+
Instantaneous batch size per device = 32
|
| 725 |
+
epoch 15, step 2880/5400: {'pearson': 0.8465462185651544, 'spearmanr': 0.8451574856196069}
|
| 726 |
+
002885/005400, loss: 0.140292, avg_loss: 1.424148
|
| 727 |
+
002890/005400, loss: 0.180590, avg_loss: 1.422041
|
| 728 |
+
002895/005400, loss: 0.276235, avg_loss: 1.419968
|
| 729 |
+
002900/005400, loss: 0.079708, avg_loss: 1.417818
|
| 730 |
+
002905/005400, loss: 0.178860, avg_loss: 1.415680
|
| 731 |
+
002910/005400, loss: 0.191974, avg_loss: 1.413542
|
| 732 |
+
002915/005400, loss: 0.160231, avg_loss: 1.411524
|
| 733 |
+
002920/005400, loss: 0.179065, avg_loss: 1.409382
|
| 734 |
+
002925/005400, loss: 0.261529, avg_loss: 1.407299
|
| 735 |
+
002930/005400, loss: 0.196875, avg_loss: 1.405278
|
| 736 |
+
002935/005400, loss: 0.172792, avg_loss: 1.403193
|
| 737 |
+
002940/005400, loss: 0.132129, avg_loss: 1.401091
|
| 738 |
+
002945/005400, loss: 0.143233, avg_loss: 1.398991
|
| 739 |
+
002950/005400, loss: 0.098005, avg_loss: 1.396972
|
| 740 |
+
002955/005400, loss: 0.216378, avg_loss: 1.394936
|
| 741 |
+
002960/005400, loss: 0.168641, avg_loss: 1.392847
|
| 742 |
+
002965/005400, loss: 0.200968, avg_loss: 1.390786
|
| 743 |
+
002970/005400, loss: 0.125896, avg_loss: 1.388788
|
| 744 |
+
002975/005400, loss: 0.244486, avg_loss: 1.386788
|
| 745 |
+
002980/005400, loss: 0.157024, avg_loss: 1.384753
|
| 746 |
+
002985/005400, loss: 0.131733, avg_loss: 1.382739
|
| 747 |
+
002990/005400, loss: 0.180723, avg_loss: 1.380701
|
| 748 |
+
002995/005400, loss: 0.213533, avg_loss: 1.378717
|
| 749 |
+
003000/005400, loss: 0.149431, avg_loss: 1.376713
|
| 750 |
+
003005/005400, loss: 0.145573, avg_loss: 1.374738
|
| 751 |
+
003010/005400, loss: 0.142425, avg_loss: 1.372738
|
| 752 |
+
003015/005400, loss: 0.273710, avg_loss: 1.370737
|
| 753 |
+
003020/005400, loss: 0.164532, avg_loss: 1.368793
|
| 754 |
+
003025/005400, loss: 0.354658, avg_loss: 1.366944
|
| 755 |
+
003030/005400, loss: 0.162812, avg_loss: 1.365036
|
| 756 |
+
003035/005400, loss: 0.225085, avg_loss: 1.363150
|
| 757 |
+
003040/005400, loss: 0.237793, avg_loss: 1.361249
|
| 758 |
+
003045/005400, loss: 0.175477, avg_loss: 1.359304
|
| 759 |
+
003050/005400, loss: 0.220884, avg_loss: 1.357379
|
| 760 |
+
003055/005400, loss: 0.116397, avg_loss: 1.355440
|
| 761 |
+
003060/005400, loss: 0.180262, avg_loss: 1.353549
|
| 762 |
+
***** Running dev evaluation *****
|
| 763 |
+
Num examples = 1500
|
| 764 |
+
Instantaneous batch size per device = 32
|
| 765 |
+
epoch 16, step 3060/5400: {'pearson': 0.8475945534372652, 'spearmanr': 0.8462737598699491}
|
| 766 |
+
003065/005400, loss: 0.208348, avg_loss: 1.351671
|
| 767 |
+
003070/005400, loss: 0.162787, avg_loss: 1.349760
|
| 768 |
+
003075/005400, loss: 0.204459, avg_loss: 1.347873
|
| 769 |
+
003080/005400, loss: 0.243172, avg_loss: 1.346001
|
| 770 |
+
003085/005400, loss: 0.105318, avg_loss: 1.344059
|
| 771 |
+
003090/005400, loss: 0.143131, avg_loss: 1.342206
|
| 772 |
+
003095/005400, loss: 0.170449, avg_loss: 1.340305
|
| 773 |
+
003100/005400, loss: 0.208828, avg_loss: 1.338421
|
| 774 |
+
003105/005400, loss: 0.186506, avg_loss: 1.336552
|
| 775 |
+
003110/005400, loss: 0.138573, avg_loss: 1.334692
|
| 776 |
+
003115/005400, loss: 0.199446, avg_loss: 1.332886
|
| 777 |
+
003120/005400, loss: 0.178179, avg_loss: 1.331061
|
| 778 |
+
003125/005400, loss: 0.158329, avg_loss: 1.329155
|
| 779 |
+
003130/005400, loss: 0.132958, avg_loss: 1.327291
|
| 780 |
+
003135/005400, loss: 0.117738, avg_loss: 1.325437
|
| 781 |
+
003140/005400, loss: 0.187024, avg_loss: 1.323590
|
| 782 |
+
003145/005400, loss: 0.285563, avg_loss: 1.321791
|
| 783 |
+
003150/005400, loss: 0.126655, avg_loss: 1.320009
|
| 784 |
+
003155/005400, loss: 0.246144, avg_loss: 1.318180
|
| 785 |
+
003160/005400, loss: 0.222086, avg_loss: 1.316403
|
| 786 |
+
003165/005400, loss: 0.088263, avg_loss: 1.314602
|
| 787 |
+
003170/005400, loss: 0.159250, avg_loss: 1.312750
|
| 788 |
+
003175/005400, loss: 0.232737, avg_loss: 1.311048
|
| 789 |
+
003180/005400, loss: 0.150258, avg_loss: 1.309249
|
| 790 |
+
003185/005400, loss: 0.149525, avg_loss: 1.307465
|
| 791 |
+
003190/005400, loss: 0.175701, avg_loss: 1.305661
|
| 792 |
+
003195/005400, loss: 0.224868, avg_loss: 1.303942
|
| 793 |
+
003200/005400, loss: 0.151383, avg_loss: 1.302172
|
| 794 |
+
003205/005400, loss: 0.216179, avg_loss: 1.300442
|
| 795 |
+
003210/005400, loss: 0.197382, avg_loss: 1.298647
|
| 796 |
+
003215/005400, loss: 0.174374, avg_loss: 1.296861
|
| 797 |
+
003220/005400, loss: 0.146824, avg_loss: 1.295138
|
| 798 |
+
003225/005400, loss: 0.172476, avg_loss: 1.293391
|
| 799 |
+
003230/005400, loss: 0.180328, avg_loss: 1.291636
|
| 800 |
+
003235/005400, loss: 0.219937, avg_loss: 1.289898
|
| 801 |
+
003240/005400, loss: 0.152960, avg_loss: 1.288163
|
| 802 |
+
***** Running dev evaluation *****
|
| 803 |
+
Num examples = 1500
|
| 804 |
+
Instantaneous batch size per device = 32
|
| 805 |
+
epoch 17, step 3240/5400: {'pearson': 0.8504202206275068, 'spearmanr': 0.8473922892792047}
|
| 806 |
+
003245/005400, loss: 0.171524, avg_loss: 1.286479
|
| 807 |
+
003250/005400, loss: 0.116338, avg_loss: 1.284752
|
| 808 |
+
003255/005400, loss: 0.086406, avg_loss: 1.283072
|
| 809 |
+
003260/005400, loss: 0.150628, avg_loss: 1.281353
|
| 810 |
+
003265/005400, loss: 0.139414, avg_loss: 1.279617
|
| 811 |
+
003270/005400, loss: 0.193610, avg_loss: 1.277939
|
| 812 |
+
003275/005400, loss: 0.235554, avg_loss: 1.276249
|
| 813 |
+
003280/005400, loss: 0.166258, avg_loss: 1.274573
|
| 814 |
+
003285/005400, loss: 0.263752, avg_loss: 1.272963
|
| 815 |
+
003290/005400, loss: 0.303736, avg_loss: 1.271314
|
| 816 |
+
003295/005400, loss: 0.119213, avg_loss: 1.269609
|
| 817 |
+
003300/005400, loss: 0.132104, avg_loss: 1.267901
|
| 818 |
+
003305/005400, loss: 0.143845, avg_loss: 1.266212
|
| 819 |
+
003310/005400, loss: 0.115098, avg_loss: 1.264532
|
| 820 |
+
003315/005400, loss: 0.288430, avg_loss: 1.262901
|
| 821 |
+
003320/005400, loss: 0.173986, avg_loss: 1.261220
|
| 822 |
+
003325/005400, loss: 0.120085, avg_loss: 1.259552
|
| 823 |
+
003330/005400, loss: 0.248743, avg_loss: 1.257920
|
| 824 |
+
003335/005400, loss: 0.139627, avg_loss: 1.256220
|
| 825 |
+
003340/005400, loss: 0.147467, avg_loss: 1.254561
|
| 826 |
+
003345/005400, loss: 0.142301, avg_loss: 1.252920
|
| 827 |
+
003350/005400, loss: 0.156088, avg_loss: 1.251271
|
| 828 |
+
003355/005400, loss: 0.151669, avg_loss: 1.249613
|
| 829 |
+
003360/005400, loss: 0.214872, avg_loss: 1.248012
|
| 830 |
+
003365/005400, loss: 0.198525, avg_loss: 1.246435
|
| 831 |
+
003370/005400, loss: 0.088710, avg_loss: 1.244759
|
| 832 |
+
003375/005400, loss: 0.120682, avg_loss: 1.243157
|
| 833 |
+
003380/005400, loss: 0.180583, avg_loss: 1.241588
|
| 834 |
+
003385/005400, loss: 0.228067, avg_loss: 1.240034
|
| 835 |
+
003390/005400, loss: 0.126767, avg_loss: 1.238442
|
| 836 |
+
003395/005400, loss: 0.125910, avg_loss: 1.236902
|
| 837 |
+
003400/005400, loss: 0.139716, avg_loss: 1.235308
|
| 838 |
+
003405/005400, loss: 0.080612, avg_loss: 1.233692
|
| 839 |
+
003410/005400, loss: 0.212925, avg_loss: 1.232123
|
| 840 |
+
003415/005400, loss: 0.131897, avg_loss: 1.230545
|
| 841 |
+
003420/005400, loss: 0.205202, avg_loss: 1.228983
|
| 842 |
+
***** Running dev evaluation *****
|
| 843 |
+
Num examples = 1500
|
| 844 |
+
Instantaneous batch size per device = 32
|
| 845 |
+
epoch 18, step 3420/5400: {'pearson': 0.8498450703665391, 'spearmanr': 0.8479951774929629}
|
| 846 |
+
003425/005400, loss: 0.122507, avg_loss: 1.227378
|
| 847 |
+
003430/005400, loss: 0.250203, avg_loss: 1.225830
|
| 848 |
+
003435/005400, loss: 0.173522, avg_loss: 1.224317
|
| 849 |
+
003440/005400, loss: 0.087732, avg_loss: 1.222770
|
| 850 |
+
003445/005400, loss: 0.154733, avg_loss: 1.221229
|
| 851 |
+
003450/005400, loss: 0.217941, avg_loss: 1.219678
|
| 852 |
+
003455/005400, loss: 0.137303, avg_loss: 1.218170
|
| 853 |
+
003460/005400, loss: 0.112234, avg_loss: 1.216591
|
| 854 |
+
003465/005400, loss: 0.150905, avg_loss: 1.215047
|
| 855 |
+
003470/005400, loss: 0.158825, avg_loss: 1.213517
|
| 856 |
+
003475/005400, loss: 0.173023, avg_loss: 1.212032
|
| 857 |
+
003480/005400, loss: 0.178021, avg_loss: 1.210536
|
| 858 |
+
003485/005400, loss: 0.247019, avg_loss: 1.209080
|
| 859 |
+
003490/005400, loss: 0.072551, avg_loss: 1.207569
|
| 860 |
+
003495/005400, loss: 0.162839, avg_loss: 1.206022
|
| 861 |
+
003500/005400, loss: 0.189042, avg_loss: 1.204516
|
| 862 |
+
003505/005400, loss: 0.173782, avg_loss: 1.203007
|
| 863 |
+
003510/005400, loss: 0.138777, avg_loss: 1.201515
|
| 864 |
+
003515/005400, loss: 0.177656, avg_loss: 1.200013
|
| 865 |
+
003520/005400, loss: 0.103750, avg_loss: 1.198508
|
| 866 |
+
003525/005400, loss: 0.169574, avg_loss: 1.197020
|
| 867 |
+
003530/005400, loss: 0.119396, avg_loss: 1.195545
|
| 868 |
+
003535/005400, loss: 0.264826, avg_loss: 1.194100
|
| 869 |
+
003540/005400, loss: 0.098011, avg_loss: 1.192637
|
| 870 |
+
003545/005400, loss: 0.088810, avg_loss: 1.191125
|
| 871 |
+
003550/005400, loss: 0.107876, avg_loss: 1.189654
|
| 872 |
+
003555/005400, loss: 0.157520, avg_loss: 1.188192
|
| 873 |
+
003560/005400, loss: 0.176217, avg_loss: 1.186812
|
| 874 |
+
003565/005400, loss: 0.111337, avg_loss: 1.185342
|
| 875 |
+
003570/005400, loss: 0.166201, avg_loss: 1.183889
|
| 876 |
+
003575/005400, loss: 0.171814, avg_loss: 1.182409
|
| 877 |
+
003580/005400, loss: 0.112979, avg_loss: 1.181004
|
| 878 |
+
003585/005400, loss: 0.119157, avg_loss: 1.179598
|
| 879 |
+
003590/005400, loss: 0.114437, avg_loss: 1.178189
|
| 880 |
+
003595/005400, loss: 0.155447, avg_loss: 1.176771
|
| 881 |
+
003600/005400, loss: 0.157078, avg_loss: 1.175317
|
| 882 |
+
***** Running dev evaluation *****
|
| 883 |
+
Num examples = 1500
|
| 884 |
+
Instantaneous batch size per device = 32
|
| 885 |
+
epoch 19, step 3600/5400: {'pearson': 0.8482436057295935, 'spearmanr': 0.8472426908693901}
|
| 886 |
+
003605/005400, loss: 0.154441, avg_loss: 1.173877
|
| 887 |
+
003610/005400, loss: 0.100947, avg_loss: 1.172478
|
| 888 |
+
003615/005400, loss: 0.125365, avg_loss: 1.171029
|
| 889 |
+
003620/005400, loss: 0.106434, avg_loss: 1.169605
|
| 890 |
+
003625/005400, loss: 0.130245, avg_loss: 1.168211
|
| 891 |
+
003630/005400, loss: 0.134600, avg_loss: 1.166787
|
| 892 |
+
003635/005400, loss: 0.266648, avg_loss: 1.165400
|
| 893 |
+
003640/005400, loss: 0.144939, avg_loss: 1.164021
|
| 894 |
+
003645/005400, loss: 0.106222, avg_loss: 1.162577
|
| 895 |
+
003650/005400, loss: 0.117357, avg_loss: 1.161193
|
| 896 |
+
003655/005400, loss: 0.202359, avg_loss: 1.159805
|
| 897 |
+
003660/005400, loss: 0.166776, avg_loss: 1.158439
|
| 898 |
+
003665/005400, loss: 0.107025, avg_loss: 1.157045
|
| 899 |
+
003670/005400, loss: 0.143284, avg_loss: 1.155661
|
| 900 |
+
003675/005400, loss: 0.198224, avg_loss: 1.154297
|
| 901 |
+
003680/005400, loss: 0.280506, avg_loss: 1.152964
|
| 902 |
+
003685/005400, loss: 0.130698, avg_loss: 1.151564
|
| 903 |
+
003690/005400, loss: 0.129304, avg_loss: 1.150198
|
| 904 |
+
003695/005400, loss: 0.137243, avg_loss: 1.148803
|
| 905 |
+
003700/005400, loss: 0.097097, avg_loss: 1.147449
|
| 906 |
+
003705/005400, loss: 0.144787, avg_loss: 1.146119
|
| 907 |
+
003710/005400, loss: 0.127824, avg_loss: 1.144796
|
| 908 |
+
003715/005400, loss: 0.176846, avg_loss: 1.143457
|
| 909 |
+
003720/005400, loss: 0.100565, avg_loss: 1.142128
|
| 910 |
+
003725/005400, loss: 0.080043, avg_loss: 1.140760
|
| 911 |
+
003730/005400, loss: 0.125706, avg_loss: 1.139474
|
| 912 |
+
003735/005400, loss: 0.117341, avg_loss: 1.138159
|
| 913 |
+
003740/005400, loss: 0.158067, avg_loss: 1.136843
|
| 914 |
+
003745/005400, loss: 0.151995, avg_loss: 1.135553
|
| 915 |
+
003750/005400, loss: 0.277281, avg_loss: 1.134297
|
| 916 |
+
003755/005400, loss: 0.133230, avg_loss: 1.132962
|
| 917 |
+
003760/005400, loss: 0.186799, avg_loss: 1.131718
|
| 918 |
+
003765/005400, loss: 0.205163, avg_loss: 1.130425
|
| 919 |
+
003770/005400, loss: 0.157280, avg_loss: 1.129118
|
| 920 |
+
003775/005400, loss: 0.250720, avg_loss: 1.127838
|
| 921 |
+
003780/005400, loss: 0.138770, avg_loss: 1.126563
|
| 922 |
+
***** Running dev evaluation *****
|
| 923 |
+
Num examples = 1500
|
| 924 |
+
Instantaneous batch size per device = 32
|
| 925 |
+
epoch 20, step 3780/5400: {'pearson': 0.8516633883376111, 'spearmanr': 0.848796837026541}
|
| 926 |
+
003785/005400, loss: 0.280053, avg_loss: 1.125306
|
| 927 |
+
003790/005400, loss: 0.119360, avg_loss: 1.124036
|
| 928 |
+
003795/005400, loss: 0.150453, avg_loss: 1.122750
|
| 929 |
+
003800/005400, loss: 0.150021, avg_loss: 1.121459
|
| 930 |
+
003805/005400, loss: 0.077378, avg_loss: 1.120170
|
| 931 |
+
003810/005400, loss: 0.148403, avg_loss: 1.118906
|
| 932 |
+
003815/005400, loss: 0.178699, avg_loss: 1.117645
|
| 933 |
+
003820/005400, loss: 0.149582, avg_loss: 1.116330
|
| 934 |
+
003825/005400, loss: 0.128546, avg_loss: 1.115013
|
| 935 |
+
003830/005400, loss: 0.268229, avg_loss: 1.113776
|
| 936 |
+
003835/005400, loss: 0.195517, avg_loss: 1.112531
|
| 937 |
+
003840/005400, loss: 0.208493, avg_loss: 1.111268
|
| 938 |
+
003845/005400, loss: 0.193140, avg_loss: 1.110029
|
| 939 |
+
003850/005400, loss: 0.088294, avg_loss: 1.108804
|
| 940 |
+
003855/005400, loss: 0.149382, avg_loss: 1.107547
|
| 941 |
+
003860/005400, loss: 0.198664, avg_loss: 1.106283
|
| 942 |
+
003865/005400, loss: 0.126898, avg_loss: 1.104976
|
| 943 |
+
003870/005400, loss: 0.129632, avg_loss: 1.103741
|
| 944 |
+
003875/005400, loss: 0.123535, avg_loss: 1.102518
|
| 945 |
+
003880/005400, loss: 0.165960, avg_loss: 1.101254
|
| 946 |
+
003885/005400, loss: 0.138942, avg_loss: 1.100020
|
| 947 |
+
003890/005400, loss: 0.128230, avg_loss: 1.098769
|
| 948 |
+
003895/005400, loss: 0.104971, avg_loss: 1.097568
|
| 949 |
+
003900/005400, loss: 0.085618, avg_loss: 1.096347
|
| 950 |
+
003905/005400, loss: 0.126211, avg_loss: 1.095101
|
| 951 |
+
003910/005400, loss: 0.172208, avg_loss: 1.093876
|
| 952 |
+
003915/005400, loss: 0.134293, avg_loss: 1.092629
|
| 953 |
+
003920/005400, loss: 0.130413, avg_loss: 1.091402
|
| 954 |
+
003925/005400, loss: 0.126139, avg_loss: 1.090250
|
| 955 |
+
003930/005400, loss: 0.133957, avg_loss: 1.089023
|
| 956 |
+
003935/005400, loss: 0.235973, avg_loss: 1.087812
|
| 957 |
+
003940/005400, loss: 0.145638, avg_loss: 1.086616
|
| 958 |
+
003945/005400, loss: 0.101992, avg_loss: 1.085413
|
| 959 |
+
003950/005400, loss: 0.126402, avg_loss: 1.084220
|
| 960 |
+
003955/005400, loss: 0.117492, avg_loss: 1.083047
|
| 961 |
+
003960/005400, loss: 0.130239, avg_loss: 1.081839
|
| 962 |
+
***** Running dev evaluation *****
|
| 963 |
+
Num examples = 1500
|
| 964 |
+
Instantaneous batch size per device = 32
|
| 965 |
+
epoch 21, step 3960/5400: {'pearson': 0.847780716202824, 'spearmanr': 0.8469865580881132}
|
| 966 |
+
003965/005400, loss: 0.064479, avg_loss: 1.080611
|
| 967 |
+
003970/005400, loss: 0.171006, avg_loss: 1.079406
|
| 968 |
+
003975/005400, loss: 0.085861, avg_loss: 1.078165
|
| 969 |
+
003980/005400, loss: 0.095522, avg_loss: 1.076947
|
| 970 |
+
003985/005400, loss: 0.130852, avg_loss: 1.075774
|
| 971 |
+
003990/005400, loss: 0.134866, avg_loss: 1.074596
|
| 972 |
+
003995/005400, loss: 0.074542, avg_loss: 1.073402
|
| 973 |
+
004000/005400, loss: 0.116856, avg_loss: 1.072228
|
| 974 |
+
004005/005400, loss: 0.105077, avg_loss: 1.071026
|
| 975 |
+
004010/005400, loss: 0.125664, avg_loss: 1.069845
|
| 976 |
+
004015/005400, loss: 0.103024, avg_loss: 1.068720
|
| 977 |
+
004020/005400, loss: 0.128571, avg_loss: 1.067519
|
| 978 |
+
004025/005400, loss: 0.112454, avg_loss: 1.066343
|
| 979 |
+
004030/005400, loss: 0.150399, avg_loss: 1.065202
|
| 980 |
+
004035/005400, loss: 0.073474, avg_loss: 1.064020
|
| 981 |
+
004040/005400, loss: 0.209692, avg_loss: 1.062895
|
| 982 |
+
004045/005400, loss: 0.126410, avg_loss: 1.061723
|
| 983 |
+
004050/005400, loss: 0.168801, avg_loss: 1.060549
|
| 984 |
+
004055/005400, loss: 0.159003, avg_loss: 1.059423
|
| 985 |
+
004060/005400, loss: 0.153256, avg_loss: 1.058304
|
| 986 |
+
004065/005400, loss: 0.139507, avg_loss: 1.057153
|
| 987 |
+
004070/005400, loss: 0.196264, avg_loss: 1.056009
|
| 988 |
+
004075/005400, loss: 0.153815, avg_loss: 1.054860
|
| 989 |
+
004080/005400, loss: 0.182606, avg_loss: 1.053734
|
| 990 |
+
004085/005400, loss: 0.093651, avg_loss: 1.052585
|
| 991 |
+
004090/005400, loss: 0.138306, avg_loss: 1.051489
|
| 992 |
+
004095/005400, loss: 0.125193, avg_loss: 1.050385
|
| 993 |
+
004100/005400, loss: 0.086516, avg_loss: 1.049260
|
| 994 |
+
004105/005400, loss: 0.120107, avg_loss: 1.048157
|
| 995 |
+
004110/005400, loss: 0.246864, avg_loss: 1.047057
|
| 996 |
+
004115/005400, loss: 0.120596, avg_loss: 1.045902
|
| 997 |
+
004120/005400, loss: 0.121840, avg_loss: 1.044833
|
| 998 |
+
004125/005400, loss: 0.141377, avg_loss: 1.043755
|
| 999 |
+
004130/005400, loss: 0.130236, avg_loss: 1.042661
|
| 1000 |
+
004135/005400, loss: 0.077593, avg_loss: 1.041535
|
| 1001 |
+
004140/005400, loss: 0.096709, avg_loss: 1.040430
|
| 1002 |
+
***** Running dev evaluation *****
|
| 1003 |
+
Num examples = 1500
|
| 1004 |
+
Instantaneous batch size per device = 32
|
| 1005 |
+
epoch 22, step 4140/5400: {'pearson': 0.8516815294443599, 'spearmanr': 0.8481674736867748}
|
| 1006 |
+
004145/005400, loss: 0.107884, avg_loss: 1.039325
|
| 1007 |
+
004150/005400, loss: 0.163758, avg_loss: 1.038213
|
| 1008 |
+
004155/005400, loss: 0.107559, avg_loss: 1.037071
|
| 1009 |
+
004160/005400, loss: 0.221394, avg_loss: 1.035994
|
| 1010 |
+
004165/005400, loss: 0.095282, avg_loss: 1.034890
|
| 1011 |
+
004170/005400, loss: 0.115735, avg_loss: 1.033791
|
| 1012 |
+
004175/005400, loss: 0.120850, avg_loss: 1.032702
|
| 1013 |
+
004180/005400, loss: 0.148173, avg_loss: 1.031638
|
| 1014 |
+
004185/005400, loss: 0.150222, avg_loss: 1.030549
|
| 1015 |
+
004190/005400, loss: 0.167705, avg_loss: 1.029474
|
| 1016 |
+
004195/005400, loss: 0.080327, avg_loss: 1.028393
|
| 1017 |
+
004200/005400, loss: 0.163523, avg_loss: 1.027316
|
| 1018 |
+
004205/005400, loss: 0.091747, avg_loss: 1.026259
|
| 1019 |
+
004210/005400, loss: 0.146581, avg_loss: 1.025186
|
| 1020 |
+
004215/005400, loss: 0.138113, avg_loss: 1.024116
|
| 1021 |
+
004220/005400, loss: 0.126675, avg_loss: 1.023037
|
| 1022 |
+
004225/005400, loss: 0.174576, avg_loss: 1.021997
|
| 1023 |
+
004230/005400, loss: 0.192664, avg_loss: 1.020943
|
| 1024 |
+
004235/005400, loss: 0.075478, avg_loss: 1.019836
|
| 1025 |
+
004240/005400, loss: 0.152823, avg_loss: 1.018803
|
| 1026 |
+
004245/005400, loss: 0.116004, avg_loss: 1.017760
|
| 1027 |
+
004250/005400, loss: 0.151843, avg_loss: 1.016687
|
| 1028 |
+
004255/005400, loss: 0.198972, avg_loss: 1.015644
|
| 1029 |
+
004260/005400, loss: 0.158850, avg_loss: 1.014584
|
| 1030 |
+
004265/005400, loss: 0.140898, avg_loss: 1.013570
|
| 1031 |
+
004270/005400, loss: 0.102441, avg_loss: 1.012552
|
| 1032 |
+
004275/005400, loss: 0.116065, avg_loss: 1.011494
|
| 1033 |
+
004280/005400, loss: 0.093895, avg_loss: 1.010467
|
| 1034 |
+
004285/005400, loss: 0.091400, avg_loss: 1.009428
|
| 1035 |
+
004290/005400, loss: 0.135847, avg_loss: 1.008452
|
| 1036 |
+
004295/005400, loss: 0.131350, avg_loss: 1.007404
|
| 1037 |
+
004300/005400, loss: 0.086305, avg_loss: 1.006382
|
| 1038 |
+
004305/005400, loss: 0.149123, avg_loss: 1.005382
|
| 1039 |
+
004310/005400, loss: 0.077175, avg_loss: 1.004378
|
| 1040 |
+
004315/005400, loss: 0.130131, avg_loss: 1.003319
|
| 1041 |
+
004320/005400, loss: 0.081299, avg_loss: 1.002324
|
| 1042 |
+
***** Running dev evaluation *****
|
| 1043 |
+
Num examples = 1500
|
| 1044 |
+
Instantaneous batch size per device = 32
|
| 1045 |
+
epoch 23, step 4320/5400: {'pearson': 0.8516975249826064, 'spearmanr': 0.848650349577711}
|
| 1046 |
+
004325/005400, loss: 0.162849, avg_loss: 1.001306
|
| 1047 |
+
004330/005400, loss: 0.106408, avg_loss: 1.000275
|
| 1048 |
+
004335/005400, loss: 0.112816, avg_loss: 0.999257
|
| 1049 |
+
004340/005400, loss: 0.117222, avg_loss: 0.998237
|
| 1050 |
+
004345/005400, loss: 0.163939, avg_loss: 0.997231
|
| 1051 |
+
004350/005400, loss: 0.132185, avg_loss: 0.996227
|
| 1052 |
+
004355/005400, loss: 0.120796, avg_loss: 0.995204
|
| 1053 |
+
004360/005400, loss: 0.110429, avg_loss: 0.994174
|
| 1054 |
+
004365/005400, loss: 0.176232, avg_loss: 0.993202
|
| 1055 |
+
004370/005400, loss: 0.108074, avg_loss: 0.992212
|
| 1056 |
+
004375/005400, loss: 0.172169, avg_loss: 0.991219
|
| 1057 |
+
004380/005400, loss: 0.115517, avg_loss: 0.990262
|
| 1058 |
+
004385/005400, loss: 0.121775, avg_loss: 0.989272
|
| 1059 |
+
004390/005400, loss: 0.126275, avg_loss: 0.988271
|
| 1060 |
+
004395/005400, loss: 0.107515, avg_loss: 0.987280
|
| 1061 |
+
004400/005400, loss: 0.086793, avg_loss: 0.986292
|
| 1062 |
+
004405/005400, loss: 0.124593, avg_loss: 0.985330
|
| 1063 |
+
004410/005400, loss: 0.132320, avg_loss: 0.984378
|
| 1064 |
+
004415/005400, loss: 0.167460, avg_loss: 0.983421
|
| 1065 |
+
004420/005400, loss: 0.143033, avg_loss: 0.982441
|
| 1066 |
+
004425/005400, loss: 0.123850, avg_loss: 0.981486
|
| 1067 |
+
004430/005400, loss: 0.095938, avg_loss: 0.980500
|
| 1068 |
+
004435/005400, loss: 0.135725, avg_loss: 0.979532
|
| 1069 |
+
004440/005400, loss: 0.081924, avg_loss: 0.978545
|
| 1070 |
+
004445/005400, loss: 0.122266, avg_loss: 0.977577
|
| 1071 |
+
004450/005400, loss: 0.101690, avg_loss: 0.976582
|
| 1072 |
+
004455/005400, loss: 0.159788, avg_loss: 0.975642
|
| 1073 |
+
004460/005400, loss: 0.173950, avg_loss: 0.974689
|
| 1074 |
+
004465/005400, loss: 0.140697, avg_loss: 0.973738
|
| 1075 |
+
004470/005400, loss: 0.118950, avg_loss: 0.972768
|
| 1076 |
+
004475/005400, loss: 0.090251, avg_loss: 0.971827
|
| 1077 |
+
004480/005400, loss: 0.142421, avg_loss: 0.970929
|
| 1078 |
+
004485/005400, loss: 0.093378, avg_loss: 0.969989
|
| 1079 |
+
004490/005400, loss: 0.132023, avg_loss: 0.969045
|
| 1080 |
+
004495/005400, loss: 0.177814, avg_loss: 0.968117
|
| 1081 |
+
004500/005400, loss: 0.098508, avg_loss: 0.967177
|
| 1082 |
+
***** Running dev evaluation *****
|
| 1083 |
+
Num examples = 1500
|
| 1084 |
+
Instantaneous batch size per device = 32
|
| 1085 |
+
epoch 24, step 4500/5400: {'pearson': 0.8521285961729241, 'spearmanr': 0.8483649029133034}
|
| 1086 |
+
004505/005400, loss: 0.110328, avg_loss: 0.966241
|
| 1087 |
+
004510/005400, loss: 0.102069, avg_loss: 0.965301
|
| 1088 |
+
004515/005400, loss: 0.162850, avg_loss: 0.964387
|
| 1089 |
+
004520/005400, loss: 0.151388, avg_loss: 0.963471
|
| 1090 |
+
004525/005400, loss: 0.087982, avg_loss: 0.962517
|
| 1091 |
+
004530/005400, loss: 0.107709, avg_loss: 0.961596
|
| 1092 |
+
004535/005400, loss: 0.093900, avg_loss: 0.960650
|
| 1093 |
+
004540/005400, loss: 0.087082, avg_loss: 0.959718
|
| 1094 |
+
004545/005400, loss: 0.125253, avg_loss: 0.958788
|
| 1095 |
+
004550/005400, loss: 0.091174, avg_loss: 0.957888
|
| 1096 |
+
004555/005400, loss: 0.064617, avg_loss: 0.956960
|
| 1097 |
+
004560/005400, loss: 0.079881, avg_loss: 0.956014
|
| 1098 |
+
004565/005400, loss: 0.103206, avg_loss: 0.955083
|
| 1099 |
+
004570/005400, loss: 0.143321, avg_loss: 0.954173
|
| 1100 |
+
004575/005400, loss: 0.149977, avg_loss: 0.953271
|
| 1101 |
+
004580/005400, loss: 0.092622, avg_loss: 0.952352
|
| 1102 |
+
004585/005400, loss: 0.126467, avg_loss: 0.951438
|
| 1103 |
+
004590/005400, loss: 0.094483, avg_loss: 0.950528
|
| 1104 |
+
004595/005400, loss: 0.107402, avg_loss: 0.949638
|
| 1105 |
+
004600/005400, loss: 0.082824, avg_loss: 0.948713
|
| 1106 |
+
004605/005400, loss: 0.195657, avg_loss: 0.947822
|
| 1107 |
+
004610/005400, loss: 0.124441, avg_loss: 0.946935
|
| 1108 |
+
004615/005400, loss: 0.121700, avg_loss: 0.946019
|
| 1109 |
+
004620/005400, loss: 0.148236, avg_loss: 0.945115
|
| 1110 |
+
004625/005400, loss: 0.140154, avg_loss: 0.944207
|
| 1111 |
+
004630/005400, loss: 0.165997, avg_loss: 0.943339
|
| 1112 |
+
004635/005400, loss: 0.098995, avg_loss: 0.942421
|
| 1113 |
+
004640/005400, loss: 0.120260, avg_loss: 0.941555
|
| 1114 |
+
004645/005400, loss: 0.125061, avg_loss: 0.940660
|
| 1115 |
+
004650/005400, loss: 0.112413, avg_loss: 0.939759
|
| 1116 |
+
004655/005400, loss: 0.104798, avg_loss: 0.938884
|
| 1117 |
+
004660/005400, loss: 0.105972, avg_loss: 0.937982
|
| 1118 |
+
004665/005400, loss: 0.137016, avg_loss: 0.937103
|
| 1119 |
+
004670/005400, loss: 0.086489, avg_loss: 0.936211
|
| 1120 |
+
004675/005400, loss: 0.130223, avg_loss: 0.935320
|
| 1121 |
+
004680/005400, loss: 0.067240, avg_loss: 0.934422
|
| 1122 |
+
***** Running dev evaluation *****
|
| 1123 |
+
Num examples = 1500
|
| 1124 |
+
Instantaneous batch size per device = 32
|
| 1125 |
+
epoch 25, step 4680/5400: {'pearson': 0.8527350758782244, 'spearmanr': 0.8507346588341773}
|
| 1126 |
+
004685/005400, loss: 0.153279, avg_loss: 0.933544
|
| 1127 |
+
004690/005400, loss: 0.084691, avg_loss: 0.932667
|
| 1128 |
+
004695/005400, loss: 0.127423, avg_loss: 0.931791
|
| 1129 |
+
004700/005400, loss: 0.123418, avg_loss: 0.930916
|
| 1130 |
+
004705/005400, loss: 0.102127, avg_loss: 0.930041
|
| 1131 |
+
004710/005400, loss: 0.094543, avg_loss: 0.929162
|
| 1132 |
+
004715/005400, loss: 0.112821, avg_loss: 0.928298
|
| 1133 |
+
004720/005400, loss: 0.094509, avg_loss: 0.927409
|
| 1134 |
+
004725/005400, loss: 0.148458, avg_loss: 0.926568
|
| 1135 |
+
004730/005400, loss: 0.084626, avg_loss: 0.925698
|
| 1136 |
+
004735/005400, loss: 0.074723, avg_loss: 0.924825
|
| 1137 |
+
004740/005400, loss: 0.101060, avg_loss: 0.923955
|
| 1138 |
+
004745/005400, loss: 0.202314, avg_loss: 0.923109
|
| 1139 |
+
004750/005400, loss: 0.055448, avg_loss: 0.922249
|
| 1140 |
+
004755/005400, loss: 0.179162, avg_loss: 0.921398
|
| 1141 |
+
004760/005400, loss: 0.109892, avg_loss: 0.920548
|
| 1142 |
+
004765/005400, loss: 0.128364, avg_loss: 0.919729
|
| 1143 |
+
004770/005400, loss: 0.064578, avg_loss: 0.918864
|
| 1144 |
+
004775/005400, loss: 0.119190, avg_loss: 0.918013
|
| 1145 |
+
004780/005400, loss: 0.106971, avg_loss: 0.917168
|
| 1146 |
+
004785/005400, loss: 0.103674, avg_loss: 0.916293
|
| 1147 |
+
004790/005400, loss: 0.086906, avg_loss: 0.915437
|
| 1148 |
+
004795/005400, loss: 0.078381, avg_loss: 0.914585
|
| 1149 |
+
004800/005400, loss: 0.075235, avg_loss: 0.913733
|
| 1150 |
+
004805/005400, loss: 0.089770, avg_loss: 0.912904
|
| 1151 |
+
004810/005400, loss: 0.104229, avg_loss: 0.912068
|
| 1152 |
+
004815/005400, loss: 0.100126, avg_loss: 0.911218
|
| 1153 |
+
004820/005400, loss: 0.118553, avg_loss: 0.910386
|
| 1154 |
+
004825/005400, loss: 0.143384, avg_loss: 0.909569
|
| 1155 |
+
004830/005400, loss: 0.102282, avg_loss: 0.908771
|
| 1156 |
+
004835/005400, loss: 0.148549, avg_loss: 0.907961
|
| 1157 |
+
004840/005400, loss: 0.182026, avg_loss: 0.907128
|
| 1158 |
+
004845/005400, loss: 0.190450, avg_loss: 0.906318
|
| 1159 |
+
004850/005400, loss: 0.206217, avg_loss: 0.905508
|
| 1160 |
+
004855/005400, loss: 0.064875, avg_loss: 0.904663
|
| 1161 |
+
004860/005400, loss: 0.099118, avg_loss: 0.903843
|
| 1162 |
+
***** Running dev evaluation *****
|
| 1163 |
+
Num examples = 1500
|
| 1164 |
+
Instantaneous batch size per device = 32
|
| 1165 |
+
epoch 26, step 4860/5400: {'pearson': 0.8501907223027365, 'spearmanr': 0.8489084429386828}
|
| 1166 |
+
004865/005400, loss: 0.155720, avg_loss: 0.903033
|
| 1167 |
+
004870/005400, loss: 0.123857, avg_loss: 0.902210
|
| 1168 |
+
004875/005400, loss: 0.106955, avg_loss: 0.901405
|
| 1169 |
+
004880/005400, loss: 0.141843, avg_loss: 0.900608
|
| 1170 |
+
004885/005400, loss: 0.101737, avg_loss: 0.899809
|
| 1171 |
+
004890/005400, loss: 0.159319, avg_loss: 0.899006
|
| 1172 |
+
004895/005400, loss: 0.095495, avg_loss: 0.898173
|
| 1173 |
+
004900/005400, loss: 0.134695, avg_loss: 0.897373
|
| 1174 |
+
004905/005400, loss: 0.062803, avg_loss: 0.896564
|
| 1175 |
+
004910/005400, loss: 0.132602, avg_loss: 0.895749
|
| 1176 |
+
004915/005400, loss: 0.117661, avg_loss: 0.894927
|
| 1177 |
+
004920/005400, loss: 0.134668, avg_loss: 0.894128
|
| 1178 |
+
004925/005400, loss: 0.089291, avg_loss: 0.893325
|
| 1179 |
+
004930/005400, loss: 0.116079, avg_loss: 0.892549
|
| 1180 |
+
004935/005400, loss: 0.092115, avg_loss: 0.891750
|
| 1181 |
+
004940/005400, loss: 0.132650, avg_loss: 0.890975
|
| 1182 |
+
004945/005400, loss: 0.062088, avg_loss: 0.890193
|
| 1183 |
+
004950/005400, loss: 0.062359, avg_loss: 0.889396
|
| 1184 |
+
004955/005400, loss: 0.086961, avg_loss: 0.888640
|
| 1185 |
+
004960/005400, loss: 0.155230, avg_loss: 0.887873
|
| 1186 |
+
004965/005400, loss: 0.110812, avg_loss: 0.887072
|
| 1187 |
+
004970/005400, loss: 0.068260, avg_loss: 0.886263
|
| 1188 |
+
004975/005400, loss: 0.156115, avg_loss: 0.885500
|
| 1189 |
+
004980/005400, loss: 0.124095, avg_loss: 0.884712
|
| 1190 |
+
004985/005400, loss: 0.126226, avg_loss: 0.883916
|
| 1191 |
+
004990/005400, loss: 0.083915, avg_loss: 0.883123
|
| 1192 |
+
004995/005400, loss: 0.083612, avg_loss: 0.882345
|
| 1193 |
+
005000/005400, loss: 0.129824, avg_loss: 0.881565
|
| 1194 |
+
005005/005400, loss: 0.131232, avg_loss: 0.880788
|
| 1195 |
+
005010/005400, loss: 0.122785, avg_loss: 0.879998
|
| 1196 |
+
005015/005400, loss: 0.103774, avg_loss: 0.879201
|
| 1197 |
+
005020/005400, loss: 0.090597, avg_loss: 0.878425
|
| 1198 |
+
005025/005400, loss: 0.084932, avg_loss: 0.877641
|
| 1199 |
+
005030/005400, loss: 0.107362, avg_loss: 0.876879
|
| 1200 |
+
005035/005400, loss: 0.127814, avg_loss: 0.876147
|
| 1201 |
+
005040/005400, loss: 0.170438, avg_loss: 0.875395
|
| 1202 |
+
***** Running dev evaluation *****
|
| 1203 |
+
Num examples = 1500
|
| 1204 |
+
Instantaneous batch size per device = 32
|
| 1205 |
+
epoch 27, step 5040/5400: {'pearson': 0.8554061134436448, 'spearmanr': 0.8524378109427393}
|
| 1206 |
+
005045/005400, loss: 0.076105, avg_loss: 0.874619
|
| 1207 |
+
005050/005400, loss: 0.103730, avg_loss: 0.873847
|
| 1208 |
+
005055/005400, loss: 0.064459, avg_loss: 0.873081
|
| 1209 |
+
005060/005400, loss: 0.112303, avg_loss: 0.872322
|
| 1210 |
+
005065/005400, loss: 0.071940, avg_loss: 0.871558
|
| 1211 |
+
005070/005400, loss: 0.088598, avg_loss: 0.870804
|
| 1212 |
+
005075/005400, loss: 0.090358, avg_loss: 0.870038
|
| 1213 |
+
005080/005400, loss: 0.068572, avg_loss: 0.869276
|
| 1214 |
+
005085/005400, loss: 0.068552, avg_loss: 0.868498
|
| 1215 |
+
005090/005400, loss: 0.140135, avg_loss: 0.867759
|
| 1216 |
+
005095/005400, loss: 0.076569, avg_loss: 0.866984
|
| 1217 |
+
005100/005400, loss: 0.098298, avg_loss: 0.866226
|
| 1218 |
+
005105/005400, loss: 0.108042, avg_loss: 0.865460
|
| 1219 |
+
005110/005400, loss: 0.072785, avg_loss: 0.864705
|
| 1220 |
+
005115/005400, loss: 0.155046, avg_loss: 0.863964
|
| 1221 |
+
005120/005400, loss: 0.225429, avg_loss: 0.863240
|
| 1222 |
+
005125/005400, loss: 0.089123, avg_loss: 0.862489
|
| 1223 |
+
005130/005400, loss: 0.062908, avg_loss: 0.861729
|
| 1224 |
+
005135/005400, loss: 0.050400, avg_loss: 0.860973
|
| 1225 |
+
005140/005400, loss: 0.051159, avg_loss: 0.860225
|
| 1226 |
+
005145/005400, loss: 0.092017, avg_loss: 0.859488
|
| 1227 |
+
005150/005400, loss: 0.119527, avg_loss: 0.858754
|
| 1228 |
+
005155/005400, loss: 0.089991, avg_loss: 0.858025
|
| 1229 |
+
005160/005400, loss: 0.093003, avg_loss: 0.857288
|
| 1230 |
+
005165/005400, loss: 0.096349, avg_loss: 0.856544
|
| 1231 |
+
005170/005400, loss: 0.070699, avg_loss: 0.855824
|
| 1232 |
+
005175/005400, loss: 0.061141, avg_loss: 0.855089
|
| 1233 |
+
005180/005400, loss: 0.117543, avg_loss: 0.854368
|
| 1234 |
+
005185/005400, loss: 0.070801, avg_loss: 0.853643
|
| 1235 |
+
005190/005400, loss: 0.110298, avg_loss: 0.852924
|
| 1236 |
+
005195/005400, loss: 0.114286, avg_loss: 0.852209
|
| 1237 |
+
005200/005400, loss: 0.129566, avg_loss: 0.851483
|
| 1238 |
+
005205/005400, loss: 0.101796, avg_loss: 0.850774
|
| 1239 |
+
005210/005400, loss: 0.125870, avg_loss: 0.850059
|
| 1240 |
+
005215/005400, loss: 0.049415, avg_loss: 0.849329
|
| 1241 |
+
005220/005400, loss: 0.107189, avg_loss: 0.848612
|
| 1242 |
+
***** Running dev evaluation *****
|
| 1243 |
+
Num examples = 1500
|
| 1244 |
+
Instantaneous batch size per device = 32
|
| 1245 |
+
epoch 28, step 5220/5400: {'pearson': 0.8540466796613693, 'spearmanr': 0.850937622804088}
|
| 1246 |
+
005225/005400, loss: 0.080942, avg_loss: 0.847872
|
| 1247 |
+
005230/005400, loss: 0.102161, avg_loss: 0.847163
|
| 1248 |
+
005235/005400, loss: 0.082529, avg_loss: 0.846440
|
| 1249 |
+
005240/005400, loss: 0.105809, avg_loss: 0.845731
|
| 1250 |
+
005245/005400, loss: 0.117093, avg_loss: 0.845006
|
| 1251 |
+
005250/005400, loss: 0.106933, avg_loss: 0.844305
|
| 1252 |
+
005255/005400, loss: 0.074675, avg_loss: 0.843584
|
| 1253 |
+
005260/005400, loss: 0.102407, avg_loss: 0.842881
|
| 1254 |
+
005265/005400, loss: 0.148522, avg_loss: 0.842175
|
| 1255 |
+
005270/005400, loss: 0.087407, avg_loss: 0.841448
|
| 1256 |
+
005275/005400, loss: 0.098112, avg_loss: 0.840739
|
| 1257 |
+
005280/005400, loss: 0.092396, avg_loss: 0.840018
|
| 1258 |
+
005285/005400, loss: 0.062919, avg_loss: 0.839300
|
| 1259 |
+
005290/005400, loss: 0.132550, avg_loss: 0.838632
|
| 1260 |
+
005295/005400, loss: 0.145091, avg_loss: 0.837944
|
| 1261 |
+
005300/005400, loss: 0.118631, avg_loss: 0.837256
|
| 1262 |
+
005305/005400, loss: 0.056487, avg_loss: 0.836545
|
| 1263 |
+
005310/005400, loss: 0.103461, avg_loss: 0.835856
|
| 1264 |
+
005315/005400, loss: 0.112280, avg_loss: 0.835183
|
| 1265 |
+
005320/005400, loss: 0.037065, avg_loss: 0.834495
|
| 1266 |
+
005325/005400, loss: 0.102541, avg_loss: 0.833812
|
| 1267 |
+
005330/005400, loss: 0.052560, avg_loss: 0.833121
|
| 1268 |
+
005335/005400, loss: 0.118150, avg_loss: 0.832437
|
| 1269 |
+
005340/005400, loss: 0.093599, avg_loss: 0.831748
|
| 1270 |
+
005345/005400, loss: 0.057692, avg_loss: 0.831051
|
| 1271 |
+
005350/005400, loss: 0.083881, avg_loss: 0.830350
|
| 1272 |
+
005355/005400, loss: 0.092801, avg_loss: 0.829662
|
| 1273 |
+
005360/005400, loss: 0.109509, avg_loss: 0.828983
|
| 1274 |
+
005365/005400, loss: 0.126566, avg_loss: 0.828295
|
| 1275 |
+
005370/005400, loss: 0.090441, avg_loss: 0.827624
|
| 1276 |
+
005375/005400, loss: 0.098362, avg_loss: 0.826956
|
| 1277 |
+
005380/005400, loss: 0.086417, avg_loss: 0.826275
|
| 1278 |
+
005385/005400, loss: 0.090084, avg_loss: 0.825580
|
| 1279 |
+
005390/005400, loss: 0.089639, avg_loss: 0.824919
|
| 1280 |
+
005395/005400, loss: 0.112607, avg_loss: 0.824232
|
| 1281 |
+
005400/005400, loss: 0.079185, avg_loss: 0.823571
|
| 1282 |
+
***** Running dev evaluation *****
|
| 1283 |
+
Num examples = 1500
|
| 1284 |
+
Instantaneous batch size per device = 32
|
| 1285 |
+
epoch 29, step 5400/5400: {'pearson': 0.8466150052031443, 'spearmanr': 0.845214209063919}
|
| 1286 |
+
***** Running train evaluation *****
|
| 1287 |
+
Num examples = 5749
|
| 1288 |
+
Instantaneous batch size per device = 32
|
| 1289 |
+
Train Dataset Result: {'pearson': 0.9873720770623174, 'spearmanr': 0.987580466183614}
|
| 1290 |
+
***** Running dev evaluation *****
|
| 1291 |
+
Num examples = 1500
|
| 1292 |
+
Instantaneous batch size per device = 32
|
| 1293 |
+
Dev Dataset Result: {'pearson': 0.8466150052031443, 'spearmanr': 0.845214209063919}
|
| 1294 |
+
Training time 0:04:24
|
pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dd2392d68ba15911e69c429109e06a5b13b3cae51981bd4cb24f0a2677f8603a
|
| 3 |
+
size 34298509
|
result.txt
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{'pearson': 0.21495300918671972, 'spearmanr': 0.18778433070729544}
|
| 2 |
+
{'pearson': 0.1693196142024497, 'spearmanr': 0.1520939753827761}
|
| 3 |
+
{'pearson': 0.5585231671416229, 'spearmanr': 0.5968823171253705}
|
| 4 |
+
{'pearson': 0.7538161883822286, 'spearmanr': 0.7339178388810693}
|
| 5 |
+
{'pearson': 0.8037387020413668, 'spearmanr': 0.8107612065966875}
|
| 6 |
+
{'pearson': 0.8115941618503355, 'spearmanr': 0.8282434089896973}
|
| 7 |
+
{'pearson': 0.8244669741341696, 'spearmanr': 0.8347289521968146}
|
| 8 |
+
{'pearson': 0.8356315632016451, 'spearmanr': 0.8428067774651329}
|
| 9 |
+
{'pearson': 0.840875635131036, 'spearmanr': 0.8391187190190564}
|
| 10 |
+
{'pearson': 0.8342714757320445, 'spearmanr': 0.8376185602281018}
|
| 11 |
+
{'pearson': 0.8390370712384592, 'spearmanr': 0.8380421225427299}
|
| 12 |
+
{'pearson': 0.8434982902424131, 'spearmanr': 0.8445651086908786}
|
| 13 |
+
{'pearson': 0.8415414818553372, 'spearmanr': 0.8425621296013649}
|
| 14 |
+
{'pearson': 0.8425599117367437, 'spearmanr': 0.8414850205786223}
|
| 15 |
+
{'pearson': 0.8428262938537643, 'spearmanr': 0.8418967117492774}
|
| 16 |
+
{'pearson': 0.8465462185651544, 'spearmanr': 0.8451574856196069}
|
| 17 |
+
{'pearson': 0.8475945534372652, 'spearmanr': 0.8462737598699491}
|
| 18 |
+
{'pearson': 0.8504202206275068, 'spearmanr': 0.8473922892792047}
|
| 19 |
+
{'pearson': 0.8498450703665391, 'spearmanr': 0.8479951774929629}
|
| 20 |
+
{'pearson': 0.8482436057295935, 'spearmanr': 0.8472426908693901}
|
| 21 |
+
{'pearson': 0.8516633883376111, 'spearmanr': 0.848796837026541}
|
| 22 |
+
{'pearson': 0.847780716202824, 'spearmanr': 0.8469865580881132}
|
| 23 |
+
{'pearson': 0.8516815294443599, 'spearmanr': 0.8481674736867748}
|
| 24 |
+
{'pearson': 0.8516975249826064, 'spearmanr': 0.848650349577711}
|
| 25 |
+
{'pearson': 0.8521285961729241, 'spearmanr': 0.8483649029133034}
|
| 26 |
+
{'pearson': 0.8527350758782244, 'spearmanr': 0.8507346588341773}
|
| 27 |
+
{'pearson': 0.8501907223027365, 'spearmanr': 0.8489084429386828}
|
| 28 |
+
{'pearson': 0.8554061134436448, 'spearmanr': 0.8524378109427393}
|
| 29 |
+
{'pearson': 0.8540466796613693, 'spearmanr': 0.850937622804088}
|
| 30 |
+
{'pearson': 0.8466150052031443, 'spearmanr': 0.845214209063919}
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "do_basic_tokenize": true, "model_max_length": 512, "name_or_path": "/home.local/jianwei/workspace/archive/SparseOptimizer/output/Layer_7_12_Hid_160_768_Head_10_12_IMRatio_3.5", "never_split": null, "special_tokens_map_file": "/home.local/jianwei/.cache/huggingface/transformers/b680d52711d2451bbd6c6b1700365d6d731977c1357ae86bd7227f61145d3be2.dd8bd9bfd3664b530ea4e645105f557769387b3da9f79bdb55ed556bdd80611d", "tokenizer_class": "BertTokenizer"}
|
vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|