Upload 8 files
Browse files- config.json +31 -0
- log_bs32_lr3e-05_20221124_034416_123214.txt +1784 -0
- pytorch_model.bin +3 -0
- result.txt +30 -0
- special_tokens_map.json +1 -0
- tokenizer.json +0 -0
- tokenizer_config.json +1 -0
- vocab.txt +0 -0
config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "/home.local/jianwei/workspace/archive/SparseOptimizer/output/Layer_7_12_Hid_160_768_Head_10_12_IMRatio_3.5",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"BertForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"embedding_size": 160,
|
| 9 |
+
"finetuning_task": "cola",
|
| 10 |
+
"gradient_checkpointing": false,
|
| 11 |
+
"hidden_act": "gelu",
|
| 12 |
+
"hidden_dropout_prob": 0.1,
|
| 13 |
+
"hidden_size": 160,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 560,
|
| 16 |
+
"layer_norm_eps": 1e-12,
|
| 17 |
+
"max_position_embeddings": 512,
|
| 18 |
+
"model_type": "bert",
|
| 19 |
+
"num_attention_heads": 10,
|
| 20 |
+
"num_hidden_layers": 7,
|
| 21 |
+
"output_intermediate": true,
|
| 22 |
+
"output_past": true,
|
| 23 |
+
"pad_token_id": 0,
|
| 24 |
+
"position_embedding_type": "absolute",
|
| 25 |
+
"problem_type": "single_label_classification",
|
| 26 |
+
"torch_dtype": "float32",
|
| 27 |
+
"transformers_version": "4.17.0",
|
| 28 |
+
"type_vocab_size": 2,
|
| 29 |
+
"use_cache": true,
|
| 30 |
+
"vocab_size": 30522
|
| 31 |
+
}
|
log_bs32_lr3e-05_20221124_034416_123214.txt
ADDED
|
@@ -0,0 +1,1784 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
------------> log file ==runs2/cola/OUTPUT_ID/log_bs32_lr3e-05_20221124_034416_123214.txt
|
| 2 |
+
Namespace(aug_train=False, data_dir='/home.local/jianwei/datasets/nlp/glue_data/CoLA', do_eval=False, early_stop=False, early_stop_metric='accuracy', eval_step=120, gradient_accumulation_steps=1, learning_rate=3e-05, local_rank=0, lr_scheduler_type=<SchedulerType.CONSTANT_WITH_WARMUP: 'constant_with_warmup'>, max_length=128, max_train_steps=None, model_name_or_path='/home.local/jianwei/workspace/archive/SparseOptimizer/output/Layer_7_12_Hid_160_768_Head_10_12_IMRatio_3.5', num_train_epochs=30, num_warmup_steps=0, output_dir='runs2/cola/OUTPUT_ID', pad_to_max_length=False, per_device_eval_batch_size=32, per_device_train_batch_size=32, print_step=5, save_last=False, seed=None, task_name='cola', train_file=None, use_slow_tokenizer=False, validation_file=None, weight_decay=0.0)
|
| 3 |
+
Distributed environment: NO
|
| 4 |
+
Num processes: 1
|
| 5 |
+
Process index: 0
|
| 6 |
+
Local process index: 0
|
| 7 |
+
Device: cuda
|
| 8 |
+
Mixed precision type: fp16
|
| 9 |
+
|
| 10 |
+
Sample 3305 of the training set: (tensor([ 101, 2058, 1996, 3481, 2045, 18360, 1037, 2312, 5210, 1012,
|
| 11 |
+
102, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 12 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 13 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 14 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 15 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 16 |
+
0, 0, 0, 0]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 17 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 18 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 19 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 20 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor(1)).
|
| 21 |
+
Sample 4580 of the training set: (tensor([ 101, 7525, 2097, 3191, 2115, 4311, 1010, 2021, 7157, 2097, 2025, 1012,
|
| 22 |
+
102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 23 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 24 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 25 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 26 |
+
0, 0, 0, 0]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 27 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 28 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 29 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 30 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor(1)).
|
| 31 |
+
Sample 1031 of the training set: (tensor([ 101, 2040, 7164, 2505, 2008, 2040, 2758, 1029, 102, 0, 0, 0,
|
| 32 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 33 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 34 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 35 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 36 |
+
0, 0, 0, 0]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 37 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 38 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 39 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 40 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor(1)).
|
| 41 |
+
***** Running training *****
|
| 42 |
+
Num examples = 8551
|
| 43 |
+
Num Epochs = 30
|
| 44 |
+
Instantaneous batch size per device = 32
|
| 45 |
+
Total train batch size (w. parallel, distributed & accumulation) = 32
|
| 46 |
+
Gradient Accumulation steps = 1
|
| 47 |
+
Total optimization steps = 8040
|
| 48 |
+
000005/008040, loss: 0.652222, avg_loss: 0.651953
|
| 49 |
+
000010/008040, loss: 0.661713, avg_loss: 0.653055
|
| 50 |
+
000015/008040, loss: 0.677567, avg_loss: 0.654401
|
| 51 |
+
000020/008040, loss: 0.650467, avg_loss: 0.655538
|
| 52 |
+
000025/008040, loss: 0.677872, avg_loss: 0.658810
|
| 53 |
+
000030/008040, loss: 0.661591, avg_loss: 0.658843
|
| 54 |
+
000035/008040, loss: 0.645645, avg_loss: 0.659464
|
| 55 |
+
000040/008040, loss: 0.652420, avg_loss: 0.659061
|
| 56 |
+
000045/008040, loss: 0.687469, avg_loss: 0.657230
|
| 57 |
+
000050/008040, loss: 0.626709, avg_loss: 0.656326
|
| 58 |
+
000055/008040, loss: 0.644302, avg_loss: 0.655348
|
| 59 |
+
000060/008040, loss: 0.648483, avg_loss: 0.655236
|
| 60 |
+
000065/008040, loss: 0.688477, avg_loss: 0.655078
|
| 61 |
+
000070/008040, loss: 0.695236, avg_loss: 0.656294
|
| 62 |
+
000075/008040, loss: 0.659332, avg_loss: 0.655568
|
| 63 |
+
000080/008040, loss: 0.658981, avg_loss: 0.654595
|
| 64 |
+
000085/008040, loss: 0.667786, avg_loss: 0.654187
|
| 65 |
+
000090/008040, loss: 0.658127, avg_loss: 0.653667
|
| 66 |
+
000095/008040, loss: 0.671814, avg_loss: 0.652597
|
| 67 |
+
000100/008040, loss: 0.633545, avg_loss: 0.652570
|
| 68 |
+
000105/008040, loss: 0.641556, avg_loss: 0.651439
|
| 69 |
+
000110/008040, loss: 0.605942, avg_loss: 0.650307
|
| 70 |
+
000115/008040, loss: 0.668716, avg_loss: 0.649737
|
| 71 |
+
000120/008040, loss: 0.565681, avg_loss: 0.648114
|
| 72 |
+
000125/008040, loss: 0.645332, avg_loss: 0.647180
|
| 73 |
+
000130/008040, loss: 0.654228, avg_loss: 0.645764
|
| 74 |
+
000135/008040, loss: 0.647469, avg_loss: 0.644160
|
| 75 |
+
000140/008040, loss: 0.589897, avg_loss: 0.644381
|
| 76 |
+
000145/008040, loss: 0.622665, avg_loss: 0.643773
|
| 77 |
+
000150/008040, loss: 0.582314, avg_loss: 0.642514
|
| 78 |
+
000155/008040, loss: 0.706223, avg_loss: 0.642192
|
| 79 |
+
000160/008040, loss: 0.573463, avg_loss: 0.640978
|
| 80 |
+
000165/008040, loss: 0.596039, avg_loss: 0.641289
|
| 81 |
+
000170/008040, loss: 0.629677, avg_loss: 0.641707
|
| 82 |
+
000175/008040, loss: 0.581818, avg_loss: 0.640926
|
| 83 |
+
000180/008040, loss: 0.573257, avg_loss: 0.640330
|
| 84 |
+
000185/008040, loss: 0.536911, avg_loss: 0.639325
|
| 85 |
+
000190/008040, loss: 0.607376, avg_loss: 0.638997
|
| 86 |
+
000195/008040, loss: 0.565323, avg_loss: 0.638167
|
| 87 |
+
000200/008040, loss: 0.561142, avg_loss: 0.637244
|
| 88 |
+
000205/008040, loss: 0.709908, avg_loss: 0.636707
|
| 89 |
+
000210/008040, loss: 0.655846, avg_loss: 0.635711
|
| 90 |
+
000215/008040, loss: 0.562042, avg_loss: 0.635813
|
| 91 |
+
000220/008040, loss: 0.623016, avg_loss: 0.635337
|
| 92 |
+
000225/008040, loss: 0.665192, avg_loss: 0.636041
|
| 93 |
+
000230/008040, loss: 0.602562, avg_loss: 0.636022
|
| 94 |
+
000235/008040, loss: 0.579742, avg_loss: 0.636067
|
| 95 |
+
000240/008040, loss: 0.643562, avg_loss: 0.635816
|
| 96 |
+
000245/008040, loss: 0.572937, avg_loss: 0.635337
|
| 97 |
+
000250/008040, loss: 0.609734, avg_loss: 0.635397
|
| 98 |
+
000255/008040, loss: 0.540230, avg_loss: 0.634626
|
| 99 |
+
000260/008040, loss: 0.688667, avg_loss: 0.634769
|
| 100 |
+
000265/008040, loss: 0.600624, avg_loss: 0.633670
|
| 101 |
+
***** Running dev evaluation *****
|
| 102 |
+
Num examples = 1042
|
| 103 |
+
Instantaneous batch size per device = 32
|
| 104 |
+
epoch 0, step 268/8040: {'matthews_correlation': 0.0}
|
| 105 |
+
000270/008040, loss: 0.532043, avg_loss: 0.633611
|
| 106 |
+
000275/008040, loss: 0.593201, avg_loss: 0.633069
|
| 107 |
+
000280/008040, loss: 0.706932, avg_loss: 0.633117
|
| 108 |
+
000285/008040, loss: 0.684525, avg_loss: 0.632790
|
| 109 |
+
000290/008040, loss: 0.642426, avg_loss: 0.632020
|
| 110 |
+
000295/008040, loss: 0.617699, avg_loss: 0.632309
|
| 111 |
+
000300/008040, loss: 0.693405, avg_loss: 0.632864
|
| 112 |
+
000305/008040, loss: 0.721481, avg_loss: 0.632628
|
| 113 |
+
000310/008040, loss: 0.551292, avg_loss: 0.631563
|
| 114 |
+
000315/008040, loss: 0.619102, avg_loss: 0.631161
|
| 115 |
+
000320/008040, loss: 0.573761, avg_loss: 0.631119
|
| 116 |
+
000325/008040, loss: 0.621773, avg_loss: 0.631340
|
| 117 |
+
000330/008040, loss: 0.504166, avg_loss: 0.630779
|
| 118 |
+
000335/008040, loss: 0.627258, avg_loss: 0.630988
|
| 119 |
+
000340/008040, loss: 0.620651, avg_loss: 0.630650
|
| 120 |
+
000345/008040, loss: 0.579193, avg_loss: 0.630047
|
| 121 |
+
000350/008040, loss: 0.621361, avg_loss: 0.629630
|
| 122 |
+
000355/008040, loss: 0.638191, avg_loss: 0.629452
|
| 123 |
+
000360/008040, loss: 0.589127, avg_loss: 0.629559
|
| 124 |
+
000365/008040, loss: 0.725616, avg_loss: 0.629725
|
| 125 |
+
000370/008040, loss: 0.677101, avg_loss: 0.629617
|
| 126 |
+
000375/008040, loss: 0.575119, avg_loss: 0.628965
|
| 127 |
+
000380/008040, loss: 0.493011, avg_loss: 0.628739
|
| 128 |
+
000385/008040, loss: 0.653969, avg_loss: 0.629037
|
| 129 |
+
000390/008040, loss: 0.672195, avg_loss: 0.628962
|
| 130 |
+
000395/008040, loss: 0.665962, avg_loss: 0.628864
|
| 131 |
+
000400/008040, loss: 0.515625, avg_loss: 0.627842
|
| 132 |
+
000405/008040, loss: 0.596878, avg_loss: 0.627075
|
| 133 |
+
000410/008040, loss: 0.624397, avg_loss: 0.626773
|
| 134 |
+
000415/008040, loss: 0.568840, avg_loss: 0.626794
|
| 135 |
+
000420/008040, loss: 0.678520, avg_loss: 0.626516
|
| 136 |
+
000425/008040, loss: 0.691116, avg_loss: 0.626167
|
| 137 |
+
000430/008040, loss: 0.629356, avg_loss: 0.625845
|
| 138 |
+
000435/008040, loss: 0.592529, avg_loss: 0.624969
|
| 139 |
+
000440/008040, loss: 0.621323, avg_loss: 0.624545
|
| 140 |
+
000445/008040, loss: 0.713966, avg_loss: 0.624795
|
| 141 |
+
000450/008040, loss: 0.510971, avg_loss: 0.624669
|
| 142 |
+
000455/008040, loss: 0.672722, avg_loss: 0.625094
|
| 143 |
+
000460/008040, loss: 0.569397, avg_loss: 0.624232
|
| 144 |
+
000465/008040, loss: 0.543617, avg_loss: 0.624290
|
| 145 |
+
000470/008040, loss: 0.701065, avg_loss: 0.623677
|
| 146 |
+
000475/008040, loss: 0.697563, avg_loss: 0.623886
|
| 147 |
+
000480/008040, loss: 0.516510, avg_loss: 0.623712
|
| 148 |
+
000485/008040, loss: 0.623947, avg_loss: 0.624315
|
| 149 |
+
000490/008040, loss: 0.571724, avg_loss: 0.623997
|
| 150 |
+
000495/008040, loss: 0.652824, avg_loss: 0.624104
|
| 151 |
+
000500/008040, loss: 0.591492, avg_loss: 0.623351
|
| 152 |
+
000505/008040, loss: 0.562141, avg_loss: 0.623008
|
| 153 |
+
000510/008040, loss: 0.571037, avg_loss: 0.622772
|
| 154 |
+
000515/008040, loss: 0.594940, avg_loss: 0.622726
|
| 155 |
+
000520/008040, loss: 0.595879, avg_loss: 0.622236
|
| 156 |
+
000525/008040, loss: 0.420456, avg_loss: 0.621296
|
| 157 |
+
000530/008040, loss: 0.679199, avg_loss: 0.621323
|
| 158 |
+
000535/008040, loss: 0.630341, avg_loss: 0.621167
|
| 159 |
+
***** Running dev evaluation *****
|
| 160 |
+
Num examples = 1042
|
| 161 |
+
Instantaneous batch size per device = 32
|
| 162 |
+
epoch 1, step 536/8040: {'matthews_correlation': 0.0}
|
| 163 |
+
000540/008040, loss: 0.539192, avg_loss: 0.620370
|
| 164 |
+
000545/008040, loss: 0.624496, avg_loss: 0.620560
|
| 165 |
+
000550/008040, loss: 0.478798, avg_loss: 0.620446
|
| 166 |
+
000555/008040, loss: 0.623642, avg_loss: 0.619969
|
| 167 |
+
000560/008040, loss: 0.588112, avg_loss: 0.620092
|
| 168 |
+
000565/008040, loss: 0.537552, avg_loss: 0.619577
|
| 169 |
+
000570/008040, loss: 0.650581, avg_loss: 0.619168
|
| 170 |
+
000575/008040, loss: 0.587677, avg_loss: 0.619309
|
| 171 |
+
000580/008040, loss: 0.557770, avg_loss: 0.619064
|
| 172 |
+
000585/008040, loss: 0.741516, avg_loss: 0.619524
|
| 173 |
+
000590/008040, loss: 0.600594, avg_loss: 0.619864
|
| 174 |
+
000595/008040, loss: 0.669243, avg_loss: 0.619429
|
| 175 |
+
000600/008040, loss: 0.649200, avg_loss: 0.619334
|
| 176 |
+
000605/008040, loss: 0.707039, avg_loss: 0.619096
|
| 177 |
+
000610/008040, loss: 0.570152, avg_loss: 0.618888
|
| 178 |
+
000615/008040, loss: 0.678078, avg_loss: 0.618893
|
| 179 |
+
000620/008040, loss: 0.590279, avg_loss: 0.618822
|
| 180 |
+
000625/008040, loss: 0.648178, avg_loss: 0.618851
|
| 181 |
+
000630/008040, loss: 0.707413, avg_loss: 0.619119
|
| 182 |
+
000635/008040, loss: 0.613609, avg_loss: 0.619032
|
| 183 |
+
000640/008040, loss: 0.637474, avg_loss: 0.619302
|
| 184 |
+
000645/008040, loss: 0.620033, avg_loss: 0.619061
|
| 185 |
+
000650/008040, loss: 0.674812, avg_loss: 0.619388
|
| 186 |
+
000655/008040, loss: 0.603241, avg_loss: 0.619122
|
| 187 |
+
000660/008040, loss: 0.596001, avg_loss: 0.619185
|
| 188 |
+
000665/008040, loss: 0.546028, avg_loss: 0.618988
|
| 189 |
+
000670/008040, loss: 0.467194, avg_loss: 0.618476
|
| 190 |
+
000675/008040, loss: 0.539108, avg_loss: 0.617887
|
| 191 |
+
000680/008040, loss: 0.630531, avg_loss: 0.617587
|
| 192 |
+
000685/008040, loss: 0.628906, avg_loss: 0.617714
|
| 193 |
+
000690/008040, loss: 0.662460, avg_loss: 0.617408
|
| 194 |
+
000695/008040, loss: 0.556480, avg_loss: 0.617490
|
| 195 |
+
000700/008040, loss: 0.562729, avg_loss: 0.617541
|
| 196 |
+
000705/008040, loss: 0.534187, avg_loss: 0.617240
|
| 197 |
+
000710/008040, loss: 0.705315, avg_loss: 0.617217
|
| 198 |
+
000715/008040, loss: 0.628326, avg_loss: 0.617076
|
| 199 |
+
000720/008040, loss: 0.451012, avg_loss: 0.616425
|
| 200 |
+
000725/008040, loss: 0.679764, avg_loss: 0.616494
|
| 201 |
+
000730/008040, loss: 0.648232, avg_loss: 0.616603
|
| 202 |
+
000735/008040, loss: 0.775688, avg_loss: 0.616731
|
| 203 |
+
000740/008040, loss: 0.483841, avg_loss: 0.616351
|
| 204 |
+
000745/008040, loss: 0.480392, avg_loss: 0.616453
|
| 205 |
+
000750/008040, loss: 0.641754, avg_loss: 0.616477
|
| 206 |
+
000755/008040, loss: 0.655449, avg_loss: 0.616491
|
| 207 |
+
000760/008040, loss: 0.620735, avg_loss: 0.616589
|
| 208 |
+
000765/008040, loss: 0.698372, avg_loss: 0.616510
|
| 209 |
+
000770/008040, loss: 0.541710, avg_loss: 0.616465
|
| 210 |
+
000775/008040, loss: 0.616829, avg_loss: 0.616582
|
| 211 |
+
000780/008040, loss: 0.615852, avg_loss: 0.616320
|
| 212 |
+
000785/008040, loss: 0.645004, avg_loss: 0.616204
|
| 213 |
+
000790/008040, loss: 0.620392, avg_loss: 0.616373
|
| 214 |
+
000795/008040, loss: 0.621986, avg_loss: 0.616532
|
| 215 |
+
000800/008040, loss: 0.698029, avg_loss: 0.616433
|
| 216 |
+
***** Running dev evaluation *****
|
| 217 |
+
Num examples = 1042
|
| 218 |
+
Instantaneous batch size per device = 32
|
| 219 |
+
epoch 2, step 804/8040: {'matthews_correlation': 0.0}
|
| 220 |
+
000805/008040, loss: 0.666588, avg_loss: 0.616280
|
| 221 |
+
000810/008040, loss: 0.513000, avg_loss: 0.616144
|
| 222 |
+
000815/008040, loss: 0.678246, avg_loss: 0.616261
|
| 223 |
+
000820/008040, loss: 0.508728, avg_loss: 0.616257
|
| 224 |
+
000825/008040, loss: 0.561485, avg_loss: 0.615815
|
| 225 |
+
000830/008040, loss: 0.629829, avg_loss: 0.615827
|
| 226 |
+
000835/008040, loss: 0.614128, avg_loss: 0.615832
|
| 227 |
+
000840/008040, loss: 0.594833, avg_loss: 0.615989
|
| 228 |
+
000845/008040, loss: 0.536339, avg_loss: 0.616183
|
| 229 |
+
000850/008040, loss: 0.599487, avg_loss: 0.616021
|
| 230 |
+
000855/008040, loss: 0.673401, avg_loss: 0.616031
|
| 231 |
+
000860/008040, loss: 0.539940, avg_loss: 0.616041
|
| 232 |
+
000865/008040, loss: 0.572212, avg_loss: 0.616227
|
| 233 |
+
000870/008040, loss: 0.609131, avg_loss: 0.615873
|
| 234 |
+
000875/008040, loss: 0.614037, avg_loss: 0.615787
|
| 235 |
+
000880/008040, loss: 0.571541, avg_loss: 0.615553
|
| 236 |
+
000885/008040, loss: 0.544708, avg_loss: 0.615402
|
| 237 |
+
000890/008040, loss: 0.536331, avg_loss: 0.615272
|
| 238 |
+
000895/008040, loss: 0.675529, avg_loss: 0.615234
|
| 239 |
+
000900/008040, loss: 0.627602, avg_loss: 0.615170
|
| 240 |
+
000905/008040, loss: 0.660126, avg_loss: 0.615088
|
| 241 |
+
000910/008040, loss: 0.558937, avg_loss: 0.614803
|
| 242 |
+
000915/008040, loss: 0.549660, avg_loss: 0.614641
|
| 243 |
+
000920/008040, loss: 0.643845, avg_loss: 0.614743
|
| 244 |
+
000925/008040, loss: 0.572258, avg_loss: 0.614782
|
| 245 |
+
000930/008040, loss: 0.634689, avg_loss: 0.614739
|
| 246 |
+
000935/008040, loss: 0.543579, avg_loss: 0.614672
|
| 247 |
+
000940/008040, loss: 0.714607, avg_loss: 0.614516
|
| 248 |
+
000945/008040, loss: 0.596260, avg_loss: 0.614343
|
| 249 |
+
000950/008040, loss: 0.564911, avg_loss: 0.614114
|
| 250 |
+
000955/008040, loss: 0.735931, avg_loss: 0.614013
|
| 251 |
+
000960/008040, loss: 0.742020, avg_loss: 0.614185
|
| 252 |
+
000965/008040, loss: 0.632996, avg_loss: 0.613984
|
| 253 |
+
000970/008040, loss: 0.505898, avg_loss: 0.614050
|
| 254 |
+
000975/008040, loss: 0.632904, avg_loss: 0.614360
|
| 255 |
+
000980/008040, loss: 0.637787, avg_loss: 0.614284
|
| 256 |
+
000985/008040, loss: 0.544106, avg_loss: 0.614300
|
| 257 |
+
000990/008040, loss: 0.627823, avg_loss: 0.614128
|
| 258 |
+
000995/008040, loss: 0.518829, avg_loss: 0.613865
|
| 259 |
+
001000/008040, loss: 0.682663, avg_loss: 0.613936
|
| 260 |
+
001005/008040, loss: 0.702003, avg_loss: 0.614055
|
| 261 |
+
001010/008040, loss: 0.638351, avg_loss: 0.613856
|
| 262 |
+
001015/008040, loss: 0.643425, avg_loss: 0.613794
|
| 263 |
+
001020/008040, loss: 0.511559, avg_loss: 0.613882
|
| 264 |
+
001025/008040, loss: 0.570694, avg_loss: 0.613526
|
| 265 |
+
001030/008040, loss: 0.742371, avg_loss: 0.613564
|
| 266 |
+
001035/008040, loss: 0.525169, avg_loss: 0.613386
|
| 267 |
+
001040/008040, loss: 0.631721, avg_loss: 0.613472
|
| 268 |
+
001045/008040, loss: 0.678436, avg_loss: 0.613299
|
| 269 |
+
001050/008040, loss: 0.496040, avg_loss: 0.613325
|
| 270 |
+
001055/008040, loss: 0.656860, avg_loss: 0.613305
|
| 271 |
+
001060/008040, loss: 0.574501, avg_loss: 0.613204
|
| 272 |
+
001065/008040, loss: 0.547646, avg_loss: 0.613304
|
| 273 |
+
001070/008040, loss: 0.567757, avg_loss: 0.613248
|
| 274 |
+
***** Running dev evaluation *****
|
| 275 |
+
Num examples = 1042
|
| 276 |
+
Instantaneous batch size per device = 32
|
| 277 |
+
epoch 3, step 1072/8040: {'matthews_correlation': 0.0}
|
| 278 |
+
001075/008040, loss: 0.516525, avg_loss: 0.613198
|
| 279 |
+
001080/008040, loss: 0.646126, avg_loss: 0.613297
|
| 280 |
+
001085/008040, loss: 0.559669, avg_loss: 0.613150
|
| 281 |
+
001090/008040, loss: 0.466164, avg_loss: 0.612698
|
| 282 |
+
001095/008040, loss: 0.651367, avg_loss: 0.612560
|
| 283 |
+
001100/008040, loss: 0.654549, avg_loss: 0.612559
|
| 284 |
+
001105/008040, loss: 0.541718, avg_loss: 0.612571
|
| 285 |
+
001110/008040, loss: 0.508430, avg_loss: 0.612473
|
| 286 |
+
001115/008040, loss: 0.573200, avg_loss: 0.612111
|
| 287 |
+
001120/008040, loss: 0.557476, avg_loss: 0.611973
|
| 288 |
+
001125/008040, loss: 0.652611, avg_loss: 0.611617
|
| 289 |
+
001130/008040, loss: 0.526085, avg_loss: 0.611471
|
| 290 |
+
001135/008040, loss: 0.444359, avg_loss: 0.610979
|
| 291 |
+
001140/008040, loss: 0.628677, avg_loss: 0.610810
|
| 292 |
+
001145/008040, loss: 0.503414, avg_loss: 0.610575
|
| 293 |
+
001150/008040, loss: 0.473530, avg_loss: 0.610274
|
| 294 |
+
001155/008040, loss: 0.545444, avg_loss: 0.609830
|
| 295 |
+
001160/008040, loss: 0.505241, avg_loss: 0.609483
|
| 296 |
+
001165/008040, loss: 0.495068, avg_loss: 0.609279
|
| 297 |
+
001170/008040, loss: 0.588757, avg_loss: 0.609380
|
| 298 |
+
001175/008040, loss: 0.567360, avg_loss: 0.609198
|
| 299 |
+
001180/008040, loss: 0.648659, avg_loss: 0.609238
|
| 300 |
+
001185/008040, loss: 0.637810, avg_loss: 0.609066
|
| 301 |
+
001190/008040, loss: 0.824970, avg_loss: 0.608990
|
| 302 |
+
001195/008040, loss: 0.519943, avg_loss: 0.608662
|
| 303 |
+
001200/008040, loss: 0.601860, avg_loss: 0.608558
|
| 304 |
+
001205/008040, loss: 0.583908, avg_loss: 0.608157
|
| 305 |
+
001210/008040, loss: 0.725353, avg_loss: 0.608082
|
| 306 |
+
001215/008040, loss: 0.490597, avg_loss: 0.607950
|
| 307 |
+
001220/008040, loss: 0.425632, avg_loss: 0.607686
|
| 308 |
+
001225/008040, loss: 0.596016, avg_loss: 0.607460
|
| 309 |
+
001230/008040, loss: 0.577240, avg_loss: 0.607340
|
| 310 |
+
001235/008040, loss: 0.586720, avg_loss: 0.607166
|
| 311 |
+
001240/008040, loss: 0.395218, avg_loss: 0.606811
|
| 312 |
+
001245/008040, loss: 0.633438, avg_loss: 0.606556
|
| 313 |
+
001250/008040, loss: 0.680958, avg_loss: 0.606540
|
| 314 |
+
001255/008040, loss: 0.628593, avg_loss: 0.606489
|
| 315 |
+
001260/008040, loss: 0.440163, avg_loss: 0.606267
|
| 316 |
+
001265/008040, loss: 0.556847, avg_loss: 0.606262
|
| 317 |
+
001270/008040, loss: 0.504066, avg_loss: 0.605940
|
| 318 |
+
001275/008040, loss: 0.572201, avg_loss: 0.605561
|
| 319 |
+
001280/008040, loss: 0.718948, avg_loss: 0.605577
|
| 320 |
+
001285/008040, loss: 0.485722, avg_loss: 0.605334
|
| 321 |
+
001290/008040, loss: 0.612705, avg_loss: 0.605227
|
| 322 |
+
001295/008040, loss: 0.666271, avg_loss: 0.604907
|
| 323 |
+
001300/008040, loss: 0.501457, avg_loss: 0.604697
|
| 324 |
+
001305/008040, loss: 0.642426, avg_loss: 0.604318
|
| 325 |
+
001310/008040, loss: 0.627075, avg_loss: 0.604377
|
| 326 |
+
001315/008040, loss: 0.406536, avg_loss: 0.603942
|
| 327 |
+
001320/008040, loss: 0.498077, avg_loss: 0.603909
|
| 328 |
+
001325/008040, loss: 0.622322, avg_loss: 0.604236
|
| 329 |
+
001330/008040, loss: 0.563160, avg_loss: 0.604285
|
| 330 |
+
001335/008040, loss: 0.545391, avg_loss: 0.604257
|
| 331 |
+
001340/008040, loss: 0.478202, avg_loss: 0.604044
|
| 332 |
+
***** Running dev evaluation *****
|
| 333 |
+
Num examples = 1042
|
| 334 |
+
Instantaneous batch size per device = 32
|
| 335 |
+
epoch 4, step 1340/8040: {'matthews_correlation': 0.15816318746785782}
|
| 336 |
+
001345/008040, loss: 0.497211, avg_loss: 0.603771
|
| 337 |
+
001350/008040, loss: 0.504078, avg_loss: 0.603257
|
| 338 |
+
001355/008040, loss: 0.563469, avg_loss: 0.603027
|
| 339 |
+
001360/008040, loss: 0.436638, avg_loss: 0.602608
|
| 340 |
+
001365/008040, loss: 0.503475, avg_loss: 0.602021
|
| 341 |
+
001370/008040, loss: 0.512344, avg_loss: 0.601490
|
| 342 |
+
001375/008040, loss: 0.479725, avg_loss: 0.601133
|
| 343 |
+
001380/008040, loss: 0.547909, avg_loss: 0.600951
|
| 344 |
+
001385/008040, loss: 0.400013, avg_loss: 0.600657
|
| 345 |
+
001390/008040, loss: 0.542755, avg_loss: 0.600452
|
| 346 |
+
001395/008040, loss: 0.376102, avg_loss: 0.600132
|
| 347 |
+
001400/008040, loss: 0.548512, avg_loss: 0.599679
|
| 348 |
+
001405/008040, loss: 0.676727, avg_loss: 0.599476
|
| 349 |
+
001410/008040, loss: 0.657574, avg_loss: 0.599159
|
| 350 |
+
001415/008040, loss: 0.496979, avg_loss: 0.598694
|
| 351 |
+
001420/008040, loss: 0.572510, avg_loss: 0.598379
|
| 352 |
+
001425/008040, loss: 0.535450, avg_loss: 0.598210
|
| 353 |
+
001430/008040, loss: 0.394039, avg_loss: 0.597776
|
| 354 |
+
001435/008040, loss: 0.513657, avg_loss: 0.597460
|
| 355 |
+
001440/008040, loss: 0.511215, avg_loss: 0.597127
|
| 356 |
+
001445/008040, loss: 0.339886, avg_loss: 0.596767
|
| 357 |
+
001450/008040, loss: 0.510815, avg_loss: 0.596433
|
| 358 |
+
001455/008040, loss: 0.654274, avg_loss: 0.596212
|
| 359 |
+
001460/008040, loss: 0.349232, avg_loss: 0.595762
|
| 360 |
+
001465/008040, loss: 0.493137, avg_loss: 0.595544
|
| 361 |
+
001470/008040, loss: 0.592804, avg_loss: 0.595768
|
| 362 |
+
001475/008040, loss: 0.455566, avg_loss: 0.595485
|
| 363 |
+
001480/008040, loss: 0.570206, avg_loss: 0.595109
|
| 364 |
+
001485/008040, loss: 0.385925, avg_loss: 0.594787
|
| 365 |
+
001490/008040, loss: 0.431305, avg_loss: 0.594303
|
| 366 |
+
001495/008040, loss: 0.571796, avg_loss: 0.594218
|
| 367 |
+
001500/008040, loss: 0.523552, avg_loss: 0.594122
|
| 368 |
+
001505/008040, loss: 0.777546, avg_loss: 0.594023
|
| 369 |
+
001510/008040, loss: 0.629959, avg_loss: 0.593755
|
| 370 |
+
001515/008040, loss: 0.650162, avg_loss: 0.593670
|
| 371 |
+
001520/008040, loss: 0.509098, avg_loss: 0.593517
|
| 372 |
+
001525/008040, loss: 0.462357, avg_loss: 0.593270
|
| 373 |
+
001530/008040, loss: 0.643883, avg_loss: 0.593174
|
| 374 |
+
001535/008040, loss: 0.417114, avg_loss: 0.592827
|
| 375 |
+
001540/008040, loss: 0.470078, avg_loss: 0.592541
|
| 376 |
+
001545/008040, loss: 0.504375, avg_loss: 0.592232
|
| 377 |
+
001550/008040, loss: 0.536144, avg_loss: 0.592052
|
| 378 |
+
001555/008040, loss: 0.408104, avg_loss: 0.591786
|
| 379 |
+
001560/008040, loss: 0.476906, avg_loss: 0.591672
|
| 380 |
+
001565/008040, loss: 0.560863, avg_loss: 0.591418
|
| 381 |
+
001570/008040, loss: 0.519066, avg_loss: 0.591105
|
| 382 |
+
001575/008040, loss: 0.594341, avg_loss: 0.590856
|
| 383 |
+
001580/008040, loss: 0.466764, avg_loss: 0.590662
|
| 384 |
+
001585/008040, loss: 0.450207, avg_loss: 0.590457
|
| 385 |
+
001590/008040, loss: 0.435432, avg_loss: 0.590293
|
| 386 |
+
001595/008040, loss: 0.556160, avg_loss: 0.589989
|
| 387 |
+
001600/008040, loss: 0.611170, avg_loss: 0.589724
|
| 388 |
+
001605/008040, loss: 0.402641, avg_loss: 0.589453
|
| 389 |
+
***** Running dev evaluation *****
|
| 390 |
+
Num examples = 1042
|
| 391 |
+
Instantaneous batch size per device = 32
|
| 392 |
+
epoch 5, step 1608/8040: {'matthews_correlation': 0.1895854925674006}
|
| 393 |
+
001610/008040, loss: 0.432098, avg_loss: 0.588991
|
| 394 |
+
001615/008040, loss: 0.498215, avg_loss: 0.588663
|
| 395 |
+
001620/008040, loss: 0.663460, avg_loss: 0.588371
|
| 396 |
+
001625/008040, loss: 0.360754, avg_loss: 0.587877
|
| 397 |
+
001630/008040, loss: 0.387835, avg_loss: 0.587359
|
| 398 |
+
001635/008040, loss: 0.324583, avg_loss: 0.586922
|
| 399 |
+
001640/008040, loss: 0.535975, avg_loss: 0.586623
|
| 400 |
+
001645/008040, loss: 0.592346, avg_loss: 0.586544
|
| 401 |
+
001650/008040, loss: 0.525536, avg_loss: 0.586103
|
| 402 |
+
001655/008040, loss: 0.575745, avg_loss: 0.585766
|
| 403 |
+
001660/008040, loss: 0.449825, avg_loss: 0.585359
|
| 404 |
+
001665/008040, loss: 0.420126, avg_loss: 0.585023
|
| 405 |
+
001670/008040, loss: 0.639540, avg_loss: 0.584721
|
| 406 |
+
001675/008040, loss: 0.344273, avg_loss: 0.584341
|
| 407 |
+
001680/008040, loss: 0.540806, avg_loss: 0.583815
|
| 408 |
+
001685/008040, loss: 0.384409, avg_loss: 0.583182
|
| 409 |
+
001690/008040, loss: 0.440865, avg_loss: 0.583039
|
| 410 |
+
001695/008040, loss: 0.555412, avg_loss: 0.582610
|
| 411 |
+
001700/008040, loss: 0.407997, avg_loss: 0.582275
|
| 412 |
+
001705/008040, loss: 0.383024, avg_loss: 0.581847
|
| 413 |
+
001710/008040, loss: 0.537008, avg_loss: 0.581442
|
| 414 |
+
001715/008040, loss: 0.355068, avg_loss: 0.581088
|
| 415 |
+
001720/008040, loss: 0.338402, avg_loss: 0.580900
|
| 416 |
+
001725/008040, loss: 0.566572, avg_loss: 0.580647
|
| 417 |
+
001730/008040, loss: 0.548752, avg_loss: 0.580402
|
| 418 |
+
001735/008040, loss: 0.535681, avg_loss: 0.580117
|
| 419 |
+
001740/008040, loss: 0.614624, avg_loss: 0.579823
|
| 420 |
+
001745/008040, loss: 0.530151, avg_loss: 0.579474
|
| 421 |
+
001750/008040, loss: 0.489605, avg_loss: 0.579157
|
| 422 |
+
001755/008040, loss: 0.584639, avg_loss: 0.578986
|
| 423 |
+
001760/008040, loss: 0.501501, avg_loss: 0.578577
|
| 424 |
+
001765/008040, loss: 0.511179, avg_loss: 0.578278
|
| 425 |
+
001770/008040, loss: 0.400345, avg_loss: 0.577970
|
| 426 |
+
001775/008040, loss: 0.562296, avg_loss: 0.577756
|
| 427 |
+
001780/008040, loss: 0.460430, avg_loss: 0.577570
|
| 428 |
+
001785/008040, loss: 0.420773, avg_loss: 0.577251
|
| 429 |
+
001790/008040, loss: 0.476810, avg_loss: 0.576814
|
| 430 |
+
001795/008040, loss: 0.473215, avg_loss: 0.576538
|
| 431 |
+
001800/008040, loss: 0.324244, avg_loss: 0.576321
|
| 432 |
+
001805/008040, loss: 0.466421, avg_loss: 0.575848
|
| 433 |
+
001810/008040, loss: 0.515072, avg_loss: 0.575525
|
| 434 |
+
001815/008040, loss: 0.348133, avg_loss: 0.575163
|
| 435 |
+
001820/008040, loss: 0.405993, avg_loss: 0.574717
|
| 436 |
+
001825/008040, loss: 0.724440, avg_loss: 0.574358
|
| 437 |
+
001830/008040, loss: 0.404591, avg_loss: 0.573989
|
| 438 |
+
001835/008040, loss: 0.386953, avg_loss: 0.573703
|
| 439 |
+
001840/008040, loss: 0.297220, avg_loss: 0.573313
|
| 440 |
+
001845/008040, loss: 0.549763, avg_loss: 0.573012
|
| 441 |
+
001850/008040, loss: 0.439955, avg_loss: 0.572694
|
| 442 |
+
001855/008040, loss: 0.379715, avg_loss: 0.572410
|
| 443 |
+
001860/008040, loss: 0.301922, avg_loss: 0.571970
|
| 444 |
+
001865/008040, loss: 0.442181, avg_loss: 0.571632
|
| 445 |
+
001870/008040, loss: 0.403500, avg_loss: 0.571553
|
| 446 |
+
001875/008040, loss: 0.479061, avg_loss: 0.571291
|
| 447 |
+
***** Running dev evaluation *****
|
| 448 |
+
Num examples = 1042
|
| 449 |
+
Instantaneous batch size per device = 32
|
| 450 |
+
epoch 6, step 1876/8040: {'matthews_correlation': 0.21307686539085852}
|
| 451 |
+
001880/008040, loss: 0.669218, avg_loss: 0.571140
|
| 452 |
+
001885/008040, loss: 0.542337, avg_loss: 0.570861
|
| 453 |
+
001890/008040, loss: 0.589821, avg_loss: 0.570818
|
| 454 |
+
001895/008040, loss: 0.476381, avg_loss: 0.570683
|
| 455 |
+
001900/008040, loss: 0.404007, avg_loss: 0.570266
|
| 456 |
+
001905/008040, loss: 0.356413, avg_loss: 0.569667
|
| 457 |
+
001910/008040, loss: 0.477184, avg_loss: 0.569371
|
| 458 |
+
001915/008040, loss: 0.490374, avg_loss: 0.568927
|
| 459 |
+
001920/008040, loss: 0.415562, avg_loss: 0.568538
|
| 460 |
+
001925/008040, loss: 0.388258, avg_loss: 0.568085
|
| 461 |
+
001930/008040, loss: 0.456861, avg_loss: 0.567801
|
| 462 |
+
001935/008040, loss: 0.384424, avg_loss: 0.567589
|
| 463 |
+
001940/008040, loss: 0.353615, avg_loss: 0.567248
|
| 464 |
+
001945/008040, loss: 0.391302, avg_loss: 0.566774
|
| 465 |
+
001950/008040, loss: 0.465639, avg_loss: 0.566302
|
| 466 |
+
001955/008040, loss: 0.230816, avg_loss: 0.565751
|
| 467 |
+
001960/008040, loss: 0.518036, avg_loss: 0.565276
|
| 468 |
+
001965/008040, loss: 0.410194, avg_loss: 0.564808
|
| 469 |
+
001970/008040, loss: 0.349751, avg_loss: 0.564646
|
| 470 |
+
001975/008040, loss: 0.334585, avg_loss: 0.564052
|
| 471 |
+
001980/008040, loss: 0.534948, avg_loss: 0.563787
|
| 472 |
+
001985/008040, loss: 0.362720, avg_loss: 0.563456
|
| 473 |
+
001990/008040, loss: 0.448977, avg_loss: 0.563030
|
| 474 |
+
001995/008040, loss: 0.355686, avg_loss: 0.562736
|
| 475 |
+
002000/008040, loss: 0.439312, avg_loss: 0.562282
|
| 476 |
+
002005/008040, loss: 0.560658, avg_loss: 0.561938
|
| 477 |
+
002010/008040, loss: 0.257093, avg_loss: 0.561694
|
| 478 |
+
002015/008040, loss: 0.427464, avg_loss: 0.561446
|
| 479 |
+
002020/008040, loss: 0.501780, avg_loss: 0.561143
|
| 480 |
+
002025/008040, loss: 0.395622, avg_loss: 0.560958
|
| 481 |
+
002030/008040, loss: 0.593700, avg_loss: 0.560682
|
| 482 |
+
002035/008040, loss: 0.260624, avg_loss: 0.560042
|
| 483 |
+
002040/008040, loss: 0.331761, avg_loss: 0.559656
|
| 484 |
+
002045/008040, loss: 0.454527, avg_loss: 0.559403
|
| 485 |
+
002050/008040, loss: 0.379538, avg_loss: 0.559265
|
| 486 |
+
002055/008040, loss: 0.376312, avg_loss: 0.558873
|
| 487 |
+
002060/008040, loss: 0.522795, avg_loss: 0.558628
|
| 488 |
+
002065/008040, loss: 0.457785, avg_loss: 0.558271
|
| 489 |
+
002070/008040, loss: 0.561003, avg_loss: 0.557992
|
| 490 |
+
002075/008040, loss: 0.269575, avg_loss: 0.557529
|
| 491 |
+
002080/008040, loss: 0.476014, avg_loss: 0.557187
|
| 492 |
+
002085/008040, loss: 0.425539, avg_loss: 0.556978
|
| 493 |
+
002090/008040, loss: 0.350451, avg_loss: 0.556575
|
| 494 |
+
002095/008040, loss: 0.275333, avg_loss: 0.556114
|
| 495 |
+
002100/008040, loss: 0.463511, avg_loss: 0.556032
|
| 496 |
+
002105/008040, loss: 0.313173, avg_loss: 0.555600
|
| 497 |
+
002110/008040, loss: 0.555047, avg_loss: 0.555265
|
| 498 |
+
002115/008040, loss: 0.554151, avg_loss: 0.555004
|
| 499 |
+
002120/008040, loss: 0.383408, avg_loss: 0.554883
|
| 500 |
+
002125/008040, loss: 0.488256, avg_loss: 0.554548
|
| 501 |
+
002130/008040, loss: 0.357615, avg_loss: 0.554264
|
| 502 |
+
002135/008040, loss: 0.157333, avg_loss: 0.553637
|
| 503 |
+
002140/008040, loss: 0.288863, avg_loss: 0.553479
|
| 504 |
+
***** Running dev evaluation *****
|
| 505 |
+
Num examples = 1042
|
| 506 |
+
Instantaneous batch size per device = 32
|
| 507 |
+
epoch 7, step 2144/8040: {'matthews_correlation': 0.22254373946847703}
|
| 508 |
+
002145/008040, loss: 0.303406, avg_loss: 0.553004
|
| 509 |
+
002150/008040, loss: 0.306277, avg_loss: 0.552618
|
| 510 |
+
002155/008040, loss: 0.338732, avg_loss: 0.552232
|
| 511 |
+
002160/008040, loss: 0.427835, avg_loss: 0.551830
|
| 512 |
+
002165/008040, loss: 0.414758, avg_loss: 0.551564
|
| 513 |
+
002170/008040, loss: 0.576152, avg_loss: 0.551253
|
| 514 |
+
002175/008040, loss: 0.407147, avg_loss: 0.550871
|
| 515 |
+
002180/008040, loss: 0.399303, avg_loss: 0.550418
|
| 516 |
+
002185/008040, loss: 0.417184, avg_loss: 0.550105
|
| 517 |
+
002190/008040, loss: 0.373152, avg_loss: 0.549766
|
| 518 |
+
002195/008040, loss: 0.357236, avg_loss: 0.549473
|
| 519 |
+
002200/008040, loss: 0.488667, avg_loss: 0.549189
|
| 520 |
+
002205/008040, loss: 0.430184, avg_loss: 0.548822
|
| 521 |
+
002210/008040, loss: 0.509947, avg_loss: 0.548544
|
| 522 |
+
002215/008040, loss: 0.248991, avg_loss: 0.548000
|
| 523 |
+
002220/008040, loss: 0.524855, avg_loss: 0.547764
|
| 524 |
+
002225/008040, loss: 0.307470, avg_loss: 0.547397
|
| 525 |
+
002230/008040, loss: 0.317670, avg_loss: 0.546941
|
| 526 |
+
002235/008040, loss: 0.257109, avg_loss: 0.546452
|
| 527 |
+
002240/008040, loss: 0.437452, avg_loss: 0.546180
|
| 528 |
+
002245/008040, loss: 0.416744, avg_loss: 0.545923
|
| 529 |
+
002250/008040, loss: 0.301953, avg_loss: 0.545449
|
| 530 |
+
002255/008040, loss: 0.505546, avg_loss: 0.545019
|
| 531 |
+
002260/008040, loss: 0.438693, avg_loss: 0.544505
|
| 532 |
+
002265/008040, loss: 0.269212, avg_loss: 0.544109
|
| 533 |
+
002270/008040, loss: 0.436381, avg_loss: 0.543815
|
| 534 |
+
002275/008040, loss: 0.455789, avg_loss: 0.543614
|
| 535 |
+
002280/008040, loss: 0.498013, avg_loss: 0.543247
|
| 536 |
+
002285/008040, loss: 0.329160, avg_loss: 0.542899
|
| 537 |
+
002290/008040, loss: 0.281924, avg_loss: 0.542624
|
| 538 |
+
002295/008040, loss: 0.366100, avg_loss: 0.542508
|
| 539 |
+
002300/008040, loss: 0.371017, avg_loss: 0.542094
|
| 540 |
+
002305/008040, loss: 0.395582, avg_loss: 0.541701
|
| 541 |
+
002310/008040, loss: 0.396773, avg_loss: 0.541435
|
| 542 |
+
002315/008040, loss: 0.382794, avg_loss: 0.541095
|
| 543 |
+
002320/008040, loss: 0.306365, avg_loss: 0.540708
|
| 544 |
+
002325/008040, loss: 0.434783, avg_loss: 0.540375
|
| 545 |
+
002330/008040, loss: 0.357806, avg_loss: 0.539949
|
| 546 |
+
002335/008040, loss: 0.380549, avg_loss: 0.539729
|
| 547 |
+
002340/008040, loss: 0.216732, avg_loss: 0.539146
|
| 548 |
+
002345/008040, loss: 0.385883, avg_loss: 0.538827
|
| 549 |
+
002350/008040, loss: 0.594073, avg_loss: 0.538528
|
| 550 |
+
002355/008040, loss: 0.413212, avg_loss: 0.538440
|
| 551 |
+
002360/008040, loss: 0.210932, avg_loss: 0.538051
|
| 552 |
+
002365/008040, loss: 0.336315, avg_loss: 0.537636
|
| 553 |
+
002370/008040, loss: 0.447609, avg_loss: 0.537165
|
| 554 |
+
002375/008040, loss: 0.580924, avg_loss: 0.537083
|
| 555 |
+
002380/008040, loss: 0.482136, avg_loss: 0.536826
|
| 556 |
+
002385/008040, loss: 0.290261, avg_loss: 0.536410
|
| 557 |
+
002390/008040, loss: 0.567177, avg_loss: 0.536199
|
| 558 |
+
002395/008040, loss: 0.543372, avg_loss: 0.535950
|
| 559 |
+
002400/008040, loss: 0.344522, avg_loss: 0.535539
|
| 560 |
+
002405/008040, loss: 0.219617, avg_loss: 0.535235
|
| 561 |
+
002410/008040, loss: 0.449083, avg_loss: 0.534978
|
| 562 |
+
***** Running dev evaluation *****
|
| 563 |
+
Num examples = 1042
|
| 564 |
+
Instantaneous batch size per device = 32
|
| 565 |
+
epoch 8, step 2412/8040: {'matthews_correlation': 0.22951168079779777}
|
| 566 |
+
002415/008040, loss: 0.347907, avg_loss: 0.534702
|
| 567 |
+
002420/008040, loss: 0.380767, avg_loss: 0.534286
|
| 568 |
+
002425/008040, loss: 0.322007, avg_loss: 0.533838
|
| 569 |
+
002430/008040, loss: 0.468786, avg_loss: 0.533497
|
| 570 |
+
002435/008040, loss: 0.309734, avg_loss: 0.533102
|
| 571 |
+
002440/008040, loss: 0.197897, avg_loss: 0.532586
|
| 572 |
+
002445/008040, loss: 0.345694, avg_loss: 0.532278
|
| 573 |
+
002450/008040, loss: 0.559374, avg_loss: 0.531879
|
| 574 |
+
002455/008040, loss: 0.311327, avg_loss: 0.531552
|
| 575 |
+
002460/008040, loss: 0.300930, avg_loss: 0.531075
|
| 576 |
+
002465/008040, loss: 0.339101, avg_loss: 0.530702
|
| 577 |
+
002470/008040, loss: 0.473831, avg_loss: 0.530428
|
| 578 |
+
002475/008040, loss: 0.467193, avg_loss: 0.530181
|
| 579 |
+
002480/008040, loss: 0.500907, avg_loss: 0.529953
|
| 580 |
+
002485/008040, loss: 0.457974, avg_loss: 0.529605
|
| 581 |
+
002490/008040, loss: 0.343074, avg_loss: 0.529418
|
| 582 |
+
002495/008040, loss: 0.444531, avg_loss: 0.529064
|
| 583 |
+
002500/008040, loss: 0.330952, avg_loss: 0.528731
|
| 584 |
+
002505/008040, loss: 0.258092, avg_loss: 0.528329
|
| 585 |
+
002510/008040, loss: 0.373878, avg_loss: 0.527944
|
| 586 |
+
002515/008040, loss: 0.298052, avg_loss: 0.527590
|
| 587 |
+
002520/008040, loss: 0.290030, avg_loss: 0.527212
|
| 588 |
+
002525/008040, loss: 0.235393, avg_loss: 0.526718
|
| 589 |
+
002530/008040, loss: 0.293452, avg_loss: 0.526214
|
| 590 |
+
002535/008040, loss: 0.193506, avg_loss: 0.525848
|
| 591 |
+
002540/008040, loss: 0.344110, avg_loss: 0.525426
|
| 592 |
+
002545/008040, loss: 0.457061, avg_loss: 0.525270
|
| 593 |
+
002550/008040, loss: 0.443786, avg_loss: 0.524819
|
| 594 |
+
002555/008040, loss: 0.245760, avg_loss: 0.524364
|
| 595 |
+
002560/008040, loss: 0.319549, avg_loss: 0.524011
|
| 596 |
+
002565/008040, loss: 0.259768, avg_loss: 0.523602
|
| 597 |
+
002570/008040, loss: 0.575876, avg_loss: 0.523300
|
| 598 |
+
002575/008040, loss: 0.411968, avg_loss: 0.522964
|
| 599 |
+
002580/008040, loss: 0.563390, avg_loss: 0.522671
|
| 600 |
+
002585/008040, loss: 0.227027, avg_loss: 0.522272
|
| 601 |
+
002590/008040, loss: 0.354241, avg_loss: 0.521908
|
| 602 |
+
002595/008040, loss: 0.265463, avg_loss: 0.521373
|
| 603 |
+
002600/008040, loss: 0.427238, avg_loss: 0.521074
|
| 604 |
+
002605/008040, loss: 0.287523, avg_loss: 0.520749
|
| 605 |
+
002610/008040, loss: 0.325408, avg_loss: 0.520449
|
| 606 |
+
002615/008040, loss: 0.423108, avg_loss: 0.520330
|
| 607 |
+
002620/008040, loss: 0.443160, avg_loss: 0.520100
|
| 608 |
+
002625/008040, loss: 0.299728, avg_loss: 0.519799
|
| 609 |
+
002630/008040, loss: 0.535900, avg_loss: 0.519659
|
| 610 |
+
002635/008040, loss: 0.479263, avg_loss: 0.519475
|
| 611 |
+
002640/008040, loss: 0.471956, avg_loss: 0.519183
|
| 612 |
+
002645/008040, loss: 0.267595, avg_loss: 0.518814
|
| 613 |
+
002650/008040, loss: 0.322007, avg_loss: 0.518672
|
| 614 |
+
002655/008040, loss: 0.332003, avg_loss: 0.518388
|
| 615 |
+
002660/008040, loss: 0.439392, avg_loss: 0.518129
|
| 616 |
+
002665/008040, loss: 0.322509, avg_loss: 0.517892
|
| 617 |
+
002670/008040, loss: 0.328728, avg_loss: 0.517541
|
| 618 |
+
002675/008040, loss: 0.224217, avg_loss: 0.517160
|
| 619 |
+
002680/008040, loss: 0.213649, avg_loss: 0.516849
|
| 620 |
+
***** Running dev evaluation *****
|
| 621 |
+
Num examples = 1042
|
| 622 |
+
Instantaneous batch size per device = 32
|
| 623 |
+
epoch 9, step 2680/8040: {'matthews_correlation': 0.23063296136375847}
|
| 624 |
+
002685/008040, loss: 0.357351, avg_loss: 0.516590
|
| 625 |
+
002690/008040, loss: 0.191270, avg_loss: 0.516160
|
| 626 |
+
002695/008040, loss: 0.370189, avg_loss: 0.515724
|
| 627 |
+
002700/008040, loss: 0.336470, avg_loss: 0.515376
|
| 628 |
+
002705/008040, loss: 0.268067, avg_loss: 0.515164
|
| 629 |
+
002710/008040, loss: 0.179003, avg_loss: 0.514576
|
| 630 |
+
002715/008040, loss: 0.427791, avg_loss: 0.514281
|
| 631 |
+
002720/008040, loss: 0.361250, avg_loss: 0.513829
|
| 632 |
+
002725/008040, loss: 0.189704, avg_loss: 0.513455
|
| 633 |
+
002730/008040, loss: 0.273372, avg_loss: 0.513143
|
| 634 |
+
002735/008040, loss: 0.239124, avg_loss: 0.512584
|
| 635 |
+
002740/008040, loss: 0.378276, avg_loss: 0.512291
|
| 636 |
+
002745/008040, loss: 0.646600, avg_loss: 0.512093
|
| 637 |
+
002750/008040, loss: 0.267004, avg_loss: 0.511666
|
| 638 |
+
002755/008040, loss: 0.422471, avg_loss: 0.511320
|
| 639 |
+
002760/008040, loss: 0.209467, avg_loss: 0.510807
|
| 640 |
+
002765/008040, loss: 0.346109, avg_loss: 0.510532
|
| 641 |
+
002770/008040, loss: 0.233545, avg_loss: 0.510248
|
| 642 |
+
002775/008040, loss: 0.466287, avg_loss: 0.509798
|
| 643 |
+
002780/008040, loss: 0.403711, avg_loss: 0.509469
|
| 644 |
+
002785/008040, loss: 0.327383, avg_loss: 0.509096
|
| 645 |
+
002790/008040, loss: 0.404777, avg_loss: 0.508683
|
| 646 |
+
002795/008040, loss: 0.542493, avg_loss: 0.508458
|
| 647 |
+
002800/008040, loss: 0.367202, avg_loss: 0.508142
|
| 648 |
+
002805/008040, loss: 0.368768, avg_loss: 0.507783
|
| 649 |
+
002810/008040, loss: 0.312175, avg_loss: 0.507447
|
| 650 |
+
002815/008040, loss: 0.302803, avg_loss: 0.507226
|
| 651 |
+
002820/008040, loss: 0.401391, avg_loss: 0.507006
|
| 652 |
+
002825/008040, loss: 0.244652, avg_loss: 0.506694
|
| 653 |
+
002830/008040, loss: 0.370769, avg_loss: 0.506369
|
| 654 |
+
002835/008040, loss: 0.182358, avg_loss: 0.505992
|
| 655 |
+
002840/008040, loss: 0.266695, avg_loss: 0.505660
|
| 656 |
+
002845/008040, loss: 0.334674, avg_loss: 0.505358
|
| 657 |
+
002850/008040, loss: 0.348623, avg_loss: 0.505099
|
| 658 |
+
002855/008040, loss: 0.187014, avg_loss: 0.504682
|
| 659 |
+
002860/008040, loss: 0.259943, avg_loss: 0.504276
|
| 660 |
+
002865/008040, loss: 0.382271, avg_loss: 0.503964
|
| 661 |
+
002870/008040, loss: 0.570188, avg_loss: 0.503794
|
| 662 |
+
002875/008040, loss: 0.218833, avg_loss: 0.503395
|
| 663 |
+
002880/008040, loss: 0.356927, avg_loss: 0.502927
|
| 664 |
+
002885/008040, loss: 0.173357, avg_loss: 0.502596
|
| 665 |
+
002890/008040, loss: 0.360153, avg_loss: 0.502279
|
| 666 |
+
002895/008040, loss: 0.295114, avg_loss: 0.501944
|
| 667 |
+
002900/008040, loss: 0.210005, avg_loss: 0.501620
|
| 668 |
+
002905/008040, loss: 0.300519, avg_loss: 0.501388
|
| 669 |
+
002910/008040, loss: 0.207313, avg_loss: 0.501142
|
| 670 |
+
002915/008040, loss: 0.333078, avg_loss: 0.500983
|
| 671 |
+
002920/008040, loss: 0.285614, avg_loss: 0.500578
|
| 672 |
+
002925/008040, loss: 0.373211, avg_loss: 0.500342
|
| 673 |
+
002930/008040, loss: 0.308893, avg_loss: 0.500114
|
| 674 |
+
002935/008040, loss: 0.350144, avg_loss: 0.499820
|
| 675 |
+
002940/008040, loss: 0.439544, avg_loss: 0.499650
|
| 676 |
+
002945/008040, loss: 0.320980, avg_loss: 0.499399
|
| 677 |
+
***** Running dev evaluation *****
|
| 678 |
+
Num examples = 1042
|
| 679 |
+
Instantaneous batch size per device = 32
|
| 680 |
+
epoch 10, step 2948/8040: {'matthews_correlation': 0.18813850606847293}
|
| 681 |
+
002950/008040, loss: 0.588822, avg_loss: 0.499118
|
| 682 |
+
002955/008040, loss: 0.375319, avg_loss: 0.498777
|
| 683 |
+
002960/008040, loss: 0.218809, avg_loss: 0.498292
|
| 684 |
+
002965/008040, loss: 0.346797, avg_loss: 0.497883
|
| 685 |
+
002970/008040, loss: 0.256976, avg_loss: 0.497420
|
| 686 |
+
002975/008040, loss: 0.267139, avg_loss: 0.496947
|
| 687 |
+
002980/008040, loss: 0.096225, avg_loss: 0.496468
|
| 688 |
+
002985/008040, loss: 0.540555, avg_loss: 0.496152
|
| 689 |
+
002990/008040, loss: 0.360075, avg_loss: 0.495919
|
| 690 |
+
002995/008040, loss: 0.439329, avg_loss: 0.495439
|
| 691 |
+
003000/008040, loss: 0.407537, avg_loss: 0.495246
|
| 692 |
+
003005/008040, loss: 0.148738, avg_loss: 0.494893
|
| 693 |
+
003010/008040, loss: 0.212634, avg_loss: 0.494479
|
| 694 |
+
003015/008040, loss: 0.413274, avg_loss: 0.494294
|
| 695 |
+
003020/008040, loss: 0.505027, avg_loss: 0.493973
|
| 696 |
+
003025/008040, loss: 0.353874, avg_loss: 0.493663
|
| 697 |
+
003030/008040, loss: 0.191752, avg_loss: 0.493211
|
| 698 |
+
003035/008040, loss: 0.262918, avg_loss: 0.492834
|
| 699 |
+
003040/008040, loss: 0.251038, avg_loss: 0.492580
|
| 700 |
+
003045/008040, loss: 0.291340, avg_loss: 0.492184
|
| 701 |
+
003050/008040, loss: 0.387451, avg_loss: 0.491783
|
| 702 |
+
003055/008040, loss: 0.277742, avg_loss: 0.491582
|
| 703 |
+
003060/008040, loss: 0.147416, avg_loss: 0.491181
|
| 704 |
+
003065/008040, loss: 0.383072, avg_loss: 0.490947
|
| 705 |
+
003070/008040, loss: 0.332592, avg_loss: 0.490599
|
| 706 |
+
003075/008040, loss: 0.376683, avg_loss: 0.490302
|
| 707 |
+
003080/008040, loss: 0.316549, avg_loss: 0.490015
|
| 708 |
+
003085/008040, loss: 0.248254, avg_loss: 0.489692
|
| 709 |
+
003090/008040, loss: 0.446958, avg_loss: 0.489378
|
| 710 |
+
003095/008040, loss: 0.534116, avg_loss: 0.489182
|
| 711 |
+
003100/008040, loss: 0.186525, avg_loss: 0.488832
|
| 712 |
+
003105/008040, loss: 0.367679, avg_loss: 0.488583
|
| 713 |
+
003110/008040, loss: 0.263721, avg_loss: 0.488235
|
| 714 |
+
003115/008040, loss: 0.160604, avg_loss: 0.487953
|
| 715 |
+
003120/008040, loss: 0.351810, avg_loss: 0.487793
|
| 716 |
+
003125/008040, loss: 0.282861, avg_loss: 0.487567
|
| 717 |
+
003130/008040, loss: 0.291616, avg_loss: 0.487259
|
| 718 |
+
003135/008040, loss: 0.215605, avg_loss: 0.486928
|
| 719 |
+
003140/008040, loss: 0.497842, avg_loss: 0.486740
|
| 720 |
+
003145/008040, loss: 0.141915, avg_loss: 0.486475
|
| 721 |
+
003150/008040, loss: 0.493731, avg_loss: 0.486197
|
| 722 |
+
003155/008040, loss: 0.337679, avg_loss: 0.486007
|
| 723 |
+
003160/008040, loss: 0.423936, avg_loss: 0.485696
|
| 724 |
+
003165/008040, loss: 0.325907, avg_loss: 0.485502
|
| 725 |
+
003170/008040, loss: 0.373995, avg_loss: 0.485242
|
| 726 |
+
003175/008040, loss: 0.221332, avg_loss: 0.484958
|
| 727 |
+
003180/008040, loss: 0.245173, avg_loss: 0.484625
|
| 728 |
+
003185/008040, loss: 0.243983, avg_loss: 0.484395
|
| 729 |
+
003190/008040, loss: 0.279999, avg_loss: 0.484095
|
| 730 |
+
003195/008040, loss: 0.289218, avg_loss: 0.483891
|
| 731 |
+
003200/008040, loss: 0.619335, avg_loss: 0.483651
|
| 732 |
+
003205/008040, loss: 0.231071, avg_loss: 0.483458
|
| 733 |
+
003210/008040, loss: 0.285882, avg_loss: 0.483162
|
| 734 |
+
003215/008040, loss: 0.306520, avg_loss: 0.482869
|
| 735 |
+
***** Running dev evaluation *****
|
| 736 |
+
Num examples = 1042
|
| 737 |
+
Instantaneous batch size per device = 32
|
| 738 |
+
epoch 11, step 3216/8040: {'matthews_correlation': 0.20603205189543294}
|
| 739 |
+
003220/008040, loss: 0.297244, avg_loss: 0.482532
|
| 740 |
+
003225/008040, loss: 0.182570, avg_loss: 0.482196
|
| 741 |
+
003230/008040, loss: 0.242626, avg_loss: 0.481782
|
| 742 |
+
003235/008040, loss: 0.463844, avg_loss: 0.481595
|
| 743 |
+
003240/008040, loss: 0.132137, avg_loss: 0.481325
|
| 744 |
+
003245/008040, loss: 0.392527, avg_loss: 0.481101
|
| 745 |
+
003250/008040, loss: 0.270020, avg_loss: 0.480822
|
| 746 |
+
003255/008040, loss: 0.137306, avg_loss: 0.480509
|
| 747 |
+
003260/008040, loss: 0.391590, avg_loss: 0.480182
|
| 748 |
+
003265/008040, loss: 0.262886, avg_loss: 0.479862
|
| 749 |
+
003270/008040, loss: 0.058543, avg_loss: 0.479458
|
| 750 |
+
003275/008040, loss: 0.404657, avg_loss: 0.479210
|
| 751 |
+
003280/008040, loss: 0.276438, avg_loss: 0.478893
|
| 752 |
+
003285/008040, loss: 0.190946, avg_loss: 0.478536
|
| 753 |
+
003290/008040, loss: 0.490697, avg_loss: 0.478209
|
| 754 |
+
003295/008040, loss: 0.276675, avg_loss: 0.478014
|
| 755 |
+
003300/008040, loss: 0.402889, avg_loss: 0.477756
|
| 756 |
+
003305/008040, loss: 0.233364, avg_loss: 0.477388
|
| 757 |
+
003310/008040, loss: 0.231294, avg_loss: 0.477059
|
| 758 |
+
003315/008040, loss: 0.109852, avg_loss: 0.476722
|
| 759 |
+
003320/008040, loss: 0.320070, avg_loss: 0.476551
|
| 760 |
+
003325/008040, loss: 0.244717, avg_loss: 0.476204
|
| 761 |
+
003330/008040, loss: 0.181756, avg_loss: 0.475863
|
| 762 |
+
003335/008040, loss: 0.222641, avg_loss: 0.475576
|
| 763 |
+
003340/008040, loss: 0.121102, avg_loss: 0.475165
|
| 764 |
+
003345/008040, loss: 0.265407, avg_loss: 0.474816
|
| 765 |
+
003350/008040, loss: 0.322964, avg_loss: 0.474535
|
| 766 |
+
003355/008040, loss: 0.237767, avg_loss: 0.474252
|
| 767 |
+
003360/008040, loss: 0.343557, avg_loss: 0.473983
|
| 768 |
+
003365/008040, loss: 0.257172, avg_loss: 0.473661
|
| 769 |
+
003370/008040, loss: 0.260944, avg_loss: 0.473336
|
| 770 |
+
003375/008040, loss: 0.292535, avg_loss: 0.473136
|
| 771 |
+
003380/008040, loss: 0.228900, avg_loss: 0.472876
|
| 772 |
+
003385/008040, loss: 0.133238, avg_loss: 0.472483
|
| 773 |
+
003390/008040, loss: 0.090823, avg_loss: 0.472257
|
| 774 |
+
003395/008040, loss: 0.353693, avg_loss: 0.471962
|
| 775 |
+
003400/008040, loss: 0.349422, avg_loss: 0.471682
|
| 776 |
+
003405/008040, loss: 0.257864, avg_loss: 0.471517
|
| 777 |
+
003410/008040, loss: 0.252595, avg_loss: 0.471166
|
| 778 |
+
003415/008040, loss: 0.247344, avg_loss: 0.470882
|
| 779 |
+
003420/008040, loss: 0.263462, avg_loss: 0.470580
|
| 780 |
+
003425/008040, loss: 0.343597, avg_loss: 0.470315
|
| 781 |
+
003430/008040, loss: 0.544077, avg_loss: 0.470094
|
| 782 |
+
003435/008040, loss: 0.303536, avg_loss: 0.469789
|
| 783 |
+
003440/008040, loss: 0.327594, avg_loss: 0.469431
|
| 784 |
+
003445/008040, loss: 0.325582, avg_loss: 0.469118
|
| 785 |
+
003450/008040, loss: 0.270005, avg_loss: 0.468807
|
| 786 |
+
003455/008040, loss: 0.373651, avg_loss: 0.468467
|
| 787 |
+
003460/008040, loss: 0.296829, avg_loss: 0.468144
|
| 788 |
+
003465/008040, loss: 0.382215, avg_loss: 0.467829
|
| 789 |
+
003470/008040, loss: 0.412389, avg_loss: 0.467540
|
| 790 |
+
003475/008040, loss: 0.434352, avg_loss: 0.467421
|
| 791 |
+
003480/008040, loss: 0.624913, avg_loss: 0.467201
|
| 792 |
+
***** Running dev evaluation *****
|
| 793 |
+
Num examples = 1042
|
| 794 |
+
Instantaneous batch size per device = 32
|
| 795 |
+
epoch 12, step 3484/8040: {'matthews_correlation': 0.2118432448298745}
|
| 796 |
+
003485/008040, loss: 0.174334, avg_loss: 0.467056
|
| 797 |
+
003490/008040, loss: 0.165397, avg_loss: 0.466660
|
| 798 |
+
003495/008040, loss: 0.265744, avg_loss: 0.466368
|
| 799 |
+
003500/008040, loss: 0.194233, avg_loss: 0.466006
|
| 800 |
+
003505/008040, loss: 0.293150, avg_loss: 0.465843
|
| 801 |
+
003510/008040, loss: 0.190316, avg_loss: 0.465447
|
| 802 |
+
003515/008040, loss: 0.186641, avg_loss: 0.465075
|
| 803 |
+
003520/008040, loss: 0.197128, avg_loss: 0.464644
|
| 804 |
+
003525/008040, loss: 0.258738, avg_loss: 0.464389
|
| 805 |
+
003530/008040, loss: 0.500658, avg_loss: 0.464118
|
| 806 |
+
003535/008040, loss: 0.181811, avg_loss: 0.463812
|
| 807 |
+
003540/008040, loss: 0.255612, avg_loss: 0.463451
|
| 808 |
+
003545/008040, loss: 0.301980, avg_loss: 0.463198
|
| 809 |
+
003550/008040, loss: 0.262177, avg_loss: 0.462909
|
| 810 |
+
003555/008040, loss: 0.150805, avg_loss: 0.462686
|
| 811 |
+
003560/008040, loss: 0.106480, avg_loss: 0.462353
|
| 812 |
+
003565/008040, loss: 0.353977, avg_loss: 0.462018
|
| 813 |
+
003570/008040, loss: 0.095433, avg_loss: 0.461672
|
| 814 |
+
003575/008040, loss: 0.218111, avg_loss: 0.461353
|
| 815 |
+
003580/008040, loss: 0.149131, avg_loss: 0.461044
|
| 816 |
+
003585/008040, loss: 0.267326, avg_loss: 0.460682
|
| 817 |
+
003590/008040, loss: 0.132778, avg_loss: 0.460332
|
| 818 |
+
003595/008040, loss: 0.084128, avg_loss: 0.460019
|
| 819 |
+
003600/008040, loss: 0.240429, avg_loss: 0.459736
|
| 820 |
+
003605/008040, loss: 0.087250, avg_loss: 0.459388
|
| 821 |
+
003610/008040, loss: 0.350120, avg_loss: 0.459246
|
| 822 |
+
003615/008040, loss: 0.268835, avg_loss: 0.458947
|
| 823 |
+
003620/008040, loss: 0.269842, avg_loss: 0.458634
|
| 824 |
+
003625/008040, loss: 0.156989, avg_loss: 0.458435
|
| 825 |
+
003630/008040, loss: 0.230001, avg_loss: 0.458124
|
| 826 |
+
003635/008040, loss: 0.326509, avg_loss: 0.457910
|
| 827 |
+
003640/008040, loss: 0.336903, avg_loss: 0.457636
|
| 828 |
+
003645/008040, loss: 0.484366, avg_loss: 0.457448
|
| 829 |
+
003650/008040, loss: 0.292469, avg_loss: 0.457143
|
| 830 |
+
003655/008040, loss: 0.196029, avg_loss: 0.456941
|
| 831 |
+
003660/008040, loss: 0.388540, avg_loss: 0.456735
|
| 832 |
+
003665/008040, loss: 0.392700, avg_loss: 0.456447
|
| 833 |
+
003670/008040, loss: 0.214234, avg_loss: 0.456100
|
| 834 |
+
003675/008040, loss: 0.166740, avg_loss: 0.455776
|
| 835 |
+
003680/008040, loss: 0.476048, avg_loss: 0.455626
|
| 836 |
+
003685/008040, loss: 0.133647, avg_loss: 0.455296
|
| 837 |
+
003690/008040, loss: 0.138388, avg_loss: 0.454962
|
| 838 |
+
003695/008040, loss: 0.419241, avg_loss: 0.454699
|
| 839 |
+
003700/008040, loss: 0.273247, avg_loss: 0.454571
|
| 840 |
+
003705/008040, loss: 0.335091, avg_loss: 0.454264
|
| 841 |
+
003710/008040, loss: 0.352429, avg_loss: 0.454025
|
| 842 |
+
003715/008040, loss: 0.424709, avg_loss: 0.453788
|
| 843 |
+
003720/008040, loss: 0.169942, avg_loss: 0.453501
|
| 844 |
+
003725/008040, loss: 0.356818, avg_loss: 0.453249
|
| 845 |
+
003730/008040, loss: 0.165223, avg_loss: 0.452948
|
| 846 |
+
003735/008040, loss: 0.186675, avg_loss: 0.452718
|
| 847 |
+
003740/008040, loss: 0.381955, avg_loss: 0.452507
|
| 848 |
+
003745/008040, loss: 0.304955, avg_loss: 0.452266
|
| 849 |
+
003750/008040, loss: 0.405848, avg_loss: 0.452040
|
| 850 |
+
***** Running dev evaluation *****
|
| 851 |
+
Num examples = 1042
|
| 852 |
+
Instantaneous batch size per device = 32
|
| 853 |
+
epoch 13, step 3752/8040: {'matthews_correlation': 0.20261239362380884}
|
| 854 |
+
003755/008040, loss: 0.216770, avg_loss: 0.451750
|
| 855 |
+
003760/008040, loss: 0.223490, avg_loss: 0.451394
|
| 856 |
+
003765/008040, loss: 0.375553, avg_loss: 0.451131
|
| 857 |
+
003770/008040, loss: 0.196744, avg_loss: 0.450761
|
| 858 |
+
003775/008040, loss: 0.363349, avg_loss: 0.450549
|
| 859 |
+
003780/008040, loss: 0.293711, avg_loss: 0.450257
|
| 860 |
+
003785/008040, loss: 0.413372, avg_loss: 0.450013
|
| 861 |
+
003790/008040, loss: 0.122226, avg_loss: 0.449670
|
| 862 |
+
003795/008040, loss: 0.356951, avg_loss: 0.449397
|
| 863 |
+
003800/008040, loss: 0.118402, avg_loss: 0.449095
|
| 864 |
+
003805/008040, loss: 0.146611, avg_loss: 0.448795
|
| 865 |
+
003810/008040, loss: 0.157524, avg_loss: 0.448482
|
| 866 |
+
003815/008040, loss: 0.343435, avg_loss: 0.448149
|
| 867 |
+
003820/008040, loss: 0.368691, avg_loss: 0.447827
|
| 868 |
+
003825/008040, loss: 0.201137, avg_loss: 0.447525
|
| 869 |
+
003830/008040, loss: 0.132014, avg_loss: 0.447167
|
| 870 |
+
003835/008040, loss: 0.090910, avg_loss: 0.446837
|
| 871 |
+
003840/008040, loss: 0.232675, avg_loss: 0.446527
|
| 872 |
+
003845/008040, loss: 0.275011, avg_loss: 0.446207
|
| 873 |
+
003850/008040, loss: 0.152451, avg_loss: 0.446016
|
| 874 |
+
003855/008040, loss: 0.314412, avg_loss: 0.445785
|
| 875 |
+
003860/008040, loss: 0.148032, avg_loss: 0.445503
|
| 876 |
+
003865/008040, loss: 0.165825, avg_loss: 0.445180
|
| 877 |
+
003870/008040, loss: 0.094333, avg_loss: 0.444829
|
| 878 |
+
003875/008040, loss: 0.375745, avg_loss: 0.444747
|
| 879 |
+
003880/008040, loss: 0.403074, avg_loss: 0.444519
|
| 880 |
+
003885/008040, loss: 0.165065, avg_loss: 0.444287
|
| 881 |
+
003890/008040, loss: 0.234658, avg_loss: 0.444039
|
| 882 |
+
003895/008040, loss: 0.319576, avg_loss: 0.443776
|
| 883 |
+
003900/008040, loss: 0.217123, avg_loss: 0.443509
|
| 884 |
+
003905/008040, loss: 0.378043, avg_loss: 0.443243
|
| 885 |
+
003910/008040, loss: 0.122735, avg_loss: 0.443042
|
| 886 |
+
003915/008040, loss: 0.264233, avg_loss: 0.442776
|
| 887 |
+
003920/008040, loss: 0.076704, avg_loss: 0.442449
|
| 888 |
+
003925/008040, loss: 0.125913, avg_loss: 0.442138
|
| 889 |
+
003930/008040, loss: 0.272993, avg_loss: 0.441843
|
| 890 |
+
003935/008040, loss: 0.068447, avg_loss: 0.441514
|
| 891 |
+
003940/008040, loss: 0.244199, avg_loss: 0.441180
|
| 892 |
+
003945/008040, loss: 0.228176, avg_loss: 0.440980
|
| 893 |
+
003950/008040, loss: 0.148637, avg_loss: 0.440699
|
| 894 |
+
003955/008040, loss: 0.299796, avg_loss: 0.440381
|
| 895 |
+
003960/008040, loss: 0.276640, avg_loss: 0.440127
|
| 896 |
+
003965/008040, loss: 0.344766, avg_loss: 0.439945
|
| 897 |
+
003970/008040, loss: 0.085695, avg_loss: 0.439692
|
| 898 |
+
003975/008040, loss: 0.467870, avg_loss: 0.439398
|
| 899 |
+
003980/008040, loss: 0.275529, avg_loss: 0.439101
|
| 900 |
+
003985/008040, loss: 0.342526, avg_loss: 0.438883
|
| 901 |
+
003990/008040, loss: 0.117043, avg_loss: 0.438652
|
| 902 |
+
003995/008040, loss: 0.216054, avg_loss: 0.438416
|
| 903 |
+
004000/008040, loss: 0.194812, avg_loss: 0.438143
|
| 904 |
+
004005/008040, loss: 0.173094, avg_loss: 0.437904
|
| 905 |
+
004010/008040, loss: 0.072183, avg_loss: 0.437564
|
| 906 |
+
004015/008040, loss: 0.403915, avg_loss: 0.437357
|
| 907 |
+
004020/008040, loss: 0.087834, avg_loss: 0.436997
|
| 908 |
+
***** Running dev evaluation *****
|
| 909 |
+
Num examples = 1042
|
| 910 |
+
Instantaneous batch size per device = 32
|
| 911 |
+
epoch 14, step 4020/8040: {'matthews_correlation': 0.22518881045488998}
|
| 912 |
+
004025/008040, loss: 0.165382, avg_loss: 0.436778
|
| 913 |
+
004030/008040, loss: 0.179677, avg_loss: 0.436415
|
| 914 |
+
004035/008040, loss: 0.185260, avg_loss: 0.436160
|
| 915 |
+
004040/008040, loss: 0.095814, avg_loss: 0.435886
|
| 916 |
+
004045/008040, loss: 0.345136, avg_loss: 0.435673
|
| 917 |
+
004050/008040, loss: 0.150750, avg_loss: 0.435363
|
| 918 |
+
004055/008040, loss: 0.185758, avg_loss: 0.435133
|
| 919 |
+
004060/008040, loss: 0.212922, avg_loss: 0.434953
|
| 920 |
+
004065/008040, loss: 0.149902, avg_loss: 0.434669
|
| 921 |
+
004070/008040, loss: 0.089165, avg_loss: 0.434428
|
| 922 |
+
004075/008040, loss: 0.168942, avg_loss: 0.434088
|
| 923 |
+
004080/008040, loss: 0.170014, avg_loss: 0.433740
|
| 924 |
+
004085/008040, loss: 0.148718, avg_loss: 0.433445
|
| 925 |
+
004090/008040, loss: 0.307841, avg_loss: 0.433193
|
| 926 |
+
004095/008040, loss: 0.170424, avg_loss: 0.432888
|
| 927 |
+
004100/008040, loss: 0.253233, avg_loss: 0.432721
|
| 928 |
+
004105/008040, loss: 0.243379, avg_loss: 0.432492
|
| 929 |
+
004110/008040, loss: 0.197091, avg_loss: 0.432229
|
| 930 |
+
004115/008040, loss: 0.149977, avg_loss: 0.431969
|
| 931 |
+
004120/008040, loss: 0.255616, avg_loss: 0.431698
|
| 932 |
+
004125/008040, loss: 0.144500, avg_loss: 0.431442
|
| 933 |
+
004130/008040, loss: 0.214776, avg_loss: 0.431226
|
| 934 |
+
004135/008040, loss: 0.155176, avg_loss: 0.430925
|
| 935 |
+
004140/008040, loss: 0.195912, avg_loss: 0.430689
|
| 936 |
+
004145/008040, loss: 0.148231, avg_loss: 0.430308
|
| 937 |
+
004150/008040, loss: 0.309211, avg_loss: 0.430062
|
| 938 |
+
004155/008040, loss: 0.400494, avg_loss: 0.429949
|
| 939 |
+
004160/008040, loss: 0.190466, avg_loss: 0.429596
|
| 940 |
+
004165/008040, loss: 0.115031, avg_loss: 0.429430
|
| 941 |
+
004170/008040, loss: 0.154674, avg_loss: 0.429143
|
| 942 |
+
004175/008040, loss: 0.087612, avg_loss: 0.428819
|
| 943 |
+
004180/008040, loss: 0.391154, avg_loss: 0.428581
|
| 944 |
+
004185/008040, loss: 0.233200, avg_loss: 0.428438
|
| 945 |
+
004190/008040, loss: 0.113590, avg_loss: 0.428204
|
| 946 |
+
004195/008040, loss: 0.139859, avg_loss: 0.427997
|
| 947 |
+
004200/008040, loss: 0.365552, avg_loss: 0.427729
|
| 948 |
+
004205/008040, loss: 0.285945, avg_loss: 0.427567
|
| 949 |
+
004210/008040, loss: 0.298795, avg_loss: 0.427337
|
| 950 |
+
004215/008040, loss: 0.184676, avg_loss: 0.427015
|
| 951 |
+
004220/008040, loss: 0.347303, avg_loss: 0.426763
|
| 952 |
+
004225/008040, loss: 0.249475, avg_loss: 0.426473
|
| 953 |
+
004230/008040, loss: 0.345056, avg_loss: 0.426234
|
| 954 |
+
004235/008040, loss: 0.132455, avg_loss: 0.425935
|
| 955 |
+
004240/008040, loss: 0.083139, avg_loss: 0.425697
|
| 956 |
+
004245/008040, loss: 0.186649, avg_loss: 0.425451
|
| 957 |
+
004250/008040, loss: 0.159150, avg_loss: 0.425129
|
| 958 |
+
004255/008040, loss: 0.119297, avg_loss: 0.424885
|
| 959 |
+
004260/008040, loss: 0.233108, avg_loss: 0.424649
|
| 960 |
+
004265/008040, loss: 0.144114, avg_loss: 0.424408
|
| 961 |
+
004270/008040, loss: 0.210518, avg_loss: 0.424164
|
| 962 |
+
004275/008040, loss: 0.242731, avg_loss: 0.423926
|
| 963 |
+
004280/008040, loss: 0.157653, avg_loss: 0.423696
|
| 964 |
+
004285/008040, loss: 0.191035, avg_loss: 0.423368
|
| 965 |
+
***** Running dev evaluation *****
|
| 966 |
+
Num examples = 1042
|
| 967 |
+
Instantaneous batch size per device = 32
|
| 968 |
+
epoch 15, step 4288/8040: {'matthews_correlation': 0.24863648291608131}
|
| 969 |
+
004290/008040, loss: 0.266971, avg_loss: 0.423141
|
| 970 |
+
004295/008040, loss: 0.322248, avg_loss: 0.422900
|
| 971 |
+
004300/008040, loss: 0.231828, avg_loss: 0.422700
|
| 972 |
+
004305/008040, loss: 0.072297, avg_loss: 0.422383
|
| 973 |
+
004310/008040, loss: 0.193845, avg_loss: 0.422057
|
| 974 |
+
004315/008040, loss: 0.217596, avg_loss: 0.421784
|
| 975 |
+
004320/008040, loss: 0.393519, avg_loss: 0.421559
|
| 976 |
+
004325/008040, loss: 0.400877, avg_loss: 0.421312
|
| 977 |
+
004330/008040, loss: 0.087280, avg_loss: 0.420988
|
| 978 |
+
004335/008040, loss: 0.124804, avg_loss: 0.420766
|
| 979 |
+
004340/008040, loss: 0.130229, avg_loss: 0.420461
|
| 980 |
+
004345/008040, loss: 0.304618, avg_loss: 0.420192
|
| 981 |
+
004350/008040, loss: 0.189475, avg_loss: 0.419871
|
| 982 |
+
004355/008040, loss: 0.203492, avg_loss: 0.419608
|
| 983 |
+
004360/008040, loss: 0.144623, avg_loss: 0.419307
|
| 984 |
+
004365/008040, loss: 0.127642, avg_loss: 0.418997
|
| 985 |
+
004370/008040, loss: 0.218811, avg_loss: 0.418663
|
| 986 |
+
004375/008040, loss: 0.059012, avg_loss: 0.418390
|
| 987 |
+
004380/008040, loss: 0.237954, avg_loss: 0.418244
|
| 988 |
+
004385/008040, loss: 0.248724, avg_loss: 0.418026
|
| 989 |
+
004390/008040, loss: 0.160703, avg_loss: 0.417783
|
| 990 |
+
004395/008040, loss: 0.276666, avg_loss: 0.417556
|
| 991 |
+
004400/008040, loss: 0.101335, avg_loss: 0.417345
|
| 992 |
+
004405/008040, loss: 0.468661, avg_loss: 0.417191
|
| 993 |
+
004410/008040, loss: 0.154267, avg_loss: 0.416879
|
| 994 |
+
004415/008040, loss: 0.196224, avg_loss: 0.416633
|
| 995 |
+
004420/008040, loss: 0.092791, avg_loss: 0.416405
|
| 996 |
+
004425/008040, loss: 0.447025, avg_loss: 0.416244
|
| 997 |
+
004430/008040, loss: 0.225542, avg_loss: 0.415959
|
| 998 |
+
004435/008040, loss: 0.091548, avg_loss: 0.415671
|
| 999 |
+
004440/008040, loss: 0.116130, avg_loss: 0.415333
|
| 1000 |
+
004445/008040, loss: 0.225495, avg_loss: 0.415069
|
| 1001 |
+
004450/008040, loss: 0.213666, avg_loss: 0.414828
|
| 1002 |
+
004455/008040, loss: 0.130709, avg_loss: 0.414580
|
| 1003 |
+
004460/008040, loss: 0.225696, avg_loss: 0.414366
|
| 1004 |
+
004465/008040, loss: 0.322512, avg_loss: 0.414150
|
| 1005 |
+
004470/008040, loss: 0.409171, avg_loss: 0.413917
|
| 1006 |
+
004475/008040, loss: 0.459070, avg_loss: 0.413800
|
| 1007 |
+
004480/008040, loss: 0.205403, avg_loss: 0.413537
|
| 1008 |
+
004485/008040, loss: 0.097172, avg_loss: 0.413289
|
| 1009 |
+
004490/008040, loss: 0.104971, avg_loss: 0.413039
|
| 1010 |
+
004495/008040, loss: 0.269551, avg_loss: 0.412801
|
| 1011 |
+
004500/008040, loss: 0.151229, avg_loss: 0.412506
|
| 1012 |
+
004505/008040, loss: 0.137360, avg_loss: 0.412240
|
| 1013 |
+
004510/008040, loss: 0.349339, avg_loss: 0.412084
|
| 1014 |
+
004515/008040, loss: 0.074355, avg_loss: 0.411830
|
| 1015 |
+
004520/008040, loss: 0.165137, avg_loss: 0.411674
|
| 1016 |
+
004525/008040, loss: 0.120821, avg_loss: 0.411390
|
| 1017 |
+
004530/008040, loss: 0.156756, avg_loss: 0.411117
|
| 1018 |
+
004535/008040, loss: 0.131685, avg_loss: 0.410892
|
| 1019 |
+
004540/008040, loss: 0.215486, avg_loss: 0.410740
|
| 1020 |
+
004545/008040, loss: 0.276792, avg_loss: 0.410564
|
| 1021 |
+
004550/008040, loss: 0.163451, avg_loss: 0.410302
|
| 1022 |
+
004555/008040, loss: 0.153240, avg_loss: 0.410035
|
| 1023 |
+
***** Running dev evaluation *****
|
| 1024 |
+
Num examples = 1042
|
| 1025 |
+
Instantaneous batch size per device = 32
|
| 1026 |
+
epoch 16, step 4556/8040: {'matthews_correlation': 0.19984853723708582}
|
| 1027 |
+
004560/008040, loss: 0.091924, avg_loss: 0.409807
|
| 1028 |
+
004565/008040, loss: 0.285278, avg_loss: 0.409558
|
| 1029 |
+
004570/008040, loss: 0.153244, avg_loss: 0.409386
|
| 1030 |
+
004575/008040, loss: 0.117640, avg_loss: 0.409192
|
| 1031 |
+
004580/008040, loss: 0.196797, avg_loss: 0.408980
|
| 1032 |
+
004585/008040, loss: 0.170434, avg_loss: 0.408724
|
| 1033 |
+
004590/008040, loss: 0.291520, avg_loss: 0.408442
|
| 1034 |
+
004595/008040, loss: 0.095928, avg_loss: 0.408124
|
| 1035 |
+
004600/008040, loss: 0.133423, avg_loss: 0.407880
|
| 1036 |
+
004605/008040, loss: 0.224401, avg_loss: 0.407606
|
| 1037 |
+
004610/008040, loss: 0.244196, avg_loss: 0.407368
|
| 1038 |
+
004615/008040, loss: 0.086107, avg_loss: 0.407023
|
| 1039 |
+
004620/008040, loss: 0.088616, avg_loss: 0.406692
|
| 1040 |
+
004625/008040, loss: 0.182435, avg_loss: 0.406431
|
| 1041 |
+
004630/008040, loss: 0.108714, avg_loss: 0.406193
|
| 1042 |
+
004635/008040, loss: 0.052255, avg_loss: 0.405910
|
| 1043 |
+
004640/008040, loss: 0.201341, avg_loss: 0.405591
|
| 1044 |
+
004645/008040, loss: 0.125202, avg_loss: 0.405348
|
| 1045 |
+
004650/008040, loss: 0.262958, avg_loss: 0.405130
|
| 1046 |
+
004655/008040, loss: 0.136696, avg_loss: 0.404868
|
| 1047 |
+
004660/008040, loss: 0.232297, avg_loss: 0.404751
|
| 1048 |
+
004665/008040, loss: 0.169257, avg_loss: 0.404498
|
| 1049 |
+
004670/008040, loss: 0.086830, avg_loss: 0.404246
|
| 1050 |
+
004675/008040, loss: 0.199137, avg_loss: 0.404007
|
| 1051 |
+
004680/008040, loss: 0.169171, avg_loss: 0.403782
|
| 1052 |
+
004685/008040, loss: 0.112735, avg_loss: 0.403494
|
| 1053 |
+
004690/008040, loss: 0.240913, avg_loss: 0.403283
|
| 1054 |
+
004695/008040, loss: 0.228971, avg_loss: 0.403096
|
| 1055 |
+
004700/008040, loss: 0.203035, avg_loss: 0.402813
|
| 1056 |
+
004705/008040, loss: 0.252691, avg_loss: 0.402542
|
| 1057 |
+
004710/008040, loss: 0.183577, avg_loss: 0.402340
|
| 1058 |
+
004715/008040, loss: 0.152381, avg_loss: 0.402066
|
| 1059 |
+
004720/008040, loss: 0.081548, avg_loss: 0.401885
|
| 1060 |
+
004725/008040, loss: 0.243844, avg_loss: 0.401712
|
| 1061 |
+
004730/008040, loss: 0.191078, avg_loss: 0.401477
|
| 1062 |
+
004735/008040, loss: 0.309967, avg_loss: 0.401239
|
| 1063 |
+
004740/008040, loss: 0.177741, avg_loss: 0.400996
|
| 1064 |
+
004745/008040, loss: 0.443657, avg_loss: 0.400854
|
| 1065 |
+
004750/008040, loss: 0.174745, avg_loss: 0.400599
|
| 1066 |
+
004755/008040, loss: 0.117440, avg_loss: 0.400411
|
| 1067 |
+
004760/008040, loss: 0.216662, avg_loss: 0.400179
|
| 1068 |
+
004765/008040, loss: 0.331156, avg_loss: 0.399990
|
| 1069 |
+
004770/008040, loss: 0.239916, avg_loss: 0.399799
|
| 1070 |
+
004775/008040, loss: 0.392543, avg_loss: 0.399598
|
| 1071 |
+
004780/008040, loss: 0.310010, avg_loss: 0.399412
|
| 1072 |
+
004785/008040, loss: 0.176596, avg_loss: 0.399203
|
| 1073 |
+
004790/008040, loss: 0.193463, avg_loss: 0.399010
|
| 1074 |
+
004795/008040, loss: 0.065539, avg_loss: 0.398731
|
| 1075 |
+
004800/008040, loss: 0.078370, avg_loss: 0.398525
|
| 1076 |
+
004805/008040, loss: 0.268404, avg_loss: 0.398395
|
| 1077 |
+
004810/008040, loss: 0.130425, avg_loss: 0.398184
|
| 1078 |
+
004815/008040, loss: 0.124595, avg_loss: 0.397953
|
| 1079 |
+
004820/008040, loss: 0.351294, avg_loss: 0.397781
|
| 1080 |
+
***** Running dev evaluation *****
|
| 1081 |
+
Num examples = 1042
|
| 1082 |
+
Instantaneous batch size per device = 32
|
| 1083 |
+
epoch 17, step 4824/8040: {'matthews_correlation': 0.23319244596326755}
|
| 1084 |
+
004825/008040, loss: 0.117143, avg_loss: 0.397605
|
| 1085 |
+
004830/008040, loss: 0.085320, avg_loss: 0.397402
|
| 1086 |
+
004835/008040, loss: 0.329389, avg_loss: 0.397171
|
| 1087 |
+
004840/008040, loss: 0.167244, avg_loss: 0.396923
|
| 1088 |
+
004845/008040, loss: 0.084977, avg_loss: 0.396725
|
| 1089 |
+
004850/008040, loss: 0.170633, avg_loss: 0.396479
|
| 1090 |
+
004855/008040, loss: 0.162252, avg_loss: 0.396249
|
| 1091 |
+
004860/008040, loss: 0.242330, avg_loss: 0.396023
|
| 1092 |
+
004865/008040, loss: 0.158724, avg_loss: 0.395797
|
| 1093 |
+
004870/008040, loss: 0.145546, avg_loss: 0.395552
|
| 1094 |
+
004875/008040, loss: 0.154330, avg_loss: 0.395311
|
| 1095 |
+
004880/008040, loss: 0.156234, avg_loss: 0.395089
|
| 1096 |
+
004885/008040, loss: 0.104371, avg_loss: 0.394829
|
| 1097 |
+
004890/008040, loss: 0.116719, avg_loss: 0.394561
|
| 1098 |
+
004895/008040, loss: 0.167743, avg_loss: 0.394364
|
| 1099 |
+
004900/008040, loss: 0.064857, avg_loss: 0.394083
|
| 1100 |
+
004905/008040, loss: 0.067141, avg_loss: 0.393839
|
| 1101 |
+
004910/008040, loss: 0.099388, avg_loss: 0.393620
|
| 1102 |
+
004915/008040, loss: 0.121026, avg_loss: 0.393411
|
| 1103 |
+
004920/008040, loss: 0.237852, avg_loss: 0.393219
|
| 1104 |
+
004925/008040, loss: 0.116583, avg_loss: 0.392966
|
| 1105 |
+
004930/008040, loss: 0.066189, avg_loss: 0.392755
|
| 1106 |
+
004935/008040, loss: 0.100841, avg_loss: 0.392546
|
| 1107 |
+
004940/008040, loss: 0.184811, avg_loss: 0.392326
|
| 1108 |
+
004945/008040, loss: 0.261129, avg_loss: 0.392141
|
| 1109 |
+
004950/008040, loss: 0.227229, avg_loss: 0.391920
|
| 1110 |
+
004955/008040, loss: 0.073722, avg_loss: 0.391622
|
| 1111 |
+
004960/008040, loss: 0.217448, avg_loss: 0.391421
|
| 1112 |
+
004965/008040, loss: 0.166534, avg_loss: 0.391247
|
| 1113 |
+
004970/008040, loss: 0.169978, avg_loss: 0.391054
|
| 1114 |
+
004975/008040, loss: 0.162336, avg_loss: 0.390803
|
| 1115 |
+
004980/008040, loss: 0.100707, avg_loss: 0.390613
|
| 1116 |
+
004985/008040, loss: 0.066454, avg_loss: 0.390380
|
| 1117 |
+
004990/008040, loss: 0.268293, avg_loss: 0.390155
|
| 1118 |
+
004995/008040, loss: 0.299800, avg_loss: 0.389940
|
| 1119 |
+
005000/008040, loss: 0.159697, avg_loss: 0.389720
|
| 1120 |
+
005005/008040, loss: 0.089164, avg_loss: 0.389519
|
| 1121 |
+
005010/008040, loss: 0.083933, avg_loss: 0.389239
|
| 1122 |
+
005015/008040, loss: 0.109845, avg_loss: 0.389072
|
| 1123 |
+
005020/008040, loss: 0.323453, avg_loss: 0.388975
|
| 1124 |
+
005025/008040, loss: 0.218569, avg_loss: 0.388836
|
| 1125 |
+
005030/008040, loss: 0.306596, avg_loss: 0.388684
|
| 1126 |
+
005035/008040, loss: 0.214361, avg_loss: 0.388518
|
| 1127 |
+
005040/008040, loss: 0.203883, avg_loss: 0.388300
|
| 1128 |
+
005045/008040, loss: 0.119648, avg_loss: 0.388050
|
| 1129 |
+
005050/008040, loss: 0.241945, avg_loss: 0.387813
|
| 1130 |
+
005055/008040, loss: 0.295856, avg_loss: 0.387694
|
| 1131 |
+
005060/008040, loss: 0.299737, avg_loss: 0.387502
|
| 1132 |
+
005065/008040, loss: 0.173353, avg_loss: 0.387261
|
| 1133 |
+
005070/008040, loss: 0.148706, avg_loss: 0.387023
|
| 1134 |
+
005075/008040, loss: 0.235021, avg_loss: 0.386797
|
| 1135 |
+
005080/008040, loss: 0.309368, avg_loss: 0.386680
|
| 1136 |
+
005085/008040, loss: 0.187352, avg_loss: 0.386492
|
| 1137 |
+
005090/008040, loss: 0.124517, avg_loss: 0.386330
|
| 1138 |
+
***** Running dev evaluation *****
|
| 1139 |
+
Num examples = 1042
|
| 1140 |
+
Instantaneous batch size per device = 32
|
| 1141 |
+
epoch 18, step 5092/8040: {'matthews_correlation': 0.24348660475263997}
|
| 1142 |
+
005095/008040, loss: 0.244682, avg_loss: 0.386098
|
| 1143 |
+
005100/008040, loss: 0.038414, avg_loss: 0.385838
|
| 1144 |
+
005105/008040, loss: 0.095592, avg_loss: 0.385585
|
| 1145 |
+
005110/008040, loss: 0.112026, avg_loss: 0.385404
|
| 1146 |
+
005115/008040, loss: 0.193563, avg_loss: 0.385160
|
| 1147 |
+
005120/008040, loss: 0.169361, avg_loss: 0.384950
|
| 1148 |
+
005125/008040, loss: 0.115310, avg_loss: 0.384794
|
| 1149 |
+
005130/008040, loss: 0.111171, avg_loss: 0.384595
|
| 1150 |
+
005135/008040, loss: 0.347275, avg_loss: 0.384407
|
| 1151 |
+
005140/008040, loss: 0.066092, avg_loss: 0.384158
|
| 1152 |
+
005145/008040, loss: 0.038691, avg_loss: 0.383875
|
| 1153 |
+
005150/008040, loss: 0.187798, avg_loss: 0.383690
|
| 1154 |
+
005155/008040, loss: 0.080341, avg_loss: 0.383389
|
| 1155 |
+
005160/008040, loss: 0.250113, avg_loss: 0.383158
|
| 1156 |
+
005165/008040, loss: 0.230404, avg_loss: 0.382940
|
| 1157 |
+
005170/008040, loss: 0.199019, avg_loss: 0.382771
|
| 1158 |
+
005175/008040, loss: 0.042526, avg_loss: 0.382549
|
| 1159 |
+
005180/008040, loss: 0.107391, avg_loss: 0.382405
|
| 1160 |
+
005185/008040, loss: 0.123089, avg_loss: 0.382158
|
| 1161 |
+
005190/008040, loss: 0.211129, avg_loss: 0.381957
|
| 1162 |
+
005195/008040, loss: 0.191329, avg_loss: 0.381727
|
| 1163 |
+
005200/008040, loss: 0.247005, avg_loss: 0.381528
|
| 1164 |
+
005205/008040, loss: 0.151045, avg_loss: 0.381342
|
| 1165 |
+
005210/008040, loss: 0.221767, avg_loss: 0.381150
|
| 1166 |
+
005215/008040, loss: 0.098915, avg_loss: 0.380916
|
| 1167 |
+
005220/008040, loss: 0.120604, avg_loss: 0.380805
|
| 1168 |
+
005225/008040, loss: 0.198758, avg_loss: 0.380580
|
| 1169 |
+
005230/008040, loss: 0.170238, avg_loss: 0.380375
|
| 1170 |
+
005235/008040, loss: 0.287471, avg_loss: 0.380185
|
| 1171 |
+
005240/008040, loss: 0.099829, avg_loss: 0.379951
|
| 1172 |
+
005245/008040, loss: 0.192130, avg_loss: 0.379746
|
| 1173 |
+
005250/008040, loss: 0.174886, avg_loss: 0.379625
|
| 1174 |
+
005255/008040, loss: 0.154950, avg_loss: 0.379463
|
| 1175 |
+
005260/008040, loss: 0.315916, avg_loss: 0.379261
|
| 1176 |
+
005265/008040, loss: 0.142192, avg_loss: 0.379091
|
| 1177 |
+
005270/008040, loss: 0.157938, avg_loss: 0.378953
|
| 1178 |
+
005275/008040, loss: 0.112631, avg_loss: 0.378706
|
| 1179 |
+
005280/008040, loss: 0.112058, avg_loss: 0.378528
|
| 1180 |
+
005285/008040, loss: 0.109890, avg_loss: 0.378278
|
| 1181 |
+
005290/008040, loss: 0.096205, avg_loss: 0.378069
|
| 1182 |
+
005295/008040, loss: 0.101920, avg_loss: 0.377793
|
| 1183 |
+
005300/008040, loss: 0.223294, avg_loss: 0.377597
|
| 1184 |
+
005305/008040, loss: 0.334869, avg_loss: 0.377448
|
| 1185 |
+
005310/008040, loss: 0.133110, avg_loss: 0.377219
|
| 1186 |
+
005315/008040, loss: 0.285188, avg_loss: 0.377080
|
| 1187 |
+
005320/008040, loss: 0.093014, avg_loss: 0.376865
|
| 1188 |
+
005325/008040, loss: 0.064642, avg_loss: 0.376663
|
| 1189 |
+
005330/008040, loss: 0.399625, avg_loss: 0.376612
|
| 1190 |
+
005335/008040, loss: 0.099368, avg_loss: 0.376419
|
| 1191 |
+
005340/008040, loss: 0.127971, avg_loss: 0.376192
|
| 1192 |
+
005345/008040, loss: 0.276726, avg_loss: 0.376028
|
| 1193 |
+
005350/008040, loss: 0.203088, avg_loss: 0.375818
|
| 1194 |
+
005355/008040, loss: 0.162861, avg_loss: 0.375639
|
| 1195 |
+
005360/008040, loss: 0.111333, avg_loss: 0.375443
|
| 1196 |
+
***** Running dev evaluation *****
|
| 1197 |
+
Num examples = 1042
|
| 1198 |
+
Instantaneous batch size per device = 32
|
| 1199 |
+
epoch 19, step 5360/8040: {'matthews_correlation': 0.2545245288314363}
|
| 1200 |
+
005365/008040, loss: 0.182490, avg_loss: 0.375250
|
| 1201 |
+
005370/008040, loss: 0.306611, avg_loss: 0.375098
|
| 1202 |
+
005375/008040, loss: 0.086688, avg_loss: 0.374876
|
| 1203 |
+
005380/008040, loss: 0.073351, avg_loss: 0.374639
|
| 1204 |
+
005385/008040, loss: 0.076141, avg_loss: 0.374468
|
| 1205 |
+
005390/008040, loss: 0.263192, avg_loss: 0.374279
|
| 1206 |
+
005395/008040, loss: 0.057974, avg_loss: 0.374064
|
| 1207 |
+
005400/008040, loss: 0.168771, avg_loss: 0.373868
|
| 1208 |
+
005405/008040, loss: 0.101774, avg_loss: 0.373638
|
| 1209 |
+
005410/008040, loss: 0.233914, avg_loss: 0.373431
|
| 1210 |
+
005415/008040, loss: 0.199603, avg_loss: 0.373236
|
| 1211 |
+
005420/008040, loss: 0.069979, avg_loss: 0.373044
|
| 1212 |
+
005425/008040, loss: 0.157929, avg_loss: 0.372784
|
| 1213 |
+
005430/008040, loss: 0.102463, avg_loss: 0.372559
|
| 1214 |
+
005435/008040, loss: 0.311656, avg_loss: 0.372387
|
| 1215 |
+
005440/008040, loss: 0.025280, avg_loss: 0.372177
|
| 1216 |
+
005445/008040, loss: 0.199722, avg_loss: 0.372008
|
| 1217 |
+
005450/008040, loss: 0.037342, avg_loss: 0.371794
|
| 1218 |
+
005455/008040, loss: 0.464360, avg_loss: 0.371634
|
| 1219 |
+
005460/008040, loss: 0.151161, avg_loss: 0.371387
|
| 1220 |
+
005465/008040, loss: 0.131248, avg_loss: 0.371174
|
| 1221 |
+
005470/008040, loss: 0.091763, avg_loss: 0.370973
|
| 1222 |
+
005475/008040, loss: 0.124437, avg_loss: 0.370771
|
| 1223 |
+
005480/008040, loss: 0.056806, avg_loss: 0.370576
|
| 1224 |
+
005485/008040, loss: 0.053934, avg_loss: 0.370370
|
| 1225 |
+
005490/008040, loss: 0.134340, avg_loss: 0.370134
|
| 1226 |
+
005495/008040, loss: 0.403093, avg_loss: 0.369976
|
| 1227 |
+
005500/008040, loss: 0.295253, avg_loss: 0.369797
|
| 1228 |
+
005505/008040, loss: 0.123554, avg_loss: 0.369599
|
| 1229 |
+
005510/008040, loss: 0.146412, avg_loss: 0.369405
|
| 1230 |
+
005515/008040, loss: 0.086848, avg_loss: 0.369164
|
| 1231 |
+
005520/008040, loss: 0.190395, avg_loss: 0.368969
|
| 1232 |
+
005525/008040, loss: 0.214298, avg_loss: 0.368869
|
| 1233 |
+
005530/008040, loss: 0.157094, avg_loss: 0.368707
|
| 1234 |
+
005535/008040, loss: 0.236498, avg_loss: 0.368588
|
| 1235 |
+
005540/008040, loss: 0.150522, avg_loss: 0.368397
|
| 1236 |
+
005545/008040, loss: 0.056312, avg_loss: 0.368192
|
| 1237 |
+
005550/008040, loss: 0.132276, avg_loss: 0.367994
|
| 1238 |
+
005555/008040, loss: 0.152209, avg_loss: 0.367803
|
| 1239 |
+
005560/008040, loss: 0.123136, avg_loss: 0.367632
|
| 1240 |
+
005565/008040, loss: 0.295406, avg_loss: 0.367456
|
| 1241 |
+
005570/008040, loss: 0.163695, avg_loss: 0.367255
|
| 1242 |
+
005575/008040, loss: 0.032764, avg_loss: 0.367021
|
| 1243 |
+
005580/008040, loss: 0.077804, avg_loss: 0.366803
|
| 1244 |
+
005585/008040, loss: 0.426609, avg_loss: 0.366718
|
| 1245 |
+
005590/008040, loss: 0.170544, avg_loss: 0.366554
|
| 1246 |
+
005595/008040, loss: 0.121247, avg_loss: 0.366328
|
| 1247 |
+
005600/008040, loss: 0.118504, avg_loss: 0.366139
|
| 1248 |
+
005605/008040, loss: 0.127036, avg_loss: 0.365945
|
| 1249 |
+
005610/008040, loss: 0.253191, avg_loss: 0.365772
|
| 1250 |
+
005615/008040, loss: 0.132579, avg_loss: 0.365584
|
| 1251 |
+
005620/008040, loss: 0.206162, avg_loss: 0.365378
|
| 1252 |
+
005625/008040, loss: 0.138357, avg_loss: 0.365198
|
| 1253 |
+
***** Running dev evaluation *****
|
| 1254 |
+
Num examples = 1042
|
| 1255 |
+
Instantaneous batch size per device = 32
|
| 1256 |
+
epoch 20, step 5628/8040: {'matthews_correlation': 0.20994533418798944}
|
| 1257 |
+
005630/008040, loss: 0.220501, avg_loss: 0.364957
|
| 1258 |
+
005635/008040, loss: 0.176737, avg_loss: 0.364715
|
| 1259 |
+
005640/008040, loss: 0.150695, avg_loss: 0.364518
|
| 1260 |
+
005645/008040, loss: 0.186189, avg_loss: 0.364357
|
| 1261 |
+
005650/008040, loss: 0.238791, avg_loss: 0.364176
|
| 1262 |
+
005655/008040, loss: 0.128307, avg_loss: 0.363933
|
| 1263 |
+
005660/008040, loss: 0.098545, avg_loss: 0.363732
|
| 1264 |
+
005665/008040, loss: 0.059385, avg_loss: 0.363478
|
| 1265 |
+
005670/008040, loss: 0.106437, avg_loss: 0.363278
|
| 1266 |
+
005675/008040, loss: 0.051390, avg_loss: 0.363102
|
| 1267 |
+
005680/008040, loss: 0.264690, avg_loss: 0.362942
|
| 1268 |
+
005685/008040, loss: 0.051885, avg_loss: 0.362837
|
| 1269 |
+
005690/008040, loss: 0.186132, avg_loss: 0.362659
|
| 1270 |
+
005695/008040, loss: 0.110282, avg_loss: 0.362424
|
| 1271 |
+
005700/008040, loss: 0.035696, avg_loss: 0.362194
|
| 1272 |
+
005705/008040, loss: 0.210790, avg_loss: 0.362002
|
| 1273 |
+
005710/008040, loss: 0.176916, avg_loss: 0.361813
|
| 1274 |
+
005715/008040, loss: 0.068533, avg_loss: 0.361627
|
| 1275 |
+
005720/008040, loss: 0.059564, avg_loss: 0.361417
|
| 1276 |
+
005725/008040, loss: 0.087551, avg_loss: 0.361274
|
| 1277 |
+
005730/008040, loss: 0.166153, avg_loss: 0.361102
|
| 1278 |
+
005735/008040, loss: 0.123037, avg_loss: 0.360934
|
| 1279 |
+
005740/008040, loss: 0.175274, avg_loss: 0.360734
|
| 1280 |
+
005745/008040, loss: 0.053053, avg_loss: 0.360546
|
| 1281 |
+
005750/008040, loss: 0.193917, avg_loss: 0.360384
|
| 1282 |
+
005755/008040, loss: 0.192751, avg_loss: 0.360214
|
| 1283 |
+
005760/008040, loss: 0.101878, avg_loss: 0.360026
|
| 1284 |
+
005765/008040, loss: 0.085134, avg_loss: 0.359832
|
| 1285 |
+
005770/008040, loss: 0.293009, avg_loss: 0.359642
|
| 1286 |
+
005775/008040, loss: 0.104310, avg_loss: 0.359413
|
| 1287 |
+
005780/008040, loss: 0.011819, avg_loss: 0.359168
|
| 1288 |
+
005785/008040, loss: 0.210015, avg_loss: 0.359009
|
| 1289 |
+
005790/008040, loss: 0.200174, avg_loss: 0.358879
|
| 1290 |
+
005795/008040, loss: 0.141055, avg_loss: 0.358671
|
| 1291 |
+
005800/008040, loss: 0.078129, avg_loss: 0.358469
|
| 1292 |
+
005805/008040, loss: 0.083557, avg_loss: 0.358299
|
| 1293 |
+
005810/008040, loss: 0.075039, avg_loss: 0.358079
|
| 1294 |
+
005815/008040, loss: 0.080684, avg_loss: 0.357903
|
| 1295 |
+
005820/008040, loss: 0.345792, avg_loss: 0.357738
|
| 1296 |
+
005825/008040, loss: 0.078370, avg_loss: 0.357590
|
| 1297 |
+
005830/008040, loss: 0.199539, avg_loss: 0.357436
|
| 1298 |
+
005835/008040, loss: 0.196801, avg_loss: 0.357251
|
| 1299 |
+
005840/008040, loss: 0.173617, avg_loss: 0.357071
|
| 1300 |
+
005845/008040, loss: 0.056907, avg_loss: 0.356874
|
| 1301 |
+
005850/008040, loss: 0.165107, avg_loss: 0.356683
|
| 1302 |
+
005855/008040, loss: 0.100072, avg_loss: 0.356521
|
| 1303 |
+
005860/008040, loss: 0.178491, avg_loss: 0.356324
|
| 1304 |
+
005865/008040, loss: 0.212101, avg_loss: 0.356138
|
| 1305 |
+
005870/008040, loss: 0.215021, avg_loss: 0.355963
|
| 1306 |
+
005875/008040, loss: 0.273816, avg_loss: 0.355788
|
| 1307 |
+
005880/008040, loss: 0.364194, avg_loss: 0.355641
|
| 1308 |
+
005885/008040, loss: 0.270123, avg_loss: 0.355498
|
| 1309 |
+
005890/008040, loss: 0.047443, avg_loss: 0.355291
|
| 1310 |
+
005895/008040, loss: 0.142198, avg_loss: 0.355145
|
| 1311 |
+
***** Running dev evaluation *****
|
| 1312 |
+
Num examples = 1042
|
| 1313 |
+
Instantaneous batch size per device = 32
|
| 1314 |
+
epoch 21, step 5896/8040: {'matthews_correlation': 0.21551745055261307}
|
| 1315 |
+
005900/008040, loss: 0.191457, avg_loss: 0.354973
|
| 1316 |
+
005905/008040, loss: 0.295734, avg_loss: 0.354803
|
| 1317 |
+
005910/008040, loss: 0.075735, avg_loss: 0.354682
|
| 1318 |
+
005915/008040, loss: 0.142483, avg_loss: 0.354498
|
| 1319 |
+
005920/008040, loss: 0.117506, avg_loss: 0.354280
|
| 1320 |
+
005925/008040, loss: 0.108497, avg_loss: 0.354039
|
| 1321 |
+
005930/008040, loss: 0.023560, avg_loss: 0.353889
|
| 1322 |
+
005935/008040, loss: 0.051859, avg_loss: 0.353655
|
| 1323 |
+
005940/008040, loss: 0.096430, avg_loss: 0.353502
|
| 1324 |
+
005945/008040, loss: 0.168284, avg_loss: 0.353288
|
| 1325 |
+
005950/008040, loss: 0.137047, avg_loss: 0.353141
|
| 1326 |
+
005955/008040, loss: 0.182130, avg_loss: 0.353003
|
| 1327 |
+
005960/008040, loss: 0.052544, avg_loss: 0.352779
|
| 1328 |
+
005965/008040, loss: 0.148201, avg_loss: 0.352614
|
| 1329 |
+
005970/008040, loss: 0.203060, avg_loss: 0.352449
|
| 1330 |
+
005975/008040, loss: 0.152961, avg_loss: 0.352309
|
| 1331 |
+
005980/008040, loss: 0.149886, avg_loss: 0.352082
|
| 1332 |
+
005985/008040, loss: 0.108204, avg_loss: 0.351957
|
| 1333 |
+
005990/008040, loss: 0.102725, avg_loss: 0.351766
|
| 1334 |
+
005995/008040, loss: 0.023260, avg_loss: 0.351590
|
| 1335 |
+
006000/008040, loss: 0.115315, avg_loss: 0.351441
|
| 1336 |
+
006005/008040, loss: 0.074605, avg_loss: 0.351242
|
| 1337 |
+
006010/008040, loss: 0.142932, avg_loss: 0.351052
|
| 1338 |
+
006015/008040, loss: 0.083695, avg_loss: 0.350857
|
| 1339 |
+
006020/008040, loss: 0.043695, avg_loss: 0.350694
|
| 1340 |
+
006025/008040, loss: 0.099229, avg_loss: 0.350506
|
| 1341 |
+
006030/008040, loss: 0.024634, avg_loss: 0.350314
|
| 1342 |
+
006035/008040, loss: 0.213198, avg_loss: 0.350172
|
| 1343 |
+
006040/008040, loss: 0.090062, avg_loss: 0.349989
|
| 1344 |
+
006045/008040, loss: 0.165650, avg_loss: 0.349814
|
| 1345 |
+
006050/008040, loss: 0.375118, avg_loss: 0.349689
|
| 1346 |
+
006055/008040, loss: 0.092574, avg_loss: 0.349502
|
| 1347 |
+
006060/008040, loss: 0.076881, avg_loss: 0.349333
|
| 1348 |
+
006065/008040, loss: 0.215473, avg_loss: 0.349175
|
| 1349 |
+
006070/008040, loss: 0.047726, avg_loss: 0.349023
|
| 1350 |
+
006075/008040, loss: 0.275449, avg_loss: 0.348853
|
| 1351 |
+
006080/008040, loss: 0.091764, avg_loss: 0.348649
|
| 1352 |
+
006085/008040, loss: 0.158617, avg_loss: 0.348518
|
| 1353 |
+
006090/008040, loss: 0.398433, avg_loss: 0.348350
|
| 1354 |
+
006095/008040, loss: 0.249465, avg_loss: 0.348170
|
| 1355 |
+
006100/008040, loss: 0.230916, avg_loss: 0.348021
|
| 1356 |
+
006105/008040, loss: 0.138895, avg_loss: 0.347855
|
| 1357 |
+
006110/008040, loss: 0.023905, avg_loss: 0.347659
|
| 1358 |
+
006115/008040, loss: 0.183222, avg_loss: 0.347486
|
| 1359 |
+
006120/008040, loss: 0.149845, avg_loss: 0.347367
|
| 1360 |
+
006125/008040, loss: 0.120646, avg_loss: 0.347237
|
| 1361 |
+
006130/008040, loss: 0.232747, avg_loss: 0.347078
|
| 1362 |
+
006135/008040, loss: 0.086326, avg_loss: 0.346889
|
| 1363 |
+
006140/008040, loss: 0.044021, avg_loss: 0.346692
|
| 1364 |
+
006145/008040, loss: 0.173458, avg_loss: 0.346579
|
| 1365 |
+
006150/008040, loss: 0.110168, avg_loss: 0.346419
|
| 1366 |
+
006155/008040, loss: 0.041496, avg_loss: 0.346239
|
| 1367 |
+
006160/008040, loss: 0.048964, avg_loss: 0.346022
|
| 1368 |
+
***** Running dev evaluation *****
|
| 1369 |
+
Num examples = 1042
|
| 1370 |
+
Instantaneous batch size per device = 32
|
| 1371 |
+
epoch 22, step 6164/8040: {'matthews_correlation': 0.20483291444361929}
|
| 1372 |
+
006165/008040, loss: 0.256147, avg_loss: 0.345885
|
| 1373 |
+
006170/008040, loss: 0.100646, avg_loss: 0.345729
|
| 1374 |
+
006175/008040, loss: 0.135744, avg_loss: 0.345560
|
| 1375 |
+
006180/008040, loss: 0.070830, avg_loss: 0.345336
|
| 1376 |
+
006185/008040, loss: 0.183400, avg_loss: 0.345210
|
| 1377 |
+
006190/008040, loss: 0.171377, avg_loss: 0.345125
|
| 1378 |
+
006195/008040, loss: 0.104681, avg_loss: 0.344985
|
| 1379 |
+
006200/008040, loss: 0.047664, avg_loss: 0.344778
|
| 1380 |
+
006205/008040, loss: 0.132229, avg_loss: 0.344638
|
| 1381 |
+
006210/008040, loss: 0.212232, avg_loss: 0.344449
|
| 1382 |
+
006215/008040, loss: 0.037690, avg_loss: 0.344257
|
| 1383 |
+
006220/008040, loss: 0.265332, avg_loss: 0.344097
|
| 1384 |
+
006225/008040, loss: 0.114738, avg_loss: 0.343896
|
| 1385 |
+
006230/008040, loss: 0.075357, avg_loss: 0.343712
|
| 1386 |
+
006235/008040, loss: 0.082196, avg_loss: 0.343519
|
| 1387 |
+
006240/008040, loss: 0.141044, avg_loss: 0.343342
|
| 1388 |
+
006245/008040, loss: 0.061539, avg_loss: 0.343173
|
| 1389 |
+
006250/008040, loss: 0.156940, avg_loss: 0.342981
|
| 1390 |
+
006255/008040, loss: 0.074917, avg_loss: 0.342844
|
| 1391 |
+
006260/008040, loss: 0.182297, avg_loss: 0.342644
|
| 1392 |
+
006265/008040, loss: 0.188166, avg_loss: 0.342492
|
| 1393 |
+
006270/008040, loss: 0.098817, avg_loss: 0.342310
|
| 1394 |
+
006275/008040, loss: 0.026582, avg_loss: 0.342126
|
| 1395 |
+
006280/008040, loss: 0.093155, avg_loss: 0.341935
|
| 1396 |
+
006285/008040, loss: 0.121849, avg_loss: 0.341775
|
| 1397 |
+
006290/008040, loss: 0.117257, avg_loss: 0.341654
|
| 1398 |
+
006295/008040, loss: 0.470718, avg_loss: 0.341518
|
| 1399 |
+
006300/008040, loss: 0.071532, avg_loss: 0.341330
|
| 1400 |
+
006305/008040, loss: 0.077978, avg_loss: 0.341138
|
| 1401 |
+
006310/008040, loss: 0.280971, avg_loss: 0.341040
|
| 1402 |
+
006315/008040, loss: 0.083832, avg_loss: 0.340839
|
| 1403 |
+
006320/008040, loss: 0.123453, avg_loss: 0.340651
|
| 1404 |
+
006325/008040, loss: 0.293229, avg_loss: 0.340510
|
| 1405 |
+
006330/008040, loss: 0.010351, avg_loss: 0.340313
|
| 1406 |
+
006335/008040, loss: 0.206183, avg_loss: 0.340134
|
| 1407 |
+
006340/008040, loss: 0.157802, avg_loss: 0.340008
|
| 1408 |
+
006345/008040, loss: 0.045268, avg_loss: 0.339836
|
| 1409 |
+
006350/008040, loss: 0.096029, avg_loss: 0.339639
|
| 1410 |
+
006355/008040, loss: 0.094634, avg_loss: 0.339505
|
| 1411 |
+
006360/008040, loss: 0.020495, avg_loss: 0.339314
|
| 1412 |
+
006365/008040, loss: 0.099276, avg_loss: 0.339191
|
| 1413 |
+
006370/008040, loss: 0.040780, avg_loss: 0.339016
|
| 1414 |
+
006375/008040, loss: 0.107320, avg_loss: 0.338862
|
| 1415 |
+
006380/008040, loss: 0.212123, avg_loss: 0.338757
|
| 1416 |
+
006385/008040, loss: 0.228286, avg_loss: 0.338587
|
| 1417 |
+
006390/008040, loss: 0.094378, avg_loss: 0.338381
|
| 1418 |
+
006395/008040, loss: 0.094123, avg_loss: 0.338215
|
| 1419 |
+
006400/008040, loss: 0.153880, avg_loss: 0.338045
|
| 1420 |
+
006405/008040, loss: 0.025904, avg_loss: 0.337846
|
| 1421 |
+
006410/008040, loss: 0.082967, avg_loss: 0.337646
|
| 1422 |
+
006415/008040, loss: 0.040689, avg_loss: 0.337446
|
| 1423 |
+
006420/008040, loss: 0.075779, avg_loss: 0.337272
|
| 1424 |
+
006425/008040, loss: 0.150025, avg_loss: 0.337107
|
| 1425 |
+
006430/008040, loss: 0.141630, avg_loss: 0.336983
|
| 1426 |
+
***** Running dev evaluation *****
|
| 1427 |
+
Num examples = 1042
|
| 1428 |
+
Instantaneous batch size per device = 32
|
| 1429 |
+
epoch 23, step 6432/8040: {'matthews_correlation': 0.24931944187781385}
|
| 1430 |
+
006435/008040, loss: 0.153386, avg_loss: 0.336812
|
| 1431 |
+
006440/008040, loss: 0.222147, avg_loss: 0.336705
|
| 1432 |
+
006445/008040, loss: 0.156677, avg_loss: 0.336554
|
| 1433 |
+
006450/008040, loss: 0.010944, avg_loss: 0.336408
|
| 1434 |
+
006455/008040, loss: 0.038571, avg_loss: 0.336201
|
| 1435 |
+
006460/008040, loss: 0.114449, avg_loss: 0.336032
|
| 1436 |
+
006465/008040, loss: 0.089689, avg_loss: 0.335848
|
| 1437 |
+
006470/008040, loss: 0.329702, avg_loss: 0.335703
|
| 1438 |
+
006475/008040, loss: 0.234976, avg_loss: 0.335533
|
| 1439 |
+
006480/008040, loss: 0.090094, avg_loss: 0.335370
|
| 1440 |
+
006485/008040, loss: 0.090959, avg_loss: 0.335211
|
| 1441 |
+
006490/008040, loss: 0.184350, avg_loss: 0.335043
|
| 1442 |
+
006495/008040, loss: 0.013678, avg_loss: 0.334875
|
| 1443 |
+
006500/008040, loss: 0.066592, avg_loss: 0.334706
|
| 1444 |
+
006505/008040, loss: 0.044437, avg_loss: 0.334594
|
| 1445 |
+
006510/008040, loss: 0.044384, avg_loss: 0.334395
|
| 1446 |
+
006515/008040, loss: 0.141515, avg_loss: 0.334245
|
| 1447 |
+
006520/008040, loss: 0.147063, avg_loss: 0.334082
|
| 1448 |
+
006525/008040, loss: 0.132490, avg_loss: 0.333948
|
| 1449 |
+
006530/008040, loss: 0.115439, avg_loss: 0.333813
|
| 1450 |
+
006535/008040, loss: 0.072417, avg_loss: 0.333611
|
| 1451 |
+
006540/008040, loss: 0.122740, avg_loss: 0.333438
|
| 1452 |
+
006545/008040, loss: 0.153896, avg_loss: 0.333265
|
| 1453 |
+
006550/008040, loss: 0.216064, avg_loss: 0.333112
|
| 1454 |
+
006555/008040, loss: 0.074902, avg_loss: 0.332915
|
| 1455 |
+
006560/008040, loss: 0.229158, avg_loss: 0.332779
|
| 1456 |
+
006565/008040, loss: 0.142630, avg_loss: 0.332649
|
| 1457 |
+
006570/008040, loss: 0.106839, avg_loss: 0.332540
|
| 1458 |
+
006575/008040, loss: 0.016569, avg_loss: 0.332368
|
| 1459 |
+
006580/008040, loss: 0.110829, avg_loss: 0.332200
|
| 1460 |
+
006585/008040, loss: 0.072897, avg_loss: 0.332014
|
| 1461 |
+
006590/008040, loss: 0.148401, avg_loss: 0.331831
|
| 1462 |
+
006595/008040, loss: 0.472177, avg_loss: 0.331741
|
| 1463 |
+
006600/008040, loss: 0.026618, avg_loss: 0.331582
|
| 1464 |
+
006605/008040, loss: 0.192052, avg_loss: 0.331442
|
| 1465 |
+
006610/008040, loss: 0.176737, avg_loss: 0.331313
|
| 1466 |
+
006615/008040, loss: 0.256435, avg_loss: 0.331163
|
| 1467 |
+
006620/008040, loss: 0.112773, avg_loss: 0.330998
|
| 1468 |
+
006625/008040, loss: 0.168097, avg_loss: 0.330900
|
| 1469 |
+
006630/008040, loss: 0.149819, avg_loss: 0.330756
|
| 1470 |
+
006635/008040, loss: 0.035909, avg_loss: 0.330597
|
| 1471 |
+
006640/008040, loss: 0.116389, avg_loss: 0.330473
|
| 1472 |
+
006645/008040, loss: 0.067924, avg_loss: 0.330317
|
| 1473 |
+
006650/008040, loss: 0.029742, avg_loss: 0.330124
|
| 1474 |
+
006655/008040, loss: 0.111944, avg_loss: 0.329962
|
| 1475 |
+
006660/008040, loss: 0.138587, avg_loss: 0.329805
|
| 1476 |
+
006665/008040, loss: 0.214782, avg_loss: 0.329670
|
| 1477 |
+
006670/008040, loss: 0.134468, avg_loss: 0.329520
|
| 1478 |
+
006675/008040, loss: 0.131746, avg_loss: 0.329344
|
| 1479 |
+
006680/008040, loss: 0.085801, avg_loss: 0.329181
|
| 1480 |
+
006685/008040, loss: 0.123189, avg_loss: 0.329020
|
| 1481 |
+
006690/008040, loss: 0.059361, avg_loss: 0.328905
|
| 1482 |
+
006695/008040, loss: 0.131232, avg_loss: 0.328752
|
| 1483 |
+
006700/008040, loss: 0.036765, avg_loss: 0.328543
|
| 1484 |
+
***** Running dev evaluation *****
|
| 1485 |
+
Num examples = 1042
|
| 1486 |
+
Instantaneous batch size per device = 32
|
| 1487 |
+
epoch 24, step 6700/8040: {'matthews_correlation': 0.23227684406858393}
|
| 1488 |
+
006705/008040, loss: 0.030755, avg_loss: 0.328399
|
| 1489 |
+
006710/008040, loss: 0.099883, avg_loss: 0.328219
|
| 1490 |
+
006715/008040, loss: 0.177528, avg_loss: 0.328066
|
| 1491 |
+
006720/008040, loss: 0.272356, avg_loss: 0.327913
|
| 1492 |
+
006725/008040, loss: 0.024312, avg_loss: 0.327721
|
| 1493 |
+
006730/008040, loss: 0.085378, avg_loss: 0.327553
|
| 1494 |
+
006735/008040, loss: 0.042832, avg_loss: 0.327387
|
| 1495 |
+
006740/008040, loss: 0.151696, avg_loss: 0.327222
|
| 1496 |
+
006745/008040, loss: 0.125671, avg_loss: 0.327072
|
| 1497 |
+
006750/008040, loss: 0.065977, avg_loss: 0.326951
|
| 1498 |
+
006755/008040, loss: 0.120872, avg_loss: 0.326772
|
| 1499 |
+
006760/008040, loss: 0.024726, avg_loss: 0.326601
|
| 1500 |
+
006765/008040, loss: 0.058831, avg_loss: 0.326394
|
| 1501 |
+
006770/008040, loss: 0.360663, avg_loss: 0.326308
|
| 1502 |
+
006775/008040, loss: 0.055613, avg_loss: 0.326138
|
| 1503 |
+
006780/008040, loss: 0.068713, avg_loss: 0.325965
|
| 1504 |
+
006785/008040, loss: 0.048198, avg_loss: 0.325800
|
| 1505 |
+
006790/008040, loss: 0.176144, avg_loss: 0.325608
|
| 1506 |
+
006795/008040, loss: 0.084840, avg_loss: 0.325489
|
| 1507 |
+
006800/008040, loss: 0.252102, avg_loss: 0.325354
|
| 1508 |
+
006805/008040, loss: 0.021211, avg_loss: 0.325169
|
| 1509 |
+
006810/008040, loss: 0.206506, avg_loss: 0.325002
|
| 1510 |
+
006815/008040, loss: 0.049258, avg_loss: 0.324803
|
| 1511 |
+
006820/008040, loss: 0.062864, avg_loss: 0.324719
|
| 1512 |
+
006825/008040, loss: 0.211644, avg_loss: 0.324574
|
| 1513 |
+
006830/008040, loss: 0.244467, avg_loss: 0.324422
|
| 1514 |
+
006835/008040, loss: 0.125059, avg_loss: 0.324291
|
| 1515 |
+
006840/008040, loss: 0.126497, avg_loss: 0.324127
|
| 1516 |
+
006845/008040, loss: 0.195687, avg_loss: 0.323964
|
| 1517 |
+
006850/008040, loss: 0.020408, avg_loss: 0.323804
|
| 1518 |
+
006855/008040, loss: 0.039552, avg_loss: 0.323659
|
| 1519 |
+
006860/008040, loss: 0.053010, avg_loss: 0.323502
|
| 1520 |
+
006865/008040, loss: 0.240591, avg_loss: 0.323372
|
| 1521 |
+
006870/008040, loss: 0.067442, avg_loss: 0.323200
|
| 1522 |
+
006875/008040, loss: 0.070645, avg_loss: 0.323066
|
| 1523 |
+
006880/008040, loss: 0.043463, avg_loss: 0.322892
|
| 1524 |
+
006885/008040, loss: 0.065176, avg_loss: 0.322750
|
| 1525 |
+
006890/008040, loss: 0.022665, avg_loss: 0.322594
|
| 1526 |
+
006895/008040, loss: 0.024392, avg_loss: 0.322466
|
| 1527 |
+
006900/008040, loss: 0.065461, avg_loss: 0.322293
|
| 1528 |
+
006905/008040, loss: 0.137459, avg_loss: 0.322161
|
| 1529 |
+
006910/008040, loss: 0.183977, avg_loss: 0.322088
|
| 1530 |
+
006915/008040, loss: 0.082571, avg_loss: 0.321932
|
| 1531 |
+
006920/008040, loss: 0.047769, avg_loss: 0.321773
|
| 1532 |
+
006925/008040, loss: 0.185715, avg_loss: 0.321622
|
| 1533 |
+
006930/008040, loss: 0.120999, avg_loss: 0.321527
|
| 1534 |
+
006935/008040, loss: 0.011521, avg_loss: 0.321368
|
| 1535 |
+
006940/008040, loss: 0.056693, avg_loss: 0.321238
|
| 1536 |
+
006945/008040, loss: 0.312799, avg_loss: 0.321111
|
| 1537 |
+
006950/008040, loss: 0.093848, avg_loss: 0.320974
|
| 1538 |
+
006955/008040, loss: 0.147207, avg_loss: 0.320829
|
| 1539 |
+
006960/008040, loss: 0.244541, avg_loss: 0.320667
|
| 1540 |
+
006965/008040, loss: 0.171516, avg_loss: 0.320572
|
| 1541 |
+
***** Running dev evaluation *****
|
| 1542 |
+
Num examples = 1042
|
| 1543 |
+
Instantaneous batch size per device = 32
|
| 1544 |
+
epoch 25, step 6968/8040: {'matthews_correlation': 0.2203939727085643}
|
| 1545 |
+
006970/008040, loss: 0.090653, avg_loss: 0.320431
|
| 1546 |
+
006975/008040, loss: 0.113621, avg_loss: 0.320278
|
| 1547 |
+
006980/008040, loss: 0.050388, avg_loss: 0.320129
|
| 1548 |
+
006985/008040, loss: 0.195083, avg_loss: 0.319979
|
| 1549 |
+
006990/008040, loss: 0.071205, avg_loss: 0.319799
|
| 1550 |
+
006995/008040, loss: 0.020149, avg_loss: 0.319621
|
| 1551 |
+
007000/008040, loss: 0.059265, avg_loss: 0.319458
|
| 1552 |
+
007005/008040, loss: 0.179539, avg_loss: 0.319284
|
| 1553 |
+
007010/008040, loss: 0.012788, avg_loss: 0.319089
|
| 1554 |
+
007015/008040, loss: 0.034613, avg_loss: 0.318934
|
| 1555 |
+
007020/008040, loss: 0.168260, avg_loss: 0.318806
|
| 1556 |
+
007025/008040, loss: 0.188078, avg_loss: 0.318649
|
| 1557 |
+
007030/008040, loss: 0.011455, avg_loss: 0.318545
|
| 1558 |
+
007035/008040, loss: 0.040954, avg_loss: 0.318369
|
| 1559 |
+
007040/008040, loss: 0.096969, avg_loss: 0.318195
|
| 1560 |
+
007045/008040, loss: 0.071101, avg_loss: 0.318037
|
| 1561 |
+
007050/008040, loss: 0.081423, avg_loss: 0.317851
|
| 1562 |
+
007055/008040, loss: 0.195587, avg_loss: 0.317688
|
| 1563 |
+
007060/008040, loss: 0.032233, avg_loss: 0.317557
|
| 1564 |
+
007065/008040, loss: 0.271995, avg_loss: 0.317422
|
| 1565 |
+
007070/008040, loss: 0.037814, avg_loss: 0.317280
|
| 1566 |
+
007075/008040, loss: 0.068158, avg_loss: 0.317114
|
| 1567 |
+
007080/008040, loss: 0.254120, avg_loss: 0.317008
|
| 1568 |
+
007085/008040, loss: 0.078963, avg_loss: 0.316860
|
| 1569 |
+
007090/008040, loss: 0.024606, avg_loss: 0.316704
|
| 1570 |
+
007095/008040, loss: 0.112365, avg_loss: 0.316543
|
| 1571 |
+
007100/008040, loss: 0.068048, avg_loss: 0.316381
|
| 1572 |
+
007105/008040, loss: 0.029112, avg_loss: 0.316234
|
| 1573 |
+
007110/008040, loss: 0.055819, avg_loss: 0.316104
|
| 1574 |
+
007115/008040, loss: 0.025860, avg_loss: 0.315943
|
| 1575 |
+
007120/008040, loss: 0.094708, avg_loss: 0.315802
|
| 1576 |
+
007125/008040, loss: 0.087746, avg_loss: 0.315670
|
| 1577 |
+
007130/008040, loss: 0.134385, avg_loss: 0.315513
|
| 1578 |
+
007135/008040, loss: 0.135339, avg_loss: 0.315398
|
| 1579 |
+
007140/008040, loss: 0.315180, avg_loss: 0.315259
|
| 1580 |
+
007145/008040, loss: 0.054737, avg_loss: 0.315112
|
| 1581 |
+
007150/008040, loss: 0.405788, avg_loss: 0.315005
|
| 1582 |
+
007155/008040, loss: 0.188528, avg_loss: 0.314918
|
| 1583 |
+
007160/008040, loss: 0.061403, avg_loss: 0.314754
|
| 1584 |
+
007165/008040, loss: 0.077819, avg_loss: 0.314588
|
| 1585 |
+
007170/008040, loss: 0.136640, avg_loss: 0.314467
|
| 1586 |
+
007175/008040, loss: 0.055474, avg_loss: 0.314311
|
| 1587 |
+
007180/008040, loss: 0.023272, avg_loss: 0.314152
|
| 1588 |
+
007185/008040, loss: 0.098981, avg_loss: 0.314002
|
| 1589 |
+
007190/008040, loss: 0.019560, avg_loss: 0.313822
|
| 1590 |
+
007195/008040, loss: 0.348302, avg_loss: 0.313728
|
| 1591 |
+
007200/008040, loss: 0.105960, avg_loss: 0.313588
|
| 1592 |
+
007205/008040, loss: 0.246406, avg_loss: 0.313467
|
| 1593 |
+
007210/008040, loss: 0.074683, avg_loss: 0.313337
|
| 1594 |
+
007215/008040, loss: 0.291595, avg_loss: 0.313251
|
| 1595 |
+
007220/008040, loss: 0.034121, avg_loss: 0.313123
|
| 1596 |
+
007225/008040, loss: 0.074492, avg_loss: 0.313020
|
| 1597 |
+
007230/008040, loss: 0.108867, avg_loss: 0.312875
|
| 1598 |
+
007235/008040, loss: 0.158608, avg_loss: 0.312725
|
| 1599 |
+
***** Running dev evaluation *****
|
| 1600 |
+
Num examples = 1042
|
| 1601 |
+
Instantaneous batch size per device = 32
|
| 1602 |
+
epoch 26, step 7236/8040: {'matthews_correlation': 0.23696373689939254}
|
| 1603 |
+
007240/008040, loss: 0.150083, avg_loss: 0.312579
|
| 1604 |
+
007245/008040, loss: 0.122167, avg_loss: 0.312434
|
| 1605 |
+
007250/008040, loss: 0.083941, avg_loss: 0.312296
|
| 1606 |
+
007255/008040, loss: 0.191218, avg_loss: 0.312159
|
| 1607 |
+
007260/008040, loss: 0.092216, avg_loss: 0.312034
|
| 1608 |
+
007265/008040, loss: 0.081775, avg_loss: 0.311902
|
| 1609 |
+
007270/008040, loss: 0.059207, avg_loss: 0.311740
|
| 1610 |
+
007275/008040, loss: 0.273339, avg_loss: 0.311638
|
| 1611 |
+
007280/008040, loss: 0.271834, avg_loss: 0.311530
|
| 1612 |
+
007285/008040, loss: 0.179015, avg_loss: 0.311387
|
| 1613 |
+
007290/008040, loss: 0.046599, avg_loss: 0.311231
|
| 1614 |
+
007295/008040, loss: 0.051559, avg_loss: 0.311052
|
| 1615 |
+
007300/008040, loss: 0.105356, avg_loss: 0.310882
|
| 1616 |
+
007305/008040, loss: 0.043740, avg_loss: 0.310739
|
| 1617 |
+
007310/008040, loss: 0.080959, avg_loss: 0.310613
|
| 1618 |
+
007315/008040, loss: 0.058263, avg_loss: 0.310452
|
| 1619 |
+
007320/008040, loss: 0.167761, avg_loss: 0.310307
|
| 1620 |
+
007325/008040, loss: 0.128561, avg_loss: 0.310206
|
| 1621 |
+
007330/008040, loss: 0.103146, avg_loss: 0.310052
|
| 1622 |
+
007335/008040, loss: 0.219730, avg_loss: 0.309914
|
| 1623 |
+
007340/008040, loss: 0.055324, avg_loss: 0.309757
|
| 1624 |
+
007345/008040, loss: 0.057465, avg_loss: 0.309610
|
| 1625 |
+
007350/008040, loss: 0.402242, avg_loss: 0.309490
|
| 1626 |
+
007355/008040, loss: 0.352928, avg_loss: 0.309368
|
| 1627 |
+
007360/008040, loss: 0.270440, avg_loss: 0.309237
|
| 1628 |
+
007365/008040, loss: 0.094341, avg_loss: 0.309134
|
| 1629 |
+
007370/008040, loss: 0.325051, avg_loss: 0.309029
|
| 1630 |
+
007375/008040, loss: 0.040529, avg_loss: 0.308867
|
| 1631 |
+
007380/008040, loss: 0.090125, avg_loss: 0.308736
|
| 1632 |
+
007385/008040, loss: 0.053935, avg_loss: 0.308590
|
| 1633 |
+
007390/008040, loss: 0.430134, avg_loss: 0.308482
|
| 1634 |
+
007395/008040, loss: 0.147528, avg_loss: 0.308348
|
| 1635 |
+
007400/008040, loss: 0.121706, avg_loss: 0.308195
|
| 1636 |
+
007405/008040, loss: 0.178868, avg_loss: 0.308087
|
| 1637 |
+
007410/008040, loss: 0.178170, avg_loss: 0.307974
|
| 1638 |
+
007415/008040, loss: 0.023204, avg_loss: 0.307802
|
| 1639 |
+
007420/008040, loss: 0.182678, avg_loss: 0.307687
|
| 1640 |
+
007425/008040, loss: 0.090694, avg_loss: 0.307537
|
| 1641 |
+
007430/008040, loss: 0.028491, avg_loss: 0.307386
|
| 1642 |
+
007435/008040, loss: 0.027389, avg_loss: 0.307237
|
| 1643 |
+
007440/008040, loss: 0.283375, avg_loss: 0.307118
|
| 1644 |
+
007445/008040, loss: 0.036991, avg_loss: 0.306987
|
| 1645 |
+
007450/008040, loss: 0.103909, avg_loss: 0.306909
|
| 1646 |
+
007455/008040, loss: 0.036829, avg_loss: 0.306770
|
| 1647 |
+
007460/008040, loss: 0.052082, avg_loss: 0.306626
|
| 1648 |
+
007465/008040, loss: 0.338257, avg_loss: 0.306543
|
| 1649 |
+
007470/008040, loss: 0.037553, avg_loss: 0.306415
|
| 1650 |
+
007475/008040, loss: 0.031671, avg_loss: 0.306279
|
| 1651 |
+
007480/008040, loss: 0.039051, avg_loss: 0.306135
|
| 1652 |
+
007485/008040, loss: 0.096327, avg_loss: 0.305981
|
| 1653 |
+
007490/008040, loss: 0.198999, avg_loss: 0.305869
|
| 1654 |
+
007495/008040, loss: 0.182575, avg_loss: 0.305730
|
| 1655 |
+
007500/008040, loss: 0.257397, avg_loss: 0.305617
|
| 1656 |
+
***** Running dev evaluation *****
|
| 1657 |
+
Num examples = 1042
|
| 1658 |
+
Instantaneous batch size per device = 32
|
| 1659 |
+
epoch 27, step 7504/8040: {'matthews_correlation': 0.22894062387495076}
|
| 1660 |
+
007505/008040, loss: 0.123742, avg_loss: 0.305505
|
| 1661 |
+
007510/008040, loss: 0.061330, avg_loss: 0.305378
|
| 1662 |
+
007515/008040, loss: 0.016107, avg_loss: 0.305229
|
| 1663 |
+
007520/008040, loss: 0.035412, avg_loss: 0.305081
|
| 1664 |
+
007525/008040, loss: 0.043585, avg_loss: 0.304929
|
| 1665 |
+
007530/008040, loss: 0.019222, avg_loss: 0.304760
|
| 1666 |
+
007535/008040, loss: 0.071748, avg_loss: 0.304596
|
| 1667 |
+
007540/008040, loss: 0.045426, avg_loss: 0.304455
|
| 1668 |
+
007545/008040, loss: 0.020044, avg_loss: 0.304301
|
| 1669 |
+
007550/008040, loss: 0.062295, avg_loss: 0.304174
|
| 1670 |
+
007555/008040, loss: 0.017569, avg_loss: 0.304044
|
| 1671 |
+
007560/008040, loss: 0.180191, avg_loss: 0.303921
|
| 1672 |
+
007565/008040, loss: 0.049493, avg_loss: 0.303774
|
| 1673 |
+
007570/008040, loss: 0.173383, avg_loss: 0.303647
|
| 1674 |
+
007575/008040, loss: 0.193030, avg_loss: 0.303516
|
| 1675 |
+
007580/008040, loss: 0.131787, avg_loss: 0.303401
|
| 1676 |
+
007585/008040, loss: 0.007346, avg_loss: 0.303243
|
| 1677 |
+
007590/008040, loss: 0.155826, avg_loss: 0.303116
|
| 1678 |
+
007595/008040, loss: 0.163601, avg_loss: 0.302978
|
| 1679 |
+
007600/008040, loss: 0.035214, avg_loss: 0.302842
|
| 1680 |
+
007605/008040, loss: 0.029514, avg_loss: 0.302685
|
| 1681 |
+
007610/008040, loss: 0.060444, avg_loss: 0.302529
|
| 1682 |
+
007615/008040, loss: 0.318817, avg_loss: 0.302458
|
| 1683 |
+
007620/008040, loss: 0.080297, avg_loss: 0.302338
|
| 1684 |
+
007625/008040, loss: 0.186482, avg_loss: 0.302204
|
| 1685 |
+
007630/008040, loss: 0.279500, avg_loss: 0.302077
|
| 1686 |
+
007635/008040, loss: 0.200537, avg_loss: 0.301970
|
| 1687 |
+
007640/008040, loss: 0.194061, avg_loss: 0.301894
|
| 1688 |
+
007645/008040, loss: 0.024023, avg_loss: 0.301730
|
| 1689 |
+
007650/008040, loss: 0.033270, avg_loss: 0.301606
|
| 1690 |
+
007655/008040, loss: 0.031241, avg_loss: 0.301439
|
| 1691 |
+
007660/008040, loss: 0.113729, avg_loss: 0.301307
|
| 1692 |
+
007665/008040, loss: 0.041298, avg_loss: 0.301153
|
| 1693 |
+
007670/008040, loss: 0.044293, avg_loss: 0.301029
|
| 1694 |
+
007675/008040, loss: 0.075025, avg_loss: 0.300890
|
| 1695 |
+
007680/008040, loss: 0.125628, avg_loss: 0.300743
|
| 1696 |
+
007685/008040, loss: 0.041533, avg_loss: 0.300654
|
| 1697 |
+
007690/008040, loss: 0.026194, avg_loss: 0.300504
|
| 1698 |
+
007695/008040, loss: 0.045489, avg_loss: 0.300366
|
| 1699 |
+
007700/008040, loss: 0.075324, avg_loss: 0.300245
|
| 1700 |
+
007705/008040, loss: 0.141094, avg_loss: 0.300132
|
| 1701 |
+
007710/008040, loss: 0.036195, avg_loss: 0.299978
|
| 1702 |
+
007715/008040, loss: 0.017604, avg_loss: 0.299834
|
| 1703 |
+
007720/008040, loss: 0.081078, avg_loss: 0.299699
|
| 1704 |
+
007725/008040, loss: 0.021621, avg_loss: 0.299564
|
| 1705 |
+
007730/008040, loss: 0.144577, avg_loss: 0.299449
|
| 1706 |
+
007735/008040, loss: 0.079561, avg_loss: 0.299298
|
| 1707 |
+
007740/008040, loss: 0.040003, avg_loss: 0.299200
|
| 1708 |
+
007745/008040, loss: 0.211169, avg_loss: 0.299085
|
| 1709 |
+
007750/008040, loss: 0.029886, avg_loss: 0.298986
|
| 1710 |
+
007755/008040, loss: 0.138877, avg_loss: 0.298871
|
| 1711 |
+
007760/008040, loss: 0.043360, avg_loss: 0.298753
|
| 1712 |
+
007765/008040, loss: 0.152495, avg_loss: 0.298619
|
| 1713 |
+
007770/008040, loss: 0.060497, avg_loss: 0.298466
|
| 1714 |
+
***** Running dev evaluation *****
|
| 1715 |
+
Num examples = 1042
|
| 1716 |
+
Instantaneous batch size per device = 32
|
| 1717 |
+
epoch 28, step 7772/8040: {'matthews_correlation': 0.23262243281540648}
|
| 1718 |
+
007775/008040, loss: 0.118019, avg_loss: 0.298329
|
| 1719 |
+
007780/008040, loss: 0.028191, avg_loss: 0.298189
|
| 1720 |
+
007785/008040, loss: 0.135941, avg_loss: 0.298065
|
| 1721 |
+
007790/008040, loss: 0.356724, avg_loss: 0.297975
|
| 1722 |
+
007795/008040, loss: 0.026043, avg_loss: 0.297834
|
| 1723 |
+
007800/008040, loss: 0.034852, avg_loss: 0.297691
|
| 1724 |
+
007805/008040, loss: 0.154475, avg_loss: 0.297570
|
| 1725 |
+
007810/008040, loss: 0.037241, avg_loss: 0.297409
|
| 1726 |
+
007815/008040, loss: 0.073721, avg_loss: 0.297300
|
| 1727 |
+
007820/008040, loss: 0.050705, avg_loss: 0.297156
|
| 1728 |
+
007825/008040, loss: 0.105259, avg_loss: 0.297006
|
| 1729 |
+
007830/008040, loss: 0.088514, avg_loss: 0.296893
|
| 1730 |
+
007835/008040, loss: 0.095720, avg_loss: 0.296728
|
| 1731 |
+
007840/008040, loss: 0.055406, avg_loss: 0.296592
|
| 1732 |
+
007845/008040, loss: 0.045261, avg_loss: 0.296458
|
| 1733 |
+
007850/008040, loss: 0.020414, avg_loss: 0.296311
|
| 1734 |
+
007855/008040, loss: 0.060944, avg_loss: 0.296162
|
| 1735 |
+
007860/008040, loss: 0.270967, avg_loss: 0.296048
|
| 1736 |
+
007865/008040, loss: 0.287739, avg_loss: 0.295965
|
| 1737 |
+
007870/008040, loss: 0.151463, avg_loss: 0.295820
|
| 1738 |
+
007875/008040, loss: 0.029142, avg_loss: 0.295686
|
| 1739 |
+
007880/008040, loss: 0.064737, avg_loss: 0.295532
|
| 1740 |
+
007885/008040, loss: 0.077103, avg_loss: 0.295377
|
| 1741 |
+
007890/008040, loss: 0.095900, avg_loss: 0.295271
|
| 1742 |
+
007895/008040, loss: 0.030873, avg_loss: 0.295135
|
| 1743 |
+
007900/008040, loss: 0.041561, avg_loss: 0.295050
|
| 1744 |
+
007905/008040, loss: 0.052920, avg_loss: 0.294904
|
| 1745 |
+
007910/008040, loss: 0.090048, avg_loss: 0.294785
|
| 1746 |
+
007915/008040, loss: 0.392368, avg_loss: 0.294698
|
| 1747 |
+
007920/008040, loss: 0.053813, avg_loss: 0.294579
|
| 1748 |
+
007925/008040, loss: 0.122067, avg_loss: 0.294466
|
| 1749 |
+
007930/008040, loss: 0.108241, avg_loss: 0.294332
|
| 1750 |
+
007935/008040, loss: 0.047713, avg_loss: 0.294191
|
| 1751 |
+
007940/008040, loss: 0.146655, avg_loss: 0.294085
|
| 1752 |
+
007945/008040, loss: 0.041561, avg_loss: 0.293967
|
| 1753 |
+
007950/008040, loss: 0.104168, avg_loss: 0.293888
|
| 1754 |
+
007955/008040, loss: 0.036348, avg_loss: 0.293742
|
| 1755 |
+
007960/008040, loss: 0.185095, avg_loss: 0.293636
|
| 1756 |
+
007965/008040, loss: 0.250438, avg_loss: 0.293519
|
| 1757 |
+
007970/008040, loss: 0.060160, avg_loss: 0.293385
|
| 1758 |
+
007975/008040, loss: 0.030413, avg_loss: 0.293244
|
| 1759 |
+
007980/008040, loss: 0.119690, avg_loss: 0.293127
|
| 1760 |
+
007985/008040, loss: 0.157349, avg_loss: 0.293020
|
| 1761 |
+
007990/008040, loss: 0.076855, avg_loss: 0.292911
|
| 1762 |
+
007995/008040, loss: 0.060965, avg_loss: 0.292781
|
| 1763 |
+
008000/008040, loss: 0.152858, avg_loss: 0.292687
|
| 1764 |
+
008005/008040, loss: 0.257621, avg_loss: 0.292574
|
| 1765 |
+
008010/008040, loss: 0.050748, avg_loss: 0.292464
|
| 1766 |
+
008015/008040, loss: 0.078382, avg_loss: 0.292323
|
| 1767 |
+
008020/008040, loss: 0.218891, avg_loss: 0.292214
|
| 1768 |
+
008025/008040, loss: 0.052570, avg_loss: 0.292084
|
| 1769 |
+
008030/008040, loss: 0.058064, avg_loss: 0.291971
|
| 1770 |
+
008035/008040, loss: 0.017564, avg_loss: 0.291854
|
| 1771 |
+
008040/008040, loss: 0.016145, avg_loss: 0.291706
|
| 1772 |
+
***** Running dev evaluation *****
|
| 1773 |
+
Num examples = 1042
|
| 1774 |
+
Instantaneous batch size per device = 32
|
| 1775 |
+
epoch 29, step 8040/8040: {'matthews_correlation': 0.20156166898476155}
|
| 1776 |
+
***** Running train evaluation *****
|
| 1777 |
+
Num examples = 8551
|
| 1778 |
+
Instantaneous batch size per device = 32
|
| 1779 |
+
Train Dataset Result: {'matthews_correlation': 0.9609897432355321}
|
| 1780 |
+
***** Running dev evaluation *****
|
| 1781 |
+
Num examples = 1042
|
| 1782 |
+
Instantaneous batch size per device = 32
|
| 1783 |
+
Dev Dataset Result: {'matthews_correlation': 0.20156166898476155}
|
| 1784 |
+
Training time 0:05:46
|
pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f28a7d74920b730a1cff525d934075a8a8be71b7ae5780f8108d8f4091784d7c
|
| 3 |
+
size 34299149
|
result.txt
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{'matthews_correlation': 0.0}
|
| 2 |
+
{'matthews_correlation': 0.0}
|
| 3 |
+
{'matthews_correlation': 0.0}
|
| 4 |
+
{'matthews_correlation': 0.0}
|
| 5 |
+
{'matthews_correlation': 0.15816318746785782}
|
| 6 |
+
{'matthews_correlation': 0.1895854925674006}
|
| 7 |
+
{'matthews_correlation': 0.21307686539085852}
|
| 8 |
+
{'matthews_correlation': 0.22254373946847703}
|
| 9 |
+
{'matthews_correlation': 0.22951168079779777}
|
| 10 |
+
{'matthews_correlation': 0.23063296136375847}
|
| 11 |
+
{'matthews_correlation': 0.18813850606847293}
|
| 12 |
+
{'matthews_correlation': 0.20603205189543294}
|
| 13 |
+
{'matthews_correlation': 0.2118432448298745}
|
| 14 |
+
{'matthews_correlation': 0.20261239362380884}
|
| 15 |
+
{'matthews_correlation': 0.22518881045488998}
|
| 16 |
+
{'matthews_correlation': 0.24863648291608131}
|
| 17 |
+
{'matthews_correlation': 0.19984853723708582}
|
| 18 |
+
{'matthews_correlation': 0.23319244596326755}
|
| 19 |
+
{'matthews_correlation': 0.24348660475263997}
|
| 20 |
+
{'matthews_correlation': 0.2545245288314363}
|
| 21 |
+
{'matthews_correlation': 0.20994533418798944}
|
| 22 |
+
{'matthews_correlation': 0.21551745055261307}
|
| 23 |
+
{'matthews_correlation': 0.20483291444361929}
|
| 24 |
+
{'matthews_correlation': 0.24931944187781385}
|
| 25 |
+
{'matthews_correlation': 0.23227684406858393}
|
| 26 |
+
{'matthews_correlation': 0.2203939727085643}
|
| 27 |
+
{'matthews_correlation': 0.23696373689939254}
|
| 28 |
+
{'matthews_correlation': 0.22894062387495076}
|
| 29 |
+
{'matthews_correlation': 0.23262243281540648}
|
| 30 |
+
{'matthews_correlation': 0.20156166898476155}
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "do_basic_tokenize": true, "model_max_length": 512, "name_or_path": "/home.local/jianwei/workspace/archive/SparseOptimizer/output/Layer_7_12_Hid_160_768_Head_10_12_IMRatio_3.5", "never_split": null, "special_tokens_map_file": "/home.local/jianwei/.cache/huggingface/transformers/b680d52711d2451bbd6c6b1700365d6d731977c1357ae86bd7227f61145d3be2.dd8bd9bfd3664b530ea4e645105f557769387b3da9f79bdb55ed556bdd80611d", "tokenizer_class": "BertTokenizer"}
|
vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|