Add multi-head XLM-R model (intent+NER) for TR
Browse files- .gitattributes +3 -0
- README.md +19 -0
- checkpoint-2500/model.safetensors +3 -0
- checkpoint-2500/optimizer.pt +3 -0
- checkpoint-2500/rng_state.pth +3 -0
- checkpoint-2500/scaler.pt +3 -0
- checkpoint-2500/scheduler.pt +3 -0
- checkpoint-2500/sentencepiece.bpe.model +3 -0
- checkpoint-2500/special_tokens_map.json +15 -0
- checkpoint-2500/tokenizer.json +3 -0
- checkpoint-2500/tokenizer_config.json +55 -0
- checkpoint-2500/trainer_state.json +463 -0
- checkpoint-2500/training_args.bin +3 -0
- checkpoint-3000/model.safetensors +3 -0
- checkpoint-3000/optimizer.pt +3 -0
- checkpoint-3000/rng_state.pth +3 -0
- checkpoint-3000/scaler.pt +3 -0
- checkpoint-3000/scheduler.pt +3 -0
- checkpoint-3000/sentencepiece.bpe.model +3 -0
- checkpoint-3000/special_tokens_map.json +15 -0
- checkpoint-3000/tokenizer.json +3 -0
- checkpoint-3000/tokenizer_config.json +55 -0
- checkpoint-3000/trainer_state.json +547 -0
- checkpoint-3000/training_args.bin +3 -0
- label_schemes.json +193 -0
- model.safetensors +3 -0
- modeling_xlmr_multihead.py +31 -0
- runs/Sep01_10-59-23_a3c5035f483f/events.out.tfevents.1756724394.a3c5035f483f.2331.0 +3 -0
- runs/Sep01_11-21-03_a3c5035f483f/events.out.tfevents.1756725668.a3c5035f483f.2331.1 +3 -0
- sentencepiece.bpe.model +3 -0
- special_tokens_map.json +15 -0
- tokenizer.json +3 -0
- tokenizer_config.json +55 -0
- training_args.bin +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
checkpoint-2500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
checkpoint-3000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# XLM-RoBERTa Multi-Head (TR) — Intent + NER
|
| 2 |
+
|
| 3 |
+
- Base: `xlm-roberta-base`
|
| 4 |
+
- Tasks: Intent classification (60 sınıf), NER (BIO)
|
| 5 |
+
- Dosyalar: `model.safetensors`, `config.json`, `tokenizer.*`, `label_schemes.json`, `modeling_xlmr_multihead.py`
|
| 6 |
+
|
| 7 |
+
## Kullanım
|
| 8 |
+
|
| 9 |
+
```python
|
| 10 |
+
from transformers import AutoTokenizer
|
| 11 |
+
from modeling_xlmr_multihead import XLMRMultiHead
|
| 12 |
+
import torch, json
|
| 13 |
+
|
| 14 |
+
ckpt = "celalkartoglu/xlmr-multihead-tr"
|
| 15 |
+
|
| 16 |
+
tok = AutoTokenizer.from_pretrained(ckpt)
|
| 17 |
+
model = XLMRMultiHead(n_intent=60, n_ner=111)
|
| 18 |
+
from safetensors.torch import load_file
model.load_state_dict(load_file("model.safetensors", device="cpu"))
|
| 19 |
+
model.eval()
```
|
checkpoint-2500/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4cebda0fd8fb43cdd7bfaed3ed802d2d765fa5a6e277ca051d6f4edd4c2acf95
|
| 3 |
+
size 1112724220
|
checkpoint-2500/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3d0c750103a8c782395706d4b04be165e37b41ca2b102a5a41c949cec4dfc6e
|
| 3 |
+
size 2220847627
|
checkpoint-2500/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f132dbe4b36ef2ef5948850ab1d51b605a7a6fe61572f71d627f2cd76c02944
|
| 3 |
+
size 14645
|
checkpoint-2500/scaler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:756748015f02714454fc8b7c1609af89637c927d42c77e08d04e18d38ef0be30
|
| 3 |
+
size 1383
|
checkpoint-2500/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a739733499f8f4ac73e337930f1fa842e39bf811ec9e25a9c7583f40a9e61a1
|
| 3 |
+
size 1465
|
checkpoint-2500/sentencepiece.bpe.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
|
| 3 |
+
size 5069051
|
checkpoint-2500/special_tokens_map.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "<s>",
|
| 3 |
+
"cls_token": "<s>",
|
| 4 |
+
"eos_token": "</s>",
|
| 5 |
+
"mask_token": {
|
| 6 |
+
"content": "<mask>",
|
| 7 |
+
"lstrip": true,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false
|
| 11 |
+
},
|
| 12 |
+
"pad_token": "<pad>",
|
| 13 |
+
"sep_token": "</s>",
|
| 14 |
+
"unk_token": "<unk>"
|
| 15 |
+
}
|
checkpoint-2500/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0091a328b3441d754e481db5a390d7f3b8dabc6016869fd13ba350d23ddc4cd
|
| 3 |
+
size 17082832
|
checkpoint-2500/tokenizer_config.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<s>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<pad>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "</s>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "<unk>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"250001": {
|
| 36 |
+
"content": "<mask>",
|
| 37 |
+
"lstrip": true,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"bos_token": "<s>",
|
| 45 |
+
"clean_up_tokenization_spaces": false,
|
| 46 |
+
"cls_token": "<s>",
|
| 47 |
+
"eos_token": "</s>",
|
| 48 |
+
"extra_special_tokens": {},
|
| 49 |
+
"mask_token": "<mask>",
|
| 50 |
+
"model_max_length": 512,
|
| 51 |
+
"pad_token": "<pad>",
|
| 52 |
+
"sep_token": "</s>",
|
| 53 |
+
"tokenizer_class": "XLMRobertaTokenizer",
|
| 54 |
+
"unk_token": "<unk>"
|
| 55 |
+
}
|
checkpoint-2500/trainer_state.json
ADDED
|
@@ -0,0 +1,463 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 2500,
|
| 3 |
+
"best_metric": 0.7024300779458964,
|
| 4 |
+
"best_model_checkpoint": "/content/drive/MyDrive/NLP/Multi-Task/data/massive_tr/xlmr-multihead/checkpoint-2500",
|
| 5 |
+
"epoch": 5.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 2500,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.1,
|
| 14 |
+
"grad_norm": 31.571680068969727,
|
| 15 |
+
"learning_rate": 8.166666666666666e-06,
|
| 16 |
+
"loss": 9.5398,
|
| 17 |
+
"step": 50
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.2,
|
| 21 |
+
"grad_norm": 20.519420623779297,
|
| 22 |
+
"learning_rate": 1.65e-05,
|
| 23 |
+
"loss": 6.6539,
|
| 24 |
+
"step": 100
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"epoch": 0.3,
|
| 28 |
+
"grad_norm": 14.259832382202148,
|
| 29 |
+
"learning_rate": 2.483333333333333e-05,
|
| 30 |
+
"loss": 6.1233,
|
| 31 |
+
"step": 150
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 0.4,
|
| 35 |
+
"grad_norm": 28.014291763305664,
|
| 36 |
+
"learning_rate": 2.979787234042553e-05,
|
| 37 |
+
"loss": 5.6725,
|
| 38 |
+
"step": 200
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"epoch": 0.5,
|
| 42 |
+
"grad_norm": 28.722820281982422,
|
| 43 |
+
"learning_rate": 2.926595744680851e-05,
|
| 44 |
+
"loss": 5.3297,
|
| 45 |
+
"step": 250
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"epoch": 0.6,
|
| 49 |
+
"grad_norm": 38.051170349121094,
|
| 50 |
+
"learning_rate": 2.873404255319149e-05,
|
| 51 |
+
"loss": 4.5911,
|
| 52 |
+
"step": 300
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"epoch": 0.7,
|
| 56 |
+
"grad_norm": 26.39080810546875,
|
| 57 |
+
"learning_rate": 2.820212765957447e-05,
|
| 58 |
+
"loss": 4.017,
|
| 59 |
+
"step": 350
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"epoch": 0.8,
|
| 63 |
+
"grad_norm": 18.74272918701172,
|
| 64 |
+
"learning_rate": 2.7670212765957448e-05,
|
| 65 |
+
"loss": 3.6629,
|
| 66 |
+
"step": 400
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"epoch": 0.9,
|
| 70 |
+
"grad_norm": 18.32183074951172,
|
| 71 |
+
"learning_rate": 2.7138297872340427e-05,
|
| 72 |
+
"loss": 3.3781,
|
| 73 |
+
"step": 450
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"epoch": 1.0,
|
| 77 |
+
"grad_norm": 29.03256607055664,
|
| 78 |
+
"learning_rate": 2.6606382978723407e-05,
|
| 79 |
+
"loss": 2.9562,
|
| 80 |
+
"step": 500
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"epoch": 1.0,
|
| 84 |
+
"eval_intent_accuracy": 0.704,
|
| 85 |
+
"eval_intent_macro_f1": 0.48933652636220204,
|
| 86 |
+
"eval_joint_score": 0.5881757105943153,
|
| 87 |
+
"eval_loss": 2.396956443786621,
|
| 88 |
+
"eval_ner_f1": 0.4723514211886305,
|
| 89 |
+
"eval_ner_precision": 0.5163841807909605,
|
| 90 |
+
"eval_ner_recall": 0.43523809523809526,
|
| 91 |
+
"eval_runtime": 0.5889,
|
| 92 |
+
"eval_samples_per_second": 1698.154,
|
| 93 |
+
"eval_steps_per_second": 54.341,
|
| 94 |
+
"step": 500
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"epoch": 1.1,
|
| 98 |
+
"grad_norm": 40.97825241088867,
|
| 99 |
+
"learning_rate": 2.6074468085106382e-05,
|
| 100 |
+
"loss": 2.701,
|
| 101 |
+
"step": 550
|
| 102 |
+
},
|
| 103 |
+
{
|
| 104 |
+
"epoch": 1.2,
|
| 105 |
+
"grad_norm": 23.553442001342773,
|
| 106 |
+
"learning_rate": 2.554255319148936e-05,
|
| 107 |
+
"loss": 2.4746,
|
| 108 |
+
"step": 600
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"epoch": 1.3,
|
| 112 |
+
"grad_norm": 26.10988426208496,
|
| 113 |
+
"learning_rate": 2.501063829787234e-05,
|
| 114 |
+
"loss": 2.3423,
|
| 115 |
+
"step": 650
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"epoch": 1.4,
|
| 119 |
+
"grad_norm": 19.3520565032959,
|
| 120 |
+
"learning_rate": 2.447872340425532e-05,
|
| 121 |
+
"loss": 2.1157,
|
| 122 |
+
"step": 700
|
| 123 |
+
},
|
| 124 |
+
{
|
| 125 |
+
"epoch": 1.5,
|
| 126 |
+
"grad_norm": 34.30548858642578,
|
| 127 |
+
"learning_rate": 2.3946808510638296e-05,
|
| 128 |
+
"loss": 1.9679,
|
| 129 |
+
"step": 750
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"epoch": 1.6,
|
| 133 |
+
"grad_norm": 15.152769088745117,
|
| 134 |
+
"learning_rate": 2.341489361702128e-05,
|
| 135 |
+
"loss": 1.9404,
|
| 136 |
+
"step": 800
|
| 137 |
+
},
|
| 138 |
+
{
|
| 139 |
+
"epoch": 1.7,
|
| 140 |
+
"grad_norm": 25.86590003967285,
|
| 141 |
+
"learning_rate": 2.2882978723404258e-05,
|
| 142 |
+
"loss": 1.97,
|
| 143 |
+
"step": 850
|
| 144 |
+
},
|
| 145 |
+
{
|
| 146 |
+
"epoch": 1.8,
|
| 147 |
+
"grad_norm": 18.924379348754883,
|
| 148 |
+
"learning_rate": 2.2351063829787237e-05,
|
| 149 |
+
"loss": 1.8073,
|
| 150 |
+
"step": 900
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"epoch": 1.9,
|
| 154 |
+
"grad_norm": 20.5709171295166,
|
| 155 |
+
"learning_rate": 2.1819148936170213e-05,
|
| 156 |
+
"loss": 1.7987,
|
| 157 |
+
"step": 950
|
| 158 |
+
},
|
| 159 |
+
{
|
| 160 |
+
"epoch": 2.0,
|
| 161 |
+
"grad_norm": 31.26434898376465,
|
| 162 |
+
"learning_rate": 2.1287234042553192e-05,
|
| 163 |
+
"loss": 1.7652,
|
| 164 |
+
"step": 1000
|
| 165 |
+
},
|
| 166 |
+
{
|
| 167 |
+
"epoch": 2.0,
|
| 168 |
+
"eval_intent_accuracy": 0.833,
|
| 169 |
+
"eval_intent_macro_f1": 0.7032350726349212,
|
| 170 |
+
"eval_joint_score": 0.7102992125984251,
|
| 171 |
+
"eval_loss": 1.4518628120422363,
|
| 172 |
+
"eval_ner_f1": 0.5875984251968503,
|
| 173 |
+
"eval_ner_precision": 0.6079429735234216,
|
| 174 |
+
"eval_ner_recall": 0.5685714285714286,
|
| 175 |
+
"eval_runtime": 0.595,
|
| 176 |
+
"eval_samples_per_second": 1680.634,
|
| 177 |
+
"eval_steps_per_second": 53.78,
|
| 178 |
+
"step": 1000
|
| 179 |
+
},
|
| 180 |
+
{
|
| 181 |
+
"epoch": 2.1,
|
| 182 |
+
"grad_norm": 35.977622985839844,
|
| 183 |
+
"learning_rate": 2.075531914893617e-05,
|
| 184 |
+
"loss": 1.4852,
|
| 185 |
+
"step": 1050
|
| 186 |
+
},
|
| 187 |
+
{
|
| 188 |
+
"epoch": 2.2,
|
| 189 |
+
"grad_norm": 30.942049026489258,
|
| 190 |
+
"learning_rate": 2.0223404255319147e-05,
|
| 191 |
+
"loss": 1.3909,
|
| 192 |
+
"step": 1100
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"epoch": 2.3,
|
| 196 |
+
"grad_norm": 29.61802101135254,
|
| 197 |
+
"learning_rate": 1.9691489361702126e-05,
|
| 198 |
+
"loss": 1.4372,
|
| 199 |
+
"step": 1150
|
| 200 |
+
},
|
| 201 |
+
{
|
| 202 |
+
"epoch": 2.4,
|
| 203 |
+
"grad_norm": 17.332490921020508,
|
| 204 |
+
"learning_rate": 1.915957446808511e-05,
|
| 205 |
+
"loss": 1.4184,
|
| 206 |
+
"step": 1200
|
| 207 |
+
},
|
| 208 |
+
{
|
| 209 |
+
"epoch": 2.5,
|
| 210 |
+
"grad_norm": 9.733820915222168,
|
| 211 |
+
"learning_rate": 1.8627659574468088e-05,
|
| 212 |
+
"loss": 1.3071,
|
| 213 |
+
"step": 1250
|
| 214 |
+
},
|
| 215 |
+
{
|
| 216 |
+
"epoch": 2.6,
|
| 217 |
+
"grad_norm": 22.356639862060547,
|
| 218 |
+
"learning_rate": 1.8095744680851064e-05,
|
| 219 |
+
"loss": 1.3381,
|
| 220 |
+
"step": 1300
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"epoch": 2.7,
|
| 224 |
+
"grad_norm": 26.953872680664062,
|
| 225 |
+
"learning_rate": 1.7563829787234043e-05,
|
| 226 |
+
"loss": 1.347,
|
| 227 |
+
"step": 1350
|
| 228 |
+
},
|
| 229 |
+
{
|
| 230 |
+
"epoch": 2.8,
|
| 231 |
+
"grad_norm": 39.545013427734375,
|
| 232 |
+
"learning_rate": 1.7031914893617022e-05,
|
| 233 |
+
"loss": 1.2399,
|
| 234 |
+
"step": 1400
|
| 235 |
+
},
|
| 236 |
+
{
|
| 237 |
+
"epoch": 2.9,
|
| 238 |
+
"grad_norm": 29.881067276000977,
|
| 239 |
+
"learning_rate": 1.65e-05,
|
| 240 |
+
"loss": 1.2018,
|
| 241 |
+
"step": 1450
|
| 242 |
+
},
|
| 243 |
+
{
|
| 244 |
+
"epoch": 3.0,
|
| 245 |
+
"grad_norm": 34.384517669677734,
|
| 246 |
+
"learning_rate": 1.5968085106382977e-05,
|
| 247 |
+
"loss": 1.2898,
|
| 248 |
+
"step": 1500
|
| 249 |
+
},
|
| 250 |
+
{
|
| 251 |
+
"epoch": 3.0,
|
| 252 |
+
"eval_intent_accuracy": 0.843,
|
| 253 |
+
"eval_intent_macro_f1": 0.7342243232665543,
|
| 254 |
+
"eval_joint_score": 0.7419320432043204,
|
| 255 |
+
"eval_loss": 1.2403136491775513,
|
| 256 |
+
"eval_ner_f1": 0.6408640864086408,
|
| 257 |
+
"eval_ner_precision": 0.6075085324232082,
|
| 258 |
+
"eval_ner_recall": 0.6780952380952381,
|
| 259 |
+
"eval_runtime": 0.5931,
|
| 260 |
+
"eval_samples_per_second": 1686.094,
|
| 261 |
+
"eval_steps_per_second": 53.955,
|
| 262 |
+
"step": 1500
|
| 263 |
+
},
|
| 264 |
+
{
|
| 265 |
+
"epoch": 3.1,
|
| 266 |
+
"grad_norm": 40.15689468383789,
|
| 267 |
+
"learning_rate": 1.5436170212765956e-05,
|
| 268 |
+
"loss": 1.0928,
|
| 269 |
+
"step": 1550
|
| 270 |
+
},
|
| 271 |
+
{
|
| 272 |
+
"epoch": 3.2,
|
| 273 |
+
"grad_norm": 14.882131576538086,
|
| 274 |
+
"learning_rate": 1.4904255319148937e-05,
|
| 275 |
+
"loss": 0.971,
|
| 276 |
+
"step": 1600
|
| 277 |
+
},
|
| 278 |
+
{
|
| 279 |
+
"epoch": 3.3,
|
| 280 |
+
"grad_norm": 16.319412231445312,
|
| 281 |
+
"learning_rate": 1.4372340425531915e-05,
|
| 282 |
+
"loss": 1.1631,
|
| 283 |
+
"step": 1650
|
| 284 |
+
},
|
| 285 |
+
{
|
| 286 |
+
"epoch": 3.4,
|
| 287 |
+
"grad_norm": 26.943748474121094,
|
| 288 |
+
"learning_rate": 1.3840425531914896e-05,
|
| 289 |
+
"loss": 1.0528,
|
| 290 |
+
"step": 1700
|
| 291 |
+
},
|
| 292 |
+
{
|
| 293 |
+
"epoch": 3.5,
|
| 294 |
+
"grad_norm": 19.07010841369629,
|
| 295 |
+
"learning_rate": 1.3308510638297873e-05,
|
| 296 |
+
"loss": 1.0475,
|
| 297 |
+
"step": 1750
|
| 298 |
+
},
|
| 299 |
+
{
|
| 300 |
+
"epoch": 3.6,
|
| 301 |
+
"grad_norm": 16.704652786254883,
|
| 302 |
+
"learning_rate": 1.277659574468085e-05,
|
| 303 |
+
"loss": 1.0072,
|
| 304 |
+
"step": 1800
|
| 305 |
+
},
|
| 306 |
+
{
|
| 307 |
+
"epoch": 3.7,
|
| 308 |
+
"grad_norm": 20.118215560913086,
|
| 309 |
+
"learning_rate": 1.224468085106383e-05,
|
| 310 |
+
"loss": 1.0892,
|
| 311 |
+
"step": 1850
|
| 312 |
+
},
|
| 313 |
+
{
|
| 314 |
+
"epoch": 3.8,
|
| 315 |
+
"grad_norm": 25.932292938232422,
|
| 316 |
+
"learning_rate": 1.1712765957446809e-05,
|
| 317 |
+
"loss": 0.8766,
|
| 318 |
+
"step": 1900
|
| 319 |
+
},
|
| 320 |
+
{
|
| 321 |
+
"epoch": 3.9,
|
| 322 |
+
"grad_norm": 20.448410034179688,
|
| 323 |
+
"learning_rate": 1.1180851063829788e-05,
|
| 324 |
+
"loss": 0.8948,
|
| 325 |
+
"step": 1950
|
| 326 |
+
},
|
| 327 |
+
{
|
| 328 |
+
"epoch": 4.0,
|
| 329 |
+
"grad_norm": 13.030745506286621,
|
| 330 |
+
"learning_rate": 1.0648936170212766e-05,
|
| 331 |
+
"loss": 0.9611,
|
| 332 |
+
"step": 2000
|
| 333 |
+
},
|
| 334 |
+
{
|
| 335 |
+
"epoch": 4.0,
|
| 336 |
+
"eval_intent_accuracy": 0.865,
|
| 337 |
+
"eval_intent_macro_f1": 0.7845036463088115,
|
| 338 |
+
"eval_joint_score": 0.7702941855099215,
|
| 339 |
+
"eval_loss": 1.1503586769104004,
|
| 340 |
+
"eval_ner_f1": 0.675588371019843,
|
| 341 |
+
"eval_ner_precision": 0.6553267681289168,
|
| 342 |
+
"eval_ner_recall": 0.6971428571428572,
|
| 343 |
+
"eval_runtime": 0.5885,
|
| 344 |
+
"eval_samples_per_second": 1699.13,
|
| 345 |
+
"eval_steps_per_second": 54.372,
|
| 346 |
+
"step": 2000
|
| 347 |
+
},
|
| 348 |
+
{
|
| 349 |
+
"epoch": 4.1,
|
| 350 |
+
"grad_norm": 7.756314754486084,
|
| 351 |
+
"learning_rate": 1.0117021276595745e-05,
|
| 352 |
+
"loss": 0.7831,
|
| 353 |
+
"step": 2050
|
| 354 |
+
},
|
| 355 |
+
{
|
| 356 |
+
"epoch": 4.2,
|
| 357 |
+
"grad_norm": 15.828548431396484,
|
| 358 |
+
"learning_rate": 9.585106382978724e-06,
|
| 359 |
+
"loss": 0.7917,
|
| 360 |
+
"step": 2100
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"epoch": 4.3,
|
| 364 |
+
"grad_norm": 17.722307205200195,
|
| 365 |
+
"learning_rate": 9.053191489361703e-06,
|
| 366 |
+
"loss": 0.7958,
|
| 367 |
+
"step": 2150
|
| 368 |
+
},
|
| 369 |
+
{
|
| 370 |
+
"epoch": 4.4,
|
| 371 |
+
"grad_norm": 31.272441864013672,
|
| 372 |
+
"learning_rate": 8.52127659574468e-06,
|
| 373 |
+
"loss": 0.7537,
|
| 374 |
+
"step": 2200
|
| 375 |
+
},
|
| 376 |
+
{
|
| 377 |
+
"epoch": 4.5,
|
| 378 |
+
"grad_norm": 37.739131927490234,
|
| 379 |
+
"learning_rate": 7.98936170212766e-06,
|
| 380 |
+
"loss": 0.8002,
|
| 381 |
+
"step": 2250
|
| 382 |
+
},
|
| 383 |
+
{
|
| 384 |
+
"epoch": 4.6,
|
| 385 |
+
"grad_norm": 29.077699661254883,
|
| 386 |
+
"learning_rate": 7.457446808510638e-06,
|
| 387 |
+
"loss": 0.8878,
|
| 388 |
+
"step": 2300
|
| 389 |
+
},
|
| 390 |
+
{
|
| 391 |
+
"epoch": 4.7,
|
| 392 |
+
"grad_norm": 43.24278259277344,
|
| 393 |
+
"learning_rate": 6.925531914893618e-06,
|
| 394 |
+
"loss": 0.8453,
|
| 395 |
+
"step": 2350
|
| 396 |
+
},
|
| 397 |
+
{
|
| 398 |
+
"epoch": 4.8,
|
| 399 |
+
"grad_norm": 94.07080841064453,
|
| 400 |
+
"learning_rate": 6.393617021276596e-06,
|
| 401 |
+
"loss": 0.7931,
|
| 402 |
+
"step": 2400
|
| 403 |
+
},
|
| 404 |
+
{
|
| 405 |
+
"epoch": 4.9,
|
| 406 |
+
"grad_norm": 20.592226028442383,
|
| 407 |
+
"learning_rate": 5.861702127659575e-06,
|
| 408 |
+
"loss": 0.7832,
|
| 409 |
+
"step": 2450
|
| 410 |
+
},
|
| 411 |
+
{
|
| 412 |
+
"epoch": 5.0,
|
| 413 |
+
"grad_norm": 36.34307098388672,
|
| 414 |
+
"learning_rate": 5.3297872340425535e-06,
|
| 415 |
+
"loss": 0.8036,
|
| 416 |
+
"step": 2500
|
| 417 |
+
},
|
| 418 |
+
{
|
| 419 |
+
"epoch": 5.0,
|
| 420 |
+
"eval_intent_accuracy": 0.868,
|
| 421 |
+
"eval_intent_macro_f1": 0.8051212440624268,
|
| 422 |
+
"eval_joint_score": 0.7852150389729482,
|
| 423 |
+
"eval_loss": 1.0913232564926147,
|
| 424 |
+
"eval_ner_f1": 0.7024300779458964,
|
| 425 |
+
"eval_ner_precision": 0.6772767462422635,
|
| 426 |
+
"eval_ner_recall": 0.7295238095238096,
|
| 427 |
+
"eval_runtime": 0.5824,
|
| 428 |
+
"eval_samples_per_second": 1717.074,
|
| 429 |
+
"eval_steps_per_second": 54.946,
|
| 430 |
+
"step": 2500
|
| 431 |
+
}
|
| 432 |
+
],
|
| 433 |
+
"logging_steps": 50,
|
| 434 |
+
"max_steps": 3000,
|
| 435 |
+
"num_input_tokens_seen": 0,
|
| 436 |
+
"num_train_epochs": 6,
|
| 437 |
+
"save_steps": 500,
|
| 438 |
+
"stateful_callbacks": {
|
| 439 |
+
"EarlyStoppingCallback": {
|
| 440 |
+
"args": {
|
| 441 |
+
"early_stopping_patience": 2,
|
| 442 |
+
"early_stopping_threshold": 0.0005
|
| 443 |
+
},
|
| 444 |
+
"attributes": {
|
| 445 |
+
"early_stopping_patience_counter": 0
|
| 446 |
+
}
|
| 447 |
+
},
|
| 448 |
+
"TrainerControl": {
|
| 449 |
+
"args": {
|
| 450 |
+
"should_epoch_stop": false,
|
| 451 |
+
"should_evaluate": false,
|
| 452 |
+
"should_log": false,
|
| 453 |
+
"should_save": true,
|
| 454 |
+
"should_training_stop": false
|
| 455 |
+
},
|
| 456 |
+
"attributes": {}
|
| 457 |
+
}
|
| 458 |
+
},
|
| 459 |
+
"total_flos": 0.0,
|
| 460 |
+
"train_batch_size": 16,
|
| 461 |
+
"trial_name": null,
|
| 462 |
+
"trial_params": null
|
| 463 |
+
}
|
checkpoint-2500/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f4f62880a212beb81193a9861931b6524a6e34b8db381dd37db521a5f5c4365
|
| 3 |
+
size 5905
|
checkpoint-3000/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a8f9aa6c167366934527385354bfc97c0141c5aa07ebd9840679f44af003e05
|
| 3 |
+
size 1112724220
|
checkpoint-3000/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab521bdc935ebe8e3adbfe6efdc13f07767aaae1d887c688f07fc2ffa9bdaee1
|
| 3 |
+
size 2220847627
|
checkpoint-3000/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a7fb16fe58d7a2babe8b210fc16add386e796fcbfaa7b1b7c8e812b5b7f5ba51
|
| 3 |
+
size 14645
|
checkpoint-3000/scaler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb991a0a0a68e475b29937e715b9a48479241229b17b1bcd57cfc33b936aaa3b
|
| 3 |
+
size 1383
|
checkpoint-3000/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3b4ddfbbe4d25d106204e7026b316dfbe9c8fb4352b2ed1fb5bbb4c4276142d1
|
| 3 |
+
size 1465
|
checkpoint-3000/sentencepiece.bpe.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
|
| 3 |
+
size 5069051
|
checkpoint-3000/special_tokens_map.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "<s>",
|
| 3 |
+
"cls_token": "<s>",
|
| 4 |
+
"eos_token": "</s>",
|
| 5 |
+
"mask_token": {
|
| 6 |
+
"content": "<mask>",
|
| 7 |
+
"lstrip": true,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false
|
| 11 |
+
},
|
| 12 |
+
"pad_token": "<pad>",
|
| 13 |
+
"sep_token": "</s>",
|
| 14 |
+
"unk_token": "<unk>"
|
| 15 |
+
}
|
checkpoint-3000/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0091a328b3441d754e481db5a390d7f3b8dabc6016869fd13ba350d23ddc4cd
|
| 3 |
+
size 17082832
|
checkpoint-3000/tokenizer_config.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<s>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<pad>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "</s>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "<unk>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"250001": {
|
| 36 |
+
"content": "<mask>",
|
| 37 |
+
"lstrip": true,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"bos_token": "<s>",
|
| 45 |
+
"clean_up_tokenization_spaces": false,
|
| 46 |
+
"cls_token": "<s>",
|
| 47 |
+
"eos_token": "</s>",
|
| 48 |
+
"extra_special_tokens": {},
|
| 49 |
+
"mask_token": "<mask>",
|
| 50 |
+
"model_max_length": 512,
|
| 51 |
+
"pad_token": "<pad>",
|
| 52 |
+
"sep_token": "</s>",
|
| 53 |
+
"tokenizer_class": "XLMRobertaTokenizer",
|
| 54 |
+
"unk_token": "<unk>"
|
| 55 |
+
}
|
checkpoint-3000/trainer_state.json
ADDED
|
@@ -0,0 +1,547 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 3000,
|
| 3 |
+
"best_metric": 0.7091743119266055,
|
| 4 |
+
"best_model_checkpoint": "/content/drive/MyDrive/NLP/Multi-Task/data/massive_tr/xlmr-multihead/checkpoint-3000",
|
| 5 |
+
"epoch": 6.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 3000,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.1,
|
| 14 |
+
"grad_norm": 31.571680068969727,
|
| 15 |
+
"learning_rate": 8.166666666666666e-06,
|
| 16 |
+
"loss": 9.5398,
|
| 17 |
+
"step": 50
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.2,
|
| 21 |
+
"grad_norm": 20.519420623779297,
|
| 22 |
+
"learning_rate": 1.65e-05,
|
| 23 |
+
"loss": 6.6539,
|
| 24 |
+
"step": 100
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"epoch": 0.3,
|
| 28 |
+
"grad_norm": 14.259832382202148,
|
| 29 |
+
"learning_rate": 2.483333333333333e-05,
|
| 30 |
+
"loss": 6.1233,
|
| 31 |
+
"step": 150
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 0.4,
|
| 35 |
+
"grad_norm": 28.014291763305664,
|
| 36 |
+
"learning_rate": 2.979787234042553e-05,
|
| 37 |
+
"loss": 5.6725,
|
| 38 |
+
"step": 200
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"epoch": 0.5,
|
| 42 |
+
"grad_norm": 28.722820281982422,
|
| 43 |
+
"learning_rate": 2.926595744680851e-05,
|
| 44 |
+
"loss": 5.3297,
|
| 45 |
+
"step": 250
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"epoch": 0.6,
|
| 49 |
+
"grad_norm": 38.051170349121094,
|
| 50 |
+
"learning_rate": 2.873404255319149e-05,
|
| 51 |
+
"loss": 4.5911,
|
| 52 |
+
"step": 300
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"epoch": 0.7,
|
| 56 |
+
"grad_norm": 26.39080810546875,
|
| 57 |
+
"learning_rate": 2.820212765957447e-05,
|
| 58 |
+
"loss": 4.017,
|
| 59 |
+
"step": 350
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"epoch": 0.8,
|
| 63 |
+
"grad_norm": 18.74272918701172,
|
| 64 |
+
"learning_rate": 2.7670212765957448e-05,
|
| 65 |
+
"loss": 3.6629,
|
| 66 |
+
"step": 400
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"epoch": 0.9,
|
| 70 |
+
"grad_norm": 18.32183074951172,
|
| 71 |
+
"learning_rate": 2.7138297872340427e-05,
|
| 72 |
+
"loss": 3.3781,
|
| 73 |
+
"step": 450
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"epoch": 1.0,
|
| 77 |
+
"grad_norm": 29.03256607055664,
|
| 78 |
+
"learning_rate": 2.6606382978723407e-05,
|
| 79 |
+
"loss": 2.9562,
|
| 80 |
+
"step": 500
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"epoch": 1.0,
|
| 84 |
+
"eval_intent_accuracy": 0.704,
|
| 85 |
+
"eval_intent_macro_f1": 0.48933652636220204,
|
| 86 |
+
"eval_joint_score": 0.5881757105943153,
|
| 87 |
+
"eval_loss": 2.396956443786621,
|
| 88 |
+
"eval_ner_f1": 0.4723514211886305,
|
| 89 |
+
"eval_ner_precision": 0.5163841807909605,
|
| 90 |
+
"eval_ner_recall": 0.43523809523809526,
|
| 91 |
+
"eval_runtime": 0.5889,
|
| 92 |
+
"eval_samples_per_second": 1698.154,
|
| 93 |
+
"eval_steps_per_second": 54.341,
|
| 94 |
+
"step": 500
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"epoch": 1.1,
|
| 98 |
+
"grad_norm": 40.97825241088867,
|
| 99 |
+
"learning_rate": 2.6074468085106382e-05,
|
| 100 |
+
"loss": 2.701,
|
| 101 |
+
"step": 550
|
| 102 |
+
},
|
| 103 |
+
{
|
| 104 |
+
"epoch": 1.2,
|
| 105 |
+
"grad_norm": 23.553442001342773,
|
| 106 |
+
"learning_rate": 2.554255319148936e-05,
|
| 107 |
+
"loss": 2.4746,
|
| 108 |
+
"step": 600
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"epoch": 1.3,
|
| 112 |
+
"grad_norm": 26.10988426208496,
|
| 113 |
+
"learning_rate": 2.501063829787234e-05,
|
| 114 |
+
"loss": 2.3423,
|
| 115 |
+
"step": 650
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"epoch": 1.4,
|
| 119 |
+
"grad_norm": 19.3520565032959,
|
| 120 |
+
"learning_rate": 2.447872340425532e-05,
|
| 121 |
+
"loss": 2.1157,
|
| 122 |
+
"step": 700
|
| 123 |
+
},
|
| 124 |
+
{
|
| 125 |
+
"epoch": 1.5,
|
| 126 |
+
"grad_norm": 34.30548858642578,
|
| 127 |
+
"learning_rate": 2.3946808510638296e-05,
|
| 128 |
+
"loss": 1.9679,
|
| 129 |
+
"step": 750
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"epoch": 1.6,
|
| 133 |
+
"grad_norm": 15.152769088745117,
|
| 134 |
+
"learning_rate": 2.341489361702128e-05,
|
| 135 |
+
"loss": 1.9404,
|
| 136 |
+
"step": 800
|
| 137 |
+
},
|
| 138 |
+
{
|
| 139 |
+
"epoch": 1.7,
|
| 140 |
+
"grad_norm": 25.86590003967285,
|
| 141 |
+
"learning_rate": 2.2882978723404258e-05,
|
| 142 |
+
"loss": 1.97,
|
| 143 |
+
"step": 850
|
| 144 |
+
},
|
| 145 |
+
{
|
| 146 |
+
"epoch": 1.8,
|
| 147 |
+
"grad_norm": 18.924379348754883,
|
| 148 |
+
"learning_rate": 2.2351063829787237e-05,
|
| 149 |
+
"loss": 1.8073,
|
| 150 |
+
"step": 900
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"epoch": 1.9,
|
| 154 |
+
"grad_norm": 20.5709171295166,
|
| 155 |
+
"learning_rate": 2.1819148936170213e-05,
|
| 156 |
+
"loss": 1.7987,
|
| 157 |
+
"step": 950
|
| 158 |
+
},
|
| 159 |
+
{
|
| 160 |
+
"epoch": 2.0,
|
| 161 |
+
"grad_norm": 31.26434898376465,
|
| 162 |
+
"learning_rate": 2.1287234042553192e-05,
|
| 163 |
+
"loss": 1.7652,
|
| 164 |
+
"step": 1000
|
| 165 |
+
},
|
| 166 |
+
{
|
| 167 |
+
"epoch": 2.0,
|
| 168 |
+
"eval_intent_accuracy": 0.833,
|
| 169 |
+
"eval_intent_macro_f1": 0.7032350726349212,
|
| 170 |
+
"eval_joint_score": 0.7102992125984251,
|
| 171 |
+
"eval_loss": 1.4518628120422363,
|
| 172 |
+
"eval_ner_f1": 0.5875984251968503,
|
| 173 |
+
"eval_ner_precision": 0.6079429735234216,
|
| 174 |
+
"eval_ner_recall": 0.5685714285714286,
|
| 175 |
+
"eval_runtime": 0.595,
|
| 176 |
+
"eval_samples_per_second": 1680.634,
|
| 177 |
+
"eval_steps_per_second": 53.78,
|
| 178 |
+
"step": 1000
|
| 179 |
+
},
|
| 180 |
+
{
|
| 181 |
+
"epoch": 2.1,
|
| 182 |
+
"grad_norm": 35.977622985839844,
|
| 183 |
+
"learning_rate": 2.075531914893617e-05,
|
| 184 |
+
"loss": 1.4852,
|
| 185 |
+
"step": 1050
|
| 186 |
+
},
|
| 187 |
+
{
|
| 188 |
+
"epoch": 2.2,
|
| 189 |
+
"grad_norm": 30.942049026489258,
|
| 190 |
+
"learning_rate": 2.0223404255319147e-05,
|
| 191 |
+
"loss": 1.3909,
|
| 192 |
+
"step": 1100
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"epoch": 2.3,
|
| 196 |
+
"grad_norm": 29.61802101135254,
|
| 197 |
+
"learning_rate": 1.9691489361702126e-05,
|
| 198 |
+
"loss": 1.4372,
|
| 199 |
+
"step": 1150
|
| 200 |
+
},
|
| 201 |
+
{
|
| 202 |
+
"epoch": 2.4,
|
| 203 |
+
"grad_norm": 17.332490921020508,
|
| 204 |
+
"learning_rate": 1.915957446808511e-05,
|
| 205 |
+
"loss": 1.4184,
|
| 206 |
+
"step": 1200
|
| 207 |
+
},
|
| 208 |
+
{
|
| 209 |
+
"epoch": 2.5,
|
| 210 |
+
"grad_norm": 9.733820915222168,
|
| 211 |
+
"learning_rate": 1.8627659574468088e-05,
|
| 212 |
+
"loss": 1.3071,
|
| 213 |
+
"step": 1250
|
| 214 |
+
},
|
| 215 |
+
{
|
| 216 |
+
"epoch": 2.6,
|
| 217 |
+
"grad_norm": 22.356639862060547,
|
| 218 |
+
"learning_rate": 1.8095744680851064e-05,
|
| 219 |
+
"loss": 1.3381,
|
| 220 |
+
"step": 1300
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"epoch": 2.7,
|
| 224 |
+
"grad_norm": 26.953872680664062,
|
| 225 |
+
"learning_rate": 1.7563829787234043e-05,
|
| 226 |
+
"loss": 1.347,
|
| 227 |
+
"step": 1350
|
| 228 |
+
},
|
| 229 |
+
{
|
| 230 |
+
"epoch": 2.8,
|
| 231 |
+
"grad_norm": 39.545013427734375,
|
| 232 |
+
"learning_rate": 1.7031914893617022e-05,
|
| 233 |
+
"loss": 1.2399,
|
| 234 |
+
"step": 1400
|
| 235 |
+
},
|
| 236 |
+
{
|
| 237 |
+
"epoch": 2.9,
|
| 238 |
+
"grad_norm": 29.881067276000977,
|
| 239 |
+
"learning_rate": 1.65e-05,
|
| 240 |
+
"loss": 1.2018,
|
| 241 |
+
"step": 1450
|
| 242 |
+
},
|
| 243 |
+
{
|
| 244 |
+
"epoch": 3.0,
|
| 245 |
+
"grad_norm": 34.384517669677734,
|
| 246 |
+
"learning_rate": 1.5968085106382977e-05,
|
| 247 |
+
"loss": 1.2898,
|
| 248 |
+
"step": 1500
|
| 249 |
+
},
|
| 250 |
+
{
|
| 251 |
+
"epoch": 3.0,
|
| 252 |
+
"eval_intent_accuracy": 0.843,
|
| 253 |
+
"eval_intent_macro_f1": 0.7342243232665543,
|
| 254 |
+
"eval_joint_score": 0.7419320432043204,
|
| 255 |
+
"eval_loss": 1.2403136491775513,
|
| 256 |
+
"eval_ner_f1": 0.6408640864086408,
|
| 257 |
+
"eval_ner_precision": 0.6075085324232082,
|
| 258 |
+
"eval_ner_recall": 0.6780952380952381,
|
| 259 |
+
"eval_runtime": 0.5931,
|
| 260 |
+
"eval_samples_per_second": 1686.094,
|
| 261 |
+
"eval_steps_per_second": 53.955,
|
| 262 |
+
"step": 1500
|
| 263 |
+
},
|
| 264 |
+
{
|
| 265 |
+
"epoch": 3.1,
|
| 266 |
+
"grad_norm": 40.15689468383789,
|
| 267 |
+
"learning_rate": 1.5436170212765956e-05,
|
| 268 |
+
"loss": 1.0928,
|
| 269 |
+
"step": 1550
|
| 270 |
+
},
|
| 271 |
+
{
|
| 272 |
+
"epoch": 3.2,
|
| 273 |
+
"grad_norm": 14.882131576538086,
|
| 274 |
+
"learning_rate": 1.4904255319148937e-05,
|
| 275 |
+
"loss": 0.971,
|
| 276 |
+
"step": 1600
|
| 277 |
+
},
|
| 278 |
+
{
|
| 279 |
+
"epoch": 3.3,
|
| 280 |
+
"grad_norm": 16.319412231445312,
|
| 281 |
+
"learning_rate": 1.4372340425531915e-05,
|
| 282 |
+
"loss": 1.1631,
|
| 283 |
+
"step": 1650
|
| 284 |
+
},
|
| 285 |
+
{
|
| 286 |
+
"epoch": 3.4,
|
| 287 |
+
"grad_norm": 26.943748474121094,
|
| 288 |
+
"learning_rate": 1.3840425531914896e-05,
|
| 289 |
+
"loss": 1.0528,
|
| 290 |
+
"step": 1700
|
| 291 |
+
},
|
| 292 |
+
{
|
| 293 |
+
"epoch": 3.5,
|
| 294 |
+
"grad_norm": 19.07010841369629,
|
| 295 |
+
"learning_rate": 1.3308510638297873e-05,
|
| 296 |
+
"loss": 1.0475,
|
| 297 |
+
"step": 1750
|
| 298 |
+
},
|
| 299 |
+
{
|
| 300 |
+
"epoch": 3.6,
|
| 301 |
+
"grad_norm": 16.704652786254883,
|
| 302 |
+
"learning_rate": 1.277659574468085e-05,
|
| 303 |
+
"loss": 1.0072,
|
| 304 |
+
"step": 1800
|
| 305 |
+
},
|
| 306 |
+
{
|
| 307 |
+
"epoch": 3.7,
|
| 308 |
+
"grad_norm": 20.118215560913086,
|
| 309 |
+
"learning_rate": 1.224468085106383e-05,
|
| 310 |
+
"loss": 1.0892,
|
| 311 |
+
"step": 1850
|
| 312 |
+
},
|
| 313 |
+
{
|
| 314 |
+
"epoch": 3.8,
|
| 315 |
+
"grad_norm": 25.932292938232422,
|
| 316 |
+
"learning_rate": 1.1712765957446809e-05,
|
| 317 |
+
"loss": 0.8766,
|
| 318 |
+
"step": 1900
|
| 319 |
+
},
|
| 320 |
+
{
|
| 321 |
+
"epoch": 3.9,
|
| 322 |
+
"grad_norm": 20.448410034179688,
|
| 323 |
+
"learning_rate": 1.1180851063829788e-05,
|
| 324 |
+
"loss": 0.8948,
|
| 325 |
+
"step": 1950
|
| 326 |
+
},
|
| 327 |
+
{
|
| 328 |
+
"epoch": 4.0,
|
| 329 |
+
"grad_norm": 13.030745506286621,
|
| 330 |
+
"learning_rate": 1.0648936170212766e-05,
|
| 331 |
+
"loss": 0.9611,
|
| 332 |
+
"step": 2000
|
| 333 |
+
},
|
| 334 |
+
{
|
| 335 |
+
"epoch": 4.0,
|
| 336 |
+
"eval_intent_accuracy": 0.865,
|
| 337 |
+
"eval_intent_macro_f1": 0.7845036463088115,
|
| 338 |
+
"eval_joint_score": 0.7702941855099215,
|
| 339 |
+
"eval_loss": 1.1503586769104004,
|
| 340 |
+
"eval_ner_f1": 0.675588371019843,
|
| 341 |
+
"eval_ner_precision": 0.6553267681289168,
|
| 342 |
+
"eval_ner_recall": 0.6971428571428572,
|
| 343 |
+
"eval_runtime": 0.5885,
|
| 344 |
+
"eval_samples_per_second": 1699.13,
|
| 345 |
+
"eval_steps_per_second": 54.372,
|
| 346 |
+
"step": 2000
|
| 347 |
+
},
|
| 348 |
+
{
|
| 349 |
+
"epoch": 4.1,
|
| 350 |
+
"grad_norm": 7.756314754486084,
|
| 351 |
+
"learning_rate": 1.0117021276595745e-05,
|
| 352 |
+
"loss": 0.7831,
|
| 353 |
+
"step": 2050
|
| 354 |
+
},
|
| 355 |
+
{
|
| 356 |
+
"epoch": 4.2,
|
| 357 |
+
"grad_norm": 15.828548431396484,
|
| 358 |
+
"learning_rate": 9.585106382978724e-06,
|
| 359 |
+
"loss": 0.7917,
|
| 360 |
+
"step": 2100
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"epoch": 4.3,
|
| 364 |
+
"grad_norm": 17.722307205200195,
|
| 365 |
+
"learning_rate": 9.053191489361703e-06,
|
| 366 |
+
"loss": 0.7958,
|
| 367 |
+
"step": 2150
|
| 368 |
+
},
|
| 369 |
+
{
|
| 370 |
+
"epoch": 4.4,
|
| 371 |
+
"grad_norm": 31.272441864013672,
|
| 372 |
+
"learning_rate": 8.52127659574468e-06,
|
| 373 |
+
"loss": 0.7537,
|
| 374 |
+
"step": 2200
|
| 375 |
+
},
|
| 376 |
+
{
|
| 377 |
+
"epoch": 4.5,
|
| 378 |
+
"grad_norm": 37.739131927490234,
|
| 379 |
+
"learning_rate": 7.98936170212766e-06,
|
| 380 |
+
"loss": 0.8002,
|
| 381 |
+
"step": 2250
|
| 382 |
+
},
|
| 383 |
+
{
|
| 384 |
+
"epoch": 4.6,
|
| 385 |
+
"grad_norm": 29.077699661254883,
|
| 386 |
+
"learning_rate": 7.457446808510638e-06,
|
| 387 |
+
"loss": 0.8878,
|
| 388 |
+
"step": 2300
|
| 389 |
+
},
|
| 390 |
+
{
|
| 391 |
+
"epoch": 4.7,
|
| 392 |
+
"grad_norm": 43.24278259277344,
|
| 393 |
+
"learning_rate": 6.925531914893618e-06,
|
| 394 |
+
"loss": 0.8453,
|
| 395 |
+
"step": 2350
|
| 396 |
+
},
|
| 397 |
+
{
|
| 398 |
+
"epoch": 4.8,
|
| 399 |
+
"grad_norm": 94.07080841064453,
|
| 400 |
+
"learning_rate": 6.393617021276596e-06,
|
| 401 |
+
"loss": 0.7931,
|
| 402 |
+
"step": 2400
|
| 403 |
+
},
|
| 404 |
+
{
|
| 405 |
+
"epoch": 4.9,
|
| 406 |
+
"grad_norm": 20.592226028442383,
|
| 407 |
+
"learning_rate": 5.861702127659575e-06,
|
| 408 |
+
"loss": 0.7832,
|
| 409 |
+
"step": 2450
|
| 410 |
+
},
|
| 411 |
+
{
|
| 412 |
+
"epoch": 5.0,
|
| 413 |
+
"grad_norm": 36.34307098388672,
|
| 414 |
+
"learning_rate": 5.3297872340425535e-06,
|
| 415 |
+
"loss": 0.8036,
|
| 416 |
+
"step": 2500
|
| 417 |
+
},
|
| 418 |
+
{
|
| 419 |
+
"epoch": 5.0,
|
| 420 |
+
"eval_intent_accuracy": 0.868,
|
| 421 |
+
"eval_intent_macro_f1": 0.8051212440624268,
|
| 422 |
+
"eval_joint_score": 0.7852150389729482,
|
| 423 |
+
"eval_loss": 1.0913232564926147,
|
| 424 |
+
"eval_ner_f1": 0.7024300779458964,
|
| 425 |
+
"eval_ner_precision": 0.6772767462422635,
|
| 426 |
+
"eval_ner_recall": 0.7295238095238096,
|
| 427 |
+
"eval_runtime": 0.5824,
|
| 428 |
+
"eval_samples_per_second": 1717.074,
|
| 429 |
+
"eval_steps_per_second": 54.946,
|
| 430 |
+
"step": 2500
|
| 431 |
+
},
|
| 432 |
+
{
|
| 433 |
+
"epoch": 5.1,
|
| 434 |
+
"grad_norm": 13.311659812927246,
|
| 435 |
+
"learning_rate": 4.797872340425533e-06,
|
| 436 |
+
"loss": 0.6409,
|
| 437 |
+
"step": 2550
|
| 438 |
+
},
|
| 439 |
+
{
|
| 440 |
+
"epoch": 5.2,
|
| 441 |
+
"grad_norm": 17.807374954223633,
|
| 442 |
+
"learning_rate": 4.265957446808511e-06,
|
| 443 |
+
"loss": 0.7401,
|
| 444 |
+
"step": 2600
|
| 445 |
+
},
|
| 446 |
+
{
|
| 447 |
+
"epoch": 5.3,
|
| 448 |
+
"grad_norm": 8.320006370544434,
|
| 449 |
+
"learning_rate": 3.7340425531914894e-06,
|
| 450 |
+
"loss": 0.668,
|
| 451 |
+
"step": 2650
|
| 452 |
+
},
|
| 453 |
+
{
|
| 454 |
+
"epoch": 5.4,
|
| 455 |
+
"grad_norm": 20.279203414916992,
|
| 456 |
+
"learning_rate": 3.202127659574468e-06,
|
| 457 |
+
"loss": 0.6477,
|
| 458 |
+
"step": 2700
|
| 459 |
+
},
|
| 460 |
+
{
|
| 461 |
+
"epoch": 5.5,
|
| 462 |
+
"grad_norm": 23.9965763092041,
|
| 463 |
+
"learning_rate": 2.670212765957447e-06,
|
| 464 |
+
"loss": 0.7239,
|
| 465 |
+
"step": 2750
|
| 466 |
+
},
|
| 467 |
+
{
|
| 468 |
+
"epoch": 5.6,
|
| 469 |
+
"grad_norm": 38.03826904296875,
|
| 470 |
+
"learning_rate": 2.1382978723404258e-06,
|
| 471 |
+
"loss": 0.6816,
|
| 472 |
+
"step": 2800
|
| 473 |
+
},
|
| 474 |
+
{
|
| 475 |
+
"epoch": 5.7,
|
| 476 |
+
"grad_norm": 17.692941665649414,
|
| 477 |
+
"learning_rate": 1.6063829787234043e-06,
|
| 478 |
+
"loss": 0.6778,
|
| 479 |
+
"step": 2850
|
| 480 |
+
},
|
| 481 |
+
{
|
| 482 |
+
"epoch": 5.8,
|
| 483 |
+
"grad_norm": 33.07294464111328,
|
| 484 |
+
"learning_rate": 1.074468085106383e-06,
|
| 485 |
+
"loss": 0.694,
|
| 486 |
+
"step": 2900
|
| 487 |
+
},
|
| 488 |
+
{
|
| 489 |
+
"epoch": 5.9,
|
| 490 |
+
"grad_norm": 17.965429306030273,
|
| 491 |
+
"learning_rate": 5.425531914893617e-07,
|
| 492 |
+
"loss": 0.6421,
|
| 493 |
+
"step": 2950
|
| 494 |
+
},
|
| 495 |
+
{
|
| 496 |
+
"epoch": 6.0,
|
| 497 |
+
"grad_norm": 15.919111251831055,
|
| 498 |
+
"learning_rate": 1.0638297872340427e-08,
|
| 499 |
+
"loss": 0.6727,
|
| 500 |
+
"step": 3000
|
| 501 |
+
},
|
| 502 |
+
{
|
| 503 |
+
"epoch": 6.0,
|
| 504 |
+
"eval_intent_accuracy": 0.87,
|
| 505 |
+
"eval_intent_macro_f1": 0.8193369098618751,
|
| 506 |
+
"eval_joint_score": 0.7895871559633028,
|
| 507 |
+
"eval_loss": 1.0895804166793823,
|
| 508 |
+
"eval_ner_f1": 0.7091743119266055,
|
| 509 |
+
"eval_ner_precision": 0.684070796460177,
|
| 510 |
+
"eval_ner_recall": 0.7361904761904762,
|
| 511 |
+
"eval_runtime": 0.6107,
|
| 512 |
+
"eval_samples_per_second": 1637.488,
|
| 513 |
+
"eval_steps_per_second": 52.4,
|
| 514 |
+
"step": 3000
|
| 515 |
+
}
|
| 516 |
+
],
|
| 517 |
+
"logging_steps": 50,
|
| 518 |
+
"max_steps": 3000,
|
| 519 |
+
"num_input_tokens_seen": 0,
|
| 520 |
+
"num_train_epochs": 6,
|
| 521 |
+
"save_steps": 500,
|
| 522 |
+
"stateful_callbacks": {
|
| 523 |
+
"EarlyStoppingCallback": {
|
| 524 |
+
"args": {
|
| 525 |
+
"early_stopping_patience": 2,
|
| 526 |
+
"early_stopping_threshold": 0.0005
|
| 527 |
+
},
|
| 528 |
+
"attributes": {
|
| 529 |
+
"early_stopping_patience_counter": 0
|
| 530 |
+
}
|
| 531 |
+
},
|
| 532 |
+
"TrainerControl": {
|
| 533 |
+
"args": {
|
| 534 |
+
"should_epoch_stop": false,
|
| 535 |
+
"should_evaluate": false,
|
| 536 |
+
"should_log": false,
|
| 537 |
+
"should_save": true,
|
| 538 |
+
"should_training_stop": true
|
| 539 |
+
},
|
| 540 |
+
"attributes": {}
|
| 541 |
+
}
|
| 542 |
+
},
|
| 543 |
+
"total_flos": 0.0,
|
| 544 |
+
"train_batch_size": 16,
|
| 545 |
+
"trial_name": null,
|
| 546 |
+
"trial_params": null
|
| 547 |
+
}
|
checkpoint-3000/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f4f62880a212beb81193a9861931b6524a6e34b8db381dd37db521a5f5c4365
|
| 3 |
+
size 5905
|
label_schemes.json
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"intents": [
|
| 3 |
+
"intent_0",
|
| 4 |
+
"intent_1",
|
| 5 |
+
"intent_10",
|
| 6 |
+
"intent_11",
|
| 7 |
+
"intent_12",
|
| 8 |
+
"intent_13",
|
| 9 |
+
"intent_14",
|
| 10 |
+
"intent_15",
|
| 11 |
+
"intent_16",
|
| 12 |
+
"intent_17",
|
| 13 |
+
"intent_18",
|
| 14 |
+
"intent_19",
|
| 15 |
+
"intent_2",
|
| 16 |
+
"intent_20",
|
| 17 |
+
"intent_21",
|
| 18 |
+
"intent_22",
|
| 19 |
+
"intent_23",
|
| 20 |
+
"intent_24",
|
| 21 |
+
"intent_25",
|
| 22 |
+
"intent_26",
|
| 23 |
+
"intent_27",
|
| 24 |
+
"intent_28",
|
| 25 |
+
"intent_29",
|
| 26 |
+
"intent_3",
|
| 27 |
+
"intent_30",
|
| 28 |
+
"intent_31",
|
| 29 |
+
"intent_32",
|
| 30 |
+
"intent_33",
|
| 31 |
+
"intent_34",
|
| 32 |
+
"intent_35",
|
| 33 |
+
"intent_36",
|
| 34 |
+
"intent_37",
|
| 35 |
+
"intent_38",
|
| 36 |
+
"intent_39",
|
| 37 |
+
"intent_4",
|
| 38 |
+
"intent_40",
|
| 39 |
+
"intent_41",
|
| 40 |
+
"intent_42",
|
| 41 |
+
"intent_43",
|
| 42 |
+
"intent_44",
|
| 43 |
+
"intent_45",
|
| 44 |
+
"intent_46",
|
| 45 |
+
"intent_47",
|
| 46 |
+
"intent_48",
|
| 47 |
+
"intent_49",
|
| 48 |
+
"intent_5",
|
| 49 |
+
"intent_50",
|
| 50 |
+
"intent_51",
|
| 51 |
+
"intent_52",
|
| 52 |
+
"intent_53",
|
| 53 |
+
"intent_54",
|
| 54 |
+
"intent_55",
|
| 55 |
+
"intent_56",
|
| 56 |
+
"intent_57",
|
| 57 |
+
"intent_58",
|
| 58 |
+
"intent_59",
|
| 59 |
+
"intent_6",
|
| 60 |
+
"intent_7",
|
| 61 |
+
"intent_8",
|
| 62 |
+
"intent_9"
|
| 63 |
+
],
|
| 64 |
+
"sentiments": [
|
| 65 |
+
"pozitif",
|
| 66 |
+
"negatif",
|
| 67 |
+
"nötr"
|
| 68 |
+
],
|
| 69 |
+
"dialogue_acts": [
|
| 70 |
+
"soru",
|
| 71 |
+
"istek",
|
| 72 |
+
"teşekkür",
|
| 73 |
+
"selamlama",
|
| 74 |
+
"onay",
|
| 75 |
+
"red",
|
| 76 |
+
"açıklama",
|
| 77 |
+
"cevap",
|
| 78 |
+
"düzeltme"
|
| 79 |
+
],
|
| 80 |
+
"ner_tags": [
|
| 81 |
+
"O",
|
| 82 |
+
"B-ALARM_TYPE",
|
| 83 |
+
"I-ALARM_TYPE",
|
| 84 |
+
"B-APP_NAME",
|
| 85 |
+
"I-APP_NAME",
|
| 86 |
+
"B-ARTIST_NAME",
|
| 87 |
+
"I-ARTIST_NAME",
|
| 88 |
+
"B-AUDIOBOOK_AUTHOR",
|
| 89 |
+
"I-AUDIOBOOK_AUTHOR",
|
| 90 |
+
"B-AUDIOBOOK_NAME",
|
| 91 |
+
"I-AUDIOBOOK_NAME",
|
| 92 |
+
"B-BUSINESS_NAME",
|
| 93 |
+
"I-BUSINESS_NAME",
|
| 94 |
+
"B-BUSINESS_TYPE",
|
| 95 |
+
"I-BUSINESS_TYPE",
|
| 96 |
+
"B-CHANGE_AMOUNT",
|
| 97 |
+
"I-CHANGE_AMOUNT",
|
| 98 |
+
"B-COFFEE_TYPE",
|
| 99 |
+
"I-COFFEE_TYPE",
|
| 100 |
+
"B-COLOR_TYPE",
|
| 101 |
+
"I-COLOR_TYPE",
|
| 102 |
+
"B-COOKING_TYPE",
|
| 103 |
+
"I-COOKING_TYPE",
|
| 104 |
+
"B-CURRENCY_NAME",
|
| 105 |
+
"I-CURRENCY_NAME",
|
| 106 |
+
"B-DATE",
|
| 107 |
+
"I-DATE",
|
| 108 |
+
"B-DEFINITION_WORD",
|
| 109 |
+
"I-DEFINITION_WORD",
|
| 110 |
+
"B-DEVICE_TYPE",
|
| 111 |
+
"I-DEVICE_TYPE",
|
| 112 |
+
"B-DRINK_TYPE",
|
| 113 |
+
"I-DRINK_TYPE",
|
| 114 |
+
"B-EMAIL_ADDRESS",
|
| 115 |
+
"I-EMAIL_ADDRESS",
|
| 116 |
+
"B-EMAIL_FOLDER",
|
| 117 |
+
"I-EMAIL_FOLDER",
|
| 118 |
+
"B-EVENT_NAME",
|
| 119 |
+
"I-EVENT_NAME",
|
| 120 |
+
"B-FOOD_TYPE",
|
| 121 |
+
"I-FOOD_TYPE",
|
| 122 |
+
"B-GAME_NAME",
|
| 123 |
+
"I-GAME_NAME",
|
| 124 |
+
"B-GAME_TYPE",
|
| 125 |
+
"I-GAME_TYPE",
|
| 126 |
+
"B-GENERAL_FREQUENCY",
|
| 127 |
+
"I-GENERAL_FREQUENCY",
|
| 128 |
+
"B-HOUSE_PLACE",
|
| 129 |
+
"I-HOUSE_PLACE",
|
| 130 |
+
"B-INGREDIENT",
|
| 131 |
+
"I-INGREDIENT",
|
| 132 |
+
"B-JOKE_TYPE",
|
| 133 |
+
"I-JOKE_TYPE",
|
| 134 |
+
"B-LIST_NAME",
|
| 135 |
+
"I-LIST_NAME",
|
| 136 |
+
"B-MEAL_TYPE",
|
| 137 |
+
"I-MEAL_TYPE",
|
| 138 |
+
"B-MEDIA_TYPE",
|
| 139 |
+
"I-MEDIA_TYPE",
|
| 140 |
+
"B-MOVIE_NAME",
|
| 141 |
+
"I-MOVIE_NAME",
|
| 142 |
+
"B-MOVIE_TYPE",
|
| 143 |
+
"I-MOVIE_TYPE",
|
| 144 |
+
"B-MUSIC_ALBUM",
|
| 145 |
+
"I-MUSIC_ALBUM",
|
| 146 |
+
"B-MUSIC_DESCRIPTOR",
|
| 147 |
+
"I-MUSIC_DESCRIPTOR",
|
| 148 |
+
"B-MUSIC_GENRE",
|
| 149 |
+
"I-MUSIC_GENRE",
|
| 150 |
+
"B-NEWS_TOPIC",
|
| 151 |
+
"I-NEWS_TOPIC",
|
| 152 |
+
"B-ORDER_TYPE",
|
| 153 |
+
"I-ORDER_TYPE",
|
| 154 |
+
"B-PERSON",
|
| 155 |
+
"I-PERSON",
|
| 156 |
+
"B-PERSONAL_INFO",
|
| 157 |
+
"I-PERSONAL_INFO",
|
| 158 |
+
"B-PLACE_NAME",
|
| 159 |
+
"I-PLACE_NAME",
|
| 160 |
+
"B-PLAYER_SETTING",
|
| 161 |
+
"I-PLAYER_SETTING",
|
| 162 |
+
"B-PLAYLIST_NAME",
|
| 163 |
+
"I-PLAYLIST_NAME",
|
| 164 |
+
"B-PODCAST_DESCRIPTOR",
|
| 165 |
+
"I-PODCAST_DESCRIPTOR",
|
| 166 |
+
"B-PODCAST_NAME",
|
| 167 |
+
"I-PODCAST_NAME",
|
| 168 |
+
"B-RADIO_NAME",
|
| 169 |
+
"I-RADIO_NAME",
|
| 170 |
+
"B-RELATION",
|
| 171 |
+
"I-RELATION",
|
| 172 |
+
"B-SONG_NAME",
|
| 173 |
+
"I-SONG_NAME",
|
| 174 |
+
"B-SPORT_TYPE",
|
| 175 |
+
"I-SPORT_TYPE",
|
| 176 |
+
"B-TIME",
|
| 177 |
+
"I-TIME",
|
| 178 |
+
"B-TIME_ZONE",
|
| 179 |
+
"I-TIME_ZONE",
|
| 180 |
+
"B-TIMEOFDAY",
|
| 181 |
+
"I-TIMEOFDAY",
|
| 182 |
+
"B-TRANSPORT_AGENCY",
|
| 183 |
+
"I-TRANSPORT_AGENCY",
|
| 184 |
+
"B-TRANSPORT_DESCRIPTOR",
|
| 185 |
+
"I-TRANSPORT_DESCRIPTOR",
|
| 186 |
+
"B-TRANSPORT_NAME",
|
| 187 |
+
"I-TRANSPORT_NAME",
|
| 188 |
+
"B-TRANSPORT_TYPE",
|
| 189 |
+
"I-TRANSPORT_TYPE",
|
| 190 |
+
"B-WEATHER_DESCRIPTOR",
|
| 191 |
+
"I-WEATHER_DESCRIPTOR"
|
| 192 |
+
]
|
| 193 |
+
}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a8f9aa6c167366934527385354bfc97c0141c5aa07ebd9840679f44af003e05
|
| 3 |
+
size 1112724220
|
modeling_xlmr_multihead.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
from transformers import AutoModel
|
| 4 |
+
|
| 5 |
+
# Label id passed as `ignore_index` to the token-level (NER) cross-entropy loss,
# so positions carrying this label do not contribute to the NER loss.
IGNORE_IDX = -100
|
| 6 |
+
|
| 7 |
+
class XLMRMultiHead(nn.Module):
    """Shared transformer encoder with two classification heads.

    One linear head classifies the whole sequence (intent) from the hidden
    state of the first token; a second linear head classifies every token
    (NER) from the full sequence of hidden states. Both heads share the same
    dropout module.

    Args:
        base: Model name or path handed to ``AutoModel.from_pretrained``.
        n_intent: Number of intent classes (output size of the intent head).
        n_ner: Number of NER tags (output size of the token head).
        dropout: Dropout probability applied to the encoder output before
            each head.
        intent_weight: Multiplier for the intent loss in the joint loss.
        ner_weight: Multiplier for the NER loss in the joint loss.
    """

    def __init__(self, base="xlm-roberta-base", n_intent=0, n_ner=0, dropout=0.1,
                 intent_weight=1.0, ner_weight=0.8):
        super().__init__()
        self.enc = AutoModel.from_pretrained(base)
        h = self.enc.config.hidden_size
        self.drop = nn.Dropout(dropout)
        self.intent = nn.Linear(h, n_intent)
        self.ner = nn.Linear(h, n_ner)
        self.ce_int = nn.CrossEntropyLoss()
        # Token positions labelled IGNORE_IDX are excluded from the NER loss.
        self.ce_tok = nn.CrossEntropyLoss(ignore_index=IGNORE_IDX)
        # Plain floats (not parameters/buffers): the state_dict layout is
        # unchanged, so existing checkpoints keep loading.
        self.intent_weight = intent_weight
        self.ner_weight = ner_weight

    def forward(self, input_ids, attention_mask, labels_intent=None, labels_ner=None):
        """Run the encoder and both heads, optionally computing the joint loss.

        Args:
            input_ids: Token ids forwarded to the encoder.
            attention_mask: Attention mask forwarded to the encoder.
            labels_intent: Optional intent targets for ``logits_intent``.
            labels_ner: Optional per-token targets for ``logits_ner``;
                positions equal to ``IGNORE_IDX`` are skipped by the loss.

        Returns:
            dict with keys:
                ``loss``: ``None`` when no labels are given; otherwise the
                    weighted sum of the losses whose labels were supplied
                    (``intent_weight * intent_loss + ner_weight * ner_loss``
                    when both are present).
                ``logits_intent``: intent logits, ``[B, n_intent]``.
                ``logits_ner``: token logits, ``[B, T, n_ner]``.
        """
        out = self.enc(input_ids=input_ids, attention_mask=attention_mask, return_dict=True)
        hidden = out.last_hidden_state
        # Dropout is applied separately (first-token vector first, then the
        # full sequence) so the two heads see independent dropout masks.
        cls = self.drop(hidden[:, 0])
        seq = self.drop(hidden)

        li = self.intent(cls)  # [B, n_intent]
        ln = self.ner(seq)     # [B, T, n_ner]

        loss = None
        if labels_intent is not None:
            loss = self.intent_weight * self.ce_int(li, labels_intent)
        if labels_ner is not None:
            # Flatten batch and time so every token is one classification example.
            l_n = self.ner_weight * self.ce_tok(
                ln.reshape(-1, ln.size(-1)), labels_ner.reshape(-1)
            )
            loss = l_n if loss is None else loss + l_n
        return {"loss": loss, "logits_intent": li, "logits_ner": ln}
|
runs/Sep01_10-59-23_a3c5035f483f/events.out.tfevents.1756724394.a3c5035f483f.2331.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:97df4cc4f8bd4a3821ed1e75af425693a017d06c268e7105099dbf8d00d26282
|
| 3 |
+
size 4388
|
runs/Sep01_11-21-03_a3c5035f483f/events.out.tfevents.1756725668.a3c5035f483f.2331.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0027b16012ae2f6b1f4d523cacc88ddc56c66aa277ad1689d26ebd78e95c954a
|
| 3 |
+
size 4390
|
sentencepiece.bpe.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
|
| 3 |
+
size 5069051
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "<s>",
|
| 3 |
+
"cls_token": "<s>",
|
| 4 |
+
"eos_token": "</s>",
|
| 5 |
+
"mask_token": {
|
| 6 |
+
"content": "<mask>",
|
| 7 |
+
"lstrip": true,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false
|
| 11 |
+
},
|
| 12 |
+
"pad_token": "<pad>",
|
| 13 |
+
"sep_token": "</s>",
|
| 14 |
+
"unk_token": "<unk>"
|
| 15 |
+
}
|
tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3af6aa7f169ffc6462ef897edb214dfc2de1217c2669749e57b6f94c096232f7
|
| 3 |
+
size 17082832
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<s>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<pad>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "</s>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "<unk>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"250001": {
|
| 36 |
+
"content": "<mask>",
|
| 37 |
+
"lstrip": true,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"bos_token": "<s>",
|
| 45 |
+
"clean_up_tokenization_spaces": false,
|
| 46 |
+
"cls_token": "<s>",
|
| 47 |
+
"eos_token": "</s>",
|
| 48 |
+
"extra_special_tokens": {},
|
| 49 |
+
"mask_token": "<mask>",
|
| 50 |
+
"model_max_length": 512,
|
| 51 |
+
"pad_token": "<pad>",
|
| 52 |
+
"sep_token": "</s>",
|
| 53 |
+
"tokenizer_class": "XLMRobertaTokenizer",
|
| 54 |
+
"unk_token": "<unk>"
|
| 55 |
+
}
|
training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f4f62880a212beb81193a9861931b6524a6e34b8db381dd37db521a5f5c4365
|
| 3 |
+
size 5905
|