juanmcristobal commited on
Commit
7a093c4
·
verified ·
1 Parent(s): eb3496e

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ library_name: transformers
4
+ tags:
5
+ - autotrain
6
+ - token-classification
7
+ base_model: answerdotai/ModernBERT-large
8
+ widget:
9
+ - text: "I love AutoTrain"
10
+ datasets:
11
+ - juanmcristobal/ner-ioc-dataset3
12
+ ---
13
+
14
+ # Model Trained Using AutoTrain
15
+
16
+ - Problem type: Token Classification
17
+
18
+ ## Validation Metrics
19
+ loss: 0.080692358314991
20
+
21
+ precision: 0.8956920811279763
22
+
23
+ recall: 0.9126250733540829
24
+
25
+ f1: 0.9040792973909716
26
+
27
+ accuracy: 0.9731636716504077
checkpoint-10605/config.json ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "answerdotai/ModernBERT-large",
3
+ "_num_labels": 42,
4
+ "architectures": [
5
+ "ModernBertForTokenClassification"
6
+ ],
7
+ "attention_bias": false,
8
+ "attention_dropout": 0.0,
9
+ "bos_token_id": 50281,
10
+ "classifier_activation": "gelu",
11
+ "classifier_bias": false,
12
+ "classifier_dropout": 0.0,
13
+ "classifier_pooling": "mean",
14
+ "cls_token_id": 50281,
15
+ "decoder_bias": true,
16
+ "deterministic_flash_attn": false,
17
+ "embedding_dropout": 0.0,
18
+ "eos_token_id": 50282,
19
+ "global_attn_every_n_layers": 3,
20
+ "global_rope_theta": 160000.0,
21
+ "gradient_checkpointing": false,
22
+ "hidden_activation": "gelu",
23
+ "hidden_size": 1024,
24
+ "id2label": {
25
+ "0": "B-URL",
26
+ "1": "I-URL",
27
+ "2": "O",
28
+ "3": "B-ORG",
29
+ "4": "B-SERVICE",
30
+ "5": "I-ORG",
31
+ "6": "B-SECTOR",
32
+ "7": "I-SECTOR",
33
+ "8": "B-FILEPATH",
34
+ "9": "I-FILEPATH",
35
+ "10": "I-DOMAIN",
36
+ "11": "B-PLATFORM",
37
+ "12": "I-SERVICE",
38
+ "13": "I-PLATFORM",
39
+ "14": "B-THREAT-ACTOR",
40
+ "15": "I-THREAT-ACTOR",
41
+ "16": "B-PRODUCT",
42
+ "17": "B-MALWARE",
43
+ "18": "I-MALWARE",
44
+ "19": "B-LOC",
45
+ "20": "B-CVE",
46
+ "21": "I-CVE",
47
+ "22": "B-TOOL",
48
+ "23": "I-PRODUCT",
49
+ "24": "B-IPV4",
50
+ "25": "I-IPV4",
51
+ "26": "B-MITRE-TACTIC",
52
+ "27": "I-MITRE-TACTIC",
53
+ "28": "B-DOMAIN",
54
+ "29": "I-TOOL",
55
+ "30": "B-MD5",
56
+ "31": "I-LOC",
57
+ "32": "B-CAMPAIGN",
58
+ "33": "I-CAMPAIGN",
59
+ "34": "B-SHA1",
60
+ "35": "B-SHA256",
61
+ "36": "B-EMAIL",
62
+ "37": "I-EMAIL",
63
+ "38": "B-IPV6",
64
+ "39": "I-IPV6",
65
+ "40": "B-REGISTRY-KEYS",
66
+ "41": "I-REGISTRY-KEYS"
67
+ },
68
+ "initializer_cutoff_factor": 2.0,
69
+ "initializer_range": 0.02,
70
+ "intermediate_size": 2624,
71
+ "label2id": {
72
+ "B-CAMPAIGN": 32,
73
+ "B-CVE": 20,
74
+ "B-DOMAIN": 28,
75
+ "B-EMAIL": 36,
76
+ "B-FILEPATH": 8,
77
+ "B-IPV4": 24,
78
+ "B-IPV6": 38,
79
+ "B-LOC": 19,
80
+ "B-MALWARE": 17,
81
+ "B-MD5": 30,
82
+ "B-MITRE-TACTIC": 26,
83
+ "B-ORG": 3,
84
+ "B-PLATFORM": 11,
85
+ "B-PRODUCT": 16,
86
+ "B-REGISTRY-KEYS": 40,
87
+ "B-SECTOR": 6,
88
+ "B-SERVICE": 4,
89
+ "B-SHA1": 34,
90
+ "B-SHA256": 35,
91
+ "B-THREAT-ACTOR": 14,
92
+ "B-TOOL": 22,
93
+ "B-URL": 0,
94
+ "I-CAMPAIGN": 33,
95
+ "I-CVE": 21,
96
+ "I-DOMAIN": 10,
97
+ "I-EMAIL": 37,
98
+ "I-FILEPATH": 9,
99
+ "I-IPV4": 25,
100
+ "I-IPV6": 39,
101
+ "I-LOC": 31,
102
+ "I-MALWARE": 18,
103
+ "I-MITRE-TACTIC": 27,
104
+ "I-ORG": 5,
105
+ "I-PLATFORM": 13,
106
+ "I-PRODUCT": 23,
107
+ "I-REGISTRY-KEYS": 41,
108
+ "I-SECTOR": 7,
109
+ "I-SERVICE": 12,
110
+ "I-THREAT-ACTOR": 15,
111
+ "I-TOOL": 29,
112
+ "I-URL": 1,
113
+ "O": 2
114
+ },
115
+ "layer_norm_eps": 1e-05,
116
+ "local_attention": 128,
117
+ "local_rope_theta": 10000.0,
118
+ "max_position_embeddings": 8192,
119
+ "mlp_bias": false,
120
+ "mlp_dropout": 0.0,
121
+ "model_type": "modernbert",
122
+ "norm_bias": false,
123
+ "norm_eps": 1e-05,
124
+ "num_attention_heads": 16,
125
+ "num_hidden_layers": 28,
126
+ "pad_token_id": 50283,
127
+ "position_embedding_type": "absolute",
128
+ "reference_compile": true,
129
+ "repad_logits_with_grad": false,
130
+ "sep_token_id": 50282,
131
+ "sparse_pred_ignore_index": -100,
132
+ "sparse_prediction": false,
133
+ "torch_dtype": "float32",
134
+ "transformers_version": "4.48.0",
135
+ "vocab_size": 50368
136
+ }
checkpoint-10605/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52fe43effd7557bea8056246926a2cc952953b1facb01c2c020f0b8fa5e7927f
3
+ size 1583515640
checkpoint-10605/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:355db5828e53dad3fb07dfb9d1e869ab4a33eaa748de5119b7f93eef9127cfb5
3
+ size 3167141114
checkpoint-10605/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95b6047bd8cc6f4cdf7c46dea47edb8e542435510070c6cd1e0a7d9ccf5fd7da
3
+ size 14244
checkpoint-10605/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c39fecfbb20dafe4de628b038dfd9b1c411f754cb08db3057f17717acae14f13
3
+ size 1064
checkpoint-10605/trainer_state.json ADDED
@@ -0,0 +1,3046 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.080692358314991,
3
+ "best_model_checkpoint": "autotrain-sec4/checkpoint-10605",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 10605,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.007072135785007072,
13
+ "grad_norm": 75.34838104248047,
14
+ "learning_rate": 6.504524886877829e-07,
15
+ "loss": 3.7557,
16
+ "step": 25
17
+ },
18
+ {
19
+ "epoch": 0.014144271570014143,
20
+ "grad_norm": 15.420210838317871,
21
+ "learning_rate": 1.3574660633484164e-06,
22
+ "loss": 2.4958,
23
+ "step": 50
24
+ },
25
+ {
26
+ "epoch": 0.021216407355021217,
27
+ "grad_norm": 4.017354965209961,
28
+ "learning_rate": 2.0361990950226245e-06,
29
+ "loss": 1.1167,
30
+ "step": 75
31
+ },
32
+ {
33
+ "epoch": 0.028288543140028287,
34
+ "grad_norm": 3.0722529888153076,
35
+ "learning_rate": 2.743212669683258e-06,
36
+ "loss": 0.7577,
37
+ "step": 100
38
+ },
39
+ {
40
+ "epoch": 0.03536067892503536,
41
+ "grad_norm": 4.105304718017578,
42
+ "learning_rate": 3.450226244343892e-06,
43
+ "loss": 0.6,
44
+ "step": 125
45
+ },
46
+ {
47
+ "epoch": 0.042432814710042434,
48
+ "grad_norm": 2.796954870223999,
49
+ "learning_rate": 4.157239819004525e-06,
50
+ "loss": 0.5213,
51
+ "step": 150
52
+ },
53
+ {
54
+ "epoch": 0.04950495049504951,
55
+ "grad_norm": 2.0742604732513428,
56
+ "learning_rate": 4.864253393665159e-06,
57
+ "loss": 0.4437,
58
+ "step": 175
59
+ },
60
+ {
61
+ "epoch": 0.056577086280056574,
62
+ "grad_norm": 1.6229099035263062,
63
+ "learning_rate": 5.5712669683257925e-06,
64
+ "loss": 0.3827,
65
+ "step": 200
66
+ },
67
+ {
68
+ "epoch": 0.06364922206506365,
69
+ "grad_norm": 2.0883893966674805,
70
+ "learning_rate": 6.278280542986425e-06,
71
+ "loss": 0.3385,
72
+ "step": 225
73
+ },
74
+ {
75
+ "epoch": 0.07072135785007072,
76
+ "grad_norm": 2.5166773796081543,
77
+ "learning_rate": 6.985294117647059e-06,
78
+ "loss": 0.2949,
79
+ "step": 250
80
+ },
81
+ {
82
+ "epoch": 0.07779349363507779,
83
+ "grad_norm": 2.4907472133636475,
84
+ "learning_rate": 7.692307692307694e-06,
85
+ "loss": 0.2776,
86
+ "step": 275
87
+ },
88
+ {
89
+ "epoch": 0.08486562942008487,
90
+ "grad_norm": 2.4089417457580566,
91
+ "learning_rate": 8.399321266968327e-06,
92
+ "loss": 0.2867,
93
+ "step": 300
94
+ },
95
+ {
96
+ "epoch": 0.09193776520509193,
97
+ "grad_norm": 1.0878061056137085,
98
+ "learning_rate": 9.10633484162896e-06,
99
+ "loss": 0.2567,
100
+ "step": 325
101
+ },
102
+ {
103
+ "epoch": 0.09900990099009901,
104
+ "grad_norm": 1.353794813156128,
105
+ "learning_rate": 9.813348416289593e-06,
106
+ "loss": 0.2377,
107
+ "step": 350
108
+ },
109
+ {
110
+ "epoch": 0.10608203677510608,
111
+ "grad_norm": 1.3143301010131836,
112
+ "learning_rate": 1.0520361990950227e-05,
113
+ "loss": 0.2446,
114
+ "step": 375
115
+ },
116
+ {
117
+ "epoch": 0.11315417256011315,
118
+ "grad_norm": 1.1061075925827026,
119
+ "learning_rate": 1.122737556561086e-05,
120
+ "loss": 0.2136,
121
+ "step": 400
122
+ },
123
+ {
124
+ "epoch": 0.12022630834512023,
125
+ "grad_norm": 4.132507801055908,
126
+ "learning_rate": 1.1934389140271494e-05,
127
+ "loss": 0.2089,
128
+ "step": 425
129
+ },
130
+ {
131
+ "epoch": 0.1272984441301273,
132
+ "grad_norm": 2.015286922454834,
133
+ "learning_rate": 1.2641402714932126e-05,
134
+ "loss": 0.206,
135
+ "step": 450
136
+ },
137
+ {
138
+ "epoch": 0.13437057991513437,
139
+ "grad_norm": 1.3202295303344727,
140
+ "learning_rate": 1.3348416289592761e-05,
141
+ "loss": 0.2135,
142
+ "step": 475
143
+ },
144
+ {
145
+ "epoch": 0.14144271570014144,
146
+ "grad_norm": 1.005441665649414,
147
+ "learning_rate": 1.4055429864253395e-05,
148
+ "loss": 0.198,
149
+ "step": 500
150
+ },
151
+ {
152
+ "epoch": 0.1485148514851485,
153
+ "grad_norm": 1.0634604692459106,
154
+ "learning_rate": 1.4762443438914029e-05,
155
+ "loss": 0.1869,
156
+ "step": 525
157
+ },
158
+ {
159
+ "epoch": 0.15558698727015557,
160
+ "grad_norm": 1.5153967142105103,
161
+ "learning_rate": 1.5469457013574662e-05,
162
+ "loss": 0.1938,
163
+ "step": 550
164
+ },
165
+ {
166
+ "epoch": 0.16265912305516267,
167
+ "grad_norm": 1.5015569925308228,
168
+ "learning_rate": 1.6176470588235296e-05,
169
+ "loss": 0.1916,
170
+ "step": 575
171
+ },
172
+ {
173
+ "epoch": 0.16973125884016974,
174
+ "grad_norm": 1.3801521062850952,
175
+ "learning_rate": 1.688348416289593e-05,
176
+ "loss": 0.1859,
177
+ "step": 600
178
+ },
179
+ {
180
+ "epoch": 0.1768033946251768,
181
+ "grad_norm": 1.4514083862304688,
182
+ "learning_rate": 1.7590497737556563e-05,
183
+ "loss": 0.1794,
184
+ "step": 625
185
+ },
186
+ {
187
+ "epoch": 0.18387553041018387,
188
+ "grad_norm": 1.1229180097579956,
189
+ "learning_rate": 1.8297511312217194e-05,
190
+ "loss": 0.181,
191
+ "step": 650
192
+ },
193
+ {
194
+ "epoch": 0.19094766619519093,
195
+ "grad_norm": 0.9584263563156128,
196
+ "learning_rate": 1.9004524886877827e-05,
197
+ "loss": 0.1702,
198
+ "step": 675
199
+ },
200
+ {
201
+ "epoch": 0.19801980198019803,
202
+ "grad_norm": 1.5446968078613281,
203
+ "learning_rate": 1.971153846153846e-05,
204
+ "loss": 0.1655,
205
+ "step": 700
206
+ },
207
+ {
208
+ "epoch": 0.2050919377652051,
209
+ "grad_norm": 1.180188775062561,
210
+ "learning_rate": 2.0418552036199095e-05,
211
+ "loss": 0.1783,
212
+ "step": 725
213
+ },
214
+ {
215
+ "epoch": 0.21216407355021216,
216
+ "grad_norm": 1.4047558307647705,
217
+ "learning_rate": 2.112556561085973e-05,
218
+ "loss": 0.1654,
219
+ "step": 750
220
+ },
221
+ {
222
+ "epoch": 0.21923620933521923,
223
+ "grad_norm": 1.6405079364776611,
224
+ "learning_rate": 2.1832579185520362e-05,
225
+ "loss": 0.1676,
226
+ "step": 775
227
+ },
228
+ {
229
+ "epoch": 0.2263083451202263,
230
+ "grad_norm": 0.799575924873352,
231
+ "learning_rate": 2.2539592760180996e-05,
232
+ "loss": 0.1529,
233
+ "step": 800
234
+ },
235
+ {
236
+ "epoch": 0.2333804809052334,
237
+ "grad_norm": 1.0612698793411255,
238
+ "learning_rate": 2.324660633484163e-05,
239
+ "loss": 0.158,
240
+ "step": 825
241
+ },
242
+ {
243
+ "epoch": 0.24045261669024046,
244
+ "grad_norm": 0.8970231413841248,
245
+ "learning_rate": 2.3953619909502263e-05,
246
+ "loss": 0.1556,
247
+ "step": 850
248
+ },
249
+ {
250
+ "epoch": 0.24752475247524752,
251
+ "grad_norm": 1.1954729557037354,
252
+ "learning_rate": 2.4660633484162897e-05,
253
+ "loss": 0.1512,
254
+ "step": 875
255
+ },
256
+ {
257
+ "epoch": 0.2545968882602546,
258
+ "grad_norm": 2.2156217098236084,
259
+ "learning_rate": 2.536764705882353e-05,
260
+ "loss": 0.1528,
261
+ "step": 900
262
+ },
263
+ {
264
+ "epoch": 0.26166902404526166,
265
+ "grad_norm": 1.4057120084762573,
266
+ "learning_rate": 2.6074660633484164e-05,
267
+ "loss": 0.1562,
268
+ "step": 925
269
+ },
270
+ {
271
+ "epoch": 0.26874115983026875,
272
+ "grad_norm": 1.9392261505126953,
273
+ "learning_rate": 2.6781674208144798e-05,
274
+ "loss": 0.1601,
275
+ "step": 950
276
+ },
277
+ {
278
+ "epoch": 0.2758132956152758,
279
+ "grad_norm": 1.114762306213379,
280
+ "learning_rate": 2.7488687782805435e-05,
281
+ "loss": 0.1616,
282
+ "step": 975
283
+ },
284
+ {
285
+ "epoch": 0.2828854314002829,
286
+ "grad_norm": 0.6488351225852966,
287
+ "learning_rate": 2.8195701357466065e-05,
288
+ "loss": 0.1564,
289
+ "step": 1000
290
+ },
291
+ {
292
+ "epoch": 0.28995756718529,
293
+ "grad_norm": 0.7562068700790405,
294
+ "learning_rate": 2.8902714932126696e-05,
295
+ "loss": 0.1447,
296
+ "step": 1025
297
+ },
298
+ {
299
+ "epoch": 0.297029702970297,
300
+ "grad_norm": 1.5005004405975342,
301
+ "learning_rate": 2.9609728506787333e-05,
302
+ "loss": 0.1411,
303
+ "step": 1050
304
+ },
305
+ {
306
+ "epoch": 0.3041018387553041,
307
+ "grad_norm": 0.9700536131858826,
308
+ "learning_rate": 3.0316742081447963e-05,
309
+ "loss": 0.1628,
310
+ "step": 1075
311
+ },
312
+ {
313
+ "epoch": 0.31117397454031115,
314
+ "grad_norm": 0.9135770797729492,
315
+ "learning_rate": 3.10237556561086e-05,
316
+ "loss": 0.1429,
317
+ "step": 1100
318
+ },
319
+ {
320
+ "epoch": 0.31824611032531824,
321
+ "grad_norm": 0.5135190486907959,
322
+ "learning_rate": 3.1730769230769234e-05,
323
+ "loss": 0.1349,
324
+ "step": 1125
325
+ },
326
+ {
327
+ "epoch": 0.32531824611032534,
328
+ "grad_norm": 0.7440194487571716,
329
+ "learning_rate": 3.243778280542987e-05,
330
+ "loss": 0.1511,
331
+ "step": 1150
332
+ },
333
+ {
334
+ "epoch": 0.3323903818953324,
335
+ "grad_norm": 0.774247407913208,
336
+ "learning_rate": 3.31447963800905e-05,
337
+ "loss": 0.1433,
338
+ "step": 1175
339
+ },
340
+ {
341
+ "epoch": 0.33946251768033947,
342
+ "grad_norm": 0.9245631098747253,
343
+ "learning_rate": 3.3851809954751135e-05,
344
+ "loss": 0.136,
345
+ "step": 1200
346
+ },
347
+ {
348
+ "epoch": 0.3465346534653465,
349
+ "grad_norm": 0.5323517918586731,
350
+ "learning_rate": 3.455882352941177e-05,
351
+ "loss": 0.1402,
352
+ "step": 1225
353
+ },
354
+ {
355
+ "epoch": 0.3536067892503536,
356
+ "grad_norm": 0.5974646210670471,
357
+ "learning_rate": 3.52658371040724e-05,
358
+ "loss": 0.139,
359
+ "step": 1250
360
+ },
361
+ {
362
+ "epoch": 0.3606789250353607,
363
+ "grad_norm": 0.7609033584594727,
364
+ "learning_rate": 3.5972850678733036e-05,
365
+ "loss": 0.1414,
366
+ "step": 1275
367
+ },
368
+ {
369
+ "epoch": 0.36775106082036774,
370
+ "grad_norm": 0.6598523259162903,
371
+ "learning_rate": 3.667986425339366e-05,
372
+ "loss": 0.1279,
373
+ "step": 1300
374
+ },
375
+ {
376
+ "epoch": 0.37482319660537483,
377
+ "grad_norm": 0.646514892578125,
378
+ "learning_rate": 3.73868778280543e-05,
379
+ "loss": 0.1318,
380
+ "step": 1325
381
+ },
382
+ {
383
+ "epoch": 0.38189533239038187,
384
+ "grad_norm": 0.6801839470863342,
385
+ "learning_rate": 3.809389140271493e-05,
386
+ "loss": 0.135,
387
+ "step": 1350
388
+ },
389
+ {
390
+ "epoch": 0.38896746817538896,
391
+ "grad_norm": 0.43455860018730164,
392
+ "learning_rate": 3.880090497737557e-05,
393
+ "loss": 0.1229,
394
+ "step": 1375
395
+ },
396
+ {
397
+ "epoch": 0.39603960396039606,
398
+ "grad_norm": 0.7492242455482483,
399
+ "learning_rate": 3.95079185520362e-05,
400
+ "loss": 0.1336,
401
+ "step": 1400
402
+ },
403
+ {
404
+ "epoch": 0.4031117397454031,
405
+ "grad_norm": 0.7798008918762207,
406
+ "learning_rate": 4.021493212669684e-05,
407
+ "loss": 0.1278,
408
+ "step": 1425
409
+ },
410
+ {
411
+ "epoch": 0.4101838755304102,
412
+ "grad_norm": 0.5796477794647217,
413
+ "learning_rate": 4.0921945701357465e-05,
414
+ "loss": 0.1231,
415
+ "step": 1450
416
+ },
417
+ {
418
+ "epoch": 0.41725601131541723,
419
+ "grad_norm": 0.8398697376251221,
420
+ "learning_rate": 4.1628959276018105e-05,
421
+ "loss": 0.1305,
422
+ "step": 1475
423
+ },
424
+ {
425
+ "epoch": 0.4243281471004243,
426
+ "grad_norm": 1.1518373489379883,
427
+ "learning_rate": 4.233597285067873e-05,
428
+ "loss": 0.1345,
429
+ "step": 1500
430
+ },
431
+ {
432
+ "epoch": 0.4314002828854314,
433
+ "grad_norm": 0.6339196562767029,
434
+ "learning_rate": 4.304298642533937e-05,
435
+ "loss": 0.1388,
436
+ "step": 1525
437
+ },
438
+ {
439
+ "epoch": 0.43847241867043846,
440
+ "grad_norm": 0.6023873090744019,
441
+ "learning_rate": 4.375e-05,
442
+ "loss": 0.1343,
443
+ "step": 1550
444
+ },
445
+ {
446
+ "epoch": 0.44554455445544555,
447
+ "grad_norm": 0.6470553874969482,
448
+ "learning_rate": 4.445701357466063e-05,
449
+ "loss": 0.1353,
450
+ "step": 1575
451
+ },
452
+ {
453
+ "epoch": 0.4526166902404526,
454
+ "grad_norm": 0.6824638843536377,
455
+ "learning_rate": 4.516402714932127e-05,
456
+ "loss": 0.1233,
457
+ "step": 1600
458
+ },
459
+ {
460
+ "epoch": 0.4596888260254597,
461
+ "grad_norm": 0.7944373488426208,
462
+ "learning_rate": 4.58710407239819e-05,
463
+ "loss": 0.1235,
464
+ "step": 1625
465
+ },
466
+ {
467
+ "epoch": 0.4667609618104668,
468
+ "grad_norm": 0.576715350151062,
469
+ "learning_rate": 4.6578054298642534e-05,
470
+ "loss": 0.1225,
471
+ "step": 1650
472
+ },
473
+ {
474
+ "epoch": 0.4738330975954738,
475
+ "grad_norm": 0.5373973846435547,
476
+ "learning_rate": 4.728506787330317e-05,
477
+ "loss": 0.1379,
478
+ "step": 1675
479
+ },
480
+ {
481
+ "epoch": 0.4809052333804809,
482
+ "grad_norm": 0.6271287798881531,
483
+ "learning_rate": 4.79920814479638e-05,
484
+ "loss": 0.1295,
485
+ "step": 1700
486
+ },
487
+ {
488
+ "epoch": 0.48797736916548795,
489
+ "grad_norm": 0.776996374130249,
490
+ "learning_rate": 4.8699095022624435e-05,
491
+ "loss": 0.1284,
492
+ "step": 1725
493
+ },
494
+ {
495
+ "epoch": 0.49504950495049505,
496
+ "grad_norm": 0.7526723742485046,
497
+ "learning_rate": 4.940610859728507e-05,
498
+ "loss": 0.1215,
499
+ "step": 1750
500
+ },
501
+ {
502
+ "epoch": 0.5021216407355021,
503
+ "grad_norm": 0.5230354070663452,
504
+ "learning_rate": 4.99999921989483e-05,
505
+ "loss": 0.1198,
506
+ "step": 1775
507
+ },
508
+ {
509
+ "epoch": 0.5091937765205092,
510
+ "grad_norm": 0.9118756651878357,
511
+ "learning_rate": 4.999958995831941e-05,
512
+ "loss": 0.134,
513
+ "step": 1800
514
+ },
515
+ {
516
+ "epoch": 0.5162659123055162,
517
+ "grad_norm": 0.7880003452301025,
518
+ "learning_rate": 4.9998578271728684e-05,
519
+ "loss": 0.1271,
520
+ "step": 1825
521
+ },
522
+ {
523
+ "epoch": 0.5233380480905233,
524
+ "grad_norm": 0.47186046838760376,
525
+ "learning_rate": 4.999695716383928e-05,
526
+ "loss": 0.1188,
527
+ "step": 1850
528
+ },
529
+ {
530
+ "epoch": 0.5304101838755304,
531
+ "grad_norm": 0.5088236331939697,
532
+ "learning_rate": 4.9994726674170947e-05,
533
+ "loss": 0.1179,
534
+ "step": 1875
535
+ },
536
+ {
537
+ "epoch": 0.5374823196605375,
538
+ "grad_norm": 0.6230493187904358,
539
+ "learning_rate": 4.999188685709908e-05,
540
+ "loss": 0.1262,
541
+ "step": 1900
542
+ },
543
+ {
544
+ "epoch": 0.5445544554455446,
545
+ "grad_norm": 0.9874520301818848,
546
+ "learning_rate": 4.998843778185343e-05,
547
+ "loss": 0.1244,
548
+ "step": 1925
549
+ },
550
+ {
551
+ "epoch": 0.5516265912305516,
552
+ "grad_norm": 0.5979165434837341,
553
+ "learning_rate": 4.998437953251637e-05,
554
+ "loss": 0.1291,
555
+ "step": 1950
556
+ },
557
+ {
558
+ "epoch": 0.5586987270155587,
559
+ "grad_norm": 0.8219141364097595,
560
+ "learning_rate": 4.997971220802088e-05,
561
+ "loss": 0.1155,
562
+ "step": 1975
563
+ },
564
+ {
565
+ "epoch": 0.5657708628005658,
566
+ "grad_norm": 0.41394418478012085,
567
+ "learning_rate": 4.997443592214809e-05,
568
+ "loss": 0.1198,
569
+ "step": 2000
570
+ },
571
+ {
572
+ "epoch": 0.5728429985855729,
573
+ "grad_norm": 1.253273844718933,
574
+ "learning_rate": 4.996855080352457e-05,
575
+ "loss": 0.1191,
576
+ "step": 2025
577
+ },
578
+ {
579
+ "epoch": 0.57991513437058,
580
+ "grad_norm": 0.9971447587013245,
581
+ "learning_rate": 4.9962056995619135e-05,
582
+ "loss": 0.1328,
583
+ "step": 2050
584
+ },
585
+ {
586
+ "epoch": 0.5869872701555869,
587
+ "grad_norm": 0.49998825788497925,
588
+ "learning_rate": 4.995495465673939e-05,
589
+ "loss": 0.115,
590
+ "step": 2075
591
+ },
592
+ {
593
+ "epoch": 0.594059405940594,
594
+ "grad_norm": 0.9231402277946472,
595
+ "learning_rate": 4.994724396002783e-05,
596
+ "loss": 0.1131,
597
+ "step": 2100
598
+ },
599
+ {
600
+ "epoch": 0.6011315417256011,
601
+ "grad_norm": 0.5812481641769409,
602
+ "learning_rate": 4.9938925093457684e-05,
603
+ "loss": 0.103,
604
+ "step": 2125
605
+ },
606
+ {
607
+ "epoch": 0.6082036775106082,
608
+ "grad_norm": 0.602626383304596,
609
+ "learning_rate": 4.992999825982824e-05,
610
+ "loss": 0.1097,
611
+ "step": 2150
612
+ },
613
+ {
614
+ "epoch": 0.6152758132956153,
615
+ "grad_norm": 0.4413115084171295,
616
+ "learning_rate": 4.9920463676759975e-05,
617
+ "loss": 0.1081,
618
+ "step": 2175
619
+ },
620
+ {
621
+ "epoch": 0.6223479490806223,
622
+ "grad_norm": 0.796340823173523,
623
+ "learning_rate": 4.991032157668924e-05,
624
+ "loss": 0.1134,
625
+ "step": 2200
626
+ },
627
+ {
628
+ "epoch": 0.6294200848656294,
629
+ "grad_norm": 0.8647779822349548,
630
+ "learning_rate": 4.989957220686257e-05,
631
+ "loss": 0.1169,
632
+ "step": 2225
633
+ },
634
+ {
635
+ "epoch": 0.6364922206506365,
636
+ "grad_norm": 0.5049439072608948,
637
+ "learning_rate": 4.988821582933065e-05,
638
+ "loss": 0.1155,
639
+ "step": 2250
640
+ },
641
+ {
642
+ "epoch": 0.6435643564356436,
643
+ "grad_norm": 0.5249194502830505,
644
+ "learning_rate": 4.9876252720941974e-05,
645
+ "loss": 0.1076,
646
+ "step": 2275
647
+ },
648
+ {
649
+ "epoch": 0.6506364922206507,
650
+ "grad_norm": 0.9044466018676758,
651
+ "learning_rate": 4.986368317333603e-05,
652
+ "loss": 0.1171,
653
+ "step": 2300
654
+ },
655
+ {
656
+ "epoch": 0.6577086280056577,
657
+ "grad_norm": 0.6795557141304016,
658
+ "learning_rate": 4.985050749293626e-05,
659
+ "loss": 0.1099,
660
+ "step": 2325
661
+ },
662
+ {
663
+ "epoch": 0.6647807637906648,
664
+ "grad_norm": 0.5548400282859802,
665
+ "learning_rate": 4.983672600094253e-05,
666
+ "loss": 0.1145,
667
+ "step": 2350
668
+ },
669
+ {
670
+ "epoch": 0.6718528995756718,
671
+ "grad_norm": 0.8753097057342529,
672
+ "learning_rate": 4.982233903332335e-05,
673
+ "loss": 0.1086,
674
+ "step": 2375
675
+ },
676
+ {
677
+ "epoch": 0.6789250353606789,
678
+ "grad_norm": 0.7015976309776306,
679
+ "learning_rate": 4.980734694080763e-05,
680
+ "loss": 0.1134,
681
+ "step": 2400
682
+ },
683
+ {
684
+ "epoch": 0.685997171145686,
685
+ "grad_norm": 0.3631691336631775,
686
+ "learning_rate": 4.979175008887619e-05,
687
+ "loss": 0.1102,
688
+ "step": 2425
689
+ },
690
+ {
691
+ "epoch": 0.693069306930693,
692
+ "grad_norm": 0.512153685092926,
693
+ "learning_rate": 4.977554885775278e-05,
694
+ "loss": 0.1135,
695
+ "step": 2450
696
+ },
697
+ {
698
+ "epoch": 0.7001414427157001,
699
+ "grad_norm": 0.62693190574646,
700
+ "learning_rate": 4.975874364239491e-05,
701
+ "loss": 0.1104,
702
+ "step": 2475
703
+ },
704
+ {
705
+ "epoch": 0.7072135785007072,
706
+ "grad_norm": 0.8053261637687683,
707
+ "learning_rate": 4.974133485248409e-05,
708
+ "loss": 0.1092,
709
+ "step": 2500
710
+ },
711
+ {
712
+ "epoch": 0.7142857142857143,
713
+ "grad_norm": 0.8354981541633606,
714
+ "learning_rate": 4.972332291241597e-05,
715
+ "loss": 0.1155,
716
+ "step": 2525
717
+ },
718
+ {
719
+ "epoch": 0.7213578500707214,
720
+ "grad_norm": 1.4328135251998901,
721
+ "learning_rate": 4.97047082612899e-05,
722
+ "loss": 0.1071,
723
+ "step": 2550
724
+ },
725
+ {
726
+ "epoch": 0.7284299858557284,
727
+ "grad_norm": 0.5748372077941895,
728
+ "learning_rate": 4.96854913528983e-05,
729
+ "loss": 0.105,
730
+ "step": 2575
731
+ },
732
+ {
733
+ "epoch": 0.7355021216407355,
734
+ "grad_norm": 0.5140411257743835,
735
+ "learning_rate": 4.966567265571553e-05,
736
+ "loss": 0.1076,
737
+ "step": 2600
738
+ },
739
+ {
740
+ "epoch": 0.7425742574257426,
741
+ "grad_norm": 0.42150813341140747,
742
+ "learning_rate": 4.9645252652886524e-05,
743
+ "loss": 0.1076,
744
+ "step": 2625
745
+ },
746
+ {
747
+ "epoch": 0.7496463932107497,
748
+ "grad_norm": 0.7284517288208008,
749
+ "learning_rate": 4.9624231842214966e-05,
750
+ "loss": 0.1147,
751
+ "step": 2650
752
+ },
753
+ {
754
+ "epoch": 0.7567185289957568,
755
+ "grad_norm": 0.3435399830341339,
756
+ "learning_rate": 4.960261073615119e-05,
757
+ "loss": 0.117,
758
+ "step": 2675
759
+ },
760
+ {
761
+ "epoch": 0.7637906647807637,
762
+ "grad_norm": 0.5726285576820374,
763
+ "learning_rate": 4.9580389861779676e-05,
764
+ "loss": 0.1028,
765
+ "step": 2700
766
+ },
767
+ {
768
+ "epoch": 0.7708628005657708,
769
+ "grad_norm": 0.6155900955200195,
770
+ "learning_rate": 4.955756976080619e-05,
771
+ "loss": 0.1059,
772
+ "step": 2725
773
+ },
774
+ {
775
+ "epoch": 0.7779349363507779,
776
+ "grad_norm": 0.5489698052406311,
777
+ "learning_rate": 4.95341509895446e-05,
778
+ "loss": 0.1085,
779
+ "step": 2750
780
+ },
781
+ {
782
+ "epoch": 0.785007072135785,
783
+ "grad_norm": 0.4485224783420563,
784
+ "learning_rate": 4.951013411890329e-05,
785
+ "loss": 0.1046,
786
+ "step": 2775
787
+ },
788
+ {
789
+ "epoch": 0.7920792079207921,
790
+ "grad_norm": 1.4330599308013916,
791
+ "learning_rate": 4.9485519734371254e-05,
792
+ "loss": 0.1066,
793
+ "step": 2800
794
+ },
795
+ {
796
+ "epoch": 0.7991513437057991,
797
+ "grad_norm": 0.5611838102340698,
798
+ "learning_rate": 4.946030843600382e-05,
799
+ "loss": 0.108,
800
+ "step": 2825
801
+ },
802
+ {
803
+ "epoch": 0.8062234794908062,
804
+ "grad_norm": 0.44172587990760803,
805
+ "learning_rate": 4.943450083840804e-05,
806
+ "loss": 0.1115,
807
+ "step": 2850
808
+ },
809
+ {
810
+ "epoch": 0.8132956152758133,
811
+ "grad_norm": 0.5805100798606873,
812
+ "learning_rate": 4.940809757072767e-05,
813
+ "loss": 0.1017,
814
+ "step": 2875
815
+ },
816
+ {
817
+ "epoch": 0.8203677510608204,
818
+ "grad_norm": 0.42033523321151733,
819
+ "learning_rate": 4.938109927662786e-05,
820
+ "loss": 0.0987,
821
+ "step": 2900
822
+ },
823
+ {
824
+ "epoch": 0.8274398868458275,
825
+ "grad_norm": 0.519852876663208,
826
+ "learning_rate": 4.9353506614279465e-05,
827
+ "loss": 0.1085,
828
+ "step": 2925
829
+ },
830
+ {
831
+ "epoch": 0.8345120226308345,
832
+ "grad_norm": 0.41062799096107483,
833
+ "learning_rate": 4.932532025634298e-05,
834
+ "loss": 0.1122,
835
+ "step": 2950
836
+ },
837
+ {
838
+ "epoch": 0.8415841584158416,
839
+ "grad_norm": 0.4441488981246948,
840
+ "learning_rate": 4.929654088995216e-05,
841
+ "loss": 0.1075,
842
+ "step": 2975
843
+ },
844
+ {
845
+ "epoch": 0.8486562942008486,
846
+ "grad_norm": 0.5983813405036926,
847
+ "learning_rate": 4.926716921669724e-05,
848
+ "loss": 0.105,
849
+ "step": 3000
850
+ },
851
+ {
852
+ "epoch": 0.8557284299858557,
853
+ "grad_norm": 0.4088096022605896,
854
+ "learning_rate": 4.92372059526079e-05,
855
+ "loss": 0.1055,
856
+ "step": 3025
857
+ },
858
+ {
859
+ "epoch": 0.8628005657708628,
860
+ "grad_norm": 0.3722389042377472,
861
+ "learning_rate": 4.92066518281357e-05,
862
+ "loss": 0.1016,
863
+ "step": 3050
864
+ },
865
+ {
866
+ "epoch": 0.8698727015558698,
867
+ "grad_norm": 0.3515225946903229,
868
+ "learning_rate": 4.9175507588136395e-05,
869
+ "loss": 0.1061,
870
+ "step": 3075
871
+ },
872
+ {
873
+ "epoch": 0.8769448373408769,
874
+ "grad_norm": 0.6641696691513062,
875
+ "learning_rate": 4.914377399185167e-05,
876
+ "loss": 0.0976,
877
+ "step": 3100
878
+ },
879
+ {
880
+ "epoch": 0.884016973125884,
881
+ "grad_norm": 1.0824599266052246,
882
+ "learning_rate": 4.911145181289072e-05,
883
+ "loss": 0.0993,
884
+ "step": 3125
885
+ },
886
+ {
887
+ "epoch": 0.8910891089108911,
888
+ "grad_norm": 0.9281112551689148,
889
+ "learning_rate": 4.90785418392113e-05,
890
+ "loss": 0.1049,
891
+ "step": 3150
892
+ },
893
+ {
894
+ "epoch": 0.8981612446958982,
895
+ "grad_norm": 0.6602789163589478,
896
+ "learning_rate": 4.904504487310061e-05,
897
+ "loss": 0.1105,
898
+ "step": 3175
899
+ },
900
+ {
901
+ "epoch": 0.9052333804809052,
902
+ "grad_norm": 0.522657036781311,
903
+ "learning_rate": 4.901096173115567e-05,
904
+ "loss": 0.098,
905
+ "step": 3200
906
+ },
907
+ {
908
+ "epoch": 0.9123055162659123,
909
+ "grad_norm": 0.38893359899520874,
910
+ "learning_rate": 4.897629324426343e-05,
911
+ "loss": 0.0992,
912
+ "step": 3225
913
+ },
914
+ {
915
+ "epoch": 0.9193776520509194,
916
+ "grad_norm": 0.9329886436462402,
917
+ "learning_rate": 4.894104025758054e-05,
918
+ "loss": 0.1048,
919
+ "step": 3250
920
+ },
921
+ {
922
+ "epoch": 0.9264497878359265,
923
+ "grad_norm": 0.4844260811805725,
924
+ "learning_rate": 4.890520363051269e-05,
925
+ "loss": 0.1019,
926
+ "step": 3275
927
+ },
928
+ {
929
+ "epoch": 0.9335219236209336,
930
+ "grad_norm": 0.39830756187438965,
931
+ "learning_rate": 4.886878423669373e-05,
932
+ "loss": 0.0996,
933
+ "step": 3300
934
+ },
935
+ {
936
+ "epoch": 0.9405940594059405,
937
+ "grad_norm": 0.3621814548969269,
938
+ "learning_rate": 4.8831782963964314e-05,
939
+ "loss": 0.0959,
940
+ "step": 3325
941
+ },
942
+ {
943
+ "epoch": 0.9476661951909476,
944
+ "grad_norm": 0.43947598338127136,
945
+ "learning_rate": 4.87942007143503e-05,
946
+ "loss": 0.1028,
947
+ "step": 3350
948
+ },
949
+ {
950
+ "epoch": 0.9547383309759547,
951
+ "grad_norm": 0.4322707951068878,
952
+ "learning_rate": 4.87560384040407e-05,
953
+ "loss": 0.0957,
954
+ "step": 3375
955
+ },
956
+ {
957
+ "epoch": 0.9618104667609618,
958
+ "grad_norm": 0.7152990102767944,
959
+ "learning_rate": 4.8717296963365446e-05,
960
+ "loss": 0.0983,
961
+ "step": 3400
962
+ },
963
+ {
964
+ "epoch": 0.9688826025459689,
965
+ "grad_norm": 0.36375129222869873,
966
+ "learning_rate": 4.8677977336772576e-05,
967
+ "loss": 0.0986,
968
+ "step": 3425
969
+ },
970
+ {
971
+ "epoch": 0.9759547383309759,
972
+ "grad_norm": 0.3541475534439087,
973
+ "learning_rate": 4.8638080482805334e-05,
974
+ "loss": 0.1039,
975
+ "step": 3450
976
+ },
977
+ {
978
+ "epoch": 0.983026874115983,
979
+ "grad_norm": 0.5277055501937866,
980
+ "learning_rate": 4.8597607374078734e-05,
981
+ "loss": 0.1009,
982
+ "step": 3475
983
+ },
984
+ {
985
+ "epoch": 0.9900990099009901,
986
+ "grad_norm": 0.32760316133499146,
987
+ "learning_rate": 4.855655899725587e-05,
988
+ "loss": 0.0967,
989
+ "step": 3500
990
+ },
991
+ {
992
+ "epoch": 0.9971711456859972,
993
+ "grad_norm": 0.4430937170982361,
994
+ "learning_rate": 4.851493635302384e-05,
995
+ "loss": 0.0959,
996
+ "step": 3525
997
+ },
998
+ {
999
+ "epoch": 1.0,
1000
+ "eval_accuracy": 0.9641816937160642,
1001
+ "eval_f1": 0.855812468110255,
1002
+ "eval_loss": 0.10302519798278809,
1003
+ "eval_precision": 0.8459347469557771,
1004
+ "eval_recall": 0.8659235929015533,
1005
+ "eval_runtime": 65.6322,
1006
+ "eval_samples_per_second": 765.966,
1007
+ "eval_steps_per_second": 3.002,
1008
+ "step": 3535
1009
+ },
1010
+ {
1011
+ "epoch": 1.0042432814710043,
1012
+ "grad_norm": 0.5774176120758057,
1013
+ "learning_rate": 4.8472740456069404e-05,
1014
+ "loss": 0.0883,
1015
+ "step": 3550
1016
+ },
1017
+ {
1018
+ "epoch": 1.0113154172560113,
1019
+ "grad_norm": 0.6957634687423706,
1020
+ "learning_rate": 4.842997233505418e-05,
1021
+ "loss": 0.0774,
1022
+ "step": 3575
1023
+ },
1024
+ {
1025
+ "epoch": 1.0183875530410185,
1026
+ "grad_norm": 0.3464818000793457,
1027
+ "learning_rate": 4.838663303258961e-05,
1028
+ "loss": 0.0823,
1029
+ "step": 3600
1030
+ },
1031
+ {
1032
+ "epoch": 1.0254596888260255,
1033
+ "grad_norm": 0.6941576600074768,
1034
+ "learning_rate": 4.8342723605211556e-05,
1035
+ "loss": 0.0826,
1036
+ "step": 3625
1037
+ },
1038
+ {
1039
+ "epoch": 1.0325318246110324,
1040
+ "grad_norm": 0.44520071148872375,
1041
+ "learning_rate": 4.829824512335449e-05,
1042
+ "loss": 0.0867,
1043
+ "step": 3650
1044
+ },
1045
+ {
1046
+ "epoch": 1.0396039603960396,
1047
+ "grad_norm": 0.3814884126186371,
1048
+ "learning_rate": 4.825319867132545e-05,
1049
+ "loss": 0.0869,
1050
+ "step": 3675
1051
+ },
1052
+ {
1053
+ "epoch": 1.0466760961810466,
1054
+ "grad_norm": 0.44684168696403503,
1055
+ "learning_rate": 4.8207585347277574e-05,
1056
+ "loss": 0.0842,
1057
+ "step": 3700
1058
+ },
1059
+ {
1060
+ "epoch": 1.0537482319660538,
1061
+ "grad_norm": 0.7008658051490784,
1062
+ "learning_rate": 4.816140626318334e-05,
1063
+ "loss": 0.0864,
1064
+ "step": 3725
1065
+ },
1066
+ {
1067
+ "epoch": 1.0608203677510608,
1068
+ "grad_norm": 0.45034533739089966,
1069
+ "learning_rate": 4.8114662544807476e-05,
1070
+ "loss": 0.0801,
1071
+ "step": 3750
1072
+ },
1073
+ {
1074
+ "epoch": 1.0678925035360678,
1075
+ "grad_norm": 0.6562340259552002,
1076
+ "learning_rate": 4.8067355331679485e-05,
1077
+ "loss": 0.0814,
1078
+ "step": 3775
1079
+ },
1080
+ {
1081
+ "epoch": 1.074964639321075,
1082
+ "grad_norm": 0.7696852684020996,
1083
+ "learning_rate": 4.801948577706588e-05,
1084
+ "loss": 0.0837,
1085
+ "step": 3800
1086
+ },
1087
+ {
1088
+ "epoch": 1.082036775106082,
1089
+ "grad_norm": 0.38005831837654114,
1090
+ "learning_rate": 4.7971055047942074e-05,
1091
+ "loss": 0.0836,
1092
+ "step": 3825
1093
+ },
1094
+ {
1095
+ "epoch": 1.0891089108910892,
1096
+ "grad_norm": 0.5856048464775085,
1097
+ "learning_rate": 4.792206432496392e-05,
1098
+ "loss": 0.0776,
1099
+ "step": 3850
1100
+ },
1101
+ {
1102
+ "epoch": 1.0961810466760962,
1103
+ "grad_norm": 0.3622016906738281,
1104
+ "learning_rate": 4.787251480243895e-05,
1105
+ "loss": 0.084,
1106
+ "step": 3875
1107
+ },
1108
+ {
1109
+ "epoch": 1.1032531824611032,
1110
+ "grad_norm": 0.3826795816421509,
1111
+ "learning_rate": 4.782240768829722e-05,
1112
+ "loss": 0.0822,
1113
+ "step": 3900
1114
+ },
1115
+ {
1116
+ "epoch": 1.1103253182461104,
1117
+ "grad_norm": 0.4321337938308716,
1118
+ "learning_rate": 4.77717442040619e-05,
1119
+ "loss": 0.0797,
1120
+ "step": 3925
1121
+ },
1122
+ {
1123
+ "epoch": 1.1173974540311173,
1124
+ "grad_norm": 0.3895370364189148,
1125
+ "learning_rate": 4.772052558481949e-05,
1126
+ "loss": 0.0808,
1127
+ "step": 3950
1128
+ },
1129
+ {
1130
+ "epoch": 1.1244695898161245,
1131
+ "grad_norm": 0.3963691294193268,
1132
+ "learning_rate": 4.766875307918969e-05,
1133
+ "loss": 0.0829,
1134
+ "step": 3975
1135
+ },
1136
+ {
1137
+ "epoch": 1.1315417256011315,
1138
+ "grad_norm": 1.1154179573059082,
1139
+ "learning_rate": 4.7616427949294985e-05,
1140
+ "loss": 0.0864,
1141
+ "step": 4000
1142
+ },
1143
+ {
1144
+ "epoch": 1.1386138613861387,
1145
+ "grad_norm": 0.6903427839279175,
1146
+ "learning_rate": 4.756355147072986e-05,
1147
+ "loss": 0.0816,
1148
+ "step": 4025
1149
+ },
1150
+ {
1151
+ "epoch": 1.1456859971711457,
1152
+ "grad_norm": 0.3236379325389862,
1153
+ "learning_rate": 4.751012493252971e-05,
1154
+ "loss": 0.085,
1155
+ "step": 4050
1156
+ },
1157
+ {
1158
+ "epoch": 1.1527581329561527,
1159
+ "grad_norm": 0.31708064675331116,
1160
+ "learning_rate": 4.745614963713941e-05,
1161
+ "loss": 0.0824,
1162
+ "step": 4075
1163
+ },
1164
+ {
1165
+ "epoch": 1.15983026874116,
1166
+ "grad_norm": 0.47363677620887756,
1167
+ "learning_rate": 4.740162690038159e-05,
1168
+ "loss": 0.08,
1169
+ "step": 4100
1170
+ },
1171
+ {
1172
+ "epoch": 1.166902404526167,
1173
+ "grad_norm": 0.5583330988883972,
1174
+ "learning_rate": 4.7346558051424505e-05,
1175
+ "loss": 0.0864,
1176
+ "step": 4125
1177
+ },
1178
+ {
1179
+ "epoch": 1.1739745403111739,
1180
+ "grad_norm": 0.4270670413970947,
1181
+ "learning_rate": 4.729094443274969e-05,
1182
+ "loss": 0.0774,
1183
+ "step": 4150
1184
+ },
1185
+ {
1186
+ "epoch": 1.181046676096181,
1187
+ "grad_norm": 0.4069669544696808,
1188
+ "learning_rate": 4.7234787400119176e-05,
1189
+ "loss": 0.0856,
1190
+ "step": 4175
1191
+ },
1192
+ {
1193
+ "epoch": 1.188118811881188,
1194
+ "grad_norm": 0.3747076988220215,
1195
+ "learning_rate": 4.717808832254251e-05,
1196
+ "loss": 0.09,
1197
+ "step": 4200
1198
+ },
1199
+ {
1200
+ "epoch": 1.1951909476661953,
1201
+ "grad_norm": 0.45713725686073303,
1202
+ "learning_rate": 4.71208485822433e-05,
1203
+ "loss": 0.0758,
1204
+ "step": 4225
1205
+ },
1206
+ {
1207
+ "epoch": 1.2022630834512023,
1208
+ "grad_norm": 0.41270920634269714,
1209
+ "learning_rate": 4.7063069574625595e-05,
1210
+ "loss": 0.0849,
1211
+ "step": 4250
1212
+ },
1213
+ {
1214
+ "epoch": 1.2093352192362095,
1215
+ "grad_norm": 0.7367244362831116,
1216
+ "learning_rate": 4.70047527082398e-05,
1217
+ "loss": 0.0938,
1218
+ "step": 4275
1219
+ },
1220
+ {
1221
+ "epoch": 1.2164073550212164,
1222
+ "grad_norm": 0.8561830520629883,
1223
+ "learning_rate": 4.6945899404748376e-05,
1224
+ "loss": 0.0822,
1225
+ "step": 4300
1226
+ },
1227
+ {
1228
+ "epoch": 1.2234794908062234,
1229
+ "grad_norm": 0.8913972973823547,
1230
+ "learning_rate": 4.6886511098891194e-05,
1231
+ "loss": 0.0898,
1232
+ "step": 4325
1233
+ },
1234
+ {
1235
+ "epoch": 1.2305516265912306,
1236
+ "grad_norm": 2.0065510272979736,
1237
+ "learning_rate": 4.682658923845052e-05,
1238
+ "loss": 0.0784,
1239
+ "step": 4350
1240
+ },
1241
+ {
1242
+ "epoch": 1.2376237623762376,
1243
+ "grad_norm": 0.36617517471313477,
1244
+ "learning_rate": 4.676613528421575e-05,
1245
+ "loss": 0.0775,
1246
+ "step": 4375
1247
+ },
1248
+ {
1249
+ "epoch": 1.2446958981612446,
1250
+ "grad_norm": 0.3758735954761505,
1251
+ "learning_rate": 4.67051507099478e-05,
1252
+ "loss": 0.081,
1253
+ "step": 4400
1254
+ },
1255
+ {
1256
+ "epoch": 1.2517680339462518,
1257
+ "grad_norm": 0.4398355782032013,
1258
+ "learning_rate": 4.664363700234312e-05,
1259
+ "loss": 0.0847,
1260
+ "step": 4425
1261
+ },
1262
+ {
1263
+ "epoch": 1.2588401697312588,
1264
+ "grad_norm": 0.4554755687713623,
1265
+ "learning_rate": 4.658159566099757e-05,
1266
+ "loss": 0.0828,
1267
+ "step": 4450
1268
+ },
1269
+ {
1270
+ "epoch": 1.265912305516266,
1271
+ "grad_norm": 0.3456956446170807,
1272
+ "learning_rate": 4.651902819836974e-05,
1273
+ "loss": 0.0825,
1274
+ "step": 4475
1275
+ },
1276
+ {
1277
+ "epoch": 1.272984441301273,
1278
+ "grad_norm": 0.38322913646698,
1279
+ "learning_rate": 4.645593613974415e-05,
1280
+ "loss": 0.0812,
1281
+ "step": 4500
1282
+ },
1283
+ {
1284
+ "epoch": 1.2800565770862802,
1285
+ "grad_norm": 0.5935035943984985,
1286
+ "learning_rate": 4.639232102319403e-05,
1287
+ "loss": 0.0813,
1288
+ "step": 4525
1289
+ },
1290
+ {
1291
+ "epoch": 1.2871287128712872,
1292
+ "grad_norm": 0.39350178837776184,
1293
+ "learning_rate": 4.632818439954386e-05,
1294
+ "loss": 0.0828,
1295
+ "step": 4550
1296
+ },
1297
+ {
1298
+ "epoch": 1.2942008486562941,
1299
+ "grad_norm": 0.36052772402763367,
1300
+ "learning_rate": 4.626352783233154e-05,
1301
+ "loss": 0.0826,
1302
+ "step": 4575
1303
+ },
1304
+ {
1305
+ "epoch": 1.3012729844413014,
1306
+ "grad_norm": 0.3978725075721741,
1307
+ "learning_rate": 4.619835289777027e-05,
1308
+ "loss": 0.0831,
1309
+ "step": 4600
1310
+ },
1311
+ {
1312
+ "epoch": 1.3083451202263083,
1313
+ "grad_norm": 0.4236008822917938,
1314
+ "learning_rate": 4.6132661184710137e-05,
1315
+ "loss": 0.0895,
1316
+ "step": 4625
1317
+ },
1318
+ {
1319
+ "epoch": 1.3154172560113153,
1320
+ "grad_norm": 0.4994755983352661,
1321
+ "learning_rate": 4.606645429459937e-05,
1322
+ "loss": 0.0882,
1323
+ "step": 4650
1324
+ },
1325
+ {
1326
+ "epoch": 1.3224893917963225,
1327
+ "grad_norm": 1.0472633838653564,
1328
+ "learning_rate": 4.5999733841445325e-05,
1329
+ "loss": 0.0751,
1330
+ "step": 4675
1331
+ },
1332
+ {
1333
+ "epoch": 1.3295615275813295,
1334
+ "grad_norm": 0.29904720187187195,
1335
+ "learning_rate": 4.593250145177511e-05,
1336
+ "loss": 0.0859,
1337
+ "step": 4700
1338
+ },
1339
+ {
1340
+ "epoch": 1.3366336633663367,
1341
+ "grad_norm": 0.3832816779613495,
1342
+ "learning_rate": 4.5864758764595927e-05,
1343
+ "loss": 0.0785,
1344
+ "step": 4725
1345
+ },
1346
+ {
1347
+ "epoch": 1.3437057991513437,
1348
+ "grad_norm": 0.49846336245536804,
1349
+ "learning_rate": 4.5796507431355174e-05,
1350
+ "loss": 0.075,
1351
+ "step": 4750
1352
+ },
1353
+ {
1354
+ "epoch": 1.350777934936351,
1355
+ "grad_norm": 0.379282146692276,
1356
+ "learning_rate": 4.5727749115900094e-05,
1357
+ "loss": 0.0858,
1358
+ "step": 4775
1359
+ },
1360
+ {
1361
+ "epoch": 1.3578500707213579,
1362
+ "grad_norm": 0.5583311915397644,
1363
+ "learning_rate": 4.56584854944373e-05,
1364
+ "loss": 0.0951,
1365
+ "step": 4800
1366
+ },
1367
+ {
1368
+ "epoch": 1.3649222065063649,
1369
+ "grad_norm": 0.48655304312705994,
1370
+ "learning_rate": 4.5588718255491865e-05,
1371
+ "loss": 0.0884,
1372
+ "step": 4825
1373
+ },
1374
+ {
1375
+ "epoch": 1.371994342291372,
1376
+ "grad_norm": 0.6549155116081238,
1377
+ "learning_rate": 4.5518449099866155e-05,
1378
+ "loss": 0.0858,
1379
+ "step": 4850
1380
+ },
1381
+ {
1382
+ "epoch": 1.379066478076379,
1383
+ "grad_norm": 0.3174663186073303,
1384
+ "learning_rate": 4.5447679740598403e-05,
1385
+ "loss": 0.0837,
1386
+ "step": 4875
1387
+ },
1388
+ {
1389
+ "epoch": 1.386138613861386,
1390
+ "grad_norm": 0.36421680450439453,
1391
+ "learning_rate": 4.537641190292091e-05,
1392
+ "loss": 0.0841,
1393
+ "step": 4900
1394
+ },
1395
+ {
1396
+ "epoch": 1.3932107496463932,
1397
+ "grad_norm": 0.3484024107456207,
1398
+ "learning_rate": 4.530464732421801e-05,
1399
+ "loss": 0.0886,
1400
+ "step": 4925
1401
+ },
1402
+ {
1403
+ "epoch": 1.4002828854314002,
1404
+ "grad_norm": 0.7408681511878967,
1405
+ "learning_rate": 4.523238775398371e-05,
1406
+ "loss": 0.0813,
1407
+ "step": 4950
1408
+ },
1409
+ {
1410
+ "epoch": 1.4073550212164074,
1411
+ "grad_norm": 0.35419762134552,
1412
+ "learning_rate": 4.515963495377902e-05,
1413
+ "loss": 0.0768,
1414
+ "step": 4975
1415
+ },
1416
+ {
1417
+ "epoch": 1.4144271570014144,
1418
+ "grad_norm": 0.3253655731678009,
1419
+ "learning_rate": 4.508639069718904e-05,
1420
+ "loss": 0.0776,
1421
+ "step": 5000
1422
+ },
1423
+ {
1424
+ "epoch": 1.4214992927864216,
1425
+ "grad_norm": 0.3978010416030884,
1426
+ "learning_rate": 4.501265676977974e-05,
1427
+ "loss": 0.077,
1428
+ "step": 5025
1429
+ },
1430
+ {
1431
+ "epoch": 1.4285714285714286,
1432
+ "grad_norm": 0.2976684868335724,
1433
+ "learning_rate": 4.493843496905434e-05,
1434
+ "loss": 0.0727,
1435
+ "step": 5050
1436
+ },
1437
+ {
1438
+ "epoch": 1.4356435643564356,
1439
+ "grad_norm": 0.3433378040790558,
1440
+ "learning_rate": 4.486372710440961e-05,
1441
+ "loss": 0.0804,
1442
+ "step": 5075
1443
+ },
1444
+ {
1445
+ "epoch": 1.4427157001414428,
1446
+ "grad_norm": 0.5507691502571106,
1447
+ "learning_rate": 4.478853499709167e-05,
1448
+ "loss": 0.0852,
1449
+ "step": 5100
1450
+ },
1451
+ {
1452
+ "epoch": 1.4497878359264498,
1453
+ "grad_norm": 0.36808139085769653,
1454
+ "learning_rate": 4.471286048015163e-05,
1455
+ "loss": 0.0832,
1456
+ "step": 5125
1457
+ },
1458
+ {
1459
+ "epoch": 1.4568599717114568,
1460
+ "grad_norm": 0.3539692163467407,
1461
+ "learning_rate": 4.46367053984009e-05,
1462
+ "loss": 0.0817,
1463
+ "step": 5150
1464
+ },
1465
+ {
1466
+ "epoch": 1.463932107496464,
1467
+ "grad_norm": 0.8253048658370972,
1468
+ "learning_rate": 4.4560071608366216e-05,
1469
+ "loss": 0.0826,
1470
+ "step": 5175
1471
+ },
1472
+ {
1473
+ "epoch": 1.471004243281471,
1474
+ "grad_norm": 0.4476601779460907,
1475
+ "learning_rate": 4.448296097824437e-05,
1476
+ "loss": 0.088,
1477
+ "step": 5200
1478
+ },
1479
+ {
1480
+ "epoch": 1.4780763790664782,
1481
+ "grad_norm": 1.5917980670928955,
1482
+ "learning_rate": 4.440537538785668e-05,
1483
+ "loss": 0.0828,
1484
+ "step": 5225
1485
+ },
1486
+ {
1487
+ "epoch": 1.4851485148514851,
1488
+ "grad_norm": 0.6799459457397461,
1489
+ "learning_rate": 4.432731672860316e-05,
1490
+ "loss": 0.0786,
1491
+ "step": 5250
1492
+ },
1493
+ {
1494
+ "epoch": 1.4922206506364923,
1495
+ "grad_norm": 0.34660467505455017,
1496
+ "learning_rate": 4.424878690341642e-05,
1497
+ "loss": 0.0749,
1498
+ "step": 5275
1499
+ },
1500
+ {
1501
+ "epoch": 1.4992927864214993,
1502
+ "grad_norm": 0.45074212551116943,
1503
+ "learning_rate": 4.4169787826715225e-05,
1504
+ "loss": 0.0904,
1505
+ "step": 5300
1506
+ },
1507
+ {
1508
+ "epoch": 1.5063649222065063,
1509
+ "grad_norm": 0.41240620613098145,
1510
+ "learning_rate": 4.4090321424357914e-05,
1511
+ "loss": 0.078,
1512
+ "step": 5325
1513
+ },
1514
+ {
1515
+ "epoch": 1.5134370579915135,
1516
+ "grad_norm": 0.3282380998134613,
1517
+ "learning_rate": 4.401038963359538e-05,
1518
+ "loss": 0.0835,
1519
+ "step": 5350
1520
+ },
1521
+ {
1522
+ "epoch": 1.5205091937765205,
1523
+ "grad_norm": 0.3657848536968231,
1524
+ "learning_rate": 4.3929994403023865e-05,
1525
+ "loss": 0.0801,
1526
+ "step": 5375
1527
+ },
1528
+ {
1529
+ "epoch": 1.5275813295615275,
1530
+ "grad_norm": 0.3543812930583954,
1531
+ "learning_rate": 4.384913769253746e-05,
1532
+ "loss": 0.081,
1533
+ "step": 5400
1534
+ },
1535
+ {
1536
+ "epoch": 1.5346534653465347,
1537
+ "grad_norm": 0.8204165697097778,
1538
+ "learning_rate": 4.376782147328031e-05,
1539
+ "loss": 0.0757,
1540
+ "step": 5425
1541
+ },
1542
+ {
1543
+ "epoch": 1.541725601131542,
1544
+ "grad_norm": 0.4716034233570099,
1545
+ "learning_rate": 4.368604772759861e-05,
1546
+ "loss": 0.0818,
1547
+ "step": 5450
1548
+ },
1549
+ {
1550
+ "epoch": 1.5487977369165487,
1551
+ "grad_norm": 0.3865686357021332,
1552
+ "learning_rate": 4.360381844899221e-05,
1553
+ "loss": 0.076,
1554
+ "step": 5475
1555
+ },
1556
+ {
1557
+ "epoch": 1.5558698727015559,
1558
+ "grad_norm": 0.5982063412666321,
1559
+ "learning_rate": 4.352113564206606e-05,
1560
+ "loss": 0.0861,
1561
+ "step": 5500
1562
+ },
1563
+ {
1564
+ "epoch": 1.562942008486563,
1565
+ "grad_norm": 0.4285977780818939,
1566
+ "learning_rate": 4.343800132248132e-05,
1567
+ "loss": 0.0767,
1568
+ "step": 5525
1569
+ },
1570
+ {
1571
+ "epoch": 1.57001414427157,
1572
+ "grad_norm": 0.542254626750946,
1573
+ "learning_rate": 4.3354417516906274e-05,
1574
+ "loss": 0.0814,
1575
+ "step": 5550
1576
+ },
1577
+ {
1578
+ "epoch": 1.577086280056577,
1579
+ "grad_norm": 0.273899644613266,
1580
+ "learning_rate": 4.327038626296682e-05,
1581
+ "loss": 0.0759,
1582
+ "step": 5575
1583
+ },
1584
+ {
1585
+ "epoch": 1.5841584158415842,
1586
+ "grad_norm": 0.3528795540332794,
1587
+ "learning_rate": 4.318590960919692e-05,
1588
+ "loss": 0.0785,
1589
+ "step": 5600
1590
+ },
1591
+ {
1592
+ "epoch": 1.5912305516265912,
1593
+ "grad_norm": 0.4583662748336792,
1594
+ "learning_rate": 4.3100989614988566e-05,
1595
+ "loss": 0.0831,
1596
+ "step": 5625
1597
+ },
1598
+ {
1599
+ "epoch": 1.5983026874115982,
1600
+ "grad_norm": 0.4005572497844696,
1601
+ "learning_rate": 4.3015628350541595e-05,
1602
+ "loss": 0.0781,
1603
+ "step": 5650
1604
+ },
1605
+ {
1606
+ "epoch": 1.6053748231966054,
1607
+ "grad_norm": 0.3729841411113739,
1608
+ "learning_rate": 4.2929827896813274e-05,
1609
+ "loss": 0.0849,
1610
+ "step": 5675
1611
+ },
1612
+ {
1613
+ "epoch": 1.6124469589816126,
1614
+ "grad_norm": 0.37740039825439453,
1615
+ "learning_rate": 4.284359034546751e-05,
1616
+ "loss": 0.0819,
1617
+ "step": 5700
1618
+ },
1619
+ {
1620
+ "epoch": 1.6195190947666194,
1621
+ "grad_norm": 0.45638662576675415,
1622
+ "learning_rate": 4.2756917798823874e-05,
1623
+ "loss": 0.0749,
1624
+ "step": 5725
1625
+ },
1626
+ {
1627
+ "epoch": 1.6265912305516266,
1628
+ "grad_norm": 0.3588665723800659,
1629
+ "learning_rate": 4.2669812369806374e-05,
1630
+ "loss": 0.0715,
1631
+ "step": 5750
1632
+ },
1633
+ {
1634
+ "epoch": 1.6336633663366338,
1635
+ "grad_norm": 0.3274244964122772,
1636
+ "learning_rate": 4.258227618189191e-05,
1637
+ "loss": 0.0707,
1638
+ "step": 5775
1639
+ },
1640
+ {
1641
+ "epoch": 1.6407355021216408,
1642
+ "grad_norm": 0.36558204889297485,
1643
+ "learning_rate": 4.249431136905853e-05,
1644
+ "loss": 0.0732,
1645
+ "step": 5800
1646
+ },
1647
+ {
1648
+ "epoch": 1.6478076379066477,
1649
+ "grad_norm": 0.32020628452301025,
1650
+ "learning_rate": 4.240592007573341e-05,
1651
+ "loss": 0.0787,
1652
+ "step": 5825
1653
+ },
1654
+ {
1655
+ "epoch": 1.654879773691655,
1656
+ "grad_norm": 0.4697955846786499,
1657
+ "learning_rate": 4.231710445674056e-05,
1658
+ "loss": 0.0707,
1659
+ "step": 5850
1660
+ },
1661
+ {
1662
+ "epoch": 1.661951909476662,
1663
+ "grad_norm": 0.3986113965511322,
1664
+ "learning_rate": 4.222786667724832e-05,
1665
+ "loss": 0.0746,
1666
+ "step": 5875
1667
+ },
1668
+ {
1669
+ "epoch": 1.669024045261669,
1670
+ "grad_norm": 0.35733580589294434,
1671
+ "learning_rate": 4.213820891271652e-05,
1672
+ "loss": 0.0787,
1673
+ "step": 5900
1674
+ },
1675
+ {
1676
+ "epoch": 1.6760961810466761,
1677
+ "grad_norm": 0.6812814474105835,
1678
+ "learning_rate": 4.204813334884353e-05,
1679
+ "loss": 0.0921,
1680
+ "step": 5925
1681
+ },
1682
+ {
1683
+ "epoch": 1.6831683168316833,
1684
+ "grad_norm": 0.3964853584766388,
1685
+ "learning_rate": 4.19576421815129e-05,
1686
+ "loss": 0.0786,
1687
+ "step": 5950
1688
+ },
1689
+ {
1690
+ "epoch": 1.69024045261669,
1691
+ "grad_norm": 0.4422667324542999,
1692
+ "learning_rate": 4.186673761673989e-05,
1693
+ "loss": 0.0752,
1694
+ "step": 5975
1695
+ },
1696
+ {
1697
+ "epoch": 1.6973125884016973,
1698
+ "grad_norm": 0.5387901663780212,
1699
+ "learning_rate": 4.177542187061763e-05,
1700
+ "loss": 0.0829,
1701
+ "step": 6000
1702
+ },
1703
+ {
1704
+ "epoch": 1.7043847241867045,
1705
+ "grad_norm": 0.30730950832366943,
1706
+ "learning_rate": 4.168369716926315e-05,
1707
+ "loss": 0.0782,
1708
+ "step": 6025
1709
+ },
1710
+ {
1711
+ "epoch": 1.7114568599717115,
1712
+ "grad_norm": 0.3543797731399536,
1713
+ "learning_rate": 4.159156574876306e-05,
1714
+ "loss": 0.0842,
1715
+ "step": 6050
1716
+ },
1717
+ {
1718
+ "epoch": 1.7185289957567185,
1719
+ "grad_norm": 0.44837215542793274,
1720
+ "learning_rate": 4.1499029855119106e-05,
1721
+ "loss": 0.0754,
1722
+ "step": 6075
1723
+ },
1724
+ {
1725
+ "epoch": 1.7256011315417257,
1726
+ "grad_norm": 0.4181993007659912,
1727
+ "learning_rate": 4.1406091744193354e-05,
1728
+ "loss": 0.0833,
1729
+ "step": 6100
1730
+ },
1731
+ {
1732
+ "epoch": 1.7326732673267327,
1733
+ "grad_norm": 0.47836604714393616,
1734
+ "learning_rate": 4.1312753681653213e-05,
1735
+ "loss": 0.0819,
1736
+ "step": 6125
1737
+ },
1738
+ {
1739
+ "epoch": 1.7397454031117396,
1740
+ "grad_norm": 0.42279157042503357,
1741
+ "learning_rate": 4.121901794291622e-05,
1742
+ "loss": 0.0748,
1743
+ "step": 6150
1744
+ },
1745
+ {
1746
+ "epoch": 1.7468175388967468,
1747
+ "grad_norm": 0.40630263090133667,
1748
+ "learning_rate": 4.112488681309455e-05,
1749
+ "loss": 0.0812,
1750
+ "step": 6175
1751
+ },
1752
+ {
1753
+ "epoch": 1.753889674681754,
1754
+ "grad_norm": 0.414064884185791,
1755
+ "learning_rate": 4.103036258693932e-05,
1756
+ "loss": 0.0815,
1757
+ "step": 6200
1758
+ },
1759
+ {
1760
+ "epoch": 1.7609618104667608,
1761
+ "grad_norm": 0.3953958749771118,
1762
+ "learning_rate": 4.0935447568784646e-05,
1763
+ "loss": 0.0839,
1764
+ "step": 6225
1765
+ },
1766
+ {
1767
+ "epoch": 1.768033946251768,
1768
+ "grad_norm": 0.4421181082725525,
1769
+ "learning_rate": 4.084014407249145e-05,
1770
+ "loss": 0.0784,
1771
+ "step": 6250
1772
+ },
1773
+ {
1774
+ "epoch": 1.7751060820367752,
1775
+ "grad_norm": 0.6101165413856506,
1776
+ "learning_rate": 4.0744454421391095e-05,
1777
+ "loss": 0.084,
1778
+ "step": 6275
1779
+ },
1780
+ {
1781
+ "epoch": 1.7821782178217822,
1782
+ "grad_norm": 0.8179222941398621,
1783
+ "learning_rate": 4.064838094822868e-05,
1784
+ "loss": 0.0792,
1785
+ "step": 6300
1786
+ },
1787
+ {
1788
+ "epoch": 1.7892503536067892,
1789
+ "grad_norm": 0.40432754158973694,
1790
+ "learning_rate": 4.055192599510624e-05,
1791
+ "loss": 0.0769,
1792
+ "step": 6325
1793
+ },
1794
+ {
1795
+ "epoch": 1.7963224893917964,
1796
+ "grad_norm": 0.36589494347572327,
1797
+ "learning_rate": 4.0455091913425606e-05,
1798
+ "loss": 0.0783,
1799
+ "step": 6350
1800
+ },
1801
+ {
1802
+ "epoch": 1.8033946251768034,
1803
+ "grad_norm": 0.3545513451099396,
1804
+ "learning_rate": 4.0357881063831105e-05,
1805
+ "loss": 0.0723,
1806
+ "step": 6375
1807
+ },
1808
+ {
1809
+ "epoch": 1.8104667609618104,
1810
+ "grad_norm": 0.5380228161811829,
1811
+ "learning_rate": 4.026029581615198e-05,
1812
+ "loss": 0.0771,
1813
+ "step": 6400
1814
+ },
1815
+ {
1816
+ "epoch": 1.8175388967468176,
1817
+ "grad_norm": 0.6094365119934082,
1818
+ "learning_rate": 4.016233854934468e-05,
1819
+ "loss": 0.084,
1820
+ "step": 6425
1821
+ },
1822
+ {
1823
+ "epoch": 1.8246110325318248,
1824
+ "grad_norm": 0.6324317455291748,
1825
+ "learning_rate": 4.0064011651434776e-05,
1826
+ "loss": 0.0794,
1827
+ "step": 6450
1828
+ },
1829
+ {
1830
+ "epoch": 1.8316831683168315,
1831
+ "grad_norm": 0.2819110155105591,
1832
+ "learning_rate": 3.996531751945883e-05,
1833
+ "loss": 0.0853,
1834
+ "step": 6475
1835
+ },
1836
+ {
1837
+ "epoch": 1.8387553041018387,
1838
+ "grad_norm": 0.3535131514072418,
1839
+ "learning_rate": 3.986625855940591e-05,
1840
+ "loss": 0.0712,
1841
+ "step": 6500
1842
+ },
1843
+ {
1844
+ "epoch": 1.845827439886846,
1845
+ "grad_norm": 0.32027262449264526,
1846
+ "learning_rate": 3.9766837186158944e-05,
1847
+ "loss": 0.0751,
1848
+ "step": 6525
1849
+ },
1850
+ {
1851
+ "epoch": 1.852899575671853,
1852
+ "grad_norm": 0.4550008475780487,
1853
+ "learning_rate": 3.9667055823435885e-05,
1854
+ "loss": 0.0744,
1855
+ "step": 6550
1856
+ },
1857
+ {
1858
+ "epoch": 1.85997171145686,
1859
+ "grad_norm": 0.30715903639793396,
1860
+ "learning_rate": 3.956691690373055e-05,
1861
+ "loss": 0.0745,
1862
+ "step": 6575
1863
+ },
1864
+ {
1865
+ "epoch": 1.8670438472418671,
1866
+ "grad_norm": 0.5177515745162964,
1867
+ "learning_rate": 3.946642286825339e-05,
1868
+ "loss": 0.0715,
1869
+ "step": 6600
1870
+ },
1871
+ {
1872
+ "epoch": 1.874115983026874,
1873
+ "grad_norm": 0.5210005640983582,
1874
+ "learning_rate": 3.9365576166871964e-05,
1875
+ "loss": 0.0696,
1876
+ "step": 6625
1877
+ },
1878
+ {
1879
+ "epoch": 1.881188118811881,
1880
+ "grad_norm": 0.28931137919425964,
1881
+ "learning_rate": 3.926437925805118e-05,
1882
+ "loss": 0.0749,
1883
+ "step": 6650
1884
+ },
1885
+ {
1886
+ "epoch": 1.8882602545968883,
1887
+ "grad_norm": 0.5680564641952515,
1888
+ "learning_rate": 3.916283460879341e-05,
1889
+ "loss": 0.074,
1890
+ "step": 6675
1891
+ },
1892
+ {
1893
+ "epoch": 1.8953323903818955,
1894
+ "grad_norm": 0.44310295581817627,
1895
+ "learning_rate": 3.90609446945783e-05,
1896
+ "loss": 0.0805,
1897
+ "step": 6700
1898
+ },
1899
+ {
1900
+ "epoch": 1.9024045261669023,
1901
+ "grad_norm": 0.6425299644470215,
1902
+ "learning_rate": 3.8958711999302475e-05,
1903
+ "loss": 0.0842,
1904
+ "step": 6725
1905
+ },
1906
+ {
1907
+ "epoch": 1.9094766619519095,
1908
+ "grad_norm": 0.300752729177475,
1909
+ "learning_rate": 3.885613901521893e-05,
1910
+ "loss": 0.0698,
1911
+ "step": 6750
1912
+ },
1913
+ {
1914
+ "epoch": 1.9165487977369167,
1915
+ "grad_norm": 0.43727684020996094,
1916
+ "learning_rate": 3.875322824287633e-05,
1917
+ "loss": 0.0767,
1918
+ "step": 6775
1919
+ },
1920
+ {
1921
+ "epoch": 1.9236209335219236,
1922
+ "grad_norm": 0.41746985912323,
1923
+ "learning_rate": 3.8649982191058e-05,
1924
+ "loss": 0.0795,
1925
+ "step": 6800
1926
+ },
1927
+ {
1928
+ "epoch": 1.9306930693069306,
1929
+ "grad_norm": 0.35224172472953796,
1930
+ "learning_rate": 3.8546403376720786e-05,
1931
+ "loss": 0.0765,
1932
+ "step": 6825
1933
+ },
1934
+ {
1935
+ "epoch": 1.9377652050919378,
1936
+ "grad_norm": 0.295906662940979,
1937
+ "learning_rate": 3.8442494324933724e-05,
1938
+ "loss": 0.076,
1939
+ "step": 6850
1940
+ },
1941
+ {
1942
+ "epoch": 1.9448373408769448,
1943
+ "grad_norm": 0.2558325529098511,
1944
+ "learning_rate": 3.833825756881645e-05,
1945
+ "loss": 0.0807,
1946
+ "step": 6875
1947
+ },
1948
+ {
1949
+ "epoch": 1.9519094766619518,
1950
+ "grad_norm": 0.3434934616088867,
1951
+ "learning_rate": 3.8233695649477426e-05,
1952
+ "loss": 0.0686,
1953
+ "step": 6900
1954
+ },
1955
+ {
1956
+ "epoch": 1.958981612446959,
1957
+ "grad_norm": 0.23843282461166382,
1958
+ "learning_rate": 3.8128811115952044e-05,
1959
+ "loss": 0.0795,
1960
+ "step": 6925
1961
+ },
1962
+ {
1963
+ "epoch": 1.9660537482319662,
1964
+ "grad_norm": 0.7551613450050354,
1965
+ "learning_rate": 3.802360652514047e-05,
1966
+ "loss": 0.0681,
1967
+ "step": 6950
1968
+ },
1969
+ {
1970
+ "epoch": 1.973125884016973,
1971
+ "grad_norm": 0.4647892713546753,
1972
+ "learning_rate": 3.7918084441745275e-05,
1973
+ "loss": 0.0798,
1974
+ "step": 6975
1975
+ },
1976
+ {
1977
+ "epoch": 1.9801980198019802,
1978
+ "grad_norm": 0.3910640478134155,
1979
+ "learning_rate": 3.781224743820897e-05,
1980
+ "loss": 0.077,
1981
+ "step": 7000
1982
+ },
1983
+ {
1984
+ "epoch": 1.9872701555869874,
1985
+ "grad_norm": 0.3788031339645386,
1986
+ "learning_rate": 3.770609809465124e-05,
1987
+ "loss": 0.0805,
1988
+ "step": 7025
1989
+ },
1990
+ {
1991
+ "epoch": 1.9943422913719944,
1992
+ "grad_norm": 0.6797524690628052,
1993
+ "learning_rate": 3.759963899880609e-05,
1994
+ "loss": 0.0726,
1995
+ "step": 7050
1996
+ },
1997
+ {
1998
+ "epoch": 2.0,
1999
+ "eval_accuracy": 0.9707863671603164,
2000
+ "eval_f1": 0.8914057811793163,
2001
+ "eval_loss": 0.08405517786741257,
2002
+ "eval_precision": 0.8863308401537767,
2003
+ "eval_recall": 0.8965391729373575,
2004
+ "eval_runtime": 54.724,
2005
+ "eval_samples_per_second": 918.646,
2006
+ "eval_steps_per_second": 3.6,
2007
+ "step": 7070
2008
+ },
2009
+ {
2010
+ "epoch": 2.0014144271570014,
2011
+ "grad_norm": 0.22099077701568604,
2012
+ "learning_rate": 3.749287274595872e-05,
2013
+ "loss": 0.0755,
2014
+ "step": 7075
2015
+ },
2016
+ {
2017
+ "epoch": 2.0084865629420086,
2018
+ "grad_norm": 0.4921034872531891,
2019
+ "learning_rate": 3.738580193888228e-05,
2020
+ "loss": 0.0501,
2021
+ "step": 7100
2022
+ },
2023
+ {
2024
+ "epoch": 2.0155586987270158,
2025
+ "grad_norm": 0.23433761298656464,
2026
+ "learning_rate": 3.7278429187774436e-05,
2027
+ "loss": 0.0501,
2028
+ "step": 7125
2029
+ },
2030
+ {
2031
+ "epoch": 2.0226308345120225,
2032
+ "grad_norm": 0.4263274371623993,
2033
+ "learning_rate": 3.7170757110193697e-05,
2034
+ "loss": 0.0513,
2035
+ "step": 7150
2036
+ },
2037
+ {
2038
+ "epoch": 2.0297029702970297,
2039
+ "grad_norm": 0.2480274736881256,
2040
+ "learning_rate": 3.7062788330995635e-05,
2041
+ "loss": 0.0509,
2042
+ "step": 7175
2043
+ },
2044
+ {
2045
+ "epoch": 2.036775106082037,
2046
+ "grad_norm": 0.5820468664169312,
2047
+ "learning_rate": 3.695452548226887e-05,
2048
+ "loss": 0.0573,
2049
+ "step": 7200
2050
+ },
2051
+ {
2052
+ "epoch": 2.0438472418670437,
2053
+ "grad_norm": 0.42300423979759216,
2054
+ "learning_rate": 3.684597120327094e-05,
2055
+ "loss": 0.0558,
2056
+ "step": 7225
2057
+ },
2058
+ {
2059
+ "epoch": 2.050919377652051,
2060
+ "grad_norm": 0.41470280289649963,
2061
+ "learning_rate": 3.673712814036392e-05,
2062
+ "loss": 0.0519,
2063
+ "step": 7250
2064
+ },
2065
+ {
2066
+ "epoch": 2.057991513437058,
2067
+ "grad_norm": 0.2917075753211975,
2068
+ "learning_rate": 3.662799894694995e-05,
2069
+ "loss": 0.0518,
2070
+ "step": 7275
2071
+ },
2072
+ {
2073
+ "epoch": 2.065063649222065,
2074
+ "grad_norm": 0.41207775473594666,
2075
+ "learning_rate": 3.651858628340649e-05,
2076
+ "loss": 0.0474,
2077
+ "step": 7300
2078
+ },
2079
+ {
2080
+ "epoch": 2.072135785007072,
2081
+ "grad_norm": 0.31983888149261475,
2082
+ "learning_rate": 3.640889281702155e-05,
2083
+ "loss": 0.0583,
2084
+ "step": 7325
2085
+ },
2086
+ {
2087
+ "epoch": 2.0792079207920793,
2088
+ "grad_norm": 0.65561842918396,
2089
+ "learning_rate": 3.629892122192859e-05,
2090
+ "loss": 0.057,
2091
+ "step": 7350
2092
+ },
2093
+ {
2094
+ "epoch": 2.0862800565770865,
2095
+ "grad_norm": 1.253188133239746,
2096
+ "learning_rate": 3.6188674179041356e-05,
2097
+ "loss": 0.0524,
2098
+ "step": 7375
2099
+ },
2100
+ {
2101
+ "epoch": 2.0933521923620932,
2102
+ "grad_norm": 0.34525951743125916,
2103
+ "learning_rate": 3.607815437598853e-05,
2104
+ "loss": 0.0577,
2105
+ "step": 7400
2106
+ },
2107
+ {
2108
+ "epoch": 2.1004243281471005,
2109
+ "grad_norm": 0.5648230910301208,
2110
+ "learning_rate": 3.5967364507048226e-05,
2111
+ "loss": 0.0564,
2112
+ "step": 7425
2113
+ },
2114
+ {
2115
+ "epoch": 2.1074964639321077,
2116
+ "grad_norm": 0.2504655420780182,
2117
+ "learning_rate": 3.585630727308227e-05,
2118
+ "loss": 0.0476,
2119
+ "step": 7450
2120
+ },
2121
+ {
2122
+ "epoch": 2.1145685997171144,
2123
+ "grad_norm": 0.3219181299209595,
2124
+ "learning_rate": 3.574498538147035e-05,
2125
+ "loss": 0.0538,
2126
+ "step": 7475
2127
+ },
2128
+ {
2129
+ "epoch": 2.1216407355021216,
2130
+ "grad_norm": 0.36033445596694946,
2131
+ "learning_rate": 3.563340154604411e-05,
2132
+ "loss": 0.0576,
2133
+ "step": 7500
2134
+ },
2135
+ {
2136
+ "epoch": 2.128712871287129,
2137
+ "grad_norm": 0.3835320472717285,
2138
+ "learning_rate": 3.552155848702086e-05,
2139
+ "loss": 0.0529,
2140
+ "step": 7525
2141
+ },
2142
+ {
2143
+ "epoch": 2.1357850070721356,
2144
+ "grad_norm": 0.40226078033447266,
2145
+ "learning_rate": 3.5409458930937346e-05,
2146
+ "loss": 0.0507,
2147
+ "step": 7550
2148
+ },
2149
+ {
2150
+ "epoch": 2.142857142857143,
2151
+ "grad_norm": 0.34018775820732117,
2152
+ "learning_rate": 3.5297105610583265e-05,
2153
+ "loss": 0.0514,
2154
+ "step": 7575
2155
+ },
2156
+ {
2157
+ "epoch": 2.14992927864215,
2158
+ "grad_norm": 0.7404220700263977,
2159
+ "learning_rate": 3.518450126493464e-05,
2160
+ "loss": 0.0511,
2161
+ "step": 7600
2162
+ },
2163
+ {
2164
+ "epoch": 2.157001414427157,
2165
+ "grad_norm": 0.2098894715309143,
2166
+ "learning_rate": 3.507164863908704e-05,
2167
+ "loss": 0.0543,
2168
+ "step": 7625
2169
+ },
2170
+ {
2171
+ "epoch": 2.164073550212164,
2172
+ "grad_norm": 0.43998876214027405,
2173
+ "learning_rate": 3.4958550484188675e-05,
2174
+ "loss": 0.0547,
2175
+ "step": 7650
2176
+ },
2177
+ {
2178
+ "epoch": 2.171145685997171,
2179
+ "grad_norm": 0.32174915075302124,
2180
+ "learning_rate": 3.4845209557373315e-05,
2181
+ "loss": 0.0528,
2182
+ "step": 7675
2183
+ },
2184
+ {
2185
+ "epoch": 2.1782178217821784,
2186
+ "grad_norm": 0.8568967580795288,
2187
+ "learning_rate": 3.4731628621693094e-05,
2188
+ "loss": 0.0511,
2189
+ "step": 7700
2190
+ },
2191
+ {
2192
+ "epoch": 2.185289957567185,
2193
+ "grad_norm": 0.39218711853027344,
2194
+ "learning_rate": 3.461781044605112e-05,
2195
+ "loss": 0.0556,
2196
+ "step": 7725
2197
+ },
2198
+ {
2199
+ "epoch": 2.1923620933521923,
2200
+ "grad_norm": 0.2832966446876526,
2201
+ "learning_rate": 3.4503757805134004e-05,
2202
+ "loss": 0.0553,
2203
+ "step": 7750
2204
+ },
2205
+ {
2206
+ "epoch": 2.1994342291371995,
2207
+ "grad_norm": 0.3094783425331116,
2208
+ "learning_rate": 3.438947347934421e-05,
2209
+ "loss": 0.0482,
2210
+ "step": 7775
2211
+ },
2212
+ {
2213
+ "epoch": 2.2065063649222063,
2214
+ "grad_norm": 0.40016797184944153,
2215
+ "learning_rate": 3.427496025473227e-05,
2216
+ "loss": 0.0508,
2217
+ "step": 7800
2218
+ },
2219
+ {
2220
+ "epoch": 2.2135785007072135,
2221
+ "grad_norm": 0.3726707398891449,
2222
+ "learning_rate": 3.416022092292887e-05,
2223
+ "loss": 0.0507,
2224
+ "step": 7825
2225
+ },
2226
+ {
2227
+ "epoch": 2.2206506364922207,
2228
+ "grad_norm": 0.4792294502258301,
2229
+ "learning_rate": 3.4045258281076766e-05,
2230
+ "loss": 0.0549,
2231
+ "step": 7850
2232
+ },
2233
+ {
2234
+ "epoch": 2.227722772277228,
2235
+ "grad_norm": 1.009504795074463,
2236
+ "learning_rate": 3.393007513176265e-05,
2237
+ "loss": 0.0558,
2238
+ "step": 7875
2239
+ },
2240
+ {
2241
+ "epoch": 2.2347949080622347,
2242
+ "grad_norm": 0.40913182497024536,
2243
+ "learning_rate": 3.38146742829488e-05,
2244
+ "loss": 0.058,
2245
+ "step": 7900
2246
+ },
2247
+ {
2248
+ "epoch": 2.241867043847242,
2249
+ "grad_norm": 0.436496376991272,
2250
+ "learning_rate": 3.369905854790458e-05,
2251
+ "loss": 0.0546,
2252
+ "step": 7925
2253
+ },
2254
+ {
2255
+ "epoch": 2.248939179632249,
2256
+ "grad_norm": 0.2754940986633301,
2257
+ "learning_rate": 3.358323074513795e-05,
2258
+ "loss": 0.0498,
2259
+ "step": 7950
2260
+ },
2261
+ {
2262
+ "epoch": 2.256011315417256,
2263
+ "grad_norm": 0.7202574610710144,
2264
+ "learning_rate": 3.3467193698326696e-05,
2265
+ "loss": 0.0542,
2266
+ "step": 7975
2267
+ },
2268
+ {
2269
+ "epoch": 2.263083451202263,
2270
+ "grad_norm": 0.3375069200992584,
2271
+ "learning_rate": 3.335095023624958e-05,
2272
+ "loss": 0.0501,
2273
+ "step": 8000
2274
+ },
2275
+ {
2276
+ "epoch": 2.2701555869872703,
2277
+ "grad_norm": 0.34498855471611023,
2278
+ "learning_rate": 3.323450319271744e-05,
2279
+ "loss": 0.0551,
2280
+ "step": 8025
2281
+ },
2282
+ {
2283
+ "epoch": 2.2772277227722775,
2284
+ "grad_norm": 0.427866131067276,
2285
+ "learning_rate": 3.311785540650405e-05,
2286
+ "loss": 0.0517,
2287
+ "step": 8050
2288
+ },
2289
+ {
2290
+ "epoch": 2.2842998585572842,
2291
+ "grad_norm": 0.2636614739894867,
2292
+ "learning_rate": 3.300100972127694e-05,
2293
+ "loss": 0.0503,
2294
+ "step": 8075
2295
+ },
2296
+ {
2297
+ "epoch": 2.2913719943422914,
2298
+ "grad_norm": 0.34367385506629944,
2299
+ "learning_rate": 3.288396898552807e-05,
2300
+ "loss": 0.0512,
2301
+ "step": 8100
2302
+ },
2303
+ {
2304
+ "epoch": 2.298444130127298,
2305
+ "grad_norm": 0.27327075600624084,
2306
+ "learning_rate": 3.2766736052504385e-05,
2307
+ "loss": 0.0539,
2308
+ "step": 8125
2309
+ },
2310
+ {
2311
+ "epoch": 2.3055162659123054,
2312
+ "grad_norm": 0.3980356454849243,
2313
+ "learning_rate": 3.264931378013827e-05,
2314
+ "loss": 0.0583,
2315
+ "step": 8150
2316
+ },
2317
+ {
2318
+ "epoch": 2.3125884016973126,
2319
+ "grad_norm": 0.24006566405296326,
2320
+ "learning_rate": 3.2531705030977864e-05,
2321
+ "loss": 0.0511,
2322
+ "step": 8175
2323
+ },
2324
+ {
2325
+ "epoch": 2.31966053748232,
2326
+ "grad_norm": 0.5592175722122192,
2327
+ "learning_rate": 3.241391267211729e-05,
2328
+ "loss": 0.0502,
2329
+ "step": 8200
2330
+ },
2331
+ {
2332
+ "epoch": 2.3267326732673266,
2333
+ "grad_norm": 1.0215811729431152,
2334
+ "learning_rate": 3.229593957512672e-05,
2335
+ "loss": 0.0554,
2336
+ "step": 8225
2337
+ },
2338
+ {
2339
+ "epoch": 2.333804809052334,
2340
+ "grad_norm": 0.27189400792121887,
2341
+ "learning_rate": 3.2177788615982454e-05,
2342
+ "loss": 0.0501,
2343
+ "step": 8250
2344
+ },
2345
+ {
2346
+ "epoch": 2.340876944837341,
2347
+ "grad_norm": 0.3555835485458374,
2348
+ "learning_rate": 3.205946267499672e-05,
2349
+ "loss": 0.0563,
2350
+ "step": 8275
2351
+ },
2352
+ {
2353
+ "epoch": 2.3479490806223478,
2354
+ "grad_norm": 0.36138617992401123,
2355
+ "learning_rate": 3.19409646367475e-05,
2356
+ "loss": 0.0567,
2357
+ "step": 8300
2358
+ },
2359
+ {
2360
+ "epoch": 2.355021216407355,
2361
+ "grad_norm": 0.8795189261436462,
2362
+ "learning_rate": 3.182229739000822e-05,
2363
+ "loss": 0.0599,
2364
+ "step": 8325
2365
+ },
2366
+ {
2367
+ "epoch": 2.362093352192362,
2368
+ "grad_norm": 0.26847535371780396,
2369
+ "learning_rate": 3.170346382767731e-05,
2370
+ "loss": 0.0496,
2371
+ "step": 8350
2372
+ },
2373
+ {
2374
+ "epoch": 2.3691654879773694,
2375
+ "grad_norm": 0.29600900411605835,
2376
+ "learning_rate": 3.158446684670765e-05,
2377
+ "loss": 0.0485,
2378
+ "step": 8375
2379
+ },
2380
+ {
2381
+ "epoch": 2.376237623762376,
2382
+ "grad_norm": 0.2916370630264282,
2383
+ "learning_rate": 3.1465309348036016e-05,
2384
+ "loss": 0.0499,
2385
+ "step": 8400
2386
+ },
2387
+ {
2388
+ "epoch": 2.3833097595473833,
2389
+ "grad_norm": 0.5006932616233826,
2390
+ "learning_rate": 3.1345994236512305e-05,
2391
+ "loss": 0.0485,
2392
+ "step": 8425
2393
+ },
2394
+ {
2395
+ "epoch": 2.3903818953323905,
2396
+ "grad_norm": 0.3413617014884949,
2397
+ "learning_rate": 3.122652442082874e-05,
2398
+ "loss": 0.0506,
2399
+ "step": 8450
2400
+ },
2401
+ {
2402
+ "epoch": 2.3974540311173973,
2403
+ "grad_norm": 0.3974776268005371,
2404
+ "learning_rate": 3.1106902813448956e-05,
2405
+ "loss": 0.05,
2406
+ "step": 8475
2407
+ },
2408
+ {
2409
+ "epoch": 2.4045261669024045,
2410
+ "grad_norm": 0.3416425883769989,
2411
+ "learning_rate": 3.098713233053702e-05,
2412
+ "loss": 0.0532,
2413
+ "step": 8500
2414
+ },
2415
+ {
2416
+ "epoch": 2.4115983026874117,
2417
+ "grad_norm": 0.5350253582000732,
2418
+ "learning_rate": 3.086721589188628e-05,
2419
+ "loss": 0.0505,
2420
+ "step": 8525
2421
+ },
2422
+ {
2423
+ "epoch": 2.418670438472419,
2424
+ "grad_norm": 0.6094595789909363,
2425
+ "learning_rate": 3.0747156420848275e-05,
2426
+ "loss": 0.0488,
2427
+ "step": 8550
2428
+ },
2429
+ {
2430
+ "epoch": 2.4257425742574257,
2431
+ "grad_norm": 0.2579383850097656,
2432
+ "learning_rate": 3.0626956844261404e-05,
2433
+ "loss": 0.0531,
2434
+ "step": 8575
2435
+ },
2436
+ {
2437
+ "epoch": 2.432814710042433,
2438
+ "grad_norm": 0.3322567939758301,
2439
+ "learning_rate": 3.050662009237959e-05,
2440
+ "loss": 0.0521,
2441
+ "step": 8600
2442
+ },
2443
+ {
2444
+ "epoch": 2.4398868458274396,
2445
+ "grad_norm": 0.2817617654800415,
2446
+ "learning_rate": 3.038614909880084e-05,
2447
+ "loss": 0.0583,
2448
+ "step": 8625
2449
+ },
2450
+ {
2451
+ "epoch": 2.446958981612447,
2452
+ "grad_norm": 0.36568090319633484,
2453
+ "learning_rate": 3.026554680039575e-05,
2454
+ "loss": 0.0505,
2455
+ "step": 8650
2456
+ },
2457
+ {
2458
+ "epoch": 2.454031117397454,
2459
+ "grad_norm": 0.32663673162460327,
2460
+ "learning_rate": 3.0144816137235892e-05,
2461
+ "loss": 0.0521,
2462
+ "step": 8675
2463
+ },
2464
+ {
2465
+ "epoch": 2.4611032531824613,
2466
+ "grad_norm": 0.4173208475112915,
2467
+ "learning_rate": 3.0023960052522148e-05,
2468
+ "loss": 0.0586,
2469
+ "step": 8700
2470
+ },
2471
+ {
2472
+ "epoch": 2.468175388967468,
2473
+ "grad_norm": 0.2725406289100647,
2474
+ "learning_rate": 2.9902981492512945e-05,
2475
+ "loss": 0.0484,
2476
+ "step": 8725
2477
+ },
2478
+ {
2479
+ "epoch": 2.4752475247524752,
2480
+ "grad_norm": 0.4088152348995209,
2481
+ "learning_rate": 2.9781883406452453e-05,
2482
+ "loss": 0.0477,
2483
+ "step": 8750
2484
+ },
2485
+ {
2486
+ "epoch": 2.4823196605374824,
2487
+ "grad_norm": 0.35629716515541077,
2488
+ "learning_rate": 2.966066874649869e-05,
2489
+ "loss": 0.0484,
2490
+ "step": 8775
2491
+ },
2492
+ {
2493
+ "epoch": 2.489391796322489,
2494
+ "grad_norm": 0.4508310854434967,
2495
+ "learning_rate": 2.9539340467651494e-05,
2496
+ "loss": 0.0525,
2497
+ "step": 8800
2498
+ },
2499
+ {
2500
+ "epoch": 2.4964639321074964,
2501
+ "grad_norm": 0.49331003427505493,
2502
+ "learning_rate": 2.9417901527680582e-05,
2503
+ "loss": 0.049,
2504
+ "step": 8825
2505
+ },
2506
+ {
2507
+ "epoch": 2.5035360678925036,
2508
+ "grad_norm": 0.3239150941371918,
2509
+ "learning_rate": 2.9296354887053367e-05,
2510
+ "loss": 0.0522,
2511
+ "step": 8850
2512
+ },
2513
+ {
2514
+ "epoch": 2.510608203677511,
2515
+ "grad_norm": 0.2788864076137543,
2516
+ "learning_rate": 2.9174703508862828e-05,
2517
+ "loss": 0.0549,
2518
+ "step": 8875
2519
+ },
2520
+ {
2521
+ "epoch": 2.5176803394625176,
2522
+ "grad_norm": 0.3619994819164276,
2523
+ "learning_rate": 2.9057822401556116e-05,
2524
+ "loss": 0.0525,
2525
+ "step": 8900
2526
+ },
2527
+ {
2528
+ "epoch": 2.5247524752475248,
2529
+ "grad_norm": 0.25908371806144714,
2530
+ "learning_rate": 2.89359743427919e-05,
2531
+ "loss": 0.057,
2532
+ "step": 8925
2533
+ },
2534
+ {
2535
+ "epoch": 2.531824611032532,
2536
+ "grad_norm": 0.3243955373764038,
2537
+ "learning_rate": 2.8814030331907134e-05,
2538
+ "loss": 0.052,
2539
+ "step": 8950
2540
+ },
2541
+ {
2542
+ "epoch": 2.5388967468175387,
2543
+ "grad_norm": 0.3788680136203766,
2544
+ "learning_rate": 2.8691993341681998e-05,
2545
+ "loss": 0.0526,
2546
+ "step": 8975
2547
+ },
2548
+ {
2549
+ "epoch": 2.545968882602546,
2550
+ "grad_norm": 0.27258631587028503,
2551
+ "learning_rate": 2.856986634716332e-05,
2552
+ "loss": 0.0555,
2553
+ "step": 9000
2554
+ },
2555
+ {
2556
+ "epoch": 2.553041018387553,
2557
+ "grad_norm": 1.2522977590560913,
2558
+ "learning_rate": 2.8447652325592066e-05,
2559
+ "loss": 0.0574,
2560
+ "step": 9025
2561
+ },
2562
+ {
2563
+ "epoch": 2.5601131541725604,
2564
+ "grad_norm": 0.23333418369293213,
2565
+ "learning_rate": 2.8325354256330787e-05,
2566
+ "loss": 0.0531,
2567
+ "step": 9050
2568
+ },
2569
+ {
2570
+ "epoch": 2.567185289957567,
2571
+ "grad_norm": 0.32750052213668823,
2572
+ "learning_rate": 2.8202975120790963e-05,
2573
+ "loss": 0.0505,
2574
+ "step": 9075
2575
+ },
2576
+ {
2577
+ "epoch": 2.5742574257425743,
2578
+ "grad_norm": 0.3817848861217499,
2579
+ "learning_rate": 2.8080517902360316e-05,
2580
+ "loss": 0.0533,
2581
+ "step": 9100
2582
+ },
2583
+ {
2584
+ "epoch": 2.581329561527581,
2585
+ "grad_norm": 1.9594416618347168,
2586
+ "learning_rate": 2.7957985586330106e-05,
2587
+ "loss": 0.0499,
2588
+ "step": 9125
2589
+ },
2590
+ {
2591
+ "epoch": 2.5884016973125883,
2592
+ "grad_norm": 0.3222751021385193,
2593
+ "learning_rate": 2.7835381159822336e-05,
2594
+ "loss": 0.0553,
2595
+ "step": 9150
2596
+ },
2597
+ {
2598
+ "epoch": 2.5954738330975955,
2599
+ "grad_norm": 0.3441489040851593,
2600
+ "learning_rate": 2.7712707611716938e-05,
2601
+ "loss": 0.0533,
2602
+ "step": 9175
2603
+ },
2604
+ {
2605
+ "epoch": 2.6025459688826027,
2606
+ "grad_norm": 0.39582306146621704,
2607
+ "learning_rate": 2.75899679325789e-05,
2608
+ "loss": 0.0516,
2609
+ "step": 9200
2610
+ },
2611
+ {
2612
+ "epoch": 2.6096181046676095,
2613
+ "grad_norm": 0.32057181000709534,
2614
+ "learning_rate": 2.7467165114585358e-05,
2615
+ "loss": 0.0573,
2616
+ "step": 9225
2617
+ },
2618
+ {
2619
+ "epoch": 2.6166902404526167,
2620
+ "grad_norm": 0.49784281849861145,
2621
+ "learning_rate": 2.734430215145269e-05,
2622
+ "loss": 0.0508,
2623
+ "step": 9250
2624
+ },
2625
+ {
2626
+ "epoch": 2.623762376237624,
2627
+ "grad_norm": 0.3396826386451721,
2628
+ "learning_rate": 2.7221382038363492e-05,
2629
+ "loss": 0.053,
2630
+ "step": 9275
2631
+ },
2632
+ {
2633
+ "epoch": 2.6308345120226306,
2634
+ "grad_norm": 0.30675604939460754,
2635
+ "learning_rate": 2.7098407771893563e-05,
2636
+ "loss": 0.0527,
2637
+ "step": 9300
2638
+ },
2639
+ {
2640
+ "epoch": 2.637906647807638,
2641
+ "grad_norm": 0.7486620545387268,
2642
+ "learning_rate": 2.6975382349938892e-05,
2643
+ "loss": 0.0466,
2644
+ "step": 9325
2645
+ },
2646
+ {
2647
+ "epoch": 2.644978783592645,
2648
+ "grad_norm": 0.2760941684246063,
2649
+ "learning_rate": 2.685230877164253e-05,
2650
+ "loss": 0.0544,
2651
+ "step": 9350
2652
+ },
2653
+ {
2654
+ "epoch": 2.6520509193776522,
2655
+ "grad_norm": 0.4320007264614105,
2656
+ "learning_rate": 2.672919003732149e-05,
2657
+ "loss": 0.0516,
2658
+ "step": 9375
2659
+ },
2660
+ {
2661
+ "epoch": 2.659123055162659,
2662
+ "grad_norm": 0.390225350856781,
2663
+ "learning_rate": 2.6606029148393625e-05,
2664
+ "loss": 0.0491,
2665
+ "step": 9400
2666
+ },
2667
+ {
2668
+ "epoch": 2.666195190947666,
2669
+ "grad_norm": 0.3701136112213135,
2670
+ "learning_rate": 2.648282910730444e-05,
2671
+ "loss": 0.0531,
2672
+ "step": 9425
2673
+ },
2674
+ {
2675
+ "epoch": 2.6732673267326734,
2676
+ "grad_norm": 0.554530918598175,
2677
+ "learning_rate": 2.6359592917453897e-05,
2678
+ "loss": 0.0514,
2679
+ "step": 9450
2680
+ },
2681
+ {
2682
+ "epoch": 2.68033946251768,
2683
+ "grad_norm": 0.7218428254127502,
2684
+ "learning_rate": 2.623632358312319e-05,
2685
+ "loss": 0.0531,
2686
+ "step": 9475
2687
+ },
2688
+ {
2689
+ "epoch": 2.6874115983026874,
2690
+ "grad_norm": 0.4088691174983978,
2691
+ "learning_rate": 2.6113024109401547e-05,
2692
+ "loss": 0.0517,
2693
+ "step": 9500
2694
+ },
2695
+ {
2696
+ "epoch": 2.6944837340876946,
2697
+ "grad_norm": 0.2585553228855133,
2698
+ "learning_rate": 2.5989697502112903e-05,
2699
+ "loss": 0.0495,
2700
+ "step": 9525
2701
+ },
2702
+ {
2703
+ "epoch": 2.701555869872702,
2704
+ "grad_norm": 0.3183114528656006,
2705
+ "learning_rate": 2.58663467677427e-05,
2706
+ "loss": 0.0488,
2707
+ "step": 9550
2708
+ },
2709
+ {
2710
+ "epoch": 2.7086280056577086,
2711
+ "grad_norm": 0.35523074865341187,
2712
+ "learning_rate": 2.574297491336452e-05,
2713
+ "loss": 0.0507,
2714
+ "step": 9575
2715
+ },
2716
+ {
2717
+ "epoch": 2.7157001414427158,
2718
+ "grad_norm": 0.3662826120853424,
2719
+ "learning_rate": 2.5619584946566844e-05,
2720
+ "loss": 0.0492,
2721
+ "step": 9600
2722
+ },
2723
+ {
2724
+ "epoch": 2.7227722772277225,
2725
+ "grad_norm": 0.5980022549629211,
2726
+ "learning_rate": 2.549617987537968e-05,
2727
+ "loss": 0.056,
2728
+ "step": 9625
2729
+ },
2730
+ {
2731
+ "epoch": 2.7298444130127297,
2732
+ "grad_norm": 0.3375484347343445,
2733
+ "learning_rate": 2.537276270820127e-05,
2734
+ "loss": 0.0545,
2735
+ "step": 9650
2736
+ },
2737
+ {
2738
+ "epoch": 2.736916548797737,
2739
+ "grad_norm": 0.2702608108520508,
2740
+ "learning_rate": 2.524933645372472e-05,
2741
+ "loss": 0.0483,
2742
+ "step": 9675
2743
+ },
2744
+ {
2745
+ "epoch": 2.743988684582744,
2746
+ "grad_norm": 0.20587654411792755,
2747
+ "learning_rate": 2.512590412086468e-05,
2748
+ "loss": 0.048,
2749
+ "step": 9700
2750
+ },
2751
+ {
2752
+ "epoch": 2.751060820367751,
2753
+ "grad_norm": 0.40114825963974,
2754
+ "learning_rate": 2.5002468718683985e-05,
2755
+ "loss": 0.0478,
2756
+ "step": 9725
2757
+ },
2758
+ {
2759
+ "epoch": 2.758132956152758,
2760
+ "grad_norm": 0.25009649991989136,
2761
+ "learning_rate": 2.4879033256320268e-05,
2762
+ "loss": 0.0528,
2763
+ "step": 9750
2764
+ },
2765
+ {
2766
+ "epoch": 2.7652050919377653,
2767
+ "grad_norm": 0.4038143455982208,
2768
+ "learning_rate": 2.475560074291266e-05,
2769
+ "loss": 0.0536,
2770
+ "step": 9775
2771
+ },
2772
+ {
2773
+ "epoch": 2.772277227722772,
2774
+ "grad_norm": 0.3600768744945526,
2775
+ "learning_rate": 2.463217418752838e-05,
2776
+ "loss": 0.0513,
2777
+ "step": 9800
2778
+ },
2779
+ {
2780
+ "epoch": 2.7793493635077793,
2781
+ "grad_norm": 0.2662784457206726,
2782
+ "learning_rate": 2.4508756599089423e-05,
2783
+ "loss": 0.0528,
2784
+ "step": 9825
2785
+ },
2786
+ {
2787
+ "epoch": 2.7864214992927865,
2788
+ "grad_norm": 0.4926321506500244,
2789
+ "learning_rate": 2.4385350986299162e-05,
2790
+ "loss": 0.0502,
2791
+ "step": 9850
2792
+ },
2793
+ {
2794
+ "epoch": 2.7934936350777937,
2795
+ "grad_norm": 0.3003157675266266,
2796
+ "learning_rate": 2.4261960357569036e-05,
2797
+ "loss": 0.0491,
2798
+ "step": 9875
2799
+ },
2800
+ {
2801
+ "epoch": 2.8005657708628005,
2802
+ "grad_norm": 0.3073439598083496,
2803
+ "learning_rate": 2.413858772094521e-05,
2804
+ "loss": 0.051,
2805
+ "step": 9900
2806
+ },
2807
+ {
2808
+ "epoch": 2.8076379066478077,
2809
+ "grad_norm": 0.4092470109462738,
2810
+ "learning_rate": 2.4015236084035204e-05,
2811
+ "loss": 0.0474,
2812
+ "step": 9925
2813
+ },
2814
+ {
2815
+ "epoch": 2.814710042432815,
2816
+ "grad_norm": 0.5402696132659912,
2817
+ "learning_rate": 2.389190845393463e-05,
2818
+ "loss": 0.0492,
2819
+ "step": 9950
2820
+ },
2821
+ {
2822
+ "epoch": 2.8217821782178216,
2823
+ "grad_norm": 0.3162129521369934,
2824
+ "learning_rate": 2.376860783715384e-05,
2825
+ "loss": 0.0509,
2826
+ "step": 9975
2827
+ },
2828
+ {
2829
+ "epoch": 2.828854314002829,
2830
+ "grad_norm": 0.3184404671192169,
2831
+ "learning_rate": 2.3645337239544658e-05,
2832
+ "loss": 0.0493,
2833
+ "step": 10000
2834
+ },
2835
+ {
2836
+ "epoch": 2.835926449787836,
2837
+ "grad_norm": 0.33763962984085083,
2838
+ "learning_rate": 2.3522099666227087e-05,
2839
+ "loss": 0.0489,
2840
+ "step": 10025
2841
+ },
2842
+ {
2843
+ "epoch": 2.8429985855728432,
2844
+ "grad_norm": 0.4636186361312866,
2845
+ "learning_rate": 2.339889812151606e-05,
2846
+ "loss": 0.0545,
2847
+ "step": 10050
2848
+ },
2849
+ {
2850
+ "epoch": 2.85007072135785,
2851
+ "grad_norm": 0.3926536738872528,
2852
+ "learning_rate": 2.3275735608848208e-05,
2853
+ "loss": 0.0501,
2854
+ "step": 10075
2855
+ },
2856
+ {
2857
+ "epoch": 2.857142857142857,
2858
+ "grad_norm": 0.4206957519054413,
2859
+ "learning_rate": 2.3152615130708592e-05,
2860
+ "loss": 0.0502,
2861
+ "step": 10100
2862
+ },
2863
+ {
2864
+ "epoch": 2.864214992927864,
2865
+ "grad_norm": 0.729640781879425,
2866
+ "learning_rate": 2.302953968855759e-05,
2867
+ "loss": 0.0469,
2868
+ "step": 10125
2869
+ },
2870
+ {
2871
+ "epoch": 2.871287128712871,
2872
+ "grad_norm": 0.37660130858421326,
2873
+ "learning_rate": 2.2906512282757644e-05,
2874
+ "loss": 0.0524,
2875
+ "step": 10150
2876
+ },
2877
+ {
2878
+ "epoch": 2.8783592644978784,
2879
+ "grad_norm": 0.5026111602783203,
2880
+ "learning_rate": 2.2783535912500187e-05,
2881
+ "loss": 0.0587,
2882
+ "step": 10175
2883
+ },
2884
+ {
2885
+ "epoch": 2.8854314002828856,
2886
+ "grad_norm": 0.3521290123462677,
2887
+ "learning_rate": 2.266061357573248e-05,
2888
+ "loss": 0.0507,
2889
+ "step": 10200
2890
+ },
2891
+ {
2892
+ "epoch": 2.8925035360678923,
2893
+ "grad_norm": 0.32747042179107666,
2894
+ "learning_rate": 2.253774826908453e-05,
2895
+ "loss": 0.0531,
2896
+ "step": 10225
2897
+ },
2898
+ {
2899
+ "epoch": 2.8995756718528995,
2900
+ "grad_norm": 0.3167445659637451,
2901
+ "learning_rate": 2.2414942987796084e-05,
2902
+ "loss": 0.0468,
2903
+ "step": 10250
2904
+ },
2905
+ {
2906
+ "epoch": 2.9066478076379068,
2907
+ "grad_norm": 0.30890071392059326,
2908
+ "learning_rate": 2.2292200725643534e-05,
2909
+ "loss": 0.0535,
2910
+ "step": 10275
2911
+ },
2912
+ {
2913
+ "epoch": 2.9137199434229135,
2914
+ "grad_norm": 0.4126497209072113,
2915
+ "learning_rate": 2.216952447486701e-05,
2916
+ "loss": 0.0491,
2917
+ "step": 10300
2918
+ },
2919
+ {
2920
+ "epoch": 2.9207920792079207,
2921
+ "grad_norm": 1.0146815776824951,
2922
+ "learning_rate": 2.2046917226097387e-05,
2923
+ "loss": 0.0507,
2924
+ "step": 10325
2925
+ },
2926
+ {
2927
+ "epoch": 2.927864214992928,
2928
+ "grad_norm": 0.3795275390148163,
2929
+ "learning_rate": 2.1924381968283394e-05,
2930
+ "loss": 0.0501,
2931
+ "step": 10350
2932
+ },
2933
+ {
2934
+ "epoch": 2.934936350777935,
2935
+ "grad_norm": 0.3325127065181732,
2936
+ "learning_rate": 2.1801921688618758e-05,
2937
+ "loss": 0.0438,
2938
+ "step": 10375
2939
+ },
2940
+ {
2941
+ "epoch": 2.942008486562942,
2942
+ "grad_norm": 0.358235239982605,
2943
+ "learning_rate": 2.167953937246934e-05,
2944
+ "loss": 0.0519,
2945
+ "step": 10400
2946
+ },
2947
+ {
2948
+ "epoch": 2.949080622347949,
2949
+ "grad_norm": 0.37195077538490295,
2950
+ "learning_rate": 2.1557238003300438e-05,
2951
+ "loss": 0.0472,
2952
+ "step": 10425
2953
+ },
2954
+ {
2955
+ "epoch": 2.9561527581329563,
2956
+ "grad_norm": 0.3947463929653168,
2957
+ "learning_rate": 2.1435020562603944e-05,
2958
+ "loss": 0.0469,
2959
+ "step": 10450
2960
+ },
2961
+ {
2962
+ "epoch": 2.963224893917963,
2963
+ "grad_norm": 0.3517344892024994,
2964
+ "learning_rate": 2.1312890029825765e-05,
2965
+ "loss": 0.0451,
2966
+ "step": 10475
2967
+ },
2968
+ {
2969
+ "epoch": 2.9702970297029703,
2970
+ "grad_norm": 0.4573589861392975,
2971
+ "learning_rate": 2.1190849382293153e-05,
2972
+ "loss": 0.0478,
2973
+ "step": 10500
2974
+ },
2975
+ {
2976
+ "epoch": 2.9773691654879775,
2977
+ "grad_norm": 0.4348020553588867,
2978
+ "learning_rate": 2.1068901595142076e-05,
2979
+ "loss": 0.0471,
2980
+ "step": 10525
2981
+ },
2982
+ {
2983
+ "epoch": 2.9844413012729847,
2984
+ "grad_norm": 0.27447789907455444,
2985
+ "learning_rate": 2.0947049641244763e-05,
2986
+ "loss": 0.0525,
2987
+ "step": 10550
2988
+ },
2989
+ {
2990
+ "epoch": 2.9915134370579914,
2991
+ "grad_norm": 0.5210450291633606,
2992
+ "learning_rate": 2.0825296491137178e-05,
2993
+ "loss": 0.049,
2994
+ "step": 10575
2995
+ },
2996
+ {
2997
+ "epoch": 2.9985855728429986,
2998
+ "grad_norm": 0.4803789556026459,
2999
+ "learning_rate": 2.0703645112946632e-05,
3000
+ "loss": 0.048,
3001
+ "step": 10600
3002
+ },
3003
+ {
3004
+ "epoch": 3.0,
3005
+ "eval_accuracy": 0.9731636716504077,
3006
+ "eval_f1": 0.9040792973909716,
3007
+ "eval_loss": 0.080692358314991,
3008
+ "eval_precision": 0.8956920811279763,
3009
+ "eval_recall": 0.9126250733540829,
3010
+ "eval_runtime": 54.8402,
3011
+ "eval_samples_per_second": 916.701,
3012
+ "eval_steps_per_second": 3.592,
3013
+ "step": 10605
3014
+ }
3015
+ ],
3016
+ "logging_steps": 25,
3017
+ "max_steps": 17675,
3018
+ "num_input_tokens_seen": 0,
3019
+ "num_train_epochs": 5,
3020
+ "save_steps": 500,
3021
+ "stateful_callbacks": {
3022
+ "EarlyStoppingCallback": {
3023
+ "args": {
3024
+ "early_stopping_patience": 5,
3025
+ "early_stopping_threshold": 0.01
3026
+ },
3027
+ "attributes": {
3028
+ "early_stopping_patience_counter": 1
3029
+ }
3030
+ },
3031
+ "TrainerControl": {
3032
+ "args": {
3033
+ "should_epoch_stop": false,
3034
+ "should_evaluate": false,
3035
+ "should_log": false,
3036
+ "should_save": true,
3037
+ "should_training_stop": false
3038
+ },
3039
+ "attributes": {}
3040
+ }
3041
+ },
3042
+ "total_flos": 3.589143261681162e+17,
3043
+ "train_batch_size": 128,
3044
+ "trial_name": null,
3045
+ "trial_params": null
3046
+ }
checkpoint-10605/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc7181b00303f72cd9e8d7e9489278da1dba68ace8042efb585819e98f131563
3
+ size 5368
config.json ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "answerdotai/ModernBERT-large",
3
+ "_num_labels": 42,
4
+ "architectures": [
5
+ "ModernBertForTokenClassification"
6
+ ],
7
+ "attention_bias": false,
8
+ "attention_dropout": 0.0,
9
+ "bos_token_id": 50281,
10
+ "classifier_activation": "gelu",
11
+ "classifier_bias": false,
12
+ "classifier_dropout": 0.0,
13
+ "classifier_pooling": "mean",
14
+ "cls_token_id": 50281,
15
+ "decoder_bias": true,
16
+ "deterministic_flash_attn": false,
17
+ "embedding_dropout": 0.0,
18
+ "eos_token_id": 50282,
19
+ "global_attn_every_n_layers": 3,
20
+ "global_rope_theta": 160000.0,
21
+ "gradient_checkpointing": false,
22
+ "hidden_activation": "gelu",
23
+ "hidden_size": 1024,
24
+ "id2label": {
25
+ "0": "B-URL",
26
+ "1": "I-URL",
27
+ "2": "O",
28
+ "3": "B-ORG",
29
+ "4": "B-SERVICE",
30
+ "5": "I-ORG",
31
+ "6": "B-SECTOR",
32
+ "7": "I-SECTOR",
33
+ "8": "B-FILEPATH",
34
+ "9": "I-FILEPATH",
35
+ "10": "I-DOMAIN",
36
+ "11": "B-PLATFORM",
37
+ "12": "I-SERVICE",
38
+ "13": "I-PLATFORM",
39
+ "14": "B-THREAT-ACTOR",
40
+ "15": "I-THREAT-ACTOR",
41
+ "16": "B-PRODUCT",
42
+ "17": "B-MALWARE",
43
+ "18": "I-MALWARE",
44
+ "19": "B-LOC",
45
+ "20": "B-CVE",
46
+ "21": "I-CVE",
47
+ "22": "B-TOOL",
48
+ "23": "I-PRODUCT",
49
+ "24": "B-IPV4",
50
+ "25": "I-IPV4",
51
+ "26": "B-MITRE-TACTIC",
52
+ "27": "I-MITRE-TACTIC",
53
+ "28": "B-DOMAIN",
54
+ "29": "I-TOOL",
55
+ "30": "B-MD5",
56
+ "31": "I-LOC",
57
+ "32": "B-CAMPAIGN",
58
+ "33": "I-CAMPAIGN",
59
+ "34": "B-SHA1",
60
+ "35": "B-SHA256",
61
+ "36": "B-EMAIL",
62
+ "37": "I-EMAIL",
63
+ "38": "B-IPV6",
64
+ "39": "I-IPV6",
65
+ "40": "B-REGISTRY-KEYS",
66
+ "41": "I-REGISTRY-KEYS"
67
+ },
68
+ "initializer_cutoff_factor": 2.0,
69
+ "initializer_range": 0.02,
70
+ "intermediate_size": 2624,
71
+ "label2id": {
72
+ "B-CAMPAIGN": 32,
73
+ "B-CVE": 20,
74
+ "B-DOMAIN": 28,
75
+ "B-EMAIL": 36,
76
+ "B-FILEPATH": 8,
77
+ "B-IPV4": 24,
78
+ "B-IPV6": 38,
79
+ "B-LOC": 19,
80
+ "B-MALWARE": 17,
81
+ "B-MD5": 30,
82
+ "B-MITRE-TACTIC": 26,
83
+ "B-ORG": 3,
84
+ "B-PLATFORM": 11,
85
+ "B-PRODUCT": 16,
86
+ "B-REGISTRY-KEYS": 40,
87
+ "B-SECTOR": 6,
88
+ "B-SERVICE": 4,
89
+ "B-SHA1": 34,
90
+ "B-SHA256": 35,
91
+ "B-THREAT-ACTOR": 14,
92
+ "B-TOOL": 22,
93
+ "B-URL": 0,
94
+ "I-CAMPAIGN": 33,
95
+ "I-CVE": 21,
96
+ "I-DOMAIN": 10,
97
+ "I-EMAIL": 37,
98
+ "I-FILEPATH": 9,
99
+ "I-IPV4": 25,
100
+ "I-IPV6": 39,
101
+ "I-LOC": 31,
102
+ "I-MALWARE": 18,
103
+ "I-MITRE-TACTIC": 27,
104
+ "I-ORG": 5,
105
+ "I-PLATFORM": 13,
106
+ "I-PRODUCT": 23,
107
+ "I-REGISTRY-KEYS": 41,
108
+ "I-SECTOR": 7,
109
+ "I-SERVICE": 12,
110
+ "I-THREAT-ACTOR": 15,
111
+ "I-TOOL": 29,
112
+ "I-URL": 1,
113
+ "O": 2
114
+ },
115
+ "layer_norm_eps": 1e-05,
116
+ "local_attention": 128,
117
+ "local_rope_theta": 10000.0,
118
+ "max_position_embeddings": 8192,
119
+ "mlp_bias": false,
120
+ "mlp_dropout": 0.0,
121
+ "model_type": "modernbert",
122
+ "norm_bias": false,
123
+ "norm_eps": 1e-05,
124
+ "num_attention_heads": 16,
125
+ "num_hidden_layers": 28,
126
+ "pad_token_id": 50283,
127
+ "position_embedding_type": "absolute",
128
+ "reference_compile": true,
129
+ "repad_logits_with_grad": false,
130
+ "sep_token_id": 50282,
131
+ "sparse_pred_ignore_index": -100,
132
+ "sparse_prediction": false,
133
+ "torch_dtype": "float32",
134
+ "transformers_version": "4.48.0",
135
+ "vocab_size": 50368
136
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52fe43effd7557bea8056246926a2cc952953b1facb01c2c020f0b8fa5e7927f
3
+ size 1583515640
runs/Oct24_07-27-25_r-juanmcristobal-securemodernbert-50nhce16-13175-rr7xq/events.out.tfevents.1761290846.r-juanmcristobal-securemodernbert-50nhce16-13175-rr7xq.90.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:42db7c8c41ebecd2078c717806e719e1644bc6523b9b71b6f0c4db85c63fa039
3
- size 151924
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f72922b2d4f23b24b599d2a862a5c2a3785f4349f987a90dc5fd62813d9f869
3
+ size 159645
runs/Oct24_07-27-25_r-juanmcristobal-securemodernbert-50nhce16-13175-rr7xq/events.out.tfevents.1761295429.r-juanmcristobal-securemodernbert-50nhce16-13175-rr7xq.90.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02d38fb6db1fe852ab57f12fb7b89cf22ca835971860b913acffa0a36f515c69
3
+ size 569
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": true,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,945 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "|||IP_ADDRESS|||",
5
+ "lstrip": false,
6
+ "normalized": true,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": false
10
+ },
11
+ "1": {
12
+ "content": "<|padding|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "50254": {
20
+ "content": " ",
21
+ "lstrip": false,
22
+ "normalized": true,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": false
26
+ },
27
+ "50255": {
28
+ "content": " ",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": false
34
+ },
35
+ "50256": {
36
+ "content": " ",
37
+ "lstrip": false,
38
+ "normalized": true,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": false
42
+ },
43
+ "50257": {
44
+ "content": " ",
45
+ "lstrip": false,
46
+ "normalized": true,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": false
50
+ },
51
+ "50258": {
52
+ "content": " ",
53
+ "lstrip": false,
54
+ "normalized": true,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": false
58
+ },
59
+ "50259": {
60
+ "content": " ",
61
+ "lstrip": false,
62
+ "normalized": true,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": false
66
+ },
67
+ "50260": {
68
+ "content": " ",
69
+ "lstrip": false,
70
+ "normalized": true,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": false
74
+ },
75
+ "50261": {
76
+ "content": " ",
77
+ "lstrip": false,
78
+ "normalized": true,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": false
82
+ },
83
+ "50262": {
84
+ "content": " ",
85
+ "lstrip": false,
86
+ "normalized": true,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": false
90
+ },
91
+ "50263": {
92
+ "content": " ",
93
+ "lstrip": false,
94
+ "normalized": true,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": false
98
+ },
99
+ "50264": {
100
+ "content": " ",
101
+ "lstrip": false,
102
+ "normalized": true,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": false
106
+ },
107
+ "50265": {
108
+ "content": " ",
109
+ "lstrip": false,
110
+ "normalized": true,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": false
114
+ },
115
+ "50266": {
116
+ "content": " ",
117
+ "lstrip": false,
118
+ "normalized": true,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": false
122
+ },
123
+ "50267": {
124
+ "content": " ",
125
+ "lstrip": false,
126
+ "normalized": true,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": false
130
+ },
131
+ "50268": {
132
+ "content": " ",
133
+ "lstrip": false,
134
+ "normalized": true,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": false
138
+ },
139
+ "50269": {
140
+ "content": " ",
141
+ "lstrip": false,
142
+ "normalized": true,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": false
146
+ },
147
+ "50270": {
148
+ "content": " ",
149
+ "lstrip": false,
150
+ "normalized": true,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": false
154
+ },
155
+ "50271": {
156
+ "content": " ",
157
+ "lstrip": false,
158
+ "normalized": true,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": false
162
+ },
163
+ "50272": {
164
+ "content": " ",
165
+ "lstrip": false,
166
+ "normalized": true,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": false
170
+ },
171
+ "50273": {
172
+ "content": " ",
173
+ "lstrip": false,
174
+ "normalized": true,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": false
178
+ },
179
+ "50274": {
180
+ "content": " ",
181
+ "lstrip": false,
182
+ "normalized": true,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": false
186
+ },
187
+ "50275": {
188
+ "content": " ",
189
+ "lstrip": false,
190
+ "normalized": true,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": false
194
+ },
195
+ "50276": {
196
+ "content": " ",
197
+ "lstrip": false,
198
+ "normalized": true,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": false
202
+ },
203
+ "50277": {
204
+ "content": "|||EMAIL_ADDRESS|||",
205
+ "lstrip": false,
206
+ "normalized": true,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": false
210
+ },
211
+ "50278": {
212
+ "content": "|||PHONE_NUMBER|||",
213
+ "lstrip": false,
214
+ "normalized": true,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": false
218
+ },
219
+ "50279": {
220
+ "content": "<|endoftext|>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "50280": {
228
+ "content": "[UNK]",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "50281": {
236
+ "content": "[CLS]",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "50282": {
244
+ "content": "[SEP]",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "50283": {
252
+ "content": "[PAD]",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "50284": {
260
+ "content": "[MASK]",
261
+ "lstrip": true,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "50285": {
268
+ "content": "[unused0]",
269
+ "lstrip": false,
270
+ "normalized": true,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": false
274
+ },
275
+ "50286": {
276
+ "content": "[unused1]",
277
+ "lstrip": false,
278
+ "normalized": true,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": false
282
+ },
283
+ "50287": {
284
+ "content": "[unused2]",
285
+ "lstrip": false,
286
+ "normalized": true,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": false
290
+ },
291
+ "50288": {
292
+ "content": "[unused3]",
293
+ "lstrip": false,
294
+ "normalized": true,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": false
298
+ },
299
+ "50289": {
300
+ "content": "[unused4]",
301
+ "lstrip": false,
302
+ "normalized": true,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": false
306
+ },
307
+ "50290": {
308
+ "content": "[unused5]",
309
+ "lstrip": false,
310
+ "normalized": true,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": false
314
+ },
315
+ "50291": {
316
+ "content": "[unused6]",
317
+ "lstrip": false,
318
+ "normalized": true,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": false
322
+ },
323
+ "50292": {
324
+ "content": "[unused7]",
325
+ "lstrip": false,
326
+ "normalized": true,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": false
330
+ },
331
+ "50293": {
332
+ "content": "[unused8]",
333
+ "lstrip": false,
334
+ "normalized": true,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": false
338
+ },
339
+ "50294": {
340
+ "content": "[unused9]",
341
+ "lstrip": false,
342
+ "normalized": true,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": false
346
+ },
347
+ "50295": {
348
+ "content": "[unused10]",
349
+ "lstrip": false,
350
+ "normalized": true,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": false
354
+ },
355
+ "50296": {
356
+ "content": "[unused11]",
357
+ "lstrip": false,
358
+ "normalized": true,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": false
362
+ },
363
+ "50297": {
364
+ "content": "[unused12]",
365
+ "lstrip": false,
366
+ "normalized": true,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": false
370
+ },
371
+ "50298": {
372
+ "content": "[unused13]",
373
+ "lstrip": false,
374
+ "normalized": true,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": false
378
+ },
379
+ "50299": {
380
+ "content": "[unused14]",
381
+ "lstrip": false,
382
+ "normalized": true,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": false
386
+ },
387
+ "50300": {
388
+ "content": "[unused15]",
389
+ "lstrip": false,
390
+ "normalized": true,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": false
394
+ },
395
+ "50301": {
396
+ "content": "[unused16]",
397
+ "lstrip": false,
398
+ "normalized": true,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": false
402
+ },
403
+ "50302": {
404
+ "content": "[unused17]",
405
+ "lstrip": false,
406
+ "normalized": true,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": false
410
+ },
411
+ "50303": {
412
+ "content": "[unused18]",
413
+ "lstrip": false,
414
+ "normalized": true,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": false
418
+ },
419
+ "50304": {
420
+ "content": "[unused19]",
421
+ "lstrip": false,
422
+ "normalized": true,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": false
426
+ },
427
+ "50305": {
428
+ "content": "[unused20]",
429
+ "lstrip": false,
430
+ "normalized": true,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": false
434
+ },
435
+ "50306": {
436
+ "content": "[unused21]",
437
+ "lstrip": false,
438
+ "normalized": true,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": false
442
+ },
443
+ "50307": {
444
+ "content": "[unused22]",
445
+ "lstrip": false,
446
+ "normalized": true,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": false
450
+ },
451
+ "50308": {
452
+ "content": "[unused23]",
453
+ "lstrip": false,
454
+ "normalized": true,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": false
458
+ },
459
+ "50309": {
460
+ "content": "[unused24]",
461
+ "lstrip": false,
462
+ "normalized": true,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": false
466
+ },
467
+ "50310": {
468
+ "content": "[unused25]",
469
+ "lstrip": false,
470
+ "normalized": true,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": false
474
+ },
475
+ "50311": {
476
+ "content": "[unused26]",
477
+ "lstrip": false,
478
+ "normalized": true,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": false
482
+ },
483
+ "50312": {
484
+ "content": "[unused27]",
485
+ "lstrip": false,
486
+ "normalized": true,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": false
490
+ },
491
+ "50313": {
492
+ "content": "[unused28]",
493
+ "lstrip": false,
494
+ "normalized": true,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": false
498
+ },
499
+ "50314": {
500
+ "content": "[unused29]",
501
+ "lstrip": false,
502
+ "normalized": true,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": false
506
+ },
507
+ "50315": {
508
+ "content": "[unused30]",
509
+ "lstrip": false,
510
+ "normalized": true,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": false
514
+ },
515
+ "50316": {
516
+ "content": "[unused31]",
517
+ "lstrip": false,
518
+ "normalized": true,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": false
522
+ },
523
+ "50317": {
524
+ "content": "[unused32]",
525
+ "lstrip": false,
526
+ "normalized": true,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": false
530
+ },
531
+ "50318": {
532
+ "content": "[unused33]",
533
+ "lstrip": false,
534
+ "normalized": true,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": false
538
+ },
539
+ "50319": {
540
+ "content": "[unused34]",
541
+ "lstrip": false,
542
+ "normalized": true,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": false
546
+ },
547
+ "50320": {
548
+ "content": "[unused35]",
549
+ "lstrip": false,
550
+ "normalized": true,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": false
554
+ },
555
+ "50321": {
556
+ "content": "[unused36]",
557
+ "lstrip": false,
558
+ "normalized": true,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": false
562
+ },
563
+ "50322": {
564
+ "content": "[unused37]",
565
+ "lstrip": false,
566
+ "normalized": true,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": false
570
+ },
571
+ "50323": {
572
+ "content": "[unused38]",
573
+ "lstrip": false,
574
+ "normalized": true,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": false
578
+ },
579
+ "50324": {
580
+ "content": "[unused39]",
581
+ "lstrip": false,
582
+ "normalized": true,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": false
586
+ },
587
+ "50325": {
588
+ "content": "[unused40]",
589
+ "lstrip": false,
590
+ "normalized": true,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": false
594
+ },
595
+ "50326": {
596
+ "content": "[unused41]",
597
+ "lstrip": false,
598
+ "normalized": true,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": false
602
+ },
603
+ "50327": {
604
+ "content": "[unused42]",
605
+ "lstrip": false,
606
+ "normalized": true,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": false
610
+ },
611
+ "50328": {
612
+ "content": "[unused43]",
613
+ "lstrip": false,
614
+ "normalized": true,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": false
618
+ },
619
+ "50329": {
620
+ "content": "[unused44]",
621
+ "lstrip": false,
622
+ "normalized": true,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": false
626
+ },
627
+ "50330": {
628
+ "content": "[unused45]",
629
+ "lstrip": false,
630
+ "normalized": true,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": false
634
+ },
635
+ "50331": {
636
+ "content": "[unused46]",
637
+ "lstrip": false,
638
+ "normalized": true,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": false
642
+ },
643
+ "50332": {
644
+ "content": "[unused47]",
645
+ "lstrip": false,
646
+ "normalized": true,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": false
650
+ },
651
+ "50333": {
652
+ "content": "[unused48]",
653
+ "lstrip": false,
654
+ "normalized": true,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": false
658
+ },
659
+ "50334": {
660
+ "content": "[unused49]",
661
+ "lstrip": false,
662
+ "normalized": true,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": false
666
+ },
667
+ "50335": {
668
+ "content": "[unused50]",
669
+ "lstrip": false,
670
+ "normalized": true,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": false
674
+ },
675
+ "50336": {
676
+ "content": "[unused51]",
677
+ "lstrip": false,
678
+ "normalized": true,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": false
682
+ },
683
+ "50337": {
684
+ "content": "[unused52]",
685
+ "lstrip": false,
686
+ "normalized": true,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": false
690
+ },
691
+ "50338": {
692
+ "content": "[unused53]",
693
+ "lstrip": false,
694
+ "normalized": true,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": false
698
+ },
699
+ "50339": {
700
+ "content": "[unused54]",
701
+ "lstrip": false,
702
+ "normalized": true,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": false
706
+ },
707
+ "50340": {
708
+ "content": "[unused55]",
709
+ "lstrip": false,
710
+ "normalized": true,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": false
714
+ },
715
+ "50341": {
716
+ "content": "[unused56]",
717
+ "lstrip": false,
718
+ "normalized": true,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": false
722
+ },
723
+ "50342": {
724
+ "content": "[unused57]",
725
+ "lstrip": false,
726
+ "normalized": true,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": false
730
+ },
731
+ "50343": {
732
+ "content": "[unused58]",
733
+ "lstrip": false,
734
+ "normalized": true,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": false
738
+ },
739
+ "50344": {
740
+ "content": "[unused59]",
741
+ "lstrip": false,
742
+ "normalized": true,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": false
746
+ },
747
+ "50345": {
748
+ "content": "[unused60]",
749
+ "lstrip": false,
750
+ "normalized": true,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": false
754
+ },
755
+ "50346": {
756
+ "content": "[unused61]",
757
+ "lstrip": false,
758
+ "normalized": true,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": false
762
+ },
763
+ "50347": {
764
+ "content": "[unused62]",
765
+ "lstrip": false,
766
+ "normalized": true,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": false
770
+ },
771
+ "50348": {
772
+ "content": "[unused63]",
773
+ "lstrip": false,
774
+ "normalized": true,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": false
778
+ },
779
+ "50349": {
780
+ "content": "[unused64]",
781
+ "lstrip": false,
782
+ "normalized": true,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": false
786
+ },
787
+ "50350": {
788
+ "content": "[unused65]",
789
+ "lstrip": false,
790
+ "normalized": true,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": false
794
+ },
795
+ "50351": {
796
+ "content": "[unused66]",
797
+ "lstrip": false,
798
+ "normalized": true,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": false
802
+ },
803
+ "50352": {
804
+ "content": "[unused67]",
805
+ "lstrip": false,
806
+ "normalized": true,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": false
810
+ },
811
+ "50353": {
812
+ "content": "[unused68]",
813
+ "lstrip": false,
814
+ "normalized": true,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": false
818
+ },
819
+ "50354": {
820
+ "content": "[unused69]",
821
+ "lstrip": false,
822
+ "normalized": true,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": false
826
+ },
827
+ "50355": {
828
+ "content": "[unused70]",
829
+ "lstrip": false,
830
+ "normalized": true,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": false
834
+ },
835
+ "50356": {
836
+ "content": "[unused71]",
837
+ "lstrip": false,
838
+ "normalized": true,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": false
842
+ },
843
+ "50357": {
844
+ "content": "[unused72]",
845
+ "lstrip": false,
846
+ "normalized": true,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": false
850
+ },
851
+ "50358": {
852
+ "content": "[unused73]",
853
+ "lstrip": false,
854
+ "normalized": true,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": false
858
+ },
859
+ "50359": {
860
+ "content": "[unused74]",
861
+ "lstrip": false,
862
+ "normalized": true,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": false
866
+ },
867
+ "50360": {
868
+ "content": "[unused75]",
869
+ "lstrip": false,
870
+ "normalized": true,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": false
874
+ },
875
+ "50361": {
876
+ "content": "[unused76]",
877
+ "lstrip": false,
878
+ "normalized": true,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": false
882
+ },
883
+ "50362": {
884
+ "content": "[unused77]",
885
+ "lstrip": false,
886
+ "normalized": true,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": false
890
+ },
891
+ "50363": {
892
+ "content": "[unused78]",
893
+ "lstrip": false,
894
+ "normalized": true,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": false
898
+ },
899
+ "50364": {
900
+ "content": "[unused79]",
901
+ "lstrip": false,
902
+ "normalized": true,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": false
906
+ },
907
+ "50365": {
908
+ "content": "[unused80]",
909
+ "lstrip": false,
910
+ "normalized": true,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": false
914
+ },
915
+ "50366": {
916
+ "content": "[unused81]",
917
+ "lstrip": false,
918
+ "normalized": true,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": false
922
+ },
923
+ "50367": {
924
+ "content": "[unused82]",
925
+ "lstrip": false,
926
+ "normalized": true,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": false
930
+ }
931
+ },
932
+ "clean_up_tokenization_spaces": true,
933
+ "cls_token": "[CLS]",
934
+ "extra_special_tokens": {},
935
+ "mask_token": "[MASK]",
936
+ "model_input_names": [
937
+ "input_ids",
938
+ "attention_mask"
939
+ ],
940
+ "model_max_length": 8192,
941
+ "pad_token": "[PAD]",
942
+ "sep_token": "[SEP]",
943
+ "tokenizer_class": "PreTrainedTokenizerFast",
944
+ "unk_token": "[UNK]"
945
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc7181b00303f72cd9e8d7e9489278da1dba68ace8042efb585819e98f131563
3
+ size 5368
training_params.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_path": "juanmcristobal/ner-ioc-dataset3",
3
+ "model": "answerdotai/ModernBERT-large",
4
+ "lr": 5e-05,
5
+ "epochs": 5,
6
+ "max_seq_length": 128,
7
+ "batch_size": 128,
8
+ "warmup_ratio": 0.1,
9
+ "gradient_accumulation": 1,
10
+ "optimizer": "adamw_torch",
11
+ "scheduler": "cosine",
12
+ "weight_decay": 0.0,
13
+ "max_grad_norm": 1.0,
14
+ "seed": 42,
15
+ "train_split": "train",
16
+ "valid_split": "validation",
17
+ "tokens_column": "tokens",
18
+ "tags_column": "tags",
19
+ "logging_steps": -1,
20
+ "project_name": "autotrain-sec4",
21
+ "auto_find_batch_size": false,
22
+ "mixed_precision": "fp16",
23
+ "save_total_limit": 1,
24
+ "push_to_hub": true,
25
+ "eval_strategy": "epoch",
26
+ "username": "juanmcristobal",
27
+ "log": "tensorboard",
28
+ "early_stopping_patience": 5,
29
+ "early_stopping_threshold": 0.01
30
+ }