indo-ner-large-stage2-gold-final

Files changed (4) hide show

README.md CHANGED Viewed

@@ -18,10 +18,10 @@ should probably proofread and complete it, then remove this comment. -->
 This model was trained from scratch on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.1903
-- F1: 0.8202
-- Precision: 0.7998
-- Recall: 0.8417
 ## Model description
@@ -53,11 +53,11 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss | F1     | Precision | Recall |
 |:-------------:|:-----:|:----:|:---------------:|:------:|:---------:|:------:|
-| 0.3264        | 1.0   | 926  | 0.2126          | 0.7851 | 0.7626    | 0.8091 |
-| 0.1823        | 2.0   | 1852 | 0.1930          | 0.8056 | 0.7806    | 0.8322 |
-| 0.1444        | 3.0   | 2778 | 0.1894          | 0.8155 | 0.7939    | 0.8382 |
-| 0.1293        | 4.0   | 3704 | 0.1885          | 0.8192 | 0.7993    | 0.8400 |
-| 0.1184        | 5.0   | 4630 | 0.1903          | 0.8202 | 0.7998    | 0.8417 |
 ### Framework versions

 This model was trained from scratch on the None dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.1738
+- F1: 0.8448
+- Precision: 0.8291
+- Recall: 0.8610
 ## Model description
 | Training Loss | Epoch | Step | Validation Loss | F1     | Precision | Recall |
 |:-------------:|:-----:|:----:|:---------------:|:------:|:---------:|:------:|
+| 0.311         | 1.0   | 926  | 0.1891          | 0.8127 | 0.7947    | 0.8316 |
+| 0.1567        | 2.0   | 1852 | 0.1723          | 0.8314 | 0.8122    | 0.8516 |
+| 0.1221        | 3.0   | 2778 | 0.1721          | 0.8389 | 0.8214    | 0.8573 |
+| 0.1088        | 4.0   | 3704 | 0.1723          | 0.8430 | 0.8289    | 0.8577 |
+| 0.0986        | 5.0   | 4630 | 0.1738          | 0.8448 | 0.8291    | 0.8610 |
 ### Framework versions

config.json CHANGED Viewed

@@ -9,7 +9,7 @@
   "dtype": "float32",
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
   "id2label": {
     "0": "O",
     "1": "B-CRD",
@@ -52,7 +52,7 @@
     "38": "I-WOA"
   },
   "initializer_range": 0.02,
-  "intermediate_size": 3072,
   "label2id": {
     "B-CRD": 1,
     "B-DAT": 2,
@@ -97,8 +97,8 @@
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,
   "model_type": "bert",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
   "output_past": true,
   "pad_token_id": 0,
   "pooler_fc_size": 768,
@@ -110,5 +110,5 @@
   "transformers_version": "4.57.3",
   "type_vocab_size": 2,
   "use_cache": true,
-  "vocab_size": 50000
 }

   "dtype": "float32",
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
   "id2label": {
     "0": "O",
     "1": "B-CRD",
     "38": "I-WOA"
   },
   "initializer_range": 0.02,
+  "intermediate_size": 4096,
   "label2id": {
     "B-CRD": 1,
     "B-DAT": 2,
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,
   "model_type": "bert",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
   "output_past": true,
   "pad_token_id": 0,
   "pooler_fc_size": 768,
   "transformers_version": "4.57.3",
   "type_vocab_size": 2,
   "use_cache": true,
+  "vocab_size": 30522
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f41ff1ea2cf913676675fd9d9ba03830a51e6bc5b0821c850177e948eba1057c
-size 495546332

 version https://git-lfs.github.com/spec/v1
+oid sha256:4f259dba0203e157df4ac391b358198f5326475476f2a22d1cb2b8d5763dd45a
+size 1336575868

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ced4e35556928af2ab28a610c4ae6c9ecd0c158972549028b9ff09f99eee0d7a
 size 5841

 version https://git-lfs.github.com/spec/v1
+oid sha256:a94b2e593d6e7cdb40c7a2898feb5656e0fdd035d4427579bfe70bf66f23b103
 size 5841