Chottokun/ruri-v3-pt-310m_ner_wikipedia

Browse files

Files changed (4) hide show

README.md +19 -19
config.json +4 -4
model.safetensors +2 -2
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 library_name: transformers
-license: mit
-base_model: sbintuitions/modernbert-ja-130m
 tags:
 - generated_from_trainer
 metrics:
@@ -19,13 +19,13 @@ should probably proofread and complete it, then remove this comment. -->
 # results_sudachi
-This model is a fine-tuned version of [sbintuitions/modernbert-ja-130m](https://huggingface.co/sbintuitions/modernbert-ja-130m) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.1159
-- Precision: 0.8180
-- Recall: 0.8556
-- F1: 0.8364
-- Accuracy: 0.9715
 ## Model description
@@ -50,22 +50,22 @@ The following hyperparameters were used during training:
 - seed: 42
 - optimizer: AdamW (OptimizerNames.ADAMW_TORCH) with betas=(0.9,0.999), epsilon=1e-08, and no additional optimizer arguments
 - lr_scheduler_type: linear
-- num_epochs: 10
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1     | Accuracy |
 |:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
-| No log        | 1.0   | 301  | 0.1564          | 0.7248    | 0.7737 | 0.7484 | 0.9594   |
-| 0.2212        | 2.0   | 602  | 0.1159          | 0.8180    | 0.8556 | 0.8364 | 0.9715   |
-| 0.2212        | 3.0   | 903  | 0.1493          | 0.8260    | 0.8455 | 0.8356 | 0.9702   |
-| 0.0388        | 4.0   | 1204 | 0.1576          | 0.8244    | 0.8534 | 0.8387 | 0.9731   |
-| 0.0128        | 5.0   | 1505 | 0.1501          | 0.8383    | 0.8563 | 0.8472 | 0.9723   |
-| 0.0128        | 6.0   | 1806 | 0.1774          | 0.8640    | 0.8714 | 0.8677 | 0.9751   |
-| 0.0033        | 7.0   | 2107 | 0.1890          | 0.8563    | 0.8865 | 0.8712 | 0.9760   |
-| 0.0033        | 8.0   | 2408 | 0.2046          | 0.8668    | 0.8786 | 0.8726 | 0.9753   |
-| 0.0011        | 9.0   | 2709 | 0.1951          | 0.8636    | 0.8822 | 0.8728 | 0.9763   |
-| 0.0001        | 10.0  | 3010 | 0.1955          | 0.8622    | 0.8807 | 0.8714 | 0.9763   |
 ### Framework versions

 ---
 library_name: transformers
+license: apache-2.0
+base_model: cl-nagoya/ruri-v3-310m
 tags:
 - generated_from_trainer
 metrics:
 # results_sudachi
+This model is a fine-tuned version of [cl-nagoya/ruri-v3-310m](https://huggingface.co/cl-nagoya/ruri-v3-310m) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.1287
+- Precision: 0.8679
+- Recall: 0.9007
+- F1: 0.8840
+- Accuracy: 0.9782
 ## Model description
 - seed: 42
 - optimizer: AdamW (OptimizerNames.ADAMW_TORCH) with betas=(0.9,0.999), epsilon=1e-08, and no additional optimizer arguments
 - lr_scheduler_type: linear
+- num_epochs: 50
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1     | Accuracy |
 |:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
+| No log        | 1.0   | 301  | 0.0995          | 0.7768    | 0.8500 | 0.8117 | 0.9715   |
+| 0.1284        | 2.0   | 602  | 0.1045          | 0.8453    | 0.8821 | 0.8633 | 0.9759   |
+| 0.1284        | 3.0   | 903  | 0.1262          | 0.8196    | 0.8600 | 0.8393 | 0.9737   |
+| 0.0377        | 4.0   | 1204 | 0.1100          | 0.8401    | 0.8821 | 0.8606 | 0.9762   |
+| 0.0164        | 5.0   | 1505 | 0.1068          | 0.8539    | 0.8807 | 0.8671 | 0.9783   |
+| 0.0164        | 6.0   | 1806 | 0.1194          | 0.8568    | 0.8850 | 0.8707 | 0.9776   |
+| 0.0086        | 7.0   | 2107 | 0.1287          | 0.8679    | 0.9007 | 0.8840 | 0.9782   |
+| 0.0086        | 8.0   | 2408 | 0.1278          | 0.8506    | 0.8950 | 0.8723 | 0.9767   |
+| 0.0069        | 9.0   | 2709 | 0.1338          | 0.8629    | 0.8721 | 0.8675 | 0.9770   |
+| 0.0047        | 10.0  | 3010 | 0.1415          | 0.8618    | 0.8864 | 0.8739 | 0.9779   |
 ### Framework versions

config.json CHANGED Viewed

@@ -18,7 +18,7 @@
   "global_rope_theta": 160000.0,
   "gradient_checkpointing": false,
   "hidden_activation": "gelu",
-  "hidden_size": 512,
   "id2label": {
     "0": "LABEL_0",
     "1": "LABEL_1",
@@ -40,7 +40,7 @@
   },
   "initializer_cutoff_factor": 2.0,
   "initializer_range": 0.02,
-  "intermediate_size": 2048,
   "label2id": {
     "LABEL_0": 0,
     "LABEL_1": 1,
@@ -69,8 +69,8 @@
   "model_type": "modernbert",
   "norm_bias": false,
   "norm_eps": 1e-05,
-  "num_attention_heads": 8,
-  "num_hidden_layers": 19,
   "pad_token_id": 3,
   "position_embedding_type": "rope",
   "repad_logits_with_grad": false,

   "global_rope_theta": 160000.0,
   "gradient_checkpointing": false,
   "hidden_activation": "gelu",
+  "hidden_size": 768,
   "id2label": {
     "0": "LABEL_0",
     "1": "LABEL_1",
   },
   "initializer_cutoff_factor": 2.0,
   "initializer_range": 0.02,
+  "intermediate_size": 3072,
   "label2id": {
     "LABEL_0": 0,
     "LABEL_1": 1,
   "model_type": "modernbert",
   "norm_bias": false,
   "norm_eps": 1e-05,
+  "num_attention_heads": 12,
+  "num_hidden_layers": 25,
   "pad_token_id": 3,
   "position_embedding_type": "rope",
   "repad_logits_with_grad": false,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cf3b7a4ceb256c18001736c171c69df727cff5e005a56ad2151d6168cd26bf5e
-size 529659996

 version https://git-lfs.github.com/spec/v1
+oid sha256:25a3819dfbe35e3d337f8dcf0683485ee834670d0580a7de2a3504dab9245215
+size 1260878660

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6d6f3b3e26529db2c6256bc8b1251a3523029042e7143fe1290172a7f15c1b58
 size 5304

 version https://git-lfs.github.com/spec/v1
+oid sha256:8d07a6df5b656599fe3d13f9a378fa8b124b785d70e1cfe617ee0498c81fe15a
 size 5304