Update

Browse files

Files changed (7) hide show

.gitignore +2 -1
README.txt → README.md +50 -5
config.json +41 -41
runs/Oct17_10-28-48_6a22a682a44e/1634466547.3715649/events.out.tfevents.1634466547.6a22a682a44e.3256.1 +0 -3
runs/Oct17_10-28-48_6a22a682a44e/events.out.tfevents.1634466547.6a22a682a44e.3256.0 +0 -3
runs/Oct17_10-28-48_6a22a682a44e/events.out.tfevents.1634468593.6a22a682a44e.3256.2 +0 -3
tokenizer_config.json +1 -1

.gitignore CHANGED Viewed

@@ -1,2 +1,3 @@
 checkpoint-*/
-run/

+.ipynb_checkpoints/
 checkpoint-*/
+runs/

README.txt → README.md RENAMED Viewed

@@ -3,16 +3,38 @@ language: ti
 widget:
 - text: "ድምጻዊ ኣብርሃም ኣፈወርቂ ንዘልኣለም ህያው ኮይኑ ኣብ ልብና ይነብር"
 datasets:
-- tiposd_sera.py
 model-index:
-- name: tipos-tiroberta
-  results: []
 ---
 # Tigrinya POS tagging with TiRoBERTa
-This model is a fine-tuned version of [TiRoBERTa](https://huggingface.co//content/tiroberta) on the NTC tiposd dataset.
 ## Training
@@ -29,7 +51,7 @@ The following hyperparameters were used during training:
 ### Results
-It achieves the following results on the evaluation set:
 - Loss: 0.3194
 - Adj Precision: 0.9219
 - Adj Recall: 0.9335
@@ -122,3 +144,26 @@ It achieves the following results on the evaluation set:
 - Pytorch 1.9.0+cu111
 - Datasets 1.13.3
 - Tokenizers 0.10.3

 widget:
 - text: "ድምጻዊ ኣብርሃም ኣፈወርቂ ንዘልኣለም ህያው ኮይኑ ኣብ ልብና ይነብር"
 datasets:
+- TLMD
+- NTC
+metrics:
+- f1
+- precision
+- recall
+- accuracy
 model-index:
+- name: tiroberta-base-pos
+  results:
+  - task:
+      name: Token Classification
+      type: token-classification
+    metrics:
+    - name: F1
+      type: f1
+      value: 0.9562
+    - name: Precision
+      type: precision
+      value: 0.9562
+    - name: Recall
+      type: recall
+      value: 0.9562
+    - name: Accuracy
+      type: accuracy
+      value: 0.9562
 ---
 # Tigrinya POS tagging with TiRoBERTa
+This model is a fine-tuned version of [TiRoBERTa](https://huggingface.co/fgaim/tiroberta) on the NTC-v1 dataset (Tedla et al. 2016).
 ## Training
 ### Results
+The model achieves the following results on the test set:
 - Loss: 0.3194
 - Adj Precision: 0.9219
 - Adj Recall: 0.9335
 - Pytorch 1.9.0+cu111
 - Datasets 1.13.3
 - Tokenizers 0.10.3
+## Citation
+If you use this model in your product or research, please cite as follows:
+```
+@article{Fitsum2021TiPLMs,
+  author={Fitsum Gaim and Wonsuk Yang and Jong C. Park},
+  title={Monolingual Pre-trained Language Models for Tigrinya},
+  year=2021,
+  publisher={WiNLP 2021/EMNLP 2021}
+}
+```
+## References
+```
+Tedla, Y., Yamamoto, K. & Marasinghe, A. 2016.
+Tigrinya Part-of-Speech Tagging with Morphological Patterns and the New Nagaoka Tigrinya Corpus.
+International Journal of Computer Applications 146, pp. 33-41 (2016).
+```

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "/content/tiroberta",
   "architectures": [
     "RobertaForTokenClassification"
   ],
@@ -12,50 +12,50 @@
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "id2label": {
-    "0": 0,
-    "1": 1,
-    "2": 2,
-    "3": 3,
-    "4": 4,
-    "5": 5,
-    "6": 6,
-    "7": 7,
-    "8": 8,
-    "9": 9,
-    "10": 10,
-    "11": 11,
-    "12": 12,
-    "13": 13,
-    "14": 14,
-    "15": 15,
-    "16": 16,
-    "17": 17,
-    "18": 18,
-    "19": 19
   },
   "initializer_range": 0.02,
   "intermediate_size": 3072,
   "label2id": {
-    "0": 0,
-    "1": 1,
-    "2": 2,
-    "3": 3,
-    "4": 4,
-    "5": 5,
-    "6": 6,
-    "7": 7,
-    "8": 8,
-    "9": 9,
-    "10": 10,
-    "11": 11,
-    "12": 12,
-    "13": 13,
-    "14": 14,
-    "15": 15,
-    "16": 16,
-    "17": 17,
-    "18": 18,
-    "19": 19
   },
   "layer_norm_eps": 1e-05,
   "max_position_embeddings": 514,

 {
+  "_name_or_path": "tiroberta-pos",
   "architectures": [
     "RobertaForTokenClassification"
   ],
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "id2label": {
+    "0": "ADJ",
+    "1": "ADV",
+    "2": "CON",
+    "3": "FW",
+    "4": "INT",
+    "5": "N",
+    "6": "NUM",
+    "7": "N_PRP",
+    "8": "N_V",
+    "9": "PRE",
+    "10": "PRO",
+    "11": "PUN",
+    "12": "UNC",
+    "13": "V",
+    "14": "V_AUX",
+    "15": "V_GER",
+    "16": "V_IMF",
+    "17": "V_IMV",
+    "18": "V_PRF",
+    "19": "V_REL"
   },
   "initializer_range": 0.02,
   "intermediate_size": 3072,
   "label2id": {
+    "ADJ": 0,
+    "ADV": 1,
+    "CON": 2,
+    "FW": 3,
+    "INT": 4,
+    "N": 5,
+    "NUM": 6,
+    "N_PRP": 7,
+    "N_V": 8,
+    "PRE": 9,
+    "PRO": 10,
+    "PUN": 11,
+    "UNC": 12,
+    "V": 13,
+    "V_AUX": 14,
+    "V_GER": 15,
+    "V_IMF": 16,
+    "V_IMV": 17,
+    "V_PRF": 18,
+    "V_REL": 19
   },
   "layer_norm_eps": 1e-05,
   "max_position_embeddings": 514,

runs/Oct17_10-28-48_6a22a682a44e/1634466547.3715649/events.out.tfevents.1634466547.6a22a682a44e.3256.1 DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ea18e60b3a0bd59e3b7369aba4da5ae92532aa2e241f973748f6b58ca8dc5667
-size 4528

runs/Oct17_10-28-48_6a22a682a44e/events.out.tfevents.1634466547.6a22a682a44e.3256.0 DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c065648c5f56f8300b841f262506d12d772ab58cf0b9893fa612f9b4a7afb41f
-size 5559

runs/Oct17_10-28-48_6a22a682a44e/events.out.tfevents.1634468593.6a22a682a44e.3256.2 DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7723f601a06a5253ffa02a77503160dae1b3615fcbf2e2b1da5c8ed951986582
-size 4880

tokenizer_config.json CHANGED Viewed

@@ -1 +1 @@

- {"unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": true, "errors": "replace", "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "special_tokens_map_file": null, "name_or_path": "/content/tiroberta", "tokenizer_class": "RobertaTokenizer"}

+ {"unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": true, "errors": "replace", "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "special_tokens_map_file": null, "name_or_path": "./", "tokenizer_class": "RobertaTokenizer"}