Upload 11 files
- .gitattributes +1 -0
- swa1_model/README.Rmd +39 -0
- swa1_model/config.json +64 -0
- swa1_model/eval_results.txt +4 -0
- swa1_model/model.safetensors +3 -0
- swa1_model/sentencepiece.bpe.model +3 -0
- swa1_model/special_tokens_map.json +51 -0
- swa1_model/test_predictions.txt +0 -0
- swa1_model/test_results.txt +4 -0
- swa1_model/tokenizer.json +3 -0
- swa1_model/tokenizer_config.json +55 -0
- swa1_model/training_args.bin +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+swa1_model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
swa1_model/README.Rmd
ADDED
@@ -0,0 +1,39 @@
+---
+title: "README"
+author: "Kevine Grace"
+date: "2025-04-05"
+output: html_document
+---
+
+---
+language: multilingual
+tags:
+- pos-tagging
+- afro-xlmr
+- token-classification
+license: mit
+---
+
+# Afro-XLM-Roberta Mini POS Tagger
+
+This model was fine-tuned for part-of-speech tagging from the `Davlan/afro-xlmr-mini` base model.
+It supports token classification for low-resource African languages.
+
+## Usage
+
+```python
+from transformers import AutoTokenizer, AutoModelForTokenClassification
+
+tokenizer = AutoTokenizer.from_pretrained("your-username/afroxlmr-pos")
+model = AutoModelForTokenClassification.from_pretrained("your-username/afroxlmr-pos")
+```
+
+We fine-tuned the pretrained model with a different architecture and obtained higher precision, recall, and F1 scores. For training and development we split our own data into train and dev sets, and we used their test set to obtain the final performance.
+
+## Dataset
+
+Our own data: https://github.com/hausanlp/HERDPhobia
+Their data: https://github.com/masakhane-io/masakhane-pos
+
+
+
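Continuing the README's usage snippet, here is a minimal inference sketch. The Swahili sentence is an invented example, `your-username/afroxlmr-pos` remains the README's placeholder repo id, and taking each word's first sub-token prediction is a common convention for token classification rather than anything this card specifies:

```python
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification

# Placeholder repo id carried over from the README snippet above.
tokenizer = AutoTokenizer.from_pretrained("your-username/afroxlmr-pos")
model = AutoModelForTokenClassification.from_pretrained("your-username/afroxlmr-pos")

# Hypothetical pre-tokenized example sentence.
words = ["Mtoto", "anacheza", "mpira", "."]

# Tokenize pre-split words so sub-tokens can be mapped back to words
# (requires a fast tokenizer, which AutoTokenizer loads here by default).
enc = tokenizer(words, is_split_into_words=True, return_tensors="pt")
with torch.no_grad():
    logits = model(**enc).logits  # shape: (1, seq_len, num_labels)

pred_ids = logits.argmax(dim=-1)[0].tolist()
word_ids = enc.word_ids(batch_index=0)

# Keep the tag predicted for each word's first sub-token.
tags, seen = [], set()
for i, wid in enumerate(word_ids):
    if wid is not None and wid not in seen:
        seen.add(wid)
        tags.append(model.config.id2label[pred_ids[i]])

print(list(zip(words, tags)))
```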
swa1_model/config.json
ADDED
@@ -0,0 +1,64 @@
+{
+  "architectures": [
+    "XLMRobertaForTokenClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 384,
+  "id2label": {
+    "0": "X",
+    "1": "ADJ",
+    "10": "PART",
+    "11": "PRON",
+    "12": "PROPN",
+    "13": "PUNCT",
+    "14": "SCONJ",
+    "15": "SYM",
+    "16": "VERB",
+    "2": "ADP",
+    "3": "ADV",
+    "4": "AUX",
+    "5": "CCONJ",
+    "6": "DET",
+    "7": "INTJ",
+    "8": "NOUN",
+    "9": "NUM"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 1536,
+  "label2id": {
+    "ADJ": 1,
+    "ADP": 2,
+    "ADV": 3,
+    "AUX": 4,
+    "CCONJ": 5,
+    "DET": 6,
+    "INTJ": 7,
+    "NOUN": 8,
+    "NUM": 9,
+    "PART": 10,
+    "PRON": 11,
+    "PROPN": 12,
+    "PUNCT": 13,
+    "SCONJ": 14,
+    "SYM": 15,
+    "VERB": 16,
+    "X": 0
+  },
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "xlm-roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.50.3",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 250002
+}
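This config defines a 17-tag scheme: the universal POS tags plus `X`. A quick way to inspect the tag set, as a sketch reusing the README's placeholder repo id (note that `transformers` converts the JSON's string keys to integers on load):

```python
from transformers import AutoConfig

# Placeholder repo id from the README above.
config = AutoConfig.from_pretrained("your-username/afroxlmr-pos")

# num_labels is derived from id2label; keys arrive as ints after loading.
print([config.id2label[i] for i in range(config.num_labels)])
# -> ['X', 'ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ',
#     'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB']
```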
swa1_model/eval_results.txt
ADDED
@@ -0,0 +1,4 @@
+f1 = 0.9117867305676202
+loss = 1.2744013667106628
+precision = 0.9149383829275624
+recall = 0.9086567164179105
swa1_model/model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ddd2cf63282268d2b8eda52fad6bedb2e026388954ead827c97611a1163642f
+size 470021564
swa1_model/sentencepiece.bpe.model
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
swa1_model/special_tokens_map.json
ADDED
@@ -0,0 +1,51 @@
+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
swa1_model/test_predictions.txt
ADDED
The diff for this file is too large to render; see the raw file.
swa1_model/test_results.txt
ADDED
@@ -0,0 +1,4 @@
+f1 = 0.9227457031109836
+loss = 1.2530506451924641
+precision = 0.9243045072244986
+recall = 0.921192147872188
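The repo does not state how the precision/recall/F1 figures in eval_results.txt and test_results.txt were computed. One plausible reading for POS tagging is tag-level scores averaged over the label set; a purely illustrative sketch of a macro-averaged token-level computation with scikit-learn (toy data, not the authors' evaluation code):

```python
from sklearn.metrics import precision_recall_fscore_support

# Toy gold and predicted tag sequences, flattened to token level.
y_true = ["NOUN", "VERB", "NOUN", "PUNCT"]
y_pred = ["NOUN", "VERB", "ADJ", "PUNCT"]

# Macro averaging scores each tag separately, so precision and recall
# can differ, as they do in the result files above.
precision, recall, f1, _ = precision_recall_fscore_support(
    y_true, y_pred, average="macro", zero_division=0
)
print(f"precision = {precision}\nrecall = {recall}\nf1 = {f1}")
```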
swa1_model/tokenizer.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20
+size 17082734
swa1_model/tokenizer_config.json
ADDED
@@ -0,0 +1,55 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "250001": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "mask_token": "<mask>",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "XLMRobertaTokenizer",
+  "unk_token": "<unk>"
+}
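A small consistency check tying this file to config.json above, as a sketch with the same placeholder repo id: the `<mask>` entry at id 250001 should occupy the last slot of the 250002-token vocabulary.

```python
from transformers import AutoTokenizer

# Placeholder repo id from the README above.
tok = AutoTokenizer.from_pretrained("your-username/afroxlmr-pos")

assert tok.mask_token_id == 250001  # matches "added_tokens_decoder" above
assert len(tok) == 250002           # matches "vocab_size" in config.json
print(tok.cls_token, tok.sep_token, tok.pad_token, tok.unk_token)
```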
swa1_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10825191ad43743c4e1dd1c758b41c663be2cb10428b3d65f0badbc95c5b13ff
+size 1976