Kelvinmbewe
/

mbert_LusakaLang_MultiTask

Text Classification

sentiment-analysis

topic-classification

language-identification

Eval Results (legacy)

Model card | Files and versions

Kelvinmbewe committed 20 days ago

Commit

bfe0f07

·

verified ·

1 Parent(s): cf76437

Initial upload of LusakaLang MultiTask model

Files changed (4) hide show

config.json +21 -11
pytorch_model.bin +3 -0
special_tokens_map.json +5 -35
tokenizer_config.json +0 -7

config.json CHANGED Viewed

@@ -1,12 +1,22 @@
 {
-  "architectures": [
-    "LusakaLangMultiTaskModel"
-  ],
-  "dtype": "float32",
-  "hidden_size": 768,
-  "lang_model_name": "Kelvinmbewe/mbert_Lusaka_Language_Analysis",
-  "model_type": "lusakalang_multitask",
-  "sent_model_name": "Kelvinmbewe/mbert_LusakaLang_Sentiment_Analysis",
-  "topic_model_name": "Kelvinmbewe/mbert_LusakaLang_Topic",
-  "transformers_version": "4.57.3"
-}

 {
+    "model_type": "bert-multitask",
+    "base_model": "google-bert/bert-base-multilingual-cased",
+    "language_labels": {
+        "0": "bemba",
+        "1": "nyanja",
+        "2": "english",
+        "3": "unknown"
+    },
+    "sentiment_labels": {
+        "0": "negative",
+        "1": "neutral",
+        "2": "positive"
+    },
+    "topic_labels": {
+        "0": "customer_support",
+        "1": "driver_behaviour",
+        "2": "others",
+        "3": "payment_issues"
+    },
+    "hidden_size": 768
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5bbdb9b2baef6a059af427e2fe697babc47bdf43116ee609c20392bb09b83215
+size 711531710

special_tokens_map.json CHANGED Viewed

@@ -1,37 +1,7 @@
 {
-  "cls_token": {
-    "content": "[CLS]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "mask_token": {
-    "content": "[MASK]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": {
-    "content": "[PAD]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "sep_token": {
-    "content": "[SEP]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "unk_token": {
-    "content": "[UNK]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
 }

 {
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
 }

tokenizer_config.json CHANGED Viewed

@@ -46,18 +46,11 @@
   "do_lower_case": false,
   "extra_special_tokens": {},
   "mask_token": "[MASK]",
-  "max_length": 128,
   "model_max_length": 512,
-  "pad_to_multiple_of": null,
   "pad_token": "[PAD]",
-  "pad_token_type_id": 0,
-  "padding_side": "right",
   "sep_token": "[SEP]",
-  "stride": 0,
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "BertTokenizer",
-  "truncation_side": "right",
-  "truncation_strategy": "longest_first",
   "unk_token": "[UNK]"
 }

   "do_lower_case": false,
   "extra_special_tokens": {},
   "mask_token": "[MASK]",
   "model_max_length": 512,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "BertTokenizer",
   "unk_token": "[UNK]"
 }