Upload folder using huggingface_hub

Browse files

Files changed (8) hide show

label_mappings.json +76 -0
model.safetensors +3 -0
special_tokens_map.json +7 -0
tokenizer.json +0 -0
tokenizer_config.json +56 -0
training_args.bin +3 -0
training_config.json +12 -0
vocab.txt +0 -0

label_mappings.json ADDED Viewed

	@@ -0,0 +1,76 @@

+{
+  "tag2id": {
+    "B-amount": 0,
+    "B-date": 1,
+    "B-description": 2,
+    "B-indicator_amount": 3,
+    "B-indicator_date": 4,
+    "B-indicator_description": 5,
+    "B-indicator_receiver_hr": 6,
+    "B-indicator_receiver_inn": 7,
+    "B-indicator_receiver_name": 8,
+    "B-indicator_transaction_code": 9,
+    "B-receiver_hr": 10,
+    "B-receiver_inn": 11,
+    "B-receiver_name": 12,
+    "B-transaction_code": 13,
+    "I-amount": 14,
+    "I-description": 15,
+    "I-indicator_amount": 16,
+    "I-indicator_date": 17,
+    "I-indicator_description": 18,
+    "I-indicator_receiver_hr": 19,
+    "I-indicator_receiver_inn": 20,
+    "I-indicator_receiver_name": 21,
+    "I-indicator_transaction_code": 22,
+    "I-receiver_name": 23,
+    "O": 24
+  },
+  "id2tag": {
+    "0": "B-amount",
+    "1": "B-date",
+    "2": "B-description",
+    "3": "B-indicator_amount",
+    "4": "B-indicator_date",
+    "5": "B-indicator_description",
+    "6": "B-indicator_receiver_hr",
+    "7": "B-indicator_receiver_inn",
+    "8": "B-indicator_receiver_name",
+    "9": "B-indicator_transaction_code",
+    "10": "B-receiver_hr",
+    "11": "B-receiver_inn",
+    "12": "B-receiver_name",
+    "13": "B-transaction_code",
+    "14": "I-amount",
+    "15": "I-description",
+    "16": "I-indicator_amount",
+    "17": "I-indicator_date",
+    "18": "I-indicator_description",
+    "19": "I-indicator_receiver_hr",
+    "20": "I-indicator_receiver_inn",
+    "21": "I-indicator_receiver_name",
+    "22": "I-indicator_transaction_code",
+    "23": "I-receiver_name",
+    "24": "O"
+  },
+  "intent2id": {
+    "create_transaction": 0
+  },
+  "id2intent": {
+    "0": "create_transaction"
+  },
+  "lang2id": {
+    "en": 0,
+    "mixed": 1,
+    "ru": 2,
+    "uz_cyrl": 3,
+    "uz_latn": 4
+  },
+  "id2lang": {
+    "0": "en",
+    "1": "mixed",
+    "2": "ru",
+    "3": "uz_cyrl",
+    "4": "uz_latn"
+  }
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2a2b3b9bdeae2492d396b1ab6c50873ab8ec1c35294ae89525b935bffcc904bc
+size 671307516

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:91b56ea529dc82b6f5c780abb1941f688bd147268d902b7ef28b6b6763fe6c41
+size 5777

training_config.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+  "model_name": "google-bert/bert-base-multilingual-uncased",
+  "num_train_samples": 188336,
+  "num_val_samples": 23542,
+  "num_test_samples": 23543,
+  "num_epochs": 5,
+  "batch_size": 32,
+  "ner_f1": 0.9996063934364621,
+  "intent_f1": 1.0,
+  "lang_accuracy": 0.8373189483073525,
+  "avg_f1": 0.999803196718231
+}

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff