Upload 7 files

Browse files

Files changed (7) hide show

README +4 -0
config.json +36 -0
meta.json +86 -0
pytorch_model.bin +3 -0
tf_model.h5 +3 -0
tokenizer_config.json +3 -0
vocab.txt +0 -0

README ADDED Viewed

	@@ -0,0 +1,4 @@

+This archive is part of the NLPL Word Vectors Repository (http://vectors.nlpl.eu/repository/), version 2.0, published on Friday, December 27, 2019.
+Please see the file 'meta.json' in this archive and the overall repository metadata file http://vectors.nlpl.eu/repository/20.json for additional information.
+The life-time identifier for this model is:
+http://vectors.nlpl.eu/repository/20/221.zip

config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "_name_or_path": "ltg/norbert2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.30.0.dev0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 50104
+}

meta.json ADDED Viewed

	@@ -0,0 +1,86 @@

+{
+    "algorithm": {
+        "id": 9,
+        "name": "BERT",
+        "url": "https://github.com/google-research/bert",
+        "version": null
+    },
+    "contents": [
+        {
+            "filename": "config.json",
+            "format": "json"
+        },
+        {
+            "filename": "tokenizer_config.json",
+            "format": "json"
+        },
+        {
+            "filename": "model.ckpt.index",
+            "format": "data"
+        },
+        {
+            "filename": "model.ckpt.data-00000-of-00001",
+            "format": "data"
+        },
+        {
+            "filename": "meta.json",
+            "format": "json"
+        },
+        {
+            "filename": "vocab.txt",
+            "format": "text"
+        },
+        {
+            "filename": "pytorch_model.bin",
+            "format": "data"
+        },
+        {
+            "filename": "tf_model.h5",
+            "format": "data"
+        }
+    ],
+    "corpus": [
+        {
+            "NER": false,
+            "case preserved": true,
+            "description": "Norwegian Colossal Corpus (NCC)",
+            "id": 126,
+            "language": "nor",
+            "lemmatized": false,
+            "public": true,
+            "stop words removal": null,
+            "tagger": null,
+            "tagset": null,
+            "tokens": 5000000000,
+            "tool": null,
+            "url": "https://huggingface.co/datasets/NbAiLab/NCC"
+        },
+        {
+            "NER": false,
+            "case preserved": true,
+            "description": "C4 Web Corpus",
+            "id": 127,
+            "language": "nor",
+            "lemmatized": false,
+            "public": true,
+            "stop words removal": null,
+            "tagger": null,
+            "tagset": null,
+            "tokens": 9500000000,
+            "tool": "https://github.com/allenai/allennlp/discussions/5265",
+            "url": "https://aclanthology.org/2021.naacl-main.41/"
+        }
+    ],
+    "creators": [
+        {
+            "email": "andreku@ifi.uio.no",
+            "name": "Andrey Kutuzov"
+        }
+    ],
+    "dimensions": 768,
+    "documentation": "http://norlm.nlpl.eu",
+    "external_id": "Cased Norwegian BERT Base 2.0 (NorBERT 2)",
+    "handle": "http://vectors.nlpl.eu/repository/20/221.zip",
+    "id": 221,
+    "iterations": 3
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:adfd656074c1de762820156f2235545a10ab3807dc05c6faefa58cb85da8ac35
+size 498166901

tf_model.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8741bc2e9666330a411b6e2d1d59280e31c047d141b73f82976ee3d2445b7449
+size 498346776

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "do_lower_case": false
+}

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff