| { | |
| "algorithm": { | |
| "id": 9, | |
| "name": "BERT", | |
| "url": "https://github.com/google-research/bert", | |
| "version": null | |
| }, | |
| "contents": [ | |
| { | |
| "filename": "config.json", | |
| "format": "json" | |
| }, | |
| { | |
| "filename": "tokenizer_config.json", | |
| "format": "json" | |
| }, | |
| { | |
| "filename": "model.ckpt.index", | |
| "format": "data" | |
| }, | |
| { | |
| "filename": "model.ckpt.data-00000-of-00001", | |
| "format": "data" | |
| }, | |
| { | |
| "filename": "meta.json", | |
| "format": "json" | |
| }, | |
| { | |
| "filename": "vocab.txt", | |
| "format": "text" | |
| }, | |
| { | |
| "filename": "pytorch_model.bin", | |
| "format": "data" | |
| }, | |
| { | |
| "filename": "tf_model.h5", | |
| "format": "data" | |
| } | |
| ], | |
| "corpus": [ | |
| { | |
| "NER": false, | |
| "case preserved": true, | |
| "description": "Norwegian Colossal Corpus (NCC)", | |
| "id": 126, | |
| "language": "nor", | |
| "lemmatized": false, | |
| "public": true, | |
| "stop words removal": null, | |
| "tagger": null, | |
| "tagset": null, | |
| "tokens": 5000000000, | |
| "tool": null, | |
| "url": "https://huggingface.co/datasets/NbAiLab/NCC" | |
| }, | |
| { | |
| "NER": false, | |
| "case preserved": true, | |
| "description": "C4 Web Corpus", | |
| "id": 127, | |
| "language": "nor", | |
| "lemmatized": false, | |
| "public": true, | |
| "stop words removal": null, | |
| "tagger": null, | |
| "tagset": null, | |
| "tokens": 9500000000, | |
| "tool": "https://github.com/allenai/allennlp/discussions/5265", | |
| "url": "https://aclanthology.org/2021.naacl-main.41/" | |
| } | |
| ], | |
| "creators": [ | |
| { | |
| "email": "andreku@ifi.uio.no", | |
| "name": "Andrey Kutuzov" | |
| } | |
| ], | |
| "dimensions": 768, | |
| "documentation": "http://norlm.nlpl.eu", | |
| "external_id": "Cased Norwegian BERT Base 2.0 (NorBERT 2)", | |
| "handle": "http://vectors.nlpl.eu/repository/20/221.zip", | |
| "id": 221, | |
| "iterations": 3 | |
| } |