| { | |
| "bomFormat": "CycloneDX", | |
| "specVersion": "1.6", | |
| "serialNumber": "urn:uuid:2d71328a-f87c-48dc-9f4e-ba83929b1cb0", | |
| "version": 1, | |
| "metadata": { | |
| "timestamp": "2025-06-05T09:39:43.530276+00:00", | |
| "component": { | |
| "type": "machine-learning-model", | |
| "bom-ref": "dslim/bert-base-NER-40551b74-59a4-53a9-ae36-e1dca4f66e41", | |
| "name": "dslim/bert-base-NER", | |
| "externalReferences": [ | |
| { | |
| "url": "https://huggingface.co/dslim/bert-base-NER", | |
| "type": "documentation" | |
| } | |
| ], | |
| "modelCard": { | |
| "modelParameters": { | |
| "task": "token-classification", | |
| "architectureFamily": "bert", | |
| "modelArchitecture": "BertForTokenClassification", | |
| "datasets": [ | |
| { | |
| "ref": "conll2003-be67a053-25af-52ad-93c8-134501f8fa4b" | |
| } | |
| ] | |
| }, | |
| "properties": [ | |
| { | |
| "name": "library_name", | |
| "value": "transformers" | |
| } | |
| ], | |
| "quantitativeAnalysis": { | |
| "performanceMetrics": [ | |
| { | |
| "slice": "dataset: conll2003, split: test, config: conll2003", | |
| "type": "accuracy", | |
| "value": 0.9118041001560013 | |
| }, | |
| { | |
| "slice": "dataset: conll2003, split: test, config: conll2003", | |
| "type": "precision", | |
| "value": 0.9211550382257732 | |
| }, | |
| { | |
| "slice": "dataset: conll2003, split: test, config: conll2003", | |
| "type": "recall", | |
| "value": 0.9306415698281261 | |
| }, | |
| { | |
| "slice": "dataset: conll2003, split: test, config: conll2003", | |
| "type": "f1", | |
| "value": 0.9258740048459675 | |
| }, | |
| { | |
| "slice": "dataset: conll2003, split: test, config: conll2003", | |
| "type": "loss", | |
| "value": 0.48325642943382263 | |
| } | |
| ] | |
| } | |
| }, | |
| "authors": [ | |
| { | |
| "name": "dslim" | |
| } | |
| ], | |
| "licenses": [ | |
| { | |
| "license": { | |
| "id": "MIT", | |
| "url": "https://spdx.org/licenses/MIT.html" | |
| } | |
| } | |
| ], | |
| "description": "**bert-base-NER** is a fine-tuned BERT model that is ready to use for **Named Entity Recognition** and achieves **state-of-the-art performance** for the NER task. It has been trained to recognize four types of entities: location (LOC), organizations (ORG), person (PER) and Miscellaneous (MISC). Specifically, this model is a *bert-base-cased* model that was fine-tuned on the English version of the standard [CoNLL-2003 Named Entity Recognition](https://www.aclweb.org/anthology/W03-0419.pdf) dataset. If you'd like to use a larger BERT-large model fine-tuned on the same dataset, a [**bert-large-NER**](https://huggingface.co/dslim/bert-large-NER/) version is also available.", | |
| "tags": [ | |
| "transformers", | |
| "pytorch", | |
| "tf", | |
| "jax", | |
| "onnx", | |
| "safetensors", | |
| "bert", | |
| "token-classification", | |
| "en", | |
| "dataset:conll2003", | |
| "arxiv:1810.04805", | |
| "license:mit", | |
| "model-index", | |
| "autotrain_compatible", | |
| "endpoints_compatible", | |
| "region:us" | |
| ] | |
| } | |
| }, | |
| "components": [ | |
| { | |
| "type": "data", | |
| "bom-ref": "conll2003-be67a053-25af-52ad-93c8-134501f8fa4b", | |
| "name": "conll2003", | |
| "data": [ | |
| { | |
| "type": "dataset", | |
| "bom-ref": "conll2003-be67a053-25af-52ad-93c8-134501f8fa4b", | |
| "name": "conll2003", | |
| "contents": { | |
| "url": "https://huggingface.co/datasets/conll2003", | |
| "properties": [ | |
| { | |
| "name": "task_categories", | |
| "value": "token-classification" | |
| }, | |
| { | |
| "name": "task_ids", | |
| "value": "named-entity-recognition, part-of-speech" | |
| }, | |
| { | |
| "name": "language", | |
| "value": "en" | |
| }, | |
| { | |
| "name": "size_categories", | |
| "value": "10K<n<100K" | |
| }, | |
| { | |
| "name": "annotations_creators", | |
| "value": "crowdsourced" | |
| }, | |
| { | |
| "name": "language_creators", | |
| "value": "found" | |
| }, | |
| { | |
| "name": "pretty_name", | |
| "value": "CoNLL-2003" | |
| }, | |
| { | |
| "name": "source_datasets", | |
| "value": "extended|other-reuters-corpus" | |
| }, | |
| { | |
| "name": "paperswithcode_id", | |
| "value": "conll-2003" | |
| }, | |
| { | |
| "name": "license", | |
| "value": "other" | |
| } | |
| ] | |
| }, | |
| "governance": { | |
| "owners": [ | |
| { | |
| "organization": { | |
| "name": "eriktks", | |
| "url": "https://huggingface.co/eriktks" | |
| } | |
| } | |
| ] | |
| }, | |
| "description": "The shared task of CoNLL-2003 concerns language-independent named entity recognition. We will concentrate on\nfour types of named entities: persons, locations, organizations and names of miscellaneous entities that do\nnot belong to the previous three groups.\n\nThe CoNLL-2003 shared task data files contain four columns separated by a single space. Each word has been put on\na separate line and there is an empty line after each sentence. The first item on each line is a word, the second\na part-of-speech (POS) tag, the third a syntactic chunk tag and the fourth the named entity tag. The chunk tags\nand the named entity tags have the format I-TYPE which means that the word is inside a phrase of type TYPE. Only\nif two phrases of the same type immediately follow each other, the first word of the second phrase will have tag\nB-TYPE to show that it starts a new phrase. A word with tag O is not part of a phrase. Note the dataset uses IOB2\ntagging scheme, whereas the original dataset uses IOB1.\n\nFor more details see https://www.clips.uantwerpen.be/conll2003/ner/ and https://www.aclweb.org/anthology/W03-0419" | |
| } | |
| ] | |
| } | |
| ] | |
| } |