File size: 8,325 Bytes
e58bdce |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 |
{
"bomFormat": "CycloneDX",
"specVersion": "1.6",
"serialNumber": "urn:uuid:2d71328a-f87c-48dc-9f4e-ba83929b1cb0",
"version": 1,
"metadata": {
"timestamp": "2025-06-05T09:39:43.530276+00:00",
"component": {
"type": "machine-learning-model",
"bom-ref": "dslim/bert-base-NER-40551b74-59a4-53a9-ae36-e1dca4f66e41",
"name": "dslim/bert-base-NER",
"externalReferences": [
{
"url": "https://huggingface.co/dslim/bert-base-NER",
"type": "documentation"
}
],
"modelCard": {
"modelParameters": {
"task": "token-classification",
"architectureFamily": "bert",
"modelArchitecture": "BertForTokenClassification",
"datasets": [
{
"ref": "conll2003-be67a053-25af-52ad-93c8-134501f8fa4b"
}
]
},
"properties": [
{
"name": "library_name",
"value": "transformers"
}
],
"quantitativeAnalysis": {
"performanceMetrics": [
{
"slice": "dataset: conll2003, split: test, config: conll2003",
"type": "accuracy",
"value": 0.9118041001560013
},
{
"slice": "dataset: conll2003, split: test, config: conll2003",
"type": "precision",
"value": 0.9211550382257732
},
{
"slice": "dataset: conll2003, split: test, config: conll2003",
"type": "recall",
"value": 0.9306415698281261
},
{
"slice": "dataset: conll2003, split: test, config: conll2003",
"type": "f1",
"value": 0.9258740048459675
},
{
"slice": "dataset: conll2003, split: test, config: conll2003",
"type": "loss",
"value": 0.48325642943382263
}
]
}
},
"authors": [
{
"name": "dslim"
}
],
"licenses": [
{
"license": {
"id": "MIT",
"url": "https://spdx.org/licenses/MIT.html"
}
}
],
"description": "**bert-base-NER** is a fine-tuned BERT model that is ready to use for **Named Entity Recognition** and achieves **state-of-the-art performance** for the NER task. It has been trained to recognize four types of entities: location (LOC), organizations (ORG), person (PER) and Miscellaneous (MISC).Specifically, this model is a *bert-base-cased* model that was fine-tuned on the English version of the standard [CoNLL-2003 Named Entity Recognition](https://www.aclweb.org/anthology/W03-0419.pdf) dataset.If you'd like to use a larger BERT-large model fine-tuned on the same dataset, a [**bert-large-NER**](https://huggingface.co/dslim/bert-large-NER/) version is also available.",
"tags": [
"transformers",
"pytorch",
"tf",
"jax",
"onnx",
"safetensors",
"bert",
"token-classification",
"en",
"dataset:conll2003",
"arxiv:1810.04805",
"license:mit",
"model-index",
"autotrain_compatible",
"endpoints_compatible",
"region:us"
]
}
},
"components": [
{
"type": "data",
"bom-ref": "conll2003-be67a053-25af-52ad-93c8-134501f8fa4b",
"name": "conll2003",
"data": [
{
"type": "dataset",
"bom-ref": "conll2003-be67a053-25af-52ad-93c8-134501f8fa4b",
"name": "conll2003",
"contents": {
"url": "https://huggingface.co/datasets/conll2003",
"properties": [
{
"name": "task_categories",
"value": "token-classification"
},
{
"name": "task_ids",
"value": "named-entity-recognition, part-of-speech"
},
{
"name": "language",
"value": "en"
},
{
"name": "size_categories",
"value": "10K<n<100K"
},
{
"name": "annotations_creators",
"value": "crowdsourced"
},
{
"name": "language_creators",
"value": "found"
},
{
"name": "pretty_name",
"value": "CoNLL-2003"
},
{
"name": "source_datasets",
"value": "extended|other-reuters-corpus"
},
{
"name": "paperswithcode_id",
"value": "conll-2003"
},
{
"name": "license",
"value": "other"
}
]
},
"governance": {
"owners": [
{
"organization": {
"name": "eriktks",
"url": "https://huggingface.co/eriktks"
}
}
]
},
"description": "The shared task of CoNLL-2003 concerns language-independent named entity recognition. We will concentrate on\nfour types of named entities: persons, locations, organizations and names of miscellaneous entities that do\nnot belong to the previous three groups.\n\nThe CoNLL-2003 shared task data files contain four columns separated by a single space. Each word has been put on\na separate line and there is an empty line after each sentence. The first item on each line is a word, the second\na part-of-speech (POS) tag, the third a syntactic chunk tag and the fourth the named entity tag. The chunk tags\nand the named entity tags have the format I-TYPE which means that the word is inside a phrase of type TYPE. Only\nif two phrases of the same type immediately follow each other, the first word of the second phrase will have tag\nB-TYPE to show that it starts a new phrase. A word with tag O is not part of a phrase. Note the dataset uses IOB2\ntagging scheme, whereas the original dataset uses IOB1.\n\nFor more details see https://www.clips.uantwerpen.be/conll2003/ner/ and https://www.aclweb.org/anthology/W03-0419"
}
]
}
]
}