back_rag_huggingface / model_data_json /Angelakeke_RaTE-NER-Deberta.json
shayan5422's picture
Upload 3710 files
21cad66 verified
{
"model_id": "Angelakeke/RaTE-NER-Deberta",
"downloads": 47763,
"tags": [
"transformers",
"safetensors",
"deberta-v2",
"token-classification",
"medical",
"radiology",
"en",
"license:mit",
"autotrain_compatible",
"endpoints_compatible",
"region:us"
],
"description": "--- license: mit language: - en tags: - medical - radiology model-index: - name: rate-ner-rad results: [] pipeline_tag: token-classification widget: - text: No suspicious focal mass lesion is seen in the left kidney. example_title: Example in radiopaedia --- # RaTE-NER-Deberta This model is a fine-tuned version of DeBERTa on the RaTE-NER dataset. ## Model description This model is trained to serve the RaTEScore metric, if you are interested in our pipeline, please refer to our paper and Github. This model also can be used to extract **Abnormality, Non-Abnormality, Anatomy, Disease, Non-Disease** in medical radiology reports. ## Usage <details> <summary> Click to expand the usage of this model. </summary> <pre><code> from transformers import AutoTokenizer, AutoModelForTokenClassification import torch def post_process(tokenized_text, predicted_entities, tokenizer): entity_spans = [] start = end = None entity_type = None for i, (token, label) in enumerate(zip(tokenized_text, predicted_entities[:len(tokenized_text)])): if token in [\"[CLS]\", \"[SEP]\"]: continue if label != \"O\" and i < len(predicted_entities) - 1: if label.startswith(\"B-\") and predicted_entities[i+1].startswith(\"I-\"): start = i entity_type = label[2:] elif label.startswith(\"B-\") and predicted_entities[i+1].startswith(\"B-\"): start = i end = i entity_spans.append((start, end, label[2:])) start = i entity_type = label[2:] elif label.startswith(\"B-\") and predicted_entities[i+1].startswith(\"O\"): start = i end = i entity_spans.append((start, end, label[2:])) start = end = None entity_type = None elif label.startswith(\"I-\") and predicted_entities[i+1].startswith(\"B-\"): end = i if start is not None: entity_spans.append((start, end, entity_type)) start = i entity_type = label[2:] elif label.startswith(\"I-\") and predicted_entities[i+1].startswith(\"O\"): end = i if start is not None: entity_spans.append((start, end, entity_type)) start = end = None entity_type = None if start 
is not None and end is None: end = len(tokenized_text) - 2 entity_spans.append((start, end, entity_type)) save_pair = [] for start, end, entity_type in entity_spans: entity_str = tokenizer.convert_tokens_to_string(tokenized_text[start:end+1]) save_pair.append((entity_str, entity_type)) return save_pair def run_ner(texts, idx2label, tokenizer, model, device): inputs = tokenizer(texts, max_length=512, padding=True, truncation=True, return_tensors=\"pt\").to(device) with torch.no_grad(): outputs = model(**inputs) predicted_labels = torch.argmax(outputs.logits, dim=2).tolist() save_pairs = [] for i in range(len(texts)): predicted_entities = [idx2label[label] for label in predicted_labels[i]] non_pad_mask = inputs[\"input_ids\"][i] != tokenizer.pad_token_id non_pad_length = non_pad_mask.sum().item() non_pad_input_ids = inputs[\"input_ids\"][i][:non_pad_length] tokenized_text = tokenizer.convert_ids_to_tokens(non_pad_input_ids) save_pair = post_process(tokenized_text, predicted_entities, tokenizer) if i == 0: save_pairs = save_pair else: save_pairs.extend(save_pair) return save_pairs ner_labels = ['B-ABNORMALITY', 'I-ABNORMALITY', 'B-NON-ABNORMALITY', 'I-NON-ABNORMALITY', 'B-DISEASE', 'I-DISEASE', 'B-NON-DISEASE', 'I-NON-DISEASE', 'B-ANATOMY', 'I-ANATOMY', 'O'] idx2label = {i: label for i, label in enumerate(ner_labels)} tokenizer = AutoTokenizer.from_pretrained('Angelakeke/RaTE-NER-Deberta') model = AutoModelForTokenClassification.from_pretrained('Angelakeke/RaTE-NER-Deberta') device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\") model.to(device) model.eval() We recommend to inference by sentences. text = \"\" texts = text.split('. ') save_pair = run_ner(texts, idx2label, tokenizer, model, device) </code></pre> </details> ## Author Author: Weike Zhao If you have any questions, please feel free to contact zwk0629@sjtu.edu.cn. ## Citation",
"model_explanation_gemini": "Extracts medical entities (Abnormality, Non-Abnormality, Anatomy, Disease, Non-Disease) from radiology reports using a fine-tuned DeBERTa model trained on the RaTE-NER dataset. \n\n**Features**: \n- Token classification for medical NER \n- Supports entity types: Abnormality, Non-Abnormality, Anatomy, Disease, Non-Disease \n- Optimized for radiology reports \n- Built on DeBERTa architecture \n\n**Comparison**:",
"release_year": null,
"parameter_count": null,
"is_fine_tuned": true,
"category": "Named Entity Recognition",
"model_family": "BERT",
"api_enhanced": true
}