Commit
·
84165a6
1
Parent(s):
43cad7d
Update README.md
Browse files
README.md
CHANGED
|
@@ -58,7 +58,7 @@ Lima-López, S., Farré-Maduell, E., Miranda-Escalada, A., Brivá-Iglesias, V.,
|
|
| 58 |
|
| 59 |
- **Developed by:** Alfredo Madrid
|
| 60 |
- **Language(s) (NLP):** Spanish
|
| 61 |
-
- **License:**
|
| 62 |
- **Finetuned from model [optional]:** PlanTL-GOB-ES/roberta-base-biomedical-es
|
| 63 |
|
| 64 |
### Model Sources
|
|
@@ -70,7 +70,105 @@ Lima-López, S., Farré-Maduell, E., Miranda-Escalada, A., Brivá-Iglesias, V.,
|
|
| 70 |
|
| 71 |
## Uses
|
| 72 |
|
| 73 |
-
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
[More Information Needed]
|
|
|
|
| 58 |
|
| 59 |
- **Developed by:** Alfredo Madrid
|
| 60 |
- **Language(s) (NLP):** Spanish
|
| 61 |
+
- **License:** CC BY-SA 4.0
|
| 62 |
- **Finetuned from model [optional]:** PlanTL-GOB-ES/roberta-base-biomedical-es
|
| 63 |
|
| 64 |
### Model Sources
|
|
|
|
| 70 |
|
| 71 |
## Uses
|
| 72 |
|
| 73 |
+
**Model 1**
|
| 74 |
+
|
| 75 |
+
```
|
| 76 |
+
import torch
|
| 77 |
+
import pandas as pd
|
| 78 |
+
import numpy as np
|
| 79 |
+
|
| 80 |
+
from transformers import AutoTokenizer, AutoModelForTokenClassification
|
| 81 |
+
model = AutoModelForTokenClassification.from_pretrained("MEDDO_FINAL_ROBERTA_ner_sentencia_510_8_10_2e-05_1e-08")
|
| 82 |
+
tokenizer = AutoTokenizer.from_pretrained("MEDDO_FINAL_ROBERTA_ner_sentencia_510_8_10_2e-05_1e-08")
|
| 83 |
+
```
|
| 84 |
+
|
| 85 |
+
```
|
| 86 |
+
note = "El paciente trabaja en una empresa de construccion los jueves"
|
| 87 |
+
tokenized_sentence = tokenizer.encode(note, truncation=True)
|
| 88 |
+
tokenized_words_ids = tokenizer(note, truncation=True)
|
| 89 |
+
word_ids = tokenized_words_ids.word_ids
|
| 90 |
+
input_ids = torch.tensor([tokenized_sentence])
|
| 91 |
+
with torch.no_grad():
|
| 92 |
+
output = model(input_ids)
|
| 93 |
+
label_indices = np.argmax(output[0].to('cpu').numpy(), axis=2)
|
| 94 |
+
tokens = tokenizer.convert_ids_to_tokens(input_ids.numpy()[0])
|
| 95 |
+
label_indices
|
| 96 |
+
```
|
| 97 |
+
|
| 98 |
+
```
df = pd.DataFrame(zip(tokens, label_indices[0], word_ids(0)), columns=["tokens", "labels", "relation"])
df['tokens'] = df['tokens'].str.replace('Ġ', '')
df['labels'] = df['labels'].map({0: 'B-PROFESION', 1: 'B-SITUACION_LABORAL', 2: 'I-SITUACION_LABORAL', 3: 'I-ACTIVIDAD', 4: 'I-PROFESION', 5: 'O', 6: 'B-ACTIVIDAD', 7: 'PAD'})
df = df[1:-1]
df['relation'] = df['relation'].astype('int')
df['tokens'] = df.groupby('relation')['tokens'].transform(lambda x: ''.join(x))
df = df.groupby('relation').first()
df
```

**Output**

| relation | tokens | labels |
|:--------:|:-------------:|:-----------:|
| 0 | El | O |
| 1 | paciente | O |
| 2 | trabaja | B-PROFESION |
| 3 | en | I-PROFESION |
| 4 | una | I-PROFESION |
| 5 | empresa | I-PROFESION |
| 6 | de | I-PROFESION |
| 7 | construccion | I-PROFESION |
| 8 | los | O |
| 9 | jueves | O |
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
**Model 2**
|
| 124 |
+
```
|
| 125 |
+
import torch
|
| 126 |
+
import pandas as pd
|
| 127 |
+
import numpy as np
|
| 128 |
+
|
| 129 |
+
from transformers import AutoTokenizer, AutoModelForTokenClassification
|
| 130 |
+
model = AutoModelForTokenClassification.from_pretrained("MEDDO_FINAL_ROBERTA_class_sentencia_510_8_10_2e-05_1e-08")
|
| 131 |
+
tokenizer = AutoTokenizer.from_pretrained("MEDDO_FINAL_ROBERTA_class_sentencia_510_8_10_2e-05_1e-08")
|
| 132 |
+
```
|
| 133 |
+
|
| 134 |
+
```
|
| 135 |
+
note = "El paciente trabaja en una empresa de construccion los jueves"
|
| 136 |
+
tokenized_sentence = tokenizer.encode(note, truncation=True)
|
| 137 |
+
tokenized_words_ids = tokenizer(note, truncation=True)
|
| 138 |
+
word_ids = tokenized_words_ids.word_ids
|
| 139 |
+
input_ids = torch.tensor([tokenized_sentence])
|
| 140 |
+
with torch.no_grad():
|
| 141 |
+
output = model(input_ids)
|
| 142 |
+
label_indices = np.argmax(output[0].to('cpu').numpy(), axis=2)
|
| 143 |
+
tokens = tokenizer.convert_ids_to_tokens(input_ids.to('cpu').numpy()[0])
|
| 144 |
+
label_indices
|
| 145 |
+
```
|
| 146 |
+
|
| 147 |
+
```
df = pd.DataFrame(zip(tokens, label_indices[0], word_ids(0)), columns=["tokens", "labels", "relation"])
df['tokens'] = df['tokens'].str.replace('Ġ', '')
df['labels'] = df['labels'].map({0: 'B-FAMILIAR', 1: 'I-PACIENTE', 2: 'I-OTROS', 3: 'B-SANITARIO', 4: 'B-PACIENTE', 5: 'I-FAMILIAR', 6: 'O', 7: 'B-OTROS', 8: 'I-SANITARIO', 9: 'PAD'})
df = df[1:-1]
df['relation'] = df['relation'].astype('int')
df['tokens'] = df.groupby('relation')['tokens'].transform(lambda x: ''.join(x))
df = df.groupby('relation').first()
df
```

**Output**

| relation | tokens | labels |
|:--------:|:-------------:|:-----------:|
| 0 | El | O |
| 1 | paciente | O |
| 2 | trabaja | B-PACIENTE |
| 3 | en | I-PACIENTE |
| 4 | una | I-PACIENTE |
| 5 | empresa | I-PACIENTE |
| 6 | de | I-PACIENTE |
| 7 | construccion | I-PACIENTE |
| 8 | los | O |
| 9 | jueves | O |
|
| 173 |
|
| 174 |
[More Information Needed]
|