Update app.py
Browse files
app.py
CHANGED
|
@@ -1,12 +1,15 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import transformers
|
| 3 |
-
from transformers import pipeline, TokenClassificationPipeline, BertForTokenClassification , AutoTokenizer
|
| 4 |
|
| 5 |
x = st.text_area('enter')
|
| 6 |
|
| 7 |
#model.to("cpu")
|
| 8 |
tokenizer = AutoTokenizer.from_pretrained("dmis-lab/biobert-large-cased-v1.1", truncation = True, padding=True, model_max_length=512,)
|
| 9 |
model_checkpoint = BertForTokenClassification.from_pretrained("dexay/Ner2HgF", )
|
|
|
|
|
|
|
|
|
|
| 10 |
token_classifier = pipeline("token-classification", tokenizer = tokenizer,model=model_checkpoint, )
|
| 11 |
|
| 12 |
|
|
@@ -140,9 +143,71 @@ for itsent in az:
|
|
| 140 |
|
| 141 |
#lstSentEnc,lstSentEnt,lstSentbilbl
|
| 142 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
if x:
|
| 144 |
out = token_classifier(x)
|
| 145 |
-
st.markdown(
|
| 146 |
|
| 147 |
|
| 148 |
|
|
|
|
# --- Model / pipeline setup ------------------------------------------------
import streamlit as st
import transformers
from transformers import (
    pipeline,
    TokenClassificationPipeline,
    BertForTokenClassification,
    AutoTokenizer,
    TextClassificationPipeline,
    AutoModelForSequenceClassification,
)

# Free-text input box; `x` is "" until the user submits something.
x = st.text_area('enter')

#model.to("cpu")

# Tokenizer shared by both pipelines. truncation/padding/model_max_length
# are tokenizer options and belong here (and only here).
tokenizer = AutoTokenizer.from_pretrained(
    "dmis-lab/biobert-large-cased-v1.1",
    truncation=True,
    padding=True,
    model_max_length=512,
)

# NER model (token classification) for entity spotting.
model_checkpoint = BertForTokenClassification.from_pretrained("dexay/Ner2HgF")

# Relation-extraction model (sequence classification).
# NOTE(review): the original also passed truncation/padding/model_max_length
# to this from_pretrained call; those are tokenizer arguments, not model
# config fields — they were dropped because they only injected meaningless
# attributes into the model config.
model_re = AutoModelForSequenceClassification.from_pretrained("dexay/reDs3others")

# NER pipeline; used at the bottom of the file on the raw user input.
token_classifier = pipeline(
    "token-classification",
    tokenizer=tokenizer,
    model=model_checkpoint,
)
#lstSentEnc,lstSentEnt,lstSentbilbl

# --- Relation extraction part ----------------------------------------------
# Inputs produced earlier in this file (sentence/entity loop above):
#   lstSentEnc   : encoded sentence strings fed to the RE classifier
#   lstSentEnt   : [entity1, entity2] per sentence
#   lstSentbilbl : [entity1_label, entity2_label] per sentence
# NOTE(review): assumes the three lists are index-aligned — confirm upstream.

# Use a distinct name for the relation-extraction pipeline so it does NOT
# shadow the NER `token_classifier` built at the top of the file.  The
# original rebound `token_classifier` here, which silently made the final
# `token_classifier(x)` call run text-classification on the raw input
# instead of NER.
re_classifier = pipeline(
    "text-classification",
    tokenizer=tokenizer,
    model=model_re,
)

rrdata = lstSentEnc

outre = re_classifier(rrdata)

# Class-index -> human-readable relation label.  The "(e1,e2)" / "(e2,e1)"
# suffix encodes the direction of the relation between the two entities.
trLABELS = ['INCREASE_RISK(e1,e2)',
            'SPEED_UP(e2,e1)',
            'DECREASE_ACTIVITY(e1,e2)',
            'NO_ASSOCIATION(e1,e2)',
            'DECREASE(e1,e2)',
            'BLOCK(e1,e2)',
            'CAUSE(e1,e2)',
            'ACTIVATE(e2,e1)',
            'DEVELOP(e2,e1)',
            'ALTER(e1,e2)',
            'INCREASE_RISK(e2,e1)',
            'SPEED_UP(e1,e2)',
            'INTERFER(e1,e2)',
            'DECREASE(e2,e1)',
            'NO_ASSOCIATION(e2,e1)',
            'INCREASE(e2,e1)',
            'INTERFER(e2,e1)',
            'ACTIVATE(e1,e2)',
            'INCREASE(e1,e2)',
            'MIMIC(e1,e2)',
            'MIMIC(e2,e1)',
            'BLOCK(e2,e1)',
            'other',
            'BIND(e2,e1)',
            'INCREASE_ACTIVITY(e2,e1)',
            'ALTER(e2,e1)',
            'CAUSE(e2,e1)',
            'BIND(e1,e2)',
            'DEVELOP(e1,e2)',
            'DECREASE_ACTIVITY(e2,e1)']

# Map each pipeline prediction ("LABEL_<k>") to its readable relation.
# Parsing the digits after "_" handles one- and two-digit class indices
# uniformly (the original special-cased len(label) == 7 vs a [-2:] slice,
# which is equivalent but brittle).
outrelbl = [trLABELS[int(e['label'].split('_')[-1])] for e in outre]

# For reversed-direction relations, swap the entities (and their labels)
# in place so every stored triple reads subject -> relation -> object.
for i in range(len(outrelbl)):
    if "(e2,e1)" in outrelbl[i]:
        lstSentbilbl[i][0], lstSentbilbl[i][1] = lstSentbilbl[i][1], lstSentbilbl[i][0]
        lstSentEnt[i][0], lstSentEnt[i][1] = lstSentEnt[i][1], lstSentEnt[i][0]

# Collect extracted relations, skipping "other" (= no relation found).
# BUGFIX: the original condition was `== "other"`, which kept ONLY the
# no-relation predictions and dropped every real relation; it would also
# have mangled the text, since "other"[:-7] strips most of the word rather
# than a "(e1,e2)" suffix.
edccan = []
for i in range(len(outrelbl)):
    if outrelbl[i] != "other":
        # [:-7] strips the directional "(e1,e2)"/"(e2,e1)" suffix.
        edccan += [[lstSentEnc[i],
                    lstSentEnt[i][0],
                    lstSentEnt[i][1],
                    lstSentbilbl[i][0] + " " + outrelbl[i][:-7] + " " + lstSentbilbl[i][1]]]

if x:
    # NER over the raw input (now actually the NER pipeline again, see
    # `re_classifier` note above), then show the extracted relations.
    out = token_classifier(x)
    st.markdown(edccan)