Update app.py
Browse files
app.py
CHANGED
|
@@ -3,7 +3,10 @@ import pandas as pd
|
|
| 3 |
import transformers
|
| 4 |
from transformers import pipeline, TokenClassificationPipeline, BertForTokenClassification , AutoTokenizer , TextClassificationPipeline , AutoModelForSequenceClassification
|
| 5 |
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
#model.to("cpu")
|
| 9 |
tokenizer = AutoTokenizer.from_pretrained("dmis-lab/biobert-large-cased-v1.1", truncation = True, padding=True, model_max_length=512,)
|
|
@@ -13,6 +16,7 @@ model_checkpoint = BertForTokenClassification.from_pretrained("dexay/Ner2HgF", )
|
|
| 13 |
model_re = AutoModelForSequenceClassification.from_pretrained("dexay/reDs3others", )
|
| 14 |
token_classifier = pipeline("token-classification", tokenizer = tokenizer,model=model_checkpoint, )
|
| 15 |
|
|
|
|
| 16 |
|
| 17 |
biotext = x
|
| 18 |
|
|
@@ -144,6 +148,9 @@ for itsent in az:
|
|
| 144 |
|
| 145 |
#lstSentEnc,lstSentEnt,lstSentbilbl
|
| 146 |
|
|
|
|
|
|
|
|
|
|
| 147 |
# Relation extraction part
|
| 148 |
|
| 149 |
token_classifier = pipeline("text-classification", tokenizer = tokenizer,model=model_re,
|
|
@@ -203,7 +210,7 @@ edccan = []
|
|
| 203 |
|
| 204 |
|
| 205 |
for i in range(len(outrelbl)):
|
| 206 |
-
if outrelbl[i]=
|
| 207 |
edccan += [[lstSentEnc[i],lstSentEnt[i][0], lstSentEnt[i][1],lstSentbilbl[i][0]+" "+outrelbl[i][:-7]+" "+lstSentbilbl[i][1]]]
|
| 208 |
|
| 209 |
edccandf = pd.DataFrame(edccan, columns= ["Sentence", "Entity 1", "Entity 2", "Relation"] )
|
|
|
|
| 3 |
import transformers
|
| 4 |
from transformers import pipeline, TokenClassificationPipeline, BertForTokenClassification , AutoTokenizer , TextClassificationPipeline , AutoModelForSequenceClassification
|
| 5 |
|
| 6 |
+
st.header("Knowledge extraction on Endocrine disruptors")
|
| 7 |
+
st.text("This tool lets you extract relation triples concerning interactions between: endocrine disrupting chemicals, hormones, receptors and cancers.")
|
| 8 |
+
st.text("It is the result of an end of studies project within ESI school and dedicated to biomedical researchers looking to extract precise information about the subject without digging into long publications.")
|
| 9 |
+
# Text box for the passage to analyse; the value is later bound to biotext.
x = st.text_area('Enter your text on EDCs:')
|
| 10 |
|
| 11 |
#model.to("cpu")
|
| 12 |
# Tokenizer loaded from the BioBERT large cased checkpoint with model_max_length set to 512.
# NOTE(review): truncation/padding are normally passed when the tokenizer is called;
# confirm they have any effect when given to from_pretrained.
tokenizer = AutoTokenizer.from_pretrained("dmis-lab/biobert-large-cased-v1.1", truncation = True, padding=True, model_max_length=512,)
|
|
|
|
| 16 |
model_re = AutoModelForSequenceClassification.from_pretrained("dexay/reDs3others", )
|
| 17 |
# Token-classification pipeline built from model_checkpoint and the tokenizer above.
# NOTE(review): the name token_classifier is rebound further down to a
# "text-classification" pipeline using model_re; a distinct name would avoid confusion.
token_classifier = pipeline("token-classification", tokenizer = tokenizer,model=model_checkpoint, )
|
| 18 |
|
| 19 |
+
st.text("Knowledge extraction is in progress ...")
|
| 20 |
|
| 21 |
biotext = x
|
| 22 |
|
|
|
|
| 148 |
|
| 149 |
#lstSentEnc,lstSentEnt,lstSentbilbl
|
| 150 |
|
| 151 |
+
st.text("Entities detected, Next: Relation detection ...")
|
| 152 |
+
|
| 153 |
+
|
| 154 |
# Relation extraction part
|
| 155 |
|
| 156 |
token_classifier = pipeline("text-classification", tokenizer = tokenizer,model=model_re,
|
|
|
|
| 210 |
|
| 211 |
|
| 212 |
for i in range(len(outrelbl)):
|
| 213 |
+
if outrelbl[i] != "other":
|
| 214 |
# Append one four-field row: lstSentEnc[i], the two lstSentEnt[i] entries, and a relation
# string that joins the two lstSentbilbl[i] entries around the predicted label.
# NOTE(review): [:-7] drops the last seven characters of the label — presumably a fixed
# suffix in the model's label names; confirm against the label set.
edccan += [[lstSentEnc[i],lstSentEnt[i][0], lstSentEnt[i][1],lstSentbilbl[i][0]+" "+outrelbl[i][:-7]+" "+lstSentbilbl[i][1]]]
|
| 215 |
|
| 216 |
edccandf = pd.DataFrame(edccan, columns= ["Sentence", "Entity 1", "Entity 2", "Relation"] )
|