import streamlit as st
import streamlit.components.v1 as components
import base64
import pandas as pd
import numpy as np
import os
import sys
import tensorflow as tf
import warnings
warnings.filterwarnings('ignore')
from inference import *
from src.config.configs import Params
params = Params()

st.set_page_config(
    page_title="Abstract Skimming Tool",
    page_icon="🔍",
    layout="wide",
    initial_sidebar_state="expanded",
)

#### ADD BILSTM MODEL
# User-facing model names -> internal checkpoint keys.
MODEL_MAP = {"BiLSTM": "penta", "TransformerEncoder-based model": "tf_encoder"}

# Colors for prediction: one black->accent CSS gradient per predicted class,
# all sharing the same template and differing only in the accent RGB.
_GRADIENT_TEMPLATE = ("background: rgb(0,0,0); "
                      "background: linear-gradient(29deg, rgba(0,0,0,1) 75%, rgba({},1) 95%);")
BACK_GROUNDS = [
    _GRADIENT_TEMPLATE.format(accent)
    for accent in ("213,0,0", "0,183,213", "213,164,0", "54,213,0", "125,0,213")
]
def get_embeddings(embedding_arg):
    """
    Build the vectorizers and embedding layers required by every model.

    Args:
        embedding_arg: Pretrained-embedding choice from the sidebar
            ("None", "Glove" or "BERT"; compared case-insensitively).

    Returns:
        Tuple of (word_vectorizer, char_vectorizer, word_embed, char_embed,
        glove_embed, bert_process, bert_layer, class_index). ``glove_embed``
        is None unless GloVe was requested; ``bert_process``/``bert_layer``
        are None unless BERT was requested.
    """
    # Normalise once; the branches below rely on this lowercase form.
    embedding_arg = str(embedding_arg).lower()

    dataset = Dataset(train_txt=params.TRAIN_DIR, val_txt=params.VAL_DIR, test_txt=params.TEST_DIR)
    embeddings = Embeddings()
    class_index = dataset.classes

    # Word- and character-level vectorizers/embeddings are always built.
    word_vectorizer, word_embed = embeddings._get_word_embeddings(dataset.train_sentences)
    char_vectorizer, char_embed = embeddings._get_char_embeddings(dataset.train_char)

    # Pretrained embeddings are loaded lazily, only for the selected option.
    glove_embed = (embeddings._get_glove_embeddings(vectorizer=word_vectorizer, glove_txt=params.GLOVE_DIR)
                   if embedding_arg == "glove" else None)
    if embedding_arg == "bert":
        bert_process, bert_layer = embeddings._get_bert_embeddings()
    else:
        bert_process, bert_layer = None, None

    return word_vectorizer, char_vectorizer, word_embed, char_embed, glove_embed, bert_process, bert_layer, class_index
def load_model(model, word_vectorizer, char_vectorizer, word_embed, char_embed, pretrained_embedding,
               glove_embed, bert_process, bert_layer):
    """
    Build the architecture selected by the user and restore its weights.

    Args:
        model: User-facing model name (a key of ``MODEL_MAP``).
        word_vectorizer / char_vectorizer: Fitted text vectorizers.
        word_embed / char_embed: Matching embedding layers.
        pretrained_embedding: "none", "glove" or "bert" (case-insensitive).
        glove_embed, bert_process, bert_layer: Optional pretrained pieces
            (may be None, as produced by ``get_embeddings``).

    Returns:
        The Keras model with weights restored from its checkpoint directory.
    """
    model = str(MODEL_MAP[model]).lower()
    pretrained_embedding = str(pretrained_embedding).lower()
    # All architectures share the same checkpoint layout, so the directory
    # lookup and weight restore are hoisted out of the branches below.
    model_dir = CHECK_POINT_MAP[model][pretrained_embedding]

    if model == "penta":
        loaded_model = PentaEmbeddingModel(word_vectorizer=word_vectorizer, char_vectorizer=char_vectorizer,
                                           word_embed=word_embed, char_embed=char_embed,
                                           pretrained_embedding=pretrained_embedding,
                                           glove_embed=glove_embed, bert_process=bert_process,
                                           bert_layer=bert_layer)._get_model()
    elif model == "tf_encoder":
        loaded_model = TransformerModel(word_vectorizer=word_vectorizer, char_vectorizer=char_vectorizer,
                                        word_embed=word_embed, char_embed=char_embed,
                                        num_layers=params.NUM_LAYERS, d_model=params.D_MODEL, nhead=params.N_HEAD,
                                        dim_feedforward=params.DIM_FEEDFORWARD,
                                        pretrained_embedding=pretrained_embedding, glove_embed=glove_embed,
                                        bert_process=bert_process, bert_layer=bert_layer)._get_model()
    else:
        # NOTE(review): unreachable with the current MODEL_MAP (only "penta"
        # and "tf_encoder" are produced); kept for forward compatibility.
        loaded_model = HierarchyBiLSTM(word_vectorizer=word_vectorizer, char_vectorizer=char_vectorizer,
                                       word_embed=word_embed, char_embed=char_embed,
                                       pretrained_embedding=pretrained_embedding, glove_embed=glove_embed,
                                       bert_process=bert_process, bert_layer=bert_layer)._get_model()

    loaded_model.load_weights(model_dir + "/best_model.ckpt")
    return loaded_model
def put_sentences_into_classes(infer_sentences, preds_class):
    """
    Group inference sentences by their predicted rhetorical class.

    Args:
        infer_sentences: Iterable of sentence strings.
        preds_class: Parallel iterable of predicted labels ("OBJECTIVE",
            "BACKGROUND", "METHODS", "RESULTS" or "CONCLUSIONS").

    Returns:
        Tuple (objective, background, method, conclusion, result) where each
        element is the concatenation of that class's sentences, every
        sentence prefixed by a single space ('' when the class is empty).

    Raises:
        NameError: If a prediction is not one of the five known labels.
    """
    known_labels = ("OBJECTIVE", "BACKGROUND", "METHODS", "RESULTS", "CONCLUSIONS")
    buckets = {label: [] for label in known_labels}

    for sentence, label in zip(infer_sentences, preds_class):
        if label not in buckets:
            raise NameError("There is something wrong while predicting...")
        buckets[label].append(sentence)

    def _render(label):
        # Each sentence carries a leading space, matching the original
        # accumulate-with-" " behaviour.
        return "".join(" " + s for s in buckets[label])

    return (_render("OBJECTIVE"), _render("BACKGROUND"), _render("METHODS"),
            _render("CONCLUSIONS"), _render("RESULTS"))
@st.cache_data(experimental_allow_widgets=True)
def pre_load():
    """
    Render the static page header, read the model/embedding choice from the
    sidebar, and build the selected model (cached by Streamlit).

    Returns:
        Tuple (loaded_model, class_index): the restored Keras model and the
        index-to-label mapping for its predictions.
    """
    # Page header. The original HTML literals were corrupted in the source;
    # rebuilt here as a centred title plus a one-line description carrying
    # the same visible text.
    st.markdown("<h1 style='text-align: center;'>Abstract Skimming Tool</h1>",
                unsafe_allow_html=True)
    st.markdown("<p style='text-align: center;'>A NLP model that enables researchers "
                "to skim PUDMED medical abstracts faster and extract better information.</p>",
                unsafe_allow_html=True)
    st.write()
    st.write()

    # Selectbox 1: model architecture.
    model_options = ["BiLSTM", "TransformerEncoder-based model"]
    # Selectbox 2: pretrained embedding, keyed by the chosen model.
    embed_options = {
        "BiLSTM": ["None", "Glove", "BERT"],
        "TransformerEncoder-based model": ["None", "Glove", "BERT"],
    }
    # Get option from user
    model = st.sidebar.selectbox("Select your model", model_options)
    pretrained_embedding = st.sidebar.selectbox("Select word embedding: ", embed_options[model])

    # Load properties from user's option, then the model itself.
    (word_vectorizer, char_vectorizer, word_embed, char_embed,
     glove_embed, bert_process, bert_layer, class_index) = get_embeddings(pretrained_embedding)
    loaded_model = load_model(model=model, word_vectorizer=word_vectorizer, char_vectorizer=char_vectorizer,
                              word_embed=word_embed, char_embed=char_embed,
                              pretrained_embedding=pretrained_embedding,
                              glove_embed=glove_embed, bert_process=bert_process, bert_layer=bert_layer)
    return loaded_model, class_index
def get_prediction(loaded_model, class_index):
    """
    Show example abstracts, read the user's abstract, and display the
    sentences grouped by the class the model predicts for each.

    Args:
        loaded_model: Keras model returned by ``load_model``; expects the
            five-feature input tuple built below.
        class_index: Sequence mapping prediction indices to class labels.
    """
    st.write('Some examples of PUDMED abstracts:')
    excol1, excol2, excol3, excol4 = st.columns(4)
    with excol1:
        abstract1 = st.text_area(label="", value="Hepatitis C virus (HCV) and alcoholic liver disease (ALD), either alone or in combination, count for more than two thirds of all liver diseases in the Western world. There is no safe level of drinking in HCV-infected patients and the most effective goal for these patients is total abstinence. Baclofen, a GABA(B) receptor agonist, represents a promising pharmacotherapy for alcohol dependence (AD). Previously, we performed a randomized clinical trial (RCT), which demonstrated the safety and efficacy of baclofen in patients affected by AD and cirrhosis. The goal of this post-hoc analysis was to explore baclofen's effect in a subgroup of alcohol-dependent HCV-infected cirrhotic patients. Any patient with HCV infection was selected for this analysis. Among the 84 subjects randomized in the main trial, 24 alcohol-dependent cirrhotic patients had a HCV infection; 12 received baclofen 10mg t.i.d. and 12 received placebo for 12-weeks. With respect to the placebo group (3/12, 25.0%), a significantly higher number of patients who achieved and maintained total alcohol abstinence was found in the baclofen group (10/12, 83.3%; p=0.0123). Furthermore, in the baclofen group, compared to placebo, there was a significantly higher increase in albumin values from baseline (p=0.0132) and a trend toward a significant reduction in INR levels from baseline (p=0.0716). In conclusion, baclofen was safe and significantly more effective than placebo in promoting alcohol abstinence, and improving some Liver Function Tests (LFTs) (i.e. albumin, INR) in alcohol-dependent HCV-infected cirrhotic patients. Baclofen may represent a clinically relevant alcohol pharmacotherapy for these patients", height=400, disabled=True)
    with excol2:
        abstract2 = st.text_area(label="", value="This RCT examined the efficacy of a manualized social intervention for children with HFASDs. Participants were randomly assigned to treatment or wait-list conditions. Treatment included instruction and therapeutic activities targeting social skills, face-emotion recognition, interest expansion, and interpretation of non-literal language. A response-cost program was applied to reduce problem behaviors and foster skills acquisition. Significant treatment effects were found for five of seven primary outcome measures (parent ratings and direct child measures). Secondary measures based on staff ratings (treatment group only) corroborated gains reported by parents. High levels of parent, child and staff satisfaction were reported, along with high levels of treatment fidelity. Standardized effect size estimates were primarily in the medium and large ranges and favored the treatment group.", height=400, disabled=True)
    with excol3:
        abstract3 = st.text_area(label="", value="Various herbal medicines have been used around the world for more than 5,000 years. Herbal medicines, or herbal supplements, are defined as any products originating from plants and used to preserve or recover health. In the United States, the popularity of herbal supplements has increased in the last several decades. Many physicians do not ask patients about herbal supplement use, and one-third of patients do not inform their physician about supplement use. However, physicians should ask, because although many supplements are considered low risk and safe, some have significant risks of adverse effects. For example, St John's wort (Hypericum perforatum) can have significant drug interactions with prescription or over-the-counter drugs. The effectiveness of herbal supplements in the management of specific conditions varies. For some conditions, there is robust clinical data supporting the use of specific herbal supplements, but for other conditions there is poor or insufficient data. The content and safety of herbal supplements are the purview of the Food and Drug Administration (FDA). However, the FDA primarily responds to after-the-fact reports of postmarketing safety concerns. When an herbal supplement-related adverse effect is suspected, patients or physicians should report it to the FDA via the MedWatch reporting system.", height=400, disabled=True)
    with excol4:
        abstract4 = st.text_area(label="", value="Mental illness, including depression, anxiety and bipolar disorder, accounts for a significant proportion of global disability and poses a substantial social, economic and heath burden. Treatment is presently dominated by pharmacotherapy, such as antidepressants, and psychotherapy, such as cognitive behavioural therapy; however, such treatments avert less than half of the disease burden, suggesting that additional strategies are needed to prevent and treat mental disorders. There are now consistent mechanistic, observational and interventional data to suggest diet quality may be a modifiable risk factor for mental illness. This review provides an overview of the nutritional psychiatry field. It includes a discussion of the neurobiological mechanisms likely modulated by diet, the use of dietary and nutraceutical interventions in mental disorders, and recommendations for further research. Potential biological pathways related to mental disorders include inflammation, oxidative stress, the gut microbiome, epigenetic modifications and neuroplasticity. Consistent epidemiological evidence, particularly for depression, suggests an association between measures of diet quality and mental health, across multiple populations and age groups; these do not appear to be explained by other demographic, lifestyle factors or reverse causality. Our recently published intervention trial provides preliminary clinical evidence that dietary interventions in clinically diagnosed populations are feasible and can provide significant clinical benefit. Furthermore, nutraceuticals including n-3 fatty acids, folate, S-adenosylmethionine, N-acetyl cysteine and probiotics, among others, are promising avenues for future research. \nContinued research is now required to investigate the efficacy of intervention studies in large cohorts and within clinically relevant populations, particularly in patients with schizophrenia, bipolar and anxiety disorders.", height=400, disabled=True)

    # Your abstract: input on the left, prediction on the right.
    col1, col2 = st.columns(2)
    # -------------COL 1---------------------
    with col1:
        st.write('\n\nEnter your abstract here: ')
        abstract = st.text_area(label='', height=400)
        predict = st.button('Extract')

    if predict:
        with st.spinner('Wait for prediction....'):
            # --------------Extract feature-------------------------
            # Sentencizer
            list_sens = sent_tokenize(abstract)
            # Keep the originals for display; the model sees a masked copy
            # where numeric characters are replaced with '@'.
            list_sens_org = list_sens
            list_sens = replace_numeric_chars_with_at(list_sens)
            # Positional features (line id, line length, total lines).
            line_samples = get_information_infer(list_sens)
            infer_df = pd.DataFrame(line_samples)
            # Text and character-level features.
            infer_sentences = infer_df['text']
            infer_chars = [split_into_char(line) for line in infer_sentences]
            infer_sentences = np.array(infer_sentences, dtype=str)
            infer_chars = np.array(infer_chars, dtype=str)
            # One-hot positional inputs, with depths fixed by training config.
            line_ids_one_hot = tf.one_hot(infer_df['line_id'].to_numpy(), depth=params.LINE_IDS_DEPTH)
            length_lines_one_hot = tf.one_hot(infer_df['length_lines'].to_numpy(), depth=params.LENGTH_LINES_DEPTH)
            total_lines_one_hot = tf.one_hot(infer_df['total_lines'].to_numpy(), depth=params.TOTAL_LINES_DEPTH)
            preds = loaded_model.predict(x=(infer_sentences, infer_chars, line_ids_one_hot,
                                            length_lines_one_hot, total_lines_one_hot))
            preds_index = np.argmax(preds, axis=1)
            # Map argmax indices back to class labels.
            preds_class = [class_index[i] for i in preds_index]
            objective, background, method, conclusion, result = put_sentences_into_classes(
                infer_sentences=list_sens_org, preds_class=preds_class)
        with col2:
            st.markdown('### Objective: ')
            get_block(objective, BACK_GROUNDS[0])
            st.markdown('### Background: ')
            get_block(background, BACK_GROUNDS[1])
            st.markdown('### Method: ')
            get_block(method, BACK_GROUNDS[2])
            st.markdown('### Result: ')
            get_block(result, BACK_GROUNDS[3])
            st.markdown('### Conclusion: ')
            get_block(conclusion, BACK_GROUNDS[4])
if __name__ == "__main__":
set_background("images/bg10.png")
loaded_model, class_index = pre_load()
get_prediction(loaded_model=loaded_model, class_index=class_index)
st.write("\n\n---\n\n")
st.write("Built with Docker and Streamlit")