|
|
import streamlit as st |
|
|
import pandas as pd |
|
|
import numpy as np |
|
|
import joblib |
|
|
from scipy.stats import rankdata |
|
|
import time |
|
|
from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode |
|
|
import matplotlib.pyplot as plt |
|
|
import io |
|
|
from ulm import run_ulm |
|
|
from mlm import run_mlm |
|
|
from plotting import plot_barplot |
|
|
import tensorflow as tf |
|
|
import os |
|
|
|
|
|
|
|
|
# Browser-tab title and wide page layout for the whole app.
st.set_page_config(layout="wide", page_title="OncoMark")

# Title banner, stretched to the width of its container.
st.image("oncomark_title.png", use_container_width=True, caption="")

# Sidebar: CSV upload control followed by a link to the online tutorial.
with st.sidebar:
    st.header("Upload Data")
    uploaded_file = st.file_uploader(
        "Upload your data file (CSV)",
        type=["csv"],
    )
    st.markdown(
        "[Need help? View tutorial](https://oncomark.readthedocs.io/en/latest/usage/)",
        unsafe_allow_html=True,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Directory that contains this script. Every bundled asset is resolved against
# it so the app behaves the same no matter which working directory the
# Streamlit process was started from.
_APP_DIR = os.path.dirname(os.path.abspath(__file__))


def _asset_path(file_name):
    """Return the path of ``file_name`` located next to this script."""
    return os.path.join(_APP_DIR, file_name)


@st.cache_resource
def _load_keras_model(path):
    """Load the Keras model once and reuse it across script reruns."""
    return tf.keras.models.load_model(path)


@st.cache_resource
def _load_scaler(path):
    """Load the joblib-serialised scaler once and reuse it across reruns."""
    return joblib.load(path)


# File names of the artefacts shipped with the app.
model_path = 'hallmark_model.keras'
scaler_path = 'hallmark_scaler.joblib'
feature_file = 'hallmark_feature.txt'

model = _load_keras_model(_asset_path(model_path))
scaler = _load_scaler(_asset_path(scaler_path))

# One feature name per line; the order defines the column order of the
# matrix passed to the scaler and the model.
with open(_asset_path(feature_file), 'r', encoding='utf-8') as file:
    feature_names = file.read().splitlines()

# Output labels; position i names the i-th output returned by the model.
hall_list = ['AIM', 'DCE', 'EGS', 'GIM', 'RCD', 'SPS', 'AID', 'IA', 'ERI', 'TPI']

# Networks handed to run_ulm / run_mlm. They are read from the script
# directory like the other assets rather than from the working directory.
collectri = pd.read_csv(_asset_path('collectri_df.csv'))
progeny = pd.read_csv(_asset_path('progeny_df.csv'))
|
|
|
|
|
|
|
|
if uploaded_file is None:
    # Nothing uploaded yet: show a toy table with the expected layout
    # (one row per sample, one column per gene).
    st.write("### Example Input Format")
    st.info("**Note:** I am flexible and can handle both normalized and non-normalized input data. Upload your data as is, and the model will adjust accordingly to provide accurate predictions.")

    example_columns = {
        'GeneA': [120, 150, 80],
        'GeneB': [200, 180, 190],
        'GeneC': [90, 75, 110],
        'GeneD': [60, 95, 100],
    }
    example_samples = ['Sample1', 'Sample2', 'Sample3']
    raw_count_data = pd.DataFrame(example_columns, index=example_samples)
    st.write(raw_count_data)
else:
    # The first CSV column becomes the row index.
    data = pd.read_csv(uploaded_file, index_col=0)

    # Activity scores are computed on the table exactly as uploaded, before
    # any of the model-specific preprocessing below.
    tf_acts, tf_pvals = run_ulm(mat=data, net=collectri, verbose=False)
    pathway_acts, pathway_pvals = run_mlm(mat=data, net=progeny, verbose=False)

    # Preview: at most the first 5 rows and 50 columns.
    st.write("### Uploaded Data")
    st.write(data.iloc[:5, :50])

    # Keep only the first occurrence of each repeated column name.
    repeated_columns = data.columns.duplicated(keep='first')
    data = data.loc[:, ~repeated_columns]

    # Align the columns with the model's feature list; features that are
    # absent or contain missing values are set to 0.
    data = data.reindex(columns=feature_names, fill_value=0)
    data = data.fillna(0)
    data_index = data.index

    # Rank within each row on the negated values (largest value gets rank 1),
    # take log2 of the ranks, then apply the fitted scaler.
    row_ranks = rankdata(data * -1, axis=1, method='average')
    log_ranks = np.log2(row_ranks)
    data = scaler.transform(log_ranks)
|
|
|
|
|
|
|
|
def model_predict(input_data):
    """Run the hallmark model on ``input_data`` and tabulate its outputs.

    Args:
        input_data: Preprocessed feature matrix passed straight to
            ``model.predict``.

    Returns:
        A DataFrame with one column per entry of the module-level
        ``hall_list`` (column ``i`` holds the flattened ``i``-th model
        output) and the module-level ``data_index`` as its row index.
    """
    # Use the argument rather than the module-level ``data`` so the function
    # predicts on whatever the caller actually passes in.
    predictions = model.predict(input_data)

    # Build all columns in one go instead of inserting them one at a time.
    prediction_df = pd.DataFrame(
        {
            hall_name: predictions[task_id].flatten()
            for task_id, hall_name in enumerate(hall_list)
        }
    )
    prediction_df.index = data_index
    return prediction_df
|
|
|
|
|
def display_loading_animation():
    """Show a short progress message inside a single placeholder.

    Writes the "Predicting" text with one, two and then three trailing dots,
    pausing one second after each, and finishes with an "Almost there..."
    message in the same placeholder.
    """
    with st.empty():
        for dot_count in (1, 2, 3):
            st.write("π Predicting" + "." * dot_count)
            time.sleep(1.0)
        st.write("π Almost there...")
|
|
|
|
|
|
|
|
# The section header is shown whether or not a file has been uploaded.
st.write("### Predictions")

if uploaded_file is None:
    # No input yet: leave ``predictions`` unset and prompt the user.
    predictions = None
    st.info("Upload your data to see predictions.")
else:
    display_loading_animation()
    # reset_index() turns the row index into a regular column of the table.
    predictions = model_predict(data).reset_index()
|
|
|
|
|
|
|
|
def _show_current_plot(file_name):
    """Render the active matplotlib figure and offer it as a PNG download.

    Args:
        file_name: File name suggested to the browser for the download.

    The figure is closed once it has been rendered so that figures do not
    pile up in memory over repeated script reruns.
    """
    fig = plt.gcf()
    buf = io.BytesIO()
    fig.savefig(buf, format='png', dpi=300)
    st.pyplot(fig)
    st.download_button(
        label="Download Plot as PNG",
        data=buf.getvalue(),
        file_name=file_name,
        mime='image/png',
    )
    plt.close(fig)


selected = None

if predictions is not None:
    # Interactive table of predictions with single-row selection.
    gb = GridOptionsBuilder.from_dataframe(predictions)
    gb.configure_selection(selection_mode='single', use_checkbox=False)
    gb.configure_default_column(resizable=True, autoWidth=True, maxWidth=100)
    grid_options = gb.build()

    grid_response = AgGrid(
        predictions,
        gridOptions=grid_options,
        update_mode=GridUpdateMode.SELECTION_CHANGED,
        height=300,
        enable_enterprise_modules=False,
        allow_unsafe_jscode=True,
        theme='streamlit',
        custom_css={
            ".ag-row-selected": {
                "background-color": "#90EE90 !important"
            }
        },
    )

    csv_grid = predictions.to_csv().encode('utf-8')
    st.download_button(
        label="Download Table as CSV",
        data=csv_grid,
        file_name='aggrid_table.csv',
        mime='text/csv',
    )

    selected = grid_response['selected_rows']

st.write("### Analysis")

# "No selection" may be reported as None or as an empty collection, so both
# are treated as nothing selected.
if selected is not None and len(selected) > 0:
    selected_df = pd.DataFrame(selected)
    # Positional access: the selection frame's own index is not guaranteed
    # to contain the label 0.
    sample_name = selected_df['index'].iloc[0]

    st.write('##### Transcription factor activity')
    st.info('If it is positive, we interpret that the TF is active and if it is negative we interpret that it is inactive.')
    plot_barplot(
        acts=tf_acts,
        contrast=sample_name,
        top=50,
        vertical=False,
        figsize=(11, 5))
    _show_current_plot('tf_hallmark_{}.png'.format(sample_name))

    st.write('##### Pathway activity')
    st.info('If it is positive, we interpret that the pathway is active and if it is negative we interpret that it is inactive.')
    plot_barplot(
        pathway_acts,
        sample_name,
        top=50,
        vertical=False,
        figsize=(6, 3))
    _show_current_plot('pathway_hallmark_{}.png'.format(sample_name))
else:
    st.info('Click on a sample under predictions to see the analysis')
|
|
|
|
|
|
|
|
# Footer: a separator line followed by a link to the project repository.
st.write("----")
st.markdown(
    "[Visit our GitHub Repository](https://github.com/SML-CompBio/OncoMark)",
    unsafe_allow_html=True,
)
|
|
|
|
|
|
|
|
|