Spaces:

oncomark
/

ai

Running

App Files Files Community

ai / OncoMark.py

iamspriyadarshi

Update OncoMark.py

1bace92 verified 9 months ago

raw

history blame contribute delete

6.64 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	import joblib
	from scipy.stats import rankdata
	import time
	from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
	import matplotlib.pyplot as plt
	import io
	from ulm import run_ulm
	from mlm import run_mlm
	from plotting import plot_barplot
	import tensorflow as tf
	import os

	# Page Configuration
	st.set_page_config(page_title="OncoMark", layout="wide")
	st.image("oncomark_title.png", caption="", use_container_width=True)
	# st.title("OncoMark")

	# Sidebar for uploading data
	st.sidebar.header("Upload Data")
	uploaded_file = st.sidebar.file_uploader("Upload your data file (CSV)", type=["csv"])
	st.sidebar.markdown("[Need help? View tutorial](https://oncomark.readthedocs.io/en/latest/usage/)", unsafe_allow_html=True)

	# Description and Instructions
	# st.write("AI to predict cancer hallmarks from transcriptomics data.")

	# Load model
	model_path = 'hallmark_model.keras'
	scaler_path = 'hallmark_scaler.joblib'
	feature_file = 'hallmark_feature.txt'

	# Load the pre-trained model and scaler
	model = tf.keras.models.load_model(os.path.join(os.path.dirname(__file__), model_path))
	scaler = joblib.load(os.path.join(os.path.dirname(__file__), scaler_path))

	# Load feature names
	with open((os.path.join(os.path.dirname(__file__), feature_file)), 'r') as file:
	feature_names = file.read().splitlines()

	# Define hallmark tasks
	hall_list = ['AIM', 'DCE', 'EGS', 'GIM', 'RCD', 'SPS', 'AID', 'IA', 'ERI', 'TPI']
	collectri = pd.read_csv('collectri_df.csv')
	progeny = pd.read_csv('progeny_df.csv')

	# Show an example structure if no data is uploaded
	if uploaded_file is not None:
	data = pd.read_csv(uploaded_file, index_col=0)
	tf_acts, tf_pvals = run_ulm(mat=data, net=collectri, verbose=False)
	pathway_acts, pathway_pvals = run_mlm(mat=data, net=progeny, verbose=False)

	st.write("### Uploaded Data")
	st.write(data.iloc[:5, :50])
	data = data.loc[:, ~data.columns.duplicated(keep='first')]
	data = data.reindex(columns=feature_names, fill_value=0).fillna(0)
	data_index = data.index
	data = rankdata(data * -1, axis=1, method='average')
	data = np.log2(data)
	data = scaler.transform(data)
	else:
	st.write("### Example Input Format")
	st.info("Note: I am flexible and can handle both normalized and non-normalized input data. Upload your data as is, and the model will adjust accordingly to provide accurate predictions.")
	raw_count_data = pd.DataFrame({
	'GeneA': [120, 150, 80],
	'GeneB': [200, 180, 190],
	'GeneC': [90, 75, 110],
	'GeneD': [60, 95, 100]
	}, index=['Sample1', 'Sample2', 'Sample3'])
	st.write(raw_count_data)

	# Dummy model function (replace with actual model prediction)
	def model_predict(input_data):
	predictions = model.predict(data)
	prediction_df = pd.DataFrame()
	for task_id, hall_name in enumerate(hall_list):
	prediction_df[hall_name] = predictions[task_id].flatten()
	prediction_df.index = data_index
	return prediction_df

	def display_loading_animation():
	with st.empty():
	for i in range(3):
	st.write("🔍 Predicting" + "." * (i + 1))
	time.sleep(1.0)
	st.write("🚀 Almost there...")

	# Initialize predictions to None
	predictions = None

	# Predict and display results if data is uploaded
	if uploaded_file is not None:
	st.write("### Predictions")
	display_loading_animation()
	predictions = model_predict(data)
	predictions = predictions.reset_index()
	# st.write(predictions)
	else:
	st.write("### Predictions")
	st.info("Upload your data to see predictions.")


	selected = None

	# Display analysis if predictions are available
	if predictions is not None:
	# Display predictions in AgGrid
	gb = GridOptionsBuilder.from_dataframe(predictions)
	gb.configure_selection(selection_mode='single', use_checkbox=False)
	gb.configure_default_column(resizable=True, autoWidth=True, maxWidth=100)
	grid_options = gb.build()

	grid_response = AgGrid(
	predictions,
	gridOptions=grid_options,
	update_mode=GridUpdateMode.SELECTION_CHANGED,
	height=300,
	enable_enterprise_modules=False,
	allow_unsafe_jscode=True,
	theme='streamlit',
	custom_css={
	".ag-row-selected": {
	"background-color": "#90EE90 !important"
	}
	}
	)

	csv_grid = predictions.to_csv().encode('utf-8')
	st.download_button(
	label="Download Table as CSV",
	data=csv_grid,
	file_name='aggrid_table.csv',
	mime='text/csv'
	)

	# Extract selected row data and display bar plot on selection
	selected = grid_response['selected_rows']
	if selected is not None:
	st.write("### Analysis")
	selected_df = pd.DataFrame(selected)
	sample_name = selected_df['index'][0]

	st.write('##### Transcription factor activity')
	st.info('If it is positive, we interpret that the TF is active and if it is negative we interpret that it is inactive.')
	plot_barplot(
	acts=tf_acts,
	contrast=sample_name,
	top=50,
	vertical=False,
	figsize=(11, 5))
	buf = io.BytesIO()
	plt.savefig(buf, format='png', dpi=300)
	buf.seek(0)
	st.pyplot(plt)
	# Provide option to download the plot
	st.download_button(
	label="Download Plot as PNG",
	data=buf,
	file_name='tf_hallmark_{}.png'.format(sample_name),
	mime='image/png'
	)

	st.write('##### Pathway activity')
	st.info('If it is positive, we interpret that the pathway is active and if it is negative we interpret that it is inactive.')
	plot_barplot(
	pathway_acts,
	sample_name,
	top=50,
	vertical=False,
	figsize=(6, 3))
	buf = io.BytesIO()
	plt.savefig(buf, format='png', dpi=300)
	buf.seek(0)
	st.pyplot(plt)
	# Provide option to download the plot
	st.download_button(
	label="Download Plot as PNG",
	data=buf,
	file_name='pathway_hallmark_{}.png'.format(sample_name),
	mime='image/png'
	)

	else:
	st.write("### Analysis")
	st.info('Click on a sample under predictions to see the analysis')
	else:
	st.write("### Analysis")
	st.info('Click on a sample under predictions to see the analysis')

	# Footer
	st.write("----")
	st.markdown("[Visit our GitHub Repository](https://github.com/SML-CompBio/OncoMark)", unsafe_allow_html=True)

	# Running the app: use `streamlit run filename.py`