iamspriyadarshi committed on
Commit
272636e
·
verified ·
1 Parent(s): 11767f5

Update OncoMark.py

Browse files
Files changed (1) hide show
  1. OncoMark.py +195 -193
OncoMark.py CHANGED
@@ -1,193 +1,195 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import numpy as np
4
- import joblib
5
- from scipy.stats import rankdata
6
- import time
7
- from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
8
- import matplotlib.pyplot as plt
9
- import io
10
- from ulm import run_ulm
11
- from mlm import run_mlm
12
- from plotting import plot_barplot
13
- import tensorflow as tf
14
- import os
15
-
16
- # Page Configuration
17
- st.set_page_config(page_title="OncoMark", layout="wide")
18
- st.image("oncomark_title.png", caption="", use_container_width=True)
19
- # st.title("OncoMark")
20
-
21
- # Sidebar for uploading data
22
- st.sidebar.header("Upload Data")
23
- uploaded_file = st.sidebar.file_uploader("Upload your data file (CSV)", type=["csv"])
24
-
25
- # Description and Instructions
26
- # st.write("AI to predict cancer hallmarks from transcriptomics data.")
27
-
28
- # Load model
29
- model_path = 'hallmark_model.keras'
30
- scaler_path = 'hallmark_scaler.joblib'
31
- feature_file = 'hallmark_feature.txt'
32
-
33
- # Load the pre-trained model and scaler
34
- model = tf.keras.models.load_model(os.path.join(os.path.dirname(__file__), model_path))
35
- scaler = joblib.load(os.path.join(os.path.dirname(__file__), scaler_path))
36
-
37
- # Load feature names
38
- with open((os.path.join(os.path.dirname(__file__), feature_file)), 'r') as file:
39
- feature_names = file.read().splitlines()
40
-
41
- # Define hallmark tasks
42
- hall_list = ['AIM', 'DCE', 'EGS', 'GIM', 'RCD', 'SPS', 'AID', 'IA', 'ERI', 'TPI']
43
- collectri = pd.read_csv('collectri_df.csv')
44
- progeny = pd.read_csv('progeny_df.csv')
45
-
46
- # Show an example structure if no data is uploaded
47
- if uploaded_file is not None:
48
- data = pd.read_csv(uploaded_file, index_col=0)
49
- tf_acts, tf_pvals = run_ulm(mat=data, net=collectri, verbose=False)
50
- pathway_acts, pathway_pvals = run_mlm(mat=data, net=progeny, verbose=False)
51
-
52
- st.write("### Uploaded Data")
53
- st.write(data.iloc[:5, :50])
54
- data = data.loc[:, ~data.columns.duplicated(keep='first')]
55
- data = data.reindex(columns=feature_names, fill_value=0).fillna(0)
56
- data_index = data.index
57
- data = rankdata(data * -1, axis=1, method='average')
58
- data = np.log2(data)
59
- data = scaler.transform(data)
60
- else:
61
- st.write("### Example Input Format")
62
- st.info("**Note:** I am flexible and can handle both normalized and non-normalized input data. Upload your data as is, and the model will adjust accordingly to provide accurate predictions.")
63
- raw_count_data = pd.DataFrame({
64
- 'GeneA': [120, 150, 80],
65
- 'GeneB': [200, 180, 190],
66
- 'GeneC': [90, 75, 110],
67
- 'GeneD': [60, 95, 100]
68
- }, index=['Sample1', 'Sample2', 'Sample3'])
69
- st.write(raw_count_data)
70
-
71
- # Dummy model function (replace with actual model prediction)
72
- def model_predict(input_data):
73
- predictions = model.predict(data)
74
- prediction_df = pd.DataFrame()
75
- for task_id, hall_name in enumerate(hall_list):
76
- prediction_df[hall_name] = predictions[task_id].flatten()
77
- prediction_df.index = data_index
78
- return prediction_df
79
-
80
- def display_loading_animation():
81
- with st.empty():
82
- for i in range(3):
83
- st.write("🔍 Predicting" + "." * (i + 1))
84
- time.sleep(1.0)
85
- st.write("🚀 Almost there...")
86
-
87
- # Initialize predictions to None
88
- predictions = None
89
-
90
- # Predict and display results if data is uploaded
91
- if uploaded_file is not None:
92
- st.write("### Predictions")
93
- display_loading_animation()
94
- predictions = model_predict(data)
95
- predictions = predictions.reset_index()
96
- # st.write(predictions)
97
- else:
98
- st.write("### Predictions")
99
- st.info("Upload your data to see predictions.")
100
-
101
-
102
- selected = None
103
-
104
- # Display analysis if predictions are available
105
- if predictions is not None:
106
- # Display predictions in AgGrid
107
- gb = GridOptionsBuilder.from_dataframe(predictions)
108
- gb.configure_selection(selection_mode='single', use_checkbox=False)
109
- gb.configure_default_column(resizable=True, autoWidth=True, maxWidth=100)
110
- grid_options = gb.build()
111
-
112
- grid_response = AgGrid(
113
- predictions,
114
- gridOptions=grid_options,
115
- update_mode=GridUpdateMode.SELECTION_CHANGED,
116
- height=300,
117
- enable_enterprise_modules=False,
118
- allow_unsafe_jscode=True,
119
- theme='streamlit',
120
- custom_css={
121
- ".ag-row-selected": {
122
- "background-color": "#90EE90 !important"
123
- }
124
- }
125
- )
126
-
127
- csv_grid = predictions.to_csv().encode('utf-8')
128
- st.download_button(
129
- label="Download Table as CSV",
130
- data=csv_grid,
131
- file_name='aggrid_table.csv',
132
- mime='text/csv'
133
- )
134
-
135
- # Extract selected row data and display bar plot on selection
136
- selected = grid_response['selected_rows']
137
- if selected is not None:
138
- st.write("### Analysis")
139
- selected_df = pd.DataFrame(selected)
140
- sample_name = selected_df['index'][0]
141
-
142
- st.write('##### Transcription factor activity')
143
- st.info('If it is positive, we interpret that the TF is active and if it is negative we interpret that it is inactive.')
144
- plot_barplot(
145
- acts=tf_acts,
146
- contrast=sample_name,
147
- top=50,
148
- vertical=False,
149
- figsize=(11, 5))
150
- buf = io.BytesIO()
151
- plt.savefig(buf, format='png', dpi=300)
152
- buf.seek(0)
153
- st.pyplot(plt)
154
- # Provide option to download the plot
155
- st.download_button(
156
- label="Download Plot as PNG",
157
- data=buf,
158
- file_name='tf_hallmark_{}.png'.format(sample_name),
159
- mime='image/png'
160
- )
161
-
162
- st.write('##### Pathway activity')
163
- st.info('If it is positive, we interpret that the pathway is active and if it is negative we interpret that it is inactive.')
164
- plot_barplot(
165
- pathway_acts,
166
- sample_name,
167
- top=50,
168
- vertical=False,
169
- figsize=(6, 3))
170
- buf = io.BytesIO()
171
- plt.savefig(buf, format='png', dpi=300)
172
- buf.seek(0)
173
- st.pyplot(plt)
174
- # Provide option to download the plot
175
- st.download_button(
176
- label="Download Plot as PNG",
177
- data=buf,
178
- file_name='pathway_hallmark_{}.png'.format(sample_name),
179
- mime='image/png'
180
- )
181
-
182
- else:
183
- st.write("### Analysis")
184
- st.info('Click on a sample under predictions to see the analysis')
185
- else:
186
- st.write("### Analysis")
187
- st.info('Click on a sample under predictions to see the analysis')
188
-
189
- # Footer
190
- st.write("----")
191
- st.markdown("[Visit our GitHub Repository](https://github.com/SML-CompBio/OncoMark)", unsafe_allow_html=True)
192
-
193
- # Running the app: use `streamlit run filename.py`
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import joblib
5
+ from scipy.stats import rankdata
6
+ import time
7
+ from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
8
+ import matplotlib.pyplot as plt
9
+ import io
10
+ from ulm import run_ulm
11
+ from mlm import run_mlm
12
+ from plotting import plot_barplot
13
+ import tensorflow as tf
14
+ import os
15
+
16
+ # Page Configuration
17
+ st.set_page_config(page_title="OncoMark", layout="wide")
18
+ st.image("oncomark_title.png", caption="", use_container_width=True)
19
+ st.markdown("[📘 See Tutorial Guide](https://oncomark.readthedocs.io/en/latest/usage/)", unsafe_allow_html=True)
20
+ # st.title("OncoMark")
21
+
22
+ # Sidebar for uploading data
23
+ st.sidebar.header("Upload Data")
24
+ uploaded_file = st.sidebar.file_uploader("Upload your data file (CSV)", type=["csv"])
25
+ st.sidebar.markdown("[Need help? View tutorial](https://oncomark.readthedocs.io/en/latest/usage/)", unsafe_allow_html=True)
26
+
27
+ # Description and Instructions
28
+ # st.write("AI to predict cancer hallmarks from transcriptomics data.")
29
+
30
+ # File names of the model, scaler, and feature list (resolved relative to this script below)
31
+ model_path = 'hallmark_model.keras'
32
+ scaler_path = 'hallmark_scaler.joblib'
33
+ feature_file = 'hallmark_feature.txt'
34
+
35
+ # Load the pre-trained model and scaler
36
+ model = tf.keras.models.load_model(os.path.join(os.path.dirname(__file__), model_path))
37
+ scaler = joblib.load(os.path.join(os.path.dirname(__file__), scaler_path))
38
+
39
+ # Load feature names
40
+ with open((os.path.join(os.path.dirname(__file__), feature_file)), 'r') as file:
41
+ feature_names = file.read().splitlines()
42
+
43
+ # Define hallmark tasks
44
+ hall_list = ['AIM', 'DCE', 'EGS', 'GIM', 'RCD', 'SPS', 'AID', 'IA', 'ERI', 'TPI']
45
+ collectri = pd.read_csv('collectri_df.csv')
46
+ progeny = pd.read_csv('progeny_df.csv')
47
+
48
+ # Preprocess the uploaded data if a file is provided; otherwise show an example input format
49
+ if uploaded_file is not None:
50
+ data = pd.read_csv(uploaded_file, index_col=0)
51
+ tf_acts, tf_pvals = run_ulm(mat=data, net=collectri, verbose=False)
52
+ pathway_acts, pathway_pvals = run_mlm(mat=data, net=progeny, verbose=False)
53
+
54
+ st.write("### Uploaded Data")
55
+ st.write(data.iloc[:5, :50])
56
+ data = data.loc[:, ~data.columns.duplicated(keep='first')]
57
+ data = data.reindex(columns=feature_names, fill_value=0).fillna(0)
58
+ data_index = data.index
59
+ data = rankdata(data * -1, axis=1, method='average')
60
+ data = np.log2(data)
61
+ data = scaler.transform(data)
62
+ else:
63
+ st.write("### Example Input Format")
64
+ st.info("**Note:** I am flexible and can handle both normalized and non-normalized input data. Upload your data as is, and the model will adjust accordingly to provide accurate predictions.")
65
+ raw_count_data = pd.DataFrame({
66
+ 'GeneA': [120, 150, 80],
67
+ 'GeneB': [200, 180, 190],
68
+ 'GeneC': [90, 75, 110],
69
+ 'GeneD': [60, 95, 100]
70
+ }, index=['Sample1', 'Sample2', 'Sample3'])
71
+ st.write(raw_count_data)
72
+
73
+ # Run the loaded model and build one prediction column per hallmark (NOTE: reads the global `data`/`data_index`; the `input_data` argument is currently unused)
74
+ def model_predict(input_data):
75
+ predictions = model.predict(data)
76
+ prediction_df = pd.DataFrame()
77
+ for task_id, hall_name in enumerate(hall_list):
78
+ prediction_df[hall_name] = predictions[task_id].flatten()
79
+ prediction_df.index = data_index
80
+ return prediction_df
81
+
82
+ def display_loading_animation():
83
+ with st.empty():
84
+ for i in range(3):
85
+ st.write("🔍 Predicting" + "." * (i + 1))
86
+ time.sleep(1.0)
87
+ st.write("🚀 Almost there...")
88
+
89
+ # Initialize predictions to None
90
+ predictions = None
91
+
92
+ # Predict and display results if data is uploaded
93
+ if uploaded_file is not None:
94
+ st.write("### Predictions")
95
+ display_loading_animation()
96
+ predictions = model_predict(data)
97
+ predictions = predictions.reset_index()
98
+ # st.write(predictions)
99
+ else:
100
+ st.write("### Predictions")
101
+ st.info("Upload your data to see predictions.")
102
+
103
+
104
+ selected = None
105
+
106
+ # Display analysis if predictions are available
107
+ if predictions is not None:
108
+ # Display predictions in AgGrid
109
+ gb = GridOptionsBuilder.from_dataframe(predictions)
110
+ gb.configure_selection(selection_mode='single', use_checkbox=False)
111
+ gb.configure_default_column(resizable=True, autoWidth=True, maxWidth=100)
112
+ grid_options = gb.build()
113
+
114
+ grid_response = AgGrid(
115
+ predictions,
116
+ gridOptions=grid_options,
117
+ update_mode=GridUpdateMode.SELECTION_CHANGED,
118
+ height=300,
119
+ enable_enterprise_modules=False,
120
+ allow_unsafe_jscode=True,
121
+ theme='streamlit',
122
+ custom_css={
123
+ ".ag-row-selected": {
124
+ "background-color": "#90EE90 !important"
125
+ }
126
+ }
127
+ )
128
+
129
+ csv_grid = predictions.to_csv().encode('utf-8')
130
+ st.download_button(
131
+ label="Download Table as CSV",
132
+ data=csv_grid,
133
+ file_name='aggrid_table.csv',
134
+ mime='text/csv'
135
+ )
136
+
137
+ # Extract selected row data and display bar plot on selection
138
+ selected = grid_response['selected_rows']
139
+ if selected is not None:
140
+ st.write("### Analysis")
141
+ selected_df = pd.DataFrame(selected)
142
+ sample_name = selected_df['index'][0]
143
+
144
+ st.write('##### Transcription factor activity')
145
+ st.info('If it is positive, we interpret that the TF is active and if it is negative we interpret that it is inactive.')
146
+ plot_barplot(
147
+ acts=tf_acts,
148
+ contrast=sample_name,
149
+ top=50,
150
+ vertical=False,
151
+ figsize=(11, 5))
152
+ buf = io.BytesIO()
153
+ plt.savefig(buf, format='png', dpi=300)
154
+ buf.seek(0)
155
+ st.pyplot(plt)
156
+ # Provide option to download the plot
157
+ st.download_button(
158
+ label="Download Plot as PNG",
159
+ data=buf,
160
+ file_name='tf_hallmark_{}.png'.format(sample_name),
161
+ mime='image/png'
162
+ )
163
+
164
+ st.write('##### Pathway activity')
165
+ st.info('If it is positive, we interpret that the pathway is active and if it is negative we interpret that it is inactive.')
166
+ plot_barplot(
167
+ pathway_acts,
168
+ sample_name,
169
+ top=50,
170
+ vertical=False,
171
+ figsize=(6, 3))
172
+ buf = io.BytesIO()
173
+ plt.savefig(buf, format='png', dpi=300)
174
+ buf.seek(0)
175
+ st.pyplot(plt)
176
+ # Provide option to download the plot
177
+ st.download_button(
178
+ label="Download Plot as PNG",
179
+ data=buf,
180
+ file_name='pathway_hallmark_{}.png'.format(sample_name),
181
+ mime='image/png'
182
+ )
183
+
184
+ else:
185
+ st.write("### Analysis")
186
+ st.info('Click on a sample under predictions to see the analysis')
187
+ else:
188
+ st.write("### Analysis")
189
+ st.info('Click on a sample under predictions to see the analysis')
190
+
191
+ # Footer
192
+ st.write("----")
193
+ st.markdown("[Visit our GitHub Repository](https://github.com/SML-CompBio/OncoMark)", unsafe_allow_html=True)
194
+
195
+ # Running the app: use `streamlit run filename.py`