ligdis committed on
Commit
37b61da
·
verified ·
1 Parent(s): 647aa0d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -30
app.py CHANGED
@@ -10,7 +10,6 @@ root = os.path.dirname(os.path.abspath(__file__))
10
  FREQUENT_CUTOFF = 40
11
  MEDIUM_CUTOFF = 10
12
 
13
-
14
  st.set_page_config(
15
  page_title="Ligand Disovery 2: Explore Protein-sets",
16
  page_icon=":home:",
@@ -112,38 +111,34 @@ options = sorted([x for k,v in pid2name_primary.items() for x in [k,v]])
112
 
113
  # layout
114
 
115
- st.title("Ligand Discovery 2: Explore Protein-sets")
116
- st.write("We screened 407 fully-functionalized small molecule fragments ('Ligands') in HEK293t cells. For {0} of the Ligands, we found at least one protein enriched. In total, we enriched {1} proteins at least once. Query your protein sets of interest and explore them in light of our dataset!".format(len(fid2pid), len(pid2fid)))
117
-
118
- cols = st.columns([1,1,2])
119
 
120
- col = cols[0]
121
-
122
- manual_input = col.multiselect(label="Input proteins manually", options = [""] + sorted(options), default=[], help="Select proteins by UniProt Accession code or Gene Symbol")
123
  user_pids = {}
124
  user_input = []
125
  for i in manual_input:
126
  user_pids[i] = any2pid[i]
127
  user_input += [i]
128
 
129
- col = cols[1]
130
 
131
  fids = sorted(set(db["FragID"]))
132
- fid_input = col.selectbox(label="Select pre-screened Ligand by identifier", options = [""] + fids, help="Select an already profiled Ligand in our primary screening (page Interactions). Use the Ligand identifier (example, C001)")
133
  if fid_input != "":
134
  user_input = fid2pid[fid_input]
135
  user_pids = dict((r,r) for r in user_input)
136
 
137
- col = cols[2]
138
 
139
  example_file = db
140
- file_input = col.file_uploader(label="Upload a file", help="Provide a file containing one UniProt Accession code or Gene Symbol per row.")
141
  if file_input:
142
  user_input = list(pd.read_csv(file_input, header=None)[0])
143
  for i in user_input:
144
  user_pids[i] = any2pid[i]
145
 
146
- col.download_button(label="Download example file", data=convert_df_no_header(pd.DataFrame({"uniprot_ac": example_input})), file_name="protein_profile_example.csv", mime="text/csv")
147
 
148
  # checks
149
 
@@ -157,7 +152,7 @@ if not file_input:
157
  file_input = None
158
 
159
  if not manual_input and not file_input and not fid_input:
160
- st.info("Use any of the options above to explore a protein profile...")
161
  query_is_available = False
162
  else:
163
  c = 0
@@ -165,7 +160,7 @@ else:
165
  if x is not None:
166
  c += 1
167
  if c > 1:
168
- st.error("More than one input type has been provided! Please only choose one of the options, i.e. input proteins manually, or select a pre-screened Ligand, or upload a file. Refresh this window to get started again.")
169
  query_is_available = False
170
  else:
171
  query_is_available = True
@@ -176,13 +171,12 @@ def serialize_s(cat, r):
176
  return s
177
 
178
  if query_is_available:
179
- columns = st.columns([2, 2, 2, 1, 1])
180
 
181
  done = set()
182
 
183
  col = columns[0]
184
  cat_name = "Frequently enriched"
185
- col.subheader(cat_name)
186
  S = []
187
  R = []
188
  for r in user_input:
@@ -192,12 +186,11 @@ if query_is_available:
192
  S += [serialize_s(cat_name, R[-1])]
193
  done.update([r])
194
  df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
195
- col.metric(label="Counts", value=df.shape[0])
196
  col.dataframe(df, use_container_width=True)
197
 
198
  col = columns[1]
199
  cat_name = "Medium specificity"
200
- col.subheader(cat_name)
201
  R = []
202
  for r in user_input:
203
  pid = user_pids[r]
@@ -206,12 +199,14 @@ if query_is_available:
206
  S += [serialize_s(cat_name, R[-1])]
207
  done.update([r])
208
  df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
209
- col.metric(label="Counts", value=df.shape[0])
210
  col.dataframe(df, use_container_width=True)
211
 
212
- col = columns[2]
 
 
 
213
  cat_name = "High specificity"
214
- col.subheader(cat_name)
215
  R = []
216
  for r in user_input:
217
  pid = user_pids[r]
@@ -220,12 +215,11 @@ if query_is_available:
220
  S += [serialize_s(cat_name, R[-1])]
221
  done.update([r])
222
  df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
223
- col.metric(label="Counts", value=df.shape[0])
224
  col.dataframe(df, use_container_width=True)
225
 
226
- col = columns[3]
227
  cat_name = "Never enriched"
228
- col.subheader(cat_name)
229
  R = []
230
  for r in user_input:
231
  if r in done:
@@ -236,12 +230,11 @@ if query_is_available:
236
  S += [serialize_s(cat_name, R[-1])]
237
  done.update([r])
238
  df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
239
- col.metric(label="Counts", value=df.shape[0])
240
  col.dataframe(df[["UniProt", "GeneName"]], use_container_width=True)
241
 
242
- col = columns[4]
243
  cat_name = "Not in HEK293t"
244
- col.subheader(cat_name)
245
  R = []
246
  for r in user_input:
247
  if r in done:
@@ -252,10 +245,10 @@ if query_is_available:
252
  R += [[pid, pid2name_primary[pid], len(pid2fid[pid]), fids_]]
253
  S += [serialize_s(cat_name, R[-1])]
254
  df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
255
- col.metric(label="Counts", value=df.shape[0])
256
  col.dataframe(df[["UniProt", "GeneName"]], use_container_width=True)
257
 
258
  data = pd.DataFrame(S, columns = ["Category", "UniProt", "GeneName", "Hits", "Fragments"])
259
  data = data.sort_values(by=["Hits", "GeneName", "Category"], ascending=[False, True, True]).reset_index(drop=True)
260
  data = convert_df(data)
261
- st.download_button(label="Download search results", data=data, file_name="ligand_discovery_search_results.csv", mime="text/csv")
 
10
  FREQUENT_CUTOFF = 40
11
  MEDIUM_CUTOFF = 10
12
 
 
13
  st.set_page_config(
14
  page_title="Ligand Disovery 2: Explore Protein-sets",
15
  page_icon=":home:",
 
111
 
112
  # layout
113
 
114
+ st.sidebar.title("Ligand Discovery 2: Explore Protein-sets")
115
+ st.sidebar.write("We screened 407 fully-functionalized small molecule fragments ('Ligands') in HEK293t cells. For {0} of the Ligands, we found at least one protein enriched. In total, we enriched {1} proteins at least once. Query your protein sets of interest and explore them in light of our dataset!".format(len(fid2pid), len(pid2fid)))
 
 
116
 
117
+ manual_input = st.sidebar.multiselect(label="Input proteins manually", options = [""] + sorted(options), default=[], help="Select proteins by UniProt Accession code or Gene Symbol")
 
 
118
  user_pids = {}
119
  user_input = []
120
  for i in manual_input:
121
  user_pids[i] = any2pid[i]
122
  user_input += [i]
123
 
124
+ st.sidebar.subheader("OR")
125
 
126
  fids = sorted(set(db["FragID"]))
127
+ fid_input = st.sidebar.selectbox(label="Select pre-screened Ligand by identifier", options = [""] + fids, help="Select an already profiled Ligand in our primary screening (page Interactions). Use the Ligand identifier (example, C001)")
128
  if fid_input != "":
129
  user_input = fid2pid[fid_input]
130
  user_pids = dict((r,r) for r in user_input)
131
 
132
+ st.sidebar.subheader("OR")
133
 
134
  example_file = db
135
+ file_input = st.sidebar.file_uploader(label="Upload a file", help="Provide a file containing one UniProt Accession code or Gene Symbol per row.")
136
  if file_input:
137
  user_input = list(pd.read_csv(file_input, header=None)[0])
138
  for i in user_input:
139
  user_pids[i] = any2pid[i]
140
 
141
+ st.sidebar.download_button(label="Download example file", data=convert_df_no_header(pd.DataFrame({"uniprot_ac": example_input})), file_name="protein_profile_example.csv", mime="text/csv")
142
 
143
  # checks
144
 
 
152
  file_input = None
153
 
154
  if not manual_input and not file_input and not fid_input:
155
+ st.sidebar.info("Use any of the options above to explore a protein profile...")
156
  query_is_available = False
157
  else:
158
  c = 0
 
160
  if x is not None:
161
  c += 1
162
  if c > 1:
163
+ st.sidebar.error("More than one input type has been provided! Please only choose one of the options, i.e. input proteins manually, or select a pre-screened Ligand, or upload a file. Refresh this window to get started again.")
164
  query_is_available = False
165
  else:
166
  query_is_available = True
 
171
  return s
172
 
173
  if query_is_available:
174
+ columns = st.columns([0.5, 0.5])
175
 
176
  done = set()
177
 
178
  col = columns[0]
179
  cat_name = "Frequently enriched"
 
180
  S = []
181
  R = []
182
  for r in user_input:
 
186
  S += [serialize_s(cat_name, R[-1])]
187
  done.update([r])
188
  df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
189
+ col.markdown("**{0} (Low specificity)** : {1}".format(cat_name, df.shape[0]))
190
  col.dataframe(df, use_container_width=True)
191
 
192
  col = columns[1]
193
  cat_name = "Medium specificity"
 
194
  R = []
195
  for r in user_input:
196
  pid = user_pids[r]
 
199
  S += [serialize_s(cat_name, R[-1])]
200
  done.update([r])
201
  df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
202
+ col.markdown("**{0}** : {1}".format(cat_name, df.shape[0]))
203
  col.dataframe(df, use_container_width=True)
204
 
205
+ st.divider()
206
+ columns = st.columns([0.5, 0.25, 0.25])
207
+
208
+ col = columns[0]
209
  cat_name = "High specificity"
 
210
  R = []
211
  for r in user_input:
212
  pid = user_pids[r]
 
215
  S += [serialize_s(cat_name, R[-1])]
216
  done.update([r])
217
  df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
218
+ col.markdown("**{0}** : {1}".format(cat_name, df.shape[0]))
219
  col.dataframe(df, use_container_width=True)
220
 
221
+ col = columns[1]
222
  cat_name = "Never enriched"
 
223
  R = []
224
  for r in user_input:
225
  if r in done:
 
230
  S += [serialize_s(cat_name, R[-1])]
231
  done.update([r])
232
  df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
233
+ col.markdown("**{0}** : {1}".format(cat_name, df.shape[0]))
234
  col.dataframe(df[["UniProt", "GeneName"]], use_container_width=True)
235
 
236
+ col = columns[2]
237
  cat_name = "Not in HEK293t"
 
238
  R = []
239
  for r in user_input:
240
  if r in done:
 
245
  R += [[pid, pid2name_primary[pid], len(pid2fid[pid]), fids_]]
246
  S += [serialize_s(cat_name, R[-1])]
247
  df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
248
+ col.markdown("**{0}** : {1}".format(cat_name, df.shape[0]))
249
  col.dataframe(df[["UniProt", "GeneName"]], use_container_width=True)
250
 
251
  data = pd.DataFrame(S, columns = ["Category", "UniProt", "GeneName", "Hits", "Fragments"])
252
  data = data.sort_values(by=["Hits", "GeneName", "Category"], ascending=[False, True, True]).reset_index(drop=True)
253
  data = convert_df(data)
254
+ st.download_button(label="Download search results", data=data, file_name="ligand_discovery_search_results.csv", mime="text/csv")