Update app.py
Browse files
app.py
CHANGED
|
@@ -10,7 +10,6 @@ root = os.path.dirname(os.path.abspath(__file__))
|
|
| 10 |
FREQUENT_CUTOFF = 40
|
| 11 |
MEDIUM_CUTOFF = 10
|
| 12 |
|
| 13 |
-
|
| 14 |
st.set_page_config(
|
| 15 |
page_title="Ligand Discovery 2: Explore Protein-sets",
|
| 16 |
page_icon=":home:",
|
|
@@ -112,38 +111,34 @@ options = sorted([x for k,v in pid2name_primary.items() for x in [k,v]])
|
|
| 112 |
|
| 113 |
# layout
|
| 114 |
|
| 115 |
-
st.title("Ligand Discovery 2: Explore Protein-sets")
|
| 116 |
-
st.write("We screened 407 fully-functionalized small molecule fragments ('Ligands') in HEK293t cells. For {0} of the Ligands, we found at least one protein enriched. In total, we enriched {1} proteins at least once. Query your protein sets of interest and explore them in light of our dataset!".format(len(fid2pid), len(pid2fid)))
|
| 117 |
-
|
| 118 |
-
cols = st.columns([1,1,2])
|
| 119 |
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
manual_input = col.multiselect(label="Input proteins manually", options = [""] + sorted(options), default=[], help="Select proteins by UniProt Accession code or Gene Symbol")
|
| 123 |
user_pids = {}
|
| 124 |
user_input = []
|
| 125 |
for i in manual_input:
|
| 126 |
user_pids[i] = any2pid[i]
|
| 127 |
user_input += [i]
|
| 128 |
|
| 129 |
-
|
| 130 |
|
| 131 |
fids = sorted(set(db["FragID"]))
|
| 132 |
-
fid_input =
|
| 133 |
if fid_input != "":
|
| 134 |
user_input = fid2pid[fid_input]
|
| 135 |
user_pids = dict((r,r) for r in user_input)
|
| 136 |
|
| 137 |
-
|
| 138 |
|
| 139 |
example_file = db
|
| 140 |
-
file_input =
|
| 141 |
if file_input:
|
| 142 |
user_input = list(pd.read_csv(file_input, header=None)[0])
|
| 143 |
for i in user_input:
|
| 144 |
user_pids[i] = any2pid[i]
|
| 145 |
|
| 146 |
-
|
| 147 |
|
| 148 |
# checks
|
| 149 |
|
|
@@ -157,7 +152,7 @@ if not file_input:
|
|
| 157 |
file_input = None
|
| 158 |
|
| 159 |
if not manual_input and not file_input and not fid_input:
|
| 160 |
-
st.info("Use any of the options above to explore a protein profile...")
|
| 161 |
query_is_available = False
|
| 162 |
else:
|
| 163 |
c = 0
|
|
@@ -165,7 +160,7 @@ else:
|
|
| 165 |
if x is not None:
|
| 166 |
c += 1
|
| 167 |
if c > 1:
|
| 168 |
-
st.error("More than one input type has been provided! Please only choose one of the options, i.e. input proteins manually, or select a pre-screened Ligand, or upload a file. Refresh this window to get started again.")
|
| 169 |
query_is_available = False
|
| 170 |
else:
|
| 171 |
query_is_available = True
|
|
@@ -176,13 +171,12 @@ def serialize_s(cat, r):
|
|
| 176 |
return s
|
| 177 |
|
| 178 |
if query_is_available:
|
| 179 |
-
columns = st.columns([
|
| 180 |
|
| 181 |
done = set()
|
| 182 |
|
| 183 |
col = columns[0]
|
| 184 |
cat_name = "Frequently enriched"
|
| 185 |
-
col.subheader(cat_name)
|
| 186 |
S = []
|
| 187 |
R = []
|
| 188 |
for r in user_input:
|
|
@@ -192,12 +186,11 @@ if query_is_available:
|
|
| 192 |
S += [serialize_s(cat_name, R[-1])]
|
| 193 |
done.update([r])
|
| 194 |
df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
|
| 195 |
-
col.
|
| 196 |
col.dataframe(df, use_container_width=True)
|
| 197 |
|
| 198 |
col = columns[1]
|
| 199 |
cat_name = "Medium specificity"
|
| 200 |
-
col.subheader(cat_name)
|
| 201 |
R = []
|
| 202 |
for r in user_input:
|
| 203 |
pid = user_pids[r]
|
|
@@ -206,12 +199,14 @@ if query_is_available:
|
|
| 206 |
S += [serialize_s(cat_name, R[-1])]
|
| 207 |
done.update([r])
|
| 208 |
df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
|
| 209 |
-
col.
|
| 210 |
col.dataframe(df, use_container_width=True)
|
| 211 |
|
| 212 |
-
|
|
|
|
|
|
|
|
|
|
| 213 |
cat_name = "High specificity"
|
| 214 |
-
col.subheader(cat_name)
|
| 215 |
R = []
|
| 216 |
for r in user_input:
|
| 217 |
pid = user_pids[r]
|
|
@@ -220,12 +215,11 @@ if query_is_available:
|
|
| 220 |
S += [serialize_s(cat_name, R[-1])]
|
| 221 |
done.update([r])
|
| 222 |
df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
|
| 223 |
-
col.
|
| 224 |
col.dataframe(df, use_container_width=True)
|
| 225 |
|
| 226 |
-
col = columns[
|
| 227 |
cat_name = "Never enriched"
|
| 228 |
-
col.subheader(cat_name)
|
| 229 |
R = []
|
| 230 |
for r in user_input:
|
| 231 |
if r in done:
|
|
@@ -236,12 +230,11 @@ if query_is_available:
|
|
| 236 |
S += [serialize_s(cat_name, R[-1])]
|
| 237 |
done.update([r])
|
| 238 |
df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
|
| 239 |
-
col.
|
| 240 |
col.dataframe(df[["UniProt", "GeneName"]], use_container_width=True)
|
| 241 |
|
| 242 |
-
col = columns[
|
| 243 |
cat_name = "Not in HEK293t"
|
| 244 |
-
col.subheader(cat_name)
|
| 245 |
R = []
|
| 246 |
for r in user_input:
|
| 247 |
if r in done:
|
|
@@ -252,10 +245,10 @@ if query_is_available:
|
|
| 252 |
R += [[pid, pid2name_primary[pid], len(pid2fid[pid]), fids_]]
|
| 253 |
S += [serialize_s(cat_name, R[-1])]
|
| 254 |
df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
|
| 255 |
-
col.
|
| 256 |
col.dataframe(df[["UniProt", "GeneName"]], use_container_width=True)
|
| 257 |
|
| 258 |
data = pd.DataFrame(S, columns = ["Category", "UniProt", "GeneName", "Hits", "Fragments"])
|
| 259 |
data = data.sort_values(by=["Hits", "GeneName", "Category"], ascending=[False, True, True]).reset_index(drop=True)
|
| 260 |
data = convert_df(data)
|
| 261 |
-
st.download_button(label="Download search results", data=data, file_name="ligand_discovery_search_results.csv", mime="text/csv")
|
|
|
|
| 10 |
FREQUENT_CUTOFF = 40
|
| 11 |
MEDIUM_CUTOFF = 10
|
| 12 |
|
|
|
|
| 13 |
st.set_page_config(
|
| 14 |
page_title="Ligand Discovery 2: Explore Protein-sets",
|
| 15 |
page_icon=":home:",
|
|
|
|
| 111 |
|
| 112 |
# layout
|
| 113 |
|
| 114 |
+
st.sidebar.title("Ligand Discovery 2: Explore Protein-sets")
|
| 115 |
+
st.sidebar.write("We screened 407 fully-functionalized small molecule fragments ('Ligands') in HEK293t cells. For {0} of the Ligands, we found at least one protein enriched. In total, we enriched {1} proteins at least once. Query your protein sets of interest and explore them in light of our dataset!".format(len(fid2pid), len(pid2fid)))
|
|
|
|
|
|
|
| 116 |
|
| 117 |
+
manual_input = st.sidebar.multiselect(label="Input proteins manually", options = [""] + sorted(options), default=[], help="Select proteins by UniProt Accession code or Gene Symbol")
|
|
|
|
|
|
|
| 118 |
user_pids = {}
|
| 119 |
user_input = []
|
| 120 |
for i in manual_input:
|
| 121 |
user_pids[i] = any2pid[i]
|
| 122 |
user_input += [i]
|
| 123 |
|
| 124 |
+
st.sidebar.subheader("OR")
|
| 125 |
|
| 126 |
fids = sorted(set(db["FragID"]))
|
| 127 |
+
fid_input = st.sidebar.selectbox(label="Select pre-screened Ligand by identifier", options = [""] + fids, help="Select an already profiled Ligand in our primary screening (page Interactions). Use the Ligand identifier (example, C001)")
|
| 128 |
if fid_input != "":
|
| 129 |
user_input = fid2pid[fid_input]
|
| 130 |
user_pids = dict((r,r) for r in user_input)
|
| 131 |
|
| 132 |
+
st.sidebar.subheader("OR")
|
| 133 |
|
| 134 |
example_file = db
|
| 135 |
+
file_input = st.sidebar.file_uploader(label="Upload a file", help="Provide a file containing one UniProt Accession code or Gene Symbol per row.")
|
| 136 |
if file_input:
|
| 137 |
user_input = list(pd.read_csv(file_input, header=None)[0])
|
| 138 |
for i in user_input:
|
| 139 |
user_pids[i] = any2pid[i]
|
| 140 |
|
| 141 |
+
st.sidebar.download_button(label="Download example file", data=convert_df_no_header(pd.DataFrame({"uniprot_ac": example_input})), file_name="protein_profile_example.csv", mime="text/csv")
|
| 142 |
|
| 143 |
# checks
|
| 144 |
|
|
|
|
| 152 |
file_input = None
|
| 153 |
|
| 154 |
if not manual_input and not file_input and not fid_input:
|
| 155 |
+
st.sidebar.info("Use any of the options above to explore a protein profile...")
|
| 156 |
query_is_available = False
|
| 157 |
else:
|
| 158 |
c = 0
|
|
|
|
| 160 |
if x is not None:
|
| 161 |
c += 1
|
| 162 |
if c > 1:
|
| 163 |
+
st.sidebar.error("More than one input type has been provided! Please only choose one of the options, i.e. input proteins manually, or select a pre-screened Ligand, or upload a file. Refresh this window to get started again.")
|
| 164 |
query_is_available = False
|
| 165 |
else:
|
| 166 |
query_is_available = True
|
|
|
|
| 171 |
return s
|
| 172 |
|
| 173 |
if query_is_available:
|
| 174 |
+
columns = st.columns([0.5, 0.5])
|
| 175 |
|
| 176 |
done = set()
|
| 177 |
|
| 178 |
col = columns[0]
|
| 179 |
cat_name = "Frequently enriched"
|
|
|
|
| 180 |
S = []
|
| 181 |
R = []
|
| 182 |
for r in user_input:
|
|
|
|
| 186 |
S += [serialize_s(cat_name, R[-1])]
|
| 187 |
done.update([r])
|
| 188 |
df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
|
| 189 |
+
col.markdown("**{0} (Low specificity)** : {1}".format(cat_name, df.shape[0]))
|
| 190 |
col.dataframe(df, use_container_width=True)
|
| 191 |
|
| 192 |
col = columns[1]
|
| 193 |
cat_name = "Medium specificity"
|
|
|
|
| 194 |
R = []
|
| 195 |
for r in user_input:
|
| 196 |
pid = user_pids[r]
|
|
|
|
| 199 |
S += [serialize_s(cat_name, R[-1])]
|
| 200 |
done.update([r])
|
| 201 |
df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
|
| 202 |
+
col.markdown("**{0}** : {1}".format(cat_name, df.shape[0]))
|
| 203 |
col.dataframe(df, use_container_width=True)
|
| 204 |
|
| 205 |
+
st.divider()
|
| 206 |
+
columns = st.columns([0.5, 0.25, 0.25])
|
| 207 |
+
|
| 208 |
+
col = columns[0]
|
| 209 |
cat_name = "High specificity"
|
|
|
|
| 210 |
R = []
|
| 211 |
for r in user_input:
|
| 212 |
pid = user_pids[r]
|
|
|
|
| 215 |
S += [serialize_s(cat_name, R[-1])]
|
| 216 |
done.update([r])
|
| 217 |
df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
|
| 218 |
+
col.markdown("**{0}** : {1}".format(cat_name, df.shape[0]))
|
| 219 |
col.dataframe(df, use_container_width=True)
|
| 220 |
|
| 221 |
+
col = columns[1]
|
| 222 |
cat_name = "Never enriched"
|
|
|
|
| 223 |
R = []
|
| 224 |
for r in user_input:
|
| 225 |
if r in done:
|
|
|
|
| 230 |
S += [serialize_s(cat_name, R[-1])]
|
| 231 |
done.update([r])
|
| 232 |
df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
|
| 233 |
+
col.markdown("**{0}** : {1}".format(cat_name, df.shape[0]))
|
| 234 |
col.dataframe(df[["UniProt", "GeneName"]], use_container_width=True)
|
| 235 |
|
| 236 |
+
col = columns[2]
|
| 237 |
cat_name = "Not in HEK293t"
|
|
|
|
| 238 |
R = []
|
| 239 |
for r in user_input:
|
| 240 |
if r in done:
|
|
|
|
| 245 |
R += [[pid, pid2name_primary[pid], len(pid2fid[pid]), fids_]]
|
| 246 |
S += [serialize_s(cat_name, R[-1])]
|
| 247 |
df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
|
| 248 |
+
col.markdown("**{0}** : {1}".format(cat_name, df.shape[0]))
|
| 249 |
col.dataframe(df[["UniProt", "GeneName"]], use_container_width=True)
|
| 250 |
|
| 251 |
data = pd.DataFrame(S, columns = ["Category", "UniProt", "GeneName", "Hits", "Fragments"])
|
| 252 |
data = data.sort_values(by=["Hits", "GeneName", "Category"], ascending=[False, True, True]).reset_index(drop=True)
|
| 253 |
data = convert_df(data)
|
| 254 |
+
st.download_button(label="Download search results", data=data, file_name="ligand_discovery_search_results.csv", mime="text/csv")
|