Spaces:

ligdis
/

2

Running

App Files Files Community

ligdis committed on Mar 5, 2025

Commit

37b61da

verified ·

1 Parent(s): 647aa0d

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -30

app.py CHANGED Viewed

@@ -10,7 +10,6 @@ root = os.path.dirname(os.path.abspath(__file__))
 FREQUENT_CUTOFF = 40
 MEDIUM_CUTOFF = 10
 st.set_page_config(
     page_title="Ligand Disovery 2: Explore Protein-sets",
     page_icon=":home:",
@@ -112,38 +111,34 @@ options = sorted([x for k,v in pid2name_primary.items() for x in [k,v]])
 # layout
-st.title("Ligand Discovery 2: Explore Protein-sets")
-st.write("We screened 407 fully-functionalized small molecule fragments ('Ligands') in HEK293t cells. For {0} of the Ligands, we found at least one protein enriched. In total, we enriched {1} proteins at least once. Query your protein sets of interest and explore them in light of our dataset!".format(len(fid2pid), len(pid2fid)))
-cols = st.columns([1,1,2])
-col = cols[0]
-manual_input = col.multiselect(label="Input proteins manually", options = [""] + sorted(options), default=[], help="Select proteins by UniProt Accession code or Gene Symbol")
 user_pids = {}
 user_input = []
 for i in manual_input:
     user_pids[i] = any2pid[i]
     user_input += [i]
-col = cols[1]
 fids = sorted(set(db["FragID"]))
-fid_input = col.selectbox(label="Select pre-screened Ligand by identifier", options = [""] + fids, help="Select an already profiled Ligand in our primary screening (page Interactions). Use the Ligand identifier (example, C001)")
 if fid_input != "":
     user_input = fid2pid[fid_input]
     user_pids = dict((r,r) for r in user_input)
-col = cols[2]
 example_file = db
-file_input = col.file_uploader(label="Upload a file", help="Provide a file containing one UniProt Accession code or Gene Symbol per row.")
 if file_input:
     user_input = list(pd.read_csv(file_input, header=None)[0])
     for i in user_input:
         user_pids[i] = any2pid[i]
-col.download_button(label="Download example file", data=convert_df_no_header(pd.DataFrame({"uniprot_ac": example_input})), file_name="protein_profile_example.csv", mime="text/csv")
 # checks
@@ -157,7 +152,7 @@ if not file_input:
     file_input = None
 if not manual_input and not file_input and not fid_input:
-    st.info("Use any of the options above to explore a protein profile...")
     query_is_available = False
 else:
     c = 0
@@ -165,7 +160,7 @@ else:
         if x is not None:
             c += 1
     if c > 1:
-        st.error("More than one input type has been provided! Please only choose one of the options, i.e. input proteins manually, or select a pre-screened Ligand, or upload a file. Refresh this window to get started again.")
         query_is_available = False
     else:
         query_is_available = True
@@ -176,13 +171,12 @@ def serialize_s(cat, r):
     return s
 if query_is_available:
-    columns = st.columns([2, 2, 2, 1, 1])
     done = set()
     col = columns[0]
     cat_name = "Frequently enriched"
-    col.subheader(cat_name)
     S = []
     R = []
     for r in user_input:
@@ -192,12 +186,11 @@ if query_is_available:
             S += [serialize_s(cat_name, R[-1])]
             done.update([r])
     df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
-    col.metric(label="Counts", value=df.shape[0])
     col.dataframe(df, use_container_width=True)
     col = columns[1]
     cat_name = "Medium specificity"
-    col.subheader(cat_name)
     R = []
     for r in user_input:
         pid = user_pids[r]
@@ -206,12 +199,14 @@ if query_is_available:
             S += [serialize_s(cat_name, R[-1])]
             done.update([r])
     df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
-    col.metric(label="Counts", value=df.shape[0])
     col.dataframe(df, use_container_width=True)
-    col = columns[2]
     cat_name = "High specificity"
-    col.subheader(cat_name)
     R = []
     for r in user_input:
         pid = user_pids[r]
@@ -220,12 +215,11 @@ if query_is_available:
             S += [serialize_s(cat_name, R[-1])]
             done.update([r])
     df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
-    col.metric(label="Counts", value=df.shape[0])
     col.dataframe(df, use_container_width=True)
-    col = columns[3]
     cat_name = "Never enriched"
-    col.subheader(cat_name)
     R = []
     for r in user_input:
         if r in done:
@@ -236,12 +230,11 @@ if query_is_available:
             S += [serialize_s(cat_name, R[-1])]
             done.update([r])
     df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
-    col.metric(label="Counts", value=df.shape[0])
     col.dataframe(df[["UniProt", "GeneName"]], use_container_width=True)
-    col = columns[4]
     cat_name = "Not in HEK293t"
-    col.subheader(cat_name)
     R = []
     for r in user_input:
         if r in done:
@@ -252,10 +245,10 @@ if query_is_available:
             R += [[pid, pid2name_primary[pid], len(pid2fid[pid]), fids_]]
             S += [serialize_s(cat_name, R[-1])]
     df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
-    col.metric(label="Counts", value=df.shape[0])
     col.dataframe(df[["UniProt", "GeneName"]], use_container_width=True)
     data = pd.DataFrame(S, columns = ["Category", "UniProt", "GeneName", "Hits", "Fragments"])
     data = data.sort_values(by=["Hits", "GeneName", "Category"], ascending=[False, True, True]).reset_index(drop=True)
     data = convert_df(data)
-    st.download_button(label="Download search results", data=data, file_name="ligand_discovery_search_results.csv", mime="text/csv")

 FREQUENT_CUTOFF = 40
 MEDIUM_CUTOFF = 10
 st.set_page_config(
     page_title="Ligand Disovery 2: Explore Protein-sets",
     page_icon=":home:",
 # layout
+st.sidebar.title("Ligand Discovery 2: Explore Protein-sets")
+st.sidebar.write("We screened 407 fully-functionalized small molecule fragments ('Ligands') in HEK293t cells. For {0} of the Ligands, we found at least one protein enriched. In total, we enriched {1} proteins at least once. Query your protein sets of interest and explore them in light of our dataset!".format(len(fid2pid), len(pid2fid)))
+manual_input = st.sidebar.multiselect(label="Input proteins manually", options = [""] + sorted(options), default=[], help="Select proteins by UniProt Accession code or Gene Symbol")
 user_pids = {}
 user_input = []
 for i in manual_input:
     user_pids[i] = any2pid[i]
     user_input += [i]
+st.sidebar.subheader("OR")
 fids = sorted(set(db["FragID"]))
+fid_input = st.sidebar.selectbox(label="Select pre-screened Ligand by identifier", options = [""] + fids, help="Select an already profiled Ligand in our primary screening (page Interactions). Use the Ligand identifier (example, C001)")
 if fid_input != "":
     user_input = fid2pid[fid_input]
     user_pids = dict((r,r) for r in user_input)
+st.sidebar.subheader("OR")
 example_file = db
+file_input = st.sidebar.file_uploader(label="Upload a file", help="Provide a file containing one UniProt Accession code or Gene Symbol per row.")
 if file_input:
     user_input = list(pd.read_csv(file_input, header=None)[0])
     for i in user_input:
         user_pids[i] = any2pid[i]
+st.sidebar.download_button(label="Download example file", data=convert_df_no_header(pd.DataFrame({"uniprot_ac": example_input})), file_name="protein_profile_example.csv", mime="text/csv")
 # checks
     file_input = None
 if not manual_input and not file_input and not fid_input:
+    st.sidebar.info("Use any of the options above to explore a protein profile...")
     query_is_available = False
 else:
     c = 0
         if x is not None:
             c += 1
     if c > 1:
+        st.sidebar.error("More than one input type has been provided! Please only choose one of the options, i.e. input proteins manually, or select a pre-screened Ligand, or upload a file. Refresh this window to get started again.")
         query_is_available = False
     else:
         query_is_available = True
     return s
 if query_is_available:
+    columns = st.columns([0.5, 0.5])
     done = set()
     col = columns[0]
     cat_name = "Frequently enriched"
     S = []
     R = []
     for r in user_input:
             S += [serialize_s(cat_name, R[-1])]
             done.update([r])
     df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
+    col.markdown("**{0} (Low specificity)** : {1}".format(cat_name, df.shape[0]))
     col.dataframe(df, use_container_width=True)
     col = columns[1]
     cat_name = "Medium specificity"
     R = []
     for r in user_input:
         pid = user_pids[r]
             S += [serialize_s(cat_name, R[-1])]
             done.update([r])
     df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
+    col.markdown("**{0}** : {1}".format(cat_name, df.shape[0]))
     col.dataframe(df, use_container_width=True)
+    st.divider()
+    columns = st.columns([0.5, 0.25, 0.25])
+    col = columns[0]
     cat_name = "High specificity"
     R = []
     for r in user_input:
         pid = user_pids[r]
             S += [serialize_s(cat_name, R[-1])]
             done.update([r])
     df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
+    col.markdown("**{0}** : {1}".format(cat_name, df.shape[0]))
     col.dataframe(df, use_container_width=True)
+    col = columns[1]
     cat_name = "Never enriched"
     R = []
     for r in user_input:
         if r in done:
             S += [serialize_s(cat_name, R[-1])]
             done.update([r])
     df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
+    col.markdown("**{0}** : {1}".format(cat_name, df.shape[0]))
     col.dataframe(df[["UniProt", "GeneName"]], use_container_width=True)
+    col = columns[2]
     cat_name = "Not in HEK293t"
     R = []
     for r in user_input:
         if r in done:
             R += [[pid, pid2name_primary[pid], len(pid2fid[pid]), fids_]]
             S += [serialize_s(cat_name, R[-1])]
     df = pd.DataFrame(R, columns=["UniProt", "GeneName", "Hits", "Fragments"])
+    col.markdown("**{0}** : {1}".format(cat_name, df.shape[0]))
     col.dataframe(df[["UniProt", "GeneName"]], use_container_width=True)
     data = pd.DataFrame(S, columns = ["Category", "UniProt", "GeneName", "Hits", "Fragments"])
     data = data.sort_values(by=["Hits", "GeneName", "Category"], ascending=[False, True, True]).reset_index(drop=True)
     data = convert_df(data)
+    st.download_button(label="Download search results", data=data, file_name="ligand_discovery_search_results.csv", mime="text/csv")