Spaces:

maomlab
/

CryptoCEN-ExpressionScatter

Build error

App Files Files Community

maom committed on Jan 28, 2024

Commit

57a3fc8

verified ·

1 Parent(s): f23e50d

initial chart

Browse files

Files changed (1) hide show

app.py +66 -0

app.py ADDED Viewed

	@@ -0,0 +1,66 @@

+import altair as alt
+import datasets
+import numpy as np
+import pandas as pd
+import streamlit as st
+st.set_page_config(layout='wide')
+st.markdown("""
+# CryptoCEN Expression Scatter
+**CryptoCEN** is a co-expression network for *Cryptococcus neoformans* built on 1,524 RNA-seq runs across 34 studies.
+A pair of genes are said to be co-expressed when their expression is correlated across different conditions and
+is often a marker for genes to be involved in similar processes.
+To Cite:
+MJ O'Meara, JR Rapala, CB Nichols, C Alexandre, B Billmyre, JL Steenwyk, A Alspaugh,
+TR O'Meara CryptoCEN: A Co-Expression Network for Cryptococcus neoformans reveals
+novel proteins involved in DNA damage repair
+* Code available at https://github.com/maomlab/CalCEN/tree/master/vignettes/CryptoCEN
+* Full network and dataset: https://huggingface.co/datasets/maomlab/CryptoCEN
+## Look up top-coexpressed partners:
+Put in the ``CNAG_#####`` gene_id for a gene and expand the table to get the top 50 co-expressed genes.
+``coexp_score`` ranges between ``[0-1]``, where ``1`` is the best and greater than ``0.85`` can be considered significant.
+""")
+estimated_expression = datasets.load_dataset(
+    path = "maomlab/CryptoCEN",
+    data_files = {"estimated_expression": "estimated_expression.tsv"})
+estimated_expression = estimated_expression["estimated_expression"].to_pandas()
+estimated_expression_meta = datasets.load_dataset(
+    path = "maomlab/CryptoCEN/Data",
+    data_files = {"estimated_expression_meta": "estimated_expression_meta.tsv"})
+estimated_expression_meta = estimated_expression_meta["estimated_expression_meta"].to_pandas()
+col1, col2, col3 = st.columns(spec = [0.3, 0.2, 0.5])
+with col1:
+    gene_id_1 = st.text_input(
+        label = "Gene ID 1",
+        value = "CNAG_04365",
+        max_chars = 10,
+        help = "CNAG Gene ID e.g. CNAG_04365")
+with col2:
+    gene_id_2 = st.text_input(
+        label = "Gene ID 2",
+        value = "CNAG_04222",
+        max_chars = 10,
+        help = "CNAG Gene ID e.g. CNAG_04222")
+chart_data = pd.DataFrame({
+    "expression_1": estimated_expression[estimated_expression.index == gene_id_1,],
+    "expression_2": estimated_expression[estimated_expression.index == gene_id_2,],
+    "run_accession": estimated_expression.columns,
+    "run_accession_meta": estimated_expression_meta["run_accession"],
+    "study_accession": estimated_expression_meta["study_accession"])
+print(f"run_ids are equal: {sum(chart_data["run_accession"] == chart_data["run_accession_meta"])}")
+chart = (
+   alt.Chart(chart_data)
+   .mark_circle()
+   .encode(x="expression_1", y="expression_2", size=5, color="study_accession", tooltip=["run_accession", "study_accession"]))
+st.altair_chart(chart, use_container_width=True)