import numpy as np
import pandas as pd
import datasets
import streamlit as st
from streamlit_cytoscapejs import st_cytoscapejs
import networkx as nx

st.set_page_config(layout='wide')

# Seed the input widgets from the URL query args so it's possible to link
# straight to a pre-filled view of this page. Every argument falls back to
# its default when it is absent from the URL.
query_params = st.query_params

# gene_ids arrive comma separated; swap in "\n" as the separator so they
# show correctly (one per row) in the text area
input_gene_ids = query_params.get(
    "gene_ids", "B9J08_000884,B9J08_004112").replace(",", "\n")

# these two stay strings here, they are only the initial text of the
# corresponding input boxes further down
coexp_score_threshold = query_params.get("coexp_score_threshold", "0.85")
max_per_gene = query_params.get("max_per_gene", "25")

# Page header: introduces the network, gives the citation and the links to
# the code and data, and tells the user how to seed a plot. The text is
# rendered as Markdown, so the indented lines display as a literal block.
st.markdown("""
# CaurisCEN Network
**CaurisCEN** is a co-expression network for *Candida auris* built on 577 RNA-seq runs across 2 96-well plates formats in 3 biological replicas.
A pair of genes are said to be co-expressed when their expression is correlated across different conditions and
is often a marker for genes to be involved in similar processes. 
To Cite:

    Rapala JR, MJ O'Meara, TR O'Meara
    CaurisCEN: A Co-Expression Network for Candida auris

* Code available at https://github.com/maomlab/CalCEN/tree/master/vignettes/CaurisCEN
* Full network and dataset: https://huggingface.co/datasets/maomlab/CaurisCEN

## Plot a network for a set of genes
Put a ``B9J08_######`` gene_id, one each row to seed the network
""")

@st.cache_data
def _load_cauriscen_table(name):
    """Load the 'train' split of one CaurisCEN config as a pandas DataFrame.

    Streamlit re-runs this whole script on every widget interaction; caching
    the loader means the dataset is read and converted once per table rather
    than on every rerun.

    Args:
        name: config name within the ``maomlab/CaurisCEN`` dataset; its data
            files are read from ``<name>/data``.

    Returns:
        The 'train' split converted with ``to_pandas()``.
    """
    return datasets.load_dataset(
        path = "maomlab/CaurisCEN",
        name = name,
        data_dir = f"{name}/data")['train'].to_pandas()


# per-gene annotations, looked up by locus_tag_old further down
gene_metadata = _load_cauriscen_table("gene_metadata")

# scored gene pairs (feature_name_1, feature_name_2, score)
top_coexp_hits = _load_cauriscen_table("top_coexp_hits_general")


# Controls sit in three narrow columns; the fourth column is never written
# to and only pads the row out to the page width.
col1, col2, col3, padding = st.columns(spec = [0.2, 0.2, 0.2, 0.4])

# From here on input_gene_ids holds whatever is currently in the text area,
# which starts out as the value taken from the URL (or the default).
input_gene_ids = col1.text_area(
    label = "Gene IDs",
    value = str(input_gene_ids),
    height = 130,
    help = "B9J08 Gene IDs e.g. B9J08_000884")


with col2:
    # Both settings are free-text boxes, so each is parsed and range checked
    # here. A value that fails shows its error directly under its own box and
    # the run is halted once both boxes have been drawn, so the rest of the
    # script never sees an unparsed string or an out-of-range number.
    inputs_are_valid = True

    coexp_score_threshold_text = st.text_input(
        label = "Co-expression threshold [0-1]",
        value = f"{coexp_score_threshold}",
        help = "Default: 0.85")

    try:
        coexp_score_threshold = float(coexp_score_threshold_text)
    except ValueError:
        coexp_score_threshold = None
    # written as a chained comparison so that NaN is rejected as well
    if coexp_score_threshold is None or not (0 <= coexp_score_threshold <= 1):
        st.error(f"Co-expression threshold should be a number between 0 and 1, instead it is '{coexp_score_threshold_text}'")
        inputs_are_valid = False

    max_per_gene_text = st.text_input(
        label = "Max per gene",
        value = f"{max_per_gene}",
        help = "Default: 25")

    try:
        max_per_gene = int(max_per_gene_text)
    except ValueError:
        max_per_gene = None
    if max_per_gene is None or max_per_gene <= 0:
        st.error(f"Max per gene should be a number greater than 0, instead it is '{max_per_gene_text}'")
        inputs_are_valid = False

    if not inputs_are_valid:
        # nothing meaningful can be plotted from invalid settings
        st.stop()
    
    
##################################
# Parse and check the user input #
##################################

# One gene ID per row: strip surrounding whitespace and skip blank rows.
# dict.fromkeys drops repeated IDs while keeping first-seen order, so every
# seed ends up as exactly one node.
seed_gene_ids = list(dict.fromkeys(
    gene_id
    for gene_id in (row.strip() for row in input_gene_ids.split("\n"))
    if gene_id))

# For each seed keep the hits scoring above the threshold, capped at the
# first max_per_gene rows in the table's own order.
# NOTE(review): presumably top_coexp_hits is ordered best-first -- confirm
neighbors = []
for seed_gene_id in seed_gene_ids:
    hits = top_coexp_hits[
            (top_coexp_hits.feature_name_1 == seed_gene_id) & (top_coexp_hits.score > coexp_score_threshold)]
    neighbors.append(hits.iloc[:max_per_gene])

if neighbors:
    neighbors = pd.concat(neighbors)
else:
    # pd.concat raises on an empty list; with no seeds use an empty table
    # that still has the expected columns
    neighbors = top_coexp_hits.iloc[0:0]

# A seed can also be a hit of another seed. It must not be listed a second
# time as a neighbor, otherwise it would get two rows in the node table
# (duplicating edges in the merges) and two nodes with the same ID.
seed_gene_id_set = set(seed_gene_ids)
neighbor_gene_ids = [
    gene_id
    for gene_id in dict.fromkeys(neighbors.feature_name_2)
    if gene_id not in seed_gene_id_set]
gene_ids = seed_gene_ids + neighbor_gene_ids
gene_types = ['seed'] * len(seed_gene_ids) + ['neighbor'] * len(neighbor_gene_ids)

# Per-gene annotation columns, built in the same order as gene_ids so they
# line up row for row with gene_ids and gene_types.
old_locus_tags = []
gene_names = []
sacch_orthologs = []
descriptions = []

for gene_id in gene_ids:
    # one scan of the metadata per gene; the first matching row is used
    matches = gene_metadata.loc[gene_metadata["locus_tag_old"] == gene_id]
    if matches.empty:
        st.error(f"Unable to locate locus_tag_new for Gene ID: {gene_id}, it should be of the form 'B9J08_#######'")
        # Keep the row so all the columns stay the same length. The tag is
        # by construction the ID that was looked up, so an unknown gene is
        # still drawn, labelled with the ID it was requested by.
        locus_tag_old = gene_id
        gene_name = None
        sacch_ortholog = None
        description = None
    else:
        locus_tag_old = matches["locus_tag_old"].values[0]
        gene_name = matches["gene_name"].values[0]
        sacch_ortholog = matches["sacch_ortholog"].values[0]
        description = matches["description"].values[0]

    old_locus_tags.append(locus_tag_old)
    gene_names.append(gene_name)
    sacch_orthologs.append(sacch_ortholog)
    descriptions.append(description)

# Debug output: every list below becomes a column of node_info, so all of
# the reported lengths are expected to be equal to the gene_index length.
print(f"""
Constructing node_info
  seed_gene_ids: {len(seed_gene_ids)},
  neighbor_gene_ids: {len(neighbor_gene_ids)},
  gene_index: {len(gene_ids)},
  locus_tag_old: {len(old_locus_tags)},
  gene_types: {len(gene_types)},
  gene_name: {len(gene_names)},
  sacc_ortholog: {len(sacch_orthologs)},
  descriptions: {len(descriptions)}
""")

# one row per node; gene_index is the integer used as the graph node key
node_columns = {
    "gene_index": range(len(gene_ids)),
    "locus_tag_old" : old_locus_tags,
    "gene_type" : gene_types,
    "gene_name" : gene_names,
    "sacch_ortholog": sacch_orthologs,
    "description": descriptions,
}
node_info = pd.DataFrame(node_columns)

# Annotate both ends of every edge. The first merge adds the node columns
# for feature_name_1 under their plain names; the second adds them again
# for feature_name_2, at which point the clashing names pick up the
# "_a" (feature_name_1) and "_b" (feature_name_2) suffixes.
with_first_endpoint = neighbors.merge(
    right = node_info,
    left_on = "feature_name_1",
    right_on = "locus_tag_old")
neighbors = with_first_endpoint.merge(
    right = node_info,
    left_on = "feature_name_2",
    right_on = "locus_tag_old",
    suffixes = ("_a", "_b"))

################################
# Use NetworkX to layout graph #
################################
# note I think CytoscapeJS can layout graphs
# but I'm unsure how to do it through the streamlit-cytoscapejs interface :(

# show the annotated edge table on the page
st.write(neighbors)

# Undirected graph keyed by gene_index, one edge per row of the edge table
# with the co-expression score stored as the edge "weight".
G = nx.Graph()
G.add_weighted_edges_from(
    zip(
        neighbors["gene_index_a"],
        neighbors["gene_index_b"],
        neighbors["score"]),
    weight = "weight")

# maps gene_index -> (x, y); called without a seed, so positions are not
# fixed from one run to the next
layout = nx.spring_layout(G)


# fill colour of a node, by whether the gene was typed in or pulled in
node_color_lut = {
    "seed" : "#4866F0",    # blue
    "neighbor" : "#F0C547" # gold
}

# Cytoscape elements list: the nodes are added here, the edges afterwards
elements = []

# counts the nodes that are not in the layout (they have no edge in G);
# those are placed on a grid, 8 per row, instead
singleton_index = 0
for node in node_info.itertuples(index = False):
    position = layout.get(node.gene_index)

    if position is not None:
        # scale the layout coordinates up and shift them onto the canvas
        layout_x = position[0] * 600 + 1500/2
        layout_y = position[1] * 600 + 1500/2
    else:
        layout_x = (singleton_index % 8) * 150 + 100
        layout_y = np.floor(singleton_index / 8) * 50 + 30
        singleton_index += 1

    # label with the gene name, falling back to the locus tag without one
    if node.gene_name is not None:
        node_label = node.gene_name
    else:
        node_label = node.locus_tag_old

    node_data = {
        "id": node.locus_tag_old,
        "label": node_label,
        "color": node_color_lut[node.gene_type]}
    node_position = {
        "x" : layout_x,
        "y" : layout_y}
    elements.append({"data": node_data, "position": node_position})

# Edge width tiers as (score must exceed, width), checked from the
# strictest down; a score that clears none of them gets the thinnest line.
edge_width_tiers = ((0.98, 20), (0.93, 15), (0.90, 10), (0.88, 8))
thinnest_edge_width = 5

# one Cytoscape edge element per row of the edge table, appended after the
# node elements
edge_columns = zip(
    neighbors["feature_name_1"],
    neighbors["feature_name_2"],
    neighbors["score"])
for edge_source, edge_target, edge_score in edge_columns:
    edge_width = next(
        (width for cutoff, width in edge_width_tiers if edge_score > cutoff),
        thinnest_edge_width)
    elements.append({
        "data" : {
            "source" : edge_source,
            "target" : edge_target,
            "width" : edge_width}})

# Offer the annotated edge table (the same one shown on the page) as a
# tab-separated download.
with col3:
    st.text('') # help alignment with input box
    st.download_button(
        label = "Download as TSV",
        data = neighbors.to_csv(sep ='\t').encode('utf-8'),
        file_name = "CaurisCEN_network.tsv",
        # the payload is tab separated, so advertise it as TSV rather than CSV
        mime = "text/tab-separated-values")

##########################################################

# Nodes are drawn as fixed-size rectangles with the label centred inside;
# label text and fill colour are read from each element's data.
node_style = {
    "width": 140,
    "height": 30,
    "shape": "rectangle",
    "label" : "data(label)",
    "labelFontSize": 100,
    'background-color': 'data(color)',
    "text-halign": "center",
    "text-valign": "center",
}

# edge thickness is read from each edge's "width" data field
edge_style = {
    "width": "data(width)"
}

stylesheet = [
    {"selector": "node", "style": node_style},
    {"selector": "edge", "style": edge_style},
]

st.title("CaurisCEN Network")

# render the network; the component's return value is kept in
# clicked_elements
clicked_elements = st_cytoscapejs(
    elements = elements,
    stylesheet = stylesheet,
    width = 1000,
    height= 1000,
    key = "1")