# Spaces status when this file was captured: Runtime error
import math
import os

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from datasets import load_dataset
from scipy.interpolate import interp1d
from shiny import render
from shiny.express import input, output, ui

# from utils import (
# generate_2d_sequence,
# plot_seq_full_label
# )
# Reset every Matplotlib rcParam to the library's built-in defaults.
mpl.rcParams.update(mpl.rcParamsDefault)

# Load the dataset once at import time and keep its 'train' split as a
# DataFrame; the plotting code below reads its 'sequence' column.
ds = load_dataset('Hack90/virus_tiny', keep_in_memory=True, cache_dir=None)
df_virus = pd.DataFrame(ds['train'])
def shannon_entropy(seq):
    """Return two composition-based complexity scores for a nucleotide string.

    Only the uppercase characters ``A``, ``T``, ``C`` and ``G`` are counted;
    every other character (ambiguity codes, gaps, lowercase bases) is ignored.
    With ``p`` the fraction of counted bases that are ``A`` or ``T`` and
    ``H = -(p*ln(p) + (1-p)*ln(1-p))`` the binary entropy in nats:

    * ``c_h   = H * ln((1 - p) / p) / (8.69 - 8.31)``
    * ``shann = H / 2``

    Args:
        seq: Nucleotide sequence as a string.

    Returns:
        A ``(c_h, shann)`` tuple of floats.  ``(nan, nan)`` is returned when
        the sequence contains no countable base, and ``(0.0, 0.0)`` when it
        consists of a single class only (all A/T or all G/C), which is the
        limit of both formulas as ``p`` approaches 0 or 1.
    """
    at_count = seq.count('A') + seq.count('T')
    gc_count = seq.count('G') + seq.count('C')
    total = at_count + gc_count
    if total == 0:
        # Nothing to measure; NaN lets plotting code skip the point instead
        # of failing on a division by zero.
        return math.nan, math.nan

    p = at_count / total
    if p == 0.0 or p == 1.0:
        # log(0) is undefined, but both scores tend to 0 at the extremes.
        return 0.0, 0.0

    q = 1 - p
    # NOTE(review): the origin of the constants 8.69 and 8.31 is not
    # documented in this file - confirm what they represent.
    scale = 8.69 - 8.31
    entropy = -(p * math.log(p) + q * math.log(q))
    c_h = entropy * math.log(q / p) / scale
    return c_h, entropy / 2
ui.page_opts(fillable=True)


def plot_loss_rates(df, samples):
    """Scatter-plot the complexity scores of the first ``samples`` sequences.

    Each of the first ``samples`` entries of ``df['sequence']`` is scored with
    ``shannon_entropy``.  The x axis shows the second score multiplied by 2
    and the y axis shows the first score.

    Args:
        df: DataFrame with a ``'sequence'`` column of nucleotide strings.
        samples: Number of leading rows to score; 0 yields an empty plot.

    Returns:
        The Matplotlib figure containing the scatter plot.
    """
    scores = [shannon_entropy(seq) for seq in df['sequence'].iloc[:samples]]
    # Naming the columns keeps the frame well-formed when no rows are
    # selected (the slider's minimum is 0).
    df_nana = pd.DataFrame(scores, columns=['c_h', 'shann'])
    df_nana['x'] = df_nana['shann'] * 2
    df_nana['y'] = df_nana['c_h']

    fig, ax = plt.subplots()
    scatter = ax.scatter(df_nana['x'], df_nana['y'], s=0.5)
    # NOTE(review): the scatter is drawn without colour data, so this
    # colorbar is not tied to any plotted value - confirm whether a `c=`
    # mapping was intended.
    cbar = fig.colorbar(scatter, ax=ax)
    cbar.set_label('Label')
    return fig


# NOTE(review): indentation was lost in the captured source; the nesting
# below is reconstructed - confirm the plot belongs inside this card.
with ui.navset_card_tab(id="tab"):
    with ui.nav_panel("Species View"):
        ui.panel_title("What is the distribution of complexity across viral species?")
        with ui.card():
            ui.input_slider("sample", "samples", 0, len(df_virus), 40)

            # Without the decorator the function is never registered as an
            # output and nothing is drawn.
            @render.plot()
            def plot_context_size_scaling():
                """Render the complexity scatter for the slider's sample count."""
                return plot_loss_rates(df_virus, input.sample())
# with ui.nav_panel("Histone Modification"):
# ui.panel_title("Is there a pattern to histone modification?")
# with ui.layout_columns():
# with ui.card():
# ui.input_slider("sample_histone", "sample", 0, df_histone_len, 40)
# def plot_histone(df,sample):
# y_values = generate_2d_sequence(df['seq'].iloc[sample])[0]
# x_values = generate_2d_sequence(df['seq'].iloc[sample])[1]
# integers = str((np.argwhere(df['labels'][sample] == np.amax(df['labels'][sample]))).flatten().tolist())
# # Create a DataFrame with the x values, y values, and integers
# data = {'x': x_values, 'y': y_values, 'color': integers}
# fig, ax = plt.subplots()
# sns.scatterplot(x='x', y='y', hue='color', data=data, palette='viridis', ax=ax)
# ax.legend()
# # ax.set_title(f"Loss ra")
# # ax.set_xlabel("Training steps")
# # ax.set_ylabel("Loss rate")
# return fig
# @render.plot()
# def plot_histones_two():
# fig = plot_histone(df_histone,input.sample_histone() )
# if fig:
# return fig
# with ui.nav_panel("Enhancer Annotations"):
# ui.panel_title("Is there a pattern to enhancer annotations?")
# with ui.layout_columns():
# with ui.card():
# ui.input_slider("sample_enhancer", "sample", 0, df_enhancer_annotation_len, 40)
# @render.plot()
# def plot_enhancer():
# fig = plot_loss_rates(df_enhancer_annotation,input.sample_enhancer() , True)
# if fig:
# return fig