# clusternea / app.py
# hkayabilisim's picture
# Added custom marker color and y-axis (only for 1D)
# 73e2237
#%%
import solara
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from matplotlib.figure import Figure
from sklearn.decomposition import PCA
import plotly.express as px
from sklearn.manifold import TSNE, Isomap, LocallyLinearEmbedding, SpectralEmbedding, MDS
# Global non-reactive variables
df_original = pd.read_csv('nea_051022.csv')
all_features = list(df_original.columns)
default_features = ['a','e','i','om','w','q','ad','H']
scalers = ['none','minmax','standard']
training_ratio_default = 5  # percent of rows sampled (divided by 100 where used)

# Global reactive variables
drop_nans = solara.reactive(True)
features = solara.reactive(default_features)
# One expression per column; 'x' is the identity mapping.
feature_maps = {f: solara.reactive('x') for f in all_features}
scaler = solara.reactive('none')
training_ratio = solara.reactive(training_ratio_default)
info_message = solara.reactive("")
# Call .copy() so the reactive starts out holding a DataFrame rather than the
# bound method object itself.
df = solara.reactive(df_original[features.value].copy())
@solara.component
def process_data():
    """Build the transformed data set from the current settings and show it.

    Selects the chosen feature columns from ``df_original``, samples
    ``training_ratio`` percent of the rows with a fixed seed, optionally drops
    rows containing NaNs, applies each feature's mapping expression and
    finally applies the selected scaler.

    The result is stored in the ``df`` reactive (``None`` when scaling fails)
    and any error text is stored in ``info_message``.
    """
    df_out = df_original[features.value].copy()
    df_out = df_out.sample(frac=training_ratio.value / 100.0, random_state=42)
    if drop_nans.value:
        df_out = df_out.dropna()

    errors = []
    for f in features.value:
        # `x` is the name the mapping expression refers to; eval() below sees
        # it through this function's local scope.
        x = df_out[f].to_numpy()
        try:
            # SECURITY: eval() executes arbitrary Python typed into the UI.
            # Only acceptable while every user of the app is trusted.
            y = eval(feature_maps[f].value)
            df_out.loc[:, f] = y
        except Exception as e:
            # Leave the column unmapped and report the problem instead of
            # failing the whole render.
            errors.append(f"{f}: {e}")

    if scaler.value != 'none':
        if scaler.value == 'minmax':
            transformer = MinMaxScaler()
        else:
            # 'standard' and any unrecognised value both use StandardScaler.
            transformer = StandardScaler()
        try:
            transformer = transformer.fit(df_out)
            df_out = pd.DataFrame(
                transformer.transform(df_out),
                columns=df_out.columns,
                index=df_out.index,
            )
        except Exception as e:
            df_out = None
            errors.append(str(e))

    # Publish the collected messages; an empty string clears earlier errors.
    info_message.set("\n".join(errors))
    df.set(df_out)
    if df_out is not None:
        solara.DataFrame(df_out, items_per_page=5)
def reset():
    """Put the feature selection, NaN handling, sampling ratio, per-feature
    mappings and scaler back to their start-up values."""
    features.set(default_features)
    drop_nans.set(True)
    training_ratio.set(training_ratio_default)
    # feature_maps has exactly one entry per name in all_features.
    for name in all_features:
        mapping = feature_maps[name]
        mapping.set('x')
    scaler.set('none')
@solara.component
def InfoBox():
    """Render ``info_message`` as an error box; render nothing when it is empty."""
    message = info_message.value
    if message:
        solara.Error(message)
@solara.component
def BoxPlot():
    """Box plot of every column of ``df`` with an adjustable y-range.

    Renders nothing when ``df`` holds ``None`` or an empty frame.
    """
    data = df.value
    has_data = data is not None and len(data) > 0
    if has_data:
        # pandas reductions skip NaNs, unlike the builtin min()/max() whose
        # result depends on where a NaN happens to sit.
        ymin = float(data.min(numeric_only=True).min())
        ymax = float(data.max(numeric_only=True).max())
    else:
        ymin, ymax = 0.0, 1.0

    # Hooks must run on every render, so this is called before any early
    # return.
    yrange = solara.use_reactive((ymin, ymax))

    if not has_data:
        return

    fig = Figure()
    ax = fig.subplots()
    data.plot.box(figsize=(20,5), ax=ax)
    ax.set_ylim(yrange.value[0], yrange.value[1])
    solara.SliderRangeFloat("yrange min and max", value=yrange,min=ymin, max=ymax)
    solara.FigureMatplotlib(fig)
    # Show the selected range itself, not the repr of the reactive wrapper.
    solara.Markdown(f'{yrange.value}')
# Columns of df_original copied next to the embedding coordinates so they can
# be used for marker colour, the 1D y-axis and hover text.
_EMBEDDING_METADATA = ('pha', 'moid', 'H', 'pdes', 'a', 'e', 'i', 'om', 'w')


def _make_embedder(method, n_components, n_neighbors, lle_method,
                   eigen_solver, eigen_solver_se):
    """Return the scikit-learn estimator for the selected embedding method."""
    if method == 'PCA':
        return PCA(n_components=n_components)
    if method == 'SE':
        return SpectralEmbedding(n_components=n_components,
                                 n_neighbors=n_neighbors,
                                 eigen_solver=eigen_solver_se)
    if method == 'TSNE':
        return TSNE(n_components=n_components)
    if method == 'ISOMAP':
        return Isomap(n_components=n_components,
                      n_neighbors=n_neighbors,
                      eigen_solver=eigen_solver)
    if method == 'LLE':
        return LocallyLinearEmbedding(n_components=n_components,
                                      n_neighbors=n_neighbors,
                                      method=lle_method,
                                      eigen_solver=eigen_solver)
    if method == 'MDS':
        return MDS(n_components=n_components, normalized_stress='auto')
    raise ValueError(f"Unknown embedding method: {method}")


def _embedding_figure(embedder, data, n_components, marker_color, y_axis):
    """Embed ``data`` and return a plotly scatter figure of the result."""
    if data is None or len(data) == 0:
        raise ValueError("No data available to embed")

    df_embedded = pd.DataFrame(embedder.fit_transform(data))
    # fit_transform returns a plain array, so the metadata is attached by
    # position using the row labels of the input frame.
    for column in _EMBEDDING_METADATA:
        df_embedded[column] = list(df_original.loc[data.index, column])
    df_embedded_clean = df_embedded.dropna()

    if n_components == 1:
        return px.scatter(df_embedded, x=0, y=y_axis, color=marker_color,
                          hover_name='pdes',width=1024, height=768)
    if n_components == 2:
        # NOTE(review): only the 2D plot uses the NaN-dropped frame; the 1D
        # and 3D plots use the full one. Kept as found - confirm intent.
        return px.scatter(df_embedded_clean, x=0,y=1,color=marker_color,
                          hover_name='pdes',width=1024, height=768)
    if n_components == 3:
        return px.scatter_3d(df_embedded, x=0,y=1,z=2,color=marker_color,
                             hover_name='pdes',width=1024, height=768)
    raise ValueError(f"Unsupported embedding dimension: {n_components}")


@solara.component
def EmbeddingPlot():
    """Controls for the embedding method plus a scatter plot of the result.

    The current ``df`` is embedded with the selected scikit-learn method and
    plotted in 1, 2 or 3 dimensions. Any exception raised while embedding or
    plotting is shown as an error box.
    """
    embedding_method, set_embedding_method = solara.use_state('PCA')
    n_components, set_n_component = solara.use_state(2)
    n_neighbors, set_n_neighbors = solara.use_state(5)
    lle_methods = ['standard', 'hessian', 'modified', 'ltsa']
    lle_method, set_lle_method = solara.use_state('standard')
    eigen_solver, set_eigen_solver = solara.use_state('auto')
    eigen_solver_se, set_eigen_solver_se = solara.use_state(None)
    marker_color, set_marker_color = solara.use_state('pha')
    y_axis, set_y_axis = solara.use_state('moid')

    with solara.Row():
        solara.ToggleButtonsSingle(value=embedding_method, values=['PCA','TSNE','ISOMAP','LLE','SE','MDS'], dense=True, on_value=set_embedding_method)
        solara.Select(label="Embedding Dimension (All)", value=n_components, values=[1,2,3], on_value=set_n_component)
        solara.Select(label="LLE Method (LLE)", value=lle_method, values=lle_methods, on_value=set_lle_method)
        solara.Select(label="Number of Neighbors (SE, LLE, Isomap)", value=n_neighbors, values=[1,2,3,4,5,6,7,8,9,10], on_value=set_n_neighbors)
        solara.Select(label='eigen_solver (LLE, Isomap)', value=eigen_solver, values=['auto', 'arpack', 'dense'], on_value=set_eigen_solver)
        solara.Select(label='eigen_solver (SE)', value=eigen_solver_se, values=[None, 'arpack', 'lobpcg', 'amg'], on_value=set_eigen_solver_se)

    fig = None
    error = None
    try:
        embedder = _make_embedder(embedding_method, n_components, n_neighbors,
                                  lle_method, eigen_solver, eigen_solver_se)
        fig = _embedding_figure(embedder, df.value, n_components,
                                marker_color, y_axis)
    except Exception as e:
        # UI boundary: report whatever the estimator or plotly raised rather
        # than failing the render.
        error = str(e)

    # The colour / axis selectors are rendered even when the plot failed, so
    # the user can still change the setting that caused the failure.
    with solara.Row():
        with solara.Column():
            solara.Select(label='Marker Color',
                          value=marker_color,
                          values=[None,'pha','moid','H','a','e','i','om','w'],
                          on_value=set_marker_color)
            solara.Select(label='Y-Axis (only in 1D plot)',
                          value=y_axis,
                          values=['moid','H','a','e','i','om','w'],
                          on_value=set_y_axis)
        if fig is not None:
            solara.FigurePlotly(fig)
    if error is not None:
        solara.Error(error)
@solara.component
def Page():
    """Top-level page: raw data table, transformation controls, embedding
    plot and box plot."""
    with solara.Card(title="Original Raw Data"):
        solara.DataFrame(df_original, items_per_page=5)
    with solara.Card(title="Transformed Data"):
        process_data()  # features, feature_maps, drop_nans, scaler, training_ratio)
        solara.Button(label='Reset', on_click=reset)
        solara.ToggleButtonsMultiple(features, all_features)
        with solara.GridFixed(columns=10):
            for f in features.value:
                # Pass this feature's own bound setter. A lambda that closes
                # over the loop variable would resolve `f` only when called,
                # i.e. always to the last feature in the list.
                solara.InputText(label=f, value=feature_maps[f], on_value=feature_maps[f].set)
        with solara.Row():
            solara.Checkbox(label='Drop Nans', value=drop_nans, on_value=drop_nans.set)
            solara.ToggleButtonsSingle(value=scaler, values=scalers)
            solara.SliderInt(label='Training Ratio %', value=training_ratio, min=1, max=99)
        InfoBox()
    EmbeddingPlot()
    BoxPlot()


# Create the page element at import time, as the original script does.
Page()