Spaces:
Sleeping
Sleeping
| #%% | |
| import solara | |
| import pandas as pd | |
| import numpy as np | |
| from sklearn.preprocessing import MinMaxScaler, StandardScaler | |
| from matplotlib.figure import Figure | |
| from sklearn.decomposition import PCA | |
| import plotly.express as px | |
| from sklearn.manifold import TSNE, Isomap, LocallyLinearEmbedding, SpectralEmbedding, MDS | |
# Global non-reactive variables
# Full data set, loaded once when the module is imported (relative path).
df_original = pd.read_csv('nea_051022.csv')
# Every column of the CSV is offered as a selectable feature.
all_features = list(df_original.columns)
# Columns selected at startup and restored by reset().
default_features = ['a','e','i','om','w','q','ad','H']
# Scaler option names understood by process_data().
scalers = ['none','minmax','standard']
# Default sample size, in percent of the rows (process_data divides by 100).
training_ratio_default = 5

# Global reactive variables
drop_nans = solara.reactive(True)
features = solara.reactive(default_features)
# One expression string per column; process_data() evaluates it with `x`
# bound to the column values, so the default 'x' leaves the column unchanged.
feature_maps = {f: solara.reactive('x') for f in all_features}
scaler = solara.reactive('none')
training_ratio = solara.reactive(training_ratio_default)
info_message = solara.reactive("")
# Call .copy() so the reactive starts out holding a DataFrame; without the
# parentheses it would hold the bound method object instead of data.
df = solara.reactive(df_original[features.value].copy())
def process_data():
    """Rebuild the transformed data set from the current control values.

    Reads the module-level reactive settings ``features``, ``training_ratio``,
    ``drop_nans``, ``feature_maps`` and ``scaler``; samples and optionally
    de-NaNs the selected columns, applies each column's map expression and
    the chosen scaler, then publishes the result in ``df`` and any scaler
    error text in ``info_message``. The resulting frame is rendered as a
    paged table when one could be produced.
    """
    selected = features.value
    df_out = df_original[selected].copy()
    # training_ratio is a percentage; the fixed seed keeps the sample stable
    # from one call to the next.
    df_out = df_out.sample(frac=training_ratio.value / 100.0, random_state=42)
    if drop_nans.value:
        df_out = df_out.dropna()

    for f in selected:
        # `x` is looked up by name from inside the evaluated expression.
        x = df_out[f].to_numpy()  # noqa: F841
        # SECURITY: eval() executes whatever text was typed into the
        # feature-map input box. Acceptable only while every user of the app
        # is trusted; replace with a restricted expression evaluator before
        # exposing this to untrusted users.
        y = eval(feature_maps[f].value)
        df_out.loc[:, f] = y

    info_string = ""
    if scaler.value != 'none':
        # Any option other than 'minmax' falls back to standard scaling.
        transformer = MinMaxScaler() if scaler.value == 'minmax' else StandardScaler()
        try:
            transformer = transformer.fit(df_out)
            df_out = pd.DataFrame(
                transformer.transform(df_out),
                columns=df_out.columns,
                index=df_out.index,
            )
        except Exception as e:
            # Best effort: report the failure instead of breaking the page.
            df_out = None
            info_string = str(e)

    df.set(df_out)
    # Publish the error text (or clear a stale one) so InfoBox can show it;
    # previously the message was collected but never stored anywhere.
    info_message.set(info_string)
    if df_out is not None:
        solara.DataFrame(df_out, items_per_page=5)
def reset():
    """Put every control back to the value it had at startup."""
    features.set(default_features)
    drop_nans.value = True
    training_ratio.value = training_ratio_default
    # Each column's map expression goes back to the identity expression 'x'.
    for name in all_features:
        feature_maps[name].value = 'x'
    scaler.value = 'none'
def InfoBox():
    """Show the current info message as an error box; render nothing if it is empty."""
    message = info_message.value
    if len(message) == 0:
        return
    solara.Error(message)
def BoxPlot():
    """Draw a box plot of the transformed data with an adjustable y-range.

    Renders nothing when there is no transformed data. Otherwise shows a
    range slider bounded by the overall minimum and maximum of the numeric
    columns, the matplotlib figure, and the range object as text.
    """
    data = df.value
    if data is None or len(data) == 0:
        return

    figure = Figure()
    axes = figure.subplots()
    data.plot.box(figsize=(20,5), ax=axes)

    # Overall extremes across the per-column minima / maxima.
    ymin = min(data.min(numeric_only=True))
    ymax = max(data.max(numeric_only=True))

    yrange = solara.use_reactive((ymin, ymax))
    lower, upper = yrange.value[0], yrange.value[1]
    axes.set_ylim(lower, upper)

    solara.SliderRangeFloat("yrange min and max", value=yrange, min=ymin, max=ymax)
    solara.FigureMatplotlib(figure)
    solara.Markdown(f'{yrange}')
def EmbeddingPlot():
    """Embed the transformed data with the chosen method and plot the result.

    Shows a row of controls (method, dimension, and method-specific options),
    fits the selected scikit-learn embedder on ``df.value``, attaches a fixed
    set of columns from ``df_original`` for colouring and hovering, and draws
    a 1D, 2D or 3D plotly scatter. Any exception raised while embedding or
    plotting is shown as an error box instead.
    """
    # NOTE(review): the nesting of the ``with`` blocks was reconstructed from
    # a listing that had lost its indentation -- confirm the layout.
    embedding_method, set_embedding_method = solara.use_state('PCA')
    n_components, set_n_component = solara.use_state(2)
    n_neighbors, set_n_neighbors = solara.use_state(5)
    lle_methods = ['standard', 'hessian', 'modified', 'ltsa']
    lle_method, set_lle_method = solara.use_state('standard')
    eigen_solver, set_eigen_solver = solara.use_state('auto')
    eigen_solver_se, set_eigen_solver_se = solara.use_state(None)
    marker_color, set_marker_color = solara.use_state('pha')
    y_axis, set_y_axis = solara.use_state('moid')

    with solara.Row():
        solara.ToggleButtonsSingle(
            value=embedding_method,
            values=['PCA','TSNE','ISOMAP','LLE','SE','MDS'],
            dense=True,
            on_value=set_embedding_method,
        )
        solara.Select(
            label="Embedding Dimension (All)",
            value=n_components,
            values=[1,2,3],
            on_value=set_n_component,
        )
        solara.Select(
            label="LLE Method (LLE)",
            value=lle_method,
            values=lle_methods,
            on_value=set_lle_method,
        )
        solara.Select(
            label="Number of Neighbors (SE, LLE, Isomap)",
            value=n_neighbors,
            values=[1,2,3,4,5,6,7,8,9,10],
            on_value=set_n_neighbors,
        )
        solara.Select(
            label='eigen_solver (LLE, Isomap)',
            value=eigen_solver,
            values=['auto', 'arpack', 'dense'],
            on_value=set_eigen_solver,
        )
        solara.Select(
            label='eigen_solver (SE)',
            value=eigen_solver_se,
            values=[None, 'arpack', 'lobpcg', 'amg'],
            on_value=set_eigen_solver_se,
        )

    # Only the selected embedder is constructed.
    if embedding_method == 'PCA':
        embedder = PCA(n_components=n_components)
    elif embedding_method == 'TSNE':
        embedder = TSNE(n_components=n_components)
    elif embedding_method == 'ISOMAP':
        embedder = Isomap(
            n_components=n_components,
            n_neighbors=n_neighbors,
            eigen_solver=eigen_solver,
        )
    elif embedding_method == 'LLE':
        embedder = LocallyLinearEmbedding(
            n_components=n_components,
            n_neighbors=n_neighbors,
            method=lle_method,
            eigen_solver=eigen_solver,
        )
    elif embedding_method == 'SE':
        embedder = SpectralEmbedding(
            n_components=n_components,
            n_neighbors=n_neighbors,
            eigen_solver=eigen_solver_se,
        )
    elif embedding_method == 'MDS':
        embedder = MDS(n_components=n_components, normalized_stress='auto')

    try:
        data_embedded = embedder.fit_transform(df.value)
        df_embedded = pd.DataFrame(data_embedded)
        # Bring along the original (untransformed) columns used for colour,
        # hover text and the 1D y-axis, aligned on the sampled row index.
        row_index = df.value.index
        for column in ('pha', 'moid', 'H', 'pdes', 'a', 'e', 'i', 'om', 'w'):
            df_embedded[column] = list(df_original.loc[row_index, column])
        df_embedded_clean = df_embedded.dropna()

        shared_kwargs = dict(color=marker_color, hover_name='pdes', width=1024, height=768)
        if n_components == 1:
            fig = px.scatter(df_embedded, x=0, y=y_axis, **shared_kwargs)
        elif n_components == 2:
            fig = px.scatter(df_embedded_clean, x=0, y=1, **shared_kwargs)
        elif n_components == 3:
            fig = px.scatter_3d(df_embedded, x=0, y=1, z=2, **shared_kwargs)

        with solara.Row():
            with solara.Column():
                solara.Select(
                    label='Marker Color',
                    value=marker_color,
                    values=[None,'pha','moid','H','a','e','i','om','w'],
                    on_value=set_marker_color,
                )
                solara.Select(
                    label='Y-Axis (only in 1D plot)',
                    value=y_axis,
                    values=['moid','H','a','e','i','om','w'],
                    on_value=set_y_axis,
                )
            solara.FigurePlotly(fig)
    except Exception as e:
        solara.Error(str(e))
def Page():
    """Render the app: raw data table, transformation controls, and plots.

    The "Transformed Data" card first runs ``process_data()`` and then shows
    the controls that feed it: reset button, feature selection, one
    map-expression text field per selected feature, NaN handling, scaler
    choice and sample ratio.
    """
    # NOTE(review): the nesting of the ``with`` blocks was reconstructed from
    # a listing that had lost its indentation -- confirm the layout.
    with solara.Card(title="Original Raw Data"):
        solara.DataFrame(df_original, items_per_page=5)
    with solara.Card(title="Transformed Data"):
        process_data()  # features, feature_maps, drop_nans, scaler, training_ratio)
        solara.Button(label='Reset', on_click=reset)
        solara.ToggleButtonsMultiple(features, all_features)
        with solara.GridFixed(columns=10):
            for f in features.value:
                # Bind `f` as a default argument: a plain closure would look
                # `f` up when the callback fires, by which time the loop has
                # finished, so every field would write to the last feature.
                solara.InputText(
                    label=f,
                    value=feature_maps[f],
                    on_value=lambda text, f=f: feature_maps[f].set(text),
                )
        with solara.Row():
            solara.Checkbox(label='Drop Nans', value=drop_nans, on_value=lambda value: drop_nans.set(value))
            solara.ToggleButtonsSingle(value=scaler, values=scalers)
            solara.SliderInt(label='Training Ratio %', value=training_ratio, min=1, max=99)
        InfoBox()
    EmbeddingPlot()
    BoxPlot()


# The file starts with a `#%%` cell marker; this call is the last statement of that cell.
Page()