iris_dataset_pca_vs_lda

Sleeping

App Files Files Community

NTaylor committed on May 15, 2023

Commit

bebe690

1 Parent(s): 43b5430

Edited how the data was loaded, as it was causing out-of-scope errors for unknown reasons...

Browse files

Files changed (1) hide show

app.py +21 -42

app.py CHANGED Viewed

@@ -23,56 +23,35 @@ import gradio as gr
 from sklearn import datasets
 from sklearn.decomposition import PCA
 from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
 # load data
 iris = datasets.load_iris()
-X = iris.data
-y = iris.target
 target_names = iris.target_names
-def plot_lda_pca(n_samples = 100,
-                 n_components=2,
-                 n_features=4):
-    '''
-    Function to plot LDA and PCA clustering.
-    Parameters
-    ----------
-    n_components : int, default=2
-        Number of components to keep.
-    n_features : int, default=5
-        Number of features to generate.
-    Returns
-    -------
-    fig : matplotlib.pyplot.figure
-        Figure object.
-    '''
-    # take sample of data
-    X = X[:n_samples, :n_features]
-    y = y[:n_samples]
     # fit PCA
-    pca = PCA(n_components=n_components)
     X_r = pca.fit(X).transform(X)
-    print(f"shape of X_r: {X_r.shape}")
     # fit LDA
-    lda = LinearDiscriminantAnalysis(n_components=n_components)
     X_r2 = lda.fit(X, y).transform(X)
-    print(f"shape of X_r2: {X_r2.shape}")
-    # take first two components
-    X_r = X_r[:, :2]
-    X_r2 = X_r2[:, :2]
-    print(f"shape of X_r after: {X_r.shape}")
-    print(f"shape of X_r2 after: {X_r2.shape}")
     # Percentage of variance explained for each components
     print(
         "explained variance ratio (first two components): %s"
@@ -119,15 +98,15 @@ with gr.Blocks(title=title) as demo:
     gr.Markdown(" Different number of features and number of components affect how well the low rank space is recovered. <br>"
                 "  Larger Depth trying to overfit and learn even the finner details of the data.<br>"
                )
-    # set max samples
     max_samples = len(iris.data)
-    with gr.Row():
-        n_samples = gr.Slider(value=100, minimum=2, maximum=max_samples, step=1, label="n_samples")
-        n_features = gr.Slider(value=4, minimum=2, maximum=4, step=1, label="n_features")
     btn = gr.Button(value="Run")
-    btn.click(plot_lda_pca,inputs= [n_samples, n_features], outputs= gr.Plot(label='PCA vs LDA clustering') ) #
 demo.launch()

 from sklearn import datasets
 from sklearn.decomposition import PCA
 from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
+import numpy as np
 # load data
 iris = datasets.load_iris()
+all_X = iris.data
+all_y = iris.target
 target_names = iris.target_names
+# save models using skop
+def plot_lda_pca(n_samples = 50, n_features = 4):
+    # print(f"all X is: {all_X}")
+    idx = np.random.randint(0, len(iris.data), n_samples)
+    # sub-sample
+    X = all_X[idx, :n_features]
+    y = all_y[idx]
     # fit PCA
+    pca = PCA(n_components=2)
     X_r = pca.fit(X).transform(X)
     # fit LDA
+    lda = LinearDiscriminantAnalysis(n_components=2)
     X_r2 = lda.fit(X, y).transform(X)
     # Percentage of variance explained for each components
     print(
         "explained variance ratio (first two components): %s"
     gr.Markdown(" Different number of features and number of components affect how well the low rank space is recovered. <br>"
                 "  Larger Depth trying to overfit and learn even the finner details of the data.<br>"
                )
     max_samples = len(iris.data)
+    with gr.Row():
+        n_samples = gr.Slider(value=100, minimum=10, maximum=max_samples, step=10, label="n_samples")
+        n_features = gr.Slider(value=2, minimum=2, maximum=4, step=1, label="n_features")
     btn = gr.Button(value="Run")
+    btn.click(plot_lda_pca, inputs = [n_samples, n_features], outputs= gr.Plot(label='PCA vs LDA clustering') ) #
 demo.launch()