Commit
·
00c3ce9
1
Parent(s):
0d44b47
update widgets and documentation
Browse files
app.py
CHANGED
|
@@ -1,20 +1,30 @@
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import numpy as np
|
| 3 |
import matplotlib.pyplot as plt
|
| 4 |
import warnings
|
| 5 |
|
| 6 |
from functools import partial
|
| 7 |
-
from sklearn.datasets import make_blobs
|
| 8 |
from sklearn.svm import LinearSVC
|
| 9 |
from sklearn.inspection import DecisionBoundaryDisplay
|
| 10 |
from sklearn.exceptions import ConvergenceWarning
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
default_base = {"n_samples": 20}
|
| 14 |
|
| 15 |
# Algorithms to compare
|
| 16 |
params = default_base.copy()
|
| 17 |
-
params.update({"n_samples":n_samples
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
X, y = make_blobs(n_samples=params["n_samples"], centers=2, random_state=0)
|
| 20 |
|
|
@@ -24,7 +34,14 @@ def train_model(C, n_samples):
|
|
| 24 |
with warnings.catch_warnings():
|
| 25 |
warnings.filterwarnings("ignore", category=ConvergenceWarning)
|
| 26 |
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
# obtain the support vectors through the decision function
|
| 29 |
decision_function = clf.decision_function(X)
|
| 30 |
# we can also calculate the decision function manually
|
|
@@ -69,24 +86,102 @@ def iter_grid(n_rows, n_cols):
|
|
| 69 |
title = "📈 Linear Support Vector Classification"
|
| 70 |
with gr.Blocks(title=title) as demo:
|
| 71 |
gr.Markdown(f"## {title}")
|
| 72 |
-
gr.Markdown("
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
|
| 78 |
-
input_models = ["Bisecting K-Means", "K-Means"]
|
| 79 |
|
| 80 |
n_samples = gr.Slider(minimum=20, maximum=100, step=5,
|
| 81 |
label = "Number of Samples")
|
| 82 |
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
-
|
| 89 |
-
|
|
|
|
| 90 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
|
| 92 |
demo.launch()
|
|
|
|
|
|
|
|
|
| 1 |
+
#%%
|
| 2 |
import gradio as gr
|
| 3 |
import numpy as np
|
| 4 |
import matplotlib.pyplot as plt
|
| 5 |
import warnings
|
| 6 |
|
| 7 |
from functools import partial
|
| 8 |
+
from sklearn.datasets import make_blobs, make_spd_matrix
|
| 9 |
from sklearn.svm import LinearSVC
|
| 10 |
from sklearn.inspection import DecisionBoundaryDisplay
|
| 11 |
from sklearn.exceptions import ConvergenceWarning
|
| 12 |
|
| 13 |
+
#%%
|
| 14 |
+
def train_model(n_samples, C, penalty, loss, max_iter):
|
| 15 |
+
|
| 16 |
+
if penalty == "l1" and loss == "hinge":
|
| 17 |
+
raise gr.Error("The combination of penalty='l1' and loss='hinge' is not supported")
|
| 18 |
+
|
| 19 |
default_base = {"n_samples": 20}
|
| 20 |
|
| 21 |
# Algorithms to compare
|
| 22 |
params = default_base.copy()
|
| 23 |
+
params.update({"n_samples":n_samples,
|
| 24 |
+
"C": C,
|
| 25 |
+
"penalty": penalty,
|
| 26 |
+
"loss": loss,
|
| 27 |
+
"max_iter": max_iter})
|
| 28 |
|
| 29 |
X, y = make_blobs(n_samples=params["n_samples"], centers=2, random_state=0)
|
| 30 |
|
|
|
|
| 34 |
with warnings.catch_warnings():
|
| 35 |
warnings.filterwarnings("ignore", category=ConvergenceWarning)
|
| 36 |
|
| 37 |
+
# add penalty, l1 and l2. Default is l2
|
| 38 |
+
# add loss, square_hinge is Default. the other loss is hinge
|
| 39 |
+
# multi_class{‘ovr’, ‘crammer_singer’}, default=’ovr’
|
| 40 |
+
|
| 41 |
+
clf = LinearSVC(penalty=penalty, C=params["C"],
|
| 42 |
+
loss=params["loss"],
|
| 43 |
+
max_iter=params["max_iter"],
|
| 44 |
+
random_state=42).fit(X, y)
|
| 45 |
# obtain the support vectors through the decision function
|
| 46 |
decision_function = clf.decision_function(X)
|
| 47 |
# we can also calculate the decision function manually
|
|
|
|
| 86 |
# ---------------------------------------------------------------------------
# Gradio UI: two side-by-side LinearSVC demos sharing one n_samples slider,
# so the effect of C / penalty / loss / max_iter can be compared visually.
# ---------------------------------------------------------------------------
title = "📈 Linear Support Vector Classification"
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(
        "The LinearSVC is an implementation of a "
        "Support Vector Machine (SVM) for classification. "
        "It aims to find the optimal linear "
        "decision boundary that separates classes in the input data."
    )
    gr.Markdown("The most important parameters of `LinearSVC` are:")
    param_C_doc = (
        "1. `C`: The inverse of the regularization strength. "
        "A smaller `C` value increases the amount of regularization, "
        "promoting simpler models, while a larger `C` value reduces "
        "regularization, allowing more complex models. "
        "It controls the trade-off between fitting the "
        "training data and generalization to unseen data."
    )
    param_loss_doc = (
        "2. `loss`: The loss function used for training. "
        "The default is `squared_hinge`, which is a variant "
        "of hinge loss. The other option is `hinge`; "
        "each has different properties and performance characteristics."
    )
    # Fixed: the original sentence was cut off mid-clause and claimed a
    # combined L1/L2 penalty, which LinearSVC does not support.
    param_penalty_doc = (
        "3. `penalty`: The type of regularization penalty "
        "applied to the model. The default is `l2`, which uses "
        "the L2 norm. The other option is `l1`, which uses the L1 norm."
    )
    param_dual_doc = (
        "4. `dual`: Determines whether the dual or primal optimization "
        "problem is solved. By default, `dual=True` when the number "
        "of samples is less than the number of features, and `dual=False` "
        "otherwise. For large-scale problems, setting `dual=False` "
        "can be more efficient."
    )
    param_tol_doc = (
        "5. `tol`: The tolerance for stopping criteria. "
        "The solver stops when the optimization reaches "
        "a specified tolerance level."
    )
    param_max_iter_doc = (
        "6. `max_iter`: The maximum number of iterations for solver "
        "convergence. If not specified, the default value is set."
    )
    for doc in (param_C_doc, param_loss_doc, param_penalty_doc,
                param_dual_doc, param_tol_doc, param_max_iter_doc):
        gr.Markdown(doc)

    # Shared dataset-size control: drives both plots below.
    n_samples = gr.Slider(minimum=20, maximum=100, step=5,
                          label="Number of Samples")

    input_model = "LinearSVC"
    fn = train_model  # the original partial(train_model) bound no arguments

    # --- Left model: default regularization (C=1) ---
    with gr.Row():
        penalty = gr.Dropdown(["l1", "l2"], value="l2", interactive=True,
                              label="Penalty to prevent overfitting")
        # BUG FIX: sklearn's LinearSVC expects "squared_hinge" (underscore);
        # the previous "squared hinge" choice raised ValueError in train_model.
        loss = gr.Dropdown(["hinge", "squared_hinge"], value="hinge",
                           interactive=True, label="Loss function")

    with gr.Row():
        max_iter = gr.Slider(
            minimum=100, maximum=2000, step=100, value=1000,
            label="Maximum number of iterations to find the optimal hyperplane")
        param_C = gr.Number(
            value=1,
            label="Regularization parameter C",
            # When C is small the regularization effect is stronger, which
            # helps avoid overfitting but may increase bias.  When C is
            # large, regularization is weaker and the model may fit the
            # training data more closely, risking overfitting.
        )

    # --- Right model: strong C (C=100) for side-by-side comparison ---
    with gr.Row():
        penalty2 = gr.Dropdown(["l1", "l2"], value="l2", interactive=True,
                               label="Penalty to prevent overfitting")
        loss2 = gr.Dropdown(["hinge", "squared_hinge"], value="hinge",
                            interactive=True, label="Loss function")

    with gr.Row():
        max_iter2 = gr.Slider(
            minimum=100, maximum=2000, step=100, value=1000,
            label="Maximum number of iterations to find the optimal hyperplane")
        param_C2 = gr.Number(value=100,
                             label="Regularization parameter C")

    with gr.Row():
        # Left plot: retrain whenever any of its five inputs changes.
        plot = gr.Plot(label=input_model)
        inputs_left = [n_samples, param_C, penalty, loss, max_iter]
        for widget in inputs_left:
            widget.change(fn=fn, inputs=inputs_left, outputs=plot)

        # Right plot: same wiring against the second widget set.
        plot2 = gr.Plot(label=input_model)
        inputs_right = [n_samples, param_C2, penalty2, loss2, max_iter2]
        for widget in inputs_right:
            widget.change(fn=fn, inputs=inputs_right, outputs=plot2)

demo.launch()

# %%
|