Commit
·
00c3ce9
1
Parent(s):
0d44b47
update widgets and documentation
Browse files
app.py
CHANGED
|
@@ -1,20 +1,30 @@
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import numpy as np
|
| 3 |
import matplotlib.pyplot as plt
|
| 4 |
import warnings
|
| 5 |
|
| 6 |
from functools import partial
|
| 7 |
-
from sklearn.datasets import make_blobs
|
| 8 |
from sklearn.svm import LinearSVC
|
| 9 |
from sklearn.inspection import DecisionBoundaryDisplay
|
| 10 |
from sklearn.exceptions import ConvergenceWarning
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
default_base = {"n_samples": 20}
|
| 14 |
|
| 15 |
# Algorithms to compare
|
| 16 |
params = default_base.copy()
|
| 17 |
-
params.update({"n_samples":n_samples
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
X, y = make_blobs(n_samples=params["n_samples"], centers=2, random_state=0)
|
| 20 |
|
|
@@ -24,7 +34,14 @@ def train_model(C, n_samples):
|
|
| 24 |
with warnings.catch_warnings():
|
| 25 |
warnings.filterwarnings("ignore", category=ConvergenceWarning)
|
| 26 |
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
# obtain the support vectors through the decision function
|
| 29 |
decision_function = clf.decision_function(X)
|
| 30 |
# we can also calculate the decision function manually
|
|
@@ -69,24 +86,102 @@ def iter_grid(n_rows, n_cols):
|
|
| 69 |
title = "📈 Linear Support Vector Classification"
|
| 70 |
with gr.Blocks(title=title) as demo:
|
| 71 |
gr.Markdown(f"## {title}")
|
| 72 |
-
gr.Markdown("
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
|
| 78 |
-
input_models = ["Bisecting K-Means", "K-Means"]
|
| 79 |
|
| 80 |
n_samples = gr.Slider(minimum=20, maximum=100, step=5,
|
| 81 |
label = "Number of Samples")
|
| 82 |
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
-
|
| 89 |
-
|
|
|
|
| 90 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
|
| 92 |
demo.launch()
|
|
|
|
|
|
|
|
|
| 1 |
+
#%%
|
| 2 |
import gradio as gr
|
| 3 |
import numpy as np
|
| 4 |
import matplotlib.pyplot as plt
|
| 5 |
import warnings
|
| 6 |
|
| 7 |
from functools import partial
|
| 8 |
+
from sklearn.datasets import make_blobs, make_spd_matrix
|
| 9 |
from sklearn.svm import LinearSVC
|
| 10 |
from sklearn.inspection import DecisionBoundaryDisplay
|
| 11 |
from sklearn.exceptions import ConvergenceWarning
|
| 12 |
|
| 13 |
+
#%%
|
| 14 |
+
def train_model(n_samples, C, penalty, loss, max_iter):
|
| 15 |
+
|
| 16 |
+
if penalty == "l1" and loss == "hinge":
|
| 17 |
+
raise gr.Error("The combination of penalty='l1' and loss='hinge' is not supported")
|
| 18 |
+
|
| 19 |
default_base = {"n_samples": 20}
|
| 20 |
|
| 21 |
# Algorithms to compare
|
| 22 |
params = default_base.copy()
|
| 23 |
+
params.update({"n_samples":n_samples,
|
| 24 |
+
"C": C,
|
| 25 |
+
"penalty": penalty,
|
| 26 |
+
"loss": loss,
|
| 27 |
+
"max_iter": max_iter})
|
| 28 |
|
| 29 |
X, y = make_blobs(n_samples=params["n_samples"], centers=2, random_state=0)
|
| 30 |
|
|
|
|
| 34 |
with warnings.catch_warnings():
|
| 35 |
warnings.filterwarnings("ignore", category=ConvergenceWarning)
|
| 36 |
|
| 37 |
+
# add penalty, l1 and l2. Default is l2
|
| 38 |
+
# add loss, square_hinge is Default. the other loss is hinge
|
| 39 |
+
# multi_class{‘ovr’, ‘crammer_singer’}, default=’ovr’
|
| 40 |
+
|
| 41 |
+
clf = LinearSVC(penalty=penalty, C=params["C"],
|
| 42 |
+
loss=params["loss"],
|
| 43 |
+
max_iter=params["max_iter"],
|
| 44 |
+
random_state=42).fit(X, y)
|
| 45 |
# obtain the support vectors through the decision function
|
| 46 |
decision_function = clf.decision_function(X)
|
| 47 |
# we can also calculate the decision function manually
|
|
|
|
| 86 |
# ---------------------------------------------------------------------------
# Gradio UI: two side-by-side LinearSVC demos sharing one n_samples slider,
# so the effect of C / penalty / loss / max_iter can be compared visually.
# ---------------------------------------------------------------------------
title = "📈 Linear Support Vector Classification"
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(
        "The LinearSVC is an implementation of a "
        "Support Vector Machine (SVM) for classification. "
        "It aims to find the optimal linear "
        "decision boundary that separates classes in the input data."
    )
    gr.Markdown("The most important parameters of `LinearSVC` are:")
    param_C_doc = (
        "1. `C`: The inverse of the regularization strength. "
        "A smaller `C` value increases the amount of regularization, "
        "promoting simpler models, while a larger `C` value reduces "
        "regularization, allowing more complex models. "
        "It controls the trade-off between fitting the "
        "training data and generalization to unseen data."
    )
    param_loss_doc = (
        "2. `loss`: The loss function used for training. "
        "The default is `squared_hinge`, which is a variant "
        "of hinge loss. The other option is `hinge`; "
        "each has different properties and performance characteristics."
    )
    # Fixed: the original sentence was cut off mid-clause and claimed a
    # combined L1/L2 penalty, which LinearSVC does not support.
    param_penalty_doc = (
        "3. `penalty`: The type of regularization penalty "
        "applied to the model. The default is `l2`, which uses "
        "the L2 norm. The other option is `l1`, which uses the L1 norm."
    )
    param_dual_doc = (
        "4. `dual`: Determines whether the dual or primal optimization "
        "problem is solved. By default, `dual=True` when the number "
        "of samples is less than the number of features, and `dual=False` "
        "otherwise. For large-scale problems, setting `dual=False` "
        "can be more efficient."
    )
    param_tol_doc = (
        "5. `tol`: The tolerance for stopping criteria. "
        "The solver stops when the optimization reaches "
        "a specified tolerance level."
    )
    param_max_iter_doc = (
        "6. `max_iter`: The maximum number of iterations for solver "
        "convergence. If not specified, the default value is set."
    )
    for doc in (param_C_doc, param_loss_doc, param_penalty_doc,
                param_dual_doc, param_tol_doc, param_max_iter_doc):
        gr.Markdown(doc)

    # Shared dataset-size control: drives both plots below.
    n_samples = gr.Slider(minimum=20, maximum=100, step=5,
                          label="Number of Samples")

    input_model = "LinearSVC"
    fn = train_model  # the original partial(train_model) bound no arguments

    # --- Left model: default regularization (C=1) ---
    with gr.Row():
        penalty = gr.Dropdown(["l1", "l2"], value="l2", interactive=True,
                              label="Penalty to prevent overfitting")
        # BUG FIX: sklearn's LinearSVC expects "squared_hinge" (underscore);
        # the previous "squared hinge" choice raised ValueError in train_model.
        loss = gr.Dropdown(["hinge", "squared_hinge"], value="hinge",
                           interactive=True, label="Loss function")

    with gr.Row():
        max_iter = gr.Slider(
            minimum=100, maximum=2000, step=100, value=1000,
            label="Maximum number of iterations to find the optimal hyperplane")
        param_C = gr.Number(
            value=1,
            label="Regularization parameter C",
            # When C is small the regularization effect is stronger, which
            # helps avoid overfitting but may increase bias.  When C is
            # large, regularization is weaker and the model may fit the
            # training data more closely, risking overfitting.
        )

    # --- Right model: strong C (C=100) for side-by-side comparison ---
    with gr.Row():
        penalty2 = gr.Dropdown(["l1", "l2"], value="l2", interactive=True,
                               label="Penalty to prevent overfitting")
        loss2 = gr.Dropdown(["hinge", "squared_hinge"], value="hinge",
                            interactive=True, label="Loss function")

    with gr.Row():
        max_iter2 = gr.Slider(
            minimum=100, maximum=2000, step=100, value=1000,
            label="Maximum number of iterations to find the optimal hyperplane")
        param_C2 = gr.Number(value=100,
                             label="Regularization parameter C")

    with gr.Row():
        # Left plot: retrain whenever any of its five inputs changes.
        plot = gr.Plot(label=input_model)
        inputs_left = [n_samples, param_C, penalty, loss, max_iter]
        for widget in inputs_left:
            widget.change(fn=fn, inputs=inputs_left, outputs=plot)

        # Right plot: same wiring against the second widget set.
        plot2 = gr.Plot(label=input_model)
        inputs_right = [n_samples, param_C2, penalty2, loss2, max_iter2]
        for widget in inputs_right:
            widget.change(fn=fn, inputs=inputs_right, outputs=plot2)

demo.launch()

# %%
|