Spaces:
Build error
Build error
| import gradio as gr | |
| import pandas as pd | |
| import plotly.express as px | |
| from sklearn.svm import LinearSVC | |
| from sklearn.pipeline import make_pipeline | |
| from sklearn.datasets import make_classification | |
| from sklearn.metrics import classification_report | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.feature_selection import SelectKBest, f_classif | |
def app_fn(k: int, n_features: int, n_informative: int, n_redundant: int):
    """Fit an ANOVA feature-selection + linear SVM pipeline on synthetic data.

    A two-class dataset is generated with ``make_classification``, split into
    train and test parts, and a ``SelectKBest(f_classif)`` -> ``LinearSVC``
    pipeline is fitted on the training part and evaluated on the test part.

    Args:
        k: Number of features the ANOVA filter keeps. Values larger than
            ``n_features`` are clamped to ``n_features``.
        n_features: Total number of features in the generated dataset.
        n_informative: Number of informative features in the dataset.
        n_redundant: Number of redundant features in the dataset.

    Returns:
        A ``(report_df, fig)`` tuple. ``report_df`` is the classification
        report for the test split as a DataFrame with a ``class`` column and
        an extra ``accuracy`` column; ``fig`` is a Plotly bar chart with one
        bar per feature index, of height 1 for features kept by the ANOVA
        filter and 0 for the others.

    Raises:
        ValueError: If ``n_informative + n_redundant`` exceeds ``n_features``
            or if ``n_informative`` is too small for the fixed number of
            classes and clusters per class.
    """
    # UI sliders may deliver numeric values as floats; the estimators need ints.
    k, n_features, n_informative, n_redundant = (
        int(value) for value in (k, n_features, n_informative, n_redundant)
    )

    n_classes = 2
    n_clusters_per_class = 2

    # Validate up front so the user gets a message phrased in terms of the
    # sliders instead of a failure from deep inside scikit-learn.
    if n_informative + n_redundant > n_features:
        raise ValueError(
            "n_informative + n_redundant must not exceed n_features "
            f"(got {n_informative} + {n_redundant} > {n_features})."
        )
    if 2 ** n_informative < n_classes * n_clusters_per_class:
        raise ValueError(
            f"n_informative={n_informative} is too small: "
            f"{n_classes} classes with {n_clusters_per_class} clusters each "
            "require 2 ** n_informative >= "
            f"{n_classes * n_clusters_per_class}."
        )
    # The filter cannot keep more features than the dataset has.
    k = min(k, n_features)

    X, y = make_classification(
        n_features=n_features,
        n_informative=n_informative,
        n_redundant=n_redundant,
        n_classes=n_classes,
        n_clusters_per_class=n_clusters_per_class,
        random_state=42,
    )
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    anova_filter = SelectKBest(f_classif, k=k)
    # Seeded like the data generation and the split so repeated runs agree.
    clf = LinearSVC(random_state=42)
    anova_svm = make_pipeline(anova_filter, clf)
    anova_svm.fit(X_train, y_train)

    y_pred = anova_svm.predict(X_test)
    report = classification_report(y_test, y_pred, output_dict=True)
    report_df = (
        pd.DataFrame(report)
        .transpose()
        .reset_index()
        .rename(columns={"index": "class"})
        .round(2)
    )
    # The scalar accuracy is broadcast over every metric column of its row;
    # move it into a dedicated column and drop the row itself.
    is_accuracy = report_df["class"] == "accuracy"
    report_df["accuracy"] = report_df.loc[is_accuracy].iloc[0, -1]
    report_df = report_df.loc[~is_accuracy]

    # Use the selector's own mask: the sign of the SVM coefficient says
    # nothing about whether the feature was kept by the filter.
    selected = anova_svm[0].get_support().astype(int)

    fig = px.bar(y=selected)
    # Label the 0/1 y-axis ticks as "False"/"True".
    fig.update_yaxes(ticktext=["False", "True"], tickvals=[0, 1])
    fig.update_layout(
        title="Selected Features",
        xaxis_title="Feature Index",
        yaxis_title="Selected",
        legend_title="Selected",
    )
    return report_df, fig
# Page title shown at the top of the demo.
title = "Pipeline ANOVA SVM"

# Build the UI: four sliders feed app_fn, whose report table and feature plot
# are refreshed on page load and whenever the button is clicked.
with gr.Blocks() as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(
        """
        ### This example creates a pipeline where in the first step k features are selected with ANOVA and then we pass the selected features \
        to a Linear SVM. This pipeline is then trained using a synthetic dataset and evaluated on a test holdout. \
        A table displaying the classification report with the metrics and a chart showing the index of the selected features are shown at the bottom.
        See original example [here](https://scikit-learn.org/stable/auto_examples/feature_selection/plot_feature_selection_pipeline.html#sphx-glr-auto-examples-feature-selection-plot-feature-selection-pipeline-py)
        """
    )
    with gr.Row():
        # gr.Slider with `value=` replaces the deprecated gr.inputs.Slider
        # and its `default=` keyword.
        k = gr.Slider(minimum=1, maximum=20, value=3, step=1, label="Number of Features to Select")
        n_features = gr.Slider(minimum=1, maximum=20, value=20, step=1, label="Total Features")
        n_informative = gr.Slider(minimum=1, maximum=20, value=3, step=1, label="Informative Features")
        n_redundant = gr.Slider(minimum=0, maximum=20, value=0, step=1, label="Redundant Features")
    # The button text is its value, not a label.
    btn = gr.Button("Run")
    with gr.Row():
        report = gr.DataFrame(label="Classification Report")
        features = gr.Plot(label="Selected Features")
    btn.click(
        fn=app_fn,
        inputs=[k, n_features, n_informative, n_redundant],
        outputs=[report, features],
    )
    # Populate the outputs once with the default slider values on page load.
    demo.load(
        fn=app_fn,
        inputs=[k, n_features, n_informative, n_redundant],
        outputs=[report, features],
    )

demo.launch()