Spaces:

sklearn-docs
/

Pipeline-ANOVA-SVM

Build error

App Files Files Community

Pipeline-ANOVA-SVM / app.py

EduardoPacheco

Update app.py

761c88a almost 3 years ago

raw

history blame contribute delete

3.43 kB

	import gradio as gr
	import pandas as pd
	import plotly.express as px
	from sklearn.svm import LinearSVC
	from sklearn.pipeline import make_pipeline
	from sklearn.datasets import make_classification
	from sklearn.metrics import classification_report
	from sklearn.model_selection import train_test_split
	from sklearn.feature_selection import SelectKBest, f_classif


	def app_fn(k: int, n_features: int, n_informative: int, n_redundant: int):
	    """Fit an ANOVA feature-selection + linear SVM pipeline on synthetic data.

	    A binary classification dataset is generated with ``make_classification``
	    (fixed ``random_state=42``), split into train/test parts, and a pipeline
	    of ``SelectKBest(f_classif, k=k)`` followed by ``LinearSVC`` is fitted on
	    the training part and evaluated on the held-out part.

	    Args:
	        k: Number of features the ANOVA filter keeps. Values larger than
	            ``n_features`` are clamped to ``n_features``.
	        n_features: Total number of features in the synthetic dataset.
	        n_informative: Number of informative features; must be at least 2.
	        n_redundant: Number of redundant features.

	    Returns:
	        A ``(report_df, fig)`` tuple. ``report_df`` is the classification
	        report as a DataFrame (one row per class/average, values rounded to
	        two decimals, overall accuracy repeated in an ``accuracy`` column).
	        ``fig`` is a Plotly bar chart with one bar per original feature:
	        1 where the ANOVA filter kept the feature, 0 otherwise.

	    Raises:
	        gr.Error: If the requested feature counts cannot be satisfied by
	            ``make_classification``.
	    """
	    # UI sliders may deliver numbers as floats; the estimators need ints.
	    k, n_features = int(k), int(n_features)
	    n_informative, n_redundant = int(n_informative), int(n_redundant)

	    # make_classification requires n_classes * n_clusters_per_class (2 * 2)
	    # to be <= 2 ** n_informative, i.e. at least 2 informative features.
	    if n_informative < 2:
	        raise gr.Error(
	            "Informative Features must be at least 2 "
	            "(2 classes with 2 clusters per class)."
	        )
	    # Informative and redundant features are carved out of the total.
	    if n_informative + n_redundant > n_features:
	        raise gr.Error(
	            "Informative Features + Redundant Features must not exceed "
	            "Total Features."
	        )
	    # The filter cannot keep more features than exist; keep them all instead.
	    k = min(k, n_features)

	    X, y = make_classification(
	        n_features=n_features,
	        n_informative=n_informative,
	        n_redundant=n_redundant,
	        n_classes=2,
	        n_clusters_per_class=2,
	        random_state=42,
	    )
	    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
	    anova_filter = SelectKBest(f_classif, k=k)
	    clf = LinearSVC()
	    anova_svm = make_pipeline(anova_filter, clf)
	    anova_svm.fit(X_train, y_train)

	    y_pred = anova_svm.predict(X_test)
	    report = classification_report(y_test, y_pred, output_dict=True)
	    # "accuracy" is a single scalar in the report dict; pull it out so the
	    # remaining entries form a regular per-class table.
	    accuracy = report.pop("accuracy")
	    report_df = pd.DataFrame(report).transpose()
	    report_df = report_df.reset_index().rename(columns={"index": "class"}).round(2)
	    report_df["accuracy"] = round(accuracy, 2)

	    # Use the filter's own support mask: thresholding the back-projected SVM
	    # coefficients would hide selected features whose coefficient is <= 0.
	    features = anova_svm[0].get_support().astype(int)
	    fig = px.bar(y=features)
	    # Label the 0/1 y-axis ticks as "False"/"True"
	    fig.update_yaxes(ticktext=["False", "True"], tickvals=[0, 1])
	    fig.update_layout(
	        title="Selected Features",
	        xaxis_title="Feature Index",
	        yaxis_title="Selected",
	        legend_title="Selected",
	    )
	    return report_df, fig

	title = "Pipeline ANOVA SVM"
	# Gradio UI: four sliders feed app_fn, whose classification report and
	# selected-features chart are rendered below the "Run" button.
	with gr.Blocks() as demo:
	    gr.Markdown(f"# {title}")
	    gr.Markdown(
	        """
	### This example creates a pipeline where in the first step k features are selected with ANOVA and then we pass the selected features \
	to a Linear SVM. This pipeline is then trained using a synthetic dataset and evaluated on a test holdout. \
	A table displaying the classification report with the metrics and a chart showing the index of the selected features are shown at the bottom.

	See original example [here](https://scikit-learn.org/stable/auto_examples/feature_selection/plot_feature_selection_pipeline.html#sphx-glr-auto-examples-feature-selection-plot-feature-selection-pipeline-py)
	"""
	    )
	    with gr.Row():
	        # gr.Slider(value=...) replaces the deprecated gr.inputs.Slider(default=...)
	        k = gr.Slider(minimum=1, maximum=20, value=3, step=1, label="Number of Features to Select")
	        n_features = gr.Slider(minimum=1, maximum=20, value=20, step=1, label="Total Features")
	        # Minimum is 2: make_classification rejects fewer informative features
	        # for 2 classes with 2 clusters per class.
	        n_informative = gr.Slider(minimum=2, maximum=20, value=3, step=1, label="Informative Features")
	        n_redundant = gr.Slider(minimum=0, maximum=20, value=0, step=1, label="Redundant Features")
	    # The button caption is its value; Button takes no `label` argument.
	    btn = gr.Button("Run")
	    with gr.Row():
	        report = gr.DataFrame(label="Classification Report")
	        features = gr.Plot(label="Selected Features")

	    # Recompute on demand ...
	    btn.click(
	        fn=app_fn,
	        inputs=[k, n_features, n_informative, n_redundant],
	        outputs=[report, features],
	    )
	    # ... and once on page load so the outputs are populated with the defaults.
	    demo.load(
	        fn=app_fn,
	        inputs=[k, n_features, n_informative, n_redundant],
	        outputs=[report, features],
	    )

	demo.launch()