# Hugging Face Space (status when this file was captured: Sleeping)
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| from mafese.wrapper.mha import MhaSelector | |
| from mafese import Data | |
| from mafese.utils.mealpy_util import get_all_optimizers | |
| from permetrics.classification import ClassificationMetric | |
| from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score | |
# Every metric name exposed by permetrics' ClassificationMetric; used as the
# choices of the "Evaluation Metrics" checkbox group in the UI.
ALL_METRICS = list(ClassificationMetric.SUPPORT)
def format_results_to_list(y_true, y_pred, metric_list):
    """Score predictions and return the scores as rows for a results table.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        metric_list: Names of the metrics to compute.

    Returns:
        A list of rows. The first row is ``["Metric", "Value"]``; each
        following row is ``[metric_name, value rounded to 4 decimals]``.
        If anything raises while scoring, the rows are the header followed
        by a single ``["Error", <message>]`` row instead.
    """
    header = ["Metric", "Value"]
    try:
        scorer = ClassificationMetric(y_true, y_pred)
        # The same keyword arguments are supplied for every requested metric.
        weighted = {"average": "weighted"}
        per_metric_kwargs = [weighted] * len(metric_list)
        scores = scorer.get_metrics_by_list_names(metric_list, per_metric_kwargs)

        table = [header]
        for name, value in scores.items():
            table.append([name, round(value, 4)])
        return table
    except Exception as err:
        # Errors are reported inside the table rather than raised.
        return [header, ["Error", str(err)]]
def _error_result(label, message):
    """Build the four-output tuple returned to the UI when a run fails."""
    return ("-", "-", "-", [["Metric", "Value"], [label, str(message)]])


def _validate_run_settings(csv_file, validation, epoch, population):
    """Reject missing or out-of-range UI inputs before any heavy work starts."""
    if csv_file is None:
        raise ValueError("Please upload a CSV file before running feature selection.")
    # A cleared number field arrives as None; NaN fails the chained comparison.
    if validation is None or not 0 < float(validation) < 1:
        raise ValueError("Validation ratio must be a number strictly between 0 and 1.")
    if epoch is None or int(epoch) < 1:
        raise ValueError("Epochs must be a positive integer.")
    if population is None or int(population) < 1:
        raise ValueError("Population size must be a positive integer.")


def _load_dataset(csv_file):
    """Read the uploaded header-less CSV and return ``(X, y_raw)``.

    ``X`` holds every column but the last; ``y_raw`` is the last column as
    stripped strings. Raises ``ValueError`` for unusable data.
    """
    # Accept both an upload object exposing ``.name`` and a plain path string.
    csv_path = getattr(csv_file, "name", csv_file)
    with open(csv_path, "rb") as handle:
        df = pd.read_csv(handle, header=None)

    # Ensure there are at least 2 columns (features + label)
    if df.shape[1] < 2:
        raise ValueError("CSV must contain at least one feature column and one label column.")

    # Check for missing values
    if df.isnull().values.any():
        raise ValueError("Dataset contains missing (NaN) values. Please clean your data before uploading.")

    # Separate features and label
    X = np.array(df.iloc[:, :-1])
    y_raw = df.iloc[:, -1].astype(str).str.strip()
    if len(set(y_raw)) <= 1:
        raise ValueError("Label column must contain at least two distinct classes.")
    return X, y_raw


def _select_columns(matrix, indexes):
    """Return the chosen columns of ``matrix``, always as a 2-D array."""
    picked = matrix[:, indexes]
    if picked.ndim == 1:
        picked = picked.reshape(-1, 1)
    return picked


def run_feature_selection(csv_file, method, validation, epoch, population, classifier, transfer, metrics):
    """Run metaheuristic feature selection on an uploaded CSV and score it.

    The CSV is read without a header row; the last column is the label and
    all other columns are features. Labels are encoded with ``LabelEncoder``,
    the data is split with ``validation`` as the test size, an ``MhaSelector``
    is fitted on the training part, and the selector's estimator is then
    refitted on the selected columns and evaluated on the test part.

    Args:
        csv_file: Uploaded file, either an object with a ``.name`` path
            attribute or a path string.
        method: Optimizer name passed to ``MhaSelector``.
        validation: Test-set ratio, strictly between 0 and 1.
        epoch: Number of optimizer epochs (converted with ``int``).
        population: Optimizer population size (converted with ``int``).
        classifier: Estimator name passed to ``MhaSelector``.
        transfer: Transfer-function name passed to ``MhaSelector``.
        metrics: Metric names forwarded to ``format_results_to_list``.

    Returns:
        A 4-tuple ``(selected indexes, selected solution, selected masks,
        results table)`` where the first three items are strings. This
        function does not raise: on failure it returns
        ``("-", "-", "-", [["Metric", "Value"], [<error label>, <message>]])``
        with the label "Training Error", "Prediction Error" or
        "Unhandled Error" depending on where the failure occurred.
    """
    try:
        # Fail fast with a readable message instead of an error from deep
        # inside pandas / mafese when a field is empty or out of range.
        _validate_run_settings(csv_file, validation, epoch, population)

        X, y_raw = _load_dataset(csv_file)

        # Encode categorical labels
        from sklearn.preprocessing import LabelEncoder
        y = LabelEncoder().fit_transform(y_raw)

        # Create data object and split
        data = Data(X, y)
        data.split_train_test(test_size=validation)

        # Initialize the feature selector
        selector = MhaSelector(
            problem="classification",
            estimator=classifier,
            optimizer=method,
            optimizer_paras={"epoch": int(epoch), "pop_size": int(population)},
            transfer_func=transfer,
        )

        # Fit the selector
        try:
            selector.fit(data.X_train, data.y_train, fit_weights=(0.9, 0.1), verbose=False)
            selected_indexes = selector.selected_feature_indexes.copy()
        except Exception as fit_err:
            return _error_result("Training Error", fit_err)

        # Manually slice and fit using selected indexes
        try:
            if len(selected_indexes) == 0:
                raise ValueError("The optimizer did not select any feature.")
            X_sel_train = _select_columns(data.X_train, selected_indexes)
            X_sel_test = _select_columns(data.X_test, selected_indexes)
            if X_sel_test.shape[1] != X_sel_train.shape[1]:
                raise ValueError(f"Feature mismatch: Train={X_sel_train.shape[1]} vs Test={X_sel_test.shape[1]}")

            # Retrain the classifier manually to guarantee consistency
            selector.estimator.fit(X_sel_train, data.y_train)
            y_pred = selector.estimator.predict(X_sel_test)
        except Exception as pred_err:
            return _error_result("Prediction Error", pred_err)

        # Evaluate performance
        results_list = format_results_to_list(data.y_test, y_pred, metrics)
        return (
            str(selected_indexes),
            str(selector.selected_feature_solution),
            str(selector.selected_feature_masks),
            results_list,
        )
    except Exception as outer_e:
        # Top-level UI boundary: report every remaining failure in the table.
        return _error_result("Unhandled Error", outer_e)
# Option lists offered by the dropdowns in the UI.
SUPPORTED_ESTIMATORS = ["knn", "svm", "rf", "adaboost", "xgb", "tree", "ann"]

# Expands to vstf_01..vstf_04, then sstf_01..sstf_04, then "rtf".
SUPPORTED_TRANSFERS = [
    f"{family}_{variant:02d}"
    for family in ("vstf", "sstf")
    for variant in range(1, 5)
] + ["rtf"]

# Optimizer names are the keys of the mapping returned by get_all_optimizers().
SUPPORTED_OPTIMIZERS = list(get_all_optimizers())
# Footer text rendered as Markdown underneath the controls.
_ABOUT_MARKDOWN = """
**Created by [Muhammed Kotan](https://github.com/muhammedkotan)**
Sakarya University, Department of Information Systems Engineering
This app demonstrates a **metaheuristic-based feature selection system** using the [mafese](https://github.com/thieu1995/mafese) library, supported by:
- [mealpy](https://github.com/thieu1995/mealpy) – metaheuristic optimizers
- [permetrics](https://github.com/thieu1995/permetrics) – standard evaluation metrics
- [Gradio](https://www.gradio.app/) – fast UI development
- scikit-learn, pandas, numpy – ML and data handling
📂 **Sample Datasets:**
To try out this system, you can use sample datasets available here:
👉 [mafese/data/cls/](https://github.com/thieu1995/mafese/tree/main/mafese/data/cls)
📄 **Citation :**
- M. Kotan and S. B. Abdullahi, "MetaFSelect: No-Code Metaheuristic Feature Selection for Machine Learning," 2025 33rd Telecommunications Forum (TELFOR), Belgrade, Serbia, 2025, pp. 1-4, doi: 10.1109/TELFOR67910.2025.11314278.
📄 **Related research papers :**
- Oğur, N. B., Kotan, M., Balta, D., Yavuz, B. Ç., Oğur, Y. S., Yuvacı, H. U., & Yazıcı, E. (2023). Detection of depression and anxiety in the perinatal period using Marine Predators Algorithm and kNN. Computers in Biology and Medicine, 161, 107003.
- Kotan, M. Seymen O. F. (2023). Makine Öğreniminde Öznitelik Seçme Yöntemlerinin Kullanımı: Güncel Python Uygulamaları, In Book: Mühendislikte İleri ve Çağdaş Çalışmalar 1, Duvar Yayınları
- Kotan, M., Faruk Seymen, Ö., Çallı, L., Kasım, S., Çarklı Yavuz, B., & Över Özçelik, T. (2025). A novel methodological approach to SaaS churn prediction using whale optimization algorithm. PloS one, 20(5), e0319998.
📬 **Questions or Feedback?**
Reach out via [GitHub](https://github.com/muhammedkotan) or [ResearchGate](https://www.researchgate.net/profile/Muhammed-Kotan-2?ev=hdr_xprf)
---
"""

# Page layout: a title, one row with two columns (settings on the left,
# results on the right), then the about section.
with gr.Blocks() as demo:
    gr.Markdown("# MetaFSelect: NoCode Metaheuristic Feature Selection")

    with gr.Row():
        # Left column: dataset upload and run settings.
        with gr.Column():
            csv_input = gr.File(label="📁 Upload CSV (last column = label)")
            method = gr.Dropdown(
                choices=SUPPORTED_OPTIMIZERS,
                value="OriginalMPA",
                label="🔍 Optimizer",
            )
            validation = gr.Number(value=0.2, label="📊 Validation Ratio")
            epoch = gr.Number(value=50, label="⏳ Epochs")
            population = gr.Number(value=20, label="👥 Population Size")
            classifier = gr.Dropdown(
                choices=SUPPORTED_ESTIMATORS,
                value="knn",
                label="🧪 Classifier",
            )
            transfer = gr.Dropdown(
                choices=SUPPORTED_TRANSFERS,
                value="vstf_01",
                label="🔄 Transfer Function",
            )
            metrics = gr.CheckboxGroup(
                choices=ALL_METRICS,
                value=["AS", "PS", "RS", "F1S"],
                label="📐 Evaluation Metrics",
            )
            btn = gr.Button(value="🚀 Run Feature Selection")

        # Right column: outputs filled in by run_feature_selection.
        with gr.Column():
            out_idx = gr.Textbox(label="📌 Selected Feature Indexes")
            out_sol = gr.Textbox(label="📍 Selected Feature Solution")
            out_mask = gr.Textbox(label="🧬 Selected Feature Mask")
            out_table = gr.Dataframe(
                headers=["Metric", "Value"],
                datatype=["str", "number"],
                label="📈 Evaluation Results",
            )

    # The order of both lists must match run_feature_selection's parameters
    # and the four values it returns.
    run_inputs = [csv_input, method, validation, epoch, population, classifier, transfer, metrics]
    run_outputs = [out_idx, out_sol, out_mask, out_table]
    btn.click(fn=run_feature_selection, inputs=run_inputs, outputs=run_outputs)

    gr.Markdown("---")
    gr.Markdown("### 📘 About This App")
    gr.Markdown(_ABOUT_MARKDOWN)

# Enable request queuing, then start the web server.
demo.queue().launch()