import gradio as gr
import pandas as pd
import numpy as np
from mafese.wrapper.mha import MhaSelector
from mafese import Data
from mafese.utils.mealpy_util import get_all_optimizers
from permetrics.classification import ClassificationMetric
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import LabelEncoder

# Metric names offered in the UI, taken from the keys permetrics exposes.
ALL_METRICS = list(ClassificationMetric.SUPPORT.keys())

# Shared pieces of the values handed back to the UI.
_TABLE_HEADER = ["Metric", "Value"]
_PLACEHOLDER = "-"


def _failure_outputs(stage, err):
    """Build the 4-tuple returned when a run fails at the given stage."""
    return (
        _PLACEHOLDER,
        _PLACEHOLDER,
        _PLACEHOLDER,
        [list(_TABLE_HEADER), [stage, str(err)]],
    )


def format_results_to_list(y_true, y_pred, metric_list):
    """Evaluate predictions and return the scores as table rows.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        metric_list: Names of the permetrics classification metrics to compute.

    Returns:
        A list of ``[name, value]`` rows whose first row is the literal
        ``["Metric", "Value"]``; values are rounded to 4 decimals. If the
        evaluation raises, the single data row is ``["Error", <message>]``.
    """
    # NOTE(review): the first row repeats the column titles that the output
    # Dataframe component also declares - confirm the duplication is intended.
    try:
        evaluator = ClassificationMetric(y_true, y_pred)
        # One independent parameter dict per metric (no shared mutable object).
        paras = [{"average": "weighted"} for _ in metric_list]
        results = evaluator.get_metrics_by_list_names(metric_list, paras)
        rows = [list(_TABLE_HEADER)]
        rows.extend([name, round(value, 4)] for name, value in results.items())
        return rows
    except Exception as e:
        return [list(_TABLE_HEADER), ["Error", str(e)]]


def run_feature_selection(csv_file, method, validation, epoch, population, classifier, transfer, metrics):
    """Run metaheuristic feature selection on an uploaded CSV and score it.

    The CSV is read without a header row; the last column is the label and
    all preceding columns are features.

    Args:
        csv_file: Uploaded file, either an object exposing ``.name`` (a path)
            or a plain path string.
        method: Optimizer name passed to ``MhaSelector``.
        validation: Test-split size passed to ``Data.split_train_test``.
        epoch: Number of optimizer epochs (converted with ``int``).
        population: Optimizer population size (converted with ``int``).
        classifier: Estimator name passed to ``MhaSelector``.
        transfer: Transfer-function name passed to ``MhaSelector``.
        metrics: Metric names forwarded to ``format_results_to_list``.

    Returns:
        A 4-tuple ``(indexes, solution, masks, rows)`` where the first three
        are strings and ``rows`` is a list of table rows. On any failure the
        three strings are ``"-"`` and ``rows`` carries one row naming the
        failing stage and the error message; no exception escapes.
    """
    try:
        if csv_file is None:
            raise ValueError("No CSV file uploaded. Please upload a dataset first.")
        # Works for file-like uploads (``.name`` holds the path) and for plain
        # path strings, which have no ``.name`` attribute.
        csv_path = getattr(csv_file, "name", csv_file)
        df = pd.read_csv(csv_path, header=None)

        # Ensure there are at least 2 columns (features + label)
        if df.shape[1] < 2:
            raise ValueError("CSV must contain at least one feature column and one label column.")

        # Check for missing values
        if df.isnull().values.any():
            raise ValueError(
                "Dataset contains missing (NaN) values. "
                "Please clean your data before uploading."
            )

        # Fail early with a clear message instead of deep inside the splitter.
        # Only float ratios are checked; other values are passed through.
        if validation is None or (
            isinstance(validation, float) and not 0.0 < validation < 1.0
        ):
            raise ValueError("Validation ratio must be a number strictly between 0 and 1.")

        # Separate features and label
        X = np.array(df.iloc[:, :-1])
        y_raw = df.iloc[:, -1].astype(str).str.strip()
        if len(set(y_raw)) <= 1:
            raise ValueError("Label column must contain at least two distinct classes.")

        # Encode categorical labels
        y = LabelEncoder().fit_transform(y_raw)

        # Create data object and split
        data = Data(X, y)
        data.split_train_test(test_size=validation)

        # Initialize the feature selector
        selector = MhaSelector(
            problem="classification",
            estimator=classifier,
            optimizer=method,
            optimizer_paras={"epoch": int(epoch), "pop_size": int(population)},
            transfer_func=transfer,
        )

        # Fit the selector
        try:
            # NOTE(review): fit_weights presumably balances score against the
            # number of selected features - confirm against the mafese docs.
            selector.fit(data.X_train, data.y_train, fit_weights=(0.9, 0.1), verbose=False)
            selected_indexes = selector.selected_feature_indexes.copy()
        except Exception as fit_err:
            return _failure_outputs("Training Error", fit_err)

        # Manually slice and fit using selected indexes
        try:
            X_sel_train = data.X_train[:, selected_indexes]
            X_sel_test = data.X_test[:, selected_indexes]

            # Keep the matrices 2-D even if the selection collapses a dimension.
            if X_sel_train.ndim == 1:
                X_sel_train = X_sel_train.reshape(-1, 1)
            if X_sel_test.ndim == 1:
                X_sel_test = X_sel_test.reshape(-1, 1)

            if X_sel_test.shape[1] != X_sel_train.shape[1]:
                raise ValueError(
                    f"Feature mismatch: Train={X_sel_train.shape[1]} vs Test={X_sel_test.shape[1]}"
                )

            # Retrain the classifier manually to guarantee consistency
            selector.estimator.fit(X_sel_train, data.y_train)

            # Predict
            y_pred = selector.estimator.predict(X_sel_test)
        except Exception as pred_err:
            return _failure_outputs("Prediction Error", pred_err)

        # Evaluate performance
        results_list = format_results_to_list(data.y_test, y_pred, metrics)

        return (
            str(selected_indexes),
            str(selector.selected_feature_solution),
            str(selector.selected_feature_masks),
            results_list,
        )
    except Exception as outer_e:
        # UI boundary: report every failure in the results table.
        return _failure_outputs("Unhandled Error", outer_e)
# Choices offered by the classifier, transfer-function and optimizer dropdowns.
SUPPORTED_ESTIMATORS = ["knn", "svm", "rf", "adaboost", "xgb", "tree", "ann"]
SUPPORTED_TRANSFERS = [
    "vstf_01", "vstf_02", "vstf_03", "vstf_04",
    "sstf_01", "sstf_02", "sstf_03", "sstf_04",
    "rtf",
]
SUPPORTED_OPTIMIZERS = list(get_all_optimizers().keys())

# "About" panel text. Kept at column 0 with explicit line structure so the
# bullet lists and sections render as Markdown instead of one flat paragraph.
ABOUT_MARKDOWN = """
**Created by [Muhammed Kotan](https://github.com/muhammedkotan)**
Sakarya University, Department of Information Systems Engineering

This app demonstrates a **metaheuristic-based feature selection system** using the [mafese](https://github.com/thieu1995/mafese) library, supported by:

- [mealpy](https://github.com/thieu1995/mealpy) – metaheuristic optimizers
- [permetrics](https://github.com/thieu1995/permetrics) – standard evaluation metrics
- [Gradio](https://www.gradio.app/) – fast UI development
- scikit-learn, pandas, numpy – ML and data handling

📂 **Sample Datasets:**
To try out this system, you can use sample datasets available here:
👉 [mafese/data/cls/](https://github.com/thieu1995/mafese/tree/main/mafese/data/cls)

📄 **Citation :**

- M. Kotan and S. B. Abdullahi, "MetaFSelect: No-Code Metaheuristic Feature Selection for Machine Learning," 2025 33rd Telecommunications Forum (TELFOR), Belgrade, Serbia, 2025, pp. 1-4, doi: 10.1109/TELFOR67910.2025.11314278.

📄 **Related research papers :**

- Oğur, N. B., Kotan, M., Balta, D., Yavuz, B. Ç., Oğur, Y. S., Yuvacı, H. U., & Yazıcı, E. (2023). Detection of depression and anxiety in the perinatal period using Marine Predators Algorithm and kNN. Computers in Biology and Medicine, 161, 107003.
- Kotan, M. Seymen O. F. (2023). Makine Öğreniminde Öznitelik Seçme Yöntemlerinin Kullanımı: Güncel Python Uygulamaları, In Book: Mühendislikte İleri ve Çağdaş Çalışmalar 1, Duvar Yayınları
- Kotan, M., Faruk Seymen, Ö., Çallı, L., Kasım, S., Çarklı Yavuz, B., & Över Özçelik, T. (2025). A novel methodological approach to SaaS churn prediction using whale optimization algorithm. PloS one, 20(5), e0319998.

📬 **Questions or Feedback?**
Reach out via [GitHub](https://github.com/muhammedkotan) or [ResearchGate](https://www.researchgate.net/profile/Muhammed-Kotan-2?ev=hdr_xprf)

---
"""

# UI layout: inputs in the left column, results in the right column, and the
# About panel underneath.
with gr.Blocks() as demo:
    gr.Markdown("# MetaFSelect: NoCode Metaheuristic Feature Selection")

    with gr.Row():
        with gr.Column():
            csv_input = gr.File(label="📁 Upload CSV (last column = label)")
            method = gr.Dropdown(SUPPORTED_OPTIMIZERS, value="OriginalMPA", label="🔍 Optimizer")
            validation = gr.Number(value=0.2, label="📊 Validation Ratio")
            epoch = gr.Number(value=50, label="⏳ Epochs")
            population = gr.Number(value=20, label="👥 Population Size")
            classifier = gr.Dropdown(SUPPORTED_ESTIMATORS, value="knn", label="🧪 Classifier")
            transfer = gr.Dropdown(SUPPORTED_TRANSFERS, value="vstf_01", label="🔄 Transfer Function")
            metrics = gr.CheckboxGroup(
                choices=ALL_METRICS,
                value=["AS", "PS", "RS", "F1S"],
                label="📐 Evaluation Metrics",
            )
            btn = gr.Button("🚀 Run Feature Selection")

        with gr.Column():
            out_idx = gr.Textbox(label="📌 Selected Feature Indexes")
            out_sol = gr.Textbox(label="📍 Selected Feature Solution")
            out_mask = gr.Textbox(label="🧬 Selected Feature Mask")
            out_table = gr.Dataframe(
                headers=["Metric", "Value"],
                datatype=["str", "number"],
                label="📈 Evaluation Results",
            )

    # The input order must match the parameter order of run_feature_selection,
    # and the output order must match the 4-tuple it returns.
    btn.click(
        fn=run_feature_selection,
        inputs=[csv_input, method, validation, epoch, population, classifier, transfer, metrics],
        outputs=[out_idx, out_sol, out_mask, out_table],
    )

    gr.Markdown("---")
    gr.Markdown("### 📘 About This App")
    gr.Markdown(ABOUT_MARKDOWN)

# Launch with request queuing enabled; this runs at module level.
demo.queue().launch()