# mkotan's picture
# Update app.py
# 5275b57 verified
import gradio as gr
import pandas as pd
import numpy as np
from mafese.wrapper.mha import MhaSelector
from mafese import Data
from mafese.utils.mealpy_util import get_all_optimizers
from permetrics.classification import ClassificationMetric
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Metric identifiers, taken from the keys of ClassificationMetric.SUPPORT.
# They are offered as the choices of the evaluation-metrics checkbox group.
ALL_METRICS = [*ClassificationMetric.SUPPORT.keys()]
def format_results_to_list(y_true, y_pred, metric_list):
    """Compute the requested metrics and return them as table rows.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        metric_list: Names of the metrics to compute. Every metric is
            requested with ``average="weighted"``.

    Returns:
        A list whose first row is ``["Metric", "Value"]`` followed by one
        ``[name, value]`` row per computed metric (values rounded to four
        decimals). If anything raises, the header row is followed by a
        single ``["Error", message]`` row instead; the function itself does
        not propagate the exception.
    """
    header = ["Metric", "Value"]
    try:
        scorer = ClassificationMetric(y_true, y_pred)
        # One parameter entry per requested metric, all using weighted averaging.
        weighted = [{"average": "weighted"}] * len(metric_list)
        scores = scorer.get_metrics_by_list_names(metric_list, weighted)
        table = [header]
        for name, value in scores.items():
            table.append([name, round(value, 4)])
        return table
    except Exception as e:
        # Surface the failure as a table row so the UI can display it.
        return [header, ["Error", str(e)]]
def _resolve_upload_path(csv_file):
    """Return the filesystem path of an upload, whatever form it arrives in."""
    import os

    if csv_file is None:
        raise ValueError("No CSV file provided. Please upload a dataset first.")
    # A plain string / path-like value already is the path.
    if isinstance(csv_file, (str, os.PathLike)):
        return csv_file
    # Otherwise expect a file-like wrapper that exposes its path as ``.name``.
    return csv_file.name


def _failure(label, error):
    """Build the 4-tuple returned to the UI when a stage fails."""
    return ("-", "-", "-", [["Metric", "Value"], [label, str(error)]])


def run_feature_selection(csv_file, method, validation, epoch, population, classifier, transfer, metrics):
    """Run metaheuristic feature selection on an uploaded CSV and score it.

    The CSV is read without a header row; every column except the last is a
    feature and the last column is the class label.

    Args:
        csv_file: The uploaded dataset. Accepts a path (``str`` or
            ``os.PathLike``) or an object whose ``.name`` attribute is the
            path. ``None`` is reported as an error.
        method: Optimizer name, passed to ``MhaSelector`` as ``optimizer``.
        validation: Passed as ``test_size`` to ``Data.split_train_test``.
        epoch: Converted with ``int`` and passed as the optimizer's ``epoch``.
        population: Converted with ``int`` and passed as ``pop_size``.
        classifier: Estimator name, passed to ``MhaSelector`` as ``estimator``.
        transfer: Passed to ``MhaSelector`` as ``transfer_func``.
        metrics: Metric names forwarded to ``format_results_to_list``.

    Returns:
        A 4-tuple ``(indexes, solution, mask, table)``: the first three are
        string renderings of the selector's selected feature indexes,
        solution and masks; ``table`` is the metric table. When any stage
        raises, the first three are ``"-"`` and ``table`` holds a single row
        naming the failing stage and the error message. Exceptions are
        reported this way rather than propagated.
    """
    try:
        # Read the uploaded CSV file
        csv_path = _resolve_upload_path(csv_file)
        with open(csv_path, "rb") as f:
            df = pd.read_csv(f, header=None)

        # Ensure there are at least 2 columns (features + label)
        if df.shape[1] < 2:
            raise ValueError("CSV must contain at least one feature column and one label column.")

        # Check for missing values
        if df.isnull().values.any():
            raise ValueError("Dataset contains missing (NaN) values. Please clean your data before uploading.")

        # Separate features and label
        X = np.array(df.iloc[:, :-1])
        y_raw = df.iloc[:, -1].astype(str).str.strip()
        if len(set(y_raw)) <= 1:
            raise ValueError("Label column must contain at least two distinct classes.")

        # Encode categorical labels as integers
        from sklearn.preprocessing import LabelEncoder
        le = LabelEncoder()
        y = le.fit_transform(y_raw)

        # Create data object and split
        data = Data(X, y)
        data.split_train_test(test_size=validation)

        # Initialize the feature selector
        selector = MhaSelector(
            problem="classification",
            estimator=classifier,
            optimizer=method,
            optimizer_paras={"epoch": int(epoch), "pop_size": int(population)},
            transfer_func=transfer,
        )

        # Fit the selector
        try:
            # NOTE(review): fit_weights presumably balances model score against
            # the number of selected features -- confirm against mafese docs.
            selector.fit(data.X_train, data.y_train, fit_weights=(0.9, 0.1), verbose=False)
            selected_indexes = selector.selected_feature_indexes.copy()
        except Exception as fit_err:
            return _failure("Training Error", fit_err)

        # Manually slice and fit using selected indexes
        try:
            X_sel_train = data.X_train[:, selected_indexes]
            X_sel_test = data.X_test[:, selected_indexes]
            # Estimators need 2-D input even when a single column is selected.
            if X_sel_train.ndim == 1:
                X_sel_train = X_sel_train.reshape(-1, 1)
            if X_sel_test.ndim == 1:
                X_sel_test = X_sel_test.reshape(-1, 1)
            if X_sel_test.shape[1] != X_sel_train.shape[1]:
                raise ValueError(f"Feature mismatch: Train={X_sel_train.shape[1]} vs Test={X_sel_test.shape[1]}")

            # Retrain the classifier manually to guarantee consistency
            selector.estimator.fit(X_sel_train, data.y_train)

            # Predict
            y_pred = selector.estimator.predict(X_sel_test)
        except Exception as pred_err:
            return _failure("Prediction Error", pred_err)

        # Evaluate performance
        results_list = format_results_to_list(data.y_test, y_pred, metrics)
        return (
            str(selected_indexes),
            str(selector.selected_feature_solution),
            str(selector.selected_feature_masks),
            results_list,
        )
    except Exception as outer_e:
        # UI boundary: report anything unexpected in the results table.
        return _failure("Unhandled Error", outer_e)
# Estimator names offered in the classifier dropdown.
SUPPORTED_ESTIMATORS = [
    "knn",
    "svm",
    "rf",
    "adaboost",
    "xgb",
    "tree",
    "ann",
]

# Transfer-function names offered in the transfer-function dropdown.
SUPPORTED_TRANSFERS = [
    "vstf_01",
    "vstf_02",
    "vstf_03",
    "vstf_04",
    "sstf_01",
    "sstf_02",
    "sstf_03",
    "sstf_04",
    "rtf",
]

# Optimizer names: the keys of the mapping returned by get_all_optimizers().
SUPPORTED_OPTIMIZERS = [*get_all_optimizers().keys()]
# Markdown shown in the "About" section at the bottom of the page.
ABOUT_TEXT = """
**Created by [Muhammed Kotan](https://github.com/muhammedkotan)**
Sakarya University, Department of Information Systems Engineering
This app demonstrates a **metaheuristic-based feature selection system** using the [mafese](https://github.com/thieu1995/mafese) library, supported by:
- [mealpy](https://github.com/thieu1995/mealpy) – metaheuristic optimizers
- [permetrics](https://github.com/thieu1995/permetrics) – standard evaluation metrics
- [Gradio](https://www.gradio.app/) – fast UI development
- scikit-learn, pandas, numpy – ML and data handling
📂 **Sample Datasets:**
To try out this system, you can use sample datasets available here:
👉 [mafese/data/cls/](https://github.com/thieu1995/mafese/tree/main/mafese/data/cls)
📄 **Citation :**
- M. Kotan and S. B. Abdullahi, "MetaFSelect: No-Code Metaheuristic Feature Selection for Machine Learning," 2025 33rd Telecommunications Forum (TELFOR), Belgrade, Serbia, 2025, pp. 1-4, doi: 10.1109/TELFOR67910.2025.11314278.
📄 **Related research papers :**
- Oğur, N. B., Kotan, M., Balta, D., Yavuz, B. Ç., Oğur, Y. S., Yuvacı, H. U., & Yazıcı, E. (2023). Detection of depression and anxiety in the perinatal period using Marine Predators Algorithm and kNN. Computers in Biology and Medicine, 161, 107003.
- Kotan, M. Seymen O. F. (2023). Makine Öğreniminde Öznitelik Seçme Yöntemlerinin Kullanımı: Güncel Python Uygulamaları, In Book: Mühendislikte İleri ve Çağdaş Çalışmalar 1, Duvar Yayınları
- Kotan, M., Faruk Seymen, Ö., Çallı, L., Kasım, S., Çarklı Yavuz, B., & Över Özçelik, T. (2025). A novel methodological approach to SaaS churn prediction using whale optimization algorithm. PloS one, 20(5), e0319998.
📬 **Questions or Feedback?**
Reach out via [GitHub](https://github.com/muhammedkotan) or [ResearchGate](https://www.researchgate.net/profile/Muhammed-Kotan-2?ev=hdr_xprf)
---
"""

with gr.Blocks() as demo:
    gr.Markdown("# MetaFSelect: NoCode Metaheuristic Feature Selection")

    with gr.Row():
        # Left column: dataset upload and run configuration.
        with gr.Column():
            csv_input = gr.File(label="📁 Upload CSV (last column = label)")
            method = gr.Dropdown(
                SUPPORTED_OPTIMIZERS,
                value="OriginalMPA",
                label="🔍 Optimizer",
            )
            validation = gr.Number(value=0.2, label="📊 Validation Ratio")
            epoch = gr.Number(value=50, label="⏳ Epochs")
            population = gr.Number(value=20, label="👥 Population Size")
            classifier = gr.Dropdown(
                SUPPORTED_ESTIMATORS,
                value="knn",
                label="🧪 Classifier",
            )
            transfer = gr.Dropdown(
                SUPPORTED_TRANSFERS,
                value="vstf_01",
                label="🔄 Transfer Function",
            )
            metrics = gr.CheckboxGroup(
                choices=ALL_METRICS,
                value=["AS", "PS", "RS", "F1S"],
                label="📐 Evaluation Metrics",
            )
            btn = gr.Button("🚀 Run Feature Selection")

        # Right column: selection results and the metric table.
        with gr.Column():
            out_idx = gr.Textbox(label="📌 Selected Feature Indexes")
            out_sol = gr.Textbox(label="📍 Selected Feature Solution")
            out_mask = gr.Textbox(label="🧬 Selected Feature Mask")
            out_table = gr.Dataframe(
                headers=["Metric", "Value"],
                datatype=["str", "number"],
                label="📈 Evaluation Results",
            )

    # Component order mirrors run_feature_selection's parameters and its
    # 4-tuple return value.
    run_inputs = [csv_input, method, validation, epoch, population, classifier, transfer, metrics]
    run_outputs = [out_idx, out_sol, out_mask, out_table]
    btn.click(fn=run_feature_selection, inputs=run_inputs, outputs=run_outputs)

    gr.Markdown("---")
    gr.Markdown("### 📘 About This App")
    gr.Markdown(ABOUT_TEXT)

# Enable request queuing, then start the app.
demo.queue().launch()