| """ |
| Iris Flower Classifier - Hugging Face Space |
| ============================================= |
| Descarga el modelo XGBoost desde Kaggle y sirve una interfaz Gradio. |
| Modelo: gustavodelacruztovar/iris-xgboost-feature-engineered |
| Dataset: gustavodelacruztovar/iris-flower-feature-engineered |
| """ |
|
|
| import gradio as gr |
| import joblib |
| import numpy as np |
| import pandas as pd |
| import json |
| import matplotlib.pyplot as plt |
| import seaborn as sns |
| import os |
| import zipfile |
| import urllib.request |
| import tempfile |
|
|
| |
| |
| |
# Local directories where the downloaded artifacts are cached.
MODEL_DIR, DATA_DIR = "model_artifacts", "data_artifacts"

# Kaggle handles of the model version and the dataset to download.
KAGGLE_MODEL = (
    "gustavodelacruztovar/iris-xgboost-feature-engineered"
    "/Other/default/1"
)
KAGGLE_DATASET = "gustavodelacruztovar/iris-flower-feature-engineered"
|
|
|
|
def _ensure_kaggle_credentials():
    """Create ~/.kaggle/kaggle.json from environment variables if it is missing.

    Nothing is written when the file already exists or when either
    KAGGLE_USERNAME or KAGGLE_KEY is unset or empty.
    """
    kaggle_dir = os.path.expanduser("~/.kaggle")
    kaggle_json = os.path.join(kaggle_dir, "kaggle.json")
    if os.path.exists(kaggle_json):
        return

    username = os.environ.get("KAGGLE_USERNAME", "")
    key = os.environ.get("KAGGLE_KEY", "")
    if not (username and key):
        return

    os.makedirs(kaggle_dir, exist_ok=True)
    # Create the file with owner-only permissions from the start so the API
    # key is never readable by other users, not even briefly.
    fd = os.open(kaggle_json, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as credentials_file:
        json.dump({"username": username, "key": key}, credentials_file)
    os.chmod(kaggle_json, 0o600)


def _extract_zip_archives(directory):
    """Extract every .zip file found directly in *directory*, then delete it."""
    for entry in os.listdir(directory):
        if not entry.endswith(".zip"):
            continue
        archive_path = os.path.join(directory, entry)
        with zipfile.ZipFile(archive_path, "r") as archive:
            archive.extractall(directory)
        os.remove(archive_path)


def download_from_kaggle():
    """Download the model and the dataset from Kaggle unless already cached.

    The model counts as cached when ``MODEL_DIR/model.joblib`` exists and the
    dataset when ``DATA_DIR/iris_engineered.csv`` exists. When both are
    present the function returns without importing or authenticating the
    Kaggle client, so a warm cache needs neither credentials nor network.

    Otherwise the credentials file is created from KAGGLE_USERNAME /
    KAGGLE_KEY if needed, the client is authenticated, and each missing
    artifact is downloaded into its directory.

    Raises:
        Whatever the Kaggle client raises on failed authentication or
        download; errors are not caught here.
    """
    need_model = not os.path.exists(os.path.join(MODEL_DIR, "model.joblib"))
    need_dataset = not os.path.exists(
        os.path.join(DATA_DIR, "iris_engineered.csv")
    )
    if not (need_model or need_dataset):
        return

    _ensure_kaggle_credentials()

    # Imported only once the credentials file is in place; presumably the
    # kaggle package looks for credentials at import time -- TODO confirm.
    from kaggle.api.kaggle_api_extended import KaggleApi

    api = KaggleApi()
    api.authenticate()

    if need_model:
        print("Descargando modelo desde Kaggle...")
        os.makedirs(MODEL_DIR, exist_ok=True)
        # Use the shared constant rather than a second copy of the handle.
        api.model_instance_version_download(
            KAGGLE_MODEL,
            path=MODEL_DIR,
            untar=True,
        )
        # Unpack any .zip archives left in the directory after the download.
        _extract_zip_archives(MODEL_DIR)
        print("✓ Modelo descargado")

    if need_dataset:
        print("Descargando dataset desde Kaggle...")
        os.makedirs(DATA_DIR, exist_ok=True)
        api.dataset_download_files(KAGGLE_DATASET, path=DATA_DIR, unzip=True)
        print("✓ Dataset descargado")
|
|
|
|
# Make sure the artifacts are on disk before anything below tries to read them.
download_from_kaggle()

# NOTE(review): joblib.load unpickles the files, which can execute arbitrary
# code; this is only safe as long as the Kaggle artifacts are trusted.
model, le = (
    joblib.load(os.path.join(MODEL_DIR, artifact_name))
    for artifact_name in ("model.joblib", "label_encoder.joblib")
)

# Model metadata; later code reads its "features", "metrics" and "classes".
with open(os.path.join(MODEL_DIR, "model_info.json")) as info_file:
    model_info = json.load(info_file)

# Dataset used by the EDA plots and the data preview tab.
df = pd.read_csv(os.path.join(DATA_DIR, "iris_engineered.csv"))
|
|
| |
| |
| |
# Per-measurement slider settings. Each tuple is unpacked positionally into
# gr.Slider, i.e. (minimum, maximum, initial value).
FEATURE_RANGES = dict(
    sepal_length=(4.0, 8.0, 5.8),
    sepal_width=(2.0, 4.5, 3.0),
    petal_length=(1.0, 7.0, 3.8),
    petal_width=(0.1, 2.5, 1.2),
)

# Emoji shown in front of each species name in the prediction output.
SPECIES_EMOJI = dict(
    [
        ("Iris-setosa", "🌸"),
        ("Iris-versicolor", "🌺"),
        ("Iris-virginica", "🌻"),
    ]
)
|
|
# Column order used to build the model input, taken from the model metadata
# (empty when the metadata has no "features" entry).
ENGINEERED_FEATURES = model_info["features"] if "features" in model_info else []

# The four raw measurements collected from the user.
ORIGINAL_FEATURES = [
    "sepal_length",
    "sepal_width",
    "petal_length",
    "petal_width",
]
|
|
|
|
def engineer_features(sepal_length, sepal_width, petal_length, petal_width,
                      feature_names=None):
    """Build the model input row from the four raw measurements.

    Computes the raw values plus the derived ratio, area, difference,
    log-area and perimeter features, then returns the requested ones in the
    requested order.

    Args:
        sepal_length: Sepal length.
        sepal_width: Sepal width; used as a divisor, so it must not be zero.
        petal_length: Petal length; used as a divisor, so it must not be zero.
        petal_width: Petal width; used as a divisor, so it must not be zero.
        feature_names: Names of the features to return, in output column
            order. Defaults to the module-level ``ENGINEERED_FEATURES``.

    Returns:
        A float array of shape ``(1, len(feature_names))``.

    Raises:
        ValueError: If a measurement used as a divisor is zero.
        KeyError: If ``feature_names`` contains a name this function does
            not compute.
    """
    if feature_names is None:
        feature_names = ENGINEERED_FEATURES

    # Fail with a message that names the field instead of letting a bare
    # ZeroDivisionError escape from one of the ratios below.
    divisors = (
        ("sepal_width", sepal_width),
        ("petal_length", petal_length),
        ("petal_width", petal_width),
    )
    for name, value in divisors:
        if value == 0:
            raise ValueError(
                f"{name} must be non-zero to compute the ratio features"
            )

    sepal_area = sepal_length * sepal_width
    petal_area = petal_length * petal_width

    row = {
        "sepal_length": sepal_length,
        "sepal_width": sepal_width,
        "petal_length": petal_length,
        "petal_width": petal_width,
        "sepal_ratio": sepal_length / sepal_width,
        "petal_ratio": petal_length / petal_width,
        "sepal_petal_length_ratio": sepal_length / petal_length,
        "sepal_petal_width_ratio": sepal_width / petal_width,
        "sepal_area": sepal_area,
        "petal_area": petal_area,
        "area_ratio": sepal_area / petal_area,
        "length_diff": sepal_length - petal_length,
        "width_diff": sepal_width - petal_width,
        "log_petal_area": np.log1p(petal_area),
        "log_sepal_area": np.log1p(sepal_area),
        "sepal_perimeter": 2 * (sepal_length + sepal_width),
        "petal_perimeter": 2 * (petal_length + petal_width),
    }

    unknown = [name for name in feature_names if name not in row]
    if unknown:
        raise KeyError(f"Unknown feature name(s) requested: {unknown}")

    # Force a float dtype so integer-only inputs cannot yield an int array.
    return np.array([[row[name] for name in feature_names]], dtype=float)
|
|
|
|
def predict(sepal_length, sepal_width, petal_length, petal_width):
    """Return the predicted probability of each Iris species.

    The measurements are expanded with ``engineer_features`` and passed to
    the model's ``predict_proba``. The result maps "<emoji> <class name>"
    to a float probability, pairing the label encoder's classes with the
    probabilities in order.
    """
    model_input = engineer_features(
        sepal_length, sepal_width, petal_length, petal_width
    )
    class_probabilities = model.predict_proba(model_input)[0]

    scores = {}
    for cls, probability in zip(le.classes_, class_probabilities):
        scores[f"{SPECIES_EMOJI.get(cls, '')} {cls}"] = float(probability)
    return scores
|
|
|
|
def create_eda_plot(column, plot_type):
    """Draw an exploratory plot of one dataset column, split by species.

    Args:
        column: Name of the ``df`` column to plot.
        plot_type: One of "Histograma", "Boxplot", "Violin" or
            "Scatter (vs petal_length)". Any other value leaves the axes
            empty apart from the title and x label.

    Returns:
        The matplotlib figure. It is already closed in pyplot but can
        still be rendered.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        if plot_type == "Histograma":
            for species in df["species"].unique():
                subset = df[df["species"] == species]
                ax.hist(subset[column], alpha=0.6, label=species, bins=15)
            ax.legend()
        elif plot_type == "Boxplot":
            sns.boxplot(x="species", y=column, data=df, ax=ax)
        elif plot_type == "Violin":
            sns.violinplot(x="species", y=column, data=df, ax=ax)
        elif plot_type == "Scatter (vs petal_length)":
            for species in df["species"].unique():
                subset = df[df["species"] == species]
                ax.scatter(
                    subset[column], subset["petal_length"],
                    alpha=0.7, label=species,
                )
            ax.set_ylabel("petal_length")
            ax.legend()
        ax.set_title(f"{plot_type} de {column}")
        ax.set_xlabel(column)
        # Lay out this figure explicitly: plt.tight_layout() targets pyplot's
        # "current figure", which may be another one under concurrent requests.
        fig.tight_layout()
    finally:
        # pyplot keeps every figure it creates until it is closed; without
        # this a long-running server accumulates one figure per call.
        plt.close(fig)
    return fig
|
|
|
|
def show_correlation():
    """Draw the correlation matrix of the original features as a heatmap.

    Only the lower triangle is shown; the upper triangle and the diagonal
    are masked out.

    Returns:
        The matplotlib figure. It is already closed in pyplot but can
        still be rendered.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        corr = df[ORIGINAL_FEATURES].corr()
        mask = np.triu(np.ones_like(corr, dtype=bool))
        sns.heatmap(
            corr, mask=mask, annot=True, fmt=".2f",
            cmap="coolwarm", center=0, ax=ax,
        )
        ax.set_title("Matriz de Correlación")
        # Lay out this figure explicitly: plt.tight_layout() targets pyplot's
        # "current figure", which may be another one under concurrent requests.
        fig.tight_layout()
    finally:
        # pyplot keeps every figure it creates until it is closed; without
        # this a long-running server accumulates one figure per call.
        plt.close(fig)
    return fig
|
|
|
|
def show_pairplot():
    """Draw a seaborn pairplot of the original features, coloured by species.

    Returns:
        The matplotlib figure behind the pairplot grid. It is already
        closed in pyplot but can still be rendered.
    """
    grid = sns.pairplot(
        df[ORIGINAL_FEATURES + ["species"]],
        hue="species",
        diag_kind="kde",
        height=2.2,
    )
    fig = grid.figure
    # The grid's figure is registered with pyplot; close it so repeated
    # clicks do not pile up open figures in the server process.
    plt.close(fig)
    return fig
|
|
|
|
| |
| |
| |
# Gradio interface: four tabs (prediction, interactive EDA, data preview and
# model metrics). Components are created in display order.
with gr.Blocks(title="Iris Flower Classifier") as demo:
    gr.Markdown(
        """
        # 🌺 Iris Flower Classifier
        Clasificador de flores Iris usando **XGBoost** con **17 features engineered**.

        Modelo descargado desde [Kaggle Models](https://www.kaggle.com/models/gustavodelacruztovar/iris-xgboost-feature-engineered)
        | Dataset desde [Kaggle Datasets](https://www.kaggle.com/datasets/gustavodelacruztovar/iris-flower-feature-engineered)
        """
    )

    # --- Prediction tab: measurement sliders on the left, result on the right.
    with gr.Tab("🔮 Predicción"):
        with gr.Row():
            with gr.Column():
                sl = gr.Slider(*FEATURE_RANGES["sepal_length"], label="Sepal Length (cm)")
                sw = gr.Slider(*FEATURE_RANGES["sepal_width"], label="Sepal Width (cm)")
                pl = gr.Slider(*FEATURE_RANGES["petal_length"], label="Petal Length (cm)")
                pw = gr.Slider(*FEATURE_RANGES["petal_width"], label="Petal Width (cm)")
                predict_btn = gr.Button("Clasificar", variant="primary")
            with gr.Column():
                output_label = gr.Label(num_top_classes=3, label="Predicción")

        # The same four sliders feed both the button and the example rows.
        _measurement_inputs = [sl, sw, pl, pw]
        predict_btn.click(fn=predict, inputs=_measurement_inputs, outputs=output_label)

        _example_rows = [
            [5.1, 3.5, 1.4, 0.2],
            [6.2, 2.9, 4.3, 1.3],
            [7.7, 3.0, 6.1, 2.3],
        ]
        gr.Examples(
            examples=_example_rows,
            inputs=_measurement_inputs,
            label="Ejemplos por especie",
        )

    # --- EDA tab: a per-feature plot plus two on-demand overview plots.
    with gr.Tab("📊 EDA Interactivo"):
        with gr.Row():
            col_selector = gr.Dropdown(
                choices=ORIGINAL_FEATURES,
                value="petal_length",
                label="Feature",
            )
            plot_type = gr.Dropdown(
                choices=["Histograma", "Boxplot", "Violin", "Scatter (vs petal_length)"],
                value="Histograma",
                label="Tipo de gráfico",
            )
        eda_plot = gr.Plot(label="Visualización")

        # Redraw whenever either dropdown changes.
        _eda_inputs = [col_selector, plot_type]
        col_selector.change(fn=create_eda_plot, inputs=_eda_inputs, outputs=eda_plot)
        plot_type.change(fn=create_eda_plot, inputs=_eda_inputs, outputs=eda_plot)

        with gr.Row():
            corr_btn = gr.Button("Matriz de Correlación")
            pair_btn = gr.Button("Pairplot")
        extra_plot = gr.Plot(label="Análisis")
        corr_btn.click(fn=show_correlation, outputs=extra_plot)
        pair_btn.click(fn=show_pairplot, outputs=extra_plot)

    # --- Data tab: first rows of the dataset and its summary statistics.
    with gr.Tab("📋 Datos"):
        gr.Markdown("### Dataset Iris con Feature Engineering (150 muestras × 18 columnas)")
        gr.DataFrame(value=df.head(50), label="Primeras 50 filas")
        gr.DataFrame(value=df.describe().reset_index(), label="Estadísticas")

    # --- Metrics tab: headline numbers and the raw model metadata.
    with gr.Tab("📈 Métricas del Modelo"):
        gr.Markdown(f"""
        ### Rendimiento del modelo XGBoost (17 features engineered)
        - **Test Accuracy**: {model_info['metrics']['test_accuracy']:.4f}
        - **CV Accuracy**: {model_info['metrics']['cv_accuracy_mean']:.4f} ± {model_info['metrics']['cv_accuracy_std']:.4f}
        - **Clases**: {', '.join(model_info['classes'])}
        - **Features**: {len(ENGINEERED_FEATURES)} ({len(ORIGINAL_FEATURES)} originales + {len(ENGINEERED_FEATURES) - len(ORIGINAL_FEATURES)} engineered)
        """)
        gr.JSON(value=model_info, label="Metadata del modelo")


# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()
|
|