Spaces:

BananaCircuit
/

IRIS

Sleeping

App Files Community

BananaCircuit committed on Mar 5

Commit

a6632ff

verified ·

1 Parent(s): 757fe25

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -18

app.py CHANGED Viewed

@@ -1,45 +1,102 @@
 import gradio as gr
 import pandas as pd
 from datasets import load_dataset
 from sklearn.model_selection import train_test_split
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
 def run_iris(seed: int = 42, test_size: float = 0.2, C: float = 1.0) -> str:
     """
-    Train and evaluate a baseline classifier on the Hugging Face IRIS dataset.
     Args:
         seed: Random seed for train/test split.
-        test_size: Fraction of samples used for test set.
         C: Inverse regularization strength for LogisticRegression.
     Returns:
-        A text report including accuracy, classification report, and confusion matrix.
     """
     ds = load_dataset("scikit-learn/iris")
     df = ds["train"].to_pandas()
-    feature_cols = [c for c in df.columns if c != "label"]
     X = df[feature_cols]
-    y = df["label"]
     X_train, X_test, y_train, y_test = train_test_split(
-        X, y, test_size=test_size, random_state=seed, stratify=y
     )
-    model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000, C=C))
     model.fit(X_train, y_train)
     pred = model.predict(X_test)
     acc = accuracy_score(y_test, pred)
     report = classification_report(y_test, pred, digits=4)
     cm = confusion_matrix(y_test, pred)
     cm_df = pd.DataFrame(cm)
-    return f"Accuracy: {acc:.4f}\n\n{report}\n\nConfusion matrix:\n{cm_df}"
 demo = gr.Interface(
     fn=run_iris,
@@ -48,21 +105,16 @@ demo = gr.Interface(
         gr.Slider(0.1, 0.5, value=0.2, step=0.05, label="test_size"),
         gr.Slider(0.1, 10.0, value=1.0, step=0.1, label="LogReg C"),
     ],
-    outputs=gr.Textbox(label="Result", lines=16),
-    title="IRIS: Train & Evaluate",
 )
-if __name__ == "__main__":
-    demo.launch(mcp_server=True)
-    PORT = int(os.environ.get("PORT", 7860))
-import os
 demo.launch(
     mcp_server=True,
     show_error=True,
     server_name="0.0.0.0",
-    server_port=int(os.environ.get("PORT", 7860)),
-    ssr_mode=False
 )

+import os
 import gradio as gr
 import pandas as pd
 from datasets import load_dataset
 from sklearn.model_selection import train_test_split
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
def _pick_label_column(df: pd.DataFrame) -> str:
    """
    Pick the label/target column robustly across Iris dataset variants.

    Resolution order:
      1. An exact match against the well-known names
         ``label, target, species, variety, class`` (in that priority order).
      2. A case-insensitive match against the same names, so headers such as
         ``Species`` or ``Target`` are recognised by name instead of relying
         on the positional heuristics below.
      3. The first non-numeric column.
      4. The last column.

    Args:
        df: Dataset as a DataFrame; must have at least one column.

    Returns:
        The chosen column label, exactly as it appears in ``df.columns``.

    Raises:
        ValueError: If ``df`` has no columns at all.
    """
    columns = list(df.columns)
    if not columns:
        raise ValueError("Cannot pick a label column: the DataFrame has no columns.")

    candidates = ("label", "target", "species", "variety", "class")

    # 1) Exact match first, so frames that already use the canonical
    #    lowercase names resolve exactly as before.
    for name in candidates:
        if name in columns:
            return name

    # 2) Case-insensitive match. When several columns fold to the same key,
    #    the left-most one wins (setdefault keeps the first seen).
    folded = {}
    for col in columns:
        folded.setdefault(str(col).strip().casefold(), col)
    for name in candidates:
        if name in folded:
            return folded[name]

    # 3) Heuristic fallback: the first non-numeric column is assumed to hold
    #    class names.
    for col in columns:
        if not pd.api.types.is_numeric_dtype(df[col]):
            return col

    # 4) Everything is numeric: fall back to the last column.
    return columns[-1]
 def run_iris(seed: int = 42, test_size: float = 0.2, C: float = 1.0) -> str:
     """
+    Train and evaluate a baseline Logistic Regression classifier on the Hugging Face IRIS dataset.
     Args:
         seed: Random seed for train/test split.
+        test_size: Fraction of samples to use as test set (0.1 ~ 0.5 recommended).
         C: Inverse regularization strength for LogisticRegression.
     Returns:
+        A text report including chosen label column, dataset columns, accuracy,
+        classification report, and confusion matrix.
     """
+    # Load dataset
     ds = load_dataset("scikit-learn/iris")
     df = ds["train"].to_pandas()
+    # Pick label column robustly
+    label_col = _pick_label_column(df)
+    # Build X/y
+    feature_cols = [c for c in df.columns if c != label_col]
+    if not feature_cols:
+        raise ValueError(f"No feature columns found. Columns={list(df.columns)} label_col={label_col}")
     X = df[feature_cols]
+    y = df[label_col]
+    # If labels are strings, encode to integers
+    if not pd.api.types.is_numeric_dtype(y):
+        y = pd.factorize(y)[0]
+    # Split
     X_train, X_test, y_train, y_test = train_test_split(
+        X,
+        y,
+        test_size=float(test_size),
+        random_state=int(seed),
+        stratify=y,
+    )
+    # Model
+    model = make_pipeline(
+        StandardScaler(),
+        LogisticRegression(max_iter=1000, C=float(C)),
     )
     model.fit(X_train, y_train)
     pred = model.predict(X_test)
+    # Metrics
     acc = accuracy_score(y_test, pred)
     report = classification_report(y_test, pred, digits=4)
     cm = confusion_matrix(y_test, pred)
+    # Render confusion matrix nicely
     cm_df = pd.DataFrame(cm)
+    return (
+        f"Using label_col: {label_col}\n"
+        f"Columns: {list(df.columns)}\n"
+        f"Features: {feature_cols}\n\n"
+        f"Accuracy: {acc:.4f}\n\n"
+        f"Classification report:\n{report}\n\n"
+        f"Confusion matrix:\n{cm_df}\n"
+    )
 demo = gr.Interface(
     fn=run_iris,
         gr.Slider(0.1, 0.5, value=0.2, step=0.05, label="test_size"),
         gr.Slider(0.1, 10.0, value=1.0, step=0.1, label="LogReg C"),
     ],
+    outputs=gr.Textbox(label="Result", lines=18),
+    title="IRIS: Train & Evaluate (MCP-enabled)",
 )
# Port to listen on: taken from the PORT environment variable when the host
# sets it, otherwise 7860.
PORT = int(os.getenv("PORT", "7860"))

# Launch settings gathered in one place so they are easy to scan and tweak.
_LAUNCH_OPTIONS = {
    "mcp_server": True,        # also serve the app over MCP
    "show_error": True,        # surface exceptions in the UI
    "server_name": "0.0.0.0",  # listen on all network interfaces
    "server_port": PORT,
    "ssr_mode": False,
}

demo.launch(**_LAUNCH_OPTIONS)