BananaCircuit committed on
Commit
a6632ff
·
verified ·
1 Parent(s): 757fe25

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -18
app.py CHANGED
@@ -1,45 +1,102 @@
 
1
  import gradio as gr
2
  import pandas as pd
3
  from datasets import load_dataset
 
4
  from sklearn.model_selection import train_test_split
5
  from sklearn.pipeline import make_pipeline
6
  from sklearn.preprocessing import StandardScaler
7
  from sklearn.linear_model import LogisticRegression
8
  from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def run_iris(seed: int = 42, test_size: float = 0.2, C: float = 1.0) -> str:
11
  """
12
- Train and evaluate a baseline classifier on the Hugging Face IRIS dataset.
13
 
14
  Args:
15
  seed: Random seed for train/test split.
16
- test_size: Fraction of samples used for test set.
17
  C: Inverse regularization strength for LogisticRegression.
18
 
19
  Returns:
20
- A text report including accuracy, classification report, and confusion matrix.
 
21
  """
 
22
  ds = load_dataset("scikit-learn/iris")
23
  df = ds["train"].to_pandas()
24
 
25
- feature_cols = [c for c in df.columns if c != "label"]
 
 
 
 
 
 
 
26
  X = df[feature_cols]
27
- y = df["label"]
 
 
 
 
28
 
 
29
  X_train, X_test, y_train, y_test = train_test_split(
30
- X, y, test_size=test_size, random_state=seed, stratify=y
 
 
 
 
 
 
 
 
 
 
31
  )
32
 
33
- model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000, C=C))
34
  model.fit(X_train, y_train)
35
  pred = model.predict(X_test)
36
 
 
37
  acc = accuracy_score(y_test, pred)
38
  report = classification_report(y_test, pred, digits=4)
39
  cm = confusion_matrix(y_test, pred)
40
 
 
41
  cm_df = pd.DataFrame(cm)
42
- return f"Accuracy: {acc:.4f}\n\n{report}\n\nConfusion matrix:\n{cm_df}"
 
 
 
 
 
 
 
 
 
43
 
44
  demo = gr.Interface(
45
  fn=run_iris,
@@ -48,21 +105,16 @@ demo = gr.Interface(
48
  gr.Slider(0.1, 0.5, value=0.2, step=0.05, label="test_size"),
49
  gr.Slider(0.1, 10.0, value=1.0, step=0.1, label="LogReg C"),
50
  ],
51
- outputs=gr.Textbox(label="Result", lines=16),
52
- title="IRIS: Train & Evaluate",
53
  )
54
 
55
- if __name__ == "__main__":
56
- demo.launch(mcp_server=True)
57
-
58
- PORT = int(os.environ.get("PORT", 7860))
59
-
60
- import os
61
 
62
  demo.launch(
63
  mcp_server=True,
64
  show_error=True,
65
  server_name="0.0.0.0",
66
- server_port=int(os.environ.get("PORT", 7860)),
67
- ssr_mode=False
68
  )
 
1
+ import os
2
  import gradio as gr
3
  import pandas as pd
4
  from datasets import load_dataset
5
+
6
  from sklearn.model_selection import train_test_split
7
  from sklearn.pipeline import make_pipeline
8
  from sklearn.preprocessing import StandardScaler
9
  from sklearn.linear_model import LogisticRegression
10
  from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
11
 
12
+
13
+ def _pick_label_column(df: pd.DataFrame) -> str:
14
+ """
15
+ Pick the label/target column robustly across Iris dataset variants.
16
+ Common names include: label, target, species, variety, class
17
+ """
18
+ candidates = ["label", "target", "species", "variety", "class"]
19
+ for c in candidates:
20
+ if c in df.columns:
21
+ return c
22
+
23
+ # Heuristic fallback:
24
+ # 1) If any non-numeric column exists, treat the first one as label
25
+ non_numeric = [c for c in df.columns if not pd.api.types.is_numeric_dtype(df[c])]
26
+ if non_numeric:
27
+ return non_numeric[0]
28
+
29
+ # 2) Otherwise, use the last column as label
30
+ return df.columns[-1]
31
+
32
+
33
  def run_iris(seed: int = 42, test_size: float = 0.2, C: float = 1.0) -> str:
34
  """
35
+ Train and evaluate a baseline Logistic Regression classifier on the Hugging Face IRIS dataset.
36
 
37
  Args:
38
  seed: Random seed for train/test split.
39
+ test_size: Fraction of samples to use as test set (0.1 ~ 0.5 recommended).
40
  C: Inverse regularization strength for LogisticRegression.
41
 
42
  Returns:
43
+ A text report including chosen label column, dataset columns, accuracy,
44
+ classification report, and confusion matrix.
45
  """
46
+ # Load dataset
47
  ds = load_dataset("scikit-learn/iris")
48
  df = ds["train"].to_pandas()
49
 
50
+ # Pick label column robustly
51
+ label_col = _pick_label_column(df)
52
+
53
+ # Build X/y
54
+ feature_cols = [c for c in df.columns if c != label_col]
55
+ if not feature_cols:
56
+ raise ValueError(f"No feature columns found. Columns={list(df.columns)} label_col={label_col}")
57
+
58
  X = df[feature_cols]
59
+ y = df[label_col]
60
+
61
+ # If labels are strings, encode to integers
62
+ if not pd.api.types.is_numeric_dtype(y):
63
+ y = pd.factorize(y)[0]
64
 
65
+ # Split
66
  X_train, X_test, y_train, y_test = train_test_split(
67
+ X,
68
+ y,
69
+ test_size=float(test_size),
70
+ random_state=int(seed),
71
+ stratify=y,
72
+ )
73
+
74
+ # Model
75
+ model = make_pipeline(
76
+ StandardScaler(),
77
+ LogisticRegression(max_iter=1000, C=float(C)),
78
  )
79
 
 
80
  model.fit(X_train, y_train)
81
  pred = model.predict(X_test)
82
 
83
+ # Metrics
84
  acc = accuracy_score(y_test, pred)
85
  report = classification_report(y_test, pred, digits=4)
86
  cm = confusion_matrix(y_test, pred)
87
 
88
+ # Render confusion matrix nicely
89
  cm_df = pd.DataFrame(cm)
90
+
91
+ return (
92
+ f"Using label_col: {label_col}\n"
93
+ f"Columns: {list(df.columns)}\n"
94
+ f"Features: {feature_cols}\n\n"
95
+ f"Accuracy: {acc:.4f}\n\n"
96
+ f"Classification report:\n{report}\n\n"
97
+ f"Confusion matrix:\n{cm_df}\n"
98
+ )
99
+
100
 
101
  demo = gr.Interface(
102
  fn=run_iris,
 
105
  gr.Slider(0.1, 0.5, value=0.2, step=0.05, label="test_size"),
106
  gr.Slider(0.1, 10.0, value=1.0, step=0.1, label="LogReg C"),
107
  ],
108
+ outputs=gr.Textbox(label="Result", lines=18),
109
+ title="IRIS: Train & Evaluate (MCP-enabled)",
110
  )
111
 
112
+ PORT = int(os.environ.get("PORT", "7860"))
 
 
 
 
 
113
 
114
  demo.launch(
115
  mcp_server=True,
116
  show_error=True,
117
  server_name="0.0.0.0",
118
+ server_port=PORT,
119
+ ssr_mode=False,
120
  )