Synav committed on
Commit
b330c61
·
verified ·
1 Parent(s): 7d7e2cb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -57
app.py CHANGED
@@ -26,7 +26,7 @@ from sklearn.preprocessing import OneHotEncoder, StandardScaler
26
  from sklearn.impute import SimpleImputer
27
  from sklearn.linear_model import LogisticRegression
28
  from sklearn.model_selection import train_test_split
29
- from sklearn.metrics import roc_auc_score, accuracy_score
30
 
31
 
32
  # ============================================================
@@ -35,7 +35,7 @@ from sklearn.metrics import roc_auc_score, accuracy_score
35
  LABEL_COL = "AA"
36
  N_FEATURES = 26
37
  N_NUM = 13 # first 13 numeric, last 13 categorical
38
- LABEL_COL = "AA"
39
 
40
  def get_feature_cols_from_df(df: pd.DataFrame):
41
  """
@@ -103,33 +103,39 @@ def coerce_binary_label(y: pd.Series):
103
  pos = uniq_str[-1]
104
  return y.astype(str).eq(pos).astype(int).to_numpy(), pos
105
 
106
- def infer_schema_from_df(df: pd.DataFrame):
107
- """
108
- Uses the Excel header row (df.columns) as variable names.
109
- Assumptions:
110
- - First 26 columns are features (in order)
111
- - Column 'AA' is the binary label and must exist
112
- - Numeric = first 13 features; Categorical = remaining 13
113
- """
114
- if LABEL_COL not in df.columns:
115
- raise ValueError("Missing required label column 'AA'.")
116
-
117
- # Keep original column order, exclude AA
118
- feature_cols_all = [c for c in df.columns if c != LABEL_COL]
119
 
120
- if len(feature_cols_all) < N_FEATURES:
121
- raise ValueError(f"Need at least {N_FEATURES} feature columns (excluding AA). Found {len(feature_cols_all)}.")
122
-
123
- feature_cols = feature_cols_all[:N_FEATURES]
124
- num_cols = feature_cols[:N_NUM]
125
- cat_cols = feature_cols[N_NUM:]
126
-
127
- return feature_cols, num_cols, cat_cols
128
 
129
 
130
  # ============================================================
131
  # Training + persistence
132
  # ============================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  def train_and_save(df: pd.DataFrame, feature_cols, num_cols, cat_cols):
134
  X = df[feature_cols].copy()
135
  y_raw = df[LABEL_COL].copy()
@@ -211,32 +217,9 @@ def train_and_save(df: pd.DataFrame, feature_cols, num_cols, cat_cols):
211
  with open("meta.json", "w", encoding="utf-8") as f:
212
  json.dump(meta, f, indent=2)
213
 
214
- return pipe, meta, X
215
 
216
- def compute_classification_metrics(y_true, y_proba, threshold: float = 0.5):
217
- y_pred = (y_proba >= threshold).astype(int)
218
 
219
- tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
220
-
221
- sensitivity = tp / (tp + fn) if (tp + fn) else 0.0 # recall, TPR
222
- specificity = tn / (tn + fp) if (tn + fp) else 0.0 # TNR
223
- precision = precision_score(y_true, y_pred, zero_division=0)
224
- recall = recall_score(y_true, y_pred, zero_division=0)
225
- f1 = f1_score(y_true, y_pred, zero_division=0)
226
- acc = accuracy_score(y_true, y_pred)
227
- bacc = balanced_accuracy_score(y_true, y_pred)
228
-
229
- return {
230
- "threshold": float(threshold),
231
- "tn": int(tn), "fp": int(fp), "fn": int(fn), "tp": int(tp),
232
- "sensitivity": float(sensitivity),
233
- "specificity": float(specificity),
234
- "precision": float(precision),
235
- "recall": float(recall),
236
- "f1": float(f1),
237
- "accuracy": float(acc),
238
- "balanced_accuracy": float(bacc),
239
- }
240
 
241
 
242
 
@@ -446,7 +429,7 @@ with tab_train:
446
 
447
  if st.button("Train model"):
448
  with st.spinner("Training model..."):
449
- pipe, meta, X_bg = train_and_save(df, feature_cols, num_cols, cat_cols)
450
  explainer = build_shap_explainer(pipe, X_bg)
451
 
452
  st.session_state.pipe = pipe
@@ -533,15 +516,15 @@ with tab_train:
533
  help="Used as releases/<version>/ in the model repository",
534
  )
535
 
536
- if st.button("Publish model.joblib + meta.json to Model Repo"):
537
- try:
538
- with st.spinner("Uploading to Hugging Face Model repo..."):
539
- paths = publish_to_hub(MODEL_REPO_ID, version_tag)
540
-
541
- st.success("Uploaded successfully to your model repository.")
542
- st.json(paths)
543
- except Exception as e:
544
- st.error(f"Upload failed: {e}")
545
 
546
 
547
  # ---------------- PREDICT ----------------
 
26
  from sklearn.impute import SimpleImputer
27
  from sklearn.linear_model import LogisticRegression
28
  from sklearn.model_selection import train_test_split
29
+
30
 
31
 
32
  # ============================================================
 
35
  LABEL_COL = "AA"
36
  N_FEATURES = 26
37
  N_NUM = 13 # first 13 numeric, last 13 categorical
38
+
39
 
40
  def get_feature_cols_from_df(df: pd.DataFrame):
41
  """
 
103
  pos = uniq_str[-1]
104
  return y.astype(str).eq(pos).astype(int).to_numpy(), pos
105
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
 
 
 
 
 
 
 
 
107
 
108
 
109
  # ============================================================
110
  # Training + persistence
111
  # ============================================================
112
+
113
+ def compute_classification_metrics(y_true, y_proba, threshold: float = 0.5):
114
+ y_pred = (y_proba >= threshold).astype(int)
115
+
116
+ tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
117
+
118
+ sensitivity = tp / (tp + fn) if (tp + fn) else 0.0 # recall, TPR
119
+ specificity = tn / (tn + fp) if (tn + fp) else 0.0 # TNR
120
+ precision = precision_score(y_true, y_pred, zero_division=0)
121
+ recall = recall_score(y_true, y_pred, zero_division=0)
122
+ f1 = f1_score(y_true, y_pred, zero_division=0)
123
+ acc = accuracy_score(y_true, y_pred)
124
+ bacc = balanced_accuracy_score(y_true, y_pred)
125
+
126
+ return {
127
+ "threshold": float(threshold),
128
+ "tn": int(tn), "fp": int(fp), "fn": int(fn), "tp": int(tp),
129
+ "sensitivity": float(sensitivity),
130
+ "specificity": float(specificity),
131
+ "precision": float(precision),
132
+ "recall": float(recall),
133
+ "f1": float(f1),
134
+ "accuracy": float(acc),
135
+ "balanced_accuracy": float(bacc),
136
+ }
137
+
138
+
139
  def train_and_save(df: pd.DataFrame, feature_cols, num_cols, cat_cols):
140
  X = df[feature_cols].copy()
141
  y_raw = df[LABEL_COL].copy()
 
217
  with open("meta.json", "w", encoding="utf-8") as f:
218
  json.dump(meta, f, indent=2)
219
 
220
+ return pipe, meta, X, y_test, proba
221
 
 
 
222
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
 
224
 
225
 
 
429
 
430
  if st.button("Train model"):
431
  with st.spinner("Training model..."):
432
+ pipe, meta, X_bg, y_test, proba = train_and_save(df, feature_cols, num_cols, cat_cols)
433
  explainer = build_shap_explainer(pipe, X_bg)
434
 
435
  st.session_state.pipe = pipe
 
516
  help="Used as releases/<version>/ in the model repository",
517
  )
518
 
519
+ if st.button("Publish model.joblib + meta.json to Model Repo"):
520
+ try:
521
+ with st.spinner("Uploading to Hugging Face Model repo..."):
522
+ paths = publish_to_hub(MODEL_REPO_ID, version_tag)
523
+
524
+ st.success("Uploaded successfully to your model repository.")
525
+ st.json(paths)
526
+ except Exception as e:
527
+ st.error(f"Upload failed: {e}")
528
 
529
 
530
  # ---------------- PREDICT ----------------