Update app.py
Browse files
app.py
CHANGED
|
@@ -96,10 +96,19 @@ def train_and_save(df: pd.DataFrame):
|
|
| 96 |
X = df[FEATURE_COLS].copy()
|
| 97 |
y_raw = df[LABEL_COL].copy()
|
| 98 |
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
y01, pos_class = coerce_binary_label(y_raw)
|
| 105 |
|
|
@@ -216,11 +225,13 @@ with tab_predict:
|
|
| 216 |
if infer_file:
|
| 217 |
df_inf = pd.read_excel(infer_file, engine="openpyxl")
|
| 218 |
X_inf = df_inf[FEATURE_COLS].copy()
|
|
|
|
|
|
|
| 219 |
|
| 220 |
for c in NUM_COLS:
|
| 221 |
X_inf[c] = pd.to_numeric(X_inf[c], errors="coerce")
|
| 222 |
for c in CAT_COLS:
|
| 223 |
-
X_inf[c] = X_inf[c].astype("
|
| 224 |
|
| 225 |
pipe = st.session_state.pipe
|
| 226 |
proba = pipe.predict_proba(X_inf)[:, 1]
|
|
|
|
| 96 |
X = df[FEATURE_COLS].copy()
|
| 97 |
y_raw = df[LABEL_COL].copy()
|
| 98 |
|
| 99 |
+
# Replace pandas NA with NumPy NaN across the whole feature frame (critical for sklearn, which uses np.nan as its missing-value marker)
|
| 100 |
+
X = X.replace({pd.NA: np.nan})
|
| 101 |
+
|
| 102 |
+
# Coerce numeric columns to a numeric dtype; values that cannot be parsed become NaN
|
| 103 |
+
for c in NUM_COLS:
|
| 104 |
+
X[c] = pd.to_numeric(X[c], errors="coerce")
|
| 105 |
+
|
| 106 |
+
# Cast categorical columns to plain object dtype (values are not stringified), keeping missing entries as np.nan (not pd.NA)
|
| 107 |
+
for c in CAT_COLS:
|
| 108 |
+
X[c] = X[c].astype("object")
|
| 109 |
+
X.loc[X[c].isna(), c] = np.nan
|
| 110 |
+
|
| 111 |
+
|
| 112 |
|
| 113 |
y01, pos_class = coerce_binary_label(y_raw)
|
| 114 |
|
|
|
|
| 225 |
if infer_file:
|
| 226 |
df_inf = pd.read_excel(infer_file, engine="openpyxl")
|
| 227 |
X_inf = df_inf[FEATURE_COLS].copy()
|
| 228 |
+
X_inf = X_inf.replace({pd.NA: np.nan})
|
| 229 |
+
|
| 230 |
|
| 231 |
for c in NUM_COLS:
|
| 232 |
X_inf[c] = pd.to_numeric(X_inf[c], errors="coerce")
|
| 233 |
for c in CAT_COLS:
|
| 234 |
+
X_inf[c] = X_inf[c].astype("object")
|
| 235 |
|
| 236 |
pipe = st.session_state.pipe
|
| 237 |
proba = pipe.predict_proba(X_inf)[:, 1]
|