Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -199,17 +199,24 @@ def normalize_to_abbr(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 199 |
out.columns = newcols
|
| 200 |
return out
|
| 201 |
|
| 202 |
-
# ----
|
| 203 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
"""
|
| 205 |
Returns a NumPy array with columns ordered exactly as in model training.
|
| 206 |
Using np.ndarray bypasses sklearn's feature-name validation.
|
| 207 |
"""
|
| 208 |
-
df_abbr = normalize_to_abbr(
|
| 209 |
# mapping abbr -> actual column present
|
| 210 |
colmap = { _abbr(c): c for c in df_abbr.columns }
|
| 211 |
|
| 212 |
-
train_names =
|
| 213 |
order_cols = []
|
| 214 |
missing = []
|
| 215 |
for nm in train_names:
|
|
@@ -220,16 +227,21 @@ def _make_X_for_model(df: pd.DataFrame, model, fallback_features: list[str]) ->
|
|
| 220 |
missing.append(nm)
|
| 221 |
|
| 222 |
if missing:
|
| 223 |
-
st.markdown(
|
| 224 |
-
|
|
|
|
|
|
|
|
|
|
| 225 |
st.stop()
|
| 226 |
|
| 227 |
-
|
| 228 |
-
|
|
|
|
|
|
|
| 229 |
|
| 230 |
def ensure_required_features(df: pd.DataFrame, model, fallback_features: list[str]) -> bool:
|
| 231 |
df_abbr = normalize_to_abbr(df)
|
| 232 |
-
need = [_abbr(nm) for nm in
|
| 233 |
have = {_abbr(c) for c in df_abbr.columns}
|
| 234 |
miss = [n for n in need if n not in have]
|
| 235 |
if miss:
|
|
@@ -237,6 +249,19 @@ def ensure_required_features(df: pd.DataFrame, model, fallback_features: list[st
|
|
| 237 |
return False
|
| 238 |
return True
|
| 239 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
def find_sheet(book, names):
|
| 241 |
low2orig = {k.lower(): k for k in book.keys()}
|
| 242 |
for nm in names:
|
|
@@ -557,8 +582,9 @@ if st.session_state.app_step == "dev":
|
|
| 557 |
tr = normalize_to_abbr(tr_raw)
|
| 558 |
te = normalize_to_abbr(te_raw)
|
| 559 |
|
| 560 |
-
|
| 561 |
-
|
|
|
|
| 562 |
|
| 563 |
st.session_state.results["Train"]=tr; st.session_state.results["Test"]=te
|
| 564 |
st.session_state.results["m_train"]={
|
|
@@ -757,7 +783,7 @@ if st.session_state.app_step == "validate":
|
|
| 757 |
st.stop()
|
| 758 |
|
| 759 |
df = normalize_to_abbr(df_raw)
|
| 760 |
-
df[PRED_COL] =
|
| 761 |
st.session_state.results["Validate"]=df
|
| 762 |
|
| 763 |
ranges = st.session_state.train_ranges; oor_pct = 0.0; tbl=None
|
|
@@ -869,7 +895,7 @@ if st.session_state.app_step == "predict":
|
|
| 869 |
st.stop()
|
| 870 |
|
| 871 |
df = normalize_to_abbr(df_raw)
|
| 872 |
-
df[PRED_COL] =
|
| 873 |
st.session_state.results["PredictOnly"]=df
|
| 874 |
|
| 875 |
ranges = st.session_state.train_ranges; oor_pct = 0.0
|
|
|
|
| 199 |
out.columns = newcols
|
| 200 |
return out
|
| 201 |
|
| 202 |
+
# ---- Model feature order + X builder (returns NumPy to bypass name checks) ----
|
| 203 |
+
def _training_feature_order(model, fallback_features: list[str]) -> list[str]:
|
| 204 |
+
"""Return the model's training feature order if available, else fallback."""
|
| 205 |
+
names = list(getattr(model, "feature_names_in_", []))
|
| 206 |
+
if names:
|
| 207 |
+
return [str(n) for n in names]
|
| 208 |
+
return list(fallback_features)
|
| 209 |
+
|
| 210 |
+
def _make_X(df_raw: pd.DataFrame, model, fallback_features: list[str]) -> np.ndarray:
|
| 211 |
"""
|
| 212 |
Returns a NumPy array with columns ordered exactly as in model training.
|
| 213 |
Using np.ndarray bypasses sklearn's feature-name validation.
|
| 214 |
"""
|
| 215 |
+
df_abbr = normalize_to_abbr(df_raw)
|
| 216 |
# mapping abbr -> actual column present
|
| 217 |
colmap = { _abbr(c): c for c in df_abbr.columns }
|
| 218 |
|
| 219 |
+
train_names = _training_feature_order(model, fallback_features)
|
| 220 |
order_cols = []
|
| 221 |
missing = []
|
| 222 |
for nm in train_names:
|
|
|
|
| 227 |
missing.append(nm)
|
| 228 |
|
| 229 |
if missing:
|
| 230 |
+
st.markdown(
|
| 231 |
+
'<div class="st-message-box st-error">Missing required columns for prediction (by model training): '
|
| 232 |
+
+ ", ".join(missing) + '</div>',
|
| 233 |
+
unsafe_allow_html=True
|
| 234 |
+
)
|
| 235 |
st.stop()
|
| 236 |
|
| 237 |
+
X_df = df_abbr[order_cols].apply(pd.to_numeric, errors="coerce")
|
| 238 |
+
X_np = X_df.to_numpy(dtype=float, copy=False)
|
| 239 |
+
# Safety: ensure plain ndarray (no pandas attrs)
|
| 240 |
+
return np.asarray(X_np, dtype=float)
|
| 241 |
|
| 242 |
def ensure_required_features(df: pd.DataFrame, model, fallback_features: list[str]) -> bool:
|
| 243 |
df_abbr = normalize_to_abbr(df)
|
| 244 |
+
need = [_abbr(nm) for nm in _training_feature_order(model, fallback_features)]
|
| 245 |
have = {_abbr(c) for c in df_abbr.columns}
|
| 246 |
miss = [n for n in need if n not in have]
|
| 247 |
if miss:
|
|
|
|
| 249 |
return False
|
| 250 |
return True
|
| 251 |
|
| 252 |
+
def safe_predict(model, df_raw: pd.DataFrame, fallback_features: list[str]) -> np.ndarray:
    """
    Centralized, name-check-proof prediction.

    - Builds X in the model's training feature order (via ``_make_X``)
    - Feeds a plain NumPy array, which bypasses sklearn's feature-name
      validation on ``predict``

    Parameters
    ----------
    model : fitted estimator exposing ``predict``
    df_raw : raw input frame; column normalization/ordering is delegated
        to ``_make_X``
    fallback_features : feature order used when the model does not expose
        ``feature_names_in_``

    Returns
    -------
    np.ndarray
        Whatever ``model.predict`` returns for the assembled matrix.
    """
    X = _make_X(df_raw, model, fallback_features)
    try:
        return model.predict(X)
    except Exception:  # noqa: BLE001 — deliberate one-shot retry below
        # Last resort: force a plain float ndarray (some estimators are
        # picky about dtype or array subclass) and retry once; if this
        # also fails, the second exception propagates to the caller.
        return model.predict(np.asarray(X, dtype=float))
|
| 264 |
+
|
| 265 |
def find_sheet(book, names):
|
| 266 |
low2orig = {k.lower(): k for k in book.keys()}
|
| 267 |
for nm in names:
|
|
|
|
| 582 |
tr = normalize_to_abbr(tr_raw)
|
| 583 |
te = normalize_to_abbr(te_raw)
|
| 584 |
|
| 585 |
+
# ---- SAFE PREDICT (NumPy only) ----
|
| 586 |
+
tr[PRED_COL] = safe_predict(model, tr_raw, FEATURES)
|
| 587 |
+
te[PRED_COL] = safe_predict(model, te_raw, FEATURES)
|
| 588 |
|
| 589 |
st.session_state.results["Train"]=tr; st.session_state.results["Test"]=te
|
| 590 |
st.session_state.results["m_train"]={
|
|
|
|
| 783 |
st.stop()
|
| 784 |
|
| 785 |
df = normalize_to_abbr(df_raw)
|
| 786 |
+
df[PRED_COL] = safe_predict(model, df_raw, FEATURES)
|
| 787 |
st.session_state.results["Validate"]=df
|
| 788 |
|
| 789 |
ranges = st.session_state.train_ranges; oor_pct = 0.0; tbl=None
|
|
|
|
| 895 |
st.stop()
|
| 896 |
|
| 897 |
df = normalize_to_abbr(df_raw)
|
| 898 |
+
df[PRED_COL] = safe_predict(model, df_raw, FEATURES)
|
| 899 |
st.session_state.results["PredictOnly"]=df
|
| 900 |
|
| 901 |
ranges = st.session_state.train_ranges; oor_pct = 0.0
|