UCS2014 committed on
Commit
881d940
·
verified ·
1 Parent(s): 5408123

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -13
app.py CHANGED
@@ -199,17 +199,24 @@ def normalize_to_abbr(df: pd.DataFrame) -> pd.DataFrame:
199
  out.columns = newcols
200
  return out
201
 
202
- # ---- Build X in the model's training order & avoid name check ----
203
- def _make_X_for_model(df: pd.DataFrame, model, fallback_features: list[str]) -> np.ndarray:
 
 
 
 
 
 
 
204
  """
205
  Returns a NumPy array with columns ordered exactly as in model training.
206
  Using np.ndarray bypasses sklearn's feature-name validation.
207
  """
208
- df_abbr = normalize_to_abbr(df)
209
  # mapping abbr -> actual column present
210
  colmap = { _abbr(c): c for c in df_abbr.columns }
211
 
212
- train_names = list(getattr(model, "feature_names_in_", fallback_features))
213
  order_cols = []
214
  missing = []
215
  for nm in train_names:
@@ -220,16 +227,21 @@ def _make_X_for_model(df: pd.DataFrame, model, fallback_features: list[str]) ->
220
  missing.append(nm)
221
 
222
  if missing:
223
- st.markdown('<div class="st-message-box st-error">Missing required columns for prediction (by model training): '
224
- + ", ".join(missing) + '</div>', unsafe_allow_html=True)
 
 
 
225
  st.stop()
226
 
227
- X = df_abbr[order_cols].apply(pd.to_numeric, errors="coerce")
228
- return X.to_numpy()
 
 
229
 
230
  def ensure_required_features(df: pd.DataFrame, model, fallback_features: list[str]) -> bool:
231
  df_abbr = normalize_to_abbr(df)
232
- need = [_abbr(nm) for nm in list(getattr(model, "feature_names_in_", fallback_features))]
233
  have = {_abbr(c) for c in df_abbr.columns}
234
  miss = [n for n in need if n not in have]
235
  if miss:
@@ -237,6 +249,19 @@ def ensure_required_features(df: pd.DataFrame, model, fallback_features: list[st
237
  return False
238
  return True
239
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  def find_sheet(book, names):
241
  low2orig = {k.lower(): k for k in book.keys()}
242
  for nm in names:
@@ -557,8 +582,9 @@ if st.session_state.app_step == "dev":
557
  tr = normalize_to_abbr(tr_raw)
558
  te = normalize_to_abbr(te_raw)
559
 
560
- tr[PRED_COL] = model.predict(_make_X_for_model(tr_raw, model, FEATURES))
561
- te[PRED_COL] = model.predict(_make_X_for_model(te_raw, model, FEATURES))
 
562
 
563
  st.session_state.results["Train"]=tr; st.session_state.results["Test"]=te
564
  st.session_state.results["m_train"]={
@@ -757,7 +783,7 @@ if st.session_state.app_step == "validate":
757
  st.stop()
758
 
759
  df = normalize_to_abbr(df_raw)
760
- df[PRED_COL] = model.predict(_make_X_for_model(df_raw, model, FEATURES))
761
  st.session_state.results["Validate"]=df
762
 
763
  ranges = st.session_state.train_ranges; oor_pct = 0.0; tbl=None
@@ -869,7 +895,7 @@ if st.session_state.app_step == "predict":
869
  st.stop()
870
 
871
  df = normalize_to_abbr(df_raw)
872
- df[PRED_COL] = model.predict(_make_X_for_model(df_raw, model, FEATURES))
873
  st.session_state.results["PredictOnly"]=df
874
 
875
  ranges = st.session_state.train_ranges; oor_pct = 0.0
 
199
  out.columns = newcols
200
  return out
201
 
202
+ # ---- Model feature order + X builder (returns NumPy to bypass name checks) ----
203
+ def _training_feature_order(model, fallback_features: list[str]) -> list[str]:
204
+ """Return the model's training feature order if available, else fallback."""
205
+ names = list(getattr(model, "feature_names_in_", []))
206
+ if names:
207
+ return [str(n) for n in names]
208
+ return list(fallback_features)
209
+
210
+ def _make_X(df_raw: pd.DataFrame, model, fallback_features: list[str]) -> np.ndarray:
211
  """
212
  Returns a NumPy array with columns ordered exactly as in model training.
213
  Using np.ndarray bypasses sklearn's feature-name validation.
214
  """
215
+ df_abbr = normalize_to_abbr(df_raw)
216
  # mapping abbr -> actual column present
217
  colmap = { _abbr(c): c for c in df_abbr.columns }
218
 
219
+ train_names = _training_feature_order(model, fallback_features)
220
  order_cols = []
221
  missing = []
222
  for nm in train_names:
 
227
  missing.append(nm)
228
 
229
  if missing:
230
+ st.markdown(
231
+ '<div class="st-message-box st-error">Missing required columns for prediction (by model training): '
232
+ + ", ".join(missing) + '</div>',
233
+ unsafe_allow_html=True
234
+ )
235
  st.stop()
236
 
237
+ X_df = df_abbr[order_cols].apply(pd.to_numeric, errors="coerce")
238
+ X_np = X_df.to_numpy(dtype=float, copy=False)
239
+ # Safety: ensure plain ndarray (no pandas attrs)
240
+ return np.asarray(X_np, dtype=float)
241
 
242
  def ensure_required_features(df: pd.DataFrame, model, fallback_features: list[str]) -> bool:
243
  df_abbr = normalize_to_abbr(df)
244
+ need = [_abbr(nm) for nm in _training_feature_order(model, fallback_features)]
245
  have = {_abbr(c) for c in df_abbr.columns}
246
  miss = [n for n in need if n not in have]
247
  if miss:
 
249
  return False
250
  return True
251
 
252
def safe_predict(model, df_raw: pd.DataFrame, fallback_features: list[str]) -> np.ndarray:
    """
    Centralized, name-check-proof prediction.

    Builds the design matrix in the model's training feature order via
    ``_make_X`` and passes it to ``model.predict`` as a plain ndarray,
    which bypasses sklearn's feature-name validation.

    Parameters
    ----------
    model : fitted estimator exposing ``predict`` (and optionally
        ``feature_names_in_``).
    df_raw : raw input rows; columns are normalized and reordered inside
        ``_make_X``.
    fallback_features : feature order to use when the model does not
        expose ``feature_names_in_``.

    Returns
    -------
    np.ndarray
        One prediction per row of ``df_raw``.
    """
    X = _make_X(df_raw, model, fallback_features)
    # NOTE: _make_X already returns a plain float ndarray in training order,
    # so the previous except-and-retry with np.asarray(X, dtype=float) was a
    # guaranteed no-op (identical input → identical failure) that only hid
    # the original traceback and left an unused `e`. Let errors propagate.
    return model.predict(X)
265
  def find_sheet(book, names):
266
  low2orig = {k.lower(): k for k in book.keys()}
267
  for nm in names:
 
582
  tr = normalize_to_abbr(tr_raw)
583
  te = normalize_to_abbr(te_raw)
584
 
585
+ # ---- SAFE PREDICT (NumPy only) ----
586
+ tr[PRED_COL] = safe_predict(model, tr_raw, FEATURES)
587
+ te[PRED_COL] = safe_predict(model, te_raw, FEATURES)
588
 
589
  st.session_state.results["Train"]=tr; st.session_state.results["Test"]=te
590
  st.session_state.results["m_train"]={
 
783
  st.stop()
784
 
785
  df = normalize_to_abbr(df_raw)
786
+ df[PRED_COL] = safe_predict(model, df_raw, FEATURES)
787
  st.session_state.results["Validate"]=df
788
 
789
  ranges = st.session_state.train_ranges; oor_pct = 0.0; tbl=None
 
895
  st.stop()
896
 
897
  df = normalize_to_abbr(df_raw)
898
+ df[PRED_COL] = safe_predict(model, df_raw, FEATURES)
899
  st.session_state.results["PredictOnly"]=df
900
 
901
  ranges = st.session_state.train_ranges; oor_pct = 0.0