singhn9 committed on
Commit
1cb0b5f
verified
1 Parent(s): 1e28a2d

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +7 -2
src/streamlit_app.py CHANGED
@@ -471,6 +471,9 @@ with tabs[4]:
471
 
472
  features = st.multiselect("Model input features (auto-suggested)", numeric_cols, default=suggested)
473
  st.markdown(f"Auto target: `{target}` · Suggested family hint: `{model_hint}`")
 
 
 
474
 
475
  # ---------- SAFE target & X preparation ----------
476
  # Ensure target is a single column name (string). If it's a list, pick the first and warn.
@@ -499,7 +502,8 @@ with tabs[4]:
499
  st.error(f"Multi-output target detected (shape {y.shape}). Select a single target column.")
500
  st.stop()
501
 
502
- y = np.ravel(y)
 
503
 
504
  # Drop known leak or identifier columns
505
  leak_cols = ["furnace_temp_next", "pred_temp_30s", "run_timestamp", "timestamp", "batch_id_numeric", "batch_id"]
@@ -642,7 +646,8 @@ with tabs[4]:
642
  oof = np.zeros(X_sel.shape[0])
643
  for tr_idx, val_idx in kf.split(X_sel):
644
  X_tr, X_val = X_sel.iloc[tr_idx], X_sel.iloc[val_idx]
645
- y_tr = y.iloc[tr_idx]
 
646
  try:
647
  model_obj.fit(X_tr, y_tr)
648
  preds = model_obj.predict(X_val)
 
471
 
472
  features = st.multiselect("Model input features (auto-suggested)", numeric_cols, default=suggested)
473
  st.markdown(f"Auto target: `{target}` · Suggested family hint: `{model_hint}`")
474
+ # --- Sampling configuration ---
475
+ max_rows = min(df.shape[0], 20000)
476
+ sample_size = st.slider("Sample rows", 500, max_rows, min(1500, max_rows), step=100)
477
 
478
  # ---------- SAFE target & X preparation ----------
479
  # Ensure target is a single column name (string). If it's a list, pick the first and warn.
 
502
  st.error(f"Multi-output target detected (shape {y.shape}). Select a single target column.")
503
  st.stop()
504
 
505
+ y = pd.Series(np.ravel(y), name=target)
506
+
507
 
508
  # Drop known leak or identifier columns
509
  leak_cols = ["furnace_temp_next", "pred_temp_30s", "run_timestamp", "timestamp", "batch_id_numeric", "batch_id"]
 
646
  oof = np.zeros(X_sel.shape[0])
647
  for tr_idx, val_idx in kf.split(X_sel):
648
  X_tr, X_val = X_sel.iloc[tr_idx], X_sel.iloc[val_idx]
649
+ y_tr = y[tr_idx] if not hasattr(y, "iloc") else y.iloc[tr_idx]
650
+
651
  try:
652
  model_obj.fit(X_tr, y_tr)
653
  preds = model_obj.predict(X_val)