import itertools
import logging
import os
import pathlib
import threading
import time
import traceback

import numpy as np
import pandas as pd
import streamlit as st

from predict_utils import download_model_from_hf, load_model, inputs_to_dataframe

# -----------------------
# Config / logging
# -----------------------
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("wellness-space")

# set_page_config is issued before any other Streamlit call in this script.
st.set_page_config(page_title="Tourism Package Purchase Predictor", layout="centered")
st.title("🎯 Wellness Tourism Package - Purchase Predictor")
st.markdown("Enter customer & interaction details and click **Predict** to get probability and label.")

# -----------------------
# Configuration (set these as Space variables or leave defaults)
# -----------------------
# Model repository on the Hugging Face Hub; change to your model repo.
HF_MODEL_REPO = os.environ.get("HF_MODEL_REPO", "sathishaiuse/wellness-classifier-model")
# Optional; when unset the download helper is expected to try candidate names.
HF_MODEL_FILENAME = os.environ.get("HF_MODEL_FILENAME", None)
HF_TOKEN = os.environ.get("HF_TOKEN", None)

# The feature order must match the training pipeline.
FEATURE_ORDER = [
    "Age",
    "CityTier",
    "NumberOfPersonVisiting",
    "PreferredPropertyStar",
    "NumberOfTrips",
    "Passport",
    "OwnCar",
    "NumberOfChildrenVisiting",
    "MonthlyIncome",
    "PitchSatisfactionScore",
    "NumberOfFollowups",
    "DurationOfPitch",
    "TypeofContact",
    "Occupation",
    "Gender",
    "MaritalStatus",
    "Designation",
    "ProductPitched"
]


# -----------------------
# Diagnostics helpers
# -----------------------
def ui_log(msg):
    """Write a short diagnostic message to the Streamlit UI and to the logger.

    The UI write is best-effort: if ``st.write`` raises, the failure is
    recorded at debug level and the message is still sent to the logger.

    Args:
        msg: The message to display and log.
    """
    try:
        st.write(msg)
    except Exception:
        # Deliberately non-fatal: diagnostics must never break the app.
        logger.debug("st.write failed while emitting a diagnostic message", exc_info=True)
    logger.info(msg)


def list_dir_preview(path, n=20):
    """Return a small preview of the entries found recursively under ``path``.

    Args:
        path: Directory (or file) path to inspect.
        n: Maximum number of entries to return; expected to be non-negative.

    Returns:
        A list of at most ``n`` entry paths (as strings, relative to ``path``)
        on success, or a descriptive string when ``path`` does not exist or
        listing fails.
    """
    root = pathlib.Path(path)
    if not root.exists():
        return f"{path} (not found)"
    try:
        # Stop walking after n entries instead of materializing the whole
        # recursive listing, which can be very large for "." or "/app".
        first_entries = itertools.islice(root.glob("**/*"), n)
        # Show names relative to the root, not Path objects.
        return [str(entry.relative_to(root)) for entry in first_entries]
    except Exception as e:
        return f"error listing {path}: {e}"
# -----------------------
# Download & load model (bounded wait, fail-fast)
# -----------------------
@st.cache_resource(ttl=60*60)
def _background_load_model(repo, filename, token, local_dir, timeout_sec):
    """Download and load the model in a worker thread, waiting at most ``timeout_sec``.

    Args:
        repo: Model repository identifier passed to ``download_model_from_hf``.
        filename: Model filename passed to ``download_model_from_hf`` (may be None).
        token: Access token passed to ``download_model_from_hf`` (may be None).
        local_dir: Directory passed to ``download_model_from_hf`` as ``local_dir``.
        timeout_sec: Maximum number of seconds to wait for the worker thread.

    Returns:
        A tuple ``(model, local_path, error)``. On success ``error`` is None;
        on failure or timeout ``model`` and ``local_path`` are None and
        ``error`` is a message string.

    NOTE(review): the function is wrapped in ``st.cache_resource`` with a
    one-hour TTL, so a failure/timeout tuple is presumably reused for the
    same arguments until the TTL expires — confirm this is intended.
    """
    result = {"model": None, "path": None, "err": None}

    def worker():
        try:
            logger.debug("Background: starting download_model_from_hf")
            local_path = download_model_from_hf(repo, filename, token=token, local_dir=local_dir)
            logger.debug("Background: download_model_from_hf returned: %s", local_path)
            model = load_model(local_path)
            result.update(model=model, path=local_path, err=None)
            logger.debug("Background: model loaded from %s", local_path)
        except Exception as e:
            tb = traceback.format_exc()
            logger.exception("Background model load error: %s", e)
            result.update(model=None, path=None, err=str(e) + "\n" + tb)

    # Daemon thread so a stuck download cannot keep the process alive.
    th = threading.Thread(target=worker, daemon=True)
    th.start()
    th.join(timeout_sec)

    if th.is_alive():
        logger.error("Background model load timed out after %s seconds", timeout_sec)
        # Return a fixed tuple rather than reading the shared dict: the worker
        # is still running and could update ``result`` between reads, which
        # would produce an inconsistent (model, path, err) combination.
        return None, None, f"timeout after {timeout_sec}s"

    return result["model"], result["path"], result["err"]


def get_model(timeout_sec=30):
    """High-level loader used by the app; delegates to the cached background loader.

    Args:
        timeout_sec: Maximum number of seconds to wait for the model load.

    Returns:
        A tuple ``(model, path, err)``. ``model`` may be None on failure or timeout.
    """
    # Directory inside the container where the model is downloaded to
    # (or where a baked-in model is expected).
    local_dir = "/tmp/model"
    return _background_load_model(HF_MODEL_REPO, HF_MODEL_FILENAME, HF_TOKEN, local_dir, timeout_sec)


# Show quick environment diagnostics before attempting heavy work.
ui_log(f"ENV HF_MODEL_REPO={HF_MODEL_REPO}")
ui_log(f"ENV HF_MODEL_FILENAME={HF_MODEL_FILENAME}")
# Only report whether a token is present; never print the token itself.
ui_log(f"ENV HF_TOKEN present? {bool(HF_TOKEN)}")

# Show small filesystem previews to confirm a pre-baked model (if any).
for d in ["/opt/model_cache", "/tmp/model", "/app", "/home/user/app", "."]:
    ui_log(f"Listing preview for {d}: {list_dir_preview(d, n=10)}")

# Attempt to get the model, but do not block for long on startup.
t0 = time.time()
model, model_path, model_err = get_model(timeout_sec=30)  # 30s timeout; increase with caution
t1 = time.time()
ui_log(f"Model load attempt finished in {t1-t0:.1f}s; model_path={model_path}; error={bool(model_err)}")
if model_err:
    # Full error text (including traceback) goes to the logs only.
    logger.debug("Model error detail: %s", model_err)

if model is None:
    st.warning("Model not loaded (fast-fail). The app is still usable for UI testing. "
               "To fix: pre-download the model during Docker build, or make the model repo public, or increase timeout.")
    st.info("Model diagnostics: check container logs for full error details.")
    # Do not st.stop() — allow the app to run so HF health checks see a bound server.
else:
    st.caption(f"Using model file: `{model_path}`")

# -----------------------
# Build input form
# -----------------------
with st.form("predict_form"):
    st.subheader("Customer Details")
    col1, col2, col3 = st.columns(3)
    Age = col1.number_input("Age", min_value=18, max_value=100, value=30)
    CityTier = col1.selectbox("CityTier", options=[1,2,3], index=0)
    NumberOfPersonVisiting = col1.number_input("NumberOfPersonVisiting", min_value=1, max_value=10, value=2)
    PreferredPropertyStar = col2.selectbox("PreferredPropertyStar", options=[1,2,3,4,5], index=3)
    NumberOfTrips = col2.number_input("NumberOfTrips (annually)", min_value=0, max_value=20, value=2)
    Passport = col2.selectbox("Passport (0=No, 1=Yes)", options=[0,1], index=1)
    OwnCar = col3.selectbox("OwnCar (0=No,1=Yes)", options=[0,1], index=1)
    NumberOfChildrenVisiting = col3.number_input("NumberOfChildrenVisiting", min_value=0, max_value=10, value=0)
    MonthlyIncome = col3.number_input("MonthlyIncome", min_value=0, value=30000)

    st.subheader("Interaction Details")
    # Lower bound is 1 so the selectable range matches the "(1-10)" label.
    PitchSatisfactionScore = st.slider("PitchSatisfactionScore (1-10)", 1, 10, 7)
    ProductPitched = st.selectbox("ProductPitched", options=["Wellness","Holiday","Adventure","Relaxation"], index=0)
    NumberOfFollowups = st.number_input("NumberOfFollowups", min_value=0, max_value=20, value=2)
    DurationOfPitch = st.number_input("DurationOfPitch (minutes)", min_value=0, max_value=120, value=15)

    st.subheader("Demographics / Job")
    TypeofContact = st.selectbox("TypeofContact", options=["Company Invited", "Self Inquiry"])
    Occupation = st.text_input("Occupation", value="Salaried")
    Gender = st.selectbox("Gender", options=["Male","Female","Other"])
    MaritalStatus = st.selectbox("MaritalStatus", options=["Single","Married","Divorced"])
    Designation = st.text_input("Designation", value="Employee")

    submitted = st.form_submit_button("Predict")

# -----------------------
# Prediction logic on submit
# -----------------------
if submitted:
    if model is None:
        st.error("Prediction unavailable because model is not loaded. See container logs / Space settings for model deployment options.")
    else:
        # Construct the single-record dict keyed by feature name.
        rec = {
            "Age": Age,
            "CityTier": CityTier,
            "NumberOfPersonVisiting": NumberOfPersonVisiting,
            "PreferredPropertyStar": PreferredPropertyStar,
            "NumberOfTrips": NumberOfTrips,
            "Passport": Passport,
            "OwnCar": OwnCar,
            "NumberOfChildrenVisiting": NumberOfChildrenVisiting,
            "MonthlyIncome": MonthlyIncome,
            "PitchSatisfactionScore": PitchSatisfactionScore,
            "NumberOfFollowups": NumberOfFollowups,
            "DurationOfPitch": DurationOfPitch,
            "TypeofContact": TypeofContact,
            "Occupation": Occupation,
            "Gender": Gender,
            "MaritalStatus": MaritalStatus,
            "Designation": Designation,
            "ProductPitched": ProductPitched
        }
        try:
            df = inputs_to_dataframe(rec, FEATURE_ORDER)
            # The model is expected to be a sklearn Pipeline.
            if hasattr(model, "predict_proba"):
                # Column 1 is taken as the purchase probability; 0.5 is the label cut-off.
                probs = model.predict_proba(df)[:,1]
                pred = (probs >= 0.5).astype(int)
                st.metric("Predicted Probability (purchase)", f"{probs[0]:.4f}")
                st.write("Predicted Label (ProdTaken):", int(pred[0]))
            else:
                # No probability support: fall back to the hard label only.
                pred = model.predict(df)
                st.write("Predicted Label (ProdTaken):", int(pred[0]))
        except Exception as e:
            logger.exception("Prediction failed")
            st.error(f"Prediction failed: {e}")