Spaces:

sathishaiuse
/

wellness-classifier-model

Runtime error

File size: 9,057 Bytes

import os
import time
import threading
import logging
import pathlib
import traceback

import streamlit as st
import pandas as pd
import numpy as np
from predict_utils import download_model_from_hf, load_model, inputs_to_dataframe

# -----------------------
# Config / logging
# -----------------------
# Root logging is configured at DEBUG, so the debug messages emitted by the
# model loader below are not filtered out.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("wellness-space")

# Page-level settings; this is the first Streamlit command the script issues.
st.set_page_config(page_title="Tourism Package Purchase Predictor", layout="centered")

# Static header shown above the diagnostics and the input form.
st.title("🎯 Wellness Tourism Package - Purchase Predictor")
st.markdown("Enter customer & interaction details and click **Predict** to get probability and label.")

# -----------------------
# Configuration (set these as Space variables or leave defaults)
# -----------------------
# All three values are read from environment variables once, at script start.
HF_MODEL_REPO = os.environ.get("HF_MODEL_REPO", "sathishaiuse/wellness-classifier-model")  # change to your model repo
HF_MODEL_FILENAME = os.environ.get("HF_MODEL_FILENAME", None)  # optional, fallback logic will attempt candidates
# None when the variable is unset; passed through to the download helper as `token`.
HF_TOKEN = os.environ.get("HF_TOKEN", None)

# The feature order must match training pipeline
# This list is handed to inputs_to_dataframe() together with the form values.
FEATURE_ORDER = [
    "Age",
    "CityTier",
    "NumberOfPersonVisiting",
    "PreferredPropertyStar",
    "NumberOfTrips",
    "Passport",
    "OwnCar",
    "NumberOfChildrenVisiting",
    "MonthlyIncome",
    "PitchSatisfactionScore",
    "NumberOfFollowups",
    "DurationOfPitch",
    "TypeofContact",
    "Occupation",
    "Gender",
    "MaritalStatus",
    "Designation",
    "ProductPitched"
]

# -----------------------
# Diagnostics helpers
# -----------------------
def ui_log(msg):
    """Write a short diagnostic message to the Streamlit page and to the logger.

    The UI write is best-effort: if ``st.write`` raises, the failure is
    recorded at DEBUG level (with traceback) and the message is still sent
    to the logger, so diagnostics are never lost because the UI is
    unavailable.

    Args:
        msg: The message to display and log.
    """
    try:
        st.write(msg)
    except Exception:
        # Deliberately non-fatal, but leave a trace of why the UI write failed
        # instead of swallowing the error silently.
        logger.debug("ui_log: st.write failed", exc_info=True)
    logger.info(msg)

def list_dir_preview(path, n=20):
    """Return a small preview of the entries found under ``path`` for UI/logs.

    Args:
        path: Directory to inspect (string or path-like).
        n: Maximum number of entries to return. Values below 1 produce an
            empty list.

    Returns:
        A list of at most ``n`` entry paths, relative to ``path`` and
        rendered as strings. If ``path`` does not exist, or listing it
        fails, a human-readable string describing the problem is returned
        instead.
    """
    from itertools import islice  # function-scope import keeps the block self-contained

    root = pathlib.Path(path)
    if not root.exists():
        return f"{path} (not found)"
    try:
        # Stop walking after n entries rather than materialising the whole
        # recursive listing; trees such as "." can be very large and this
        # helper only ever shows the first few names.
        preview = islice(root.glob("**/*"), max(n, 0))
        # show names, not Path objects
        return [str(entry.relative_to(root)) for entry in preview]
    except Exception as e:
        return f"error listing {path}: {e}"

# -----------------------
# Download & load model (non-blocking, fail-fast)
# -----------------------
@st.cache_resource(ttl=60*60)
def _start_model_load_job(repo, filename, token, local_dir):
    """
    Start the download-and-load worker thread and return its shared job state.

    Cached by Streamlit, so reruns with the same arguments reuse the same job
    (and its worker thread) instead of starting another download.

    Returns a dict with keys "model", "path", "err" and "done". "done" is a
    threading.Event that the worker sets after writing its final result, so
    the other fields may be read safely once it is set.
    """
    job = {"model": None, "path": None, "err": None, "done": threading.Event()}

    def target():
        try:
            logger.debug("Background: starting download_model_from_hf")
            local_path = download_model_from_hf(repo, filename, token=token, local_dir=local_dir)
            logger.debug("Background: download_model_from_hf returned: %s", local_path)
            model = load_model(local_path)
            job.update(model=model, path=local_path, err=None)
            logger.debug("Background: model loaded from %s", local_path)
        except Exception as e:
            tb = traceback.format_exc()
            logger.exception("Background model load error: %s", e)
            job.update(model=None, path=None, err=str(e) + "\n" + tb)
        finally:
            # Signal completion only after the result fields are final.
            job["done"].set()

    threading.Thread(target=target, daemon=True).start()
    return job

def _background_load_model(repo, filename, token, local_dir, timeout_sec):
    """
    Background model loader with timeout.

    Waits up to ``timeout_sec`` seconds for the (cached) load job to finish.
    Returns tuple (model_obj or None, local_path or None, error or None).

    Only the job is cached, never its outcome:
      * on timeout the worker keeps running and a later call picks up the
        finished model instead of a cached "timeout" result;
      * a job that ended with an error is dropped from the cache so the next
        call starts a fresh attempt.
    """
    job = _start_model_load_job(repo, filename, token, local_dir)

    if not job["done"].wait(timeout_sec):
        # timed out; report it without touching the worker's result fields
        logger.error("Background model load timed out after %s seconds", timeout_sec)
        return None, None, f"timeout after {timeout_sec}s"

    if job["err"] is not None:
        # Do not keep serving a failed load for the whole TTL.
        _start_model_load_job.clear()
    return job["model"], job["path"], job["err"]

# Preserve the cache-clearing hook the previously decorated function exposed.
_background_load_model.clear = _start_model_load_job.clear

def get_model(timeout_sec=30):
    """
    Entry point the app uses to obtain the model.

    Forwards the module-level HF_MODEL_REPO / HF_MODEL_FILENAME / HF_TOKEN
    settings, a fixed local directory and ``timeout_sec`` to
    _background_load_model, and returns its (model, path, err) triple.
    The model element may be None when loading failed or timed out.
    """
    # Directory inside the container handed to the loader as its local_dir.
    download_dir = "/tmp/model"
    outcome = _background_load_model(
        HF_MODEL_REPO, HF_MODEL_FILENAME, HF_TOKEN, download_dir, timeout_sec
    )
    loaded_model, loaded_path, load_error = outcome
    return (loaded_model, loaded_path, load_error)

# Show quick environment diagnostics before attempting heavy work
ui_log(f"ENV HF_MODEL_REPO={HF_MODEL_REPO}")
ui_log(f"ENV HF_MODEL_FILENAME={HF_MODEL_FILENAME}")
# Only whether a token is set is reported, never the token value itself.
ui_log(f"ENV HF_TOKEN present? {bool(HF_TOKEN)}")

# Show small filesystem previews to confirm pre-baked model (if any)
for d in ["/opt/model_cache", "/tmp/model", "/app", "/home/user/app", "."]:
    ui_log(f"Listing preview for {d}: {list_dir_preview(d, n=10)}")

# Attempt to get model but do not block long on startup
# t0/t1 bracket the call so the elapsed time can be reported below.
t0 = time.time()
model, model_path, model_err = get_model(timeout_sec=30)  # 30s timeout; increase with caution
t1 = time.time()
# The UI line only states whether an error occurred; the detail goes to the debug log.
ui_log(f"Model load attempt finished in {t1-t0:.1f}s; model_path={model_path}; error={bool(model_err)}")
if model_err:
    logger.debug("Model error detail: %s", model_err)

if model is None:
    st.warning("Model not loaded (fast-fail). The app is still usable for UI testing. "
               "To fix: pre-download the model during Docker build, or make the model repo public, or increase timeout.")
    st.info("Model diagnostics: check container logs for full error details.")
    # do not st.stop() — allow the app to run so HF health checks see a bound server
else:
    st.caption(f"Using model file: `{model_path}`")

# -----------------------
# Build input form
# -----------------------
# All widgets live in one form, so their values are delivered together when
# the "Predict" button is pressed. The variable names mirror the feature names
# used when the prediction record is assembled further down.
# NOTE(review): the categorical option lists and the free-text Occupation /
# Designation inputs presumably need to match the categories the model was
# trained on — confirm against the training data.
with st.form("predict_form"):
    st.subheader("Customer Details")
    col1, col2, col3 = st.columns(3)
    Age = col1.number_input("Age", min_value=18, max_value=100, value=30)
    CityTier = col1.selectbox("CityTier", options=[1,2,3], index=0)
    NumberOfPersonVisiting = col1.number_input("NumberOfPersonVisiting", min_value=1, max_value=10, value=2)
    PreferredPropertyStar = col2.selectbox("PreferredPropertyStar", options=[1,2,3,4,5], index=3)
    NumberOfTrips = col2.number_input("NumberOfTrips (annually)", min_value=0, max_value=20, value=2)
    Passport = col2.selectbox("Passport (0=No, 1=Yes)", options=[0,1], index=1)
    OwnCar = col3.selectbox("OwnCar (0=No,1=Yes)", options=[0,1], index=1)
    NumberOfChildrenVisiting = col3.number_input("NumberOfChildrenVisiting", min_value=0, max_value=10, value=0)
    MonthlyIncome = col3.number_input("MonthlyIncome", min_value=0, value=30000)

    st.subheader("Interaction Details")
    # The label advertises a 1-10 scale, so the slider's lower bound is 1
    # (it previously allowed 0, contradicting the label).
    PitchSatisfactionScore = st.slider("PitchSatisfactionScore (1-10)", min_value=1, max_value=10, value=7)
    ProductPitched = st.selectbox("ProductPitched", options=["Wellness","Holiday","Adventure","Relaxation"], index=0)
    NumberOfFollowups = st.number_input("NumberOfFollowups", min_value=0, max_value=20, value=2)
    DurationOfPitch = st.number_input("DurationOfPitch (minutes)", min_value=0, max_value=120, value=15)

    st.subheader("Demographics / Job")
    TypeofContact = st.selectbox("TypeofContact", options=["Company Invited", "Self Inquiry"])
    Occupation = st.text_input("Occupation", value="Salaried")
    Gender = st.selectbox("Gender", options=["Male","Female","Other"])
    MaritalStatus = st.selectbox("MaritalStatus", options=["Single","Married","Divorced"])
    Designation = st.text_input("Designation", value="Employee")

    # True only on the rerun triggered by pressing the button.
    submitted = st.form_submit_button("Predict")

# -----------------------
# Prediction logic on submit
# -----------------------
if submitted and model is None:
    # The form was submitted but there is no model to score with.
    st.error("Prediction unavailable because model is not loaded. See container logs / Space settings for model deployment options.")
elif submitted:
    # Collect the form values into a single record keyed by feature name.
    rec = dict(
        Age=Age,
        CityTier=CityTier,
        NumberOfPersonVisiting=NumberOfPersonVisiting,
        PreferredPropertyStar=PreferredPropertyStar,
        NumberOfTrips=NumberOfTrips,
        Passport=Passport,
        OwnCar=OwnCar,
        NumberOfChildrenVisiting=NumberOfChildrenVisiting,
        MonthlyIncome=MonthlyIncome,
        PitchSatisfactionScore=PitchSatisfactionScore,
        NumberOfFollowups=NumberOfFollowups,
        DurationOfPitch=DurationOfPitch,
        TypeofContact=TypeofContact,
        Occupation=Occupation,
        Gender=Gender,
        MaritalStatus=MaritalStatus,
        Designation=Designation,
        ProductPitched=ProductPitched,
    )

    try:
        df = inputs_to_dataframe(rec, FEATURE_ORDER)
        # The model is expected to be a sklearn Pipeline.
        if not hasattr(model, "predict_proba"):
            # No probability support: only a label can be shown.
            pred = model.predict(df)
        else:
            # Column 1 of predict_proba is displayed as the purchase
            # probability; the label is that value thresholded at 0.5.
            probs = model.predict_proba(df)[:, 1]
            pred = (probs >= 0.5).astype(int)
            st.metric("Predicted Probability (purchase)", f"{probs[0]:.4f}")
        st.write("Predicted Label (ProdTaken):", int(pred[0]))
    except Exception as e:
        # Log the traceback and show the error on the page instead of
        # letting the script run abort.
        logger.exception("Prediction failed")
        st.error(f"Prediction failed: {e}")