Spaces:

sathishaiuse
/

wellness-classifier-model

Runtime error

App Files Files Community

sathishaiuse committed on Dec 3, 2025

Commit

e2d5c54

verified ·

1 Parent(s): 7780f5d

Upload deployment files from CI

Browse files

Files changed (4) hide show

Dockerfile +23 -0
app.py +127 -0
predict_utils.py +78 -0
requirements.txt +7 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,23 @@

+# Use the official Python 3.9 image (full Debian-based variant, not a slim/minimal one)
+FROM python:3.9
+# Create the non-root user (UID 1000) that the app runs as, and switch to it
+RUN useradd -m -u 1000 user
+USER user
+# Make the user's home the default location and put user-level pip scripts (e.g. streamlit) on PATH
+ENV HOME=/home/user \
+	PATH=/home/user/.local/bin:$PATH
+# Set the working directory inside the container to the user's app directory
+WORKDIR $HOME/app
+# Copy only the dependency list first so the install layer stays cached until requirements.txt changes
+COPY --chown=user requirements.txt requirements.txt
+# Install Python dependencies listed in requirements.txt (no pip cache kept in the image)
+RUN pip3 install --no-cache-dir -r requirements.txt
+# Copy the application files once, owned by the non-root user
+COPY --chown=user . $HOME/app
+# Define the command to run the Streamlit app on port "8501" and make it accessible externally
+# NOTE(review): the hosting platform must route traffic to port 8501 - confirm the Space's configured app port matches
+CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.enableXsrfProtection=false"]

app.py ADDED Viewed

	@@ -0,0 +1,127 @@

+import os
+import streamlit as st
+import pandas as pd
+import numpy as np
+from predict_utils import download_model_from_hf, load_model, inputs_to_dataframe
+st.set_page_config(page_title="Tourism Package Purchase Predictor", layout="centered")
+st.title("🎯 Wellness Tourism Package - Purchase Predictor")
+st.markdown("Enter customer & interaction details and click **Predict** to get probability and label.")
+# -----------------------
+# Configuration (set these as Space variables or leave defaults)
+# -----------------------
+HF_MODEL_REPO = os.environ.get("HF_MODEL_REPO", "sathishaiuse/wellness-classifier-model")  # change to your model repo
+HF_MODEL_FILENAME = os.environ.get("HF_MODEL_FILENAME", None)  # optional, fallback logic will attempt candidates
+HF_TOKEN = os.environ.get("HF_TOKEN", None)
+# The feature order must match training pipeline
+FEATURE_ORDER = [
+    "Age",
+    "CityTier",
+    "NumberOfPersonVisiting",
+    "PreferredPropertyStar",
+    "NumberOfTrips",
+    "Passport",
+    "OwnCar",
+    "NumberOfChildrenVisiting",
+    "MonthlyIncome",
+    "PitchSatisfactionScore",
+    "NumberOfFollowups",
+    "DurationOfPitch",
+    "TypeofContact",
+    "Occupation",
+    "Gender",
+    "MaritalStatus",
+    "Designation",
+    "ProductPitched"
+]
+# -----------------------
+# Download & load model (on first run)
+# -----------------------
+@st.cache_resource(ttl=60*60)
+def get_model():
+    try:
+        local_path = download_model_from_hf(HF_MODEL_REPO, HF_MODEL_FILENAME, token=HF_TOKEN, local_dir="/tmp/model")
+        model = load_model(local_path)
+        return model, local_path
+    except Exception as e:
+        st.error(f"Failed to download/load model: {e}")
+        return None, None
+model, model_path = get_model()
+if model is None:
+    st.warning("Model not loaded. Check HF_MODEL_REPO, HF_MODEL_FILENAME and HF_TOKEN (if private repo).")
+    st.stop()
+st.caption(f"Using model file: `{model_path}`")
+# -----------------------
+# Build input form
+# -----------------------
+with st.form("predict_form"):
+    st.subheader("Customer Details")
+    col1, col2, col3 = st.columns(3)
+    Age = col1.number_input("Age", min_value=18, max_value=100, value=30)
+    CityTier = col1.selectbox("CityTier", options=[1,2,3], index=0)
+    NumberOfPersonVisiting = col1.number_input("NumberOfPersonVisiting", min_value=1, max_value=10, value=2)
+    PreferredPropertyStar = col2.selectbox("PreferredPropertyStar", options=[1,2,3,4,5], index=3)
+    NumberOfTrips = col2.number_input("NumberOfTrips (annually)", min_value=0, max_value=20, value=2)
+    Passport = col2.selectbox("Passport (0=No, 1=Yes)", options=[0,1], index=1)
+    OwnCar = col3.selectbox("OwnCar (0=No,1=Yes)", options=[0,1], index=1)
+    NumberOfChildrenVisiting = col3.number_input("NumberOfChildrenVisiting", min_value=0, max_value=10, value=0)
+    MonthlyIncome = col3.number_input("MonthlyIncome", min_value=0, value=30000)
+    st.subheader("Interaction Details")
+    PitchSatisfactionScore = st.slider("PitchSatisfactionScore (1-10)", 0, 10, 7)
+    ProductPitched = st.selectbox("ProductPitched", options=["Wellness","Holiday","Adventure","Relaxation"], index=0)
+    NumberOfFollowups = st.number_input("NumberOfFollowups", min_value=0, max_value=20, value=2)
+    DurationOfPitch = st.number_input("DurationOfPitch (minutes)", min_value=0, max_value=120, value=15)
+    st.subheader("Demographics / Job")
+    TypeofContact = st.selectbox("TypeofContact", options=["Company Invited", "Self Inquiry"])
+    Occupation = st.text_input("Occupation", value="Salaried")
+    Gender = st.selectbox("Gender", options=["Male","Female","Other"])
+    MaritalStatus = st.selectbox("MaritalStatus", options=["Single","Married","Divorced"])
+    Designation = st.text_input("Designation", value="Employee")
+    submitted = st.form_submit_button("Predict")
+if submitted:
+    # construct single-record dict
+    rec = {
+        "Age": Age,
+        "CityTier": CityTier,
+        "NumberOfPersonVisiting": NumberOfPersonVisiting,
+        "PreferredPropertyStar": PreferredPropertyStar,
+        "NumberOfTrips": NumberOfTrips,
+        "Passport": Passport,
+        "OwnCar": OwnCar,
+        "NumberOfChildrenVisiting": NumberOfChildrenVisiting,
+        "MonthlyIncome": MonthlyIncome,
+        "PitchSatisfactionScore": PitchSatisfactionScore,
+        "NumberOfFollowups": NumberOfFollowups,
+        "DurationOfPitch": DurationOfPitch,
+        "TypeofContact": TypeofContact,
+        "Occupation": Occupation,
+        "Gender": Gender,
+        "MaritalStatus": MaritalStatus,
+        "Designation": Designation,
+        "ProductPitched": ProductPitched
+    }
+    try:
+        df = inputs_to_dataframe(rec, FEATURE_ORDER)
+        # The model is expected to be a sklearn Pipeline
+        if hasattr(model, "predict_proba"):
+            probs = model.predict_proba(df)[:,1]
+            pred = (probs >= 0.5).astype(int)
+            st.metric("Predicted Probability (purchase)", f"{probs[0]:.4f}")
+            st.write("Predicted Label (ProdTaken):", int(pred[0]))
+        else:
+            pred = model.predict(df)
+            st.write("Predicted Label (ProdTaken):", int(pred[0]))
+    except Exception as e:
+        st.error(f"Prediction failed: {e}")

predict_utils.py ADDED Viewed

	@@ -0,0 +1,78 @@

+import os
+import joblib
+import shutil
+from huggingface_hub import hf_hub_download, HfApi
+from typing import List
+def download_model_from_hf(model_repo: str, model_filename: str = None, token: str = None, local_dir: str = "/app/model"):
+    """
+    Try to download the model file from HF model repo.
+    If model_filename is None, attempt fallback names (best_overall_XGBoost, RandomForest, Bagging, DecisionTree).
+    Returns local filepath.
+    """
+    os.makedirs(local_dir, exist_ok=True)
+    api = HfApi(token=token)
+    candidates = []
+    if model_filename:
+        candidates.append(model_filename)
+    # fallback candidates (order of preference)
+    candidates.extend([
+        "best_overall_XGBoost.joblib",
+        "best_overall_RandomForest.joblib",
+        "best_overall_Bagging.joblib",
+        "best_overall_DecisionTree.joblib",
+        "best_XGBoost.joblib",
+        "best_RandomForest.joblib",
+        "best_Bagging.joblib",
+        "best_DecisionTree.joblib",
+    ])
+    last_exception = None
+    for fn in candidates:
+        try:
+            print(f"Trying to download '{fn}' from '{model_repo}' ...")
+            remote = hf_hub_download(repo_id=model_repo, filename=fn, repo_type="model", use_auth_token=token)
+            # hf_hub_download returns a cache path; copy into local_dir with same filename
+            dest = os.path.join(local_dir, os.path.basename(remote))
+            if remote != dest:
+                shutil.copy(remote, dest)
+            print("Downloaded model to:", dest)
+            return dest
+        except Exception as e:
+            last_exception = e
+            print(f"Could not download {fn}: {e}")
+    # If we got here no candidate succeeded
+    raise FileNotFoundError(f"Model not found in repo '{model_repo}'. Tried: {candidates}. Last error: {last_exception}")
+def load_model(local_model_path: str):
+    """Load joblib model/pipeline from given local path."""
+    return joblib.load(local_model_path)
+def inputs_to_dataframe(payload: dict, feature_order: List[str]):
+    """
+    Convert one record (dict) to dataframe with fixed column order.
+    """
+    import pandas as pd
+    if isinstance(payload, dict):
+        rows = [payload]
+    elif isinstance(payload, list):
+        rows = payload
+    else:
+        raise ValueError("Payload must be dict or list of dicts")
+    df = pd.DataFrame(rows)
+    # ensure columns exist
+    for c in feature_order:
+        if c not in df.columns:
+            df[c] = pd.NA
+    df = df[feature_order].copy()
+    # try cast numeric columns where possible
+    for col in df.columns:
+        try:
+            df[col] = pd.to_numeric(df[col], errors="ignore")
+        except Exception:
+            pass
+    return df

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+streamlit==1.26.0
+pandas==2.2.2
+numpy==1.26.4
+scikit-learn==1.3.2
+joblib==1.3.2
+huggingface-hub==0.18.1
+xgboost==1.7.6