sathishaiuse committed on
Commit
e2d5c54
·
verified ·
1 Parent(s): 7780f5d

Upload deployment files from CI

Browse files
Files changed (4) hide show
  1. Dockerfile +23 -0
  2. app.py +127 -0
  3. predict_utils.py +78 -0
  4. requirements.txt +7 -0
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image: the official Python 3.9 image (the full variant, not a slim one)
FROM python:3.9

# Build-time working directory, used only while installing dependencies
WORKDIR /app

# Copy just the dependency manifest first so the install layer below is
# reused from the build cache unless requirements.txt itself changes
COPY requirements.txt .

# Install Python dependencies listed in requirements.txt;
# --no-cache-dir keeps pip's download cache out of the image layer
RUN pip3 install --no-cache-dir -r requirements.txt

# Create an unprivileged user (uid 1000) and switch to it for everything below
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

# Copy the application sources once, owned by the unprivileged user
COPY --chown=user . $HOME/app

# Run the Streamlit app on port 8501 and make it accessible externally.
# NOTE(review): XSRF protection is switched off here; confirm the hosting
# environment actually requires that before keeping it.
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.enableXsrfProtection=false"]
app.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ import pandas as pd
4
+ import numpy as np
5
+ from predict_utils import download_model_from_hf, load_model, inputs_to_dataframe
6
+
7
st.set_page_config(
    page_title="Tourism Package Purchase Predictor",
    layout="centered",
)

st.title("🎯 Wellness Tourism Package - Purchase Predictor")
st.markdown(
    "Enter customer & interaction details and click **Predict** to get probability and label."
)

# -----------------------
# Configuration, read from environment variables with defaults
# -----------------------
# Model repository on the Hugging Face Hub; override with HF_MODEL_REPO.
HF_MODEL_REPO = os.getenv("HF_MODEL_REPO", "sathishaiuse/wellness-classifier-model")
# Optional explicit model file name; when unset the downloader tries its
# built-in candidate names.
HF_MODEL_FILENAME = os.getenv("HF_MODEL_FILENAME")
# Optional access token (needed for private repos).
HF_TOKEN = os.getenv("HF_TOKEN")

# Column order handed to the model; it must match the training pipeline.
FEATURE_ORDER = [
    "Age", "CityTier", "NumberOfPersonVisiting",
    "PreferredPropertyStar", "NumberOfTrips", "Passport",
    "OwnCar", "NumberOfChildrenVisiting", "MonthlyIncome",
    "PitchSatisfactionScore", "NumberOfFollowups", "DurationOfPitch",
    "TypeofContact", "Occupation", "Gender",
    "MaritalStatus", "Designation", "ProductPitched",
]
40
+
41
+ # -----------------------
42
+ # Download & load model (on first run)
43
+ # -----------------------
44
@st.cache_resource(ttl=60 * 60)
def _load_cached_model():
    """Download and deserialize the model, caching the result for one hour.

    Errors are deliberately NOT handled here: when this function raises,
    nothing is stored in the resource cache, so the next script run retries
    the download instead of replaying a cached failure.

    Returns:
        Tuple ``(model, local_path)``.
    """
    local_path = download_model_from_hf(
        HF_MODEL_REPO, HF_MODEL_FILENAME, token=HF_TOKEN, local_dir="/tmp/model"
    )
    return load_model(local_path), local_path


def get_model():
    """Return ``(model, local_path)``, or ``(None, None)`` if loading failed.

    Successful loads are served from the cached loader. A failure is shown
    in the UI and reported through the ``(None, None)`` return value, but it
    is never cached, so a transient Hub/network error does not keep the app
    broken for the whole cache TTL.
    """
    try:
        return _load_cached_model()
    except Exception as e:  # UI boundary: report any failure to the user
        st.error(f"Failed to download/load model: {e}")
        return None, None
53
+
54
# Resolve the model once per script run; without it there is nothing to
# predict with, so the script is halted right after telling the user.
model, model_path = get_model()
if model is not None:
    st.caption(f"Using model file: `{model_path}`")
else:
    st.warning("Model not loaded. Check HF_MODEL_REPO, HF_MODEL_FILENAME and HF_TOKEN (if private repo).")
    st.stop()
60
+
61
+ # -----------------------
62
+ # Build input form
63
+ # -----------------------
64
# All widgets live inside one form so the script only acts on the values
# when the "Predict" button is pressed.
# NOTE(review): the category options and free-text defaults below are assumed
# to match the values seen by the training pipeline — confirm against it.
with st.form("predict_form"):
    st.subheader("Customer Details")
    col1, col2, col3 = st.columns(3)
    Age = col1.number_input("Age", min_value=18, max_value=100, value=30)
    CityTier = col1.selectbox("CityTier", options=[1, 2, 3], index=0)
    NumberOfPersonVisiting = col1.number_input("NumberOfPersonVisiting", min_value=1, max_value=10, value=2)
    PreferredPropertyStar = col2.selectbox("PreferredPropertyStar", options=[1, 2, 3, 4, 5], index=3)
    NumberOfTrips = col2.number_input("NumberOfTrips (annually)", min_value=0, max_value=20, value=2)
    Passport = col2.selectbox("Passport (0=No, 1=Yes)", options=[0, 1], index=1)
    OwnCar = col3.selectbox("OwnCar (0=No,1=Yes)", options=[0, 1], index=1)
    NumberOfChildrenVisiting = col3.number_input("NumberOfChildrenVisiting", min_value=0, max_value=10, value=0)
    MonthlyIncome = col3.number_input("MonthlyIncome", min_value=0, value=30000)

    st.subheader("Interaction Details")
    # Lower bound is 1 so the selectable range agrees with the "(1-10)" label
    # (it was 0, which the label does not advertise).
    PitchSatisfactionScore = st.slider("PitchSatisfactionScore (1-10)", 1, 10, 7)
    ProductPitched = st.selectbox("ProductPitched", options=["Wellness", "Holiday", "Adventure", "Relaxation"], index=0)
    NumberOfFollowups = st.number_input("NumberOfFollowups", min_value=0, max_value=20, value=2)
    DurationOfPitch = st.number_input("DurationOfPitch (minutes)", min_value=0, max_value=120, value=15)

    st.subheader("Demographics / Job")
    TypeofContact = st.selectbox("TypeofContact", options=["Company Invited", "Self Inquiry"])
    Occupation = st.text_input("Occupation", value="Salaried")
    Gender = st.selectbox("Gender", options=["Male", "Female", "Other"])
    MaritalStatus = st.selectbox("MaritalStatus", options=["Single", "Married", "Divorced"])
    Designation = st.text_input("Designation", value="Employee")

    submitted = st.form_submit_button("Predict")
91
+
92
if submitted:
    # Gather the form values into one record keyed by feature name.
    form_values = {
        "Age": Age,
        "CityTier": CityTier,
        "NumberOfPersonVisiting": NumberOfPersonVisiting,
        "PreferredPropertyStar": PreferredPropertyStar,
        "NumberOfTrips": NumberOfTrips,
        "Passport": Passport,
        "OwnCar": OwnCar,
        "NumberOfChildrenVisiting": NumberOfChildrenVisiting,
        "MonthlyIncome": MonthlyIncome,
        "PitchSatisfactionScore": PitchSatisfactionScore,
        "NumberOfFollowups": NumberOfFollowups,
        "DurationOfPitch": DurationOfPitch,
        "TypeofContact": TypeofContact,
        "Occupation": Occupation,
        "Gender": Gender,
        "MaritalStatus": MaritalStatus,
        "Designation": Designation,
        "ProductPitched": ProductPitched,
    }

    try:
        features = inputs_to_dataframe(form_values, FEATURE_ORDER)
        # The model is expected to be a sklearn Pipeline
        if not hasattr(model, "predict_proba"):
            # No probability API: show the hard label only.
            label = model.predict(features)
            st.write("Predicted Label (ProdTaken):", int(label[0]))
        else:
            # Column 1 is taken as the probability of the positive class;
            # the label is that probability thresholded at 0.5.
            purchase_prob = model.predict_proba(features)[:, 1]
            label = (purchase_prob >= 0.5).astype(int)
            st.metric("Predicted Probability (purchase)", f"{purchase_prob[0]:.4f}")
            st.write("Predicted Label (ProdTaken):", int(label[0]))
    except Exception as err:
        st.error(f"Prediction failed: {err}")
predict_utils.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import joblib
3
+ import shutil
4
+ from huggingface_hub import hf_hub_download, HfApi
5
+ from typing import List
6
+
7
def download_model_from_hf(model_repo: str, model_filename: str = None, token: str = None, local_dir: str = "/app/model"):
    """
    Download a model file from a Hugging Face model repo into ``local_dir``.

    Candidate file names are tried in order until one downloads: the explicit
    ``model_filename`` (if given) first, then a fixed list of fallback names
    (best_overall_* / best_* for XGBoost, RandomForest, Bagging, DecisionTree).

    Args:
        model_repo: Repo id on the Hub, e.g. ``"user/repo"``.
        model_filename: Optional preferred file name inside the repo.
        token: Optional access token (needed for private repos).
        local_dir: Directory the downloaded file is copied into; created if
            it does not exist.

    Returns:
        Path of the model file inside ``local_dir``.

    Raises:
        FileNotFoundError: if none of the candidates could be downloaded. The
            last underlying error is attached as the exception's cause.
    """
    os.makedirs(local_dir, exist_ok=True)

    # fallback candidates (order of preference)
    fallback_names = [
        "best_overall_XGBoost.joblib",
        "best_overall_RandomForest.joblib",
        "best_overall_Bagging.joblib",
        "best_overall_DecisionTree.joblib",
        "best_XGBoost.joblib",
        "best_RandomForest.joblib",
        "best_Bagging.joblib",
        "best_DecisionTree.joblib",
    ]
    candidates = [model_filename] if model_filename else []
    # Skip a fallback that equals the explicit name so it is not tried twice.
    candidates.extend(fn for fn in fallback_names if fn not in candidates)

    last_exception = None
    for fn in candidates:
        try:
            print(f"Trying to download '{fn}' from '{model_repo}' ...")
            # `token=` replaces the deprecated `use_auth_token=` keyword.
            remote = hf_hub_download(repo_id=model_repo, filename=fn, repo_type="model", token=token)
            # hf_hub_download returns a cache path; copy into local_dir with same filename
            dest = os.path.join(local_dir, os.path.basename(remote))
            if remote != dest:
                shutil.copy(remote, dest)
            print("Downloaded model to:", dest)
            return dest
        except Exception as e:
            # Best effort: remember the failure and move on to the next name.
            last_exception = e
            print(f"Could not download {fn}: {e}")

    # If we got here no candidate succeeded
    raise FileNotFoundError(
        f"Model not found in repo '{model_repo}'. Tried: {candidates}. Last error: {last_exception}"
    ) from last_exception
49
+
50
def load_model(local_model_path: str):
    """Deserialize and return the joblib model/pipeline stored at the given path.

    NOTE: joblib files are pickle-based, so only load files that come from a
    trusted source.
    """
    model = joblib.load(local_model_path)
    return model
53
+
54
def inputs_to_dataframe(payload: dict, feature_order: List[str]):
    """
    Convert one record (dict) or a list of records to a DataFrame with a
    fixed column order.

    Args:
        payload: A single record as a dict, or a list of such dicts.
        feature_order: Column names, in the order the result must have.
            Columns missing from the payload are added and filled with
            ``pd.NA``; payload keys not listed here are dropped.

    Returns:
        A DataFrame whose columns are exactly ``feature_order``. Each column
        that can be parsed as numeric in full is converted; any other column
        is left unchanged.

    Raises:
        ValueError: if ``payload`` is neither a dict nor a list.
    """
    import pandas as pd

    if isinstance(payload, dict):
        rows = [payload]
    elif isinstance(payload, list):
        rows = payload
    else:
        raise ValueError("Payload must be dict or list of dicts")

    df = pd.DataFrame(rows)
    # ensure columns exist
    for c in feature_order:
        if c not in df.columns:
            df[c] = pd.NA
    df = df[feature_order].copy()

    # Cast numeric columns where possible. Strict parsing plus a narrow
    # except replaces the deprecated errors="ignore" option: a column that
    # does not parse is simply kept as it is.
    for col in df.columns:
        try:
            df[col] = pd.to_numeric(df[col])
        except (ValueError, TypeError):
            pass  # non-numeric column (e.g. category text): keep unchanged
    return df
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ streamlit==1.26.0
2
+ pandas==2.2.2
3
+ numpy==1.26.4
4
+ scikit-learn==1.3.2
5
+ joblib==1.3.2
6
+ huggingface-hub==0.18.1
7
+ xgboost==1.7.6