surnellas committed on
Commit
88f0763
·
verified ·
1 Parent(s): d5b9c4c

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. Dockerfile +15 -12
  2. app.py +166 -0
  3. requirements.txt +7 -3
Dockerfile CHANGED
@@ -1,20 +1,23 @@
1
- FROM python:3.13.5-slim
 
2
 
 
3
  WORKDIR /app
4
 
5
- RUN apt-get update && apt-get install -y \
6
- build-essential \
7
- curl \
8
- git \
9
- && rm -rf /var/lib/apt/lists/*
10
-
11
- COPY requirements.txt ./
12
- COPY src/ ./src/
13
 
 
14
  RUN pip3 install -r requirements.txt
15
 
16
- EXPOSE 8501
 
 
 
 
 
17
 
18
- HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
19
 
20
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
 
1
+ # Use the official Python 3.11 base image
2
+ FROM python:3.11
3
 
4
+ # Set the working directory inside the container to /app
5
  WORKDIR /app
6
 
7
+ # Copy all files from the current directory on the host to the container's /app directory
8
+ COPY . .
 
 
 
 
 
 
9
 
10
+ # Install Python dependencies listed in requirements.txt
11
  RUN pip3 install -r requirements.txt
12
 
13
+ RUN useradd -m -u 1000 user
14
+ USER user
15
+ ENV HOME=/home/user \
16
+ PATH=/home/user/.local/bin:$PATH
17
+
18
+ WORKDIR $HOME/app
19
 
20
+ COPY --chown=user . $HOME/app
21
 
22
+ # Run the Streamlit app on port 8501, bound to all interfaces (0.0.0.0), with XSRF protection disabled
23
+ CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.enableXsrfProtection=false"]
app.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import streamlit as st
4
+ import pandas as pd
5
+ import joblib
6
+ from huggingface_hub import hf_hub_download
7
+
8
+ # Config
9
+ REPO_ID = "surnellas/Visit-With-Us"
10
+ MODEL_FILENAME = "best_tourism_model_v1.joblib"
11
+ DATA_FILENAME = "tourism.csv"
12
+ CLASSIFICATION_THRESHOLD = 0.45
13
+
14
+ st.title("Visit-With-Us — Wellness Package Purchase Prediction")
15
+ st.write(
16
+ "Enter customer details below. The model predicts the probability that the customer "
17
+ "will purchase the Wellness Tourism Package."
18
+ )
19
+
20
+ # Feature lists (used by the model)
21
+ numeric_features = [
22
+ "Age",
23
+ "CityTier",
24
+ "DurationOfPitch",
25
+ "NumberOfPersonVisiting",
26
+ "NumberOfFollowups",
27
+ "PreferredPropertyStar",
28
+ "NumberOfTrips",
29
+ "Passport",
30
+ "PitchSatisfactionScore",
31
+ "OwnCar",
32
+ "NumberOfChildrenVisiting",
33
+ "MonthlyIncome",
34
+ ]
35
+
36
+ categorical_features = [
37
+ "TypeofContact",
38
+ "Occupation",
39
+ "Gender",
40
+ "ProductPitched",
41
+ "MaritalStatus",
42
+ "Designation",
43
+ ]
44
+
45
+ # Try to download dataset from HF to extract sensible options and ranges
46
+ defaults = {}
47
+ options = {}
48
+ try:
49
+ local_data = hf_hub_download(repo_id=REPO_ID, repo_type="dataset", filename=DATA_FILENAME, token=os.environ.get("HF_TOKEN"))
50
+ template_df = pd.read_csv(local_data)
51
+ # Cast each categorical column to str and use its sorted unique values as the select options
52
+ for c in categorical_features:
53
+ if c in template_df.columns:
54
+ options[c] = sorted(template_df[c].astype(str).unique().tolist())
55
+ for n in numeric_features:
56
+ if n in template_df.columns:
57
+ defaults[n] = {
58
+ "min": int(template_df[n].min()),
59
+ "max": int(template_df[n].max()),
60
+ "mean": float(template_df[n].median()),
61
+ }
62
+ except Exception:
63
+ # Fallback defaults if we cannot download dataset
64
+ options = {
65
+ "TypeofContact": ["Company Invited", "Self Enquiry"],
66
+ "Occupation": ["Salaried", "Small Business", "Free Lancer", "Other"],
67
+ "Gender": ["Male", "Female"],
68
+ "ProductPitched": ["Basic", "Standard", "Deluxe", "Super Deluxe", "King"],
69
+ "MaritalStatus": ["Single", "Married", "Divorced", "Unmarried"],
70
+ "Designation": ["Executive", "Manager", "Senior Manager", "AVP", "VP"],
71
+ }
72
+ defaults = {
73
+ "Age": {"min": 18, "max": 80, "mean": 35},
74
+ "CityTier": {"min": 1, "max": 3, "mean": 2},
75
+ "DurationOfPitch": {"min": 1, "max": 60, "mean": 10},
76
+ "NumberOfPersonVisiting": {"min": 1, "max": 10, "mean": 3},
77
+ "NumberOfFollowups": {"min": 0, "max": 12, "mean": 3},
78
+ "PreferredPropertyStar": {"min": 1, "max": 5, "mean": 3},
79
+ "NumberOfTrips": {"min": 0, "max": 20, "mean": 2},
80
+ "Passport": {"min": 0, "max": 1, "mean": 1},
81
+ "PitchSatisfactionScore": {"min": 1, "max": 5, "mean": 3},
82
+ "OwnCar": {"min": 0, "max": 1, "mean": 1},
83
+ "NumberOfChildrenVisiting": {"min": 0, "max": 5, "mean": 0},
84
+ "MonthlyIncome": {"min": 0, "max": 200000, "mean": 30000},
85
+ }
86
+
87
+ # UI inputs for numeric features
88
+ st.sidebar.header("Numeric inputs")
89
+ user_inputs = {}
90
+ for n in numeric_features:
91
+ conf = defaults.get(n, {"min": 0, "max": 1000, "mean": 0})
92
+ step = 1 if isinstance(conf["mean"], int) or n != "MonthlyIncome" else 1
93
+ if n in ["Passport", "OwnCar"]:
94
+ # Use selectbox for binary features
95
+ user_inputs[n] = st.sidebar.selectbox(n, options=[0, 1], index=int(conf["mean"]))
96
+ else:
97
+ # number_input with reasonable range
98
+ if n == "MonthlyIncome":
99
+ user_inputs[n] = st.sidebar.number_input(
100
+ n,
101
+ min_value=int(conf["min"]),
102
+ max_value=int(conf["max"]) if conf["max"] > 0 else 1_000_000,
103
+ value=int(conf["mean"]),
104
+ step=step
105
+ )
106
+ else:
107
+ user_inputs[n] = st.sidebar.number_input(
108
+ n,
109
+ min_value=int(conf["min"]),
110
+ max_value=int(conf["max"]) if conf["max"] > 0 else 10000,
111
+ value=int(conf["mean"]),
112
+ step=1,
113
+ )
114
+
115
+ # UI inputs for categorical features
116
+ st.sidebar.header("Categorical inputs")
117
+ for c in categorical_features:
118
+ vals = options.get(c)
119
+ if vals:
120
+ user_inputs[c] = st.sidebar.selectbox(c, vals)
121
+ else:
122
+ # If we don't know categories, allow free text
123
+ user_inputs[c] = st.sidebar.text_input(c, "")
124
+
125
+ # Assemble input as DataFrame (matching training columns)
126
+ input_df = pd.DataFrame([user_inputs])
127
+
128
+ # Ensure categorical dtype for relevant cols
129
+ for c in categorical_features:
130
+ if c in input_df.columns:
131
+ input_df[c] = input_df[c].astype("category")
132
+
133
+ st.subheader("Input preview")
134
+ st.write(input_df.T)
135
+
136
+ # Load model (download from HF hub)
137
+ model = None
138
+ load_error = None
139
+ try:
140
+ model_path = hf_hub_download(repo_id=REPO_ID, repo_type="model", filename=MODEL_FILENAME, token=os.environ.get("HF_TOKEN"))
141
+ model = joblib.load(model_path)
142
+ except Exception as e:
143
+ load_error = str(e)
144
+
145
+ if load_error:
146
+ st.error("Failed to load model from Hugging Face Hub. Check HF_TOKEN and network.\n\n" + load_error)
147
+ else:
148
+ if st.button("Predict purchase probability"):
149
+ # Ensure ordering of columns matches model's expected features
150
+ ordered_cols = numeric_features + categorical_features
151
+ # Verify that every expected feature column is present in the input before predicting
152
+ missing = [c for c in ordered_cols if c not in input_df.columns]
153
+ if missing:
154
+ st.error(f"Missing features required by model: {missing}")
155
+ else:
156
+ X_input = input_df[ordered_cols].copy()
157
+ proba = model.predict_proba(X_input)[:, 1][0]
158
+ pred = int(proba >= CLASSIFICATION_THRESHOLD)
159
+
160
+ st.metric("Purchase Probability", f"{proba:.3f}")
161
+ st.metric("Predicted Purchase", "Yes" if pred == 1 else "No")
162
+ st.write(
163
+ "Notes: probability threshold = "
164
+ + str(CLASSIFICATION_THRESHOLD)
165
+ + ". Adjust threshold for sensitivity/precision tradeoff."
166
+ )
requirements.txt CHANGED
@@ -1,3 +1,7 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
 
1
+ pandas==2.2.2
2
+ huggingface_hub==0.32.6
3
+ streamlit==1.43.2
4
+ joblib==1.5.1
5
+ scikit-learn==1.6.0
6
+ xgboost==2.1.4
7
+ mlflow==3.0.1