deepakpathania committed on
Commit
8d8e847
·
verified ·
1 Parent(s): 5805ab3

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. Dockerfile +15 -12
  2. app.py +108 -0
  3. requirements.txt +7 -2
Dockerfile CHANGED
@@ -1,20 +1,23 @@
1
- FROM python:3.13.5-slim
 
2
 
 
3
  WORKDIR /app
4
 
5
- RUN apt-get update && apt-get install -y \
6
- build-essential \
7
- curl \
8
- git \
9
- && rm -rf /var/lib/apt/lists/*
10
-
11
- COPY requirements.txt ./
12
- COPY src/ ./src/
13
 
 
14
  RUN pip3 install -r requirements.txt
15
 
16
- EXPOSE 8501
 
 
 
 
 
17
 
18
- HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
19
 
20
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
 
1
+ # Use the official Python 3.9 base image (full variant; python:3.9-slim would be the minimal one)
2
+ FROM python:3.9
3
 
4
+ # Set the working directory inside the container to /app
5
  WORKDIR /app
6
 
7
+ # Copy all files from the current directory on the host to the container's /app directory
8
+ COPY . .
 
 
 
 
 
 
9
 
10
+ # Install Python dependencies listed in requirements.txt
11
  RUN pip3 install -r requirements.txt
12
 
13
+ RUN useradd -m -u 1000 user
14
+ USER user
15
+ ENV HOME=/home/user \
16
+ PATH=/home/user/.local/bin:$PATH
17
+
18
+ WORKDIR $HOME/app
19
 
20
+ COPY --chown=user . $HOME/app
21
 
22
+ # Run the Streamlit app on port 8501, bound to 0.0.0.0 so it is reachable from outside the container; XSRF protection is disabled
23
+ CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.enableXsrfProtection=false"]
app.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from huggingface_hub import hf_hub_download
4
+ import joblib
5
+ import os
6
+
7
+ # --- Mappings for Label Encoding (based on alphabetical sort of unique values from original data) ---
8
+ TYPEOFCONTACT_MAP = {'Company Invited': 0, 'Self Inquiry': 1}
9
+ OCCUPATION_MAP = {'Free Lancer': 0, 'Large Business': 1, 'Salaried': 2, 'Small Business': 3}
10
+ GENDER_MAP = {'Fe Male': 0, 'Female': 1, 'Male': 2, 'Unaware': 3}
11
+ PRODUCTPITCHED_MAP = {'Basic': 0, 'Deluxe': 1, 'King': 2, 'Standard': 3, 'Super Deluxe': 4}
12
+ MARITALSTATUS_MAP = {'Divorced': 0, 'Married': 1, 'Single': 2}
13
+ DESIGNATION_MAP = {'AVP': 0, 'Executive': 1, 'Manager': 2, 'Senior Manager': 3, 'VP': 4}
14
+
15
+ # Download and load the model
16
+ MODEL_REPO_ID = "deepakpathania/tourism_xgboost_model"
17
+ MODEL_FILENAME = "xgboost_model/best_tourism_model_v1.joblib"
18
+
19
+ try:
20
+ model_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename=MODEL_FILENAME)
21
+ model = joblib.load(model_path)
22
+ except Exception as e:
23
+ st.error(f"Error loading model from Hugging Face Hub: {e}")
24
+ st.stop()
25
+
26
+ # Streamlit UI for Wellness Tourism Package Purchase Prediction
27
+ st.title("Wellness Tourism Package Purchase Prediction")
28
+ st.write("""
29
+ This application predicts whether a customer will purchase the newly introduced Wellness Tourism Package.
30
+ Fill in the customer details below to get a prediction.
31
+ """)
32
+
33
+ # User input fields
34
+ st.header("Customer Details")
35
+
36
+ age = st.slider("Age", min_value=18, max_value=80, value=35)
37
+ type_of_contact = st.selectbox("Type of Contact", list(TYPEOFCONTACT_MAP.keys()))
38
+ city_tier = st.selectbox("City Tier", [1, 2, 3])
39
+ duration_of_pitch = st.slider("Duration of Pitch (minutes)", min_value=5, max_value=100, value=15)
40
+ occupation = st.selectbox("Occupation", list(OCCUPATION_MAP.keys()))
41
+ gender = st.selectbox("Gender", list(GENDER_MAP.keys())) # Using full list due to EDA observation
42
+ number_of_person_visiting = st.slider("Number of Persons Visiting", min_value=1, max_value=5, value=3)
43
+ number_of_followups = st.slider("Number of Follow-ups", min_value=1, max_value=6, value=3)
44
+ product_pitched = st.selectbox("Product Pitched", list(PRODUCTPITCHED_MAP.keys()))
45
+ preferred_property_star = st.selectbox("Preferred Property Star", [3.0, 4.0, 5.0])
46
+ marital_status = st.selectbox("Marital Status", list(MARITALSTATUS_MAP.keys()))
47
+ number_of_trips = st.slider("Number of Trips Annually", min_value=1, max_value=25, value=3)
48
+ passport = st.selectbox("Passport Holder?", [0, 1], format_func=lambda x: "Yes" if x == 1 else "No")
49
+ pitch_satisfaction_score = st.slider("Pitch Satisfaction Score", min_value=1, max_value=5, value=3)
50
+ own_car = st.selectbox("Owns Car?", [0, 1], format_func=lambda x: "Yes" if x == 1 else "No")
51
+ number_of_children_visiting = st.slider("Number of Children Visiting (under 5)", min_value=0, max_value=3, value=1)
52
+ designation = st.selectbox("Designation", list(DESIGNATION_MAP.keys()))
53
+ monthly_income = st.number_input("Monthly Income", min_value=1000.0, max_value=100000.0, value=25000.0, step=100.0)
54
+
55
+ # Prepare input data for the model (matching Xtrain structure after LabelEncoding)
56
+ if st.button("Predict Purchase"):
57
+ # Convert categorical inputs to numerical using defined mappings
58
+ encoded_type_of_contact = TYPEOFCONTACT_MAP[type_of_contact]
59
+ encoded_occupation = OCCUPATION_MAP[occupation]
60
+ encoded_gender = GENDER_MAP[gender]
61
+ encoded_product_pitched = PRODUCTPITCHED_MAP[product_pitched]
62
+ encoded_marital_status = MARITALSTATUS_MAP[marital_status]
63
+ encoded_designation = DESIGNATION_MAP[designation]
64
+
65
+ # Create a DataFrame with the same column order as Xtrain
66
+ input_data = pd.DataFrame([{
67
+ 'Age': age,
68
+ 'TypeofContact': encoded_type_of_contact,
69
+ 'CityTier': city_tier,
70
+ 'DurationOfPitch': duration_of_pitch,
71
+ 'Occupation': encoded_occupation,
72
+ 'Gender': encoded_gender,
73
+ 'NumberOfPersonVisiting': number_of_person_visiting,
74
+ 'NumberOfFollowups': number_of_followups,
75
+ 'ProductPitched': encoded_product_pitched,
76
+ 'PreferredPropertyStar': preferred_property_star,
77
+ 'MaritalStatus': encoded_marital_status,
78
+ 'NumberOfTrips': number_of_trips,
79
+ 'Passport': passport,
80
+ 'PitchSatisfactionScore': pitch_satisfaction_score,
81
+ 'OwnCar': own_car,
82
+ 'NumberOfChildrenVisiting': number_of_children_visiting,
83
+ 'Designation': encoded_designation,
84
+ 'MonthlyIncome': monthly_income
85
+ }])
86
+
87
+ # Ensure column order matches Xtrain used during training
88
+ # This list should match the column order in Xtrain exactly.
89
+ # The list is hard-coded (taken from Xtrain.columns in the training kernel); keep it in sync if the training features change.
90
+ column_order = [
91
+ 'Age', 'TypeofContact', 'CityTier', 'DurationOfPitch', 'Occupation', 'Gender',
92
+ 'NumberOfPersonVisiting', 'NumberOfFollowups', 'ProductPitched',
93
+ 'PreferredPropertyStar', 'MaritalStatus', 'NumberOfTrips', 'Passport',
94
+ 'PitchSatisfactionScore', 'OwnCar', 'NumberOfChildrenVisiting', 'Designation',
95
+ 'MonthlyIncome'
96
+ ]
97
+ input_data = input_data[column_order]
98
+
99
+ prediction = model.predict(input_data)[0]
100
+ prediction_proba = model.predict_proba(input_data)[:, 1][0]
101
+
102
+ st.subheader("Prediction Result:")
103
+ if prediction == 1:
104
+ st.success(f"The model predicts: **Customer WILL purchase the Wellness Tourism Package!** (Probability: {prediction_proba:.2f})")
105
+ else:
106
+ st.info(f"The model predicts: **Customer will NOT purchase the Wellness Tourism Package.** (Probability: {prediction_proba:.2f})")
107
+
108
+ st.write("Note: The model's classification threshold is 0.45.")
requirements.txt CHANGED
@@ -1,3 +1,8 @@
1
- altair
2
  pandas
3
- streamlit
 
 
 
 
 
 
 
 
1
  pandas
2
+ scikit-learn
3
+ xgboost
4
+ joblib
5
+ mlflow==3.0.1
6
+ pyngrok==7.2.12
7
+ streamlit
8
+ huggingface_hub