nansri committed on
Commit
1038117
·
verified ·
1 Parent(s): 66762bf

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. Dockerfile +17 -0
  2. app.py +99 -0
  3. push_to_hf_space.py +28 -0
  4. requirements.txt +7 -0
Dockerfile ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

# Hugging Face Docker Spaces run the container as UID 1000. Create a matching
# user with a real home directory so Streamlit and the Hub/datasets caches
# (which write under $HOME at runtime) have somewhere writable.
RUN useradd -m -u 1000 user
ENV HOME=/home/user

WORKDIR /app

# Install system deps (minimal)
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first so the dependency layer is reused when only app.py changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY --chown=user app.py .

# Drop root for runtime; packages above were installed system-wide as root.
USER user

EXPOSE 7860

CMD ["streamlit", "run", "app.py", "--server.port", "7860", "--server.address", "0.0.0.0"]
app.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import streamlit as st
3
+ import pandas as pd
4
+ import numpy as np
5
+ import joblib
6
+
7
+ from huggingface_hub import hf_hub_download
8
+ from datasets import load_dataset
9
+
10
+ # -----------------------------
11
+ # CONFIG (edit if needed)
12
+ # -----------------------------
13
# Model artifact location on the Hugging Face Hub.
HF_MODEL_REPO = "nansri/wellness-purchase-predictor"
MODEL_FILENAME = "best_model.joblib"

# Training split; read only to derive dropdown options and default values.
HF_DATASET_REPO = "nansri/visit-with-us-wellness"
TRAIN_FILE = "processed/train.csv"

st.set_page_config(
    page_title="Wellness Package Predictor",
    layout="centered",
)
21
+
22
+ # -----------------------------
23
+ # Load model from HF hub
24
+ # -----------------------------
25
@st.cache_resource
def load_model():
    """Fetch the serialized model from the Hub and deserialize it.

    Downloads ``MODEL_FILENAME`` from the ``HF_MODEL_REPO`` model repository
    and loads it with joblib. Decorated with ``st.cache_resource``.

    Returns:
        Whatever object was stored in the joblib file.
    """
    # NOTE(review): joblib.load unpickles the file, which can execute
    # arbitrary code -- only point HF_MODEL_REPO at a repository you trust.
    return joblib.load(
        hf_hub_download(
            repo_id=HF_MODEL_REPO,
            filename=MODEL_FILENAME,
            repo_type="model",
        )
    )
29
+
30
+ # -----------------------------
31
+ # Load metadata (optional but helpful)
32
+ # -----------------------------
33
@st.cache_data
def load_train_metadata():
    """Derive form metadata from the training split.

    Loads ``TRAIN_FILE`` from ``HF_DATASET_REPO`` and returns, in order:

    * feature_cols -- every column except the ``ProdTaken`` target
    * num_cols     -- feature columns with a numeric dtype
    * cat_cols     -- the remaining feature columns
    * medians      -- ``{numeric column: median as float}``
    * modes        -- ``{categorical column: most frequent value as str}``
                      (empty string when the column has no non-null values)
    * categories   -- ``{categorical column: sorted unique non-null values as str}``
    """
    dataset = load_dataset(HF_DATASET_REPO, data_files={"train": TRAIN_FILE})
    frame = dataset["train"].to_pandas()

    # Everything but the target is treated as a model input.
    feature_cols = [name for name in frame.columns if name != "ProdTaken"]

    # Split inputs by dtype: numeric vs. everything else.
    numeric_part = frame[feature_cols].select_dtypes(include=np.number)
    num_cols = numeric_part.columns.tolist()
    cat_cols = [name for name in feature_cols if name not in num_cols]

    # Default values for numeric widgets.
    medians = {}
    for name in num_cols:
        medians[name] = float(frame[name].median())

    # Default selection and dropdown options for categorical widgets.
    modes = {}
    categories = {}
    for name in cat_cols:
        series = frame[name]
        if series.notna().any():
            modes[name] = str(series.mode(dropna=True).iloc[0])
        else:
            modes[name] = ""
        categories[name] = sorted(str(value) for value in series.dropna().unique().tolist())

    return feature_cols, num_cols, cat_cols, medians, modes, categories
50
+
51
model = load_model()
(
    feature_cols,
    num_cols,
    cat_cols,
    medians,
    modes,
    categories,
) = load_train_metadata()

# -----------------------------
# UI
# -----------------------------
st.title("Wellness Tourism Package Purchase Predictor")
st.write("Enter customer details to predict likelihood of purchasing the Wellness Tourism Package.")

# Collected widget values, keyed by feature column name.
inputs = {}

with st.form("input_form"):
    st.subheader("Customer & Interaction Inputs")

    # Numeric features: always entered as floats, pre-filled with the training median.
    st.markdown("**Numeric Features**")
    for name in num_cols:
        inputs[name] = st.number_input(name, value=float(medians.get(name, 0.0)))

    # Categorical features: dropdown when options are known, free text otherwise.
    st.markdown("**Categorical Features**")
    for name in cat_cols:
        choices = categories.get(name, [])
        if not choices:
            inputs[name] = st.text_input(name, value=modes.get(name, ""))
            continue
        preferred = modes.get(name, choices[0])
        # Fall back to the first option when the stored mode is not selectable.
        start = choices.index(preferred) if preferred in choices else 0
        inputs[name] = st.selectbox(name, options=choices, index=start)

    submitted = st.form_submit_button("Predict")
85
+
86
if submitted:
    # Create dataframe from inputs (rubric requirement); `columns=` pins the
    # column order to the one derived from the training data.
    input_df = pd.DataFrame([inputs], columns=feature_cols)
    st.write("### Input DataFrame")
    st.dataframe(input_df)

    # Predict. Check for probability support explicitly rather than catching
    # every exception: a blanket `except Exception` would hide genuine
    # inference errors (bad input, version mismatch) behind a silent retry.
    predict_proba = getattr(model, "predict_proba", None)
    if callable(predict_proba):
        proba = predict_proba(input_df)[:, 1][0]
        pred = int(proba >= 0.5)
        st.success(f"Prediction (ProdTaken): {pred} | Purchase Probability: {proba:.3f}")
    else:
        # Model exposes no probability estimates; report the hard label only.
        pred = int(model.predict(input_df)[0])
        st.success(f"Prediction (ProdTaken): {pred}")
push_to_hf_space.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from huggingface_hub import HfApi
3
+ import os
4
+
5
HF_USERNAME = "nansri"  # change if needed
SPACE_NAME = "wellness-predictor-space"
SPACE_REPO = f"{HF_USERNAME}/{SPACE_NAME}"

DEPLOY_DIR = "/content/drive/MyDrive/PGP -GL/MLOps/deployment"

# Validate the local folder before touching the Hub, so a wrong or unmounted
# path does not leave a freshly created, empty Space behind.
if not os.path.isdir(DEPLOY_DIR):
    raise FileNotFoundError(f"Deployment folder not found: {DEPLOY_DIR}")

api = HfApi()

# Create Docker Space (required since we have Dockerfile)
api.create_repo(
    repo_id=SPACE_REPO,
    repo_type="space",
    space_sdk="docker",
    exist_ok=True,
)

# Upload the entire deployment folder
api.upload_folder(
    folder_path=DEPLOY_DIR,
    repo_id=SPACE_REPO,
    repo_type="space",
)

print("✅ Deployed to HF Space:", SPACE_REPO)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ numpy
4
+ scikit-learn
5
+ joblib
6
+ huggingface_hub
7
+ datasets