Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- Dockerfile +17 -0
- app.py +99 -0
- push_to_hf_space.py +28 -0
- requirements.txt +7 -0
Dockerfile
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.10-slim

# Install system deps (minimal)
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Hugging Face Docker Spaces run the container as UID 1000. Create that user
# and give it a writable HOME so the Hub/datasets caches and Streamlit's own
# config directory can be created at runtime without permission errors.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

# Copy requirements first so the dependency layer is cached across app edits.
COPY --chown=user requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY --chown=user app.py .

# Port the Streamlit server below listens on.
EXPOSE 7860

CMD ["streamlit", "run", "app.py", "--server.port", "7860", "--server.address", "0.0.0.0"]
|
app.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import streamlit as st
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import numpy as np
|
| 5 |
+
import joblib
|
| 6 |
+
|
| 7 |
+
from huggingface_hub import hf_hub_download
|
| 8 |
+
from datasets import load_dataset
|
| 9 |
+
|
| 10 |
+
# -----------------------------
|
| 11 |
+
# CONFIG (edit if needed)
|
| 12 |
+
# -----------------------------
|
| 13 |
+
# Hub repository and file name of the serialized model loaded by the app.
HF_MODEL_REPO = "nansri/wellness-purchase-predictor"
MODEL_FILENAME = "best_model.joblib"

# Training split; read only to derive form defaults and dropdown options.
HF_DATASET_REPO = "nansri/visit-with-us-wellness"
TRAIN_FILE = "processed/train.csv"

# Page-level Streamlit settings (browser tab title and centered layout).
st.set_page_config(
    page_title="Wellness Package Predictor",
    layout="centered",
)
|
| 21 |
+
|
| 22 |
+
# -----------------------------
|
| 23 |
+
# Load model from HF hub
|
| 24 |
+
# -----------------------------
|
| 25 |
+
@st.cache_resource
def load_model():
    """Download the serialized model from the Hugging Face Hub and load it.

    The file named by ``MODEL_FILENAME`` is fetched from ``HF_MODEL_REPO`` and
    deserialized with joblib. The result is cached via ``st.cache_resource``.

    Returns:
        The object stored in the joblib file.
    """
    # NOTE(review): joblib.load unpickles the downloaded file, which can run
    # arbitrary code -- only point HF_MODEL_REPO at a repository you trust.
    local_file = hf_hub_download(
        repo_id=HF_MODEL_REPO,
        filename=MODEL_FILENAME,
        repo_type="model",
    )
    return joblib.load(local_file)
|
| 29 |
+
|
| 30 |
+
# -----------------------------
|
| 31 |
+
# Load metadata (optional but helpful)
|
| 32 |
+
# -----------------------------
|
| 33 |
+
@st.cache_data
def load_train_metadata():
    """Derive input-form metadata from the training CSV on the Hub.

    Loads ``TRAIN_FILE`` from ``HF_DATASET_REPO`` and summarizes every column
    except the ``ProdTaken`` target.

    Returns:
        A 6-tuple ``(feature_cols, num_cols, cat_cols, medians, modes,
        categories)`` where:

        * ``feature_cols`` -- all column names except ``"ProdTaken"``.
        * ``num_cols`` -- the feature columns with a numeric dtype.
        * ``cat_cols`` -- the remaining (non-numeric) feature columns.
        * ``medians`` -- numeric column -> median as ``float`` (``0.0`` when
          the column has no usable values).
        * ``modes`` -- categorical column -> most frequent value as ``str``
          (``""`` when the column is entirely missing).
        * ``categories`` -- categorical column -> sorted list of the distinct
          non-missing values, each converted to ``str``.
    """
    ds = load_dataset(HF_DATASET_REPO, data_files={"train": TRAIN_FILE})
    train_df = ds["train"].to_pandas()

    # Feature columns (exclude target).
    feature_cols = [c for c in train_df.columns if c != "ProdTaken"]

    # Identify numeric vs categorical.
    num_cols = train_df[feature_cols].select_dtypes(include=np.number).columns.tolist()
    numeric = set(num_cols)
    cat_cols = [c for c in feature_cols if c not in numeric]

    # Defaults. The median of an all-missing column is NaN, which is not a
    # usable widget default; fall back to 0.0, mirroring the empty-string
    # fallback used for all-missing categorical columns below.
    medians = {}
    for c in num_cols:
        median = float(train_df[c].median())
        medians[c] = 0.0 if np.isnan(median) else median

    modes = {
        c: str(train_df[c].mode(dropna=True).iloc[0]) if train_df[c].notna().any() else ""
        for c in cat_cols
    }

    # Categories for dropdowns.
    categories = {
        c: sorted(str(x) for x in train_df[c].dropna().unique().tolist())
        for c in cat_cols
    }

    return feature_cols, num_cols, cat_cols, medians, modes, categories
|
| 50 |
+
|
| 51 |
+
# Load the cached model and the training-set metadata that drives the form.
model = load_model()
feature_cols, num_cols, cat_cols, medians, modes, categories = load_train_metadata()

# -----------------------------
# UI
# -----------------------------
st.title("Wellness Tourism Package Purchase Predictor")
st.write("Enter customer details to predict likelihood of purchasing the Wellness Tourism Package.")

# Collected widget values, keyed by feature column name.
inputs = {}

with st.form("input_form"):
    st.subheader("Customer & Interaction Inputs")

    # One numeric widget per numeric column, pre-filled with its median.
    # Values are always passed as float, so integer-like columns still
    # accept and return floats.
    st.markdown("**Numeric Features**")
    for col in num_cols:
        inputs[col] = st.number_input(col, value=float(medians.get(col, 0.0)))

    # One dropdown per categorical column, pre-selecting its mode; columns
    # with no known categories fall back to free-text entry.
    st.markdown("**Categorical Features**")
    for col in cat_cols:
        choices = categories.get(col, [])
        if not choices:
            inputs[col] = st.text_input(col, value=modes.get(col, ""))
            continue
        preferred = modes.get(col, choices[0])
        # Select the first option when the mode is not among the choices.
        start = choices.index(preferred) if preferred in choices else 0
        inputs[col] = st.selectbox(col, options=choices, index=start)

    submitted = st.form_submit_button("Predict")
|
| 85 |
+
|
| 86 |
+
if submitted:
    # Create dataframe from inputs (rubric requirement); columns follow the
    # training feature order.
    input_df = pd.DataFrame([inputs], columns=feature_cols)
    st.write("### Input DataFrame")
    st.dataframe(input_df)

    # Predict. Choose the path by capability instead of catching every
    # exception: a blanket except would hide real failures (for example a
    # column/schema mismatch) and then retry them through predict().
    if hasattr(model, "predict_proba"):
        # Column 1 is taken as the purchase probability -- assumes the model's
        # classes are ordered [0, 1]; TODO confirm against model.classes_.
        proba = model.predict_proba(input_df)[:, 1][0]
        pred = int(proba >= 0.5)
        st.success(f"Prediction (ProdTaken): {pred} | Purchase Probability: {proba:.3f}")
    else:
        # Model exposes no probabilities; report the hard label only.
        pred = int(model.predict(input_df)[0])
        st.success(f"Prediction (ProdTaken): {pred}")
|
push_to_hf_space.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from huggingface_hub import HfApi
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
HF_USERNAME = "nansri"  # change if needed
SPACE_NAME = "wellness-predictor-space"
SPACE_REPO = f"{HF_USERNAME}/{SPACE_NAME}"

# Local folder whose contents are uploaded to the Space.
DEPLOY_DIR = "/content/drive/MyDrive/PGP -GL/MLOps/deployment"

# Validate the local folder before touching the Hub, so a wrong path does not
# leave an empty Space behind.
if not os.path.isdir(DEPLOY_DIR):
    raise FileNotFoundError(f"Deployment folder not found: {DEPLOY_DIR}")

api = HfApi()

# Create Docker Space (required since we have Dockerfile); exist_ok makes
# re-running the script against an existing Space safe.
api.create_repo(
    repo_id=SPACE_REPO,
    repo_type="space",
    space_sdk="docker",
    exist_ok=True,
)

# Upload the entire deployment folder.
api.upload_folder(
    folder_path=DEPLOY_DIR,
    repo_id=SPACE_REPO,
    repo_type="space",
)

print("✅ Deployed to HF Space:", SPACE_REPO)
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
pandas
|
| 3 |
+
numpy
|
| 4 |
+
scikit-learn
|
| 5 |
+
joblib
|
| 6 |
+
huggingface_hub
|
| 7 |
+
datasets
|