Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import joblib | |
| from huggingface_hub import hf_hub_download | |
| from datasets import load_dataset | |
# -----------------------------
# CONFIG (edit if needed)
# -----------------------------
# Hugging Face model repo and the serialized model file stored in it.
HF_MODEL_REPO = "nansri/wellness-purchase-predictor"
MODEL_FILENAME = "best_model.joblib"

# Dataset repo and training split; read only to derive the form's dropdown
# options and default values.
HF_DATASET_REPO = "nansri/visit-with-us-wellness"
TRAIN_FILE = "processed/train.csv"

# Page configuration is issued before any other Streamlit call in the script.
st.set_page_config(
    page_title="Wellness Package Predictor",
    layout="centered",
)
# -----------------------------
# Load model from HF hub
# -----------------------------
@st.cache_resource
def load_model():
    """Download the trained model from the Hugging Face Hub and load it.

    Streamlit re-executes the whole script on every widget interaction, so
    the result is cached with ``st.cache_resource``: the file is resolved and
    deserialized once per server process and the same model object is reused
    by later reruns.

    Returns:
        The object stored in ``MODEL_FILENAME`` inside ``HF_MODEL_REPO``, as
        returned by ``joblib.load``.
    """
    model_path = hf_hub_download(
        repo_id=HF_MODEL_REPO,
        filename=MODEL_FILENAME,
        repo_type="model",
    )
    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code. Only point HF_MODEL_REPO at a repository you trust.
    return joblib.load(model_path)
# -----------------------------
# Load metadata (optional but helpful)
# -----------------------------
@st.cache_data
def load_train_metadata():
    """Derive form metadata (columns, defaults, dropdown options) from the training split.

    The result is cached with ``st.cache_data`` so the dataset is not loaded
    and summarized again on every Streamlit rerun.

    Returns:
        A 6-tuple ``(feature_cols, num_cols, cat_cols, medians, modes,
        categories)``:

        * ``feature_cols`` - every column except the target ``"ProdTaken"``,
          in dataset order.
        * ``num_cols`` - feature columns with a numeric dtype.
        * ``cat_cols`` - the remaining feature columns.
        * ``medians`` - ``{numeric column: median as float}``; 0.0 when the
          column has no non-null values.
        * ``modes`` - ``{categorical column: most frequent value as str}``;
          ``""`` when the column has no non-null values.
        * ``categories`` - ``{categorical column: sorted list of the distinct
          non-null values as str}``.
    """
    ds = load_dataset(HF_DATASET_REPO, data_files={"train": TRAIN_FILE})
    train_df = ds["train"].to_pandas()

    # feature columns (exclude target)
    feature_cols = [c for c in train_df.columns if c != "ProdTaken"]

    # identify numeric vs categorical
    num_cols = train_df[feature_cols].select_dtypes(include=np.number).columns.tolist()
    cat_cols = [c for c in feature_cols if c not in num_cols]

    # Numeric defaults. The median of an all-NaN column is NaN, which is not a
    # usable widget default, so fall back to 0.0 in that case.
    medians = {}
    for c in num_cols:
        median = train_df[c].median()
        medians[c] = 0.0 if pd.isna(median) else float(median)

    # Categorical defaults and dropdown options, both taken from the non-null
    # values of each column.
    modes = {}
    categories = {}
    for c in cat_cols:
        non_null = train_df[c].dropna()
        modes[c] = "" if non_null.empty else str(non_null.mode().iloc[0])
        categories[c] = sorted(str(x) for x in non_null.unique().tolist())

    return feature_cols, num_cols, cat_cols, medians, modes, categories
# Load the model and the training-set metadata that drives the input form.
model = load_model()
(
    feature_cols,
    num_cols,
    cat_cols,
    medians,
    modes,
    categories,
) = load_train_metadata()

# -----------------------------
# UI
# -----------------------------
st.title("Wellness Tourism Package Purchase Predictor")
st.write("Enter customer details to predict likelihood of purchasing the Wellness Tourism Package.")

# Collected widget values, keyed by feature column name.
inputs = {}

with st.form("input_form"):
    st.subheader("Customer & Interaction Inputs")

    # One numeric widget per numeric feature, pre-filled with the training
    # median (0.0 if none is known). Values are always entered as floats,
    # even for integer-like columns.
    st.markdown("**Numeric Features**")
    for name in num_cols:
        inputs[name] = st.number_input(name, value=float(medians.get(name, 0.0)))

    # One widget per categorical feature: a dropdown when the known
    # categories are available, otherwise a free-text box.
    st.markdown("**Categorical Features**")
    for name in cat_cols:
        choices = categories.get(name, [])
        if not choices:
            inputs[name] = st.text_input(name, value=modes.get(name, ""))
            continue

        # Pre-select the training mode; use the first option when the mode
        # is missing or is not one of the listed categories.
        preselected = modes.get(name, choices[0])
        if preselected not in choices:
            preselected = choices[0]
        inputs[name] = st.selectbox(
            name,
            options=choices,
            index=choices.index(preselected),
        )

    submitted = st.form_submit_button("Predict")
if submitted:
    # Create dataframe from inputs (rubric requirement). Passing columns=
    # orders the single row by feature_cols.
    input_df = pd.DataFrame([inputs], columns=feature_cols)
    st.write("### Input DataFrame")
    st.dataframe(input_df)

    # Predict. Choose the code path by capability instead of catching every
    # exception from predict_proba: a blanket except would hide real inference
    # errors (bad input, schema mismatch) behind a second attempt with predict.
    if hasattr(model, "predict_proba"):
        # NOTE(review): column 1 is taken as the "purchase" probability, which
        # assumes a binary classifier whose second class is ProdTaken=1 - confirm.
        proba = model.predict_proba(input_df)[:, 1][0]
        pred = int(proba >= 0.5)
        st.success(f"Prediction (ProdTaken): {pred} | Purchase Probability: {proba:.3f}")
    else:
        # Model exposes no probabilities; report the predicted label only.
        pred = int(model.predict(input_df)[0])
        st.success(f"Prediction (ProdTaken): {pred}")