File size: 3,844 Bytes
5bc4d50
 
 
 
 
859be77
 
 
33a4490
84032a6
5bc4d50
 
33a4490
5bc4d50
33a4490
 
5bc4d50
 
 
33a4490
 
5bc4d50
33a4490
5bc4d50
33a4490
 
5bc4d50
33a4490
 
 
5bc4d50
33a4490
5bc4d50
33a4490
 
5bc4d50
33a4490
 
 
 
 
 
5bc4d50
33a4490
5bc4d50
33a4490
 
5bc4d50
33a4490
 
 
 
5bc4d50
33a4490
5bc4d50
33a4490
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5bc4d50
33a4490
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import streamlit as st
import pandas as pd
import joblib
from huggingface_hub import hf_hub_download

# Ensure st.set_page_config() is the very first Streamlit command
st.set_page_config(page_title="Tourism Predictor", layout="wide")

# 1. Load Model from Hub
REPO_ID = "SagarAtHf/tourismpackagepredict-model"
FILENAME = "productionmodel.joblib"

@st.cache_resource
def load_model():
    model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
    return joblib.load(model_path)

model = load_model()

st.title("🌴 Full Feature Wellness Tourism Predictor")
st.write("Please fill in all 19 parameters to get an accurate prediction.")

# 2. Complete Form with All 19 Features
with st.form("prediction_form"):
    # Using 4 columns to fit all 19 features neatly
    c1, c2, c3, c4 = st.columns(4)
    
    with c1:
        age = st.number_input("Age", 18, 100, 30)
        type_of_contact = st.selectbox("Type of Contact", ["Self Enquiry", "Company Invited"])
        city_tier = st.selectbox("City Tier", [1, 2, 3])
        duration_pitch = st.number_input("Duration of Pitch (mins)", 0, 120, 15)
        occupation = st.selectbox("Occupation", ["Salaried", "Small Business", "Large Business", "Free Lancer"])

    with c2:
        gender = st.selectbox("Gender", ["Male", "Female"])
        num_person = st.number_input("Number of Persons Visiting", 1, 10, 2)
        num_followups = st.number_input("Number of Follow-ups", 1, 10, 3)
        product_pitched = st.selectbox("Product Pitched", ["Basic", "Deluxe", "Standard", "Super Deluxe", "King"])
        prop_stars = st.slider("Preferred Property Star", 3, 5, 3)

    with c3:
        marital_status = st.selectbox("Marital Status", ["Married", "Unmarried", "Divorced"])
        num_trips = st.number_input("Number of Trips", 1, 20, 1)
        passport = st.selectbox("Has Passport?", [0, 1], format_func=lambda x: "Yes" if x==1 else "No")
        pitch_satisfaction = st.slider("Pitch Satisfaction Score", 1, 5, 3)
        own_car = st.selectbox("Owns a Car?", [0, 1], format_func=lambda x: "Yes" if x==1 else "No")

    with c4:
        num_children = st.number_input("Number of Children", 0, 5, 0)
        designation = st.selectbox("Designation", ["Executive", "Manager", "Senior Manager", "AVP", "VP"])
        monthly_income = st.number_input("Monthly Income", value=25000)

    submit = st.form_submit_button("Generate Prediction")

# 3. Prediction Logic
if submit:
    # IMPORTANT: Dictionary keys must match the EXACT column names used during training
    data = {
        "Age": age,
        "TypeofContact": type_of_contact,
        "CityTier": city_tier,
        "DurationOfPitch": duration_pitch,
        "Occupation": occupation,
        "Gender": gender,
        "NumberOfPersonVisiting": num_person,
        "NumberOfFollowups": num_followups,
        "ProductPitched": product_pitched,
        "PreferredPropertyStar": prop_stars,
        "MaritalStatus": marital_status,
        "NumberOfTrips": num_trips,
        "Passport": passport,
        "PitchSatisfactionScore": pitch_satisfaction,
        "OwnCar": own_car,
        "NumberOfChildrenVisiting": num_children,
        "Designation": designation,
        "MonthlyIncome": monthly_income
    }
    
    input_df = pd.DataFrame([data])

    # Get the probability
    try:
        # Note: Pipeline applies ColumnTransformer automatically
        prob = model.predict_proba(input_df)[0][1]
        
        st.divider()
        if prob >= 0.45:
            st.success(f"### Result: 🎯 High Potential (Prob: {prob:.2%})")
            st.balloons()
        else:
            st.warning(f"### Result: ⏳ Low Likelihood (Prob: {prob:.2%})")
            
    except Exception as e:
        st.error(f"Prediction Error: {e}")
        st.info("Ensure the column names in app.py match your training data exactly.")