File size: 4,098 Bytes
279c335
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
from flask import Flask, request, jsonify
import joblib, pandas as pd, os

# Flask application object; the route decorators in this module register on it.
childcare_api = Flask(__name__)

# Relative path, so it is resolved against the process's working directory.
MODEL_PATH = "xgb_tuned_model.joblib"
# Loaded once at import time and shared by every request.
# NOTE(review): joblib.load unpickles the file, so the artifact must come
# from a trusted source — confirm how it is produced and deployed.
model = joblib.load(MODEL_PATH)

# ── Validation constants ──────────────────────────────────────
# Each VALID_* set holds the only values accepted for the request field
# it is checked against in validate_input.
VALID_CARE_PROGRAM = {
    "Full Day",
    "Half Day",
    "After School",
}
VALID_FACILITY_SIZE = {
    "Small",
    "Medium",
    "Large",
}
VALID_CITY_TYPE = {
    "Tier 1",
    "Tier 2",
    "Tier 3",
}
VALID_FACILITY_TYPE = {
    "Full-Service Center",
    "Montessori School",
    "Home Daycare",
    "Corporate Daycare",
}
VALID_CHILD_ID_CHAR = {
    "FD",
    "HD",
    "AS",
}
VALID_ACTIVITY_CAT = {
    "Academic",
    "Creative",
    "Wellness",
}

# Keys that must all be present in a prediction payload.
REQUIRED_FIELDS = [
    "Child_Age_Months",
    "Child_Care_Program",
    "Child_Attendance_Rate",
    "Child_Monthly_Fee",
    "Facility_Size",
    "Facility_Location_City_Type",
    "Facility_Type",
    "Child_Id_char",
    "Facility_Establishment_Year",
    "Activity_Type_Category",
]

def validate_input(data):
    """Validate a prediction payload.

    Checks run in this order and stop at the first failure: payload is a
    dict, all REQUIRED_FIELDS are present, numeric fields convert, numeric
    fields are in range, categorical fields hold an allowed value.

    Args:
        data: The decoded request body. Anything other than a dict is
            rejected rather than raising.

    Returns:
        A ``(is_valid, error_message)`` tuple. ``error_message`` is ``None``
        when ``is_valid`` is ``True`` and a human-readable string otherwise.
    """
    # A JSON body may decode to None, a list, a string or a number; without
    # this guard the key lookups below would raise instead of reporting.
    if not isinstance(data, dict):
        return False, "Request body must be a JSON object"

    # 1. Check required fields
    missing = [f for f in REQUIRED_FIELDS if f not in data]
    if missing:
        return False, f"Missing required fields: {missing}"

    # 2. Type checks
    try:
        age_months = float(data["Child_Age_Months"])
        attendance_rate = float(data["Child_Attendance_Rate"])
        monthly_fee = float(data["Child_Monthly_Fee"])
        int(data["Facility_Establishment_Year"])
    except (ValueError, TypeError, OverflowError) as e:
        # OverflowError covers int(inf) and float() of an integer too large
        # for a double, both of which a JSON document can carry.
        return False, f"Type error: {e}"

    # 3. Range checks (NaN fails every comparison, so it is rejected here)
    if not (0 <= age_months <= 120):
        return False, "Child_Age_Months must be between 0 and 120"
    if not (0.0 <= attendance_rate <= 1.0):
        return False, "Child_Attendance_Rate must be between 0.0 and 1.0"
    if not (0 <= monthly_fee <= 10000):
        return False, "Child_Monthly_Fee must be between 0 and 10000"

    # 4. Categorical checks
    categorical_rules = (
        ("Child_Care_Program", VALID_CARE_PROGRAM),
        ("Facility_Size", VALID_FACILITY_SIZE),
        ("Facility_Location_City_Type", VALID_CITY_TYPE),
        ("Facility_Type", VALID_FACILITY_TYPE),
        ("Child_Id_char", VALID_CHILD_ID_CHAR),
        ("Activity_Type_Category", VALID_ACTIVITY_CAT),
    )
    for field, allowed in categorical_rules:
        value = data[field]
        # The isinstance test comes first so an unhashable value (list or
        # dict) is reported as invalid instead of raising on the set lookup.
        if not isinstance(value, str) or value not in allowed:
            return False, f"{field} must be one of {allowed}"

    return True, None


@childcare_api.get("/health")
def health():
    """Return a fixed JSON document reporting the service as healthy."""
    status_payload = {
        "status": "healthy",
        "model": "XGBoost ChildCare Revenue Pipeline",
    }
    return jsonify(status_payload)


@childcare_api.post("/v1/predict")
def predict_revenue():
    """Predict revenue for the single record posted as JSON.

    Returns:
        ``({"Revenue": <float rounded to 2 places>}, 200)`` on success, or
        ``({"error": <message>}, 400)`` when the body is not a JSON object
        or fails ``validate_input``.
    """
    # force=True parses the body regardless of Content-Type; silent=True
    # returns None on unparsable input instead of raising, so malformed
    # bodies get the same JSON error shape as validation failures.
    data = request.get_json(force=True, silent=True)
    if not isinstance(data, dict):
        # Covers parse failures as well as valid JSON that is not an object
        # (null, arrays, scalars), which the field lookups cannot handle.
        return jsonify({"error": "Request body must be a JSON object"}), 400

    is_valid, error_msg = validate_input(data)
    if not is_valid:
        return jsonify({"error": error_msg}), 400

    # One-row frame; numeric fields are coerced, categorical ones pass
    # through unchanged.
    # NOTE(review): column names presumably match what the model was
    # trained on — confirm against the training pipeline.
    input_df = pd.DataFrame([{
        "Child_Age_Months":            float(data["Child_Age_Months"]),
        "Child_Care_Program":          data["Child_Care_Program"],
        "Child_Attendance_Rate":       float(data["Child_Attendance_Rate"]),
        "Child_Monthly_Fee":           float(data["Child_Monthly_Fee"]),
        "Facility_Size":               data["Facility_Size"],
        "Facility_Location_City_Type": data["Facility_Location_City_Type"],
        "Facility_Type":               data["Facility_Type"],
        "Child_Id_char":               data["Child_Id_char"],
        "Facility_Establishment_Year": int(data["Facility_Establishment_Year"]),
        "Activity_Type_Category":      data["Activity_Type_Category"],
    }])

    # float() converts the model's output element to a plain Python float
    # before it is rounded and serialized.
    prediction = float(model.predict(input_df)[0])
    return jsonify({"Revenue": round(prediction, 2)}), 200


if __name__ == "__main__":
    # Direct execution only: bind on all interfaces, taking the port from
    # the PORT environment variable and falling back to 7860.
    port = int(os.environ.get("PORT", 7860))
    run_options = {"host": "0.0.0.0", "port": port}
    childcare_api.run(**run_options)