File size: 4,139 Bytes
ec16458
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import joblib
import pandas as pd
from flask import Flask, request, jsonify

# -----------------------------
# Load pipeline (preprocessor + model)
# -----------------------------
# NOTE: this runs at import time — if the joblib file is missing or was
# saved with an incompatible sklearn version, the app fails to start
# (fail-fast is intentional for a model server).
MODEL_PATH = "superkart_sales_model_v1.joblib"
model = joblib.load(MODEL_PATH)  # full pipeline: preprocessing + regressor

# -----------------------------
# Helpers: map strings -> ordinal codes (if user sends strings)
# Your training expected numeric Store_Size & City_Type
# -----------------------------
SIZE_MAP = {"Small": 1, "Medium": 2, "High": 3}
CITY_MAP = {"Tier 3": 1, "Tier 2": 2, "Tier 1": 3}

# Required columns in the SAME names used during training
EXPECTED_COLUMNS = [
    "Product_Weight",
    "Product_Allocated_Area",
    "Product_MRP",
    "Store_Establishment_Year",
    "Store_Size",                    # numeric 1/2/3 OR strings -> mapped
    "Store_Location_City_Type",      # numeric 1/2/3 OR strings -> mapped
    "Product_Sugar_Content",         # categorical
    "Product_Type",                  # categorical
    "Store_Type"                     # categorical
]

# Numeric columns (including the two mapped ordinals) that must coerce cleanly.
_NUMERIC_COLUMNS = [
    "Store_Size", "Store_Location_City_Type",
    "Product_Weight", "Product_Allocated_Area", "Product_MRP",
    "Store_Establishment_Year",
]

def _to_ordinal(value, mapping):
    """Map a string label to its ordinal code; pass numerics through.

    A column that mixes string labels with already-numeric codes still has
    object dtype, and a plain ``Series.map(mapping)`` would turn the numeric
    entries into NaN (silently dropping those rows later). Unknown string
    labels are returned unchanged and become NaN in the numeric coercion
    step, preserving the original drop-invalid-rows behavior.
    """
    if isinstance(value, str):
        return mapping.get(value.strip(), value)
    return value

def coerce_and_validate(df: pd.DataFrame) -> pd.DataFrame:
    """Validate raw input rows and coerce them into the model's schema.

    Parameters
    ----------
    df : pd.DataFrame
        Raw input. Extra columns are ignored; ordinal columns may hold
        numeric codes, string labels, or a mix of both.

    Returns
    -------
    pd.DataFrame
        Cleaned frame with EXPECTED_COLUMNS in training order. Rows with
        unmappable/missing values are dropped, so the result may be empty.

    Raises
    ------
    ValueError
        If any required column is absent from ``df``.
    """
    df = df.copy()
    missing = [c for c in EXPECTED_COLUMNS if c not in df.columns]
    if missing:
        raise ValueError(f"Missing required columns: {missing}")

    # Keep only expected cols, in training order
    df = df[EXPECTED_COLUMNS]

    # Map string labels for ordinal columns; numeric entries pass through
    if df["Store_Size"].dtype == object:
        df["Store_Size"] = df["Store_Size"].map(lambda v: _to_ordinal(v, SIZE_MAP))
    if df["Store_Location_City_Type"].dtype == object:
        df["Store_Location_City_Type"] = df["Store_Location_City_Type"].map(
            lambda v: _to_ordinal(v, CITY_MAP)
        )

    # Final sanity: anything non-numeric left over becomes NaN here
    for col in _NUMERIC_COLUMNS:
        df[col] = pd.to_numeric(df[col], errors="coerce")

    # Basic NA handling (model was trained on clean data; here we drop rows with NA)
    if df.isna().any().any():
        # You can switch to imputation if preferred
        df = df.dropna(axis=0).copy()

    return df

# -----------------------------
# Flask app
# -----------------------------
# NOTE: the string here is only the app's import name/display name,
# not a route or hostname.
app = Flask("SuperKart Sales Predictor")

@app.get("/")
def home():
    """Health-check endpoint: confirms the service is reachable."""
    status_message = "SuperKart Sales Prediction API is up!"
    return status_message

@app.post("/v1/predict")
def predict_single():
    """Predict the sales total for one record posted as JSON.

    JSON body example:
    {
      "Product_Weight": 12.5,
      "Product_Allocated_Area": 30,
      "Product_MRP": 199.0,
      "Store_Establishment_Year": 2008,
      "Store_Size": "Medium",                // or 2
      "Store_Location_City_Type": "Tier 1",  // or 3
      "Product_Sugar_Content": "Regular",
      "Product_Type": "Snack Foods",
      "Store_Type": "Supermarket Type 1"
    }
    """
    try:
        # force=True: parse JSON even without a Content-Type header
        payload = request.get_json(force=True)
        frame = coerce_and_validate(pd.DataFrame([payload]))
        if frame.empty:
            return jsonify({"error": "Input invalid or resulted in empty rows after cleaning."}), 400
        prediction = float(model.predict(frame)[0])
        return jsonify({"Predicted_Product_Store_Sales_Total": round(prediction, 2)})
    except Exception as exc:
        # API boundary: surface any validation/parsing failure as a 400
        return jsonify({"error": str(exc)}), 400

@app.post("/v1/predict_batch")
def predict_batch():
    """Predict sales totals for a CSV of records.

    Form-data upload: file=CSV
    CSV must include the EXPECTED_COLUMNS headers.

    Returns up to the first 50 rows of the original CSV with a
    Predicted_Product_Store_Sales_Total column appended. Rows dropped
    during cleaning get a null prediction.
    """
    try:
        if "file" not in request.files:
            return jsonify({"error": "Please upload a CSV file with key 'file'."}), 400
        file = request.files["file"]
        df = pd.read_csv(file)
        df_clean = coerce_and_validate(df)
        if df_clean.empty:
            return jsonify({"error": "All rows invalid or empty after cleaning."}), 400
        preds = model.predict(df_clean)
        out = df.copy()
        # BUG FIX: predictions are computed on df_clean, which may have
        # fewer rows than the raw df (NA rows dropped). A plain column
        # assignment would raise a length-mismatch error for any batch
        # with invalid rows — align by df_clean's index instead, leaving
        # dropped rows with a null prediction.
        out["Predicted_Product_Store_Sales_Total"] = pd.Series(preds, index=df_clean.index)
        # Return top rows to avoid huge payloads
        return out.head(50).to_json(orient="records")
    except Exception as e:
        # API boundary: surface any parsing/validation failure as a 400
        return jsonify({"error": str(e)}), 400

if __name__ == "__main__":
    # For local dev (Colab), use:
    # NOTE(review): debug=True enables the Werkzeug reloader/debugger —
    # do not ship this to production; use a WSGI server (e.g. gunicorn).
    app.run(host="0.0.0.0", port=7860, debug=True)