|
|
import joblib |
|
|
import pandas as pd |
|
|
from flask import Flask, request, jsonify |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# File name of the serialized model artifact. The path is relative, so it is
# resolved against the working directory of the process.
MODEL_PATH = "superkart_sales_model_v1.joblib"

# Deserialize the model a single time at import so that every request handler
# reuses the same in-memory object (handlers call ``model.predict``).
model = joblib.load(MODEL_PATH)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Ordinal codes for store-size labels: "Small" < "Medium" < "High".
SIZE_MAP = dict(Small=1, Medium=2, High=3)

# Ordinal codes for city tiers: "Tier 3" gets the lowest code and "Tier 1"
# the highest.
CITY_MAP = {f"Tier {tier}": 4 - tier for tier in (3, 2, 1)}
|
|
|
|
|
|
|
|
# Columns every input frame must provide. Input frames are restricted to, and
# ordered by, this list before being handed to the model.
EXPECTED_COLUMNS = [
    # Coerced to numeric during validation.
    "Product_Weight",
    "Product_Allocated_Area",
    "Product_MRP",
    "Store_Establishment_Year",
    # Ordinal features: labels are mapped to integer codes during validation.
    "Store_Size",
    "Store_Location_City_Type",
    # Passed through to the model without conversion.
    "Product_Sugar_Content",
    "Product_Type",
    "Store_Type",
]
|
|
|
|
|
def _encode_ordinal(values: pd.Series, mapping: dict) -> pd.Series:
    """Encode label strings through ``mapping`` while keeping numeric entries.

    Each entry is resolved independently: a known label becomes its code, an
    entry that already is (or parses as) a number keeps that number, and
    anything else becomes NaN.
    """
    as_number = pd.to_numeric(values, errors="coerce")
    if pd.api.types.is_numeric_dtype(values.dtype):
        # Already-encoded column: nothing to map.
        return as_number
    as_label = pd.to_numeric(values.map(mapping), errors="coerce")
    # Prefer the label code; fall back to the numeric parse so values such as
    # "2" mixed in with "Medium" are not lost.
    return as_label.where(as_label.notna(), as_number)


def coerce_and_validate(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned frame holding exactly ``EXPECTED_COLUMNS``.

    Steps:
      1. Verify that every expected column is present.
      2. Keep only the expected columns, in the expected order.
      3. Encode ``Store_Size`` / ``Store_Location_City_Type`` labels with
         ``SIZE_MAP`` / ``CITY_MAP``; numeric codes are accepted as-is,
         including numeric strings.
      4. Coerce the numeric feature columns; unparseable values become NaN.
      5. Drop every row that contains a NaN in any expected column.

    The input frame is never modified, and surviving rows keep their original
    index labels so callers can align the result with the raw input.

    Args:
        df: Raw input rows; extra columns are ignored.

    Returns:
        The cleaned frame (possibly empty).

    Raises:
        ValueError: If one or more expected columns are missing.
    """
    missing = [c for c in EXPECTED_COLUMNS if c not in df.columns]
    if missing:
        raise ValueError(f"Missing required columns: {missing}")

    # Copy *after* subsetting so the assignments below write to an independent
    # frame rather than to a slice derived from the caller's data.
    cleaned = df[EXPECTED_COLUMNS].copy()

    cleaned["Store_Size"] = _encode_ordinal(cleaned["Store_Size"], SIZE_MAP)
    cleaned["Store_Location_City_Type"] = _encode_ordinal(
        cleaned["Store_Location_City_Type"], CITY_MAP
    )

    for col in (
        "Product_Weight",
        "Product_Allocated_Area",
        "Product_MRP",
        "Store_Establishment_Year",
    ):
        cleaned[col] = pd.to_numeric(cleaned[col], errors="coerce")

    # Rows with any missing or unparseable value cannot be scored.
    return cleaned.dropna(axis=0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# WSGI application object; the route decorators below register handlers on it.
# NOTE(review): Flask's documentation recommends passing the module name
# (``__name__``) as ``import_name`` -- confirm this display-style string is intentional.
app = Flask(import_name="SuperKart Sales Predictor")
|
|
|
|
|
@app.get("/")
def home():
    """Return a fixed plain-text banner confirming the service is reachable."""
    status_message = "SuperKart Sales Prediction API is up!"
    return status_message
|
|
|
|
|
@app.post("/v1/predict")
def predict_single():
    """Predict sales for a single product/store record.

    The request body must be a JSON object, for example::

        {
          "Product_Weight": 12.5,
          "Product_Allocated_Area": 30,
          "Product_MRP": 199.0,
          "Store_Establishment_Year": 2008,
          "Store_Size": "Medium",               (or 2)
          "Store_Location_City_Type": "Tier 1", (or 3)
          "Product_Sugar_Content": "Regular",
          "Product_Type": "Snack Foods",
          "Store_Type": "Supermarket Type 1"
        }

    Responses:
        200: ``{"Predicted_Product_Store_Sales_Total": <float, 2 decimals>}``
        400: ``{"error": ...}`` when the body is not a JSON object, required
             fields are missing, or the record does not survive cleaning.
        500: ``{"error": ...}`` for unexpected server-side failures.
    """
    # force=True parses the body regardless of Content-Type; silent=True
    # returns None for malformed JSON instead of raising.
    payload = request.get_json(force=True, silent=True)
    if not isinstance(payload, dict):
        return jsonify({"error": "Request body must be a JSON object of feature values."}), 400

    try:
        features = coerce_and_validate(pd.DataFrame([payload]))
        if features.empty:
            return jsonify({"error": "Input invalid or resulted in empty rows after cleaning."}), 400
        pred = float(model.predict(features)[0])
    except (ValueError, TypeError, KeyError) as exc:
        # Problems caused by the submitted data are the client's to fix.
        return jsonify({"error": str(exc)}), 400
    except Exception:
        # Anything else is a server-side fault: log the traceback and avoid
        # leaking internal details to the caller.
        app.logger.exception("Unexpected failure in /v1/predict")
        return jsonify({"error": "Internal server error."}), 500

    return jsonify({"Predicted_Product_Store_Sales_Total": round(pred, 2)})
|
|
|
|
|
@app.post("/v1/predict_batch")
def predict_batch():
    """Predict sales for every valid row of an uploaded CSV file.

    Expects a multipart form-data upload with the CSV under the key ``file``.
    The CSV must include the ``EXPECTED_COLUMNS`` headers.

    Rows that do not survive cleaning are omitted from the output. The
    response is a JSON array of the first 50 scored rows, each holding the
    original CSV fields plus ``Predicted_Product_Store_Sales_Total``.

    Responses:
        200: JSON array of scored rows (at most 50).
        400: ``{"error": ...}`` when the file is missing, unreadable, lacks
             required columns, or has no valid rows.
        500: ``{"error": ...}`` for unexpected server-side failures.
    """
    if "file" not in request.files:
        return jsonify({"error": "Please upload a CSV file with key 'file'."}), 400

    try:
        raw = pd.read_csv(request.files["file"])
        features = coerce_and_validate(raw)
        if features.empty:
            return jsonify({"error": "All rows invalid or empty after cleaning."}), 400
        preds = model.predict(features)
    except (ValueError, TypeError, KeyError) as exc:
        # Covers CSV parsing errors and missing columns (pandas parser errors
        # derive from ValueError).
        return jsonify({"error": str(exc)}), 400
    except Exception:
        app.logger.exception("Unexpected failure in /v1/predict_batch")
        return jsonify({"error": "Internal server error."}), 500

    # Cleaning may drop rows, so align the raw rows with the ones that were
    # actually scored; assigning to the full frame would fail on a length
    # mismatch.
    scored = raw.loc[features.index].copy()
    scored["Predicted_Product_Store_Sales_Total"] = preds

    # to_json already produces JSON text (NaN becomes null); wrap it in a
    # response with the proper content type instead of returning a bare string.
    body = scored.head(50).to_json(orient="records")
    return app.response_class(body, mimetype="application/json")
|
|
|
|
|
if __name__ == "__main__":
    import os

    # The server listens on all interfaces, so the interactive debugger (which
    # permits arbitrary code execution) must stay off unless explicitly
    # requested through the FLASK_DEBUG environment variable.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)
|
|
|