import joblib
import pandas as pd
from flask import Flask, request, jsonify

# -----------------------------
# Load pipeline (preprocessor + model)
# -----------------------------
MODEL_PATH = "superkart_sales_model_v1.joblib"
model = joblib.load(MODEL_PATH)

# -----------------------------
# Ordinal encodings used at training time.
# Clients may send either the string label or the numeric code directly.
# -----------------------------
SIZE_MAP = {"Small": 1, "Medium": 2, "High": 3}
CITY_MAP = {"Tier 3": 1, "Tier 2": 2, "Tier 1": 3}

# Required columns, using the SAME names and order as during training.
EXPECTED_COLUMNS = [
    "Product_Weight",
    "Product_Allocated_Area",
    "Product_MRP",
    "Store_Establishment_Year",
    "Store_Size",                # numeric 1/2/3 OR strings -> mapped
    "Store_Location_City_Type",  # numeric 1/2/3 OR strings -> mapped
    "Product_Sugar_Content",     # categorical
    "Product_Type",              # categorical
    "Store_Type",                # categorical
]

# Columns that must end up numeric after coercion.
_NUMERIC_COLUMNS = [
    "Store_Size",
    "Store_Location_City_Type",
    "Product_Weight",
    "Product_Allocated_Area",
    "Product_MRP",
    "Store_Establishment_Year",
]


def coerce_and_validate(df: pd.DataFrame) -> pd.DataFrame:
    """Select, encode and sanity-check the columns the model expects.

    Raises ValueError when any required column is absent. String labels in
    the ordinal columns are mapped to their training-time codes; unmappable
    or non-numeric values become NaN and those rows are dropped.
    """
    work = df.copy()

    absent = [c for c in EXPECTED_COLUMNS if c not in work.columns]
    if absent:
        raise ValueError(f"Missing required columns: {absent}")
    work = work[EXPECTED_COLUMNS]

    # Translate string labels to ordinal codes when the client sent strings.
    if work["Store_Size"].dtype == object:
        work["Store_Size"] = work["Store_Size"].map(SIZE_MAP)
    if work["Store_Location_City_Type"].dtype == object:
        work["Store_Location_City_Type"] = work["Store_Location_City_Type"].map(CITY_MAP)

    # Force every numeric/ordinal column to numbers; bad values become NaN.
    for col in _NUMERIC_COLUMNS:
        work[col] = pd.to_numeric(work[col], errors="coerce")

    # The model was trained on clean data, so drop rows that still hold NA.
    # (Swap in imputation here if partial rows should be scored instead.)
    if work.isna().any().any():
        work = work.dropna(axis=0).copy()

    return work


# -----------------------------
# Flask app
# -----------------------------
app = Flask("SuperKart Sales Predictor")
@app.get("/")
def home():
    """Health-check endpoint: confirms the service is running."""
    return "SuperKart Sales Prediction API is up!"


@app.post("/v1/predict")
def predict_single():
    """Score a single record supplied as a JSON object.

    JSON body example:
    {
      "Product_Weight": 12.5,
      "Product_Allocated_Area": 30,
      "Product_MRP": 199.0,
      "Store_Establishment_Year": 2008,
      "Store_Size": "Medium",               // or 2
      "Store_Location_City_Type": "Tier 1", // or 3
      "Product_Sugar_Content": "Regular",
      "Product_Type": "Snack Foods",
      "Store_Type": "Supermarket Type 1"
    }

    Returns {"Predicted_Product_Store_Sales_Total": <float>} on success,
    or {"error": <message>} with HTTP 400 on any validation/parse failure.
    """
    try:
        data = request.get_json(force=True)
        df = pd.DataFrame([data])
        df = coerce_and_validate(df)
        if df.empty:
            return jsonify({"error": "Input invalid or resulted in empty rows after cleaning."}), 400
        pred = float(model.predict(df)[0])
        return jsonify({"Predicted_Product_Store_Sales_Total": round(pred, 2)})
    except Exception as e:
        # Boundary handler: surface the failure reason to the client as a 400.
        return jsonify({"error": str(e)}), 400


@app.post("/v1/predict_batch")
def predict_batch():
    """Score many records from an uploaded CSV.

    Form-data upload: file=CSV. The CSV must include the EXPECTED_COLUMNS
    headers. Invalid rows are dropped before scoring; the response echoes
    the surviving original rows plus a prediction column (first 50 rows).
    """
    try:
        if "file" not in request.files:
            return jsonify({"error": "Please upload a CSV file with key 'file'."}), 400
        file = request.files["file"]
        df = pd.read_csv(file)
        df_clean = coerce_and_validate(df)
        if df_clean.empty:
            return jsonify({"error": "All rows invalid or empty after cleaning."}), 400
        preds = model.predict(df_clean)
        # BUG FIX: coerce_and_validate may drop NA/invalid rows, so `preds`
        # can be shorter than `df`. Assigning it into a full copy of `df`
        # raised a length-mismatch ValueError; select only the surviving
        # rows (by df_clean's index) so lengths always agree and the client
        # still sees the original, un-encoded column values.
        out = df.loc[df_clean.index].copy()
        out["Predicted_Product_Store_Sales_Total"] = preds
        # Return top rows to avoid huge payloads
        return out.head(50).to_json(orient="records")
    except Exception as e:
        # Boundary handler: surface the failure reason to the client as a 400.
        return jsonify({"error": str(e)}), 400


if __name__ == "__main__":
    # For local dev (Colab) only — debug=True must not ship to production.
    app.run(host="0.0.0.0", port=7860, debug=True)