# Spaces:
# jkng77433 / Backend
# Sleeping
# File size: 7,095 Bytes
# 4a175f3


import os
import joblib
import pandas as pd
import numpy as np
from datetime import datetime
from flask import Flask, request, jsonify
import shutil  # if using ensure_model_present

# Resolve base directory robustly (works in Colab/Notebook and scripts)
try:
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
except NameError:
    # __file__ is not defined in notebooks; fall back to CWD
    BASE_DIR = os.getcwd()

# Name passed to Flask and reported by the root endpoint.
APP_NAME = "SuperKart_Sales_Forecast_API"

# Model location: next to this file unless the MODEL_PATH env var overrides it.
# `or` is used instead of a .get() default so that an empty MODEL_PATH
# variable also falls back to the default rather than yielding "".
DEFAULT_MODEL_PATH = os.path.join(BASE_DIR, "superkart_rf_best_pipeline.joblib")
MODEL_PATH = os.environ.get("MODEL_PATH") or DEFAULT_MODEL_PATH

# Reference year used to derive Store_Age from Store_Establishment_Year.
# Overridable through the CURRENT_YEAR env var; an empty value falls back to
# the current calendar year instead of crashing on int("").
CURRENT_YEAR = int(os.environ.get("CURRENT_YEAR") or datetime.now().year)

# Optional helper
def ensure_model_present():
    """Copy the model file into place when the default location is empty.

    Nothing happens if MODEL_PATH was overridden (differs from
    DEFAULT_MODEL_PATH) or if the file already exists. Otherwise the known
    fallback locations are probed in order and the first existing file is
    copied to MODEL_PATH.

    Raises:
        FileNotFoundError: if none of the fallback locations holds the model.
    """
    if MODEL_PATH != DEFAULT_MODEL_PATH or os.path.exists(MODEL_PATH):
        return

    model_filename = "superkart_rf_best_pipeline.joblib"
    candidates = [
        os.path.join(folder, model_filename)
        for folder in ("/content/backend_files", "/content")
    ]

    # First fallback location that actually exists, or None.
    found = next((path for path in candidates if os.path.exists(path)), None)
    if found is None:
        raise FileNotFoundError(
            f"Model file not found. Checked: {candidates}. "
            "Upload the model or set env var MODEL_PATH to the correct file."
        )

    os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
    shutil.copy(found, MODEL_PATH)
    print(f"[INFO] Copied model from {found} to {MODEL_PATH}")

# Raw input columns of the API. predict_single() reads exactly these keys from
# the JSON payload (absent keys become None); predict_batch() adds any of these
# columns that the uploaded CSV lacks, filled with None. The root endpoint
# returns this list under "raw_fields".
RAW_FIELDS = [
    "Product_Id",
    "Product_Weight",
    "Product_Sugar_Content",
    "Product_Allocated_Area",
    "Product_Type",
    "Product_MRP",
    "Store_Id",
    "Store_Establishment_Year",
    "Store_Age",
    "Store_Size",
    "Store_Location_City_Type",
    "Store_Type",
]

def map_product_category(pid):
    """Translate a product id into a broad category using its two-letter prefix.

    The id is converted to a string and the first two characters are compared
    case-insensitively: ``FD`` -> "Food", ``NC`` -> "Non-Consumable",
    ``DR`` -> "Drinks". Every other prefix (including ids shorter than two
    characters) yields "Other".
    """
    categories_by_prefix = {
        "FD": "Food",
        "NC": "Non-Consumable",
        "DR": "Drinks",
    }
    code = str(pid)[:2].upper()
    return categories_by_prefix.get(code, "Other")

def clean_sugar(x):
    """Normalise a raw sugar-content label to a canonical spelling.

    Matching is case-insensitive and substring based, checked in this order:
    contains "low" -> "Low Sugar", contains "no" -> "No Sugar",
    contains "reg" -> "Regular". Any other non-empty text is returned
    title-cased; an empty/blank string is returned as "".

    Missing values (None, NaN, pd.NA) are returned as ``np.nan`` so they stay
    missing instead of being turned into a label.
    """
    # Without this guard str(None) == "none" contains "no" and would be
    # reported as "No Sugar", and NaN would become the literal label "Nan".
    if pd.api.types.is_scalar(x) and pd.isna(x):
        return np.nan

    s = str(x).strip().lower()
    if "low" in s:
        return "Low Sugar"
    if "no" in s:
        return "No Sugar"
    if "reg" in s:  # also matches "regular"
        return "Regular"
    return s.title() if s else s

def bin_allocated_area(x):
    """Bucket an allocated-area value into a size label.

    The input is coerced to a number; values that cannot be parsed (or are
    missing) give ``np.nan``. Otherwise the label is "Very Small" below 0.02,
    "Small" below 0.05, "Medium" below 0.10 and "Large" from 0.10 upwards.
    """
    area = pd.to_numeric(x, errors="coerce")
    if pd.isna(area):
        return np.nan

    # NOTE: these cut-offs are placeholders; they must be the same thresholds
    # that were used when the model was trained.
    upper_bounds = (
        (0.02, "Very Small"),
        (0.05, "Small"),
        (0.10, "Medium"),
    )
    for bound, label in upper_bounds:
        if area < bound:
            return label
    return "Large"

def bin_mrp(x):
    """Bucket a product MRP into a price tier.

    The input is coerced to a number; unparseable or missing values give
    ``np.nan``. Otherwise: below 100 -> "Low", below 150 -> "Medium",
    below 200 -> "High", 200 and above -> "Premium".
    """
    price = pd.to_numeric(x, errors="coerce")
    if pd.isna(price):
        return np.nan

    tiers = ((100, "Low"), (150, "Medium"), (200, "High"))
    # First tier whose upper limit exceeds the price; "Premium" if none does.
    return next((name for limit, name in tiers if price < limit), "Premium")

def engineer_features(df_raw: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df_raw`` with the derived feature columns added.

    The input frame is not modified. Columns produced (each is NaN when the
    raw columns it needs are absent):

    * ``Product_Category``  - from ``Product_Id`` via map_product_category.
    * ``Product_Sugar_Content`` - normalised in place via clean_sugar.
    * ``Allocated_Area_Bins`` - from ``Product_Allocated_Area``.
    * ``Store_Age`` - kept where provided; missing entries are derived as
      ``CURRENT_YEAR - Store_Establishment_Year`` (never below 0).
    * ``MRP_Bins`` - from ``Product_MRP``.
    * ``Unit_Value`` - ``Product_MRP / Product_Weight`` (zero weight -> NaN).
    * ``Store_Product_Interaction`` - ``Store_Type__Product_Type``.
    * ``MRPBin_StoreType`` - ``MRP_Bins__Store_Type``.
    """
    df = df_raw.copy()

    if "Product_Id" in df.columns:
        df["Product_Category"] = df["Product_Id"].map(map_product_category)
    else:
        df["Product_Category"] = np.nan

    if "Product_Sugar_Content" in df.columns:
        df["Product_Sugar_Content"] = df["Product_Sugar_Content"].apply(clean_sugar)

    # Allocated_Area_Bins (must match training exactly)
    if "Product_Allocated_Area" in df.columns:
        df["Allocated_Area_Bins"] = df["Product_Allocated_Area"].apply(bin_allocated_area)
    else:
        df["Allocated_Area_Bins"] = np.nan

    # Age implied by the establishment year; all-NaN when the year is absent.
    if "Store_Establishment_Year" in df.columns:
        est_year = pd.to_numeric(df["Store_Establishment_Year"], errors="coerce")
        derived_age = (CURRENT_YEAR - est_year).clip(lower=0)
    else:
        derived_age = pd.Series(np.nan, index=df.index)

    if "Store_Age" not in df.columns or df["Store_Age"].isna().all():
        df["Store_Age"] = derived_age
    else:
        # Fill only the rows that lack an age, so a batch mixing provided and
        # missing ages no longer leaves the missing ones as NaN. Provided
        # values are left untouched.
        age_missing = df["Store_Age"].isna()
        if age_missing.any():
            # .to_numpy() assigns positionally and sidesteps index alignment.
            df.loc[age_missing, "Store_Age"] = derived_age[age_missing].to_numpy()

    if "Product_MRP" in df.columns:
        df["MRP_Bins"] = df["Product_MRP"].apply(bin_mrp)
    else:
        df["MRP_Bins"] = np.nan

    if "Product_MRP" in df.columns and "Product_Weight" in df.columns:
        mrp = pd.to_numeric(df["Product_MRP"], errors="coerce")
        # Zero weight would divide by zero; treat it as missing instead.
        wgt = pd.to_numeric(df["Product_Weight"], errors="coerce").replace(0, np.nan)
        df["Unit_Value"] = mrp / wgt
    else:
        df["Unit_Value"] = np.nan

    if "Store_Type" in df.columns and "Product_Type" in df.columns:
        df["Store_Product_Interaction"] = df["Store_Type"].astype(str) + "__" + df["Product_Type"].astype(str)
    else:
        df["Store_Product_Interaction"] = np.nan

    # MRP_Bins always exists at this point, so only Store_Type needs checking.
    # The else branch keeps the output schema stable, like every other
    # derived column above.
    if "Store_Type" in df.columns:
        df["MRPBin_StoreType"] = df["MRP_Bins"].astype(str) + "__" + df["Store_Type"].astype(str)
    else:
        df["MRPBin_StoreType"] = np.nan

    return df

app = Flask(APP_NAME)

# Ensure model present (optional, best-effort): any failure is reported as a
# warning and start-up continues. A NameError is no longer swallowed silently,
# so a genuine NameError raised inside the helper (or the helper having been
# removed) shows up in the log instead of disappearing.
try:
    ensure_model_present()
except Exception as e:
    print(f"[WARN] {e}")

# Load model. On failure `model` is set to None and the forecast endpoints
# answer with "Model not loaded" instead of the process crashing at import.
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code -- MODEL_PATH must only ever point at a trusted model artifact.
try:
    model = joblib.load(MODEL_PATH)
    print(f"[INFO] Loaded model from {MODEL_PATH}")
except Exception as e:
    print(f"[ERROR] Failed to load model: {e}")
    model = None

@app.get("/")
def root():
    """Service descriptor: name, status, a usage hint and the raw input fields."""
    service_info = {
        "service": APP_NAME,
        "status": "ok",
        "message": "POST to /v1/forecast/single (JSON) or /v1/forecast/batch (CSV as 'file')",
        "raw_fields": RAW_FIELDS,
    }
    return jsonify(service_info)

@app.post("/v1/forecast/single")
def predict_single():
    """Predict sales for one product/store record posted as a JSON object.

    Keys listed in RAW_FIELDS are read from the body; absent keys are treated
    as missing values. A missing or unparseable body is handled like an empty
    object.

    Responses:
        200: ``Predicted_Product_Store_Sales_Total`` (rounded to 2 decimals)
             and ``input_used`` (the engineered feature row fed to the model,
             with missing values rendered as JSON ``null``).
        400: body is valid JSON but not an object, or feature engineering /
             prediction raised.
        500: the model could not be loaded at start-up.
    """
    if model is None:
        return jsonify({"error": "Model not loaded"}), 500

    payload = request.get_json(silent=True) or {}
    if not isinstance(payload, dict):
        # A JSON array or scalar has no .get(); answer with a clear 400
        # instead of letting an AttributeError surface as a 500.
        return jsonify({"error": "Request body must be a JSON object"}), 400

    row = {col: payload.get(col) for col in RAW_FIELDS}
    df_raw = pd.DataFrame([row])

    try:
        # Identifier columns are not passed to the model.
        df_feat = engineer_features(df_raw).drop(
            columns=["Product_Id", "Store_Id"], errors="ignore"
        )

        yhat = float(model.predict(df_feat)[0])

        # NaN would be serialised as the bare token NaN, which is not valid
        # JSON; report missing values as null instead.
        input_used = {
            key: None if pd.api.types.is_scalar(val) and pd.isna(val) else val
            for key, val in df_feat.iloc[0].to_dict().items()
        }
        return jsonify({
            "Predicted_Product_Store_Sales_Total": round(yhat, 2),
            "input_used": input_used
        })
    except Exception as e:
        return jsonify({"error": f"Inference failed: {e}"}), 400

@app.post("/v1/forecast/batch")
def predict_batch():
    """Predict sales for every row of a CSV uploaded under form field 'file'.

    Columns from RAW_FIELDS that the CSV lacks are added as empty columns
    before feature engineering.

    Responses:
        200: list of records -- the input rows (including any added empty
             columns) plus ``Predicted_Product_Store_Sales_Total``; missing
             cells are rendered as JSON ``null``.
        400: no file was uploaded, or reading / feature engineering /
             prediction raised.
        500: the model could not be loaded at start-up.
    """
    if model is None:
        return jsonify({"error": "Model not loaded"}), 500

    file = request.files.get("file")
    if file is None:
        return jsonify({"error": "Please POST a CSV file under form field 'file'"}), 400

    try:
        df_raw = pd.read_csv(file)
        for col in RAW_FIELDS:
            if col not in df_raw.columns:
                df_raw[col] = None

        # Identifier columns are not passed to the model.
        df_feat = engineer_features(df_raw).drop(
            columns=["Product_Id", "Store_Id"], errors="ignore"
        )

        preds = model.predict(df_feat)
        out = df_raw.copy()
        out["Predicted_Product_Store_Sales_Total"] = preds

        # Empty CSV cells are NaN; serialised as-is they become the bare token
        # NaN, which is not valid JSON. Emit null for them instead.
        records = [
            {
                key: None if pd.api.types.is_scalar(val) and pd.isna(val) else val
                for key, val in record.items()
            }
            for record in out.to_dict(orient="records")
        ]
        return jsonify(records)
    except Exception as e:
        return jsonify({"error": f"Inference failed: {e}"}), 400
 
if __name__ == "__main__":
    # Listen on all interfaces; the port comes from the PORT env var and
    # falls back to 7860 when it is not set.
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))