import joblib
import pandas as pd
from flask import Flask, request, jsonify

# -----------------------------
# Load pipeline (preprocessor + model)
# -----------------------------
MODEL_PATH = "superkart_sales_model_v1.joblib"
model = joblib.load(MODEL_PATH)

# -----------------------------
# Ordinal encodings used at training time.
# Clients may send either the string label or the numeric code directly.
# -----------------------------
SIZE_MAP = {"Small": 1, "Medium": 2, "High": 3}
CITY_MAP = {"Tier 3": 1, "Tier 2": 2, "Tier 1": 3}

# Required columns, using the SAME names and order as during training.
EXPECTED_COLUMNS = [
    "Product_Weight",
    "Product_Allocated_Area",
    "Product_MRP",
    "Store_Establishment_Year",
    "Store_Size",                # numeric 1/2/3 OR strings -> mapped
    "Store_Location_City_Type",  # numeric 1/2/3 OR strings -> mapped
    "Product_Sugar_Content",     # categorical
    "Product_Type",              # categorical
    "Store_Type",                # categorical
]

# Columns that must end up numeric after coercion.
_NUMERIC_COLUMNS = [
    "Store_Size",
    "Store_Location_City_Type",
    "Product_Weight",
    "Product_Allocated_Area",
    "Product_MRP",
    "Store_Establishment_Year",
]


def coerce_and_validate(df: pd.DataFrame) -> pd.DataFrame:
    """Select, encode and sanity-check the columns the model expects.

    Raises ValueError when any required column is absent. String labels in
    the ordinal columns are mapped to their training-time codes; unmappable
    or non-numeric values become NaN and those rows are dropped.
    """
    work = df.copy()

    absent = [c for c in EXPECTED_COLUMNS if c not in work.columns]
    if absent:
        raise ValueError(f"Missing required columns: {absent}")
    work = work[EXPECTED_COLUMNS]

    # Translate string labels to ordinal codes when the client sent strings.
    if work["Store_Size"].dtype == object:
        work["Store_Size"] = work["Store_Size"].map(SIZE_MAP)
    if work["Store_Location_City_Type"].dtype == object:
        work["Store_Location_City_Type"] = work["Store_Location_City_Type"].map(CITY_MAP)

    # Force every numeric/ordinal column to numbers; bad values become NaN.
    for col in _NUMERIC_COLUMNS:
        work[col] = pd.to_numeric(work[col], errors="coerce")

    # The model was trained on clean data, so drop rows that still hold NA.
    # (Swap in imputation here if partial rows should be scored instead.)
    if work.isna().any().any():
        work = work.dropna(axis=0).copy()

    return work


# -----------------------------
# Flask app
# -----------------------------
app = Flask("SuperKart Sales Predictor")
@app.get("/")
def home():
    """Health-check endpoint: confirms the service is running."""
    return "SuperKart Sales Prediction API is up!"


@app.post("/v1/predict")
def predict_single():
    """Score a single record supplied as a JSON object.

    JSON body example:
    {
      "Product_Weight": 12.5,
      "Product_Allocated_Area": 30,
      "Product_MRP": 199.0,
      "Store_Establishment_Year": 2008,
      "Store_Size": "Medium",               // or 2
      "Store_Location_City_Type": "Tier 1", // or 3
      "Product_Sugar_Content": "Regular",
      "Product_Type": "Snack Foods",
      "Store_Type": "Supermarket Type 1"
    }

    Returns {"Predicted_Product_Store_Sales_Total": <float>} on success,
    or {"error": <message>} with HTTP 400 on any validation/parse failure.
    """
    try:
        data = request.get_json(force=True)
        df = pd.DataFrame([data])
        df = coerce_and_validate(df)
        if df.empty:
            return jsonify({"error": "Input invalid or resulted in empty rows after cleaning."}), 400
        pred = float(model.predict(df)[0])
        return jsonify({"Predicted_Product_Store_Sales_Total": round(pred, 2)})
    except Exception as e:
        # Boundary handler: surface the failure reason to the client as a 400.
        return jsonify({"error": str(e)}), 400


@app.post("/v1/predict_batch")
def predict_batch():
    """Score many records from an uploaded CSV.

    Form-data upload: file=CSV. The CSV must include the EXPECTED_COLUMNS
    headers. Invalid rows are dropped before scoring; the response echoes
    the surviving original rows plus a prediction column (first 50 rows).
    """
    try:
        if "file" not in request.files:
            return jsonify({"error": "Please upload a CSV file with key 'file'."}), 400
        file = request.files["file"]
        df = pd.read_csv(file)
        df_clean = coerce_and_validate(df)
        if df_clean.empty:
            return jsonify({"error": "All rows invalid or empty after cleaning."}), 400
        preds = model.predict(df_clean)
        # BUG FIX: coerce_and_validate may drop NA/invalid rows, so `preds`
        # can be shorter than `df`. Assigning it into a full copy of `df`
        # raised a length-mismatch ValueError; select only the surviving
        # rows (by df_clean's index) so lengths always agree and the client
        # still sees the original, un-encoded column values.
        out = df.loc[df_clean.index].copy()
        out["Predicted_Product_Store_Sales_Total"] = preds
        # Return top rows to avoid huge payloads
        return out.head(50).to_json(orient="records")
    except Exception as e:
        # Boundary handler: surface the failure reason to the client as a 400.
        return jsonify({"error": str(e)}), 400


if __name__ == "__main__":
    # For local dev (Colab) only — debug=True must not ship to production.
    app.run(host="0.0.0.0", port=7860, debug=True)