# File size: 7,095 Bytes
# 4a175f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220

import os
import joblib
import pandas as pd
import numpy as np
from datetime import datetime
from flask import Flask, request, jsonify
import shutil  # if using ensure_model_present

# Resolve the base directory robustly (works in Colab/notebooks and scripts).
try:
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
except NameError:
    # __file__ is not defined in notebooks; fall back to the working directory.
    BASE_DIR = os.getcwd()

APP_NAME = "SuperKart_Sales_Forecast_API"

# Model location: the MODEL_PATH environment variable, when set, overrides the
# default file that sits next to this module.
DEFAULT_MODEL_PATH = os.path.join(BASE_DIR, "superkart_rf_best_pipeline.joblib")
MODEL_PATH = os.environ.get("MODEL_PATH", DEFAULT_MODEL_PATH)

# Reference year used when deriving Store_Age from Store_Establishment_Year;
# overridable through the CURRENT_YEAR environment variable.
CURRENT_YEAR = int(os.environ.get("CURRENT_YEAR", datetime.now().year))

# Optional helper
def ensure_model_present():
    """Copy the model file into place when the default path is used but empty.

    Returns immediately when MODEL_PATH differs from DEFAULT_MODEL_PATH or
    when the file already exists. Otherwise the hard-coded ``/content``
    locations are probed in order and the first existing file is copied to
    MODEL_PATH.

    Raises:
        FileNotFoundError: if none of the candidate locations holds the model.
    """
    using_default = MODEL_PATH == DEFAULT_MODEL_PATH
    if not using_default or os.path.exists(MODEL_PATH):
        return

    model_name = "superkart_rf_best_pipeline.joblib"
    candidates = [
        os.path.join(folder, model_name)
        for folder in ("/content/backend_files", "/content")
    ]

    # First candidate that exists on disk, or None when there is none.
    found = next((path for path in candidates if os.path.exists(path)), None)
    if found is None:
        raise FileNotFoundError(
            f"Model file not found. Checked: {candidates}. "
            "Upload the model or set env var MODEL_PATH to the correct file."
        )

    os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
    shutil.copy(found, MODEL_PATH)
    print(f"[INFO] Copied model from {found} to {MODEL_PATH}")

# Raw input columns accepted by the API. predict_single reads exactly these
# keys from the JSON payload, predict_batch adds any that are absent from the
# uploaded CSV (filled with None), and the root endpoint advertises the list.
RAW_FIELDS = [
    "Product_Id",
    "Product_Weight",
    "Product_Sugar_Content",
    "Product_Allocated_Area",
    "Product_Type",
    "Product_MRP",
    "Store_Id",
    "Store_Establishment_Year",
    "Store_Age",
    "Store_Size",
    "Store_Location_City_Type",
    "Store_Type",
]

def map_product_category(pid):
    """Map a product id to a coarse category using its two-letter prefix.

    The id is stringified and its first two characters are upper-cased:
    "FD" -> "Food", "NC" -> "Non-Consumable", "DR" -> "Drinks"; any other
    prefix yields "Other".
    """
    prefix_to_category = {
        "FD": "Food",
        "NC": "Non-Consumable",
        "DR": "Drinks",
    }
    code = str(pid)[:2].upper()
    return prefix_to_category.get(code, "Other")

def clean_sugar(x):
    """Normalize a raw sugar-content label to a canonical category.

    Args:
        x: Raw label in any casing/spacing, or a missing value (None/NaN).

    Returns:
        "Low Sugar", "No Sugar" or "Regular" when the lower-cased text
        contains "low", "no" or "reg" respectively (checked in that order);
        ``np.nan`` for missing input; otherwise the stripped text in title
        case (an empty string stays empty).
    """
    # Missing values must stay missing: str(None) is "none", which contains
    # "no" and would otherwise be reported as "No Sugar" (and NaN as "Nan").
    # The is_scalar guard keeps pd.isna from returning an array for sequences.
    if pd.api.types.is_scalar(x) and pd.isna(x):
        return np.nan

    s = str(x).strip().lower()
    if "low" in s:
        return "Low Sugar"
    if "no" in s:
        return "No Sugar"
    if "reg" in s:  # also matches "regular"
        return "Regular"
    return s.title() if s else s

def bin_allocated_area(x):
    """Bucket an allocated-area value into a size label.

    The input is coerced to a number; values that cannot be parsed (or are
    missing) yield ``np.nan``. Otherwise the first band whose upper bound
    exceeds the value is returned, falling back to "Large".
    """
    area = pd.to_numeric(x, errors="coerce")
    if pd.isna(area):
        return np.nan

    # Use the same thresholds you trained with; these are placeholders.
    # Each pair is (exclusive upper bound, label), in ascending order.
    bands = (
        (0.02, "Very Small"),
        (0.05, "Small"),
        (0.10, "Medium"),
    )
    for upper_bound, label in bands:
        if area < upper_bound:
            return label
    return "Large"

def bin_mrp(x):
    """Bucket an MRP value into "Low", "Medium", "High" or "Premium".

    The input is coerced to a number; unparseable or missing values yield
    ``np.nan``. Bands are [..100) Low, [100..150) Medium, [150..200) High
    and Premium from 200 upwards.
    """
    price = pd.to_numeric(x, errors="coerce")
    if pd.isna(price):
        return np.nan

    # (exclusive upper bound, label) pairs in ascending order; the first
    # band that contains the price wins.
    bands = ((100, "Low"), (150, "Medium"), (200, "High"))
    return next((label for edge, label in bands if price < edge), "Premium")

def engineer_features(df_raw: pd.DataFrame) -> pd.DataFrame:
    """Derive the engineered feature columns from a raw input frame.

    Works on a copy, so ``df_raw`` is not modified. Every engineered column
    is always present in the result; when its source column(s) are absent it
    is filled with NaN.

    Added or rewritten columns:
        Product_Category: from the Product_Id prefix.
        Product_Sugar_Content: normalized in place (only if present).
        Allocated_Area_Bins: size band of Product_Allocated_Area.
        Store_Age: CURRENT_YEAR - Store_Establishment_Year, floored at 0,
            used for rows where Store_Age is missing.
        MRP_Bins: price band of Product_MRP.
        Unit_Value: Product_MRP / Product_Weight (zero weight -> NaN).
        Store_Product_Interaction: "<Store_Type>__<Product_Type>".
        MRPBin_StoreType: "<MRP_Bins>__<Store_Type>".

    Args:
        df_raw: Frame holding some or all of the raw input columns.

    Returns:
        A new DataFrame with the raw columns plus the engineered ones.
    """
    df = df_raw.copy()

    if "Product_Id" in df.columns:
        df["Product_Category"] = df["Product_Id"].map(map_product_category)
    else:
        df["Product_Category"] = np.nan

    if "Product_Sugar_Content" in df.columns:
        df["Product_Sugar_Content"] = df["Product_Sugar_Content"].apply(clean_sugar)

    # Allocated_Area_Bins (must match training exactly)
    if "Product_Allocated_Area" in df.columns:
        df["Allocated_Area_Bins"] = df["Product_Allocated_Area"].apply(bin_allocated_area)
    else:
        df["Allocated_Area_Bins"] = np.nan

    store_age_unusable = "Store_Age" not in df.columns or df["Store_Age"].isna().all()
    if "Store_Establishment_Year" in df.columns:
        established = pd.to_numeric(df["Store_Establishment_Year"], errors="coerce")
        derived_age = (CURRENT_YEAR - established).clip(lower=0)
        if store_age_unusable:
            df["Store_Age"] = derived_age
        else:
            # Keep the ages that were supplied and only fill the gaps, so a
            # batch with partially missing Store_Age no longer keeps NaN rows.
            df["Store_Age"] = df["Store_Age"].fillna(derived_age)
    elif store_age_unusable:
        df["Store_Age"] = np.nan

    if "Product_MRP" in df.columns:
        df["MRP_Bins"] = df["Product_MRP"].apply(bin_mrp)
    else:
        df["MRP_Bins"] = np.nan

    if "Product_MRP" in df.columns and "Product_Weight" in df.columns:
        mrp = pd.to_numeric(df["Product_MRP"], errors="coerce")
        # A zero weight becomes NaN so the division yields NaN, not inf.
        wgt = pd.to_numeric(df["Product_Weight"], errors="coerce").replace(0, np.nan)
        df["Unit_Value"] = mrp / wgt
    else:
        df["Unit_Value"] = np.nan

    # Interaction features are plain string concatenations; astype(str) means
    # a missing part shows up as text (e.g. "nan") inside the combined value.
    if "Store_Type" in df.columns and "Product_Type" in df.columns:
        df["Store_Product_Interaction"] = df["Store_Type"].astype(str) + "__" + df["Product_Type"].astype(str)
    else:
        df["Store_Product_Interaction"] = np.nan

    # MRP_Bins always exists by now, so only Store_Type decides this branch.
    if "Store_Type" in df.columns:
        df["MRPBin_StoreType"] = df["MRP_Bins"].astype(str) + "__" + df["Store_Type"].astype(str)
    else:
        # Keep the output schema stable, like every other engineered column.
        df["MRPBin_StoreType"] = np.nan

    return df

# Flask application object; the route decorators below register against it.
app = Flask(APP_NAME)

# Ensure model present (optional)
# Best-effort step: any failure is only printed as a warning, and the load
# step below then reports the actual problem.
try:
    ensure_model_present()
except NameError:
    pass  # helper not defined if you removed it
except Exception as e:
    print(f"[WARN] {e}")

# Load model
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; MODEL_PATH must only ever point at a trusted artifact.
# On failure `model` is set to None and both forecast endpoints answer 500.
try:
    model = joblib.load(MODEL_PATH)
    print(f"[INFO] Loaded model from {MODEL_PATH}")
except Exception as e:
    print(f"[ERROR] Failed to load model: {e}")
    model = None

@app.get("/")
def root():
    """Describe the service: its name, status, a usage hint and the raw fields."""
    service_info = {
        "service": APP_NAME,
        "status": "ok",
        "message": "POST to /v1/forecast/single (JSON) or /v1/forecast/batch (CSV as 'file')",
        "raw_fields": RAW_FIELDS,
    }
    return jsonify(service_info)

@app.post("/v1/forecast/single")
def predict_single():
    """Forecast sales for one product/store record posted as a JSON object.

    Only the keys listed in RAW_FIELDS are read; absent keys are treated as
    missing values. Product_Id and Store_Id are dropped after feature
    engineering and are not passed to the model.

    Returns:
        200 with the rounded prediction and the feature row that was used
        (missing values reported as null);
        400 if the body is JSON but not an object, or if inference fails;
        500 if the model could not be loaded at startup.
    """
    if model is None:
        return jsonify({"error": "Model not loaded"}), 500

    payload = request.get_json(silent=True) or {}
    # A JSON array or scalar has no .get(); reject it explicitly instead of
    # letting an AttributeError escape as an unhandled 500.
    if not isinstance(payload, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    row = {col: payload.get(col) for col in RAW_FIELDS}
    df_raw = pd.DataFrame([row])

    try:
        df_feat = engineer_features(df_raw).drop(
            columns=["Product_Id", "Store_Id"], errors="ignore"
        )

        yhat = float(model.predict(df_feat)[0])

        # NaN is not valid JSON; report missing feature values as null.
        input_used = (
            df_feat.astype(object).where(df_feat.notna(), None).iloc[0].to_dict()
        )
        return jsonify({
            "Predicted_Product_Store_Sales_Total": round(yhat, 2),
            "input_used": input_used
        })
    except Exception as e:
        return jsonify({"error": f"Inference failed: {e}"}), 400

@app.post("/v1/forecast/batch")
def predict_batch():
    """Forecast sales for every row of a CSV uploaded as form field 'file'.

    Columns from RAW_FIELDS that the CSV lacks are added as missing values.
    Product_Id and Store_Id are dropped after feature engineering and are not
    passed to the model.

    Returns:
        200 with a JSON list of the input rows, each extended with
        "Predicted_Product_Store_Sales_Total" (missing cells reported as
        null);
        400 if no file was posted or if reading/inference fails;
        500 if the model could not be loaded at startup.
    """
    if model is None:
        return jsonify({"error": "Model not loaded"}), 500

    file = request.files.get("file")
    if file is None:
        return jsonify({"error": "Please POST a CSV file under form field 'file'"}), 400

    try:
        df_raw = pd.read_csv(file)
        for col in RAW_FIELDS:
            if col not in df_raw.columns:
                df_raw[col] = None

        df_feat = engineer_features(df_raw).drop(
            columns=["Product_Id", "Store_Id"], errors="ignore"
        )

        preds = model.predict(df_feat)
        out = df_raw.copy()
        out["Predicted_Product_Store_Sales_Total"] = preds

        # Empty CSV cells are NaN, which would be emitted as the non-standard
        # JSON token NaN; convert them to None so they serialize as null.
        out = out.astype(object).where(out.notna(), None)
        return jsonify(out.to_dict(orient="records"))
    except Exception as e:
        return jsonify({"error": f"Inference failed: {e}"}), 400
 
if __name__ == "__main__":
    # Listen on all interfaces; the PORT environment variable overrides 7860.
    port = int(os.getenv("PORT", 7860))
    app.run(port=port, host="0.0.0.0")