"""Flask service that serves predictions from the SuperKart sales pipeline.

Endpoints:
    GET  /                    -- service metadata and the list of raw fields.
    POST /v1/forecast/single  -- JSON object of raw fields -> one prediction.
    POST /v1/forecast/batch   -- CSV upload (form field 'file') -> one
                                 prediction per row.

Environment variables:
    MODEL_PATH    -- path of the joblib pipeline (default: next to this file).
    CURRENT_YEAR  -- reference year used to derive Store_Age (default: now).
    PORT          -- port used when run as a script (default: 7860).
"""

import os
import shutil  # used by ensure_model_present
from datetime import datetime

import joblib
import numpy as np
import pandas as pd
from flask import Flask, request, jsonify

# Resolve base directory robustly (works in Colab/Notebook and scripts).
try:
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
except NameError:
    # __file__ is not defined in notebooks; fall back to CWD.
    BASE_DIR = os.getcwd()

APP_NAME = "SuperKart_Sales_Forecast_API"
DEFAULT_MODEL_PATH = os.path.join(BASE_DIR, "superkart_rf_best_pipeline.joblib")
MODEL_PATH = os.environ.get("MODEL_PATH", DEFAULT_MODEL_PATH)
CURRENT_YEAR = int(os.environ.get("CURRENT_YEAR", datetime.now().year))

# Identifier columns that are dropped before the frame is passed to the model.
_ID_COLUMNS = ["Product_Id", "Store_Id"]
# Key under which predictions are returned by both endpoints.
_PREDICTION_KEY = "Predicted_Product_Store_Sales_Total"


# Optional helper
def ensure_model_present():
    """Copy the model file into MODEL_PATH from a known fallback location.

    Does nothing when MODEL_PATH was overridden through the environment or
    when the file already exists.

    Raises:
        FileNotFoundError: the default model file is missing and none of the
            fallback candidates exists.
    """
    if MODEL_PATH != DEFAULT_MODEL_PATH or os.path.exists(MODEL_PATH):
        return

    candidates = [
        os.path.join("/content/backend_files", "superkart_rf_best_pipeline.joblib"),
        os.path.join("/content", "superkart_rf_best_pipeline.joblib"),
    ]
    for src in candidates:
        if os.path.exists(src):
            os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
            shutil.copy(src, MODEL_PATH)
            print(f"[INFO] Copied model from {src} to {MODEL_PATH}")
            return

    raise FileNotFoundError(
        f"Model file not found. Checked: {candidates}. "
        "Upload the model or set env var MODEL_PATH to the correct file."
    )


# Raw input fields accepted by both endpoints; absent ones are sent as missing.
RAW_FIELDS = [
    "Product_Id",
    "Product_Weight",
    "Product_Sugar_Content",
    "Product_Allocated_Area",
    "Product_Type",
    "Product_MRP",
    "Store_Id",
    "Store_Establishment_Year",
    "Store_Age",
    "Store_Size",
    "Store_Location_City_Type",
    "Store_Type",
]


def map_product_category(pid):
    """Map a product id to a category using its first two characters.

    "FD" -> "Food", "NC" -> "Non-Consumable", "DR" -> "Drinks" (case
    insensitive); anything else -> "Other".
    """
    prefix = str(pid)[:2].upper()
    if prefix == "FD":
        return "Food"
    if prefix == "NC":
        return "Non-Consumable"
    if prefix == "DR":
        return "Drinks"
    return "Other"


def clean_sugar(x):
    """Normalise a sugar-content label.

    Returns "Low Sugar", "No Sugar" or "Regular" when the lower-cased text
    contains "low", "no" or "reg" respectively (checked in that order);
    otherwise the title-cased text. Missing values (None/NaN) are returned as
    NaN instead of being stringified: str(None) is "none", which contains
    "no" and would otherwise be reported as "No Sugar".
    """
    if pd.api.types.is_scalar(x) and pd.isna(x):
        return np.nan
    s = str(x).strip().lower()
    if "low" in s:
        return "Low Sugar"
    if "no" in s:
        return "No Sugar"
    if "reg" in s:  # also covers "regular"
        return "Regular"
    return s.title() if s else s


def bin_allocated_area(x):
    """Bucket an allocated-area value; non-numeric or missing input -> NaN."""
    v = pd.to_numeric(x, errors="coerce")
    if pd.isna(v):
        return np.nan
    # Use the same thresholds you trained with; these are placeholders
    if v < 0.02:
        return "Very Small"
    if v < 0.05:
        return "Small"
    if v < 0.10:
        return "Medium"
    return "Large"


def bin_mrp(x):
    """Bucket an MRP value; non-numeric or missing input -> NaN."""
    v = pd.to_numeric(x, errors="coerce")
    if pd.isna(v):
        return np.nan
    if v < 100:
        return "Low"
    if v < 150:
        return "Medium"
    if v < 200:
        return "High"
    return "Premium"


def engineer_features(df_raw: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df_raw* with the engineered feature columns added.

    Adds Product_Category, Allocated_Area_Bins, MRP_Bins, Unit_Value,
    Store_Product_Interaction and MRPBin_StoreType, normalises
    Product_Sugar_Content, and fills Store_Age from
    Store_Establishment_Year where it is missing. A feature whose source
    column is absent is set to NaN. The input frame is not modified.
    """
    df = df_raw.copy()

    if "Product_Id" in df.columns:
        df["Product_Category"] = df["Product_Id"].map(map_product_category)
    else:
        df["Product_Category"] = np.nan

    if "Product_Sugar_Content" in df.columns:
        df["Product_Sugar_Content"] = df["Product_Sugar_Content"].apply(clean_sugar)

    # Allocated_Area_Bins (must match training exactly)
    if "Product_Allocated_Area" in df.columns:
        df["Allocated_Area_Bins"] = df["Product_Allocated_Area"].apply(bin_allocated_area)
    else:
        df["Allocated_Area_Bins"] = np.nan

    # Store_Age: derive from the establishment year wherever it is missing.
    if "Store_Establishment_Year" in df.columns:
        est_year = pd.to_numeric(df["Store_Establishment_Year"], errors="coerce")
        derived_age = (CURRENT_YEAR - est_year).clip(lower=0)
    else:
        derived_age = None

    if "Store_Age" not in df.columns or df["Store_Age"].isna().all():
        df["Store_Age"] = np.nan if derived_age is None else derived_age
    elif derived_age is not None:
        # Partially filled column (e.g. a batch CSV with some blank ages):
        # keep the supplied values and fill only the gaps.
        missing_age = df["Store_Age"].isna()
        if missing_age.any():
            df.loc[missing_age, "Store_Age"] = derived_age[missing_age]

    if "Product_MRP" in df.columns:
        df["MRP_Bins"] = df["Product_MRP"].apply(bin_mrp)
    else:
        df["MRP_Bins"] = np.nan

    if "Product_MRP" in df.columns and "Product_Weight" in df.columns:
        mrp = pd.to_numeric(df["Product_MRP"], errors="coerce")
        # A zero weight becomes NaN so the ratio is NaN rather than +/-inf.
        wgt = pd.to_numeric(df["Product_Weight"], errors="coerce").replace(0, np.nan)
        df["Unit_Value"] = mrp / wgt
    else:
        df["Unit_Value"] = np.nan

    if "Store_Type" in df.columns and "Product_Type" in df.columns:
        df["Store_Product_Interaction"] = (
            df["Store_Type"].astype(str) + "__" + df["Product_Type"].astype(str)
        )
    else:
        df["Store_Product_Interaction"] = np.nan

    # MRP_Bins always exists at this point, so only Store_Type needs checking.
    # NOTE(review): astype(str) turns missing values into the literal text
    # "nan"/"None" inside the interaction labels -- confirm this matches how
    # the training data was built.
    if "Store_Type" in df.columns:
        df["MRPBin_StoreType"] = (
            df["MRP_Bins"].astype(str) + "__" + df["Store_Type"].astype(str)
        )
    else:
        df["MRPBin_StoreType"] = np.nan

    return df


def _prepare_features(df_raw: pd.DataFrame) -> pd.DataFrame:
    """Engineer features and drop the identifier columns before prediction."""
    return engineer_features(df_raw).drop(columns=_ID_COLUMNS, errors="ignore")


def _json_safe_records(df: pd.DataFrame) -> list:
    """Return *df* as a list of row dicts with missing values set to None.

    NaN would otherwise be serialised as the bare token ``NaN``, which is not
    valid JSON and is rejected by strict parsers.
    """
    as_object = df.astype(object)
    return as_object.where(as_object.notna(), None).to_dict(orient="records")


app = Flask(APP_NAME)

# Ensure model present (optional, best effort): a failure here is reported
# but not fatal, because the load below reports the definitive error.
try:
    ensure_model_present()
except Exception as e:
    print(f"[WARN] {e}")

# Load model
# NOTE: joblib.load unpickles arbitrary objects; MODEL_PATH must only ever
# point at a trusted file.
try:
    model = joblib.load(MODEL_PATH)
    print(f"[INFO] Loaded model from {MODEL_PATH}")
except Exception as e:
    print(f"[ERROR] Failed to load model: {e}")
    model = None


@app.get("/")
def root():
    """Return service metadata and the list of accepted raw fields."""
    return jsonify({
        "service": APP_NAME,
        "status": "ok",
        "message": "POST to /v1/forecast/single (JSON) or /v1/forecast/batch (CSV as 'file')",
        "raw_fields": RAW_FIELDS,
    })


@app.post("/v1/forecast/single")
def predict_single():
    """Predict sales for one record supplied as a JSON object.

    Fields listed in RAW_FIELDS are read from the body; absent ones are
    treated as missing. Responds 500 when the model is not loaded and 400
    when the body is not a JSON object or inference fails.
    """
    if model is None:
        return jsonify({"error": "Model not loaded"}), 500

    payload = request.get_json(silent=True)
    if payload is None:
        # Missing or unparseable body: treat every field as missing.
        payload = {}
    if not isinstance(payload, dict):
        # e.g. a JSON array -- previously crashed on payload.get().
        return jsonify({"error": "Request body must be a JSON object"}), 400

    df_raw = pd.DataFrame([{col: payload.get(col) for col in RAW_FIELDS}])

    try:
        df_feat = _prepare_features(df_raw)
        yhat = float(model.predict(df_feat)[0])
        return jsonify({
            _PREDICTION_KEY: round(yhat, 2),
            "input_used": _json_safe_records(df_feat.iloc[[0]])[0],
        })
    except Exception as e:
        return jsonify({"error": f"Inference failed: {e}"}), 400


@app.post("/v1/forecast/batch")
def predict_batch():
    """Predict sales for every row of a CSV uploaded under form field 'file'.

    Columns from RAW_FIELDS that the CSV lacks are added as missing. The
    response is the input rows plus the prediction column. Responds 500 when
    the model is not loaded and 400 when the file is absent or inference
    fails.
    """
    if model is None:
        return jsonify({"error": "Model not loaded"}), 500

    file = request.files.get("file")
    if file is None:
        return jsonify({"error": "Please POST a CSV file under form field 'file'"}), 400

    try:
        df_raw = pd.read_csv(file)
        for col in RAW_FIELDS:
            if col not in df_raw.columns:
                df_raw[col] = None

        preds = model.predict(_prepare_features(df_raw))

        out = df_raw.copy()
        out[_PREDICTION_KEY] = preds
        return jsonify(_json_safe_records(out))
    except Exception as e:
        return jsonify({"error": f"Inference failed: {e}"}), 400


if __name__ == "__main__":
    port = int(os.environ.get("PORT", 7860))
    app.run(host="0.0.0.0", port=port)