Rajanan committed on
Commit
ec16458
·
verified ·
1 Parent(s): 2850876

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. Dockerfile +9 -0
  2. app.py +118 -0
  3. requirements.txt +7 -0
  4. superkart_sales_model_v1.joblib +3 -0
Dockerfile ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9-slim

WORKDIR /app

# Install dependencies before copying the rest of the project so this layer
# stays cached until requirements.txt itself changes (the model artifact and
# app.py change far more often than the pinned dependencies).
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Application code and the serialized model.
COPY . .

# Run the server as an unprivileged user instead of root. The app only reads
# files under /app, so root-owned, world-readable files are sufficient.
RUN useradd --create-home --uid 1000 appuser
USER appuser

EXPOSE 7860
CMD ["gunicorn", "-w", "2", "-b", "0.0.0.0:7860", "app:app"]
app.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import joblib
2
+ import pandas as pd
3
+ from flask import Flask, request, jsonify
4
+
5
# -----------------------------
# Fitted pipeline (preprocessor + model).
# Deserialized once at import time; the request handlers below all share
# this single in-memory object.
# NOTE: the path is relative, so it is resolved against the process's
# current working directory, not against this file's location.
# -----------------------------
MODEL_PATH = "superkart_sales_model_v1.joblib"
model = joblib.load(MODEL_PATH)
10
+
11
# -----------------------------
# Helpers: map strings -> ordinal codes (if user sends strings)
# Training expected numeric Store_Size & City_Type
# -----------------------------
SIZE_MAP = {"Small": 1, "Medium": 2, "High": 3}
CITY_MAP = {"Tier 3": 1, "Tier 2": 2, "Tier 1": 3}

# Required columns, using the SAME names (and order) used during training
EXPECTED_COLUMNS = [
    "Product_Weight",
    "Product_Allocated_Area",
    "Product_MRP",
    "Store_Establishment_Year",
    "Store_Size",                # numeric 1/2/3 OR strings -> mapped
    "Store_Location_City_Type",  # numeric 1/2/3 OR strings -> mapped
    "Product_Sugar_Content",     # categorical
    "Product_Type",              # categorical
    "Store_Type",                # categorical
]

# Ordinal columns and the label -> code table that applies to each of them.
_ORDINAL_MAPS = {
    "Store_Size": SIZE_MAP,
    "Store_Location_City_Type": CITY_MAP,
}

# Columns that must end up numeric before the frame is handed to the model.
_NUMERIC_COLUMNS = [
    "Store_Size",
    "Store_Location_City_Type",
    "Product_Weight",
    "Product_Allocated_Area",
    "Product_MRP",
    "Store_Establishment_Year",
]


def _encode_ordinal(value, mapping):
    """Translate a known label to its ordinal code; pass anything else through."""
    if isinstance(value, str):
        value = value.strip()
        # Unknown strings (including numeric strings such as "2") are returned
        # unchanged so pd.to_numeric can decide whether they are usable.
        return mapping.get(value, value)
    return value


def coerce_and_validate(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of *df* restricted to ``EXPECTED_COLUMNS``.

    Steps:
      1. Verify every expected column is present.
      2. Keep only the expected columns, in training order.
      3. Translate string labels in the ordinal columns to their codes.
         Values that are already numeric (or numeric strings) are kept, so a
         column mixing ``"Medium"`` and ``2`` is handled row by row.
      4. Coerce the numeric columns; unparseable values become NaN.
      5. Drop every row that contains a NaN in any expected column.

    The input frame is not modified. Surviving rows keep their original index
    labels, so callers can align the result with the input.

    Raises:
        ValueError: if one or more expected columns are missing.
    """
    missing = [c for c in EXPECTED_COLUMNS if c not in df.columns]
    if missing:
        raise ValueError(f"Missing required columns: {missing}")

    # Copy AFTER selecting so the assignments below never write into a view
    # of the caller's frame.
    df = df[EXPECTED_COLUMNS].copy()

    for col, mapping in _ORDINAL_MAPS.items():
        # Any non-numeric column may hold labels (object or string dtype).
        if not pd.api.types.is_numeric_dtype(df[col]):
            df[col] = df[col].map(lambda v, m=mapping: _encode_ordinal(v, m))

    for col in _NUMERIC_COLUMNS:
        df[col] = pd.to_numeric(df[col], errors="coerce")

    # The model was trained on clean data; rows with any missing value are
    # dropped rather than imputed.
    return df.dropna(axis=0)
57
+
58
# -----------------------------
# Flask app
# -----------------------------
# Flask's first argument is the import name it uses to locate the
# application's root path and resources, so it must be the module name
# rather than a human-readable title.
app = Flask(__name__)


@app.get("/")
def home():
    """Liveness check: return a plain-text banner confirming the API is running."""
    return "SuperKart Sales Prediction API is up!"
66
+
67
@app.post("/v1/predict")
def predict_single():
    """
    Predict total sales for one product/store record.

    JSON body example ("Store_Size" and "Store_Location_City_Type" also
    accept their numeric codes, e.g. 2 and 3):
    {
      "Product_Weight": 12.5,
      "Product_Allocated_Area": 30,
      "Product_MRP": 199.0,
      "Store_Establishment_Year": 2008,
      "Store_Size": "Medium",
      "Store_Location_City_Type": "Tier 1",
      "Product_Sugar_Content": "Regular",
      "Product_Type": "Snack Foods",
      "Store_Type": "Supermarket Type 1"
    }

    Responses:
      200 {"Predicted_Product_Store_Sales_Total": <float rounded to 2 dp>}
      400 {"error": ...} when the body is not a JSON object, a required
          field is missing, or the record is unusable after cleaning
      500 {"error": ...} when the model itself fails
    """
    # silent=True yields None for malformed JSON instead of raising, so every
    # kind of bad body is rejected by the single check below.
    data = request.get_json(force=True, silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    try:
        df = coerce_and_validate(pd.DataFrame([data]))
    except (ValueError, TypeError) as e:
        # Client-side problem: missing columns or values that cannot be coerced.
        return jsonify({"error": str(e)}), 400

    if df.empty:
        return jsonify({"error": "Input invalid or resulted in empty rows after cleaning."}), 400

    try:
        pred = float(model.predict(df)[0])
    except Exception:
        # A model failure is a server-side fault: log the traceback and avoid
        # leaking internal details to the client.
        app.logger.exception("Prediction failed for /v1/predict")
        return jsonify({"error": "Prediction failed."}), 500

    return jsonify({"Predicted_Product_Store_Sales_Total": round(pred, 2)})
93
+
94
@app.post("/v1/predict_batch")
def predict_batch():
    """
    Predict total sales for every usable row of an uploaded CSV.

    Form-data upload: file=CSV
    CSV must include the EXPECTED_COLUMNS headers.

    Rows that cannot be cleaned are skipped. The response is a JSON array
    holding (at most) the first 50 surviving input rows, each extended with
    "Predicted_Product_Store_Sales_Total".

    Responses:
      200 JSON array of records
      400 {"error": ...} when the file is missing, unreadable, lacks required
          columns, or has no valid rows
      500 {"error": ...} when the model itself fails
    """
    if "file" not in request.files:
        return jsonify({"error": "Please upload a CSV file with key 'file'."}), 400

    try:
        df = pd.read_csv(request.files["file"])
        df_clean = coerce_and_validate(df)
    except (ValueError, TypeError) as e:
        # pandas' CSV parsing errors and decoding errors are ValueError
        # subclasses, as is the missing-columns error from validation.
        return jsonify({"error": str(e)}), 400

    if df_clean.empty:
        return jsonify({"error": "All rows invalid or empty after cleaning."}), 400

    try:
        preds = model.predict(df_clean)
    except Exception:
        app.logger.exception("Prediction failed for /v1/predict_batch")
        return jsonify({"error": "Prediction failed."}), 500

    # Cleaning may have dropped rows; select only the surviving ones (by index
    # label) so the predictions line up one-to-one with the output rows.
    out = df.loc[df_clean.index].copy()
    out["Predicted_Product_Store_Sales_Total"] = preds

    # Return top rows to avoid huge payloads, and label the body as JSON.
    return app.response_class(
        out.head(50).to_json(orient="records"),
        mimetype="application/json",
    )
115
+
116
if __name__ == "__main__":
    # Local development entry point (e.g. Colab); production uses gunicorn.
    import os

    # The Werkzeug debugger allows arbitrary code execution, so it must not be
    # on by default while listening on all interfaces. Opt in explicitly with
    # FLASK_DEBUG=1 when developing locally.
    debug_enabled = os.environ.get("FLASK_DEBUG", "0") == "1"
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
pandas==2.2.2
numpy==2.0.2
scikit-learn==1.6.1
xgboost==2.1.4
joblib==1.4.2
flask==2.2.2
# Flask 2.2.x imports helpers that Werkzeug 3.0 removed; without this pin pip
# resolves to Werkzeug 3.x and the app fails at import time.
werkzeug==2.2.3
gunicorn==20.1.0
superkart_sales_model_v1.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17d2c83997bde504b399b76634a401ee47ddf63fc108cb14f250d8e47c41bea8
3
+ size 36093786