MLbySush committed on
Commit
ef2e6ab
·
verified ·
1 Parent(s): 495264a

Upload folder using huggingface_hub

Browse files
Dockerfile ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9-slim

# Set working directory
WORKDIR /app

# Copy requirements first so the pip-install layer is cached and only
# rebuilt when requirements.txt changes (the original `COPY ..` was
# invalid: COPY needs both a source and a destination).
COPY requirements.txt .

# Upgrade pip and install dependencies
RUN pip install --upgrade pip
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code from the build context
COPY . .

# Expose the port (matches app.run host/port)
EXPOSE 7860

# Start with gunicorn; 'app:app' expects app.py to define 'app = Flask(...)'
CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:7860", "app:app"]
app.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# ----------------------------
# Imports
# ----------------------------
# NOTE(fix): the uploaded app.py contained no import statements at all,
# so every name below (os, joblib, pd, Flask, ...) raised NameError.
import os

import joblib
import pandas as pd
from flask import Flask, jsonify, request

# ----------------------------
# Config / Model path
# ----------------------------
MODEL_PATH = os.path.join("backend_files", "superKart_price_prediction_model_v1_0.joblib")

# ----------------------------
# Initialize app and load model
# ----------------------------
app = Flask("SuperKart Sales Predictor")

# Fail fast at startup with a clear message rather than a cryptic
# joblib error if the model artifact is missing from the image.
if not os.path.exists(MODEL_PATH):
    raise FileNotFoundError(f"Model file not found at {MODEL_PATH}. ")
model = joblib.load(MODEL_PATH)

# Raw input feature names expected *before* the pipeline's preprocessing.
NUMERIC_COLS = ['Product_Weight', 'Product_Allocated_Area', 'Product_MRP', 'Store_Age']
CATEGORICAL_COLS = ['Product_Sugar_Content', 'Product_Type', 'Store_Size',
                    'Store_Location_City_Type', 'Store_Type']

EXPECTED_COLUMNS = NUMERIC_COLS + CATEGORICAL_COLS
23
+
24
+ # ----------------------------
25
+ # Utility function
26
+ # ----------------------------
27
def validate_and_prepare_input(df: pd.DataFrame, reference_year: int = 2025):
    """
    Ensure the dataframe has the required raw input columns.

    If ``Store_Establishment_Year`` is provided instead of ``Store_Age``,
    a ``Store_Age`` column is derived as
    ``reference_year - Store_Establishment_Year``.

    Args:
        df: Raw input records (one row per product-store pair).
        reference_year: Year used to convert an establishment year into a
            store age. Defaults to 2025, preserving the original behavior;
            pass the current year explicitly if fresher ages are wanted.

    Returns:
        A ``(prepared_df, missing)`` tuple: a copy of the input with any
        derived columns added, and the list of still-missing expected
        column names (empty when the input is complete).
    """
    df = df.copy()
    missing = [c for c in EXPECTED_COLUMNS if c not in df.columns]

    # Derive Store_Age when only the establishment year was supplied.
    if 'Store_Establishment_Year' in df.columns and 'Store_Age' in missing:
        df['Store_Age'] = reference_year - df['Store_Establishment_Year']
        missing = [c for c in EXPECTED_COLUMNS if c not in df.columns]

    return df, missing
42
+
43
+ # ----------------------------
44
+ # Routes
45
+ # ----------------------------
46
@app.get("/")
def home():
    """Landing page that doubles as a health check."""
    payload = {
        "service": "SuperKart Sales Predictor",
        "status": "running"
    }
    return jsonify(payload)
53
+
54
@app.post("/v1/predict")
def predict_single():
    """
    Predict sales for a single product-store record.

    Expected JSON schema (example):
    {
        "Product_Weight": 12.5,
        "Product_Allocated_Area": 0.056,
        "Product_MRP": 149.0,
        "Store_Age": 16,
        "Product_Sugar_Content": "Low Sugar",
        "Product_Type": "Dairy",
        "Store_Size": "High",
        "Store_Location_City_Type": "Tier 1",
        "Store_Type": "Supermarket Type 1"
    }
    """
    try:
        payload = request.get_json(force=True)
        if not isinstance(payload, dict):
            return jsonify({"error": "Input JSON must be an object/dict"}), 400

        # Wrap the single record in a one-row frame for the pipeline.
        frame, missing = validate_and_prepare_input(pd.DataFrame([payload]))
        if missing:
            return jsonify({"error": "Missing required columns", "missing_columns": missing}), 400

        # Drop any extra fields; the pipeline applies its own preprocessing.
        frame = frame[EXPECTED_COLUMNS]

        prediction = model.predict(frame)
        return jsonify({"prediction": float(prediction[0])}), 200

    except Exception as exc:
        # Boundary handler: surface unexpected failures as a 500 payload.
        return jsonify({"error": "Exception during prediction", "details": str(exc)}), 500
96
+
97
@app.post("/v1/predict_batch")
def predict_batch():
    """
    Predict sales for a batch of records supplied as a CSV file upload.

    The CSV should contain the expected columns (or Store_Establishment_Year
    instead of Store_Age, which will be converted automatically).
    """
    try:
        uploaded = request.files.get('file')
        if uploaded is None:
            return jsonify({"error": "No file part in the request. Upload a CSV file with key 'file'."}), 400
        if uploaded.filename == "":
            return jsonify({"error": "Empty filename. Please upload a CSV file."}), 400

        # Parse the CSV and derive/validate the expected columns.
        frame, missing = validate_and_prepare_input(pd.read_csv(uploaded))
        if missing:
            return jsonify({"error": "Missing required columns in uploaded CSV", "missing_columns": missing}), 400

        # Restrict to the model's expected columns and score the batch.
        frame = frame[EXPECTED_COLUMNS]
        predictions = model.predict(frame)

        # Echo the (filtered) inputs back alongside each prediction.
        annotated = frame.copy()
        annotated['predicted_Product_Store_Sales_Total'] = predictions.astype(float)

        # Records orientation keeps each row self-describing in the JSON body.
        records = annotated.reset_index().to_dict(orient='records')

        return jsonify({"predictions_count": len(records), "predictions": records}), 200

    except Exception as exc:
        # Boundary handler: surface unexpected failures as a 500 payload.
        return jsonify({"error": "Exception during batch prediction", "details": str(exc)}), 500
134
+
135
# ----------------------------
# Run app
# ----------------------------
if __name__ == "__main__":
    # Bind 0.0.0.0 so the dev server is reachable from outside a container;
    # port 7860 matches the Dockerfile's EXPOSE and the gunicorn CMD.
    app.run(host="0.0.0.0", port=7860, debug=False)
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ numpy==2.0.2
2
+ pandas==2.2.2
3
+ scikit-learn==1.6.1
4
+ joblib==1.4.2
5
+ xgboost==2.1.4
6
+ gunicorn==20.1.0
7
+ flask==3.0.3
8
+ requests==2.32.3
9
+ huggingface_hub==0.30.1
superKart_price_prediction_model_v1_0.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e62fab907d5931f97fd4bc4b69136940967359e97dbf69592fe221a457c91c0
3
+ size 27090851