grkavi0912 committed on
Commit 3c58142 · verified · 1 Parent(s): 23a805e

Upload app.py with huggingface_hub

Files changed (1)
  1. app.py +163 -0
app.py ADDED
@@ -0,0 +1,163 @@
+ # -------------------------------------------------------
+ # Flask Web Framework for Product Store Sales Prediction
+ # -------------------------------------------------------
+
+ # Import necessary libraries
+ import os
+ import numpy as np
+ import pandas as pd
+ import joblib
+ from flask import Flask, request, jsonify
+
+ # Initialize the Flask application
+ product_sales_api = Flask("SuperKart Product Sales Predictor")
+
+ # Define the path to the model file - it will be at the root of the Space
+ model_path_in_space = "random_forest_pipeline.joblib"
+
+ # Load the trained RandomForest model pipeline
+ try:
+     model = joblib.load(model_path_in_space)
+     print(f"Model loaded successfully from {model_path_in_space}")
+ except Exception as e:
+     print(f"Error loading model: {e}")
+     model = None  # Set model to None to indicate loading failure
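For reference, the app assumes a scikit-learn Pipeline serialized with joblib. Below is a minimal sketch of the training-side counterpart that would produce such an artifact; the actual training code is not part of this commit, and the preprocessor layout (one-hot encoding of the categorical columns) is an assumption.

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
import joblib

# Assumed preprocessing: one-hot encode the categorical columns,
# pass the numeric ones through unchanged.
categorical = ['Product_Sugar_Content', 'Product_Type', 'Store_Id',
               'Store_Size', 'Store_Location_City_Type', 'Store_Type']
pipeline = Pipeline([
    ('prep', ColumnTransformer(
        [('cat', OneHotEncoder(handle_unknown='ignore'), categorical)],
        remainder='passthrough')),
    ('model', RandomForestRegressor()),
])
# pipeline.fit(X_train, y_train)  # fit on the original feature columns
joblib.dump(pipeline, 'random_forest_pipeline.joblib')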
+
+
+ # -------------------------------------------------------
+ # Define a route for the home page (GET request)
+ # -------------------------------------------------------
+ @product_sales_api.route('/')
+ def home():
+     """
+     This function handles GET requests to the root URL ('/') of the API.
+     It returns a simple welcome message.
+     """
+     if model is None:
+         return "Error: Model could not be loaded. Please check the logs.", 500
+     return "Welcome to the SuperKart Product Store Sales Prediction API!"
+
+ # -------------------------------------------------------
+ # Define an endpoint for single product prediction (POST request)
+ # -------------------------------------------------------
+ @product_sales_api.route('/v1/sales', methods=['POST'])
+ def predict_sales():
+     """
+     This function handles POST requests to the '/v1/sales' endpoint.
+     It expects a JSON payload containing product features and returns
+     the predicted Product_Store_Sales_Total as a JSON response.
+     """
+     if model is None:
+         return jsonify({'error': 'Model not loaded'}), 500
+
+     try:
+         # Get the JSON data from the request body; reject non-JSON requests
+         product_data = request.get_json(silent=True)
+         if product_data is None:
+             return jsonify({'error': 'Request body must be valid JSON'}), 400
+
+         # The pipeline performs its own preprocessing (including one-hot
+         # encoding), so the incoming JSON keys must match the original,
+         # pre-encoding feature names the model was trained on:
+         original_feature_cols = [
+             'Product_Weight', 'Product_Allocated_Area', 'Product_MRP',
+             'Store_Establishment_Year', 'Product_Sugar_Content', 'Product_Type',
+             'Store_Id', 'Store_Size', 'Store_Location_City_Type', 'Store_Type'
+         ]
+
+         # Use .get() to safely access keys; a missing key becomes None/NaN,
+         # which the pipeline must be able to handle (or the prediction fails)
+         input_sample = {col: product_data.get(col) for col in original_feature_cols}
+
+         # Convert the JSON data into a single-row Pandas DataFrame
+         input_df = pd.DataFrame([input_sample])
+
+         # Ensure categorical columns have the correct dtype
+         categorical_cols = ['Product_Sugar_Content', 'Product_Type', 'Store_Id',
+                             'Store_Size', 'Store_Location_City_Type', 'Store_Type']
+         for col in categorical_cols:
+             if col in input_df.columns:
+                 input_df[col] = input_df[col].astype('category')
+
+         # Make a prediction using the trained model pipeline
+         # (the pipeline handles preprocessing internally)
+         prediction = model.predict(input_df)[0]
+
+         # Return the predicted sales total as JSON
+         return jsonify({'Predicted_Product_Store_Sales_Total': float(prediction)})
+
+     except Exception as e:
+         # Log the error for debugging
+         print(f"Error during single prediction: {e}")
+         return jsonify({'error': str(e)}), 500
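A sketch of a client call for this endpoint, assuming the service runs locally on port 5000. The feature values below are invented for illustration and are not taken from the SuperKart dataset:

import requests

# Illustrative payload: keys match the original training feature names
# listed in predict_sales(); every value here is made up.
payload = {
    "Product_Weight": 12.5,
    "Product_Allocated_Area": 0.045,
    "Product_MRP": 150.0,
    "Store_Establishment_Year": 2005,
    "Product_Sugar_Content": "Low Sugar",
    "Product_Type": "Snack Foods",
    "Store_Id": "OUT004",
    "Store_Size": "Medium",
    "Store_Location_City_Type": "Tier 2",
    "Store_Type": "Supermarket Type1",
}
resp = requests.post("http://localhost:5000/v1/sales", json=payload)
print(resp.json())  # e.g. {"Predicted_Product_Store_Sales_Total": ...}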
+
+ # -------------------------------------------------------
+ # Define an endpoint for batch predictions (CSV upload)
+ # -------------------------------------------------------
+ @product_sales_api.route('/v1/salesbatch', methods=['POST'])
+ def predict_sales_batch():
+     """
+     This function handles POST requests to the '/v1/salesbatch' endpoint.
+     It expects a CSV file upload and returns predictions for multiple records.
+     """
+     if model is None:
+         return jsonify({'error': 'Model not loaded'}), 500
+
+     try:
+         # Get the uploaded CSV file
+         if 'file' not in request.files:
+             return jsonify({'error': 'No file part in the request'}), 400
+
+         file = request.files['file']
+
+         # If the user does not select a file, the browser submits an
+         # empty part without a filename.
+         if file.filename == '':
+             return jsonify({'error': 'No selected file'}), 400
+
+         # Read the CSV file into a DataFrame; the CSV columns are assumed
+         # to match the original training features
+         data = pd.read_csv(file)
+
+         # Ensure categorical columns have the correct dtype after reading from CSV
+         categorical_cols = ['Product_Sugar_Content', 'Product_Type', 'Store_Id',
+                             'Store_Size', 'Store_Location_City_Type', 'Store_Type']
+         for col in categorical_cols:
+             if col in data.columns:
+                 data[col] = data[col].astype('category')
+
+         # Make batch predictions using the trained model pipeline
+         predictions = model.predict(data)
+         data['Predicted_Product_Store_Sales_Total'] = predictions
+
+         # Return the results as JSON (to_json maps NaN to null; set the
+         # Content-Type explicitly so clients receive application/json)
+         return product_sales_api.response_class(
+             data.to_json(orient='records'), mimetype='application/json')
+
+     except Exception as e:
+         # Log the error for debugging
+         print(f"Error during batch prediction: {e}")
+         return jsonify({'error': str(e)}), 500
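The batch endpoint takes a multipart file upload rather than a JSON body. A client sketch, again assuming a local server and a hypothetical products.csv whose columns match the original training features:

import requests

# products.csv is a placeholder name; it must contain the same
# original feature columns, one row per product.
with open("products.csv", "rb") as f:
    resp = requests.post("http://localhost:5000/v1/salesbatch",
                         files={"file": ("products.csv", f, "text/csv")})
print(resp.json())  # list of records, each with the predicted total appended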
+
+ # -------------------------------------------------------
+ # Run the Flask API (typically not run in deployment; Gunicorn handles this)
+ # -------------------------------------------------------
+ # This part is mainly for local testing. In a Docker deployment with Gunicorn,
+ # Gunicorn will call the 'product_sales_api' application directly.
+ # if __name__ == '__main__':
+ #     product_sales_api.run(host='0.0.0.0', port=5000, debug=True)
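In a Docker-based Space, the Dockerfile would typically point Gunicorn at this module, along the lines of gunicorn -b 0.0.0.0:7860 app:product_sales_api. The bind port and worker settings are assumptions here (7860 is the usual Hugging Face Spaces port), since the Dockerfile itself is not part of this commit.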