Spaces:
Runtime error
Runtime error
Upload backend app files
Browse files
app.py
CHANGED
|
@@ -1,25 +1,28 @@
|
|
| 1 |
from flask import Flask, request, jsonify
import joblib
import pandas as pd

app = Flask(__name__)

# Load the serialized full pipeline once at import time so every request can
# reuse it. On failure the app still starts (so the service can report a clean
# error per-request), but the cause is logged instead of being silently
# discarded — the original `except` swallowed `e` entirely, which hides the
# real reason the model is unavailable.
try:
    full_pipeline = joblib.load('deployment_files/SuperKart_model_v1_0.joblib')
except Exception as e:
    app.logger.error("Failed to load model pipeline: %s", e)
    full_pipeline = None
|
|
@@ -33,11 +36,31 @@ def predict():
|
|
| 33 |
try:
|
| 34 |
data = request.get_json(force=True)
|
| 35 |
|
| 36 |
-
# Convert input data to DataFrame
|
| 37 |
input_df = pd.DataFrame([data])
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
# Reorder columns to match the order expected by the pipeline
|
| 40 |
# This assumes all expected columns are present in the input data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
input_df = input_df[pipeline_columns]
|
| 42 |
|
| 43 |
|
|
@@ -51,4 +74,9 @@ def predict():
|
|
| 51 |
return jsonify({'error': str(e)}), 400
|
| 52 |
|
| 53 |
if __name__ == '__main__':
    # NOTE(review): debug=True enables the interactive Werkzeug debugger;
    # combined with host='0.0.0.0' this is unsafe outside local development —
    # confirm before deploying.
    app.run(debug=True, host='0.0.0.0', port=5000)
|
|
|
|
| 1 |
from flask import Flask, request, jsonify
import joblib
import pandas as pd
import numpy as np

app = Flask(__name__)

# Load the serialized full pipeline (preprocessing + model) once at startup.
try:
    full_pipeline = joblib.load('deployment_files/SuperKart_model_v1_0.joblib')

    # Columns the pipeline expects as *raw* input. Estimators fitted on a
    # DataFrame record these in `feature_names_in_`; prefer that, because the
    # request handler reindexes the raw JSON payload with this list and then
    # feeds it back through the full pipeline (including the preprocessor).
    if hasattr(full_pipeline, 'feature_names_in_'):
        pipeline_columns = list(full_pipeline.feature_names_in_)
    else:
        # Fallback: reconstruct names from the ColumnTransformer.
        # NOTE(review): these are *post-transform* names (one-hot expanded);
        # reindexing raw input to them is only correct if the pipeline was
        # fitted on already-encoded data — confirm against the training code.
        transformer = full_pipeline.named_steps['preprocessor']
        ohe_features = list(
            transformer.named_transformers_['onehotencoder'].get_feature_names_out(
                transformer.transformers_[0][2]
            )
        )
        passthrough_features = list(
            transformer.named_transformers_['remainder'].get_feature_names_out()
        )
        # Passthrough features come first, then the one-hot encoded ones.
        pipeline_columns = passthrough_features + ohe_features
except Exception as e:
    # Log instead of silently discarding the error (the original dropped `e`),
    # and define pipeline_columns so a failed load does not later surface as a
    # NameError inside the request handler.
    app.logger.error("Failed to load model pipeline: %s", e)
    full_pipeline = None
    pipeline_columns = []
|
|
|
|
| 36 |
try:
|
| 37 |
data = request.get_json(force=True)
|
| 38 |
|
| 39 |
+
# Convert input data to DataFrame
|
| 40 |
input_df = pd.DataFrame([data])
|
| 41 |
|
| 42 |
+
# Create 'Store_Age' feature if not present
|
| 43 |
+
if 'Store_Establishment_Year' in input_df.columns and 'Store_Age' not in input_df.columns:
|
| 44 |
+
input_df['Store_Age'] = 2025 - input_df['Store_Establishment_Year']
|
| 45 |
+
input_df = input_df.drop('Store_Establishment_Year', axis=1) # Drop the original year column
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
# Ensure the input DataFrame has the same columns as the training data and in the same order
|
| 49 |
+
# We need to reindex the input_df to match the columns the pipeline expects
|
| 50 |
+
# This requires knowing the exact columns expected by the pipeline after preprocessing
|
| 51 |
+
# A safer way is to define the expected columns based on the pipeline structure
|
| 52 |
+
|
| 53 |
# Reorder columns to match the order expected by the pipeline
|
| 54 |
# This assumes all expected columns are present in the input data
|
| 55 |
+
# We need the list of columns the pipeline expects after preprocessing
|
| 56 |
+
# This list was constructed during pipeline loading
|
| 57 |
+
|
| 58 |
+
# Ensure all expected columns are in the input data, add missing ones with default values (e.g., 0 for one-hot encoded)
|
| 59 |
+
for col in pipeline_columns:
|
| 60 |
+
if col not in input_df.columns:
|
| 61 |
+
input_df[col] = 0 # Assuming 0 is a safe default for missing features
|
| 62 |
+
|
| 63 |
+
# Reorder columns to match the pipeline's expected order
|
| 64 |
input_df = input_df[pipeline_columns]
|
| 65 |
|
| 66 |
|
|
|
|
| 74 |
return jsonify({'error': str(e)}), 400
|
| 75 |
|
| 76 |
if __name__ == '__main__':
    import os

    # NOTE(review): these directories are created only when the script is run
    # directly, and *after* the model load at import time has already been
    # attempted — an empty 'deployment_files' cannot fix a failed load, and
    # 'backend_app' is not referenced anywhere in this module. Confirm whether
    # either call is still needed.
    os.makedirs('backend_app', exist_ok=True)
    os.makedirs('deployment_files', exist_ok=True)

    # NOTE(review): debug=True exposes the Werkzeug debugger on 0.0.0.0;
    # disable outside local development.
    app.run(debug=True, host='0.0.0.0', port=5000)
|