# NOTE: This file was recovered from a Hugging Face Spaces page; the page's
# build-log residue (runtime status, file size, commit hashes, column ruler)
# has been removed so the module parses as Python.
from flask import Flask, request, jsonify
import joblib
import pandas as pd
import numpy as np
app = Flask(__name__)

# Load the serialized end-to-end pipeline (preprocessing + model).
# IMPORTANT: the pipeline applies its own one-hot encoding / scaling, so
# predict() must be fed the RAW training columns — not the post-transform
# (one-hot encoded) feature names. The original code built the column list
# from get_feature_names_out(), which yields post-transform names and would
# make the raw-input reindex in /predict feed garbage to the pipeline.
try:
    full_pipeline = joblib.load('deployment_files/SuperKart_model_v1_0.joblib')
    preprocessor = full_pipeline.named_steps['preprocessor']
    # sklearn >= 1.0 records the raw input columns seen at fit time, in the
    # exact order the pipeline expects them.
    pipeline_columns = list(getattr(preprocessor, 'feature_names_in_', []))
    if not pipeline_columns:
        # Fallback for older pickles: reconstruct the raw column list from
        # the ColumnTransformer's (name, transformer, columns) spec.
        pipeline_columns = [
            col
            for _, _, cols in preprocessor.transformers_
            for col in (cols if isinstance(cols, (list, tuple)) else [cols])
        ]
except Exception as e:
    # Keep the app importable even when the model artifact is absent;
    # /predict reports a 500 in that case. Define pipeline_columns too so
    # later references can never raise NameError.
    full_pipeline = None
    pipeline_columns = []
    print(f"Error loading pipeline: {e}")
@app.route('/predict', methods=['POST'])
def predict():
    """Predict endpoint.

    Accepts a JSON object with the raw SuperKart feature columns (or a JSON
    list of such objects for batch scoring) and returns
    ``{"prediction": [...]}``. Returns 500 if the model failed to load and
    400 on malformed or incomplete input.
    """
    if full_pipeline is None:
        return jsonify({'error': 'Model not loaded'}), 500
    try:
        data = request.get_json(force=True)
        # Accept either a single record (dict) or a batch (list of dicts);
        # a single dict is wrapped to preserve the original behavior.
        input_df = pd.DataFrame(data if isinstance(data, list) else [data])

        # Derive the engineered 'Store_Age' feature when the caller sends the
        # raw establishment year instead.
        # NOTE(review): 2025 is the hard-coded reference year used at training
        # time — confirm against the training notebook before changing it.
        if 'Store_Establishment_Year' in input_df.columns and 'Store_Age' not in input_df.columns:
            input_df['Store_Age'] = 2025 - input_df['Store_Establishment_Year']
            input_df = input_df.drop('Store_Establishment_Year', axis=1)

        # The serialized pipeline performs its own preprocessing, so it must
        # receive the RAW columns recorded at fit time, in fit-time order.
        # Zero-filling absent columns (the previous behavior) would silently
        # corrupt categorical features, so missing inputs are rejected instead.
        expected = list(getattr(full_pipeline, 'feature_names_in_', [])) or pipeline_columns
        missing = [col for col in expected if col not in input_df.columns]
        if missing:
            return jsonify({'error': f'Missing required features: {missing}'}), 400
        input_df = input_df[expected]

        prediction = full_pipeline.predict(input_df)
        return jsonify({'prediction': prediction.tolist()})
    except Exception as e:
        # Surface the failure reason to the client as a 400 (bad input).
        return jsonify({'error': str(e)}), 400
if __name__ == '__main__':
    import os

    # Ensure the directories the app relies on exist before serving.
    os.makedirs('backend_app', exist_ok=True)
    os.makedirs('deployment_files', exist_ok=True)

    # SECURITY: the Werkzeug interactive debugger allows arbitrary code
    # execution, so it must never run on a publicly bound host. Debug mode
    # is now opt-in via FLASK_DEBUG=1 for local development only (it was
    # previously hard-coded on while listening on 0.0.0.0).
    debug = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(debug=debug, host='0.0.0.0', port=5000)