from flask import Flask, request, jsonify
import joblib
import pandas as pd
import numpy as np
import os
from datetime import datetime

app = Flask(__name__)

# Load the serialized full pipeline (preprocessor + model) once at startup.
try:
    full_pipeline = joblib.load('deployment_files/SuperKart_model_v1_0.joblib')
    # The pipeline's preprocessor step performs all transformation (scaling,
    # one-hot encoding) itself, so prediction inputs must carry the RAW
    # feature columns the pipeline was fitted on -- NOT the transformed
    # (one-hot) feature names.  scikit-learn >= 1.0 records the fitted raw
    # input columns on the pipeline as `feature_names_in_`.
    expected_columns = list(getattr(full_pipeline, 'feature_names_in_', []))
except Exception as e:
    full_pipeline = None
    expected_columns = []
    print(f"Error loading pipeline: {e}")


@app.route('/predict', methods=['POST'])
def predict():
    """Predict from a JSON payload of raw feature values.

    Expects a flat JSON object mapping raw feature names to values.
    Returns ``{'prediction': [...]}`` on success, or ``{'error': ...}``
    with a 4xx/5xx status on failure.
    """
    if full_pipeline is None:
        return jsonify({'error': 'Model not loaded'}), 500
    try:
        data = request.get_json(force=True)
        # Single-row DataFrame so the sklearn pipeline can consume it.
        input_df = pd.DataFrame([data])

        # Derive the engineered 'Store_Age' feature if the caller sent the
        # raw establishment year instead.
        if 'Store_Establishment_Year' in input_df.columns and 'Store_Age' not in input_df.columns:
            current_year = datetime.now().year  # was hard-coded to 2025
            input_df['Store_Age'] = current_year - input_df['Store_Establishment_Year']
            input_df = input_df.drop('Store_Establishment_Year', axis=1)

        # Align the input to the raw columns the pipeline was fitted on.
        # The pipeline's own preprocessor handles one-hot encoding, so we
        # must NOT reindex to post-transform (one-hot) feature names here,
        # and silently filling missing raw features with 0 would corrupt
        # predictions -- reject incomplete payloads explicitly instead.
        if expected_columns:
            missing = [c for c in expected_columns if c not in input_df.columns]
            if missing:
                return jsonify({'error': f'Missing required features: {missing}'}), 400
            input_df = input_df[expected_columns]

        prediction = full_pipeline.predict(input_df)
        return jsonify({'prediction': prediction.tolist()})
    except Exception as e:
        return jsonify({'error': str(e)}), 400


if __name__ == '__main__':
    # Ensure the working directories exist before serving.
    os.makedirs('backend_app', exist_ok=True)
    os.makedirs('deployment_files', exist_ok=True)
    # NOTE(security): debug=True combined with host='0.0.0.0' exposes the
    # Werkzeug interactive debugger (arbitrary code execution) to the whole
    # network -- keep debug off for anything beyond local development.
    app.run(debug=False, host='0.0.0.0', port=5000)