# NOTE(review): the lines "Spaces:" / "Runtime error" were stray notebook/export
# output, not source code. The runtime error itself was most likely the missing
# @app.route decorator on predict() (endpoint never registered -> 404).
from flask import Flask, request, jsonify
import joblib
import pandas as pd
import numpy as np

# Flask application serving the SuperKart sales-prediction pipeline.
app = Flask(__name__)
# Load the serialized full pipeline (preprocessing + model) once at startup.
# On failure, full_pipeline stays None and predict() returns HTTP 500.
try:
    full_pipeline = joblib.load('deployment_files/SuperKart_model_v1_0.joblib')
    # Determine the RAW input columns the pipeline expects.
    # BUG FIX: the original built the POST-transform feature names (one-hot
    # encoded + passthrough), but full_pipeline.predict() includes the
    # preprocessor step, so it must receive the ORIGINAL raw columns.
    if hasattr(full_pipeline, 'feature_names_in_'):
        # sklearn >= 1.0 records the training-time input column names.
        pipeline_columns = list(full_pipeline.feature_names_in_)
    else:
        # Fallback: collect the raw column lists the ColumnTransformer was
        # fitted on (skipping dropped columns).
        transformer = full_pipeline.named_steps['preprocessor']
        pipeline_columns = []
        for _name, _trans, cols in transformer.transformers_:
            if _trans == 'drop':
                continue
            if isinstance(cols, str):
                pipeline_columns.append(cols)
            else:
                pipeline_columns.extend(list(cols))
except Exception as e:
    # Keep the module importable even when the artifact is missing/corrupt.
    full_pipeline = None
    # BUG FIX: pipeline_columns was left undefined on failure, causing a
    # NameError inside predict() instead of the intended 500 response.
    pipeline_columns = []
    print(f"Error loading pipeline: {e}")
@app.route('/predict', methods=['POST'])
def predict():
    """Predict endpoint.

    Accepts a JSON object of raw feature values and returns the pipeline's
    prediction as JSON. Responds 500 when the model failed to load and 400
    on any per-request error.
    """
    # BUG FIX: the original function had no @app.route decorator, so Flask
    # never registered the endpoint and every request failed.
    if full_pipeline is None:
        return jsonify({'error': 'Model not loaded'}), 500
    try:
        data = request.get_json(force=True)
        # One JSON object -> one-row DataFrame.
        input_df = pd.DataFrame([data])
        # Derive 'Store_Age' from the establishment year when not supplied.
        # NOTE(review): 2025 appears to be the training-time reference year --
        # confirm it matches how Store_Age was computed for training data.
        if 'Store_Establishment_Year' in input_df.columns and 'Store_Age' not in input_df.columns:
            input_df['Store_Age'] = 2025 - input_df['Store_Establishment_Year']
            input_df = input_df.drop('Store_Establishment_Year', axis=1)
        # Add any expected-but-missing columns with a neutral default, then
        # reorder to the exact column order the pipeline was trained on.
        for col in pipeline_columns:
            if col not in input_df.columns:
                input_df[col] = 0  # assumes 0 is a safe default -- TODO confirm
        input_df = input_df[pipeline_columns]
        prediction = full_pipeline.predict(input_df)
        return jsonify({'prediction': prediction.tolist()})
    except Exception as e:
        # Request-boundary handler: surface the error message to the client.
        return jsonify({'error': str(e)}), 400
if __name__ == '__main__':
    import os
    # Ensure the directories the deployment relies on exist.
    os.makedirs('backend_app', exist_ok=True)
    os.makedirs('deployment_files', exist_ok=True)
    # SECURITY FIX: debug=True on host 0.0.0.0 exposes the Werkzeug
    # interactive debugger to the network (remote code execution).
    # Enable debug only explicitly via the FLASK_DEBUG environment variable.
    debug_mode = os.environ.get('FLASK_DEBUG', '0') == '1'
    app.run(debug=debug_mode, host='0.0.0.0', port=5000)