# NOTE(review): the lines "Spaces:" / "Runtime error" were stray notebook/export
# output, not source code. The runtime error itself was most likely the missing
# @app.route decorator on predict() (endpoint never registered -> 404).
from flask import Flask, request, jsonify
import joblib
import pandas as pd
import numpy as np

# Flask application serving the SuperKart sales-prediction pipeline.
app = Flask(__name__)
# Load the serialized full pipeline (preprocessing + model) once at startup.
# On failure, full_pipeline stays None and predict() returns HTTP 500.
try:
    full_pipeline = joblib.load('deployment_files/SuperKart_model_v1_0.joblib')
    # Determine the RAW input columns the pipeline expects.
    # BUG FIX: the original built the POST-transform feature names (one-hot
    # encoded + passthrough), but full_pipeline.predict() includes the
    # preprocessor step, so it must receive the ORIGINAL raw columns.
    if hasattr(full_pipeline, 'feature_names_in_'):
        # sklearn >= 1.0 records the training-time input column names.
        pipeline_columns = list(full_pipeline.feature_names_in_)
    else:
        # Fallback: collect the raw column lists the ColumnTransformer was
        # fitted on (skipping dropped columns).
        transformer = full_pipeline.named_steps['preprocessor']
        pipeline_columns = []
        for _name, _trans, cols in transformer.transformers_:
            if _trans == 'drop':
                continue
            if isinstance(cols, str):
                pipeline_columns.append(cols)
            else:
                pipeline_columns.extend(list(cols))
except Exception as e:
    # Keep the module importable even when the artifact is missing/corrupt.
    full_pipeline = None
    # BUG FIX: pipeline_columns was left undefined on failure, causing a
    # NameError inside predict() instead of the intended 500 response.
    pipeline_columns = []
    print(f"Error loading pipeline: {e}")
@app.route('/predict', methods=['POST'])
def predict():
    """Predict endpoint.

    Accepts a JSON object of raw feature values and returns the pipeline's
    prediction as JSON. Responds 500 when the model failed to load and 400
    on any per-request error.
    """
    # BUG FIX: the original function had no @app.route decorator, so Flask
    # never registered the endpoint and every request failed.
    if full_pipeline is None:
        return jsonify({'error': 'Model not loaded'}), 500
    try:
        data = request.get_json(force=True)
        # One JSON object -> one-row DataFrame.
        input_df = pd.DataFrame([data])
        # Derive 'Store_Age' from the establishment year when not supplied.
        # NOTE(review): 2025 appears to be the training-time reference year --
        # confirm it matches how Store_Age was computed for training data.
        if 'Store_Establishment_Year' in input_df.columns and 'Store_Age' not in input_df.columns:
            input_df['Store_Age'] = 2025 - input_df['Store_Establishment_Year']
            input_df = input_df.drop('Store_Establishment_Year', axis=1)
        # Add any expected-but-missing columns with a neutral default, then
        # reorder to the exact column order the pipeline was trained on.
        for col in pipeline_columns:
            if col not in input_df.columns:
                input_df[col] = 0  # assumes 0 is a safe default -- TODO confirm
        input_df = input_df[pipeline_columns]
        prediction = full_pipeline.predict(input_df)
        return jsonify({'prediction': prediction.tolist()})
    except Exception as e:
        # Request-boundary handler: surface the error message to the client.
        return jsonify({'error': str(e)}), 400
if __name__ == '__main__':
    import os
    # Ensure the directories the deployment relies on exist.
    os.makedirs('backend_app', exist_ok=True)
    os.makedirs('deployment_files', exist_ok=True)
    # SECURITY FIX: debug=True on host 0.0.0.0 exposes the Werkzeug
    # interactive debugger to the network (remote code execution).
    # Enable debug only explicitly via the FLASK_DEBUG environment variable.
    debug_mode = os.environ.get('FLASK_DEBUG', '0') == '1'
    app.run(debug=debug_mode, host='0.0.0.0', port=5000)