from flask import Flask, request, jsonify
import joblib
import pandas as pd
import numpy as np
import os
from datetime import datetime

app = Flask(__name__)

# Load the serialized full pipeline (preprocessor + model) once at startup.
try:
    full_pipeline = joblib.load('deployment_files/SuperKart_model_v1_0.joblib')
    # The pipeline's preprocessor step performs all transformation (scaling,
    # one-hot encoding) itself, so prediction inputs must carry the RAW
    # feature columns the pipeline was fitted on -- NOT the transformed
    # (one-hot) feature names.  scikit-learn >= 1.0 records the fitted raw
    # input columns on the pipeline as `feature_names_in_`.
    expected_columns = list(getattr(full_pipeline, 'feature_names_in_', []))
except Exception as e:
    full_pipeline = None
    expected_columns = []
    print(f"Error loading pipeline: {e}")


@app.route('/predict', methods=['POST'])
def predict():
    """Predict from a JSON payload of raw feature values.

    Expects a flat JSON object mapping raw feature names to values.
    Returns ``{'prediction': [...]}`` on success, or ``{'error': ...}``
    with a 4xx/5xx status on failure.
    """
    if full_pipeline is None:
        return jsonify({'error': 'Model not loaded'}), 500
    try:
        data = request.get_json(force=True)
        # Single-row DataFrame so the sklearn pipeline can consume it.
        input_df = pd.DataFrame([data])

        # Derive the engineered 'Store_Age' feature if the caller sent the
        # raw establishment year instead.
        if 'Store_Establishment_Year' in input_df.columns and 'Store_Age' not in input_df.columns:
            current_year = datetime.now().year  # was hard-coded to 2025
            input_df['Store_Age'] = current_year - input_df['Store_Establishment_Year']
            input_df = input_df.drop('Store_Establishment_Year', axis=1)

        # Align the input to the raw columns the pipeline was fitted on.
        # The pipeline's own preprocessor handles one-hot encoding, so we
        # must NOT reindex to post-transform (one-hot) feature names here,
        # and silently filling missing raw features with 0 would corrupt
        # predictions -- reject incomplete payloads explicitly instead.
        if expected_columns:
            missing = [c for c in expected_columns if c not in input_df.columns]
            if missing:
                return jsonify({'error': f'Missing required features: {missing}'}), 400
            input_df = input_df[expected_columns]

        prediction = full_pipeline.predict(input_df)
        return jsonify({'prediction': prediction.tolist()})
    except Exception as e:
        return jsonify({'error': str(e)}), 400


if __name__ == '__main__':
    # Ensure the working directories exist before serving.
    os.makedirs('backend_app', exist_ok=True)
    os.makedirs('deployment_files', exist_ok=True)
    # NOTE(security): debug=True combined with host='0.0.0.0' exposes the
    # Werkzeug interactive debugger (arbitrary code execution) to the whole
    # network -- keep debug off for anything beyond local development.
    app.run(debug=False, host='0.0.0.0', port=5000)