import os
import joblib
import pandas as pd

# must import Flask, request, jsonify before using them
from flask import Flask, request, jsonify


# ----------------------------
# Config / Model path
# ----------------------------
# Relative path of the serialized model artifact.
MODEL_PATH = "superKart_price_prediction_model_v1_0.joblib"

# ----------------------------
# Initialize app and load model
# ----------------------------
app = Flask("SuperKart Sales Predictor")

# Deserialize the model once at import time so every request reuses the same
# object; abort start-up with an explicit message when the artifact is absent.
if os.path.exists(MODEL_PATH):
    model = joblib.load(MODEL_PATH)
else:
    raise FileNotFoundError(f"Model file not found at {MODEL_PATH}. ")

# Raw (pre-preprocessing) input feature names, grouped by kind.
NUMERIC_COLS = [
    'Product_Weight',
    'Product_Allocated_Area',
    'Product_MRP',
    'Store_Age',
]
CATEGORICAL_COLS = [
    'Product_Sugar_Content',
    'Product_Type',
    'Store_Size',
    'Store_Location_City_Type',
    'Store_Type',
]

# Full list of required columns: numeric features first, then categorical.
EXPECTED_COLUMNS = [*NUMERIC_COLS, *CATEGORICAL_COLS]

# ----------------------------
# Utility function
# ----------------------------
def validate_and_prepare_input(df: pd.DataFrame, reference_year: int = 2025,
                               expected_columns=None):
    """
    Check that a dataframe carries the required raw input columns.

    If ``Store_Age`` is required but absent and ``Store_Establishment_Year``
    is present, ``Store_Age`` is derived as
    ``reference_year - Store_Establishment_Year``. The caller's dataframe is
    never modified; the function works on (and returns) a copy.

    Args:
        df: Incoming records, one row per record to score.
        reference_year: Year the store age is measured against. Defaults to
            2025, the value that used to be hard-coded here.
            NOTE(review): presumably this must match the year used to build
            Store_Age when the model was trained - confirm before changing.
        expected_columns: Iterable of column names to require. Defaults to
            the module-level ``EXPECTED_COLUMNS``.

    Returns:
        A tuple ``(prepared_df, missing)`` where ``missing`` lists the
        required columns still absent after the optional Store_Age
        derivation (empty when the input is complete), in the order of the
        required-column list.

    Raises:
        ValueError: If ``Store_Establishment_Year`` contains a value that
            cannot be parsed as a number.
    """
    required = EXPECTED_COLUMNS if expected_columns is None else list(expected_columns)
    df = df.copy()

    needs_store_age = 'Store_Age' in required and 'Store_Age' not in df.columns
    if needs_store_age and 'Store_Establishment_Year' in df.columns:
        # JSON clients frequently send the year as a string ("2009"); coerce
        # it to a number so the subtraction does not fail with TypeError.
        established = pd.to_numeric(df['Store_Establishment_Year'])
        df['Store_Age'] = reference_year - established

    # Computed after the derivation so a freshly added Store_Age counts as present.
    missing = [c for c in required if c not in df.columns]
    return df, missing

# ----------------------------
# Routes
# ----------------------------
@app.get("/")
def home():
    """Landing route that doubles as a health check: reports the service name and status."""
    payload = {"service": "SuperKart Sales Predictor", "status": "running"}
    return jsonify(payload)

@app.post("/v1/predict")
def predict_single():
    """
    Predict sales for a single product-store record.

    Expected JSON schema (example):
    {
      "Product_Weight": 12.5,
      "Product_Allocated_Area": 0.056,
      "Product_MRP": 149.0,
      "Store_Age": 16,
      "Product_Sugar_Content": "Low Sugar",
      "Product_Type": "Dairy",
      "Store_Size": "High",
      "Store_Location_City_Type": "Tier 1",
      "Store_Type": "Supermarket Type 1"
    }
    "Store_Establishment_Year" may be sent instead of "Store_Age"; extra
    fields are ignored.

    Responses:
      200 - {"prediction": <float>}
      400 - body is not valid JSON / not a JSON object, or required
            columns are missing (listed under "missing_columns")
      500 - any unexpected failure while preparing data or predicting
    """
    try:
        # silent=True turns an unparsable or empty body into None instead of
        # raising BadRequest, so malformed input is answered with the 400
        # below rather than being swallowed by the generic 500 handler.
        data = request.get_json(force=True, silent=True)
        if not isinstance(data, dict):
            return jsonify({"error": "Input JSON must be an object/dict"}), 400

        # Wrap the single record in a list so it becomes a one-row DataFrame
        input_df = pd.DataFrame([data])

        # Validate and prepare
        input_df, missing = validate_and_prepare_input(input_df)
        if missing:
            return jsonify({"error": "Missing required columns", "missing_columns": missing}), 400

        # Keep only expected columns (ignore extra fields)
        input_df = input_df[EXPECTED_COLUMNS]

        # Predict using pipeline (pipeline will apply preprocessors)
        pred = model.predict(input_df)
        prediction_value = float(pred[0])

        return jsonify({"prediction": prediction_value}), 200

    except Exception as e:
        # Record the full traceback server-side; the response only carries str(e).
        app.logger.exception("Single prediction failed")
        return jsonify({"error": "Exception during prediction", "details": str(e)}), 500

@app.post("/v1/predict_batch")
def predict_batch():
    """
    Predict sales for a batch of records supplied as a CSV file upload.

    The CSV must be sent as multipart form data under the key 'file' and
    should contain the expected columns (or Store_Establishment_Year instead
    of Store_Age, which will be converted automatically). Extra columns are
    ignored.

    Responses:
      200 - {"predictions_count": <int>, "predictions": [<record>, ...]};
            each record holds the row's "index", the expected input columns
            and "predicted_Product_Store_Sales_Total". Missing cell values
            are returned as null.
      400 - no file part, empty filename, file not parsable as CSV, or
            required columns missing (listed under "missing_columns")
      500 - any unexpected failure while preparing data or predicting
    """
    try:
        if 'file' not in request.files:
            return jsonify({"error": "No file part in the request. Upload a CSV file with key 'file'."}), 400

        upload = request.files['file']
        if upload.filename == "":
            return jsonify({"error": "Empty filename. Please upload a CSV file."}), 400

        # An empty, malformed or non-text upload is a client error, not a
        # server fault, so report it as 400 instead of letting it reach the
        # generic 500 handler.
        try:
            input_df = pd.read_csv(upload)
        except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as e:
            return jsonify({"error": "Uploaded file could not be parsed as CSV", "details": str(e)}), 400

        input_df, missing = validate_and_prepare_input(input_df)
        if missing:
            return jsonify({"error": "Missing required columns in uploaded CSV", "missing_columns": missing}), 400

        # Keep only expected columns and predict
        input_df = input_df[EXPECTED_COLUMNS]
        preds = model.predict(input_df)

        # Attach predictions to a copy so each one stays next to its input row
        output = input_df.copy()
        output['predicted_Product_Store_Sales_Total'] = preds.astype(float)

        # reset_index() exposes the row index as an "index" field. NaN cells
        # (blank CSV values) would be serialized as the bare token NaN, which
        # is not valid JSON; cast to object and swap them for None (-> null).
        records = output.reset_index().astype(object)
        results = records.where(records.notna(), None).to_dict(orient='records')

        return jsonify({"predictions_count": len(results), "predictions": results}), 200

    except Exception as e:
        # Record the full traceback server-side; the response only carries str(e).
        app.logger.exception("Batch prediction failed")
        return jsonify({"error": "Exception during batch prediction", "details": str(e)}), 500

# ----------------------------
# Run app
# ----------------------------
if __name__ == "__main__":
    # Script entry point: bind to every interface (0.0.0.0) so the server is
    # reachable from outside a container. Port 7860 is used here; switch to
    # 5000 or another port in dev if required.
    app.run(
        host="0.0.0.0",
        port=7860,
        debug=False,
    )