# Source: Hugging Face Space "manasranjanpani/ExtraaLearnCustomerPredictionBackend"
# Space status when captured: Sleeping
# File size: 5,632 bytes

# Import necessary libraries
import numpy as np
import joblib
import pandas as pd
from flask import Flask, request, jsonify

# Create the Flask application object. Every route decorator further down
# registers its handler on this instance, and the __main__ guard runs it.
extraaLearn_predictor_api = Flask("ExtraaLearn paid customers Predictor")

# Load the serialized model once, at import time, so all requests share the
# same object. The path is relative, so it is resolved against the process's
# current working directory.
# NOTE(review): joblib artifacts are pickle-based; only load files that come
# from a trusted source.
model = joblib.load("extraaLearn_model_prediction_model_v1_0.joblib")

# -----------------------------
# Feature mapping and expected data types
# -----------------------------
# Maps each public API field name (camelCase key in the JSON body) to the
# column name used when building the DataFrame that is passed to the model.
# The single-record route iterates this dict in insertion order, so the order
# here is also the column order of that DataFrame.
feature_mapping = {  
    "age": "age",  # coerced to int by the validator
    "currentOccupation": "current_occupation",  # coerced to str (presumably categorical)
    "firstInteraction": "first_interaction",  # coerced to str (presumably categorical)
    "profileCompleted": "profile_completed",  # coerced to str (presumably categorical)
    "websiteVisits": "website_visits",  # coerced to int by the validator
    "timeSpentOnWebsite": "time_spent_on_website",  # coerced to int by the validator
    # NOTE(review): a "per visit" figure may well be fractional; confirm that
    # integer coercion (which truncates floats) matches the training data.
    "pageViewsPerVisit": "page_views_per_visit",  # coerced to int by the validator
    "lastActivity": "last_activity",  # coerced to str (presumably categorical)
    "printMediaType1": "print_media_type1",  # coerced to str (presumably categorical)
    "printMediaType2": "print_media_type2",  # coerced to str (presumably categorical)
    "digitalMedia": "digital_media",  # coerced to str (presumably categorical)
    "educationalChannels": "educational_channels",  # coerced to str (presumably categorical)
    "referral": "referral",  # coerced to str (presumably categorical)
}

# Expected data types for validation.
# Keys are the API field names (the same keys as feature_mapping); values are
# the Python types that validate_and_preprocess_input coerces each field to.
# Every key listed here is treated as required by the validator.
expected_types = {    
    "age": int,
    "currentOccupation": str,
    "firstInteraction": str,
    "profileCompleted": str,
    "websiteVisits": int,
    "timeSpentOnWebsite": int,
    "pageViewsPerVisit": int,
    "lastActivity": str,
    "printMediaType1": str,
    "printMediaType2": str,
    "digitalMedia": str,
    "educationalChannels": str,
    "referral": str   
}

# -----------------------------
# Data validation and preprocessing
# -----------------------------
def validate_and_preprocess_input(data, schema=None):
    """Validate a request payload and coerce each field to its declared type.

    Args:
        data: Decoded JSON body. Must be a dict keyed by API field names;
            keys that are not part of the schema are ignored.
        schema: Optional mapping of field name -> expected type. ``int`` and
            ``str`` fields are coerced with the corresponding constructor;
            any other type is passed through unchanged. Defaults to the
            module-level ``expected_types``.

    Returns:
        A new dict holding exactly the schema's fields, in schema order, with
        values converted to their expected types.

    Raises:
        ValueError: If ``data`` is not a dict, a required field is missing,
            a value is ``None``, a boolean is supplied for an ``int`` field,
            or a value cannot be converted to its expected type.
    """
    if schema is None:
        schema = expected_types

    # A JSON string/number/array would otherwise fail with TypeError on
    # indexing, which callers report as a server error instead of bad input.
    if not isinstance(data, dict):
        raise ValueError(
            f"Invalid payload: expected a JSON object, got {type(data).__name__}"
        )

    validated_data = {}
    for field, expected_type in schema.items():
        if field not in data:
            raise ValueError(f"Missing required field: {field}")

        value = data[field]
        type_name = getattr(expected_type, "__name__", repr(expected_type))
        mismatch = (
            f"Invalid type for {field}: expected {type_name}, "
            f"got {type(value).__name__}"
        )

        # str(None) would silently yield the literal category "None" and
        # int(True) would yield 1; neither is a meaningful feature value.
        if value is None:
            raise ValueError(mismatch)
        if expected_type is int and isinstance(value, bool):
            raise ValueError(mismatch)

        try:
            if expected_type is int:
                validated_data[field] = int(value)
            elif expected_type is str:
                validated_data[field] = str(value)
            else:
                validated_data[field] = value
        except (ValueError, TypeError) as exc:
            raise ValueError(mismatch) from exc

    return validated_data

# -----------------------------
# Routes
# -----------------------------

@extraaLearn_predictor_api.get("/ping")
def ping():
    """Liveness probe: always answers with the JSON body {"status": "ok"}."""
    health_payload = {"status": "ok"}
    return jsonify(health_payload)

@extraaLearn_predictor_api.get("/")
def home():
    """Serve the plain-text greeting returned at the API root."""
    greeting = "Welcome to the ExtraaLearn customers Prediction API!"
    return greeting

@extraaLearn_predictor_api.post("/v1/customers")
def predict_sales_revenue():
    """
    Handle POST requests that score a single customer record.

    The request body must be a JSON object containing every key of
    ``expected_types``. The values are validated, renamed to the model's
    column names via ``feature_mapping`` and passed to the model as a
    one-row DataFrame.

    Returns:
        200 with ``{"predicted_customer_status": <float>, "status": "success"}``
        on success; 400 with ``{"error": ...}`` when the body is missing, is
        not valid JSON, or fails validation; 500 with ``{"error": ...}`` for
        any other failure during prediction.

    Note: the function name is historical; it is kept unchanged because Flask
    uses it as the endpoint name.
    """
    # silent=True turns a wrong Content-Type or malformed JSON into None
    # instead of raising an HTTP error, which the generic handler below would
    # otherwise report as a 500.
    customer_data = request.get_json(silent=True)
    if not customer_data:
        return jsonify({"error": "No JSON data provided"}), 400

    try:
        # Validate and coerce the raw payload
        validated_data = validate_and_preprocess_input(customer_data)

        # Rename API keys to model column names; dict order sets column order
        sample = {
            model_key: validated_data[api_key]
            for api_key, model_key in feature_mapping.items()
        }
        input_data = pd.DataFrame([sample])

        # Log at debug level rather than printing request data to stdout
        extraaLearn_predictor_api.logger.debug(
            "Input data types:\n%s", input_data.dtypes
        )
        extraaLearn_predictor_api.logger.debug("Input data:\n%s", input_data)

        predicted_customer = model.predict(input_data)[0]

        # Convert numpy scalars to native Python for JSON serialization
        if hasattr(predicted_customer, 'item'):
            predicted_customer = predicted_customer.item()
        predicted_customer = float(predicted_customer)

    except ValueError as e:
        return jsonify({"error": str(e)}), 400
    except Exception as e:
        # Keep the traceback in the server log; the client only gets a summary
        extraaLearn_predictor_api.logger.exception("Single prediction failed")
        return jsonify({"error": f"Prediction failed: {str(e)}"}), 500

    return jsonify({
        "predicted_customer_status": predicted_customer,
        "status": "success"
    })

# Batch prediction: scores every row of an uploaded CSV file
@extraaLearn_predictor_api.post("/v1/customersbatch")
def predict_sales_batch():
    """
    Handle POST requests for batch prediction.

    Expects a multipart upload with a CSV under the form field ``file``. The
    CSV must contain every model column listed in ``feature_mapping``'s
    values; other columns are ignored for prediction. Only those columns are
    passed to the model, matching what the single-record route sends.

    Returns:
        200 with ``{<id>: <prediction>, ...}`` when the CSV has an ``id``
        column, otherwise ``{"predictions": [...]}``; 400 with
        ``{"error": ...}`` when the file is missing, unreadable, has no rows
        or lacks required columns; 500 with ``{"error": ...}`` for any other
        failure.

    Note: the function name is historical; it is kept unchanged because Flask
    uses it as the endpoint name.
    """
    uploaded = request.files.get("file")
    if uploaded is None:
        return jsonify({"error": "CSV file is required"}), 400

    # A broken upload is a client error, so report it as 400 rather than 500
    try:
        input_data = pd.read_csv(uploaded)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        return jsonify({"error": f"Invalid CSV file: {exc}"}), 400

    feature_columns = list(feature_mapping.values())
    missing = [col for col in feature_columns if col not in input_data.columns]
    if missing:
        return jsonify(
            {"error": f"Missing required columns: {', '.join(missing)}"}
        ), 400

    if input_data.empty:
        return jsonify({"error": "CSV file contains no records"}), 400

    try:
        predictions = model.predict(input_data[feature_columns])

        # Convert numpy scalars to native Python for JSON serialization
        predicted_customers = [float(p) for p in predictions]

        if "id" in input_data.columns:
            # NOTE: duplicate ids collapse to one key; the last row wins
            record_ids = input_data["id"].astype(str).tolist()
            output_dict = dict(zip(record_ids, predicted_customers))
        else:
            output_dict = {"predictions": predicted_customers}

    except Exception as e:
        # Keep the traceback in the server log for diagnosis
        extraaLearn_predictor_api.logger.exception("Batch prediction failed")
        return jsonify({"error": str(e)}), 500

    return jsonify(output_dict)

if __name__ == "__main__":
    import os

    # The Werkzeug debugger allows arbitrary code execution from the browser,
    # so it must not be on by default while listening on all interfaces.
    # Opt in explicitly for local development with FLASK_DEBUG=1.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1", "true", "yes", "on",
    }
    extraaLearn_predictor_api.run(host="0.0.0.0", port=7860, debug=debug_enabled)