# Source uploaded via huggingface_hub by manasranjanpani (commit 2872a39, verified).
# Import necessary libraries
import numpy as np
import joblib
import pandas as pd
from flask import Flask, request, jsonify
# Initialize the Flask application (the string is the Flask app name only;
# it does not affect routing).
extraaLearn_predictor_api = Flask("ExtraaLearn paid customers Predictor")
# Load the trained machine learning model from the working directory.
# NOTE(review): raw categorical strings are later passed straight to
# model.predict, so this is presumably a full sklearn Pipeline that
# includes its own encoding/preprocessing — confirm against training code.
model = joblib.load("extraaLearn_model_prediction_model_v1_0.joblib")
# -----------------------------
# Feature mapping and expected data types
# -----------------------------
# Maps camelCase API field names -> snake_case feature names the model was
# trained on. Insertion order matters: it fixes the DataFrame column order
# built in the prediction route, so do not reorder these entries.
feature_mapping = {
"age": "age", # Integer
"currentOccupation": "current_occupation", # Categorical
"firstInteraction": "first_interaction", # Categorical
"profileCompleted": "profile_completed", # Categorical
"websiteVisits": "website_visits", # Integer
"timeSpentOnWebsite": "time_spent_on_website", # Integer
"pageViewsPerVisit": "page_views_per_visit", # Numeric — NOTE(review): a views/visit ratio is likely fractional; confirm expected type
"lastActivity": "last_activity", # Categorical
"printMediaType1": "print_media_type1", # Categorical
"printMediaType2": "print_media_type2", # Categorical
"digitalMedia": "digital_media", # Categorical
"educationalChannels": "educational_channels", # Categorical
"referral": "referral", # Categorical
}
# Expected data types for validation
# Expected data types for validation.
# NOTE(review): page_views_per_visit is a ratio (views / visits) and is
# fractional in the usual ExtraaLearn dataset, so it is validated as float —
# the previous `int` would make int("4.5") raise and reject valid payloads.
expected_types = {
    "age": int,
    "currentOccupation": str,
    "firstInteraction": str,
    "profileCompleted": str,
    "websiteVisits": int,
    "timeSpentOnWebsite": int,
    "pageViewsPerVisit": float,
    "lastActivity": str,
    "printMediaType1": str,
    "printMediaType2": str,
    "digitalMedia": str,
    "educationalChannels": str,
    "referral": str,
}
# -----------------------------
# Data validation and preprocessing
# -----------------------------
def validate_and_preprocess_input(data):
    """Validate a request payload and coerce each field to its expected type.

    Parameters
    ----------
    data : dict
        Payload keyed by the API field names listed in ``expected_types``.

    Returns
    -------
    dict
        New dict with every required field coerced to its expected type.

    Raises
    ------
    ValueError
        If a required field is missing or a value cannot be converted.
    """
    validated_data = {}
    for field, expected_type in expected_types.items():
        if field not in data:
            raise ValueError(f"Missing required field: {field}")
        value = data[field]
        try:
            # Calling the type object directly generalizes the original
            # int/str-only branches: it also converts float (and any future
            # type added to expected_types) instead of passing raw values
            # through unconverted.
            validated_data[field] = expected_type(value)
        except (ValueError, TypeError):
            raise ValueError(f"Invalid type for {field}: expected {expected_type.__name__}, got {type(value).__name__}")
    return validated_data
# -----------------------------
# Routes
# -----------------------------
@extraaLearn_predictor_api.get("/ping")
def ping():
    """Liveness probe: always responds with a constant JSON payload."""
    health_payload = {"status": "ok"}
    return jsonify(health_payload)
@extraaLearn_predictor_api.get("/")
def home():
    """Root endpoint: plain-text greeting so a browser check shows the API is up."""
    greeting = "Welcome to the ExtraaLearn customers Prediction API!"
    return greeting
@extraaLearn_predictor_api.post("/v1/customers")
def predict_sales_revenue():
    """
    Predict the paid-conversion status for a single customer lead.

    Expects a JSON body whose keys match ``expected_types``. Returns
    ``{"predicted_customer_status": <float>, "status": "success"}`` on
    success, a 400 with an error message for missing/invalid fields or an
    empty body, and a 500 if the model call itself fails.

    (The previous docstring — "predict sales revenue for a single
    product/store" — was copy-pasted from another project; this route
    predicts customer conversion.)
    """
    try:
        customer_data = request.get_json()
        if not customer_data:
            return jsonify({"error": "No JSON data provided"}), 400
        # Validate/coerce the payload, then rename the camelCase API keys to
        # the snake_case feature names the model was trained on.
        validated_data = validate_and_preprocess_input(customer_data)
        sample = {
            model_key: validated_data[api_key]
            for api_key, model_key in feature_mapping.items()
        }
        # Single-row DataFrame; feature_mapping's insertion order fixes the
        # column order the model expects.
        input_data = pd.DataFrame([sample])
        predicted_customer = model.predict(input_data)[0]
        # numpy scalars are not JSON-serializable; unwrap to a native float.
        if hasattr(predicted_customer, 'item'):
            predicted_customer = predicted_customer.item()
        predicted_customer = float(predicted_customer)
        return jsonify({
            "predicted_customer_status": predicted_customer,
            "status": "success"
        })
    except ValueError as e:
        # Validation failures are client errors.
        return jsonify({"error": str(e)}), 400
    except Exception as e:
        return jsonify({"error": f"Prediction failed: {str(e)}"}), 500
# Batch prediction (updated with similar fixes)
@extraaLearn_predictor_api.post("/v1/customersbatch")
def predict_sales_batch():
    """
    Batch prediction endpoint.

    Accepts a multipart upload named "file" containing a CSV of records and
    returns one prediction per row — keyed by the "id" column when the CSV
    has one, otherwise as a single "predictions" list.
    """
    try:
        uploaded = request.files.get("file")
        if uploaded is None:
            return jsonify({"error": "CSV file is required"}), 400
        # Parse the uploaded CSV straight into a DataFrame.
        frame = pd.read_csv(uploaded)
        # Cast numpy scalars to native floats so they JSON-serialize.
        predicted_customers = [float(value) for value in model.predict(frame)]
        if "id" in frame.columns:
            row_ids = frame["id"].astype(str).tolist()
            return jsonify(dict(zip(row_ids, predicted_customers)))
        return jsonify({"predictions": predicted_customers})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
if __name__ == "__main__":
    # NOTE(review): debug=True enables the Werkzeug interactive debugger and
    # auto-reloader — unsafe on anything internet-facing (host="0.0.0.0"
    # binds all interfaces); disable outside local development.
    # Port 7860 is presumably chosen for Hugging Face Spaces (see the
    # huggingface_hub upload note at the top of the file) — confirm.
    extraaLearn_predictor_api.run(host="0.0.0.0", port=7860, debug=True)