Upload folder using huggingface_hub
Browse files
app.py
CHANGED
|
@@ -1,19 +1,21 @@
|
|
| 1 |
# Import necessary libraries
|
| 2 |
import numpy as np
|
| 3 |
-
import joblib
|
| 4 |
-
import pandas as pd
|
| 5 |
-
from flask import Flask, request, jsonify
|
| 6 |
|
| 7 |
# Initialize the Flask application
|
| 8 |
# NOTE(review): Flask's first positional argument is its import name (normally
# __name__), which it uses to locate resources; a display title is passed here
# instead - confirm that is intentional.
extraaLearn_predictor_api = Flask("ExtraaLearn paid customers Predictor")
|
| 9 |
|
| 10 |
# Load the trained machine learning model
|
| 11 |
-
model = joblib.load("extraaLearn_model_prediction_model_v1_0
|
|
|
|
| 12 |
|
| 13 |
# -----------------------------
|
| 14 |
-
# Feature mapping
|
| 15 |
# -----------------------------
|
| 16 |
-
feature_mapping = {
|
|
|
|
| 17 |
"age": "age",
|
| 18 |
"currentOccupation": "current_occupation",
|
| 19 |
"firstInteraction": "first_interaction",
|
|
@@ -26,97 +28,153 @@ feature_mapping = {
|
|
| 26 |
"printMediaType2": "print_media_type2",
|
| 27 |
"digitalMedia": "digital_media",
|
| 28 |
"educationalChannels": "educational_channels",
|
| 29 |
-
"referral": "referral",
|
|
|
|
| 30 |
}
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
# -----------------------------
|
| 34 |
-
# Routes
|
| 35 |
# -----------------------------
|
| 36 |
|
| 37 |
-
# Health check
|
| 38 |
@extraaLearn_predictor_api.get("/ping")
def ping():
    """Liveness probe: reply with a constant JSON ``{"status": "ok"}`` body."""
    health = {"status": "ok"}
    return jsonify(health)
|
| 42 |
|
| 43 |
-
|
| 44 |
-
# Home route
|
| 45 |
@extraaLearn_predictor_api.get("/")
def home():
    """Serve the plain-text greeting shown at the API root."""
    greeting = "Welcome to the ExtraaLearn customers Prediction API!"
    return greeting
|
| 49 |
|
| 50 |
-
|
| 51 |
-
# Single prediction
|
| 52 |
@extraaLearn_predictor_api.post("/v1/customers")
|
| 53 |
-
def
|
| 54 |
"""
|
| 55 |
Handles POST requests to run the ExtraaLearn paid-customer prediction for a single customer record.
|
| 56 |
-
Expects a JSON payload with features.
|
| 57 |
-
"""
|
| 58 |
-
# try:
|
| 59 |
-
# Get the JSON data from the request body
|
| 60 |
-
property_data = request.get_json()
|
| 61 |
-
|
| 62 |
-
# Map input keys to model feature names
|
| 63 |
-
sample = {}
|
| 64 |
-
for api_key, model_key in feature_mapping.items():
|
| 65 |
-
if api_key not in property_data:
|
| 66 |
-
return jsonify({"error": f"Missing required field: {api_key}"}), 400
|
| 67 |
-
sample[model_key] = property_data[api_key]
|
| 68 |
-
|
| 69 |
-
# Convert the extracted data into a Pandas DataFrame
|
| 70 |
-
input_data = pd.DataFrame([sample])
|
| 71 |
-
|
| 72 |
-
# Make prediction for the submitted customer record
|
| 73 |
-
predicted_customer = model.predict(input_data)[0]
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
return jsonify({"Predicted_customer": predicted_customer})
|
| 78 |
-
|
| 79 |
-
# except Exception as e:
|
| 80 |
-
# return jsonify({"error": str(e)}), 500
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
# Batch prediction
|
| 84 |
-
@extraaLearn_predictor_api.post("/v1/customersbatch")
|
| 85 |
-
def predict_sales_batch():
|
| 86 |
-
"""
|
| 87 |
-
Handles POST requests for batch prediction.
|
| 88 |
-
Expects a CSV file with multiple records.
|
| 89 |
"""
|
| 90 |
try:
|
| 91 |
-
# Get the
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
except Exception as e:
|
| 117 |
-
|
| 118 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
|
| 120 |
-
# Run the Flask development server when this script is executed directly.
if __name__ == "__main__":
    import os

    # The Werkzeug debugger lets anyone who can reach the server execute
    # arbitrary Python, so it must not be switched on unconditionally while
    # binding to all interfaces. Opt in explicitly (FLASK_DEBUG=1) for local
    # development only.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    extraaLearn_predictor_api.run(host="0.0.0.0", port=7860, debug=debug_enabled)
|
|
|
|
| 1 |
# Import necessary libraries
|
| 2 |
import numpy as np
|
| 3 |
+
import joblib
|
| 4 |
+
import pandas as pd
|
| 5 |
+
from flask import Flask, request, jsonify
|
| 6 |
|
| 7 |
# Initialize the Flask application
|
| 8 |
# NOTE(review): Flask's first positional argument is its import name (normally
# __name__), which it uses to locate resources; a display title is passed here
# instead - confirm that is intentional.
extraaLearn_predictor_api = Flask("ExtraaLearn paid customers Predictor")
|
| 9 |
|
| 10 |
# Load the trained machine learning model
|
| 11 |
+
# The artifact is loaded once at import time and shared by every request.
# NOTE: joblib files are pickle-based and can execute code when loaded; only
# ship model files that come from a trusted source.
model = joblib.load("extraaLearn_model_prediction_model_v1_0.joblib")
|
| 13 |
|
| 14 |
# -----------------------------
|
| 15 |
+
# Feature mapping
|
| 16 |
# -----------------------------
|
| 17 |
+
feature_mapping = {
|
| 18 |
+
"id": "ID",
|
| 19 |
"age": "age",
|
| 20 |
"currentOccupation": "current_occupation",
|
| 21 |
"firstInteraction": "first_interaction",
|
|
|
|
| 28 |
"printMediaType2": "print_media_type2",
|
| 29 |
"digitalMedia": "digital_media",
|
| 30 |
"educationalChannels": "educational_channels",
|
| 31 |
+
"referral": "referral",
|
| 32 |
+
"status": "status"
|
| 33 |
}
|
| 34 |
|
| 35 |
+
# -----------------------------
|
| 36 |
+
# Debug function to check model expectations
|
| 37 |
+
# -----------------------------
|
| 38 |
+
def debug_model_expectations():
    """Print a report of what the loaded ``model`` exposes about its inputs.

    Reports ``feature_names_in_`` when the model has that attribute. When the
    model has ``named_steps``, each step is listed with its type, followed by
    the entries of the step's ``transformers_`` (and each transformer's
    ``categories_``) when those attributes are present.
    """
    print("=== MODEL DEBUG INFORMATION ===")

    # Feature names are only available on some fitted estimators.
    if not hasattr(model, 'feature_names_in_'):
        print("Model doesn't have explicit feature names")
    else:
        expected = list(model.feature_names_in_)
        print(f"Model expects these features: {expected}")

    # Objects exposing named_steps are walked step by step.
    if hasattr(model, 'named_steps'):
        print("Model is a pipeline with steps:")
        for label, stage in model.named_steps.items():
            print(f" - {label}: {type(stage)}")

            # Only steps that carry fitted sub-transformers have more to show.
            if not hasattr(stage, 'transformers_'):
                continue
            for tf_name, tf_obj, tf_columns in stage.transformers_:
                print(f" Transformer '{tf_name}' handles columns: {tf_columns}")
                if hasattr(tf_obj, 'categories_'):
                    print(f" Categories: {tf_obj.categories_}")

    print("=== END MODEL DEBUG ===")
|
| 62 |
+
|
| 63 |
+
# Run debug on startup
|
| 64 |
+
debug_model_expectations()
|
| 65 |
|
| 66 |
# -----------------------------
|
| 67 |
+
# Routes with enhanced debugging
|
| 68 |
# -----------------------------
|
| 69 |
|
|
|
|
| 70 |
@extraaLearn_predictor_api.get("/ping")
def ping():
    """Health check: always answers with the JSON body ``{"status": "ok"}``."""
    body = {"status": "ok"}
    return jsonify(body)
|
| 73 |
|
|
|
|
|
|
|
| 74 |
@extraaLearn_predictor_api.get("/")
def home():
    """Return the plain-text welcome message for the API root."""
    welcome_text = "Welcome to the ExtraaLearn customers Prediction API!"
    return welcome_text
|
| 77 |
|
|
|
|
|
|
|
| 78 |
@extraaLearn_predictor_api.post("/v1/customers")
def predict_sales_revenue():
    """
    Handle POST requests that score a single customer record.

    Expects a JSON object containing every key of ``feature_mapping``. The
    values are renamed to the model's column names, wrapped in a one-row
    DataFrame and passed to ``model.predict``.

    Returns:
        A JSON response with the prediction under ``"Predicted_Sales"`` on
        success; a 400 response when the body is missing, is not a JSON
        object, lacks required fields, or triggers one of the recognised
        input errors in the model; a 500 response for any other failure.
    """
    try:
        # silent=True turns an unparsable body or a wrong content type into
        # None, so it is answered with the 400 below instead of being caught
        # by the broad handler at the bottom and reported as a 500.
        property_data = request.get_json(silent=True)

        if not property_data:
            return jsonify({"error": "No JSON data provided"}), 400

        # Valid JSON may still be a list, string or number; those have no
        # .items() and cannot carry named features.
        if not isinstance(property_data, dict):
            return jsonify({"error": "JSON body must be an object of feature values"}), 400

        print("=== RECEIVED PAYLOAD ===")
        for key, value in property_data.items():
            print(f" {key}: {value} (type: {type(value)})")

        # Report every missing field at once rather than one per request.
        missing_fields = [
            api_key for api_key in feature_mapping if api_key not in property_data
        ]
        if missing_fields:
            return jsonify({
                "error": f"Missing required fields: {missing_fields}",
                "required_fields": list(feature_mapping.keys())
            }), 400

        # Rename API keys to the column names used by the model.
        sample = {
            model_key: property_data[api_key]
            for api_key, model_key in feature_mapping.items()
        }

        # Single-row frame: one request is one record.
        input_data = pd.DataFrame([sample])

        print("=== DATA SENT TO MODEL ===")
        print("DataFrame dtypes:")
        for col in input_data.columns:
            print(f" {col}: {input_data[col].dtype} - value: {input_data[col].iloc[0]}")

        print("=== MAKING PREDICTION ===")
        predicted_customer = model.predict(input_data)[0]

        # NumPy scalars are not JSON serializable; unwrap to a Python number.
        if hasattr(predicted_customer, 'item'):
            predicted_customer = predicted_customer.item()
        predicted_customer = float(predicted_customer)

        print(f"=== PREDICTION SUCCESS: {predicted_customer} ===")

        # The "Predicted_Sales" key is kept as-is so existing clients of this
        # endpoint keep working.
        return jsonify({
            "Predicted_Sales": predicted_customer,
            "status": "success",
            "input_received": {
                "fields_received": list(property_data.keys()),
                "fields_processed": list(sample.keys())
            }
        })

    except Exception as e:
        print("=== PREDICTION FAILED ===")
        print(f"Error type: {type(e).__name__}")
        print(f"Error message: {str(e)}")

        # Map the two recognisable input problems to a 400; anything else is
        # reported as a server-side failure.
        error_msg = str(e)
        if "unknown category" in error_msg.lower():
            return jsonify({
                "error": "Unknown category value provided",
                "details": "One of your categorical values doesn't match what the model was trained on",
                "debug_info": "Check the server logs for specific category issues"
            }), 400
        elif "isnan" in error_msg:
            return jsonify({
                "error": "Data type mismatch",
                "details": "The model encountered unexpected data types",
                "debug_info": "Check that numeric fields contain numbers and categorical fields contain strings"
            }), 400
        else:
            return jsonify({
                "error": f"Prediction failed: {str(e)}",
                "debug_info": "Check server logs for detailed error information"
            }), 500
|
| 163 |
+
|
| 164 |
+
# Test endpoint to check model expectations
|
| 165 |
+
@extraaLearn_predictor_api.get("/v1/model-info")
def get_model_info():
    """Describe the API's required fields and the loaded model as JSON.

    The response always carries the required field names, the full
    ``feature_mapping`` and the model's type string; the model's own
    ``feature_names_in_`` are added only when the model has that attribute.
    """
    summary = {}
    summary["required_fields"] = list(feature_mapping)
    summary["feature_mapping"] = feature_mapping
    summary["model_type"] = str(type(model))

    if hasattr(model, 'feature_names_in_'):
        expected_features = list(model.feature_names_in_)
        summary["model_expected_features"] = expected_features

    return jsonify(summary)
|
| 178 |
|
|
|
|
| 179 |
# Run the Flask development server when this script is executed directly.
if __name__ == "__main__":
    import os

    # The Werkzeug debugger lets anyone who can reach the server execute
    # arbitrary Python, so it must not be switched on unconditionally while
    # binding to all interfaces. Opt in explicitly (FLASK_DEBUG=1) for local
    # development only.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    extraaLearn_predictor_api.run(host="0.0.0.0", port=7860, debug=debug_enabled)
|