Upload folder using huggingface_hub
Browse files
app.py
CHANGED
|
@@ -11,65 +11,79 @@ extraaLearn_predictor_api = Flask("ExtraaLearn paid customers Predictor")
|
|
| 11 |
# Load the serialized prediction model from a path relative to the current
# working directory.
# NOTE: joblib artifacts are pickle-based, so only load files from a trusted source.
model = joblib.load("extraaLearn_model_prediction_model_v1_0.joblib")
|
| 12 |
|
| 13 |
# -----------------------------
|
| 14 |
-
# Feature mapping
|
| 15 |
# -----------------------------
|
| 16 |
-
# Translation table: camelCase key expected in the JSON request body ->
# snake_case column name used when building the model's input DataFrame.
# Insertion order is preserved and determines the DataFrame column order.
feature_mapping = dict(
    age="age",
    currentOccupation="current_occupation",
    firstInteraction="first_interaction",
    profileCompleted="profile_completed",
    websiteVisits="website_visits",
    timeSpentOnWebsite="time_spent_on_website",
    pageViewsPerVisit="page_views_per_visit",
    lastActivity="last_activity",
    printMediaType1="print_media_type1",
    printMediaType2="print_media_type2",
    digitalMedia="digital_media",
    educationalChannels="educational_channels",
    referral="referral",
)
|
| 31 |
|
| 32 |
# -----------------------------
|
| 33 |
-
#
|
| 34 |
# -----------------------------
|
| 35 |
-
def
|
| 36 |
-
"""
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
print("=== END MODEL DEBUG ===")
|
| 59 |
-
|
| 60 |
-
# Run debug on startup
|
| 61 |
-
debug_model_expectations()
|
| 62 |
|
| 63 |
# -----------------------------
|
| 64 |
-
# Routes
|
| 65 |
# -----------------------------
|
| 66 |
|
| 67 |
@extraaLearn_predictor_api.get("/ping")
def ping():
    """Liveness probe: always answers with the JSON body ``{"status": "ok"}``."""
    health_payload = {"status": "ok"}
    return jsonify(health_payload)
|
| 70 |
|
| 71 |
@extraaLearn_predictor_api.get("/")
def home():
    """Serve the plain-text greeting shown at the API root."""
    greeting = "Welcome to the ExtraaLearn customers Prediction API!"
    return greeting
|
| 74 |
|
| 75 |
@extraaLearn_predictor_api.post("/v1/customers")
|
|
@@ -80,98 +94,74 @@ def predict_sales_revenue():
|
|
| 80 |
try:
|
| 81 |
# Get the JSON data from the request body
|
| 82 |
property_data = request.get_json()
|
| 83 |
-
|
| 84 |
if not property_data:
|
| 85 |
return jsonify({"error": "No JSON data provided"}), 400
|
| 86 |
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
print(f" {key}: {value} (type: {type(value)})")
|
| 90 |
-
|
| 91 |
-
# Check for missing fields
|
| 92 |
-
missing_fields = []
|
| 93 |
-
for api_key in feature_mapping.keys():
|
| 94 |
-
if api_key not in property_data:
|
| 95 |
-
missing_fields.append(api_key)
|
| 96 |
-
|
| 97 |
-
if missing_fields:
|
| 98 |
-
return jsonify({
|
| 99 |
-
"error": f"Missing required fields: {missing_fields}",
|
| 100 |
-
"required_fields": list(feature_mapping.keys())
|
| 101 |
-
}), 400
|
| 102 |
|
| 103 |
# Map input keys to model feature names
|
| 104 |
sample = {}
|
| 105 |
for api_key, model_key in feature_mapping.items():
|
| 106 |
-
sample[model_key] =
|
| 107 |
|
| 108 |
-
# Convert
|
| 109 |
input_data = pd.DataFrame([sample])
|
| 110 |
|
| 111 |
-
|
| 112 |
-
print("
|
| 113 |
-
|
| 114 |
-
print(f" {col}: {input_data[col].dtype} - value: {input_data[col].iloc[0]}")
|
| 115 |
|
| 116 |
# Make prediction
|
| 117 |
-
print("=== MAKING PREDICTION ===")
|
| 118 |
predicted_customer = model.predict(input_data)[0]
|
| 119 |
-
|
| 120 |
-
# Convert to
|
| 121 |
if hasattr(predicted_customer, 'item'):
|
| 122 |
predicted_customer = predicted_customer.item()
|
| 123 |
predicted_customer = float(predicted_customer)
|
| 124 |
|
| 125 |
-
print(f"=== PREDICTION SUCCESS: {predicted_customer} ===")
|
| 126 |
-
|
| 127 |
return jsonify({
|
| 128 |
"Predicted_Sales": predicted_customer,
|
| 129 |
-
"status": "success"
|
| 130 |
-
"input_received": {
|
| 131 |
-
"fields_received": list(property_data.keys()),
|
| 132 |
-
"fields_processed": list(sample.keys())
|
| 133 |
-
}
|
| 134 |
})
|
| 135 |
|
|
|
|
|
|
|
| 136 |
except Exception as e:
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
else:
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
@extraaLearn_predictor_api.get("/v1/model-info")
def get_model_info():
    """Report, as JSON, which request fields the API requires and what model is loaded.

    The response always carries ``required_fields``, ``feature_mapping`` and
    ``model_type``; ``model_expected_features`` is added only when the loaded
    model exposes a ``feature_names_in_`` attribute.
    """
    payload = dict(
        required_fields=list(feature_mapping),
        feature_mapping=feature_mapping,
        model_type=str(type(model)),
    )

    # Not every estimator records the column names it was fitted on.
    try:
        fitted_columns = model.feature_names_in_
    except AttributeError:
        pass
    else:
        payload["model_expected_features"] = list(fitted_columns)

    return jsonify(payload)
|
| 175 |
|
| 176 |
if __name__ == "__main__":
    # Script entry point: start Flask's built-in server on all interfaces.
    import os

    # The Werkzeug interactive debugger allows arbitrary code execution, so it
    # must not be on by default for a server bound to 0.0.0.0.  Opt in
    # explicitly with FLASK_DEBUG=1 (or "true") when developing locally.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true")
    extraaLearn_predictor_api.run(host="0.0.0.0", port=7860, debug=debug_enabled)
|
|
|
|
| 11 |
# Load the serialized prediction model from a path relative to the current
# working directory.
# NOTE: joblib artifacts are pickle-based, so only load files from a trusted source.
model = joblib.load("extraaLearn_model_prediction_model_v1_0.joblib")
|
| 12 |
|
| 13 |
# -----------------------------
|
| 14 |
+
# Feature mapping and expected data types
|
| 15 |
# -----------------------------
|
| 16 |
+
# Translation table: camelCase key expected in the JSON request body ->
# snake_case column name used when building the model's input DataFrame.
# age, websiteVisits, timeSpentOnWebsite and pageViewsPerVisit are meant to be
# integers; every other field is a categorical text value.
feature_mapping = dict(
    age="age",
    currentOccupation="current_occupation",
    firstInteraction="first_interaction",
    profileCompleted="profile_completed",
    websiteVisits="website_visits",
    timeSpentOnWebsite="time_spent_on_website",
    pageViewsPerVisit="page_views_per_visit",
    lastActivity="last_activity",
    printMediaType1="print_media_type1",
    printMediaType2="print_media_type2",
    digitalMedia="digital_media",
    educationalChannels="educational_channels",
    referral="referral",
)
|
| 31 |
+
|
| 32 |
+
# Expected data types for validation
|
| 33 |
+
# Python type each incoming API field is coerced to during validation.
# Insertion order matters: it is the order in which fields are checked, and
# therefore which missing field gets reported first.
# NOTE(review): pageViewsPerVisit is coerced with int(), so a fractional
# value such as 3.7 is truncated to 3 - confirm the model expects whole numbers.
expected_types = dict(
    age=int,
    currentOccupation=str,
    firstInteraction=str,
    profileCompleted=str,
    websiteVisits=int,
    timeSpentOnWebsite=int,
    pageViewsPerVisit=int,
    lastActivity=str,
    printMediaType1=str,
    printMediaType2=str,
    digitalMedia=str,
    educationalChannels=str,
    referral=str,
)
|
| 48 |
|
| 49 |
# -----------------------------
|
| 50 |
+
# Data validation and preprocessing
|
| 51 |
# -----------------------------
|
| 52 |
+
def validate_and_preprocess_input(data, schema=None):
    """Check that *data* has every required field and coerce each value.

    Args:
        data: Decoded JSON request body; must be a mapping keyed by the API
            (camelCase) field names.  Keys not named in the schema are ignored.
        schema: Optional mapping of field name -> target type.  Defaults to
            the module-level ``expected_types``.  ``int``, ``float`` and
            ``str`` targets are applied as converters; any other target
            leaves the value untouched.

    Returns:
        A new dict holding only the schema's fields, in schema order, with
        each value converted to its target type.  As before, a float given
        for an ``int`` field is truncated (``3.7`` -> ``3``), and numeric
        strings such as ``"42"`` are accepted.

    Raises:
        ValueError: If *data* is not a mapping, a required field is missing,
            or a value cannot be converted (including ``null`` for any field,
            booleans for numeric fields, and infinities/NaN for ``int``).
    """
    from collections.abc import Mapping

    if schema is None:
        schema = expected_types

    # A JSON array/string/number body would otherwise produce a misleading
    # "missing field" message or an uncaught TypeError on indexing.
    if not isinstance(data, Mapping):
        raise ValueError(
            f"Request body must be a JSON object, got {type(data).__name__}"
        )

    validated_data = {}
    for field, expected_type in schema.items():
        if field not in data:
            raise ValueError(f"Missing required field: {field}")

        value = data[field]

        if expected_type not in (int, float, str):
            # Unknown target type: pass the value through unchanged.
            validated_data[field] = value
            continue

        numeric_target = expected_type is not str
        try:
            # str(None) would silently become the category "None", and
            # int(True) would silently become 1 - reject both explicitly.
            if value is None or (numeric_target and isinstance(value, bool)):
                raise TypeError("null or boolean is not an acceptable value")
            validated_data[field] = expected_type(value)
        except (ValueError, TypeError, OverflowError) as exc:
            # OverflowError covers int(float("inf")).
            raise ValueError(
                f"Invalid type for {field}: expected {expected_type.__name__}, "
                f"got {type(value).__name__}"
            ) from exc

    return validated_data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
|
| 75 |
# -----------------------------
|
| 76 |
+
# Routes
|
| 77 |
# -----------------------------
|
| 78 |
|
| 79 |
@extraaLearn_predictor_api.get("/ping")
def ping():
    """Liveness probe: always answers with the JSON body ``{"status": "ok"}``."""
    health_payload = {"status": "ok"}
    return jsonify(health_payload)
|
| 83 |
|
| 84 |
@extraaLearn_predictor_api.get("/")
def home():
    """Serve the plain-text greeting shown at the API root."""
    greeting = "Welcome to the ExtraaLearn customers Prediction API!"
    return greeting
|
| 88 |
|
| 89 |
@extraaLearn_predictor_api.post("/v1/customers")
|
|
|
|
| 94 |
try:
|
| 95 |
# Get the JSON data from the request body
|
| 96 |
property_data = request.get_json()
|
| 97 |
+
|
| 98 |
if not property_data:
|
| 99 |
return jsonify({"error": "No JSON data provided"}), 400
|
| 100 |
|
| 101 |
+
# Validate and preprocess input
|
| 102 |
+
validated_data = validate_and_preprocess_input(property_data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
# Map input keys to model feature names
|
| 105 |
sample = {}
|
| 106 |
for api_key, model_key in feature_mapping.items():
|
| 107 |
+
sample[model_key] = validated_data[api_key]
|
| 108 |
|
| 109 |
+
# Convert the extracted data into a Pandas DataFrame
|
| 110 |
input_data = pd.DataFrame([sample])
|
| 111 |
|
| 112 |
+
# Debug: Print the input data for inspection
|
| 113 |
+
print("Input data types:", input_data.dtypes)
|
| 114 |
+
print("Input data:", input_data)
|
|
|
|
| 115 |
|
| 116 |
# Make prediction
|
|
|
|
| 117 |
predicted_customer = model.predict(input_data)[0]
|
| 118 |
+
|
| 119 |
+
# Convert numpy types to Python native types for JSON serialization
|
| 120 |
if hasattr(predicted_customer, 'item'):
|
| 121 |
predicted_customer = predicted_customer.item()
|
| 122 |
predicted_customer = float(predicted_customer)
|
| 123 |
|
|
|
|
|
|
|
| 124 |
return jsonify({
|
| 125 |
"Predicted_Sales": predicted_customer,
|
| 126 |
+
"status": "success"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
})
|
| 128 |
|
| 129 |
+
except ValueError as e:
|
| 130 |
+
return jsonify({"error": str(e)}), 400
|
| 131 |
except Exception as e:
|
| 132 |
+
return jsonify({"error": f"Prediction failed: {str(e)}"}), 500
|
| 133 |
+
|
| 134 |
+
# Batch prediction (updated with similar fixes)
|
| 135 |
+
@extraaLearn_predictor_api.post("/v1/customersbatch")
def predict_sales_batch():
    """Score every row of an uploaded CSV file with the loaded model.

    The CSV is read from the multipart form field ``file`` and handed to
    ``model.predict`` as-is; no column renaming or type coercion happens here.

    Returns:
        200 with ``{id: prediction, ...}`` when the CSV has an ``id`` column,
        otherwise ``{"predictions": [...]}``.
        400 when the file is missing, cannot be parsed, or has no records.
        500 for any other failure, e.g. the model rejecting the columns.
    """
    upload = request.files.get("file")
    if upload is None:
        return jsonify({"error": "CSV file is required"}), 400

    # A malformed upload is a client error, so answer 400 instead of letting
    # it fall through to the generic 500 handler below.
    try:
        input_data = pd.read_csv(upload)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        return jsonify({"error": f"Could not parse CSV file: {exc}"}), 400

    if input_data.empty:
        return jsonify({"error": "CSV file contains no records"}), 400

    try:
        predictions = model.predict(input_data)

        # numpy scalars are not JSON serializable; convert to native floats.
        predicted_customers = [float(p) for p in predictions]

        if "id" in input_data.columns:
            # NOTE: rows sharing the same id collapse to the last prediction,
            # because the response is keyed by id.
            record_ids = input_data["id"].astype(str).tolist()
            output_dict = dict(zip(record_ids, predicted_customers))
        else:
            output_dict = {"predictions": predicted_customers}

        return jsonify(output_dict)

    except Exception as e:
        return jsonify({"error": str(e)}), 500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
|
| 166 |
if __name__ == "__main__":
    # Script entry point: start Flask's built-in server on all interfaces.
    import os

    # The Werkzeug interactive debugger allows arbitrary code execution, so it
    # must not be on by default for a server bound to 0.0.0.0.  Opt in
    # explicitly with FLASK_DEBUG=1 (or "true") when developing locally.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true")
    extraaLearn_predictor_api.run(host="0.0.0.0", port=7860, debug=debug_enabled)
|