Spaces:

manasranjanpani
/

ExtraaLearnCustomerPredictionBackend

Sleeping

App Files Files Community

manasranjanpani committed on Oct 5, 2025

Commit

c71245f

verified ·

1 Parent(s): d501c26

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

app.py +103 -113

app.py CHANGED Viewed

@@ -11,65 +11,79 @@ extraaLearn_predictor_api = Flask("ExtraaLearn paid customers Predictor")
 model = joblib.load("extraaLearn_model_prediction_model_v1_0.joblib")
 # -----------------------------
-# Feature mapping
 # -----------------------------
-feature_mapping = {
-    "age": "age",
-    "currentOccupation": "current_occupation",
-    "firstInteraction": "first_interaction",
-    "profileCompleted": "profile_completed",
-    "websiteVisits": "website_visits",
-    "timeSpentOnWebsite": "time_spent_on_website",
-    "pageViewsPerVisit": "page_views_per_visit",
-    "lastActivity": "last_activity",
-    "printMediaType1": "print_media_type1",
-    "printMediaType2": "print_media_type2",
-    "digitalMedia": "digital_media",
-    "educationalChannels": "educational_channels",
-    "referral": "referral"
 }
 # -----------------------------
-# Debug function to check model expectations
 # -----------------------------
-def debug_model_expectations():
-    """Debug what the model expects for features"""
-    print("=== MODEL DEBUG INFORMATION ===")
-    # Check if model has feature names
-    if hasattr(model, 'feature_names_in_'):
-        print(f"Model expects these features: {list(model.feature_names_in_)}")
-    else:
-        print("Model doesn't have explicit feature names")
-    # If it's a pipeline, check preprocessing steps
-    if hasattr(model, 'named_steps'):
-        print("Model is a pipeline with steps:")
-        for step_name, step in model.named_steps.items():
-            print(f"  - {step_name}: {type(step)}")
-            # Check for column transformers or encoders
-            if hasattr(step, 'transformers_'):
-                for name, transformer, columns in step.transformers_:
-                    print(f"    Transformer '{name}' handles columns: {columns}")
-                    if hasattr(transformer, 'categories_'):
-                        print(f"    Categories: {transformer.categories_}")
-    print("=== END MODEL DEBUG ===")
-# Run debug on startup
-debug_model_expectations()
 # -----------------------------
-# Routes with enhanced debugging
 # -----------------------------
 @extraaLearn_predictor_api.get("/ping")
 def ping():
     return jsonify({"status": "ok"})
 @extraaLearn_predictor_api.get("/")
 def home():
     return "Welcome to the ExtraaLearn customers Prediction API!"
 @extraaLearn_predictor_api.post("/v1/customers")
@@ -80,98 +94,74 @@ def predict_sales_revenue():
     try:
         # Get the JSON data from the request body
         property_data = request.get_json()
         if not property_data:
             return jsonify({"error": "No JSON data provided"}), 400
-        print("=== RECEIVED PAYLOAD ===")
-        for key, value in property_data.items():
-            print(f"  {key}: {value} (type: {type(value)})")
-        # Check for missing fields
-        missing_fields = []
-        for api_key in feature_mapping.keys():
-            if api_key not in property_data:
-                missing_fields.append(api_key)
-        if missing_fields:
-            return jsonify({
-                "error": f"Missing required fields: {missing_fields}",
-                "required_fields": list(feature_mapping.keys())
-            }), 400
         # Map input keys to model feature names
         sample = {}
         for api_key, model_key in feature_mapping.items():
-            sample[model_key] = property_data[api_key]
-        # Convert to DataFrame
         input_data = pd.DataFrame([sample])
-        print("=== DATA SENT TO MODEL ===")
-        print("DataFrame dtypes:")
-        for col in input_data.columns:
-            print(f"  {col}: {input_data[col].dtype} - value: {input_data[col].iloc[0]}")
         # Make prediction
-        print("=== MAKING PREDICTION ===")
         predicted_customer = model.predict(input_data)[0]
-        # Convert to serializable type
         if hasattr(predicted_customer, 'item'):
             predicted_customer = predicted_customer.item()
         predicted_customer = float(predicted_customer)
-        print(f"=== PREDICTION SUCCESS: {predicted_customer} ===")
         return jsonify({
             "Predicted_Sales": predicted_customer,
-            "status": "success",
-            "input_received": {
-                "fields_received": list(property_data.keys()),
-                "fields_processed": list(sample.keys())
-            }
         })
     except Exception as e:
-        print(f"=== PREDICTION FAILED ===")
-        print(f"Error type: {type(e).__name__}")
-        print(f"Error message: {str(e)}")
-        # Try to identify which feature might be causing the issue
-        error_msg = str(e)
-        if "unknown category" in error_msg.lower():
-            return jsonify({
-                "error": "Unknown category value provided",
-                "details": "One of your categorical values doesn't match what the model was trained on",
-                "debug_info": "Check the server logs for specific category issues"
-            }), 400
-        elif "isnan" in error_msg:
-            return jsonify({
-                "error": "Data type mismatch",
-                "details": "The model encountered unexpected data types",
-                "debug_info": "Check that numeric fields contain numbers and categorical fields contain strings"
-            }), 400
         else:
-            return jsonify({
-                "error": f"Prediction failed: {str(e)}",
-                "debug_info": "Check server logs for detailed error information"
-            }), 500
-# Test endpoint to check model expectations
-@extraaLearn_predictor_api.get("/v1/model-info")
-def get_model_info():
-    """Endpoint to get information about what the model expects"""
-    info = {
-        "required_fields": list(feature_mapping.keys()),
-        "feature_mapping": feature_mapping,
-        "model_type": str(type(model))
-    }
-    if hasattr(model, 'feature_names_in_'):
-        info["model_expected_features"] = list(model.feature_names_in_)
-    return jsonify(info)
 if __name__ == "__main__":
     extraaLearn_predictor_api.run(host="0.0.0.0", port=7860, debug=True)

 model = joblib.load("extraaLearn_model_prediction_model_v1_0.joblib")
 # -----------------------------
+# Feature mapping and expected data types
 # -----------------------------
+# Maps each API (camelCase) request key to the snake_case column name used
+# when the single-prediction route builds the model's input DataFrame.
+feature_mapping = {
+    "age": "age",  # integer
+    "currentOccupation": "current_occupation",  # categorical (string)
+    "firstInteraction": "first_interaction",  # categorical (string)
+    "profileCompleted": "profile_completed",  # categorical (string)
+    "websiteVisits": "website_visits",  # integer
+    "timeSpentOnWebsite": "time_spent_on_website",  # integer
+    # NOTE(review): declared as integer, but a per-visit average may be
+    # fractional -- confirm against the training data.
+    "pageViewsPerVisit": "page_views_per_visit",  # integer
+    "lastActivity": "last_activity",  # categorical (string)
+    "printMediaType1": "print_media_type1",  # categorical (string)
+    "printMediaType2": "print_media_type2",  # categorical (string)
+    "digitalMedia": "digital_media",  # categorical (string)
+    "educationalChannels": "educational_channels",  # categorical (string)
+    "referral": "referral",  # categorical (string)
+}
+# Python type that validate_and_preprocess_input coerces each API field to.
+# Every key listed here is required in the request payload.
+expected_types = {
+    "age": int,
+    "currentOccupation": str,
+    "firstInteraction": str,
+    "profileCompleted": str,
+    "websiteVisits": int,
+    "timeSpentOnWebsite": int,
+    "pageViewsPerVisit": int,
+    "lastActivity": str,
+    "printMediaType1": str,
+    "printMediaType2": str,
+    "digitalMedia": str,
+    "educationalChannels": str,
+    "referral": str
 }
 # -----------------------------
+# Data validation and preprocessing
 # -----------------------------
+def validate_and_preprocess_input(data):
+    """Validate a request payload and coerce each field to its expected type.
+
+    Args:
+        data: Decoded JSON object keyed by the API field names listed in
+            ``expected_types``.
+
+    Returns:
+        A new dict holding one entry per key of ``expected_types``, with the
+        value converted to the type declared there.
+
+    Raises:
+        ValueError: If ``data`` is not a JSON object, a required field is
+            missing or null, or a value cannot be converted.
+    """
+    # Checked up front so a non-object body (e.g. a bare number) surfaces as
+    # ValueError instead of a TypeError from the membership test below.
+    if not isinstance(data, dict):
+        raise ValueError("JSON body must be an object of field/value pairs")
+    validated_data = {}
+    for field, expected_type in expected_types.items():
+        if field not in data:
+            raise ValueError(f"Missing required field: {field}")
+        value = data[field]
+        type_error = (
+            f"Invalid type for {field}: expected {expected_type.__name__}, "
+            f"got {type(value).__name__}"
+        )
+        # str(None) would yield the literal text "None", and bool is an int
+        # subclass so int(True) would yield 1; neither is a real field value.
+        if value is None or (expected_type is int and isinstance(value, bool)):
+            raise ValueError(type_error)
+        # Convert to expected type
+        try:
+            if expected_type is int:
+                # NOTE(review): int() truncates fractional numbers (3.7 -> 3);
+                # confirm every int field is really whole-valued.
+                validated_data[field] = int(value)
+            elif expected_type is str:
+                validated_data[field] = str(value)
+            else:
+                validated_data[field] = value
+        except (ValueError, TypeError) as e:
+            raise ValueError(type_error) from e
+    return validated_data
 # -----------------------------
+# Routes
 # -----------------------------
 @extraaLearn_predictor_api.get("/ping")
 def ping():
+    """Health check for GET /ping: respond with the JSON body {"status": "ok"}."""
     return jsonify({"status": "ok"})
 @extraaLearn_predictor_api.get("/")
 def home():
+    """Landing route for GET /: return the API's welcome string."""
     return "Welcome to the ExtraaLearn customers Prediction API!"
 @extraaLearn_predictor_api.post("/v1/customers")
     try:
         # Get the JSON data from the request body
         property_data = request.get_json()
         if not property_data:
             return jsonify({"error": "No JSON data provided"}), 400
+        # Validate and preprocess input
+        validated_data = validate_and_preprocess_input(property_data)
         # Map input keys to model feature names
         sample = {}
         for api_key, model_key in feature_mapping.items():
+            sample[model_key] = validated_data[api_key]
+        # Convert the extracted data into a Pandas DataFrame
         input_data = pd.DataFrame([sample])
+        # Debug: Print the input data for inspection
+        print("Input data types:", input_data.dtypes)
+        print("Input data:", input_data)
         # Make prediction
         predicted_customer = model.predict(input_data)[0]
+        # Convert numpy types to Python native types for JSON serialization
         if hasattr(predicted_customer, 'item'):
             predicted_customer = predicted_customer.item()
         predicted_customer = float(predicted_customer)
         return jsonify({
             "Predicted_Sales": predicted_customer,
+            "status": "success"
         })
+    except ValueError as e:
+        return jsonify({"error": str(e)}), 400
     except Exception as e:
+        return jsonify({"error": f"Prediction failed: {str(e)}"}), 500
+# Batch prediction endpoint (CSV upload)
+@extraaLearn_predictor_api.post("/v1/customersbatch")
+def predict_sales_batch():
+    """Predict for every row of an uploaded CSV file.
+
+    Expects a multipart form upload whose ``file`` part is a CSV with one
+    record per row. The parsed frame is passed to ``model.predict`` as-is.
+
+    Returns:
+        A JSON object mapping each ``id`` value (as a string) to its
+        prediction when the CSV has an ``id`` column, otherwise
+        ``{"predictions": [...]}`` in row order. Responds with 400 when the
+        file is missing, cannot be parsed, or has no rows, and with 500 when
+        anything else fails.
+    """
+    try:
+        file = request.files.get("file")
+        if file is None:
+            return jsonify({"error": "CSV file is required"}), 400
+        # Parse separately so a bad upload is reported as a client error (400)
+        # rather than falling through to the generic 500 handler.
+        try:
+            input_data = pd.read_csv(file)
+        except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as e:
+            return jsonify({"error": f"Invalid CSV file: {e}"}), 400
+        if input_data.empty:
+            return jsonify({"error": "CSV file contains no records"}), 400
+        # NOTE(review): an "id" column, when present, is passed to the model
+        # together with the feature columns -- confirm the model ignores it.
+        predictions = model.predict(input_data)
+        # Convert numpy types to Python native types
+        predicted_customers = [float(p) for p in predictions]
+        if "id" in input_data.columns:
+            property_ids = input_data["id"].astype(str).tolist()
+            # Duplicate ids collapse to a single key; the last row wins.
+            output_dict = dict(zip(property_ids, predicted_customers))
         else:
+            output_dict = {"predictions": predicted_customers}
+        return jsonify(output_dict)
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
 if __name__ == "__main__":
+    # NOTE(review): debug=True enables Flask's interactive debugger and
+    # host="0.0.0.0" listens on all interfaces; confirm this entry point is
+    # only used for local development, never for a deployed server.
     extraaLearn_predictor_api.run(host="0.0.0.0", port=7860, debug=True)