manasranjanpani committed on
Commit
c71245f
·
verified ·
1 Parent(s): d501c26

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +103 -113
app.py CHANGED
@@ -11,65 +11,79 @@ extraaLearn_predictor_api = Flask("ExtraaLearn paid customers Predictor")
11
  model = joblib.load("extraaLearn_model_prediction_model_v1_0.joblib")
12
 
13
  # -----------------------------
14
- # Feature mapping
15
  # -----------------------------
16
- feature_mapping = {
17
- "age": "age",
18
- "currentOccupation": "current_occupation",
19
- "firstInteraction": "first_interaction",
20
- "profileCompleted": "profile_completed",
21
- "websiteVisits": "website_visits",
22
- "timeSpentOnWebsite": "time_spent_on_website",
23
- "pageViewsPerVisit": "page_views_per_visit",
24
- "lastActivity": "last_activity",
25
- "printMediaType1": "print_media_type1",
26
- "printMediaType2": "print_media_type2",
27
- "digitalMedia": "digital_media",
28
- "educationalChannels": "educational_channels",
29
- "referral": "referral"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  }
31
 
32
  # -----------------------------
33
- # Debug function to check model expectations
34
  # -----------------------------
35
- def debug_model_expectations():
36
- """Debug what the model expects for features"""
37
- print("=== MODEL DEBUG INFORMATION ===")
38
-
39
- # Check if model has feature names
40
- if hasattr(model, 'feature_names_in_'):
41
- print(f"Model expects these features: {list(model.feature_names_in_)}")
42
- else:
43
- print("Model doesn't have explicit feature names")
44
-
45
- # If it's a pipeline, check preprocessing steps
46
- if hasattr(model, 'named_steps'):
47
- print("Model is a pipeline with steps:")
48
- for step_name, step in model.named_steps.items():
49
- print(f" - {step_name}: {type(step)}")
50
-
51
- # Check for column transformers or encoders
52
- if hasattr(step, 'transformers_'):
53
- for name, transformer, columns in step.transformers_:
54
- print(f" Transformer '{name}' handles columns: {columns}")
55
- if hasattr(transformer, 'categories_'):
56
- print(f" Categories: {transformer.categories_}")
57
-
58
- print("=== END MODEL DEBUG ===")
59
-
60
- # Run debug on startup
61
- debug_model_expectations()
62
 
63
  # -----------------------------
64
- # Routes with enhanced debugging
65
  # -----------------------------
66
 
67
  @extraaLearn_predictor_api.get("/ping")
68
  def ping():
 
69
  return jsonify({"status": "ok"})
70
 
71
  @extraaLearn_predictor_api.get("/")
72
  def home():
 
73
  return "Welcome to the ExtraaLearn customers Prediction API!"
74
 
75
  @extraaLearn_predictor_api.post("/v1/customers")
@@ -80,98 +94,74 @@ def predict_sales_revenue():
80
  try:
81
  # Get the JSON data from the request body
82
  property_data = request.get_json()
83
-
84
  if not property_data:
85
  return jsonify({"error": "No JSON data provided"}), 400
86
 
87
- print("=== RECEIVED PAYLOAD ===")
88
- for key, value in property_data.items():
89
- print(f" {key}: {value} (type: {type(value)})")
90
-
91
- # Check for missing fields
92
- missing_fields = []
93
- for api_key in feature_mapping.keys():
94
- if api_key not in property_data:
95
- missing_fields.append(api_key)
96
-
97
- if missing_fields:
98
- return jsonify({
99
- "error": f"Missing required fields: {missing_fields}",
100
- "required_fields": list(feature_mapping.keys())
101
- }), 400
102
 
103
  # Map input keys to model feature names
104
  sample = {}
105
  for api_key, model_key in feature_mapping.items():
106
- sample[model_key] = property_data[api_key]
107
 
108
- # Convert to DataFrame
109
  input_data = pd.DataFrame([sample])
110
 
111
- print("=== DATA SENT TO MODEL ===")
112
- print("DataFrame dtypes:")
113
- for col in input_data.columns:
114
- print(f" {col}: {input_data[col].dtype} - value: {input_data[col].iloc[0]}")
115
 
116
  # Make prediction
117
- print("=== MAKING PREDICTION ===")
118
  predicted_customer = model.predict(input_data)[0]
119
-
120
- # Convert to serializable type
121
  if hasattr(predicted_customer, 'item'):
122
  predicted_customer = predicted_customer.item()
123
  predicted_customer = float(predicted_customer)
124
 
125
- print(f"=== PREDICTION SUCCESS: {predicted_customer} ===")
126
-
127
  return jsonify({
128
  "Predicted_Sales": predicted_customer,
129
- "status": "success",
130
- "input_received": {
131
- "fields_received": list(property_data.keys()),
132
- "fields_processed": list(sample.keys())
133
- }
134
  })
135
 
 
 
136
  except Exception as e:
137
- print(f"=== PREDICTION FAILED ===")
138
- print(f"Error type: {type(e).__name__}")
139
- print(f"Error message: {str(e)}")
140
-
141
- # Try to identify which feature might be causing the issue
142
- error_msg = str(e)
143
- if "unknown category" in error_msg.lower():
144
- return jsonify({
145
- "error": "Unknown category value provided",
146
- "details": "One of your categorical values doesn't match what the model was trained on",
147
- "debug_info": "Check the server logs for specific category issues"
148
- }), 400
149
- elif "isnan" in error_msg:
150
- return jsonify({
151
- "error": "Data type mismatch",
152
- "details": "The model encountered unexpected data types",
153
- "debug_info": "Check that numeric fields contain numbers and categorical fields contain strings"
154
- }), 400
 
 
 
 
 
 
 
 
155
  else:
156
- return jsonify({
157
- "error": f"Prediction failed: {str(e)}",
158
- "debug_info": "Check server logs for detailed error information"
159
- }), 500
160
-
161
- # Test endpoint to check model expectations
162
- @extraaLearn_predictor_api.get("/v1/model-info")
163
- def get_model_info():
164
- """Endpoint to get information about what the model expects"""
165
- info = {
166
- "required_fields": list(feature_mapping.keys()),
167
- "feature_mapping": feature_mapping,
168
- "model_type": str(type(model))
169
- }
170
-
171
- if hasattr(model, 'feature_names_in_'):
172
- info["model_expected_features"] = list(model.feature_names_in_)
173
-
174
- return jsonify(info)
175
 
176
  if __name__ == "__main__":
177
  extraaLearn_predictor_api.run(host="0.0.0.0", port=7860, debug=True)
 
11
  model = joblib.load("extraaLearn_model_prediction_model_v1_0.joblib")
12
 
13
# -----------------------------
# Feature mapping and expected data types
# -----------------------------
# Maps each camelCase API field to the snake_case column name used when
# the model was trained. Insertion order defines the DataFrame column
# order sent to the model, so entries must not be rearranged.
feature_mapping = dict(
    age="age",                                   # integer
    currentOccupation="current_occupation",      # categorical
    firstInteraction="first_interaction",        # categorical
    profileCompleted="profile_completed",        # categorical
    websiteVisits="website_visits",              # integer
    timeSpentOnWebsite="time_spent_on_website",  # integer
    pageViewsPerVisit="page_views_per_visit",    # integer
    lastActivity="last_activity",                # categorical
    printMediaType1="print_media_type1",         # categorical
    printMediaType2="print_media_type2",         # categorical
    digitalMedia="digital_media",                # categorical
    educationalChannels="educational_channels",  # categorical
    referral="referral",                         # categorical
)
31
+
32
# Expected data types for validation.
# NOTE(review): pageViewsPerVisit is coerced to int here, but page views
# per visit may be fractional upstream — confirm int is intended.
expected_types = {
    "age": int,
    "currentOccupation": str,
    "firstInteraction": str,
    "profileCompleted": str,
    "websiteVisits": int,
    "timeSpentOnWebsite": int,
    "pageViewsPerVisit": int,
    "lastActivity": str,
    "printMediaType1": str,
    "printMediaType2": str,
    "digitalMedia": str,
    "educationalChannels": str,
    "referral": str
}

# -----------------------------
# Data validation and preprocessing
# -----------------------------
def validate_and_preprocess_input(data):
    """Validate a request payload and coerce each field to its expected type.

    Parameters
    ----------
    data : dict
        JSON payload keyed by the API field names listed in
        ``expected_types``.

    Returns
    -------
    dict
        New dict with every required field present and coerced to ``int``
        or ``str`` as declared in ``expected_types``.

    Raises
    ------
    ValueError
        If a required field is missing, or a value cannot be coerced to
        its declared type.
    """
    validated_data = {}

    for field, expected_type in expected_types.items():
        if field not in data:
            raise ValueError(f"Missing required field: {field}")

        value = data[field]

        # Coerce to the declared type (int or str). Calling the type
        # directly replaces the original int/str if-chain; report a
        # field-specific error and chain the original exception instead
        # of discarding it.
        try:
            validated_data[field] = expected_type(value)
        except (ValueError, TypeError) as exc:
            raise ValueError(
                f"Invalid type for {field}: expected "
                f"{expected_type.__name__}, got {type(value).__name__}"
            ) from exc

    return validated_data
 
 
 
 
 
74
 
75
  # -----------------------------
76
+ # Routes
77
  # -----------------------------
78
 
79
@extraaLearn_predictor_api.get("/ping")
def ping():
    """Health-check endpoint; always reports the service as up."""
    status_payload = {"status": "ok"}
    return jsonify(status_payload)
83
 
84
@extraaLearn_predictor_api.get("/")
def home():
    """Root endpoint returning a plain-text greeting for the API."""
    return "Welcome to the ExtraaLearn customers Prediction API!"
88
 
89
  @extraaLearn_predictor_api.post("/v1/customers")
 
94
  try:
95
  # Get the JSON data from the request body
96
  property_data = request.get_json()
97
+
98
  if not property_data:
99
  return jsonify({"error": "No JSON data provided"}), 400
100
 
101
+ # Validate and preprocess input
102
+ validated_data = validate_and_preprocess_input(property_data)
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
  # Map input keys to model feature names
105
  sample = {}
106
  for api_key, model_key in feature_mapping.items():
107
+ sample[model_key] = validated_data[api_key]
108
 
109
+ # Convert the extracted data into a Pandas DataFrame
110
  input_data = pd.DataFrame([sample])
111
 
112
+ # Debug: Print the input data for inspection
113
+ print("Input data types:", input_data.dtypes)
114
+ print("Input data:", input_data)
 
115
 
116
  # Make prediction
 
117
  predicted_customer = model.predict(input_data)[0]
118
+
119
+ # Convert numpy types to Python native types for JSON serialization
120
  if hasattr(predicted_customer, 'item'):
121
  predicted_customer = predicted_customer.item()
122
  predicted_customer = float(predicted_customer)
123
 
 
 
124
  return jsonify({
125
  "Predicted_Sales": predicted_customer,
126
+ "status": "success"
 
 
 
 
127
  })
128
 
129
+ except ValueError as e:
130
+ return jsonify({"error": str(e)}), 400
131
  except Exception as e:
132
+ return jsonify({"error": f"Prediction failed: {str(e)}"}), 500
133
+
134
# Batch prediction endpoint
@extraaLearn_predictor_api.post("/v1/customersbatch")
def predict_sales_batch():
    """Handle POST requests for batch prediction.

    Expects a CSV file upload (multipart form field ``file``) with one
    row per customer. An optional ``id`` column keys the response and is
    excluded from the features passed to the model, matching the
    feature-only payload the single-record endpoint sends.

    Returns
    -------
    flask.Response
        JSON mapping id -> prediction when an ``id`` column is present,
        otherwise ``{"predictions": [...]}``; 400 when no file was
        uploaded; 500 on any prediction failure.
    """
    try:
        file = request.files.get("file")
        if file is None:
            return jsonify({"error": "CSV file is required"}), 400

        # Read the CSV file into a Pandas DataFrame
        input_data = pd.read_csv(file)

        # Keep the id column (if any) for keying the response, but do not
        # feed it to the model: the single-record endpoint only sends the
        # mapped feature columns, so an extra "id" column would not match
        # the features the model was trained on.
        has_ids = "id" in input_data.columns
        features = input_data.drop(columns=["id"]) if has_ids else input_data

        # Make predictions
        predictions = model.predict(features)

        # Convert numpy types to native Python floats for JSON serialization
        predicted_customers = [float(p) for p in predictions]

        if has_ids:
            property_ids = input_data["id"].astype(str).tolist()
            output_dict = dict(zip(property_ids, predicted_customers))
        else:
            output_dict = {"predictions": predicted_customers}

        return jsonify(output_dict)

    except Exception as e:
        # Broad catch at the API boundary: surface the failure as JSON 500.
        return jsonify({"error": str(e)}), 500
 
 
 
 
 
 
 
 
 
 
 
 
 
165
 
166
if __name__ == "__main__":
    # NOTE(review): debug=True enables the Werkzeug reloader/debugger —
    # confirm this entry point is not used as-is in production.
    extraaLearn_predictor_api.run(host="0.0.0.0", port=7860, debug=True)