"""Flask API that predicts whether a shipment will be returned.

Endpoints:
    GET  /             -- welcome / health-check message
    POST /predict      -- single-shipment prediction (JSON body)
    POST /predictbatch -- batch prediction (CSV file upload)

Loads a pre-trained XGBoost classifier plus the label encoders and the
feature-column order that were produced at training time.
"""

from flask import Flask, request, jsonify
import pandas as pd
import numpy as np
import xgboost as xgb
import joblib

# Initialize Flask app
shipping_return_predictor_api = Flask("Shipping Return Prediction")

# Load the trained model and the artifacts saved at training time.
model = joblib.load("shipment_return_predictor_model.pkl")
target_encoder = joblib.load("target_encoder.pkl")
# Dictionary of LabelEncoders, one per categorical feature column.
feature_encoders = joblib.load("feature_encoders.pkl")
# List of feature columns (and their order) used in training.
feature_columns = joblib.load("feature_columns.pkl")

# Raw feature names expected in the /predict JSON payload.
INPUT_FEATURES = [
    'ServiceType', 'FragilePerishable', 'Value', 'Weight',
    'InsuranceCoverage', 'ShipperCity', 'ConsigneeCity', 'ConsigneeCountry',
]

# Human-readable descriptions of model features, used when reporting the
# top contributing features of a prediction.
REASON_DESCRIPTIONS = {
    'ServiceType': "Type of Service",
    'FragilePerishable': "Fragility or Perishability",
    'Value': "Package Value",
    'Weight': "Package Weight",
    'InsuranceCoverage': "Insurance",
    'ShipperCity': "City where the package ships",
    'ConsigneeCity': "Destination City",
    'ConsigneeCountry': "Destination Country",
    'DeliveryAckDateTime': "Package Delivery",
}


def _encode_categoricals(input_df):
    """Label-encode categorical columns, mapping unseen values to "Unknown".

    Mutates and returns ``input_df``. Values the encoder never saw during
    training are replaced with the sentinel "Unknown" before transforming.
    """
    for col, encoder in feature_encoders.items():
        if col not in input_df.columns:
            continue
        known = set(encoder.classes_.tolist())
        unseen = ~input_df[col].isin(known)
        if unseen.any():
            input_df.loc[unseen, col] = "Unknown"
            if "Unknown" not in known:
                # NOTE(review): np.append leaves "Unknown" unsorted at the
                # end of classes_, which can break LabelEncoder.transform on
                # sklearn versions that binary-search a sorted classes_ --
                # confirm against the installed sklearn version.
                encoder.classes_ = np.append(encoder.classes_, "Unknown")
        input_df[col] = encoder.transform(input_df[col])
    return input_df


def _align_columns(input_df):
    """Add any training columns missing from the input (as 0) and reorder
    the columns to match the order used in training."""
    for col in feature_columns:
        if col not in input_df.columns:
            input_df[col] = 0
    return input_df[feature_columns]


def _predict_with_contribs(input_df):
    """Run the model on encoded rows.

    Returns (preds, pred_probs, contribs) where preds are the argmax class
    indices, pred_probs the probability of each predicted class, and
    contribs the per-feature SHAP contributions from the booster.
    """
    probs = model.predict_proba(input_df)
    preds = np.argmax(probs, axis=1)
    pred_probs = probs[np.arange(len(preds)), preds]
    booster = model.get_booster()
    dmatrix = xgb.DMatrix(input_df, feature_names=input_df.columns.tolist())
    contribs = booster.predict(dmatrix, pred_contribs=True)
    return preds, pred_probs, contribs


def _top_reasons(contribs, row_idx, pred_class, columns, k=3):
    """Return up to ``k`` human-readable reasons for one row's prediction,
    ranked by absolute SHAP contribution.

    Handles both pred_contribs output shapes: binary models yield one
    vector per row (rows x features+1), multiclass models yield one vector
    per class (rows x classes x features+1). The trailing bias term is
    dropped in either case.
    """
    contrib_vector = contribs[row_idx]
    if contrib_vector.ndim == 2:
        # Multiclass: pick the contribution vector of the predicted class.
        contrib_vector = contrib_vector[pred_class]
    contrib_vector = contrib_vector[:-1]  # drop the bias term
    feature_contribs = dict(zip(columns, contrib_vector))
    ranked = sorted(feature_contribs.items(),
                    key=lambda kv: abs(kv[1]), reverse=True)
    # Fall back to the raw feature name when no description is mapped
    # (previously /predict raised KeyError here while /predictbatch did not).
    return [REASON_DESCRIPTIONS.get(name, name) for name, _ in ranked[:k]]


@shipping_return_predictor_api.get('/')
def home():
    """Welcome / health-check endpoint."""
    return "Welcome to the Shipping Return Prediction API!"


@shipping_return_predictor_api.route('/predict', methods=['POST'])
def predict():
    """Predict the return likelihood for a single shipment.

    Expects a JSON body containing every key in INPUT_FEATURES. Returns the
    predicted-class probability (percent, 2 decimals) and the top three
    contributing features described in plain language; on failure returns
    {"error": ...} with HTTP 500.
    """
    try:
        product_data = request.get_json()
        # Extract only the features the model was trained on.
        sample = {feature: product_data[feature] for feature in INPUT_FEATURES}
        input_df = pd.DataFrame([sample])
        input_df = _encode_categoricals(input_df)
        input_df = _align_columns(input_df)

        preds, pred_probs, contribs = _predict_with_contribs(input_df)
        reasons = _top_reasons(contribs, 0, preds[0],
                               input_df.columns.tolist())

        response = {
            "Probability": format(pred_probs[0] * 100, '.2f'),
            "PredictedReasons": reasons
        }
        return jsonify(response)
    except Exception as e:
        # Surface the failure to the caller; a production deployment should
        # log the traceback and return a generic message instead.
        return jsonify({"error": str(e)}), 500


@shipping_return_predictor_api.post('/predictbatch')
def predict_shipping_return_batch():
    """Predict return likelihood for every row of an uploaded CSV file.

    Expects a multipart upload under the key 'file' containing a
    'TrackingNumber' column plus the training features. Returns one result
    object per row; on failure returns {"error": ...} with HTTP 500.
    """
    try:
        file = request.files['file']
        input_df = pd.read_csv(file)
        # Keep the original tracking numbers before columns are reordered.
        tracking_numbers = input_df['TrackingNumber'].tolist()

        input_df = _encode_categoricals(input_df)
        input_df = _align_columns(input_df)

        preds, pred_probs, contribs = _predict_with_contribs(input_df)
        columns = input_df.columns.tolist()

        results = [
            {
                "TrackingNumber": tracking_numbers[i],
                "Probability": format(pred_probs[i] * 100, '.2f'),
                "TopReasons": _top_reasons(contribs, i, pred_class, columns)
            }
            for i, pred_class in enumerate(preds)
        ]
        return jsonify(results)
    except Exception as e:
        return jsonify({"error": str(e)}), 500


if __name__ == '__main__':
    # debug=True is for local development only -- disable in production.
    shipping_return_predictor_api.run(debug=True)