# Provenance: rajaprabu27's Hugging Face repo, uploaded via huggingface_hub (commit 93ec0f0, verified)
from flask import Flask, request, jsonify
import pandas as pd
import numpy as np
import xgboost as xgb
import joblib
# Initialize Flask app
shipping_return_predictor_api = Flask("Shipping Return Prediction")

# Load trained model and encoders from disk at import time.
# The model is an XGBoost classifier (its Booster is used later for
# SHAP-style contribution scores via pred_contribs).
model = joblib.load("shipment_return_predictor_model.pkl")
# Presumably the label encoder for the prediction target — only referenced
# in commented-out code in the routes below; verify before removing.
target_encoder = joblib.load("target_encoder.pkl")
feature_encoders = joblib.load("feature_encoders.pkl") # Dictionary of encoders for categorical features
feature_columns = joblib.load("feature_columns.pkl") # List of feature columns used in training
@shipping_return_predictor_api.get('/')
def home():
    """Landing route: confirms the API is up and reachable."""
    greeting = "Welcome to the Shipping Return Prediction API!"
    return greeting
@shipping_return_predictor_api.route('/predict', methods=['POST'])
def predict():
    """Predict the return likelihood for a single shipment.

    Expects a JSON body with the keys ServiceType, FragilePerishable,
    Value, Weight, InsuranceCoverage, ShipperCity, ConsigneeCity and
    ConsigneeCountry.

    Returns JSON with:
      - "Probability": predicted-class probability as a percentage string
        with two decimals,
      - "PredictedReasons": human-readable names of the top three
        contributing features.
    Any failure (including missing keys) yields a 500 with {"error": ...}.
    """
    try:
        # Parse input JSON
        product_data = request.get_json()

        # Extract the model's feature values from the request payload.
        sample = {
            'ServiceType': product_data['ServiceType'],
            'FragilePerishable': product_data['FragilePerishable'],
            'Value': product_data['Value'],
            'Weight': product_data['Weight'],
            'InsuranceCoverage': product_data['InsuranceCoverage'],
            'ShipperCity': product_data['ShipperCity'],
            'ConsigneeCity': product_data['ConsigneeCity'],
            'ConsigneeCountry': product_data['ConsigneeCountry']
        }
        input_df = pd.DataFrame([sample])

        # Encode categorical features; unseen categories map to "Unknown".
        for col, encoder in feature_encoders.items():
            if col in input_df.columns:
                input_val = input_df[col].values[0]
                if input_val in encoder.classes_:
                    input_df[col] = encoder.transform([input_val])
                else:
                    if "Unknown" not in encoder.classes_:
                        # NOTE(review): mutates the shared encoder in place.
                        # Fine for a single-process dev server, but not
                        # thread-safe under a multi-threaded WSGI server.
                        encoder.classes_ = np.append(encoder.classes_, "Unknown")
                    input_df[col] = encoder.transform(["Unknown"])

        # Ensure every training-time column exists, then match training order.
        for col in feature_columns:
            if col not in input_df.columns:
                input_df[col] = 0
        input_df = input_df[feature_columns]

        # Predict class probabilities; keep the winning class and its prob.
        probs = model.predict_proba(input_df)
        preds = np.argmax(probs, axis=1)
        pred_probs = probs[np.arange(len(preds)), preds]

        # SHAP-style per-feature contribution scores from the raw booster.
        booster = model.get_booster()
        dmatrix = xgb.DMatrix(input_df, feature_names=input_df.columns.tolist())
        contribs = np.asarray(booster.predict(dmatrix, pred_contribs=True))

        # BUG FIX: pred_contribs output is 3-D (n_samples, n_classes,
        # n_features + 1) only for multiclass models; binary models return
        # a 2-D (n_samples, n_features + 1) array. The previous code indexed
        # contribs[i][pred_class] unconditionally, which for a binary model
        # selected a single feature's scalar and then crashed in zip().
        # The trailing element is the bias term and is dropped either way.
        top_features = []
        for i, pred_class in enumerate(preds):
            if contribs.ndim == 3:
                class_contribs = contribs[i, pred_class, :-1]
            else:
                class_contribs = contribs[i, :-1]
            feature_contribs = dict(zip(input_df.columns.tolist(), class_contribs))
            sorted_features = sorted(feature_contribs.items(),
                                     key=lambda x: abs(x[1]), reverse=True)
            top_features.append([f[0] for f in sorted_features[:3]])

        # Map raw feature names to human-readable reasons.
        reason_list = {
            'ServiceType': "Type of Service",
            'FragilePerishable': "Fragility or Perishability",
            'Value': "Package Value",
            'Weight': "Package Weight",
            'InsuranceCoverage': "Insurance",
            'ShipperCity': "City where the package ships",
            'ConsigneeCity': "Destination City",
            'ConsigneeCountry': "Destination Country",
            'DeliveryAckDateTime': "Package Delivery"
        }
        # BUG FIX: use .get() with the raw name as fallback so a feature
        # without a friendly label (e.g. a zero-filled training column) does
        # not raise KeyError — consistent with the /predictbatch endpoint.
        predicted_reasons = [reason_list.get(r, r) for r in top_features[0]]

        response = {
            "Probability": format(pred_probs[0] * 100, '.2f'),
            "PredictedReasons": predicted_reasons
        }
        return jsonify(response)
    except Exception as e:
        return jsonify({"error": str(e)}), 500
@shipping_return_predictor_api.post('/predictbatch')
def predict_shipping_return_batch():
    """Predict return likelihood for a batch of shipments from a CSV upload.

    Expects a multipart form with a 'file' field containing a CSV that has
    a TrackingNumber column plus the model's feature columns.

    Returns a JSON list with one entry per row: the tracking number, the
    predicted-class probability (percentage string, two decimals) and the
    top three human-readable contributing reasons.
    Any failure yields a 500 with {"error": ...}.
    """
    try:
        file = request.files['file']
        input_df = pd.read_csv(file)

        # Backup original tracking numbers before the frame is reduced to
        # model features only.
        tracking_numbers = input_df['TrackingNumber'].tolist()

        # Handle categorical encoding
        for col, encoder in feature_encoders.items():
            if col in input_df.columns:
                # Convert encoder classes to list once for clarity
                encoder_classes = encoder.classes_.tolist()
                # Replace unseen values with "Unknown"
                input_df[col] = input_df[col].apply(
                    lambda val: val if val in encoder_classes else "Unknown")
                # Append "Unknown" if not already present.
                # NOTE(review): mutates the shared encoder in place — not
                # thread-safe under a multi-threaded WSGI server.
                if "Unknown" not in encoder_classes:
                    encoder.classes_ = np.append(encoder.classes_, "Unknown")
                # Transform
                input_df[col] = encoder.transform(input_df[col])

        # Add missing training-time columns, then match training order.
        for col in feature_columns:
            if col not in input_df.columns:
                input_df[col] = 0
        input_df = input_df[feature_columns]

        # Predict class probabilities; keep the winning class and its prob.
        probs = model.predict_proba(input_df)
        preds = np.argmax(probs, axis=1)
        pred_probs = probs[np.arange(len(preds)), preds]

        # SHAP-style contribution analysis from the raw booster.
        booster = model.get_booster()
        dmatrix = xgb.DMatrix(input_df, feature_names=input_df.columns.tolist())
        contribs = np.asarray(booster.predict(dmatrix, pred_contribs=True))

        reason_list = {
            'ServiceType': "Type of Service",
            'FragilePerishable': "Fragility or Perishability",
            'Value': "Package Value",
            'Weight': "Package Weight",
            'InsuranceCoverage': "Insurance",
            'ShipperCity': "City where the package ships",
            'ConsigneeCity': "Destination City",
            'ConsigneeCountry': "Destination Country",
            'DeliveryAckDateTime': "Package Delivery"
        }

        # Compile result per row
        results = []
        for i, pred_class in enumerate(preds):
            # BUG FIX: pred_contribs output is 2-D (n_samples,
            # n_features + 1) for binary models but 3-D (n_samples,
            # n_classes, n_features + 1) for multiclass. The previous code
            # used contribs[i][:-1] unconditionally, which on a multiclass
            # model dropped the last *class* row instead of the bias term,
            # producing wrong attributions. Select the predicted class's
            # vector when 3-D, then drop the trailing bias term.
            if contribs.ndim == 3:
                contrib_vector = contribs[i, pred_class, :-1]
            else:
                contrib_vector = contribs[i, :-1]
            feature_contribs = dict(zip(input_df.columns.tolist(), contrib_vector))
            sorted_features = sorted(feature_contribs.items(),
                                     key=lambda x: abs(x[1]), reverse=True)
            top_features = [f[0] for f in sorted_features[:3]]
            top_reasons = [reason_list.get(f, f) for f in top_features]
            results.append({
                "TrackingNumber": tracking_numbers[i],
                "Probability": format(pred_probs[i] * 100, '.2f'),
                "TopReasons": top_reasons
            })
        return jsonify(results)
    except Exception as e:
        return jsonify({"error": str(e)}), 500
# Run the Flask development server when this file is executed directly.
if __name__ == '__main__':
    # NOTE(review): debug=True enables the interactive debugger and
    # auto-reloader — never use in production deployments.
    shipping_return_predictor_api.run(debug=True)