File size: 3,637 Bytes
1eaee2c 1e4446c a647fb1 1eaee2c 1e4446c 1eaee2c 1e4446c 1eaee2c 376866e 1e4446c 1eaee2c 1e4446c 376866e 1eaee2c 1e4446c 1eaee2c 376866e 1eaee2c 1e4446c 1eaee2c 1e4446c a647fb1 376866e f3de9c7 376866e f3de9c7 376866e f3de9c7 376866e f3de9c7 376866e f3de9c7 376866e f3de9c7 376866e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
import logging
from io import BytesIO
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
import requests
# Best-effort download of the pre-trained risk classifier at import time.
# On any failure the module stays importable: risk_model and FEATURE_COLS are
# both set to None, and predict_risk() then uses its default ML score.
try:
    MODEL_URL = "https://huggingface.co/samithcs/risk_predictor/resolve/main/risk_predictor/hist_gradient_boosting_risk_predictor.joblib"
    # Bounded timeout so a stalled connection cannot hang the import forever.
    RESPONSE = requests.get(MODEL_URL, timeout=30)
    # Fail on HTTP errors instead of trying to deserialize an error page.
    RESPONSE.raise_for_status()
    # SECURITY NOTE(review): joblib.load unpickles the payload, which can
    # execute arbitrary code. Keep this only while MODEL_URL points at a
    # trusted artifact; consider pinning a revision and verifying a checksum.
    risk_model = joblib.load(BytesIO(RESPONSE.content))
    # The loaded object may not expose feature_names_in_; FEATURE_COLS is
    # None in that case.
    FEATURE_COLS = (
        list(risk_model.feature_names_in_)
        if hasattr(risk_model, "feature_names_in_")
        else None
    )
except Exception:
    # Deliberate catch-all at the module boundary: record why loading failed
    # rather than swallowing it, then fall back to "no model".
    logging.getLogger(__name__).warning(
        "Could not load risk model; ML scoring is disabled.", exc_info=True
    )
    risk_model = None
    FEATURE_COLS = None
# Baseline risk per region / port name. calculate_rule_based_risk() looks a
# region up by exact key and uses 0.40 for anything not listed here.
REGION_BASE_RISKS = {
    "Shanghai": 0.55,
    "Singapore": 0.30,
    "Mumbai": 0.45,
    "Dubai": 0.35,
    "UAE": 0.35,
    "USA": 0.30,
    "Germany": 0.25,
    "China": 0.55,
    "India": 0.45,
    "Hong Kong": 0.50,
    "Rotterdam": 0.28,
    "Los Angeles": 0.40,
}
# Risk contribution per disruption keyword. calculate_rule_based_risk()
# matches each key as a lowercase substring of an incident description and
# sums the values of the keys it finds. Entry order is kept stable because it
# determines the order of that floating-point summation.
EVENT_RISK_MULTIPLIERS = {
    "strike": 0.30,
    "port strike": 0.35,
    "typhoon": 0.35,
    "hurricane": 0.35,
    "earthquake": 0.40,
    "flood": 0.25,
    "port closure": 0.45,
    "supplier outage": 0.25,
    "customs delay": 0.15,
    "congestion": 0.20,
    "pandemic": 0.30,
    "war": 0.50,
    "sanctions": 0.40,
}
def build_feature_row(feature_cols, query_dict, reference_row):
    """Build the feature row for a single query from a template row.

    Starts from a copy of ``reference_row`` and sets to 1 every column in
    ``feature_cols`` that matches the query:

    * columns containing ``Shipping_Mode`` and the requested shipping mode,
    * columns containing ``Order_Country`` or ``Order_Region`` and the
      requested region,
    * columns containing ``Order_Status_COMPLETE`` (always set).

    Matching is by substring on the column name.

    Args:
        feature_cols: Iterable of column-name strings.
        query_dict: Mapping read for ``'shipping_mode'`` and ``'region'``;
            other keys are ignored. A missing, ``None`` or empty shipping
            mode falls back to ``'Standard Class'``; a missing, ``None`` or
            empty region selects no country/region column.
        reference_row: Template supporting ``copy()`` and item assignment
            (e.g. a pandas Series or a dict). It is not modified.

    Returns:
        The copied row with the matching columns set to 1.
    """
    row = reference_row.copy()
    shipping_mode = query_dict.get('shipping_mode') or 'Standard Class'
    # An empty string is a substring of every column name, so an empty region
    # must be skipped explicitly or it would switch on every country/region
    # column. None is normalized too: ``None in col`` raises TypeError.
    region = query_dict.get('region') or ''

    for col in feature_cols:
        is_shipping_match = 'Shipping_Mode' in col and shipping_mode in col
        is_region_match = (
            bool(region)
            and ('Order_Country' in col or 'Order_Region' in col)
            and region in col
        )
        is_complete_status = 'Order_Status_COMPLETE' in col
        if is_shipping_match or is_region_match or is_complete_status:
            row[col] = 1
    return row
def calculate_rule_based_risk(region, days, incidents):
    """Compute a heuristic risk score from region, time horizon and incidents.

    The score is ``0.5 * base + 0.4 * events + 0.1 * time``, capped at 1.0:

    * ``base`` is the region's entry in ``REGION_BASE_RISKS`` (0.40 when the
      region is not listed).
    * ``events`` is the sum of ``EVENT_RISK_MULTIPLIERS`` values for every
      keyword found (case-insensitively, as a substring) in each incident.
    * ``time`` is ``1 - days / 30`` clamped to ``[0.1, 1.0]``, so a shorter
      horizon contributes more.

    Args:
        region: Region name used as the lookup key for the base risk.
        days: Time horizon in days (numeric).
        incidents: Iterable of incident descriptions, a single description
            string, or a falsy value for "no incidents". Items are converted
            with ``str()`` before matching.

    Returns:
        The combined score as a float no greater than 1.0.
    """
    base_risk = REGION_BASE_RISKS.get(region, 0.40)

    # A bare string would otherwise be iterated character by character and
    # never match any keyword; treat it as one incident description.
    if isinstance(incidents, str):
        incidents = [incidents]

    event_risk = 0.0
    for incident in incidents or ():
        incident_lower = str(incident).lower()
        matched = [kw for kw in EVENT_RISK_MULTIPLIERS if kw in incident_lower]
        for keyword in matched:
            # "port strike" also contains "strike": count only the most
            # specific keyword so one event is not scored twice.
            if any(keyword != other and keyword in other for other in matched):
                continue
            event_risk += EVENT_RISK_MULTIPLIERS[keyword]
    # NOTE(review): substring matching also fires inside unrelated words
    # (e.g. "war" in "warehouse"); confirm whether word-boundary matching is
    # wanted before tightening it.

    # The upper clamp keeps a negative horizon from pushing the factor past 1.
    time_factor = min(1.0, max(0.1, 1.0 - (days / 30.0)))
    return min(1.0, base_risk * 0.5 + event_risk * 0.4 + time_factor * 0.1)
def predict_risk(region: str, days: int = 5, origin=None, destination=None,
                 event_type=None, incidents=None, shipping_mode=None):
    """Return a blended risk score in ``[0, 1]``, rounded to two decimals.

    The result combines the rule-based score from
    ``calculate_rule_based_risk`` with the loaded model's
    ``predict_proba(...)[0, 1]`` value. When incidents are supplied the
    weighting is 40% model / 60% rules, otherwise 70% model / 30% rules.
    If no model is loaded, or the model call fails, the model score
    defaults to 0.40.

    Args:
        region: Region name; used for the rule-based lookup and for
            selecting region feature columns.
        days: Time horizon in days, passed to the rule-based score.
        origin: Carried in the query dict; not read by ``build_feature_row``.
        destination: Carried in the query dict; not read by
            ``build_feature_row``.
        event_type: Accepted for interface compatibility.
            NOTE(review): currently not used in the computation.
        incidents: Iterable of incident descriptions, a single description
            string, or ``None``.
        shipping_mode: Shipping mode name; defaults to ``"Standard Class"``.

    Returns:
        The blended score as a float, or 0.50 if an unexpected error occurs
        anywhere in the computation.
    """
    log = logging.getLogger(__name__)
    default_ml_risk = 0.40
    try:
        if shipping_mode is None:
            shipping_mode = "Standard Class"
        # A bare string is one incident, not a sequence of characters;
        # without this it would select the "incidents" weighting below while
        # contributing no event risk.
        if isinstance(incidents, str):
            incidents = [incidents]
        incidents = incidents or []

        rule_risk = calculate_rule_based_risk(region, days, incidents)

        ml_risk = default_ml_risk
        if risk_model is not None and FEATURE_COLS is not None:
            try:
                reference_row = pd.Series({col: 0 for col in FEATURE_COLS})
                query_dict = {
                    "region": region,
                    "days": days,
                    "origin": origin,
                    "destination": destination,
                    "shipping_mode": shipping_mode,
                }
                test_features = pd.DataFrame(
                    [build_feature_row(FEATURE_COLS, query_dict, reference_row)]
                )
                ml_risk = float(risk_model.predict_proba(test_features)[0, 1])
            except Exception:
                # Best-effort: keep serving with the default score, but
                # record the failure instead of hiding it.
                log.warning(
                    "Model prediction failed; using default ML risk.",
                    exc_info=True,
                )
                ml_risk = default_ml_risk

        # Reported incidents make the rule-based signal more informative,
        # so it gets the larger weight in that case.
        if incidents:
            final_risk = (ml_risk * 0.40) + (rule_risk * 0.60)
        else:
            final_risk = (ml_risk * 0.70) + (rule_risk * 0.30)

        final_risk = float(np.clip(final_risk, 0.0, 1.0))
        return round(final_risk, 2)
    except Exception:
        # Top-level boundary: callers always get a number back.
        log.warning(
            "Risk prediction failed; returning neutral score.", exc_info=True
        )
        return 0.50
|