File size: 2,715 Bytes
1eaee2c
772e21d
1eaee2c
 
a647fb1
 
1eaee2c
 
772e21d
 
 
82c49b8
 
1eaee2c
772e21d
 
 
 
 
 
 
1eaee2c
772e21d
 
 
 
a647fb1
 
1eaee2c
772e21d
 
 
82c49b8
772e21d
a647fb1
1eaee2c
 
 
 
 
d2fb8ef
1eaee2c
 
 
 
 
 
 
 
 
 
 
 
a647fb1
1eaee2c
 
 
 
 
 
 
d2fb8ef
1eaee2c
772e21d
 
 
1eaee2c
 
 
 
772e21d
 
 
1eaee2c
d2fb8ef
1eaee2c
772e21d
 
 
 
 
 
 
 
d2fb8ef
772e21d
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import numpy as np
import pandas as pd
import tensorflow as tf
import joblib
import requests
from io import BytesIO


# ----------------------------
# Load model and scaler from HF Hub
# ----------------------------
# NOTE: Hugging Face "tree/main" URLs return the HTML file-browser page, not
# the file itself; direct downloads must use "resolve/main".
model_url = "https://huggingface.co/samithcs/timeseries_risk/resolve/main/timeseries_risk/lstm_risk_model.keras"
scaler_url = "https://huggingface.co/samithcs/timeseries_risk/resolve/main/timeseries_risk/scaler.joblib"

# Download the model to a local temp file (Keras loads from a file path).
model_path = "/tmp/lstm_risk_model.keras"
with requests.get(model_url, stream=True) as r:
    r.raise_for_status()
    with open(model_path, "wb") as f:
        for chunk in r.iter_content(chunk_size=8192):
            f.write(chunk)

# Load model
model = tf.keras.models.load_model(model_path)

# Load the scaler straight from memory — joblib accepts a file-like object.
response = requests.get(scaler_url)
response.raise_for_status()  # fail fast on a bad download instead of joblib choking on an error page
scaler = joblib.load(BytesIO(response.content))

# ----------------------------
# Load your CSV dataset (optional)
# ----------------------------
# Same URL fix as above: "resolve/main" serves the raw CSV; "tree/main" would
# hand pandas an HTML page.
data_path = "https://huggingface.co/samithcs/risk_predictor/resolve/main/supply_chain_disruptions_features.csv"
df = pd.read_csv(data_path)

region_col = "Order City"
region_name = "Shanghai"

df_region = df[df[region_col] == region_name].copy()
if len(df_region) < 100:
    # Too few rows for this region — fall back to a fixed random sample so the
    # sequence builder below still has enough data to work with.
    df_region = df.sample(200, random_state=42) if len(df) >= 200 else df

feature_cols = [
    "Days for shipping (real)", "Sales per customer", "Order Item Discount",
    "Order Item Product Price", "Order Item Quantity"
]
label_col = "Late_delivery_risk"
seq_length = 7  # model input window: 7 consecutive rows per sample

X_all = df_region[feature_cols].fillna(0).astype(float).values
y_all = df_region[label_col].fillna(0).astype(int).values

# Scale with the scaler fitted at training time (loaded above).
X_scaled = scaler.transform(X_all)

# Build sliding windows: each sample is `seq_length` consecutive rows, and the
# label is the risk flag of the row immediately after the window.
X_seq, y_seq = [], []
for i in range(len(X_scaled) - seq_length):
    X_seq.append(X_scaled[i:i+seq_length])
    y_seq.append(y_all[i+seq_length])
X_seq = np.array(X_seq)
y_seq = np.array(y_seq)


# ----------------------------
# Train/test split (optional)
# ----------------------------
# Chronological 80/20 split (no shuffling — these are time-ordered windows).
# Guard against test_size == 0: with fewer than 5 sequences the original
# `int(0.2 * n)` is 0, and slicing with `[:-0]` / `[-0:]` would make the train
# set EMPTY and the test set the whole data. Force at least one test sample.
test_size = max(1, int(0.2 * len(X_seq)))
X_train, X_test = X_seq[:-test_size], X_seq[-test_size:]
y_train, y_test = y_seq[:-test_size], y_seq[-test_size:]

# ----------------------------
# Evaluate using loaded model
# ----------------------------
test_loss, test_acc = model.evaluate(X_test, y_test)


# ----------------------------
# Prediction function
# ----------------------------
def predict_risk_for_next_day(sequence, threshold=0.5):
    """Predict next-day risk from one window of raw (unscaled) feature rows.

    Scales *sequence* with the module-level ``scaler``, runs it through the
    loaded ``model`` as a single-sample batch, and thresholds the resulting
    probability.

    Returns a ``(probability, label)`` pair, where label is 1 when the
    probability exceeds *threshold* and 0 otherwise.
    """
    scaled_window = scaler.transform(sequence)
    batch = np.expand_dims(scaled_window, axis=0)  # (1, seq_len, n_features)
    probability = model.predict(batch)[0][0]
    label = 1 if probability > threshold else 0
    return probability, label

# Example usage: run one prediction on the first test window, if any exist.
if len(X_test) > 0:
    predict_risk_for_next_day(X_test[0], threshold=0.5)