Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -7,25 +7,22 @@ import torch.nn as nn
|
|
| 7 |
from torch.utils.data import Dataset, DataLoader
|
| 8 |
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
|
| 9 |
from sklearn.metrics import mean_absolute_error
|
| 10 |
-
from fastapi import FastAPI
|
| 11 |
from pydantic import BaseModel
|
| 12 |
|
| 13 |
-
# ----------------------------
|
| 14 |
-
#
|
| 15 |
-
#
|
| 16 |
-
CSV_PATH = "observation.csv" # Upload this in HF Space
|
| 17 |
-
MODEL_PATH = "traffic_transformer_model_2.pth"
|
| 18 |
INPUT_LEN = 72
|
| 19 |
BATCH_SIZE = 128
|
| 20 |
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
|
| 21 |
|
| 22 |
-
CONGESTION_THRESHOLDS = {
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
# ----------------------------
|
| 27 |
-
# HELPERS
|
| 28 |
-
# ----------------------------
|
| 29 |
def determine_congestion_level(mean_count, thresholds):
|
| 30 |
if mean_count <= thresholds['LOW']:
|
| 31 |
return "Low"
|
|
@@ -34,12 +31,23 @@ def determine_congestion_level(mean_count, thresholds):
|
|
| 34 |
else:
|
| 35 |
return "High"
|
| 36 |
|
| 37 |
-
|
| 38 |
class TransformerForecaster(nn.Module):
|
| 39 |
def __init__(self, input_size, hidden_size=256, num_heads=8, dropout=0.1, seq_len=INPUT_LEN):
|
| 40 |
super().__init__()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
self.pos_embedding = nn.Parameter(torch.randn(1, seq_len, input_size) * 0.01)
|
| 42 |
self.layer_norm = nn.LayerNorm(input_size)
|
|
|
|
| 43 |
encoder_layer = nn.TransformerEncoderLayer(
|
| 44 |
d_model=input_size,
|
| 45 |
nhead=num_heads,
|
|
@@ -63,34 +71,34 @@ class TransformerForecaster(nn.Module):
|
|
| 63 |
x = x[:, -1, :]
|
| 64 |
return self.fc(x).squeeze(-1)
|
| 65 |
|
| 66 |
-
|
| 67 |
class TrafficDataset(Dataset):
|
| 68 |
def __init__(self, data, input_len=INPUT_LEN, feature_dim=None):
|
| 69 |
self.X, self.y = [], []
|
| 70 |
for i in range(len(data) - input_len):
|
| 71 |
-
self.X.append(data[i:i+input_len, :feature_dim])
|
| 72 |
-
self.y.append(data[i+input_len, -1])
|
| 73 |
self.X = torch.tensor(np.array(self.X), dtype=torch.float32)
|
| 74 |
self.y = torch.tensor(np.array(self.y), dtype=torch.float32)
|
| 75 |
-
|
| 76 |
def __len__(self):
|
| 77 |
return len(self.X)
|
| 78 |
-
|
| 79 |
def __getitem__(self, idx):
|
| 80 |
return self.X[idx], self.y[idx]
|
| 81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
|
| 83 |
-
# ----------------------------
|
| 84 |
-
# LOAD MODEL ON STARTUP
|
| 85 |
-
# ----------------------------
|
| 86 |
@app.on_event("startup")
|
| 87 |
-
def
|
| 88 |
-
global
|
| 89 |
|
| 90 |
-
print("Loading
|
| 91 |
df = pd.read_csv(CSV_PATH)
|
| 92 |
df = df.drop(columns=["Datetime"], errors="ignore")
|
| 93 |
|
|
|
|
| 94 |
def extract_hour(timeslot):
|
| 95 |
if pd.isna(timeslot): return None
|
| 96 |
try:
|
|
@@ -105,9 +113,9 @@ def load_model():
|
|
| 105 |
df.dropna(subset=['StartHour'], inplace=True)
|
| 106 |
df['StartHour'] = df['StartHour'].astype(int)
|
| 107 |
|
|
|
|
| 108 |
le_to = LabelEncoder()
|
| 109 |
df['To_encoded'] = le_to.fit_transform(df['To'])
|
| 110 |
-
|
| 111 |
df['DayOfYear'] = (df['Month'] - 1) * 30 + df['Day']
|
| 112 |
df['Day_sin'] = np.sin(2 * np.pi * df['DayOfYear'] / 365)
|
| 113 |
df['Day_cos'] = np.cos(2 * np.pi * df['DayOfYear'] / 365)
|
|
@@ -122,9 +130,8 @@ def load_model():
|
|
| 122 |
df['MA_3'] = df.groupby('To')['CarCount'].transform(lambda x: x.rolling(3).mean())
|
| 123 |
df['EMA_5'] = df.groupby('To')['CarCount'].transform(lambda x: x.ewm(span=5, adjust=False).mean())
|
| 124 |
df['ROLL12_mean'] = df.groupby('To')['CarCount'].transform(lambda x: x.rolling(12).mean())
|
| 125 |
-
df['ROLL12_std']
|
| 126 |
df['Diff_1'] = df.groupby('To')['CarCount'].diff(1)
|
| 127 |
-
|
| 128 |
df.dropna(inplace=True)
|
| 129 |
df.reset_index(drop=True, inplace=True)
|
| 130 |
|
|
@@ -139,42 +146,33 @@ def load_model():
|
|
| 139 |
scaler_y = MinMaxScaler()
|
| 140 |
df['y_scaled'] = scaler_y.fit_transform(df[['CarCount']])
|
| 141 |
|
| 142 |
-
#
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
model_loaded.load_state_dict(
|
| 147 |
model_loaded.to(DEVICE)
|
| 148 |
model_loaded.eval()
|
| 149 |
|
| 150 |
-
print("✅ Model
|
| 151 |
-
|
| 152 |
-
# ----------------------------
|
| 153 |
-
# API INPUT / OUTPUT SCHEMA
|
| 154 |
-
# ----------------------------
|
| 155 |
-
class TrafficInput(BaseModel):
|
| 156 |
-
place: str = Query(..., description="Target location for congestion prediction")
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
# ----------------------------
|
| 160 |
-
# API ROUTES
|
| 161 |
-
# ----------------------------
|
| 162 |
-
@app.get("/")
|
| 163 |
-
def root():
|
| 164 |
-
return {"message": "Traffic Congestion Predictor API is running 🚦"}
|
| 165 |
|
| 166 |
-
@app.post("/
|
| 167 |
-
def
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
df_target = df[df['To'] == place].copy()
|
| 171 |
if df_target.empty:
|
| 172 |
-
return {"error": f"No data found for
|
| 173 |
|
| 174 |
values = df_target[feature_cols + ['y_scaled']].values
|
| 175 |
feature_dim = len(feature_cols)
|
| 176 |
|
| 177 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
|
| 179 |
|
| 180 |
preds_scaled_all, actuals_scaled_all = [], []
|
|
@@ -200,8 +198,9 @@ def predict_congestion(input_data: TrafficInput):
|
|
| 200 |
congestion_level = determine_congestion_level(mean_predicted_car_count, CONGESTION_THRESHOLDS)
|
| 201 |
|
| 202 |
return {
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
|
|
|
|
|
| 7 |
from torch.utils.data import Dataset, DataLoader
|
| 8 |
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
|
| 9 |
from sklearn.metrics import mean_absolute_error
|
| 10 |
+
from fastapi import FastAPI
|
| 11 |
from pydantic import BaseModel
|
| 12 |
|
| 13 |
+
# -------------------- CONFIG --------------------
|
| 14 |
+
CSV_PATH = "observation.csv" # place your CSV in the same dir
|
| 15 |
+
MODEL_PATH = "best_model.pt" # pretrained weights file
|
|
|
|
|
|
|
INPUT_LEN = 72    # length of the input window (past time steps fed to the model); assumes fixed-interval slots — TODO confirm slot duration
BATCH_SIZE = 128  # batch size used by the inference DataLoader
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'  # prefer GPU when available

# Thresholds on the mean predicted car count used to label congestion.
# Visible logic: count <= LOW -> "Low", otherwise (ultimately) "High";
# NOTE(review): the Medium branch is presumably `<= MEDIUM` in
# determine_congestion_level — confirm against the full function body.
CONGESTION_THRESHOLDS = {
    'LOW': 200,
    'MEDIUM': 300
}
|
| 24 |
|
| 25 |
+
# -------------------- HELPER FUNCS --------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
def determine_congestion_level(mean_count, thresholds):
|
| 27 |
if mean_count <= thresholds['LOW']:
|
| 28 |
return "Low"
|
|
|
|
| 31 |
else:
|
| 32 |
return "High"
|
| 33 |
|
| 34 |
+
# -------------------- MODEL --------------------
|
| 35 |
class TransformerForecaster(nn.Module):
|
| 36 |
def __init__(self, input_size, hidden_size=256, num_heads=8, dropout=0.1, seq_len=INPUT_LEN):
|
| 37 |
super().__init__()
|
| 38 |
+
|
| 39 |
+
# ✅ Adjust number of heads dynamically if needed
|
| 40 |
+
if input_size % num_heads != 0:
|
| 41 |
+
for h in [1, 2, 4, 8, 16]:
|
| 42 |
+
if input_size % h == 0:
|
| 43 |
+
num_heads = h
|
| 44 |
+
break
|
| 45 |
+
else:
|
| 46 |
+
num_heads = 1 # fallback to 1 head
|
| 47 |
+
|
| 48 |
self.pos_embedding = nn.Parameter(torch.randn(1, seq_len, input_size) * 0.01)
|
| 49 |
self.layer_norm = nn.LayerNorm(input_size)
|
| 50 |
+
|
| 51 |
encoder_layer = nn.TransformerEncoderLayer(
|
| 52 |
d_model=input_size,
|
| 53 |
nhead=num_heads,
|
|
|
|
| 71 |
x = x[:, -1, :]
|
| 72 |
return self.fc(x).squeeze(-1)
|
| 73 |
|
|
|
|
| 74 |
class TrafficDataset(Dataset):
    """Sliding-window dataset over a 2-D numeric array.

    Sample i is the window ``data[i : i+input_len, :feature_dim]`` (features)
    paired with the scalar target ``data[i+input_len, -1]`` — i.e. the last
    column of the row immediately after the window. With ``feature_dim=None``
    the window keeps every column, including the target column.
    """

    def __init__(self, data, input_len=INPUT_LEN, feature_dim=None):
        n_windows = len(data) - input_len
        windows, targets = [], []
        for start in range(n_windows):
            stop = start + input_len
            windows.append(data[start:stop, :feature_dim])
            targets.append(data[stop, -1])
        # Stack via np.array first so torch.tensor gets one contiguous buffer.
        self.X = torch.tensor(np.array(windows), dtype=torch.float32)
        self.y = torch.tensor(np.array(targets), dtype=torch.float32)

    def __len__(self):
        """Number of (window, target) pairs."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the (features, target) pair at position ``idx``."""
        return self.X[idx], self.y[idx]
|
| 86 |
|
| 87 |
+
# -------------------- FASTAPI --------------------
|
| 88 |
+
app = FastAPI(title="Traffic Congestion Inference API")
|
| 89 |
+
|
| 90 |
+
class RequestModel(BaseModel):
|
| 91 |
+
target_place: str
|
| 92 |
|
|
|
|
|
|
|
|
|
|
| 93 |
@app.on_event("startup")
|
| 94 |
+
def load_data_and_model():
|
| 95 |
+
global df, scaler_X, scaler_y, feature_cols, model_loaded
|
| 96 |
|
| 97 |
+
print("Loading CSV...")
|
| 98 |
df = pd.read_csv(CSV_PATH)
|
| 99 |
df = df.drop(columns=["Datetime"], errors="ignore")
|
| 100 |
|
| 101 |
+
# Parse hours
|
| 102 |
def extract_hour(timeslot):
|
| 103 |
if pd.isna(timeslot): return None
|
| 104 |
try:
|
|
|
|
| 113 |
df.dropna(subset=['StartHour'], inplace=True)
|
| 114 |
df['StartHour'] = df['StartHour'].astype(int)
|
| 115 |
|
| 116 |
+
# Encode and features
|
| 117 |
le_to = LabelEncoder()
|
| 118 |
df['To_encoded'] = le_to.fit_transform(df['To'])
|
|
|
|
| 119 |
df['DayOfYear'] = (df['Month'] - 1) * 30 + df['Day']
|
| 120 |
df['Day_sin'] = np.sin(2 * np.pi * df['DayOfYear'] / 365)
|
| 121 |
df['Day_cos'] = np.cos(2 * np.pi * df['DayOfYear'] / 365)
|
|
|
|
| 130 |
df['MA_3'] = df.groupby('To')['CarCount'].transform(lambda x: x.rolling(3).mean())
|
| 131 |
df['EMA_5'] = df.groupby('To')['CarCount'].transform(lambda x: x.ewm(span=5, adjust=False).mean())
|
| 132 |
df['ROLL12_mean'] = df.groupby('To')['CarCount'].transform(lambda x: x.rolling(12).mean())
|
| 133 |
+
df['ROLL12_std'] = df.groupby('To')['CarCount'].transform(lambda x: x.rolling(12).std())
|
| 134 |
df['Diff_1'] = df.groupby('To')['CarCount'].diff(1)
|
|
|
|
| 135 |
df.dropna(inplace=True)
|
| 136 |
df.reset_index(drop=True, inplace=True)
|
| 137 |
|
|
|
|
| 146 |
scaler_y = MinMaxScaler()
|
| 147 |
df['y_scaled'] = scaler_y.fit_transform(df[['CarCount']])
|
| 148 |
|
| 149 |
+
# Load model
|
| 150 |
+
print("Loading model...")
|
| 151 |
+
sample_input = len(feature_cols)
|
| 152 |
+
model_loaded = TransformerForecaster(sample_input)
|
| 153 |
+
model_loaded.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))
|
| 154 |
model_loaded.to(DEVICE)
|
| 155 |
model_loaded.eval()
|
| 156 |
|
| 157 |
+
print("✅ Model and data loaded successfully.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
|
| 159 |
+
@app.post("/infer")
|
| 160 |
+
def infer(request: RequestModel):
|
| 161 |
+
target_place = request.target_place
|
| 162 |
+
df_target = df[df['To'] == target_place].copy()
|
|
|
|
| 163 |
if df_target.empty:
|
| 164 |
+
return {"error": f"No data found for target location: {target_place}"}
|
| 165 |
|
| 166 |
values = df_target[feature_cols + ['y_scaled']].values
|
| 167 |
feature_dim = len(feature_cols)
|
| 168 |
|
| 169 |
+
train_size = int(len(values) * 0.8)
|
| 170 |
+
test_data = values[train_size:]
|
| 171 |
+
|
| 172 |
+
if len(test_data) < INPUT_LEN + 10:
|
| 173 |
+
return {"error": f"Not enough data for {target_place}"}
|
| 174 |
+
|
| 175 |
+
test_dataset = TrafficDataset(test_data, input_len=INPUT_LEN, feature_dim=feature_dim)
|
| 176 |
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
|
| 177 |
|
| 178 |
preds_scaled_all, actuals_scaled_all = [], []
|
|
|
|
| 198 |
congestion_level = determine_congestion_level(mean_predicted_car_count, CONGESTION_THRESHOLDS)
|
| 199 |
|
| 200 |
return {
|
| 201 |
+
"target_place": target_place,
|
| 202 |
+
"mean_absolute_error": float(mae_200),
|
| 203 |
+
"mean_predicted_car_count": float(mean_predicted_car_count),
|
| 204 |
+
"congestion_level": str(congestion_level)
|
| 205 |
+
}
|
| 206 |
+
|