agentsay committed on
Commit
3a16cbb
·
verified ·
1 Parent(s): 5b64956

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +58 -59
main.py CHANGED
@@ -7,25 +7,22 @@ import torch.nn as nn
7
  from torch.utils.data import Dataset, DataLoader
8
  from sklearn.preprocessing import LabelEncoder, MinMaxScaler
9
  from sklearn.metrics import mean_absolute_error
10
- from fastapi import FastAPI, Query
11
  from pydantic import BaseModel
12
 
13
- # ----------------------------
14
- # CONFIGURATION
15
- # ----------------------------
16
- CSV_PATH = "observation.csv" # Upload this in HF Space
17
- MODEL_PATH = "traffic_transformer_model_2.pth"
18
  INPUT_LEN = 72
19
  BATCH_SIZE = 128
20
  DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
21
 
22
- CONGESTION_THRESHOLDS = {'LOW': 200, 'MEDIUM': 300}
 
 
 
23
 
24
- app = FastAPI(title="Traffic Congestion Predictor", version="1.0")
25
-
26
- # ----------------------------
27
- # HELPERS
28
- # ----------------------------
29
  def determine_congestion_level(mean_count, thresholds):
30
  if mean_count <= thresholds['LOW']:
31
  return "Low"
@@ -34,12 +31,23 @@ def determine_congestion_level(mean_count, thresholds):
34
  else:
35
  return "High"
36
 
37
-
38
  class TransformerForecaster(nn.Module):
39
  def __init__(self, input_size, hidden_size=256, num_heads=8, dropout=0.1, seq_len=INPUT_LEN):
40
  super().__init__()
 
 
 
 
 
 
 
 
 
 
41
  self.pos_embedding = nn.Parameter(torch.randn(1, seq_len, input_size) * 0.01)
42
  self.layer_norm = nn.LayerNorm(input_size)
 
43
  encoder_layer = nn.TransformerEncoderLayer(
44
  d_model=input_size,
45
  nhead=num_heads,
@@ -63,34 +71,34 @@ class TransformerForecaster(nn.Module):
63
  x = x[:, -1, :]
64
  return self.fc(x).squeeze(-1)
65
 
66
-
67
class TrafficDataset(Dataset):
    """Sliding-window dataset for one-step-ahead traffic forecasting.

    Each sample pairs `input_len` consecutive rows of the first
    `feature_dim` columns with the last column of the row that
    immediately follows the window.
    """

    def __init__(self, data, input_len=INPUT_LEN, feature_dim=None):
        windows, targets = [], []
        n_windows = len(data) - input_len
        for start in range(n_windows):
            end = start + input_len
            windows.append(data[start:end, :feature_dim])
            targets.append(data[end, -1])
        # Stack via numpy first to avoid the slow per-element tensor path.
        self.X = torch.tensor(np.array(windows), dtype=torch.float32)
        self.y = torch.tensor(np.array(targets), dtype=torch.float32)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]
81
 
 
 
 
 
 
82
 
83
- # ----------------------------
84
- # LOAD MODEL ON STARTUP
85
- # ----------------------------
86
  @app.on_event("startup")
87
- def load_model():
88
- global model_loaded, df, le_to, scaler_X, scaler_y, feature_cols
89
 
90
- print("Loading model and preparing dataset...")
91
  df = pd.read_csv(CSV_PATH)
92
  df = df.drop(columns=["Datetime"], errors="ignore")
93
 
 
94
  def extract_hour(timeslot):
95
  if pd.isna(timeslot): return None
96
  try:
@@ -105,9 +113,9 @@ def load_model():
105
  df.dropna(subset=['StartHour'], inplace=True)
106
  df['StartHour'] = df['StartHour'].astype(int)
107
 
 
108
  le_to = LabelEncoder()
109
  df['To_encoded'] = le_to.fit_transform(df['To'])
110
-
111
  df['DayOfYear'] = (df['Month'] - 1) * 30 + df['Day']
112
  df['Day_sin'] = np.sin(2 * np.pi * df['DayOfYear'] / 365)
113
  df['Day_cos'] = np.cos(2 * np.pi * df['DayOfYear'] / 365)
@@ -122,9 +130,8 @@ def load_model():
122
  df['MA_3'] = df.groupby('To')['CarCount'].transform(lambda x: x.rolling(3).mean())
123
  df['EMA_5'] = df.groupby('To')['CarCount'].transform(lambda x: x.ewm(span=5, adjust=False).mean())
124
  df['ROLL12_mean'] = df.groupby('To')['CarCount'].transform(lambda x: x.rolling(12).mean())
125
- df['ROLL12_std'] = df.groupby('To')['CarCount'].transform(lambda x: x.rolling(12).std())
126
  df['Diff_1'] = df.groupby('To')['CarCount'].diff(1)
127
-
128
  df.dropna(inplace=True)
129
  df.reset_index(drop=True, inplace=True)
130
 
@@ -139,42 +146,33 @@ def load_model():
139
  scaler_y = MinMaxScaler()
140
  df['y_scaled'] = scaler_y.fit_transform(df[['CarCount']])
141
 
142
- # Recreate the model architecture and load only weights
143
- feature_dim = len(feature_cols)
144
- model_loaded = TransformerForecaster(input_size=feature_dim)
145
- state_dict = torch.load(MODEL_PATH, map_location=DEVICE, weights_only=True)
146
- model_loaded.load_state_dict(state_dict)
147
  model_loaded.to(DEVICE)
148
  model_loaded.eval()
149
 
150
- print("✅ Model weights successfully loaded on device:", DEVICE)
151
-
152
- # ----------------------------
153
- # API INPUT / OUTPUT SCHEMA
154
- # ----------------------------
155
class TrafficInput(BaseModel):
    """Request body for the /predict endpoint."""

    # NOTE(review): using fastapi `Query` as a field default inside a Pydantic
    # body model is unconventional — `pydantic.Field` is the usual choice for
    # body fields; confirm the intended request shape before changing it.
    place: str = Query(..., description="Target location for congestion prediction")
157
-
158
-
159
- # ----------------------------
160
- # API ROUTES
161
- # ----------------------------
162
- @app.get("/")
163
- def root():
164
- return {"message": "Traffic Congestion Predictor API is running 🚦"}
165
 
166
- @app.post("/predict")
167
- def predict_congestion(input_data: TrafficInput):
168
- place = input_data.place
169
-
170
- df_target = df[df['To'] == place].copy()
171
  if df_target.empty:
172
- return {"error": f"No data found for the target location: {place}"}
173
 
174
  values = df_target[feature_cols + ['y_scaled']].values
175
  feature_dim = len(feature_cols)
176
 
177
- test_dataset = TrafficDataset(values, input_len=INPUT_LEN, feature_dim=feature_dim)
 
 
 
 
 
 
178
  test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
179
 
180
  preds_scaled_all, actuals_scaled_all = [], []
@@ -200,8 +198,9 @@ def predict_congestion(input_data: TrafficInput):
200
  congestion_level = determine_congestion_level(mean_predicted_car_count, CONGESTION_THRESHOLDS)
201
 
202
  return {
203
- "target_place": place,
204
- "mean_absolute_error": round(mae_200, 3),
205
- "mean_predicted_car_count": round(mean_predicted_car_count, 2),
206
- "inferred_congestion_level": congestion_level
207
- }
 
 
7
  from torch.utils.data import Dataset, DataLoader
8
  from sklearn.preprocessing import LabelEncoder, MinMaxScaler
9
  from sklearn.metrics import mean_absolute_error
10
+ from fastapi import FastAPI
11
  from pydantic import BaseModel
12
 
13
+ # -------------------- CONFIG --------------------
14
+ CSV_PATH = "observation.csv" # place your CSV in the same dir
15
+ MODEL_PATH = "best_model.pt" # pretrained weights file
 
 
16
  INPUT_LEN = 72
17
  BATCH_SIZE = 128
18
  DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
19
 
20
+ CONGESTION_THRESHOLDS = {
21
+ 'LOW': 200,
22
+ 'MEDIUM': 300
23
+ }
24
 
25
+ # -------------------- HELPER FUNCS --------------------
 
 
 
 
26
  def determine_congestion_level(mean_count, thresholds):
27
  if mean_count <= thresholds['LOW']:
28
  return "Low"
 
31
  else:
32
  return "High"
33
 
34
+ # -------------------- MODEL --------------------
35
  class TransformerForecaster(nn.Module):
36
  def __init__(self, input_size, hidden_size=256, num_heads=8, dropout=0.1, seq_len=INPUT_LEN):
37
  super().__init__()
38
+
39
+ # ✅ Adjust number of heads dynamically if needed
40
+ if input_size % num_heads != 0:
41
+ for h in [1, 2, 4, 8, 16]:
42
+ if input_size % h == 0:
43
+ num_heads = h
44
+ break
45
+ else:
46
+ num_heads = 1 # fallback to 1 head
47
+
48
  self.pos_embedding = nn.Parameter(torch.randn(1, seq_len, input_size) * 0.01)
49
  self.layer_norm = nn.LayerNorm(input_size)
50
+
51
  encoder_layer = nn.TransformerEncoderLayer(
52
  d_model=input_size,
53
  nhead=num_heads,
 
71
  x = x[:, -1, :]
72
  return self.fc(x).squeeze(-1)
73
 
 
74
class TrafficDataset(Dataset):
    """Builds (window, next-step target) pairs from a 2-D array.

    X[i] holds rows i..i+input_len-1 restricted to the first
    `feature_dim` columns; y[i] is the last column of row i+input_len.
    """

    def __init__(self, data, input_len=INPUT_LEN, feature_dim=None):
        pairs = [
            (data[i:i + input_len, :feature_dim], data[i + input_len, -1])
            for i in range(len(data) - input_len)
        ]
        xs = [features for features, _ in pairs]
        ys = [target for _, target in pairs]
        self.X = torch.tensor(np.array(xs), dtype=torch.float32)
        self.y = torch.tensor(np.array(ys), dtype=torch.float32)

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]
86
 
87
+ # -------------------- FASTAPI --------------------
88
+ app = FastAPI(title="Traffic Congestion Inference API")
89
+
90
class RequestModel(BaseModel):
    """Request payload for /infer: the location to predict congestion for."""

    target_place: str
92
 
 
 
 
93
  @app.on_event("startup")
94
+ def load_data_and_model():
95
+ global df, scaler_X, scaler_y, feature_cols, model_loaded
96
 
97
+ print("Loading CSV...")
98
  df = pd.read_csv(CSV_PATH)
99
  df = df.drop(columns=["Datetime"], errors="ignore")
100
 
101
+ # Parse hours
102
  def extract_hour(timeslot):
103
  if pd.isna(timeslot): return None
104
  try:
 
113
  df.dropna(subset=['StartHour'], inplace=True)
114
  df['StartHour'] = df['StartHour'].astype(int)
115
 
116
+ # Encode and features
117
  le_to = LabelEncoder()
118
  df['To_encoded'] = le_to.fit_transform(df['To'])
 
119
  df['DayOfYear'] = (df['Month'] - 1) * 30 + df['Day']
120
  df['Day_sin'] = np.sin(2 * np.pi * df['DayOfYear'] / 365)
121
  df['Day_cos'] = np.cos(2 * np.pi * df['DayOfYear'] / 365)
 
130
  df['MA_3'] = df.groupby('To')['CarCount'].transform(lambda x: x.rolling(3).mean())
131
  df['EMA_5'] = df.groupby('To')['CarCount'].transform(lambda x: x.ewm(span=5, adjust=False).mean())
132
  df['ROLL12_mean'] = df.groupby('To')['CarCount'].transform(lambda x: x.rolling(12).mean())
133
+ df['ROLL12_std'] = df.groupby('To')['CarCount'].transform(lambda x: x.rolling(12).std())
134
  df['Diff_1'] = df.groupby('To')['CarCount'].diff(1)
 
135
  df.dropna(inplace=True)
136
  df.reset_index(drop=True, inplace=True)
137
 
 
146
  scaler_y = MinMaxScaler()
147
  df['y_scaled'] = scaler_y.fit_transform(df[['CarCount']])
148
 
149
+ # Load model
150
+ print("Loading model...")
151
+ sample_input = len(feature_cols)
152
+ model_loaded = TransformerForecaster(sample_input)
153
+ model_loaded.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))
154
  model_loaded.to(DEVICE)
155
  model_loaded.eval()
156
 
157
+ print("✅ Model and data loaded successfully.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
+ @app.post("/infer")
160
+ def infer(request: RequestModel):
161
+ target_place = request.target_place
162
+ df_target = df[df['To'] == target_place].copy()
 
163
  if df_target.empty:
164
+ return {"error": f"No data found for target location: {target_place}"}
165
 
166
  values = df_target[feature_cols + ['y_scaled']].values
167
  feature_dim = len(feature_cols)
168
 
169
+ train_size = int(len(values) * 0.8)
170
+ test_data = values[train_size:]
171
+
172
+ if len(test_data) < INPUT_LEN + 10:
173
+ return {"error": f"Not enough data for {target_place}"}
174
+
175
+ test_dataset = TrafficDataset(test_data, input_len=INPUT_LEN, feature_dim=feature_dim)
176
  test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
177
 
178
  preds_scaled_all, actuals_scaled_all = [], []
 
198
  congestion_level = determine_congestion_level(mean_predicted_car_count, CONGESTION_THRESHOLDS)
199
 
200
  return {
201
+ "target_place": target_place,
202
+ "mean_absolute_error": float(mae_200),
203
+ "mean_predicted_car_count": float(mean_predicted_car_count),
204
+ "congestion_level": str(congestion_level)
205
+ }
206
+