agentsay committed on
Commit
49d77bc
·
verified ·
1 Parent(s): fa2ea69

Upload 5 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ observation.csv filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official PyTorch runtime (includes CUDA for GPU)
# NOTE(review): verify this tag exists on Docker Hub — PyTorch 2.4.0 images
# are published with cuDNN 9 (…-cudnn9-runtime); a cudnn8 variant of 2.4.0
# may not be available. TODO confirm before building.
FROM pytorch/pytorch:2.4.0-cuda12.1-cudnn8-runtime

# Set environment:
#   PYTHONUNBUFFERED        — stream logs immediately (no stdout buffering)
#   PYTHONDONTWRITEBYTECODE — don't write .pyc files inside the container
#   PORT / HOST             — informational defaults; the CMD below hard-codes
#                             the same host/port values
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PORT=7860 \
    HOST=0.0.0.0

# Create working directory
WORKDIR /app

# Copy files
# NOTE(review): copying the full context before `pip install` invalidates the
# install layer on any source change; copying requirements.txt first would
# cache dependency installs.
COPY . /app

# Install dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Expose port for Hugging Face
EXPOSE 7860

# Start FastAPI app using Uvicorn
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import re
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
# NOTE(review): mean_absolute_error is imported but never used below — safe to drop.
from sklearn.metrics import mean_absolute_error
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

# ================= CONFIG =================
CSV_PATH = "observation.csv"                       # historical observations (git-LFS file)
SAVE_FULL = "traffic_transformer_model_2.pth"      # fully pickled model (not a state_dict)
INPUT_LEN = 72                                     # sliding-window length in time steps
BATCH_SIZE = 128                                   # inference batch size
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Mean-car-count cutoffs for the congestion label:
# <= LOW -> "Low", <= MEDIUM -> "Medium", else "High".
CONGESTION_THRESHOLDS = {
    'LOW': 200,
    'MEDIUM': 300
}
24
+
25
+ # ================= UTILS =================
26
def determine_congestion_level(mean_count, thresholds):
    """Map a mean vehicle count to a congestion label.

    Args:
        mean_count: average predicted car count.
        thresholds: dict with 'LOW' and 'MEDIUM' upper bounds (inclusive).

    Returns:
        "Low" when mean_count <= thresholds['LOW'],
        "Medium" when <= thresholds['MEDIUM'], otherwise "High".
    """
    # Check from the top band down with guard clauses.
    if mean_count > thresholds['MEDIUM']:
        return "High"
    if mean_count > thresholds['LOW']:
        return "Medium"
    return "Low"
33
+
34
+ # ================= MODEL =================
35
class TransformerForecaster(nn.Module):
    """Encoder-only Transformer forecasting one scalar from a feature sequence.

    Input:  (batch, seq_len, input_size) float tensor.
    Output: (batch,) tensor — the prediction taken from the last time step.
    """

    def __init__(self, input_size, hidden_size=256, num_heads=8, dropout=0.1, seq_len=INPUT_LEN):
        super().__init__()
        # Submodules are created in the same order as before so that random
        # parameter initialization is reproducible across both versions.
        # Learned additive positional embedding, small init scale.
        self.pos_embedding = nn.Parameter(torch.randn(1, seq_len, input_size) * 0.01)
        self.layer_norm = nn.LayerNorm(input_size)
        # NOTE(review): input_size must be divisible by num_heads — enforced
        # by nn.TransformerEncoderLayer itself.
        layer = nn.TransformerEncoderLayer(
            d_model=input_size,
            nhead=num_heads,
            dim_feedforward=hidden_size,
            dropout=dropout,
            batch_first=True,
            activation='gelu',
        )
        self.transformer = nn.TransformerEncoder(layer, num_layers=3)
        self.fc = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size, 1),
        )

    def forward(self, x):
        # Add positional information (truncated to the actual sequence length),
        # normalize, then encode.
        steps = x.size(1)
        hidden = self.layer_norm(x + self.pos_embedding[:, :steps, :])
        encoded = self.transformer(hidden)
        # Only the final time step's representation feeds the regression head.
        last = encoded[:, -1, :]
        return self.fc(last).squeeze(-1)
62
+
63
class TrafficDataset(Dataset):
    """Sliding-window dataset over a 2-D numpy array.

    Each sample is `input_len` consecutive rows restricted to the first
    `feature_dim` columns; the target is the LAST column of the row that
    immediately follows the window.
    """

    def __init__(self, data, input_len=INPUT_LEN, feature_dim=None):
        window_count = len(data) - input_len
        windows = [data[start:start + input_len, :feature_dim]
                   for start in range(window_count)]
        targets = [data[start + input_len, -1] for start in range(window_count)]
        # Convert via np.array first to avoid the slow list-of-arrays path.
        self.X = torch.tensor(np.array(windows), dtype=torch.float32)
        self.y = torch.tensor(np.array(targets), dtype=torch.float32)

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]
77
+
78
# ================= FASTAPI APP =================
app = FastAPI(title="Traffic Congestion API", version="1.0")

class InputData(BaseModel):
    """Request body for /predict."""
    # Destination name; must match a value of the CSV's 'To' column exactly
    # (the endpoint strips surrounding whitespace before matching).
    target_place: str
83
+
84
print("🔁 Loading and preparing data...")

# Load the raw observations at import time; the app cannot start without them.
try:
    df = pd.read_csv(CSV_PATH)
except FileNotFoundError:
    raise FileNotFoundError(f"File not found at {CSV_PATH}")

# ========== DATA PREPROCESSING ==========
# 'Datetime' is unused downstream; errors="ignore" keeps this safe when the
# column is absent.
df = df.drop(columns=["Datetime"], errors="ignore")
93
+
94
def extract_hour(timeslot):
    """Parse the starting hour from a time-slot string.

    Accepts slots such as "9:00AM - 10:00AM" or "14:00 - 15:00" and returns
    the start hour (0-23) as an int, or None when the slot is missing or
    unparseable.

    Fixes over the original: the two bare `except:` clauses are narrowed, and
    the fallback branch no longer references `start_time` before assignment
    (previously a latent NameError masked by the bare except when the input
    was not a string).
    """
    if pd.isna(timeslot):
        return None
    # str() guards against non-string slot values (e.g. stray numerics).
    start_time = str(timeslot).split('-')[0].strip()
    # Insert a space before AM/PM ("9:00AM" -> "9:00 AM") so strptime matches.
    start_time = re.sub(r'(?i)(AM|PM)', r' \1', start_time).strip().upper()
    # Try 12-hour format first, then 24-hour.
    for fmt in ('%I:%M %p', '%H:%M'):
        try:
            return pd.to_datetime(start_time, format=fmt).hour
        except (ValueError, TypeError):
            continue
    return None
105
+
106
# Derive the start hour from the raw 'TimeSlot' strings; rows whose slot
# cannot be parsed are dropped.
df['StartHour'] = df['TimeSlot'].apply(extract_hour)
df.dropna(subset=['StartHour'], inplace=True)
df['StartHour'] = df['StartHour'].astype(int)

# Encode the destination as an integer feature.
le_to = LabelEncoder()
df['To_encoded'] = le_to.fit_transform(df['To'])

# Approximate day-of-year (assumes 30-day months) and cyclical encodings so
# the model sees calendar periodicity without a discontinuity at year/day
# boundaries.
df['DayOfYear'] = (df['Month'] - 1) * 30 + df['Day']
df['Day_sin'] = np.sin(2 * np.pi * df['DayOfYear'] / 365)
df['Day_cos'] = np.cos(2 * np.pi * df['DayOfYear'] / 365)
df['Hour_sin'] = np.sin(2 * np.pi * df['StartHour'] / 24)
df['Hour_cos'] = np.cos(2 * np.pi * df['StartHour'] / 24)

# Chronological order within each destination is required for the lag/rolling
# features below to be meaningful.
df = df.sort_values(['To', 'Year', 'Month', 'Day', 'StartHour']).reset_index(drop=True)

# Per-destination lagged car counts (grouped shift keeps locations separate).
for lag in [1, 2, 3, 6, 12, 24]:
    df[f'Lag_{lag}'] = df.groupby('To')['CarCount'].shift(lag)

# Per-destination smoothing/statistics features.
df['MA_3'] = df.groupby('To')['CarCount'].transform(lambda x: x.rolling(3).mean())
df['EMA_5'] = df.groupby('To')['CarCount'].transform(lambda x: x.ewm(span=5, adjust=False).mean())
df['ROLL12_mean'] = df.groupby('To')['CarCount'].transform(lambda x: x.rolling(12).mean())
df['ROLL12_std'] = df.groupby('To')['CarCount'].transform(lambda x: x.rolling(12).std())
df['Diff_1'] = df.groupby('To')['CarCount'].diff(1)

# Lags/rolling windows leave NaNs at each group's head — drop those rows.
df.dropna(inplace=True)
df.reset_index(drop=True, inplace=True)

# Feature order here must match what the saved model was trained on —
# TODO confirm against the training script.
feature_cols = [
    'StartHour', 'Hour_sin', 'Hour_cos', 'Day_sin', 'Day_cos',
    'To_encoded', 'MA_3', 'EMA_5', 'ROLL12_mean', 'ROLL12_std', 'Diff_1'
] + [f'Lag_{i}' for i in [1, 2, 3, 6, 12, 24]]

# Scale features and target to [0, 1]; scaler_y is reused at inference time
# to invert predictions back to car counts.
scaler_X = MinMaxScaler()
df[feature_cols] = scaler_X.fit_transform(df[feature_cols])

scaler_y = MinMaxScaler()
df['y_scaled'] = scaler_y.fit_transform(df[['CarCount']])

# ========== MODEL LOADING ==========
# NOTE(review): torch.load unpickles a full model object — it executes
# arbitrary code from the file. Only load checkpoints from trusted sources;
# a state_dict + weights_only=True would be safer.
try:
    model_loaded = torch.load(SAVE_FULL, map_location=DEVICE)
    model_loaded.eval()
    print(f"✅ Model loaded successfully from {SAVE_FULL}")
except Exception as e:
    raise RuntimeError(f"Failed to load model: {e}")
151
+
152
+ # ================= API ENDPOINT =================
153
@app.post("/predict")
def predict_congestion(data: InputData):
    """Predict the mean car count and congestion level for a destination.

    Runs the loaded model over the last 20% of the location's history in
    sliding windows of INPUT_LEN steps and averages up to the first 200
    predictions.

    Raises:
        HTTPException 404: no rows exist for the requested location.
        HTTPException 400: not enough history to build evaluation windows.

    Fix over the original: the per-batch accumulation of ground-truth values
    (`actuals_scaled_all`) was dead code — collected on every request and
    never read — so it has been removed.
    """
    target_place = data.target_place.strip()
    df_target = df[df['To'] == target_place].copy()

    if df_target.empty:
        raise HTTPException(status_code=404, detail=f"No data found for location: {target_place}")

    values = df_target[feature_cols + ['y_scaled']].values
    feature_dim = len(feature_cols)
    # Evaluate on the held-out tail (last 20%) to mirror the training split.
    train_size = int(len(values) * 0.8)
    test_data = values[train_size:]

    if len(test_data) < INPUT_LEN + 50:
        raise HTTPException(status_code=400, detail=f"Not enough data for {target_place}")

    test_dataset = TrafficDataset(test_data, input_len=INPUT_LEN, feature_dim=feature_dim)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

    preds_scaled_all = []

    with torch.no_grad():
        for X_batch, _ in test_loader:
            X_batch = X_batch.to(DEVICE)
            preds_scaled_all.extend(model_loaded(X_batch).view(-1).cpu().numpy())
            # 200 predictions are enough for a stable mean; stop early.
            if len(preds_scaled_all) >= 200:
                break

    # Invert the target scaling back to car counts in original units.
    preds_all = scaler_y.inverse_transform(np.array(preds_scaled_all).reshape(-1, 1)).flatten()
    mean_predicted_car_count = float(np.mean(preds_all[:200]))
    congestion_level = determine_congestion_level(mean_predicted_car_count, CONGESTION_THRESHOLDS)

    return {
        "target_place": target_place,
        "mean_predicted_car_count": round(mean_predicted_car_count, 2),
        "inferred_congestion_level": congestion_level
    }
192
+
observation.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf32a2b56af6bd5f1cc9bfba4636e1dfb1b1c86f8b28b284bc4c53d2fc9b23ce
3
+ size 41704189
requirements.txt ADDED
File without changes
traffic_transformer_model_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce350fdb9766d1a01bf225497766b4a3d583b9c4ff9065875a7a04481f6e2b4a
3
+ size 172097