AxelHolst committed on
Commit
e28725d
·
1 Parent(s): 515b160

Initial deployment

Browse files
Files changed (6) hide show
  1. README.md +42 -4
  2. app.py +346 -0
  3. holidays.py +68 -0
  4. predictor.py +222 -0
  5. requirements.txt +8 -0
  6. weather.py +106 -0
README.md CHANGED
@@ -1,14 +1,52 @@
1
  ---
2
  title: HappySardines
3
- emoji: 🦀
4
- colorFrom: yellow
5
  colorTo: blue
6
  sdk: gradio
7
- sdk_version: 6.2.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
  short_description: Predict bus crowding levels in Östergötland, Sweden
12
  ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: HappySardines
3
+ emoji: 🐟
4
+ colorFrom: blue
5
  colorTo: blue
6
  sdk: gradio
7
+ sdk_version: 4.44.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
  short_description: Predict bus crowding levels in Östergötland, Sweden
12
  ---
13
 
14
+ # 🐟 HappySardines
15
+
16
+ **How packed are buses in Östergötland?**
17
+
18
+ Drop a pin on the map, pick a time, and find out how crowded buses typically are in that area. Built with ML using historical transit data from Östgötatrafiken.
19
+
20
+ ## How it works
21
+
22
+ This tool predicts typical bus crowding levels based on:
23
+ - **Location** - Different areas have different ridership patterns
24
+ - **Time** - Rush hours vs. off-peak
25
+ - **Day of week** - Weekdays vs. weekends
26
+ - **Weather** - Temperature, precipitation, etc.
27
+ - **Holidays** - Swedish red days and work-free days
28
+
29
+ ## Data sources
30
+
31
+ - Historical bus occupancy data from Östgötatrafiken (GTFS-RT, Nov-Dec 2025)
32
+ - Weather forecasts from [Open-Meteo](https://open-meteo.com/)
33
+ - Swedish holiday calendar from [Svenska Dagar API](https://sholiday.faboul.se/)
34
+
35
+ ## Limitations
36
+
37
+ - Predictions are based on historical patterns, not real-time data
38
+ - Accuracy varies by location and time
39
+ - The model predicts general area crowding, not specific bus lines
40
+
41
+ ## Technical details
42
+
43
+ - **Model**: XGBoost Classifier trained on ~6M trip records
44
+ - **Features**: Location, time, weather, holidays
45
+ - **Feature Store**: Hopsworks
46
+ - **Framework**: Gradio
47
+
48
+ ## Credits
49
+
50
+ Built for **KTH ID2223 - Scalable Machine Learning and Deep Learning**
51
+
52
+ By: Axel & Kajsa
app.py ADDED
@@ -0,0 +1,346 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ HappySardines - Bus Occupancy Predictor UI
3
+
4
+ A Gradio app that predicts how crowded buses are in Östergötland based on
5
+ location, time, weather, and holidays.
6
+ """
7
+
8
+ import os
9
+ import gradio as gr
10
+ import folium
11
+ import pandas as pd
12
+ import numpy as np
13
+ from datetime import datetime, timedelta
14
+
15
+ # Import prediction and data fetching modules
16
+ from predictor import predict_occupancy, predict_occupancy_mock, OCCUPANCY_LABELS
17
+ from weather import get_weather_for_prediction
18
+ from holidays import get_holiday_features
19
+
20
# Startup configuration: choose between the real model and the mock predictor.
# Setting the USE_MOCK environment variable to "true" skips model loading.
USE_MOCK = os.environ.get("USE_MOCK", "false").lower() == "true"

if not USE_MOCK:
    try:
        from predictor import load_model

        load_model()
        print("Model loaded successfully - using real predictions")
    except Exception as err:
        # Any failure while loading downgrades the app to mock predictions.
        print(f"Could not load model: {err}")
        print("Using mock predictions for testing")
        USE_MOCK = True

# Prediction callable used by make_prediction.
if USE_MOCK:
    _predict_fn = predict_occupancy_mock
else:
    _predict_fn = predict_occupancy

# Default map view, centred on Linköping.
DEFAULT_LAT, DEFAULT_LON = 58.4108, 15.6214
DEFAULT_ZOOM = 12

# Rough bounding box for Östergötland, used to validate coordinates.
BOUNDS = dict(min_lat=57.8, max_lat=58.9, min_lon=14.5, max_lon=16.8)
48
+
49
+
50
def create_map(lat=DEFAULT_LAT, lon=DEFAULT_LON, marker_lat=None, marker_lon=None):
    """Render a Folium map centred on (lat, lon) and return it as an HTML string.

    When ``marker_lat`` is None a gray informational marker is placed at the
    map centre; otherwise a blue marker is placed at
    (``marker_lat``, ``marker_lon``).
    """
    fmap = folium.Map(
        location=[lat, lon],
        zoom_start=DEFAULT_ZOOM,
        tiles="CartoDB positron",
    )

    if marker_lat is not None:
        # Marker for the location the user selected.
        position = [marker_lat, marker_lon]
        popup_text = f"Selected: {marker_lat:.4f}, {marker_lon:.4f}"
        marker_icon = folium.Icon(color="blue", icon="map-marker", prefix="fa")
    else:
        # No selection yet: show an instruction marker at the centre.
        position = [lat, lon]
        popup_text = "Click anywhere on the map to select a location"
        marker_icon = folium.Icon(color="gray", icon="info-sign")

    folium.Marker(position, popup=popup_text, icon=marker_icon).add_to(fmap)
    return fmap._repr_html_()
74
+
75
+
76
def parse_map_click(map_html, click_data):
    """Placeholder for map-click parsing; always returns ``(None, None)``.

    Both arguments are ignored. The app reads coordinates from the numeric
    latitude/longitude inputs instead of from map interaction.
    """
    no_coordinates = (None, None)
    return no_coordinates
81
+
82
+
83
def make_prediction(lat, lon, date_choice, hour):
    """
    Make an occupancy prediction for the given inputs.

    Args:
        lat: Latitude, or None when no value was entered.
        lon: Longitude, or None when no value was entered.
        date_choice: "Today" selects the current date; any other value
            selects tomorrow.
        hour: Hour of day; converted with ``int()`` before use.

    Returns:
        HTML string produced by ``create_result_card``: either the
        prediction or a gray card describing why no prediction was made.
    """
    if lat is None or lon is None:
        return create_result_card(
            "Please select a location",
            "Click on the map or enter coordinates to get a prediction.",
            "gray",
            None
        )

    # Validate coordinates are in Östergötland
    if not (BOUNDS["min_lat"] <= lat <= BOUNDS["max_lat"] and
            BOUNDS["min_lon"] <= lon <= BOUNDS["max_lon"]):
        return create_result_card(
            "Location outside coverage area",
            "Please select a location within Östergötland.",
            "gray",
            None
        )

    # Determine date
    today = datetime.now().date()
    if date_choice == "Today":
        selected_date = today
    else:  # Tomorrow
        selected_date = today + timedelta(days=1)

    selected_datetime = datetime.combine(
        selected_date, datetime.min.time().replace(hour=int(hour))
    )

    try:
        # Get weather forecast
        weather = get_weather_for_prediction(lat, lon, selected_datetime)

        # Get holiday features
        holidays = get_holiday_features(selected_datetime)

        # Make prediction
        prediction, confidence, probabilities = _predict_fn(
            lat=lat,
            lon=lon,
            hour=int(hour),
            day_of_week=selected_date.weekday(),
            weather=weather,
            holidays=holidays
        )

        # Format result
        label_info = OCCUPANCY_LABELS[prediction]

        # Build context string
        day_name = selected_date.strftime("%A")
        if holidays.get("is_red_day"):
            day_type = "Holiday"
        elif holidays.get("is_work_free"):
            day_type = "Work-free day"
        else:
            day_type = "Regular workday"

        # Only apply numeric formatting to a numeric temperature. Formatting
        # a "?" placeholder with ":.0f" raises ValueError and would turn a
        # successful prediction into a "Prediction failed" card.
        temp = weather.get("temperature_2m")
        if isinstance(temp, (int, float)):
            temp_text = f"{temp:.0f}°C"
        else:
            temp_text = "?°C"

        context = f"{temp_text} • {day_name} • {day_type}"

        return create_result_card(
            label_info["label"],
            label_info["message"],
            label_info["color"],
            context,
            confidence
        )

    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing.
        return create_result_card(
            "Prediction failed",
            f"Error: {str(e)}",
            "gray",
            None
        )
158
+
159
+
160
def create_result_card(title, message, color, context, confidence=None):
    """Create the HTML result card.

    Args:
        title: Card heading text.
        message: Body text. May contain raw exception text, so it is
            HTML-escaped before being embedded.
        color: One of "green", "yellow", "orange", "red", "gray"; any other
            value falls back to the gray accent colour.
        context: Optional footer text; omitted when falsy.
        confidence: Optional fraction rendered as a whole percentage;
            omitted when None.

    Returns:
        HTML string for the card.
    """
    from html import escape  # local import keeps the block self-contained

    color_map = {
        "green": "#22c55e",
        "yellow": "#eab308",
        "orange": "#f97316",
        "red": "#ef4444",
        "gray": "#6b7280"
    }
    bg_color = color_map.get(color, "#6b7280")

    # Escape text placed in element content so that characters such as "<"
    # or "&" in an error message cannot inject markup into the page.
    # quote=False leaves apostrophes and quotes unchanged.
    safe_title = escape(str(title), quote=False)
    safe_message = escape(str(message), quote=False)

    confidence_html = ""
    if confidence is not None:
        confidence_html = f'<div style="font-size: 0.9em; opacity: 0.8;">Confidence: {confidence:.0%}</div>'

    context_html = ""
    if context:
        safe_context = escape(str(context), quote=False)
        context_html = f'<div style="margin-top: 15px; font-size: 0.9em; opacity: 0.7;">{safe_context}</div>'

    return f"""
    <div style="
        background: linear-gradient(135deg, {bg_color}22, {bg_color}11);
        border-left: 4px solid {bg_color};
        border-radius: 12px;
        padding: 24px;
        margin: 10px 0;
    ">
        <div style="
            font-size: 1.4em;
            font-weight: 600;
            color: {bg_color};
            margin-bottom: 8px;
        ">{safe_title}</div>
        <div style="
            font-size: 1.1em;
            color: #374151;
            line-height: 1.5;
        ">{safe_message}</div>
        {confidence_html}
        {context_html}
    </div>
    """
202
+
203
+
204
def update_map_with_marker(lat, lon):
    """Return map HTML centred on and marking (lat, lon).

    Falls back to the default map when either coordinate is None.
    """
    if lat is None or lon is None:
        return create_map()
    return create_map(lat, lon, lat, lon)
209
+
210
+
211
# Custom CSS
CUSTOM_CSS = """
.main-title {
    text-align: center;
    margin-bottom: 0;
}
.subtitle {
    text-align: center;
    color: #6b7280;
    margin-top: 5px;
    margin-bottom: 20px;
}
"""

# Build Gradio interface.
# Theme and CSS are passed to gr.Blocks() rather than launch(): in the
# Gradio 4.x line (the Space README pins sdk_version 4.44.0) launch() does
# not accept `theme`/`css` keyword arguments and would raise TypeError.
with gr.Blocks(
    title="HappySardines",
    theme=gr.themes.Soft(primary_hue="blue", secondary_hue="cyan"),
    css=CUSTOM_CSS,
) as app:

    # Header
    gr.Markdown("# 🐟 HappySardines", elem_classes=["main-title"])
    gr.Markdown("*How packed are buses in Östergötland?*", elem_classes=["subtitle"])

    with gr.Row():
        # Left column: Map
        with gr.Column(scale=2):
            gr.Markdown("### Select Location")
            gr.Markdown("Enter coordinates or use the map as reference:")

            map_display = gr.HTML(value=create_map())

            with gr.Row():
                lat_input = gr.Number(
                    label="Latitude",
                    value=DEFAULT_LAT,
                    precision=6,
                    minimum=BOUNDS["min_lat"],
                    maximum=BOUNDS["max_lat"]
                )
                lon_input = gr.Number(
                    label="Longitude",
                    value=DEFAULT_LON,
                    precision=6,
                    minimum=BOUNDS["min_lon"],
                    maximum=BOUNDS["max_lon"]
                )

            update_map_btn = gr.Button("Update Map", variant="secondary", size="sm")

        # Right column: Controls
        with gr.Column(scale=1):
            gr.Markdown("### When?")

            date_choice = gr.Radio(
                choices=["Today", "Tomorrow"],
                value="Today",
                label="Date"
            )

            hour_slider = gr.Slider(
                minimum=5,
                maximum=23,
                value=8,
                step=1,
                label="Hour",
                info="Select time of day (24h format)"
            )

            # Show selected time
            time_display = gr.Markdown("**Selected: 08:00**")

            predict_btn = gr.Button("🔮 Predict Crowding", variant="primary", size="lg")

    # Result section
    gr.Markdown("### Prediction")
    result_display = gr.HTML(
        value=create_result_card(
            "Select location and time",
            "Then click 'Predict Crowding' to see the forecast.",
            "gray",
            None
        )
    )

    # About section
    with gr.Accordion("About this tool", open=False):
        gr.Markdown("""
        **How it works:**

        This tool predicts typical bus crowding levels based on:
        - **Location** - Different areas have different ridership patterns
        - **Time** - Rush hours vs. off-peak
        - **Day of week** - Weekdays vs. weekends
        - **Weather** - Temperature, precipitation, etc.
        - **Holidays** - Swedish red days and work-free days

        **Data sources:**
        - Historical bus occupancy data from Östgötatrafiken (GTFS-RT, Nov-Dec 2025)
        - Weather forecasts from Open-Meteo
        - Swedish holiday calendar from Svenska Dagar API

        **Limitations:**
        - Predictions are based on historical patterns, not real-time data
        - Accuracy varies by location and time
        - The model predicts general area crowding, not specific bus lines

        **Built for KTH ID2223 - Scalable Machine Learning and Deep Learning**
        """)

    # Event handlers
    def update_time_display(hour):
        """Format the slider value as the zero-padded 'Selected: HH:00' label."""
        return f"**Selected: {int(hour):02d}:00**"

    hour_slider.change(
        fn=update_time_display,
        inputs=[hour_slider],
        outputs=[time_display]
    )

    update_map_btn.click(
        fn=update_map_with_marker,
        inputs=[lat_input, lon_input],
        outputs=[map_display]
    )

    predict_btn.click(
        fn=make_prediction,
        inputs=[lat_input, lon_input, date_choice, hour_slider],
        outputs=[result_display]
    )


# Entry point when the file is run as a script.
if __name__ == "__main__":
    app.launch()
holidays.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Swedish holiday lookup for HappySardines predictions.
3
+
4
+ Uses Svenska Dagar API to get holiday information.
5
+ """
6
+
7
+ import requests
8
+ from datetime import datetime
9
+
10
# Base URL of the Svenska Dagar API (v2.1).
SVENSKA_DAGAR_API_URL = "https://sholiday.faboul.se/dagar/v2.1"


def get_holiday_features(target_datetime: datetime) -> dict:
    """
    Look up holiday features for the date of ``target_datetime``.

    Queries the Svenska Dagar API for that single day. On a non-200
    response, an empty day list, or any exception, the weekday-based
    defaults from ``_default_holidays`` are returned instead.

    Args:
        target_datetime: Target datetime; only its date part is sent to the API.

    Returns:
        Dict with keys ``is_work_free``, ``is_red_day``,
        ``is_day_before_holiday``, ``holiday_name`` and ``day_of_week``
        (0-indexed).
    """
    try:
        day_date = target_datetime.date()
        endpoint = (
            f"{SVENSKA_DAGAR_API_URL}"
            f"/{day_date.year}/{day_date.month:02d}/{day_date.day:02d}"
        )

        reply = requests.get(endpoint, timeout=30)
        if reply.status_code != 200:
            print(f"Holiday API error: {reply.status_code}")
            return _default_holidays(target_datetime)

        entries = reply.json().get("dagar", [])
        if not entries:
            return _default_holidays(target_datetime)

        entry = entries[0]

        # "dag i vecka" is converted to a 0-indexed weekday by subtracting 1;
        # the fallback is the datetime's own weekday shifted to match.
        fallback_weekday_number = target_datetime.weekday() + 1
        weekday_index = int(entry.get("dag i vecka", fallback_weekday_number)) - 1

        features = {
            "is_work_free": entry.get("arbetsfri dag") == "Ja",
            "is_red_day": entry.get("röd dag") == "Ja",
            "is_day_before_holiday": entry.get("dag före arbetsfri helgdag") == "Ja",
            "holiday_name": entry.get("helgdag"),
            "day_of_week": weekday_index,
        }
        return features

    except Exception as e:
        print(f"Error fetching holiday data: {e}")
        return _default_holidays(target_datetime)
53
+
54
+
55
def _default_holidays(target_datetime: datetime) -> dict:
    """Build fallback holiday features from the weekday alone.

    Saturdays and Sundays count as work-free; only Sundays count as red days.
    """
    weekday = target_datetime.weekday()  # 0=Monday ... 6=Sunday
    saturday, sunday = 5, 6

    return {
        "is_work_free": weekday in (saturday, sunday),
        "is_red_day": weekday == sunday,
        "is_day_before_holiday": False,
        "holiday_name": None,
        "day_of_week": weekday,
    }
predictor.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Model loading and prediction logic for HappySardines.
3
+
4
+ Loads the XGBoost model from Hopsworks Model Registry and makes predictions.
5
+ """
6
+
7
+ import os
8
+ import numpy as np
9
+ import pandas as pd
10
+
11
# Module-level cache filled by load_model().
_model = None
_model_loaded = False

# One row per occupancy class, in class-index order:
# (label, message, color, icon).
_LABEL_ROWS = (
    ("Empty", "Plenty of room - pick any seat!", "green", "🟢"),
    ("Many seats available", "Lots of seats to choose from.", "green", "🟢"),
    ("Few seats available", "Some seats left - you might need to look around.", "yellow", "🟡"),
    ("Standing room only", "Expect to stand - pack your patience!", "orange", "🟠"),
    ("Crushed standing", "Very crowded - consider waiting for the next one.", "red", "🔴"),
    ("Full", "Bus is full - you may not get on.", "red", "🔴"),
    ("Not accepting passengers", "Bus is not accepting passengers.", "gray", "⚫"),
)

# Display info for each occupancy class, keyed by class index 0-6.
OCCUPANCY_LABELS = {
    level: {"label": label, "message": message, "color": color, "icon": icon}
    for level, (label, message, color, icon) in enumerate(_LABEL_ROWS)
}

# Column order of the feature frame passed to the model.
# Must match the training pipeline exactly.
FEATURE_ORDER = [
    # Vehicle features
    "avg_speed", "max_speed", "speed_std", "n_positions",
    # Location
    "lat_mean", "lon_mean",
    # Time
    "hour", "day_of_week",
    # Weather
    "temperature_2m", "precipitation", "cloud_cover", "wind_speed_10m",
    # Holidays
    "is_work_free", "is_red_day", "is_day_before_holiday",
]

# Stand-in values for the vehicle features, since no real-time vehicle data
# is available at prediction time (approximate training-data averages).
DEFAULT_VEHICLE_FEATURES = dict(
    avg_speed=20.0,   # typical urban bus speed (km/h)
    max_speed=45.0,   # typical max speed
    speed_std=12.0,   # typical speed variation
    n_positions=30,   # typical GPS points per trip window
)
89
+
90
+
91
def load_model():
    """
    Load the newest model version from the Hopsworks Model Registry.

    The loaded model is cached in the module-level ``_model`` so later calls
    return it without contacting Hopsworks again.

    Returns:
        The loaded ``XGBClassifier``.

    Raises:
        Exception: Any error from the Hopsworks login, the registry lookup,
            the download or the model file load is printed and re-raised.
    """
    global _model, _model_loaded

    if _model_loaded:
        return _model

    try:
        # Imported lazily so the module can be imported (e.g. for the mock
        # predictor) without these packages being usable.
        import hopsworks
        from xgboost import XGBClassifier

        print("Connecting to Hopsworks...")
        project = hopsworks.login()
        mr = project.get_model_registry()

        print("Fetching model from registry...")
        # get_model(name, version=None) resolves to version 1, not the
        # latest, so list every version and pick the highest explicitly.
        candidates = mr.get_models("occupancy_xgboost_model")
        if not candidates:
            raise RuntimeError(
                "No versions of 'occupancy_xgboost_model' found in the model registry"
            )
        model_entry = max(candidates, key=lambda entry: entry.version)

        print(f"Downloading model version {model_entry.version}...")
        model_dir = model_entry.download()

        print("Loading XGBoost model...")
        model = XGBClassifier()
        model.load_model(os.path.join(model_dir, "model.json"))

        _model = model
        _model_loaded = True
        print("Model loaded successfully!")

        return model

    except Exception as e:
        print(f"Error loading model: {e}")
        raise
129
+
130
+
131
def predict_occupancy(lat, lon, hour, day_of_week, weather, holidays):
    """
    Predict the occupancy class for one location and time.

    Vehicle features are filled from ``DEFAULT_VEHICLE_FEATURES``; missing
    weather or holiday keys fall back to fixed defaults.

    Args:
        lat: Latitude
        lon: Longitude
        hour: Hour of day (0-23)
        day_of_week: Day of week (0=Monday, 6=Sunday)
        weather: Dict with temperature_2m, precipitation, cloud_cover, wind_speed_10m
        holidays: Dict with is_work_free, is_red_day, is_day_before_holiday

    Returns:
        Tuple of (predicted_class, confidence, all_probabilities)
    """
    model = load_model()

    vehicle_keys = ("avg_speed", "max_speed", "speed_std", "n_positions")
    weather_defaults = (
        ("temperature_2m", 10.0),
        ("precipitation", 0.0),
        ("cloud_cover", 50.0),
        ("wind_speed_10m", 5.0),
    )
    holiday_flags = ("is_work_free", "is_red_day", "is_day_before_holiday")

    # Assemble the single-row feature mapping group by group.
    row = {key: DEFAULT_VEHICLE_FEATURES[key] for key in vehicle_keys}
    row.update(lat_mean=lat, lon_mean=lon, hour=hour, day_of_week=day_of_week)
    row.update((key, weather.get(key, fallback)) for key, fallback in weather_defaults)
    # Holiday flags are booleans; they are converted to 0/1 integers here.
    row.update((flag, int(holidays.get(flag, False))) for flag in holiday_flags)

    # Column order must follow FEATURE_ORDER.
    X = pd.DataFrame([row])[FEATURE_ORDER]

    probabilities = model.predict_proba(X)[0]

    # The predicted class is the one with the highest probability.
    best_class = int(np.argmax(probabilities))
    best_probability = float(probabilities[best_class])

    return best_class, best_probability, probabilities.tolist()
187
+
188
+
189
# Mock prediction for testing without Hopsworks
def predict_occupancy_mock(lat, lon, hour, day_of_week, weather, holidays):
    """
    Mock prediction for testing the UI without a model.

    Uses a simple heuristic based on the hour and the holiday flags; ``lat``,
    ``lon``, ``day_of_week`` and ``weather`` are accepted only for signature
    compatibility with ``predict_occupancy`` and are ignored.

    Returns:
        Tuple of (predicted_class, confidence, all_probabilities), where
        ``all_probabilities`` has 7 entries that sum to 1.
    """
    # Simple heuristic based on time
    if 7 <= hour <= 9 or 16 <= hour <= 18:
        # Rush hour
        if holidays.get("is_work_free") or holidays.get("is_red_day"):
            predicted_class = 1  # Holiday rush hour = many seats
        else:
            predicted_class = 2 if hour < 8 or hour > 17 else 3  # Peak = standing
    elif 10 <= hour <= 15:
        predicted_class = 1  # Midday = many seats
    else:
        predicted_class = 0  # Early/late = empty

    # Mock probabilities: fixed confidence on the predicted class, with the
    # remaining mass spread evenly over the other classes so the list is a
    # valid distribution.
    n_classes = 7
    confidence = 0.6
    probabilities = [(1.0 - confidence) / (n_classes - 1)] * n_classes
    probabilities[predicted_class] = confidence

    return predicted_class, confidence, probabilities
212
+
213
+
214
# For testing - use mock if model not available
def get_predictor():
    """Return ``predict_occupancy`` if the model loads, else the mock predictor."""
    try:
        load_model()
    except Exception as e:
        print(f"Using mock predictor: {e}")
        return predict_occupancy_mock
    return predict_occupancy
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ gradio>=4.0.0
2
+ folium>=0.15.0
3
+ hopsworks
4
+ xgboost>=2.0.0
5
+ pandas
6
+ numpy
7
+ requests
8
+ python-dotenv
weather.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Weather forecast fetching for HappySardines predictions.
3
+
4
+ Uses Open-Meteo API to get weather forecasts.
5
+ """
6
+
7
+ import requests
8
+ from datetime import datetime
9
+
10
# Open-Meteo forecast endpoint.
OPENMETEO_FORECAST_URL = "https://api.open-meteo.com/v1/forecast"

# Hourly weather variables requested from the API for prediction.
WEATHER_VARIABLES = ["temperature_2m", "precipitation", "cloud_cover", "wind_speed_10m"]
20
+
21
+
22
def get_weather_for_prediction(lat: float, lon: float, target_datetime: datetime) -> dict:
    """
    Get the weather forecast for a specific location and hour.

    Requests hourly data from Open-Meteo and picks the entry matching
    ``target_datetime``. Any failure (date too far ahead, non-200 response,
    missing hour, exception) yields the values from ``_default_weather``.
    A single missing variable falls back to its own default only.

    Args:
        lat: Latitude
        lon: Longitude
        target_datetime: Target datetime for prediction

    Returns:
        Dict with temperature_2m, precipitation, cloud_cover, wind_speed_10m
    """
    try:
        days_ahead = (target_datetime.date() - datetime.now().date()).days

        # forecast_days below is days_ahead + 1 and Open-Meteo accepts at
        # most 16 forecast days, so day offset 16 is already out of range.
        if days_ahead >= 16:
            print("Warning: Date too far in future, using defaults")
            return _default_weather()

        params = {
            "latitude": lat,
            "longitude": lon,
            "hourly": ",".join(WEATHER_VARIABLES),
            "timezone": "Europe/Stockholm",
            "forecast_days": max(2, days_ahead + 1),  # At least today + tomorrow
        }

        # Include past days if looking at today
        if days_ahead <= 0:
            params["past_days"] = 1

        response = requests.get(OPENMETEO_FORECAST_URL, params=params, timeout=30)

        if response.status_code != 200:
            print(f"Weather API error: {response.status_code}")
            return _default_weather()

        hourly = response.json().get("hourly", {})
        if not hourly:
            return _default_weather()

        # Find the matching hour in the response
        times = hourly.get("time", [])
        target_str = target_datetime.strftime("%Y-%m-%dT%H:00")

        try:
            idx = times.index(target_str)
        except ValueError:
            # Exact string not present: look for the same date and hour
            # written with a different minute part.
            target_hour = target_datetime.hour
            target_date = target_datetime.strftime("%Y-%m-%d")

            for i, t in enumerate(times):
                if t.startswith(target_date) and f"T{target_hour:02d}:" in t:
                    idx = i
                    break
            else:
                print(f"Could not find matching time for {target_datetime}")
                return _default_weather()

        fallback = _default_weather()
        return {
            name: _hourly_value(hourly, name, idx, fallback[name])
            for name in WEATHER_VARIABLES
        }

    except Exception as e:
        print(f"Error fetching weather: {e}")
        return _default_weather()


def _hourly_value(hourly: dict, name: str, idx: int, default: float) -> float:
    """Return ``hourly[name][idx]``, or ``default`` when it is missing or None.

    A plain ``value or default`` would also replace legitimate zeros
    (0 °C, 0 % cloud cover, calm wind), so only None triggers the fallback.
    """
    series = hourly.get(name) or []
    value = series[idx] if idx < len(series) else None
    return default if value is None else value
97
+
98
+
99
def _default_weather() -> dict:
    """Return the fallback weather features used when no forecast is available."""
    return dict(
        temperature_2m=10.0,  # Typical Swedish temp
        precipitation=0.0,
        cloud_cover=50.0,
        wind_speed_10m=5.0,
    )