Spaces:
Sleeping
Sleeping
| """ | |
| Model loading and prediction logic for HappySardines. | |
| Loads the XGBoost model from Hopsworks Model Registry and makes predictions. | |
| """ | |
| import os | |
| import numpy as np | |
| import pandas as pd | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
# Process-wide cache filled in by load_model(). The flag records that a load
# has completed, so later calls can return the cached model immediately.
_model_loaded = False
_model = None
# Display metadata for each occupancy class, keyed by the integer class id.
# Each table row below is (label, message, color, icon); the row's position
# in the table is the class id it belongs to.
OCCUPANCY_LABELS = {
    class_id: {"label": label, "message": message, "color": color, "icon": icon}
    for class_id, (label, message, color, icon) in enumerate((
        ("Empty", "Plenty of room - pick any seat!", "green", "🟢"),
        ("Many seats available", "Lots of seats to choose from.", "green", "🟢"),
        ("Few seats available", "Some seats left - you might need to look around.", "yellow", "🟡"),
        ("Standing room only", "Expect to stand - pack your patience!", "orange", "🟠"),
        ("Crushed standing", "Very crowded - consider waiting for the next one.", "red", "🔴"),
        ("Full", "Bus is full - you may not get on.", "red", "🔴"),
        ("Not accepting passengers", "Bus is not accepting passengers.", "gray", "⚫"),
    ))
}
# Column order the model (occupancy_xgboost_model_new v4) was trained with.
# The sequence must match the training pipeline exactly, including the
# lat/lon bounds and bearing columns; the grouping below is for readability.
FEATURE_ORDER = [
    # Trip / vehicle
    "trip_id", "vehicle_id", "max_speed", "n_positions",
    # Location bounds
    "lat_min", "lat_max", "lat_mean",
    "lon_min", "lon_max", "lon_mean",
    # Bearing
    "bearing_min", "bearing_max",
    # Time
    "hour", "day_of_week",
    # Weather
    "temperature_2m", "precipitation", "cloud_cover",
    "wind_speed_10m", "rain", "snowfall",
    # Holidays
    "is_work_free", "is_red_day", "is_day_before_holiday",
]
# Stand-in values for the vehicle features, since no real-time vehicle data
# is available at prediction time. Approximate averages from the training data.
DEFAULT_VEHICLE_FEATURES = {
    # Typical max speed
    "max_speed": 45.0,
    # Typical GPS points per trip window
    "n_positions": 30,
    # Neutral bearing
    "bearing_min": 0.0,
    # Full range (stationary/unknown direction)
    "bearing_max": 360.0,
}
def load_model():
    """
    Return the occupancy classifier, fetching it from Hopsworks on first use.

    The first successful call logs in to Hopsworks, downloads version 4 of
    ``occupancy_xgboost_model_new`` from the Model Registry, loads its
    ``model.json`` into an ``XGBClassifier`` and stores it in the module-level
    cache. Once the cache is populated, later calls return the cached model
    straight away.

    Reads ``HOPSWORKS_API_KEY`` (required) and ``HOPSWORKS_PROJECT`` from the
    environment.

    Raises:
        ValueError: If ``HOPSWORKS_API_KEY`` is not set.
        Exception: Any error raised while importing the dependencies,
            connecting, downloading or loading is printed and re-raised.
    """
    global _model, _model_loaded

    if _model_loaded:
        return _model

    # Fail fast on a missing key, before the heavy imports and network calls.
    hopsworks_key = os.environ.get("HOPSWORKS_API_KEY")
    project_name = os.environ.get("HOPSWORKS_PROJECT")
    if not hopsworks_key:
        raise ValueError("HOPSWORKS_API_KEY environment variable not set. Please add it in Space settings.")

    try:
        # Imported lazily so the module can be imported without these packages
        # being touched until a model is actually requested.
        import hopsworks
        from xgboost import XGBClassifier

        print("Connecting to Hopsworks...")
        hopsworks_project = hopsworks.login(project=project_name, api_key_value=hopsworks_key)
        registry = hopsworks_project.get_model_registry()

        print("Fetching model from registry...")
        # Version 4 is pinned explicitly: it is the model trained with 23 features.
        registry_entry = registry.get_model("occupancy_xgboost_model_new", version=4)

        print(f"Downloading model version {registry_entry.version}...")
        download_dir = registry_entry.download()

        print("Loading XGBoost model...")
        classifier = XGBClassifier()
        classifier.load_model(os.path.join(download_dir, "model.json"))

        # Publish to the cache only after the model has loaded completely.
        _model = classifier
        _model_loaded = True
        print("Model loaded successfully!")
        return classifier
    except Exception as e:
        print(f"Error loading model: {e}")
        raise
def _shared_features(hour, day_of_week, weather, holidays):
    """Build the model features that do not depend on the queried location."""
    return {
        # Trip / vehicle identifiers are placeholders.
        "trip_id": 0,
        "vehicle_id": 0,
        # Vehicle features - use defaults (no real-time vehicle data).
        "max_speed": DEFAULT_VEHICLE_FEATURES["max_speed"],
        "n_positions": DEFAULT_VEHICLE_FEATURES["n_positions"],
        # Bearing (neutral values for point prediction).
        "bearing_min": DEFAULT_VEHICLE_FEATURES["bearing_min"],
        "bearing_max": DEFAULT_VEHICLE_FEATURES["bearing_max"],
        # Time
        "hour": hour,
        "day_of_week": day_of_week,
        # Weather, with a fallback for every reading that is missing.
        "temperature_2m": weather.get("temperature_2m", 10.0),
        "precipitation": weather.get("precipitation", 0.0),
        "cloud_cover": weather.get("cloud_cover", 50.0),
        "wind_speed_10m": weather.get("wind_speed_10m", 5.0),
        "rain": weather.get("rain", 0.0),
        "snowfall": weather.get("snowfall", 0.0),
        # Holidays (convert bool to int).
        "is_work_free": int(holidays.get("is_work_free", False)),
        "is_red_day": int(holidays.get("is_red_day", False)),
        "is_day_before_holiday": int(holidays.get("is_day_before_holiday", False)),
    }


def _location_features(lat, lon):
    """Build the location features for a single point (bounds equal the point)."""
    return {
        "lat_min": lat,
        "lat_max": lat,
        "lat_mean": lat,
        "lon_min": lon,
        "lon_max": lon,
        "lon_mean": lon,
    }


def predict_occupancy(lat, lon, hour, day_of_week, weather, holidays):
    """
    Predict occupancy for given inputs.

    Args:
        lat: Latitude
        lon: Longitude
        hour: Hour of day (0-23)
        day_of_week: Day of week (0=Monday, 6=Sunday)
        weather: Dict with temperature_2m, precipitation, cloud_cover,
            wind_speed_10m, rain, snowfall. Missing keys fall back to defaults.
        holidays: Dict with is_work_free, is_red_day, is_day_before_holiday.
            Missing keys are treated as False.

    Returns:
        Tuple of (predicted_class, confidence, all_probabilities)
    """
    model = load_model()

    row = {
        **_shared_features(hour, day_of_week, weather, holidays),
        **_location_features(lat, lon),
    }

    # Selecting by FEATURE_ORDER puts the columns in the order the model expects.
    X = pd.DataFrame([row])[FEATURE_ORDER]

    probabilities = model.predict_proba(X)[0]

    # Predicted class is the one with the highest probability.
    predicted_class = int(np.argmax(probabilities))
    confidence = float(probabilities[predicted_class])
    return predicted_class, confidence, probabilities.tolist()


def predict_occupancy_batch(locations, hour, day_of_week, weather, holidays):
    """
    Predict occupancy for multiple locations in a single batch.

    Much faster than calling predict_occupancy() in a loop.

    Args:
        locations: Iterable of (lat, lon) tuples. May be empty, and may be a
            one-shot iterable such as a generator.
        hour: Hour of day (0-23)
        day_of_week: Day of week (0=Monday, 6=Sunday)
        weather: Dict with temperature_2m, precipitation, cloud_cover,
            wind_speed_10m, rain, snowfall. Missing keys fall back to defaults.
        holidays: Dict with is_work_free, is_red_day, is_day_before_holiday.
            Missing keys are treated as False.

    Returns:
        List of (predicted_class, confidence) tuples, one per location and in
        the same order. Empty when no locations are given.
    """
    # Materialise once: the input is needed in full to build the frame, and a
    # generator must not be consumed more than once.
    points = list(locations)
    if not points:
        # An empty frame has no columns, so selecting FEATURE_ORDER would raise
        # KeyError; there is nothing to predict, and no need to load the model.
        return []

    model = load_model()

    # Time, weather and holiday features are identical for every row, so they
    # are computed once and merged with each point's location features.
    shared = _shared_features(hour, day_of_week, weather, holidays)
    rows = [{**shared, **_location_features(lat, lon)} for lat, lon in points]

    # Single DataFrame, single predict call.
    X = pd.DataFrame(rows)[FEATURE_ORDER]
    probabilities = np.asarray(model.predict_proba(X))

    # Per row: index of the most likely class and that class's probability.
    predicted_classes = probabilities.argmax(axis=1)
    confidences = probabilities.max(axis=1)
    return [
        (int(predicted_class), float(confidence))
        for predicted_class, confidence in zip(predicted_classes, confidences)
    ]