Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| import os | |
| import time | |
| import requests | |
| from math import radians, sin, cos, sqrt, atan2 | |
| import random | |
| def haversine_distance(lat1, lon1, lat2, lon2): | |
| """ | |
| Calculate the Haversine distance between two points in kilometers. | |
| The Haversine distance is the great-circle distance between two points on a sphere. | |
| Parameters: | |
| ----------- | |
| lat1, lon1 : float | |
| Coordinates of the first point in decimal degrees | |
| lat2, lon2 : float | |
| Coordinates of the second point in decimal degrees | |
| Returns: | |
| -------- | |
| float | |
| Distance between the two points in kilometers | |
| """ | |
| # Convert decimal degrees to radians | |
| lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2]) | |
| # Haversine formula | |
| dlon = lon2 - lon1 | |
| dlat = lat2 - lat1 | |
| a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2 | |
| c = 2 * atan2(sqrt(a), sqrt(1-a)) | |
| distance = 6371 * c # Radius of Earth in kilometers | |
| return distance | |
| def get_road_distance_with_retry(origin, destination, max_retries=3, initial_backoff=1): | |
| """ | |
| Get road distance between two points with retry logic | |
| Parameters: | |
| ----------- | |
| origin : dict | |
| Origin location with 'latitude' and 'longitude' keys | |
| destination : dict | |
| Destination location with 'latitude' and 'longitude' keys | |
| max_retries : int | |
| Maximum number of retry attempts | |
| initial_backoff : int | |
| Initial backoff time in seconds | |
| Returns: | |
| -------- | |
| tuple of (float, float) | |
| Distance in km and duration in minutes | |
| """ | |
| # URLs for different public OSRM instances to distribute load | |
| osrm_urls = [ | |
| "http://router.project-osrm.org", | |
| "https://routing.openstreetmap.de", | |
| # Add more public OSRM servers if available | |
| ] | |
| retry_count = 0 | |
| backoff = initial_backoff | |
| while retry_count < max_retries: | |
| try: | |
| # Use a random OSRM server from the list to distribute load | |
| base_url = random.choice(osrm_urls) | |
| url = f"{base_url}/route/v1/driving/{origin['longitude']},{origin['latitude']};{destination['longitude']},{destination['latitude']}?overview=false" | |
| # Add a timeout to prevent hanging connections | |
| response = requests.get(url, timeout=5) | |
| data = response.json() | |
| if data.get('code') == 'Ok': | |
| # Extract distance and duration | |
| distance = data['routes'][0]['distance'] / 1000 # meters to km | |
| duration = data['routes'][0]['duration'] / 60 # seconds to minutes | |
| return round(distance, 2), round(duration, 2) | |
| else: | |
| print(f"API returned error: {data.get('message', 'Unknown error')}") | |
| except requests.exceptions.RequestException as e: | |
| print(f"Request failed: {e}. Retry {retry_count+1}/{max_retries}") | |
| # Exponential backoff with jitter to prevent thundering herd | |
| jitter = random.uniform(0, 0.5 * backoff) | |
| sleep_time = backoff + jitter | |
| time.sleep(sleep_time) | |
| backoff *= 2 # Exponential backoff | |
| retry_count += 1 | |
| # Fallback to haversine after all retries failed | |
| print(f"All retries failed for route from ({origin['latitude']},{origin['longitude']}) to ({destination['latitude']},{destination['longitude']}). Using haversine distance.") | |
| distance = haversine_distance( | |
| origin['latitude'], origin['longitude'], | |
| destination['latitude'], destination['longitude'] | |
| ) | |
| distance = distance * 1.3 # Road factor | |
| time_mins = (distance / 40) * 60 # 40 km/h | |
| return round(distance, 2), round(time_mins, 2) | |
| def get_road_distance(origins, destinations, use_osrm=True): | |
| """ | |
| Calculate actual road distances and travel times between multiple origins and destinations | |
| using the OSRM (Open Source Routing Machine) API. | |
| Parameters: | |
| ----------- | |
| origins : list of dict | |
| List of origin locations with 'latitude' and 'longitude' keys | |
| destinations : list of dict | |
| List of destination locations with 'latitude' and 'longitude' keys | |
| use_osrm : bool, default=True | |
| Whether to use OSRM API or fall back to haversine distance | |
| Returns: | |
| -------- | |
| tuple of (numpy.ndarray, numpy.ndarray) | |
| Arrays containing distances (in km) and durations (in minutes) between each origin-destination pair | |
| """ | |
| n_origins = len(origins) | |
| n_destinations = len(destinations) | |
| distance_matrix = np.zeros((n_origins, n_destinations)) | |
| duration_matrix = np.zeros((n_origins, n_destinations)) | |
| # If OSRM is not requested, fall back to haversine distance | |
| if not use_osrm: | |
| print("Using haversine distance as fallback.") | |
| for i, origin in enumerate(origins): | |
| for j, dest in enumerate(destinations): | |
| distance = haversine_distance( | |
| origin['latitude'], origin['longitude'], | |
| dest['latitude'], dest['longitude'] | |
| ) | |
| # Adjust for road networks (roads are typically not straight lines) | |
| distance = distance * 1.3 # Apply a factor to approximate road distance | |
| time_mins = (distance / 40) * 60 # Assuming average speed of 40 km/h | |
| distance_matrix[i, j] = round(distance, 2) | |
| duration_matrix[i, j] = round(time_mins, 2) | |
| return distance_matrix, duration_matrix | |
| # Process in batches to prevent overwhelming the API | |
| print(f"Processing {n_origins} origins and {n_destinations} destinations in batches...") | |
| total_requests = n_origins * n_destinations | |
| completed = 0 | |
| try: | |
| # Try OSRM's table service for small datasets first (more efficient) | |
| if n_origins + n_destinations <= 50: | |
| print("Trying OSRM table API for efficient matrix calculation...") | |
| try: | |
| # Code for table API would go here, but we'll skip for now as it's more complex | |
| # and the batch approach is more reliable for handling errors | |
| raise NotImplementedError("Table API not implemented, falling back to individual routes") | |
| except Exception as e: | |
| print(f"Table API failed: {e}. Using individual routes instead.") | |
| # Continue with individual route requests below | |
| # Process with individual route requests | |
| for i, origin in enumerate(origins): | |
| for j, dest in enumerate(destinations): | |
| # Skip if origin and destination are the same point | |
| if i == j: | |
| distance_matrix[i, j] = 0 | |
| duration_matrix[i, j] = 0 | |
| completed += 1 | |
| continue | |
| # Get distance with retry logic | |
| distance, duration = get_road_distance_with_retry(origin, dest) | |
| distance_matrix[i, j] = distance | |
| duration_matrix[i, j] = duration | |
| # Show progress | |
| completed += 1 | |
| if completed % 10 == 0: | |
| print(f"Progress: {completed}/{total_requests} routes calculated ({(completed/total_requests)*100:.1f}%)") | |
| # Add randomized delay to prevent overwhelming the API | |
| time.sleep(random.uniform(0.1, 0.5)) | |
| except KeyboardInterrupt: | |
| print("\nOperation interrupted by user. Saving partial results...") | |
| return distance_matrix, duration_matrix | |
| def generate_travel_matrix(use_osrm=True): | |
| """ | |
| Generate travel time and distance matrices between all locations in the delivery problem. | |
| Parameters: | |
| ----------- | |
| use_osrm : bool, default=True | |
| Whether to use OSRM API for real road distances instead of haversine | |
| Returns: | |
| -------- | |
| tuple of (pd.DataFrame, pd.DataFrame, dict) | |
| Distance matrix, base time matrix, and hourly time matrices | |
| """ | |
| # Create data directories if they don't exist | |
| data_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'data') | |
| time_matrix_dir = os.path.join(data_dir, 'time-matrix') | |
| delivery_data_dir = os.path.join(data_dir, 'delivery-data') | |
| vehicle_data_dir = os.path.join(data_dir, 'vehicle-data') | |
| # Ensure all directories exist | |
| for directory in [time_matrix_dir, delivery_data_dir, vehicle_data_dir]: | |
| os.makedirs(directory, exist_ok=True) | |
| # Read delivery and vehicle data | |
| try: | |
| delivery_data = pd.read_csv(os.path.join(delivery_data_dir, 'delivery_data.csv')) | |
| vehicle_data = pd.read_csv(os.path.join(vehicle_data_dir, 'vehicle_data.csv')) | |
| except FileNotFoundError: | |
| print("Error: Please generate delivery and vehicle data first!") | |
| return | |
| # Extract locations | |
| delivery_locations = delivery_data[['delivery_id', 'latitude', 'longitude']].values | |
| depot_locations = vehicle_data[['vehicle_id', 'depot_latitude', 'depot_longitude']].values | |
| # Average speed for time calculation (km/h) | |
| avg_speed = vehicle_data['avg_speed_kmh'].mean() | |
| # Traffic factor matrix (to simulate traffic conditions at different times) | |
| hours_in_day = 24 | |
| traffic_factors = np.ones((hours_in_day, 1)) | |
| # Simulate morning rush hour (8-10 AM) | |
| traffic_factors[8:10] = 1.5 | |
| # Simulate evening rush hour (5-7 PM) | |
| traffic_factors[17:19] = 1.8 | |
| # Late night (less traffic) | |
| traffic_factors[22:] = 0.8 | |
| traffic_factors[:5] = 0.7 | |
| # Create a combined list of all locations (depots + delivery points) | |
| all_locations = [] | |
| # Add depot locations | |
| for row in depot_locations: | |
| all_locations.append({ | |
| 'id': row[0], # vehicle_id as location id | |
| 'type': 'depot', | |
| 'latitude': row[1], | |
| 'longitude': row[2] | |
| }) | |
| # Add delivery locations | |
| for row in delivery_locations: | |
| all_locations.append({ | |
| 'id': row[0], # delivery_id as location id | |
| 'type': 'delivery', | |
| 'latitude': row[1], | |
| 'longitude': row[2] | |
| }) | |
| print(f"Calculating distances between {len(all_locations)} locations...") | |
| # Save the locations file early so we have this data even if the process is interrupted | |
| location_df = pd.DataFrame(all_locations) | |
| location_df.to_csv(os.path.join(time_matrix_dir, 'locations.csv'), index=False) | |
| # Calculate distances and times using OSRM with improved error handling | |
| if use_osrm: | |
| print("Using OSRM API for road distances...") | |
| distance_matrix, base_time_matrix = get_road_distance(all_locations, all_locations, use_osrm=True) | |
| else: | |
| print("Using haversine distance with road factor adjustment...") | |
| distance_matrix, base_time_matrix = get_road_distance(all_locations, all_locations, use_osrm=False) | |
| # Create DataFrames for the matrices | |
| location_ids = [loc['id'] for loc in all_locations] | |
| distance_df = pd.DataFrame(distance_matrix, index=location_ids, columns=location_ids) | |
| time_df = pd.DataFrame(base_time_matrix, index=location_ids, columns=location_ids) | |
| # Save distance and base time matrices early in case later steps fail | |
| distance_df.to_csv(os.path.join(time_matrix_dir, 'distance_matrix.csv')) | |
| time_df.to_csv(os.path.join(time_matrix_dir, 'base_time_matrix.csv')) | |
| print("Basic distance and time matrices saved successfully.") | |
| # Create time matrices for different hours of the day | |
| hourly_time_matrices = {} | |
| for hour in range(24): | |
| traffic_factor = traffic_factors[hour][0] | |
| hourly_time = base_time_matrix * traffic_factor | |
| hourly_time_matrices[f"{hour:02d}:00"] = pd.DataFrame(hourly_time, index=location_ids, columns=location_ids) | |
| # Save a sample of time matrices (e.g., rush hour and normal time) | |
| try: | |
| hourly_time_matrices['08:00'].to_csv(os.path.join(time_matrix_dir, 'morning_rush_time_matrix.csv')) | |
| hourly_time_matrices['18:00'].to_csv(os.path.join(time_matrix_dir, 'evening_rush_time_matrix.csv')) | |
| hourly_time_matrices['12:00'].to_csv(os.path.join(time_matrix_dir, 'midday_time_matrix.csv')) | |
| hourly_time_matrices['00:00'].to_csv(os.path.join(time_matrix_dir, 'night_time_matrix.csv')) | |
| print("Time matrices for different hours saved successfully.") | |
| except Exception as e: | |
| print(f"Error saving hourly time matrices: {e}") | |
| print("Continuing with basic matrices only.") | |
| print("Travel matrices generation complete.") | |
| return distance_df, time_df, hourly_time_matrices | |
| if __name__ == "__main__": | |
| # For development, allow falling back to haversine if needed | |
| import argparse | |
| parser = argparse.ArgumentParser(description="Generate travel matrices for delivery route optimization") | |
| parser.add_argument("--use-osrm", action="store_true", help="Use OSRM API for real road distances") | |
| parser.add_argument("--use-haversine", action="store_true", help="Use haversine distance only (faster)") | |
| args = parser.parse_args() | |
| if args.use_haversine: | |
| generate_travel_matrix(use_osrm=False) | |
| else: | |
| # Default to OSRM unless explicitly disabled | |
| generate_travel_matrix(use_osrm=True) |