DeliveryRouteOptimisation / src /utils /generate_travel_matrix.py
Jing997's picture
initial commit
13ad395
import pandas as pd
import numpy as np
import os
import time
import requests
from math import radians, sin, cos, sqrt, atan2
import random
def haversine_distance(lat1, lon1, lat2, lon2):
"""
Calculate the Haversine distance between two points in kilometers.
The Haversine distance is the great-circle distance between two points on a sphere.
Parameters:
-----------
lat1, lon1 : float
Coordinates of the first point in decimal degrees
lat2, lon2 : float
Coordinates of the second point in decimal degrees
Returns:
--------
float
Distance between the two points in kilometers
"""
# Convert decimal degrees to radians
lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
# Haversine formula
dlon = lon2 - lon1
dlat = lat2 - lat1
a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
c = 2 * atan2(sqrt(a), sqrt(1-a))
distance = 6371 * c # Radius of Earth in kilometers
return distance
def get_road_distance_with_retry(origin, destination, max_retries=3, initial_backoff=1):
"""
Get road distance between two points with retry logic
Parameters:
-----------
origin : dict
Origin location with 'latitude' and 'longitude' keys
destination : dict
Destination location with 'latitude' and 'longitude' keys
max_retries : int
Maximum number of retry attempts
initial_backoff : int
Initial backoff time in seconds
Returns:
--------
tuple of (float, float)
Distance in km and duration in minutes
"""
# URLs for different public OSRM instances to distribute load
osrm_urls = [
"http://router.project-osrm.org",
"https://routing.openstreetmap.de",
# Add more public OSRM servers if available
]
retry_count = 0
backoff = initial_backoff
while retry_count < max_retries:
try:
# Use a random OSRM server from the list to distribute load
base_url = random.choice(osrm_urls)
url = f"{base_url}/route/v1/driving/{origin['longitude']},{origin['latitude']};{destination['longitude']},{destination['latitude']}?overview=false"
# Add a timeout to prevent hanging connections
response = requests.get(url, timeout=5)
data = response.json()
if data.get('code') == 'Ok':
# Extract distance and duration
distance = data['routes'][0]['distance'] / 1000 # meters to km
duration = data['routes'][0]['duration'] / 60 # seconds to minutes
return round(distance, 2), round(duration, 2)
else:
print(f"API returned error: {data.get('message', 'Unknown error')}")
except requests.exceptions.RequestException as e:
print(f"Request failed: {e}. Retry {retry_count+1}/{max_retries}")
# Exponential backoff with jitter to prevent thundering herd
jitter = random.uniform(0, 0.5 * backoff)
sleep_time = backoff + jitter
time.sleep(sleep_time)
backoff *= 2 # Exponential backoff
retry_count += 1
# Fallback to haversine after all retries failed
print(f"All retries failed for route from ({origin['latitude']},{origin['longitude']}) to ({destination['latitude']},{destination['longitude']}). Using haversine distance.")
distance = haversine_distance(
origin['latitude'], origin['longitude'],
destination['latitude'], destination['longitude']
)
distance = distance * 1.3 # Road factor
time_mins = (distance / 40) * 60 # 40 km/h
return round(distance, 2), round(time_mins, 2)
def get_road_distance(origins, destinations, use_osrm=True):
"""
Calculate actual road distances and travel times between multiple origins and destinations
using the OSRM (Open Source Routing Machine) API.
Parameters:
-----------
origins : list of dict
List of origin locations with 'latitude' and 'longitude' keys
destinations : list of dict
List of destination locations with 'latitude' and 'longitude' keys
use_osrm : bool, default=True
Whether to use OSRM API or fall back to haversine distance
Returns:
--------
tuple of (numpy.ndarray, numpy.ndarray)
Arrays containing distances (in km) and durations (in minutes) between each origin-destination pair
"""
n_origins = len(origins)
n_destinations = len(destinations)
distance_matrix = np.zeros((n_origins, n_destinations))
duration_matrix = np.zeros((n_origins, n_destinations))
# If OSRM is not requested, fall back to haversine distance
if not use_osrm:
print("Using haversine distance as fallback.")
for i, origin in enumerate(origins):
for j, dest in enumerate(destinations):
distance = haversine_distance(
origin['latitude'], origin['longitude'],
dest['latitude'], dest['longitude']
)
# Adjust for road networks (roads are typically not straight lines)
distance = distance * 1.3 # Apply a factor to approximate road distance
time_mins = (distance / 40) * 60 # Assuming average speed of 40 km/h
distance_matrix[i, j] = round(distance, 2)
duration_matrix[i, j] = round(time_mins, 2)
return distance_matrix, duration_matrix
# Process in batches to prevent overwhelming the API
print(f"Processing {n_origins} origins and {n_destinations} destinations in batches...")
total_requests = n_origins * n_destinations
completed = 0
try:
# Try OSRM's table service for small datasets first (more efficient)
if n_origins + n_destinations <= 50:
print("Trying OSRM table API for efficient matrix calculation...")
try:
# Code for table API would go here, but we'll skip for now as it's more complex
# and the batch approach is more reliable for handling errors
raise NotImplementedError("Table API not implemented, falling back to individual routes")
except Exception as e:
print(f"Table API failed: {e}. Using individual routes instead.")
# Continue with individual route requests below
# Process with individual route requests
for i, origin in enumerate(origins):
for j, dest in enumerate(destinations):
# Skip if origin and destination are the same point
if i == j:
distance_matrix[i, j] = 0
duration_matrix[i, j] = 0
completed += 1
continue
# Get distance with retry logic
distance, duration = get_road_distance_with_retry(origin, dest)
distance_matrix[i, j] = distance
duration_matrix[i, j] = duration
# Show progress
completed += 1
if completed % 10 == 0:
print(f"Progress: {completed}/{total_requests} routes calculated ({(completed/total_requests)*100:.1f}%)")
# Add randomized delay to prevent overwhelming the API
time.sleep(random.uniform(0.1, 0.5))
except KeyboardInterrupt:
print("\nOperation interrupted by user. Saving partial results...")
return distance_matrix, duration_matrix
def generate_travel_matrix(use_osrm=True):
"""
Generate travel time and distance matrices between all locations in the delivery problem.
Parameters:
-----------
use_osrm : bool, default=True
Whether to use OSRM API for real road distances instead of haversine
Returns:
--------
tuple of (pd.DataFrame, pd.DataFrame, dict)
Distance matrix, base time matrix, and hourly time matrices
"""
# Create data directories if they don't exist
data_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'data')
time_matrix_dir = os.path.join(data_dir, 'time-matrix')
delivery_data_dir = os.path.join(data_dir, 'delivery-data')
vehicle_data_dir = os.path.join(data_dir, 'vehicle-data')
# Ensure all directories exist
for directory in [time_matrix_dir, delivery_data_dir, vehicle_data_dir]:
os.makedirs(directory, exist_ok=True)
# Read delivery and vehicle data
try:
delivery_data = pd.read_csv(os.path.join(delivery_data_dir, 'delivery_data.csv'))
vehicle_data = pd.read_csv(os.path.join(vehicle_data_dir, 'vehicle_data.csv'))
except FileNotFoundError:
print("Error: Please generate delivery and vehicle data first!")
return
# Extract locations
delivery_locations = delivery_data[['delivery_id', 'latitude', 'longitude']].values
depot_locations = vehicle_data[['vehicle_id', 'depot_latitude', 'depot_longitude']].values
# Average speed for time calculation (km/h)
avg_speed = vehicle_data['avg_speed_kmh'].mean()
# Traffic factor matrix (to simulate traffic conditions at different times)
hours_in_day = 24
traffic_factors = np.ones((hours_in_day, 1))
# Simulate morning rush hour (8-10 AM)
traffic_factors[8:10] = 1.5
# Simulate evening rush hour (5-7 PM)
traffic_factors[17:19] = 1.8
# Late night (less traffic)
traffic_factors[22:] = 0.8
traffic_factors[:5] = 0.7
# Create a combined list of all locations (depots + delivery points)
all_locations = []
# Add depot locations
for row in depot_locations:
all_locations.append({
'id': row[0], # vehicle_id as location id
'type': 'depot',
'latitude': row[1],
'longitude': row[2]
})
# Add delivery locations
for row in delivery_locations:
all_locations.append({
'id': row[0], # delivery_id as location id
'type': 'delivery',
'latitude': row[1],
'longitude': row[2]
})
print(f"Calculating distances between {len(all_locations)} locations...")
# Save the locations file early so we have this data even if the process is interrupted
location_df = pd.DataFrame(all_locations)
location_df.to_csv(os.path.join(time_matrix_dir, 'locations.csv'), index=False)
# Calculate distances and times using OSRM with improved error handling
if use_osrm:
print("Using OSRM API for road distances...")
distance_matrix, base_time_matrix = get_road_distance(all_locations, all_locations, use_osrm=True)
else:
print("Using haversine distance with road factor adjustment...")
distance_matrix, base_time_matrix = get_road_distance(all_locations, all_locations, use_osrm=False)
# Create DataFrames for the matrices
location_ids = [loc['id'] for loc in all_locations]
distance_df = pd.DataFrame(distance_matrix, index=location_ids, columns=location_ids)
time_df = pd.DataFrame(base_time_matrix, index=location_ids, columns=location_ids)
# Save distance and base time matrices early in case later steps fail
distance_df.to_csv(os.path.join(time_matrix_dir, 'distance_matrix.csv'))
time_df.to_csv(os.path.join(time_matrix_dir, 'base_time_matrix.csv'))
print("Basic distance and time matrices saved successfully.")
# Create time matrices for different hours of the day
hourly_time_matrices = {}
for hour in range(24):
traffic_factor = traffic_factors[hour][0]
hourly_time = base_time_matrix * traffic_factor
hourly_time_matrices[f"{hour:02d}:00"] = pd.DataFrame(hourly_time, index=location_ids, columns=location_ids)
# Save a sample of time matrices (e.g., rush hour and normal time)
try:
hourly_time_matrices['08:00'].to_csv(os.path.join(time_matrix_dir, 'morning_rush_time_matrix.csv'))
hourly_time_matrices['18:00'].to_csv(os.path.join(time_matrix_dir, 'evening_rush_time_matrix.csv'))
hourly_time_matrices['12:00'].to_csv(os.path.join(time_matrix_dir, 'midday_time_matrix.csv'))
hourly_time_matrices['00:00'].to_csv(os.path.join(time_matrix_dir, 'night_time_matrix.csv'))
print("Time matrices for different hours saved successfully.")
except Exception as e:
print(f"Error saving hourly time matrices: {e}")
print("Continuing with basic matrices only.")
print("Travel matrices generation complete.")
return distance_df, time_df, hourly_time_matrices
if __name__ == "__main__":
# For development, allow falling back to haversine if needed
import argparse
parser = argparse.ArgumentParser(description="Generate travel matrices for delivery route optimization")
parser.add_argument("--use-osrm", action="store_true", help="Use OSRM API for real road distances")
parser.add_argument("--use-haversine", action="store_true", help="Use haversine distance only (faster)")
args = parser.parse_args()
if args.use_haversine:
generate_travel_matrix(use_osrm=False)
else:
# Default to OSRM unless explicitly disabled
generate_travel_matrix(use_osrm=True)