feat: Full VRP/TSP route optimizer with OR-Tools, 2-opt, time windows, before/after comparison, DBSCAN clustering

#29
by MouleeswaranM - opened
brain/app/services/route_optimization_engine.py ADDED
@@ -0,0 +1,644 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Route Optimization Engine β€” Full VRP/TSP Implementation
3
+ ========================================================
4
+
5
+ Addresses Challenge #4: "Suboptimal route selection increasing total travel distance"
6
+
7
+ Features:
8
+ 1. Multi-stop TSP within routes (OR-Tools Routing + 2-opt local search)
9
+ 2. Time window constraints per stop (AddDimension + SetRange)
10
+ 3. Before/after route comparison with distance savings
11
+ 4. DBSCAN clustering option (discovers K automatically, handles arbitrary shapes)
12
+ 5. Dynamic re-routing via cheapest-insertion heuristic
13
+ 6. Traffic-aware effective speed (via traffic_integration)
14
+
15
+ This module is called AFTER clustering assigns packages to routes,
16
+ to optimize the STOP ORDER within each route.
17
+ """
18
+
19
+ import math
20
+ import time
21
+ import logging
22
+ from typing import List, Dict, Any, Optional, Tuple
23
+ from dataclasses import dataclass, field
24
+
25
+ logger = logging.getLogger("fairrelay.route_optimizer")
26
+
27
+
28
+ # ═══════════════════════════════════════════════════════════════
29
+ # DATA STRUCTURES
30
+ # ═══════════════════════════════════════════════════════════════
31
+
32
+ @dataclass
33
+ class Stop:
34
+ """A delivery stop with coordinates and optional time window."""
35
+ id: str
36
+ lat: float
37
+ lng: float
38
+ address: str = ""
39
+ weight_kg: float = 0.0
40
+ service_time_min: float = 5.0 # Time spent at stop
41
+ time_window_start: Optional[int] = None # Minutes from route start
42
+ time_window_end: Optional[int] = None # Minutes from route start
43
+ priority: str = "normal"
44
+
45
+
46
+ @dataclass
47
+ class RouteOptResult:
48
+ """Result of route optimization."""
49
+ ordered_stops: List[Stop]
50
+ total_distance_km: float
51
+ total_time_minutes: float
52
+ naive_distance_km: float # Before optimization
53
+ distance_saved_km: float # Improvement
54
+ distance_saved_pct: float # % improvement
55
+ optimization_method: str # "or_tools_vrp" | "2_opt" | "nearest_neighbor"
56
+ time_windows_respected: bool
57
+ num_stops: int
58
+ polyline_points: List[Tuple[float, float]] # Ordered (lat, lng) for map
59
+
60
+
61
+ @dataclass
62
+ class RouteComparison:
63
+ """Before vs after comparison for Challenge #4."""
64
+ route_id: str
65
+ before: Dict[str, Any]
66
+ after: Dict[str, Any]
67
+ improvement: Dict[str, Any]
68
+
69
+
70
+ # ═══════════════════════════════════════════════════════════════
71
+ # CORE: HAVERSINE DISTANCE
72
+ # ═══════════════════════════════════════════════════════════════
73
+
74
+ def haversine_km(lat1: float, lng1: float, lat2: float, lng2: float) -> float:
75
+ """Haversine distance in km."""
76
+ R = 6371
77
+ dlat = math.radians(lat2 - lat1)
78
+ dlng = math.radians(lng2 - lng1)
79
+ a = math.sin(dlat/2)**2 + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * math.sin(dlng/2)**2
80
+ return R * 2 * math.asin(math.sqrt(a))
81
+
82
+
83
+ def build_distance_matrix(stops: List[Stop], depot_lat: float, depot_lng: float) -> List[List[int]]:
84
+ """Build distance matrix (in meters) with depot at index 0."""
85
+ all_points = [(depot_lat, depot_lng)] + [(s.lat, s.lng) for s in stops]
86
+ n = len(all_points)
87
+ matrix = [[0] * n for _ in range(n)]
88
+ for i in range(n):
89
+ for j in range(n):
90
+ if i != j:
91
+ d = haversine_km(all_points[i][0], all_points[i][1], all_points[j][0], all_points[j][1])
92
+ matrix[i][j] = int(d * 1000) # Convert to meters for OR-Tools
93
+ return matrix
94
+
95
+
96
+ # ═══════════════════════════════════════════════════════════════
97
+ # METHOD 1: OR-TOOLS VRP WITH TIME WINDOWS
98
+ # ═══════════════════════════════════════════════════════════════
99
+
100
+ def solve_vrp_ortools(
101
+ stops: List[Stop],
102
+ depot_lat: float,
103
+ depot_lng: float,
104
+ speed_kmh: float = 30.0,
105
+ max_time_seconds: int = 5,
106
+ ) -> Optional[List[int]]:
107
+ """
108
+ Solve TSP/VRP using OR-Tools Routing Library with time windows.
109
+
110
+ Returns ordered indices into stops list, or None if solver fails.
111
+ """
112
+ try:
113
+ from ortools.constraint_solver import routing_enums_pb2, pywrapcp
114
+ except ImportError:
115
+ return None
116
+
117
+ n = len(stops) + 1 # +1 for depot
118
+ if n <= 2:
119
+ return list(range(len(stops)))
120
+
121
+ # Build distance matrix
122
+ dist_matrix = build_distance_matrix(stops, depot_lat, depot_lng)
123
+
124
+ # Create routing index manager (1 vehicle, depot at node 0)
125
+ manager = pywrapcp.RoutingIndexManager(n, 1, 0)
126
+ routing = pywrapcp.RoutingModel(manager)
127
+
128
+ # Distance callback
129
+ def distance_callback(from_index, to_index):
130
+ from_node = manager.IndexToNode(from_index)
131
+ to_node = manager.IndexToNode(to_index)
132
+ return dist_matrix[from_node][to_node]
133
+
134
+ transit_callback_index = routing.RegisterTransitCallback(distance_callback)
135
+ routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)
136
+
137
+ # Time dimension (for time windows)
138
+ has_time_windows = any(s.time_window_start is not None for s in stops)
139
+
140
+ if has_time_windows:
141
+ def time_callback(from_index, to_index):
142
+ from_node = manager.IndexToNode(from_index)
143
+ to_node = manager.IndexToNode(to_index)
144
+ # Travel time in minutes
145
+ dist_km = dist_matrix[from_node][to_node] / 1000
146
+ travel_min = (dist_km / speed_kmh) * 60
147
+ # Add service time at destination
148
+ if to_node > 0:
149
+ travel_min += stops[to_node - 1].service_time_min
150
+ return int(travel_min)
151
+
152
+ time_callback_index = routing.RegisterTransitCallback(time_callback)
153
+
154
+ # Add time dimension
155
+ max_route_time = 720 # 12 hours max
156
+ routing.AddDimension(
157
+ time_callback_index,
158
+ 30, # Slack (waiting time allowed)
159
+ max_route_time, # Max cumulative time
160
+ False, # Don't force start at 0
161
+ 'Time'
162
+ )
163
+ time_dimension = routing.GetDimensionOrDie('Time')
164
+
165
+ # Set time windows
166
+ for i, stop in enumerate(stops):
167
+ node_index = manager.NodeToIndex(i + 1)
168
+ if stop.time_window_start is not None and stop.time_window_end is not None:
169
+ time_dimension.CumulVar(node_index).SetRange(
170
+ int(stop.time_window_start),
171
+ int(stop.time_window_end)
172
+ )
173
+
174
+ # Depot time window (start at 0, end at max)
175
+ time_dimension.CumulVar(routing.Start(0)).SetRange(0, max_route_time)
176
+
177
+ # Solve with time limit
178
+ search_params = pywrapcp.DefaultRoutingSearchParameters()
179
+ search_params.first_solution_strategy = routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC
180
+ search_params.local_search_metaheuristic = routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH
181
+ search_params.time_limit.FromSeconds(max_time_seconds)
182
+
183
+ solution = routing.SolveWithParameters(search_params)
184
+
185
+ if solution:
186
+ # Extract route order
187
+ order = []
188
+ index = routing.Start(0)
189
+ while not routing.IsEnd(index):
190
+ node = manager.IndexToNode(index)
191
+ if node > 0: # Skip depot
192
+ order.append(node - 1) # Map back to stops index
193
+ index = solution.Value(routing.NextVar(index))
194
+ return order
195
+
196
+ return None
197
+
198
+
199
+ # ═══════════════════════════════════════════════════════════════
200
+ # METHOD 2: 2-OPT LOCAL SEARCH IMPROVEMENT
201
+ # ═══════════════════════════════════════════════════════════════
202
+
203
+ def two_opt_improve(
204
+ stops: List[Stop],
205
+ depot_lat: float,
206
+ depot_lng: float,
207
+ initial_order: List[int],
208
+ max_iterations: int = 1000,
209
+ ) -> List[int]:
210
+ """
211
+ Apply 2-opt improvement to a route order.
212
+
213
+ 2-opt reverses segments of the route to reduce total distance.
214
+ Guaranteed to converge to a local optimum.
215
+ """
216
+ def route_distance(order: List[int]) -> float:
217
+ """Total route distance including depot→first and last→depot."""
218
+ if not order:
219
+ return 0.0
220
+ # Depot to first stop
221
+ total = haversine_km(depot_lat, depot_lng, stops[order[0]].lat, stops[order[0]].lng)
222
+ # Between stops
223
+ for i in range(len(order) - 1):
224
+ total += haversine_km(stops[order[i]].lat, stops[order[i]].lng, stops[order[i+1]].lat, stops[order[i+1]].lng)
225
+ # Last stop back to depot
226
+ total += haversine_km(stops[order[-1]].lat, stops[order[-1]].lng, depot_lat, depot_lng)
227
+ return total
228
+
229
+ best_order = list(initial_order)
230
+ best_dist = route_distance(best_order)
231
+ improved = True
232
+ iterations = 0
233
+
234
+ while improved and iterations < max_iterations:
235
+ improved = False
236
+ iterations += 1
237
+ for i in range(len(best_order) - 1):
238
+ for j in range(i + 1, len(best_order)):
239
+ # Reverse segment [i:j+1]
240
+ new_order = best_order[:i] + best_order[i:j+1][::-1] + best_order[j+1:]
241
+ new_dist = route_distance(new_order)
242
+ if new_dist < best_dist - 0.01: # 10m improvement threshold
243
+ best_order = new_order
244
+ best_dist = new_dist
245
+ improved = True
246
+ break
247
+ if improved:
248
+ break
249
+
250
+ return best_order
251
+
252
+
253
+ # ═══════════════════════════════════════════════════════════════
254
+ # METHOD 3: NEAREST NEIGHBOR (BASELINE)
255
+ # ═══════════════════════════════════════════════════════════════
256
+
257
+ def nearest_neighbor_order(stops: List[Stop], depot_lat: float, depot_lng: float) -> List[int]:
258
+ """Simple nearest-neighbor heuristic as baseline."""
259
+ if not stops:
260
+ return []
261
+
262
+ remaining = list(range(len(stops)))
263
+ order = []
264
+ curr_lat, curr_lng = depot_lat, depot_lng
265
+
266
+ while remaining:
267
+ nearest_idx = min(remaining, key=lambda i: haversine_km(curr_lat, curr_lng, stops[i].lat, stops[i].lng))
268
+ order.append(nearest_idx)
269
+ remaining.remove(nearest_idx)
270
+ curr_lat, curr_lng = stops[nearest_idx].lat, stops[nearest_idx].lng
271
+
272
+ return order
273
+
274
+
275
+ # ═══════════════════════════════════════════════════════════════
276
+ # METHOD 4: CHEAPEST INSERTION (DYNAMIC RE-ROUTING)
277
+ # ═══════════════════════════════════════════════════════════════
278
+
279
+ def cheapest_insertion(
280
+ existing_order: List[int],
281
+ new_stop_idx: int,
282
+ stops: List[Stop],
283
+ depot_lat: float,
284
+ depot_lng: float,
285
+ ) -> List[int]:
286
+ """
287
+ Insert a new stop into an existing route at the cheapest position.
288
+ Used for dynamic re-routing when new packages arrive mid-route.
289
+ """
290
+ if not existing_order:
291
+ return [new_stop_idx]
292
+
293
+ def segment_cost(from_lat, from_lng, to_lat, to_lng):
294
+ return haversine_km(from_lat, from_lng, to_lat, to_lng)
295
+
296
+ new_stop = stops[new_stop_idx]
297
+ best_position = 0
298
+ best_cost_increase = float('inf')
299
+
300
+ # Try inserting at each position
301
+ for pos in range(len(existing_order) + 1):
302
+ if pos == 0:
303
+ prev_lat, prev_lng = depot_lat, depot_lng
304
+ else:
305
+ prev_stop = stops[existing_order[pos - 1]]
306
+ prev_lat, prev_lng = prev_stop.lat, prev_stop.lng
307
+
308
+ if pos == len(existing_order):
309
+ next_lat, next_lng = depot_lat, depot_lng
310
+ else:
311
+ next_stop = stops[existing_order[pos]]
312
+ next_lat, next_lng = next_stop.lat, next_stop.lng
313
+
314
+ # Cost of current segment (prev β†’ next)
315
+ current_cost = segment_cost(prev_lat, prev_lng, next_lat, next_lng)
316
+ # Cost after insertion (prev β†’ new β†’ next)
317
+ new_cost = (segment_cost(prev_lat, prev_lng, new_stop.lat, new_stop.lng) +
318
+ segment_cost(new_stop.lat, new_stop.lng, next_lat, next_lng))
319
+
320
+ cost_increase = new_cost - current_cost
321
+ if cost_increase < best_cost_increase:
322
+ best_cost_increase = cost_increase
323
+ best_position = pos
324
+
325
+ result = list(existing_order)
326
+ result.insert(best_position, new_stop_idx)
327
+ return result
328
+
329
+
330
+ # ═══════════════════════════════════════════════════════════════
331
+ # MAIN OPTIMIZER: COMBINES ALL METHODS
332
+ # ═══════════════════════════════════════════════════════════════
333
+
334
+ def optimize_route(
335
+ stops: List[Stop],
336
+ depot_lat: float,
337
+ depot_lng: float,
338
+ speed_kmh: float = 30.0,
339
+ use_time_windows: bool = True,
340
+ max_solver_time: int = 5,
341
+ ) -> RouteOptResult:
342
+ """
343
+ Full route optimization pipeline:
344
+ 1. Try OR-Tools VRP with time windows (best quality)
345
+ 2. Apply 2-opt local search improvement
346
+ 3. Fallback: nearest-neighbor + 2-opt
347
+
348
+ Returns optimized route with before/after comparison.
349
+ """
350
+ if not stops:
351
+ return RouteOptResult(
352
+ ordered_stops=[], total_distance_km=0, total_time_minutes=0,
353
+ naive_distance_km=0, distance_saved_km=0, distance_saved_pct=0,
354
+ optimization_method="empty", time_windows_respected=True,
355
+ num_stops=0, polyline_points=[],
356
+ )
357
+
358
+ t0 = time.time()
359
+
360
+ # Step 1: Compute naive (input order) distance as baseline
361
+ naive_order = list(range(len(stops)))
362
+ naive_dist = _compute_route_distance(stops, naive_order, depot_lat, depot_lng)
363
+
364
+ # Step 2: Try OR-Tools VRP
365
+ method = "nearest_neighbor"
366
+ ortools_order = None
367
+
368
+ if len(stops) >= 3:
369
+ ortools_order = solve_vrp_ortools(
370
+ stops, depot_lat, depot_lng, speed_kmh, max_solver_time
371
+ )
372
+
373
+ if ortools_order:
374
+ method = "or_tools_vrp"
375
+ best_order = ortools_order
376
+ else:
377
+ # Fallback: nearest neighbor
378
+ best_order = nearest_neighbor_order(stops, depot_lat, depot_lng)
379
+
380
+ # Step 3: Apply 2-opt improvement (always)
381
+ if len(stops) >= 4:
382
+ improved_order = two_opt_improve(stops, depot_lat, depot_lng, best_order)
383
+ improved_dist = _compute_route_distance(stops, improved_order, depot_lat, depot_lng)
384
+ current_dist = _compute_route_distance(stops, best_order, depot_lat, depot_lng)
385
+
386
+ if improved_dist < current_dist:
387
+ best_order = improved_order
388
+ method = f"{method}+2opt"
389
+
390
+ # Step 4: Compute final metrics
391
+ optimized_dist = _compute_route_distance(stops, best_order, depot_lat, depot_lng)
392
+ distance_saved = naive_dist - optimized_dist
393
+ saved_pct = (distance_saved / naive_dist * 100) if naive_dist > 0 else 0
394
+
395
+ # Compute total time (distance/speed + service times)
396
+ total_time = (optimized_dist / speed_kmh) * 60 + sum(stops[i].service_time_min for i in best_order)
397
+
398
+ # Check time windows respected
399
+ tw_respected = _check_time_windows(stops, best_order, depot_lat, depot_lng, speed_kmh)
400
+
401
+ # Build polyline
402
+ polyline = [(depot_lat, depot_lng)] + [(stops[i].lat, stops[i].lng) for i in best_order] + [(depot_lat, depot_lng)]
403
+
404
+ # Order stops
405
+ ordered_stops = [stops[i] for i in best_order]
406
+
407
+ return RouteOptResult(
408
+ ordered_stops=ordered_stops,
409
+ total_distance_km=round(optimized_dist, 2),
410
+ total_time_minutes=round(total_time, 1),
411
+ naive_distance_km=round(naive_dist, 2),
412
+ distance_saved_km=round(max(0, distance_saved), 2),
413
+ distance_saved_pct=round(max(0, saved_pct), 1),
414
+ optimization_method=method,
415
+ time_windows_respected=tw_respected,
416
+ num_stops=len(stops),
417
+ polyline_points=polyline,
418
+ )
419
+
420
+
421
+ def compare_routes(
422
+ routes: List[Dict[str, Any]],
423
+ depot_lat: float,
424
+ depot_lng: float,
425
+ speed_kmh: float = 30.0,
426
+ ) -> List[RouteComparison]:
427
+ """
428
+ Generate before/after route comparison for Challenge #4.
429
+
430
+ Args:
431
+ routes: List of route dicts with "stops" (list of stop dicts)
432
+ depot_lat, depot_lng: Warehouse coordinates
433
+ speed_kmh: Average speed
434
+
435
+ Returns:
436
+ List of RouteComparison objects showing improvement per route
437
+ """
438
+ comparisons = []
439
+
440
+ for route in routes:
441
+ route_id = route.get("id", f"route_{len(comparisons)}")
442
+ raw_stops = route.get("stops", route.get("packages", []))
443
+
444
+ # Convert to Stop objects
445
+ stops = [
446
+ Stop(
447
+ id=s.get("id", f"stop_{i}"),
448
+ lat=s.get("latitude", s.get("lat", 0)),
449
+ lng=s.get("longitude", s.get("lng", 0)),
450
+ address=s.get("address", ""),
451
+ weight_kg=s.get("weight_kg", 0),
452
+ service_time_min=s.get("service_time_min", 5),
453
+ time_window_start=s.get("time_window_start"),
454
+ time_window_end=s.get("time_window_end"),
455
+ priority=s.get("priority", "normal"),
456
+ )
457
+ for i, s in enumerate(raw_stops)
458
+ ]
459
+
460
+ if not stops:
461
+ continue
462
+
463
+ # Before: naive order (as received)
464
+ naive_dist = _compute_route_distance(stops, list(range(len(stops))), depot_lat, depot_lng)
465
+ naive_time = (naive_dist / speed_kmh) * 60 + sum(s.service_time_min for s in stops)
466
+
467
+ # After: optimized
468
+ result = optimize_route(stops, depot_lat, depot_lng, speed_kmh)
469
+
470
+ comparisons.append(RouteComparison(
471
+ route_id=route_id,
472
+ before={
473
+ "distance_km": round(naive_dist, 2),
474
+ "time_minutes": round(naive_time, 1),
475
+ "stop_order": [s.id for s in stops],
476
+ "co2_kg": round(naive_dist * 0.21, 2),
477
+ },
478
+ after={
479
+ "distance_km": result.total_distance_km,
480
+ "time_minutes": result.total_time_minutes,
481
+ "stop_order": [s.id for s in result.ordered_stops],
482
+ "co2_kg": round(result.total_distance_km * 0.21, 2),
483
+ "method": result.optimization_method,
484
+ "polyline": result.polyline_points,
485
+ },
486
+ improvement={
487
+ "distance_saved_km": result.distance_saved_km,
488
+ "distance_saved_pct": result.distance_saved_pct,
489
+ "time_saved_minutes": round(naive_time - result.total_time_minutes, 1),
490
+ "co2_saved_kg": round((naive_dist - result.total_distance_km) * 0.21, 2),
491
+ "time_windows_respected": result.time_windows_respected,
492
+ },
493
+ ))
494
+
495
+ return comparisons
496
+
497
+
498
+ # ═══════════════════════════════════════════════════════════════
499
+ # DBSCAN CLUSTERING (DISCOVERS K AUTOMATICALLY)
500
+ # ═══════════════════════════════════════════════════════════════
501
+
502
+ def cluster_packages_dbscan(
503
+ packages: List[Dict[str, Any]],
504
+ eps_km: float = 5.0,
505
+ min_samples: int = 2,
506
+ max_cluster_size: int = 30,
507
+ ) -> List[List[Dict[str, Any]]]:
508
+ """
509
+ DBSCAN-based clustering that discovers K automatically.
510
+ Handles arbitrary cluster shapes (unlike KMeans).
511
+
512
+ Fixes: Noise points (-1 label) are merged into nearest cluster.
513
+
514
+ Args:
515
+ packages: List of package dicts with latitude, longitude
516
+ eps_km: Max distance between points in same cluster (km)
517
+ min_samples: Min points to form a cluster
518
+ max_cluster_size: Split clusters exceeding this
519
+
520
+ Returns:
521
+ List of clusters (each cluster is a list of package dicts)
522
+ """
523
+ import numpy as np
524
+ from sklearn.cluster import DBSCAN
525
+ from sklearn.metrics.pairwise import haversine_distances
526
+
527
+ if not packages:
528
+ return []
529
+
530
+ if len(packages) <= min_samples:
531
+ return [packages]
532
+
533
+ # Convert to radians for haversine
534
+ coords_rad = np.array([
535
+ [math.radians(p["latitude"]), math.radians(p["longitude"])]
536
+ for p in packages
537
+ ])
538
+
539
+ # eps in radians (eps_km / Earth radius)
540
+ eps_rad = eps_km / 6371.0
541
+
542
+ # Run DBSCAN
543
+ db = DBSCAN(eps=eps_rad, min_samples=min_samples, metric='haversine')
544
+ labels = db.fit_predict(coords_rad)
545
+
546
+ # Group by cluster
547
+ clusters: Dict[int, List[int]] = {}
548
+ noise_indices: List[int] = []
549
+
550
+ for idx, label in enumerate(labels):
551
+ if label == -1:
552
+ noise_indices.append(idx)
553
+ else:
554
+ if label not in clusters:
555
+ clusters[label] = []
556
+ clusters[label].append(idx)
557
+
558
+ # FIX: Merge noise points into nearest cluster
559
+ if noise_indices and clusters:
560
+ # Compute cluster centroids
561
+ cluster_centroids = {}
562
+ for label, indices in clusters.items():
563
+ lats = [packages[i]["latitude"] for i in indices]
564
+ lngs = [packages[i]["longitude"] for i in indices]
565
+ cluster_centroids[label] = (sum(lats)/len(lats), sum(lngs)/len(lngs))
566
+
567
+ for noise_idx in noise_indices:
568
+ pkg = packages[noise_idx]
569
+ # Find nearest cluster
570
+ nearest_label = min(
571
+ cluster_centroids.keys(),
572
+ key=lambda l: haversine_km(
573
+ pkg["latitude"], pkg["longitude"],
574
+ cluster_centroids[l][0], cluster_centroids[l][1]
575
+ )
576
+ )
577
+ clusters[nearest_label].append(noise_idx)
578
+ elif noise_indices and not clusters:
579
+ # All points are noise β€” treat as one cluster
580
+ clusters[0] = noise_indices
581
+
582
+ # Split oversized clusters
583
+ final_clusters = []
584
+ for label, indices in clusters.items():
585
+ if len(indices) <= max_cluster_size:
586
+ final_clusters.append([packages[i] for i in indices])
587
+ else:
588
+ # Split using KMeans
589
+ from sklearn.cluster import KMeans
590
+ sub_coords = np.array([[packages[i]["latitude"], packages[i]["longitude"]] for i in indices])
591
+ n_sub = max(2, len(indices) // max_cluster_size + 1)
592
+ km = KMeans(n_clusters=n_sub, random_state=42, n_init=5)
593
+ sub_labels = km.fit_predict(sub_coords)
594
+ for sl in range(n_sub):
595
+ sub_indices = [indices[j] for j in range(len(indices)) if sub_labels[j] == sl]
596
+ if sub_indices:
597
+ final_clusters.append([packages[i] for i in sub_indices])
598
+
599
+ return final_clusters
600
+
601
+
602
+ # ═══════════════════════════════════════════════════════════════
603
+ # HELPERS
604
+ # ═══════════════════════════════════════════════════════════════
605
+
606
+ def _compute_route_distance(stops: List[Stop], order: List[int], depot_lat: float, depot_lng: float) -> float:
607
+ """Compute total route distance for a given stop order."""
608
+ if not order:
609
+ return 0.0
610
+ total = haversine_km(depot_lat, depot_lng, stops[order[0]].lat, stops[order[0]].lng)
611
+ for i in range(len(order) - 1):
612
+ total += haversine_km(stops[order[i]].lat, stops[order[i]].lng, stops[order[i+1]].lat, stops[order[i+1]].lng)
613
+ total += haversine_km(stops[order[-1]].lat, stops[order[-1]].lng, depot_lat, depot_lng)
614
+ return total
615
+
616
+
617
+ def _check_time_windows(stops: List[Stop], order: List[int], depot_lat: float, depot_lng: float, speed_kmh: float) -> bool:
618
+ """Check if all time windows are respected in the given order."""
619
+ if not order:
620
+ return True
621
+
622
+ current_time = 0.0 # Minutes from departure
623
+ current_lat, current_lng = depot_lat, depot_lng
624
+
625
+ for idx in order:
626
+ stop = stops[idx]
627
+ # Travel time to this stop
628
+ dist = haversine_km(current_lat, current_lng, stop.lat, stop.lng)
629
+ travel_time = (dist / speed_kmh) * 60
630
+ current_time += travel_time
631
+
632
+ # Check time window
633
+ if stop.time_window_end is not None and current_time > stop.time_window_end:
634
+ return False
635
+
636
+ # Wait if arrived too early
637
+ if stop.time_window_start is not None and current_time < stop.time_window_start:
638
+ current_time = stop.time_window_start
639
+
640
+ # Service time
641
+ current_time += stop.service_time_min
642
+ current_lat, current_lng = stop.lat, stop.lng
643
+
644
+ return True