File size: 8,282 Bytes
0e4403d | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 | """
graph_builder.py
Builds or loads the Indore road network graph.
βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
SPEED OPTIMISATION: PICKLE INSTEAD OF GRAPHML
βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
WHY GRAPHML IS SLOW:
GraphML is an XML text format. Every node ID, coordinate, edge
attribute (length, base_time, traffic_factor...) is stored as a
plain string like "0.034721". On load, Python has to:
1. Parse 95MB of XML character by character
2. Convert every value from string β float (thousands of edges)
3. Reconstruct the NetworkX graph object in memory
On a typical server this takes 15β25 seconds.
WHY PICKLE IS FAST:
Python's pickle format stores the graph's in-memory binary
representation directly. On load it just:
1. Reads the binary file into memory
2. Deserialises the already-typed Python objects
The same graph loads in 1β3 seconds β roughly 10x faster.
TRADEOFF:
Pickle files are not human-readable and are Python-version
specific. We keep the .graphml as a portable backup. The .pkl
is purely a runtime performance cache.
HOW IT WORKS:
- First run: loads/downloads graphml, saves BOTH graphml + pkl
- All subsequent runs: loads pkl directly, skips graphml entirely
- To force a rebuild: delete indore.pkl (graphml stays intact)
βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
IMPACT ON ROUTE RESULTS:
βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
Zero impact. The graph topology, edge weights, node positions,
and all attributes are byte-for-byte identical between graphml
and pickle. Pickle is purely a serialisation format change β
the graph data itself is unchanged.
"""
import os
import pickle
import logging
import osmnx as ox
logger = logging.getLogger(__name__)
# ββ Fallback speeds (km/h) ββββββββββββββββββββββββββββββββββββββββββββββββββββ
ROAD_SPEEDS_KMPH = {
"motorway": 65.0,
"motorway_link": 50.0,
"trunk": 45.0,
"trunk_link": 35.0,
"primary": 25.0,
"primary_link": 20.0,
"secondary": 20.0,
"secondary_link": 18.0,
"tertiary": 18.0,
"tertiary_link": 15.0,
"residential": 14.0,
"living_street": 10.0,
"service": 10.0,
"unclassified": 16.0,
}
DEFAULT_SPEED_KMPH = 18.0
ROAD_TRAFFIC_VOLUME = {
"motorway": 0.7,
"motorway_link": 0.6,
"trunk": 0.9,
"trunk_link": 0.8,
"primary": 1.8,
"primary_link": 1.5,
"secondary": 1.5,
"secondary_link": 1.3,
"tertiary": 1.1,
"tertiary_link": 1.0,
"residential": 0.6,
"living_street": 0.4,
"service": 0.4,
"unclassified": 0.8,
}
DEFAULT_TRAFFIC_VOLUME = 0.9
MAJOR_ROAD_TYPES = {
"motorway", "motorway_link",
"trunk", "trunk_link",
"primary", "primary_link",
"secondary", "secondary_link",
}
def _road_speed(data: dict) -> float:
road_type = data.get("highway", "")
if isinstance(road_type, list):
road_type = road_type[0]
return ROAD_SPEEDS_KMPH.get(road_type, DEFAULT_SPEED_KMPH)
def _compute_edge_times(G):
for u, v, k, data in G.edges(keys=True, data=True):
length_m = float(data.get("length") or 0)
length_km = length_m / 1000.0
data["length"] = length_m
speed = _road_speed(data)
data["base_time"] = round((length_km / max(speed, 1.0)) * 60.0, 6)
if "traffic_factor" not in data or data.get("traffic_factor") == "":
data["traffic_factor"] = 1.0
else:
try:
data["traffic_factor"] = float(data["traffic_factor"])
except (ValueError, TypeError):
data["traffic_factor"] = 1.0
road_type = data.get("highway", "")
if isinstance(road_type, list):
road_type = road_type[0]
data["road_penalty"] = 0.0 if road_type in MAJOR_ROAD_TYPES else 0.8
data["time_with_behavior"] = round(
data["base_time"] + data["road_penalty"], 6
)
return G
def sanitize_loaded_graph(G):
float_fields = [
"length", "base_time", "traffic_factor", "road_penalty",
"time_with_behavior", "signal_delay", "time_with_signal",
"live_time", "pollution_delay", "pollution_exposure",
"congestion_ratio",
]
for u, v, k, data in G.edges(keys=True, data=True):
for key in float_fields:
if key in data:
try:
data[key] = float(data[key])
except (ValueError, TypeError):
data.pop(key, None)
G = _compute_edge_times(G)
return G
def prepare_graph(G):
return _compute_edge_times(G)
def build_graph(
place_name="Indore, Madhya Pradesh, India",
save=True,
load_if_exists=True,
filepath="indore.graphml",
):
# ββ Derive pickle path from graphml path ββββββββββββββββββββββββββββββββββ
# e.g. "indore.graphml" β "indore.pkl"
pickle_path = os.path.splitext(filepath)[0] + ".pkl"
# ββ 1. Try pickle first (fastest) ββββββββββββββββββββββββββββββββββββββββ
if load_if_exists and os.path.exists(pickle_path):
logger.info(f"[Graph] Loading from pickle: {pickle_path}")
try:
with open(pickle_path, "rb") as f:
G = pickle.load(f)
# sanitize still runs to apply current speed table
# but skips the XML parsing entirely
G = sanitize_loaded_graph(G)
logger.info(f"[Graph] Loaded from pickle. Nodes: {len(G.nodes)} Edges: {len(G.edges)}")
return G
except Exception as e:
logger.warning(f"[Graph] Pickle load failed ({e}), falling back to graphml...")
# ββ 2. Try graphml (slower, but portable) ββββββββββββββββββββββββββββββββ
if load_if_exists and os.path.exists(filepath):
logger.info(f"[Graph] Loading from graphml: {filepath}")
G = ox.load_graphml(filepath)
G = sanitize_loaded_graph(G)
logger.info(f"[Graph] Loaded from graphml. Nodes: {len(G.nodes)} Edges: {len(G.edges)}")
# Save pickle now so next startup is fast
if save:
logger.info(f"[Graph] Saving pickle for fast future loads: {pickle_path}")
with open(pickle_path, "wb") as f:
pickle.dump(G, f, protocol=5)
return G
# ββ 3. Download fresh from OSM ββββββββββββββββββββββββββββββββββββββββββββ
logger.info(f"[Graph] Downloading road network for {place_name}...")
G = ox.graph_from_place(place_name, network_type="drive", simplify=True)
logger.info(f"[Graph] Download complete. Nodes: {len(G.nodes)} Edges: {len(G.edges)}")
G = prepare_graph(G)
if save:
ox.save_graphml(G, filepath)
logger.info(f"[Graph] Saved graphml: {filepath}")
with open(pickle_path, "wb") as f:
pickle.dump(G, f, protocol=5)
logger.info(f"[Graph] Saved pickle: {pickle_path}")
return G
if __name__ == "__main__":
G = build_graph()
sample = next(
(d for u, v, d in G.edges(data=True)
if d.get("highway") == "primary" and d.get("length", 0) > 100), None
)
if sample:
km = sample["length"] / 1000
mins = sample["base_time"]
spd = (km / mins) * 60
print(f"Primary edge check: {km:.3f}km β {mins:.2f}min β {spd:.1f}km/h") |