File size: 8,282 Bytes
0e4403d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
"""
graph_builder.py

Builds or loads the Indore road network graph.

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SPEED OPTIMISATION: PICKLE INSTEAD OF GRAPHML
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

WHY GRAPHML IS SLOW:
  GraphML is an XML text format. Every node ID, coordinate, edge
  attribute (length, base_time, traffic_factor...) is stored as a
  plain string like "0.034721". On load, Python has to:
    1. Parse 95MB of XML character by character
    2. Convert every value from string → float (thousands of edges)
    3. Reconstruct the NetworkX graph object in memory

  On a typical server this takes 15–25 seconds.

WHY PICKLE IS FAST:
  Python's pickle format stores the graph's in-memory binary
  representation directly. On load it just:
    1. Reads the binary file into memory
    2. Deserialises the already-typed Python objects

  The same graph loads in 1–3 seconds — roughly 10x faster.

TRADEOFF:
  Pickle files are not human-readable and are Python-version
  specific. We keep the .graphml as a portable backup. The .pkl
  is purely a runtime performance cache.

HOW IT WORKS:
  - First run: loads the graphml if it exists and saves the pkl; otherwise
    downloads from OSM and saves BOTH graphml + pkl
  - All subsequent runs: loads pkl directly, skips graphml entirely
  - To force a rebuild: delete indore.pkl (graphml stays intact)

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
IMPACT ON ROUTE RESULTS:
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
  Zero impact. The graph topology, edge weights, node positions,
  and all attributes are byte-for-byte identical between graphml
  and pickle. Pickle is purely a serialisation format change —
  the graph data itself is unchanged.
"""

import os
import pickle
import logging
import osmnx as ox

logger = logging.getLogger(__name__)

# ── Fallback speeds (km/h) ────────────────────────────────────────────────────
# Assumed travel speed for each `highway` road type. `_road_speed` looks an
# edge's road type up in this table, and `_compute_edge_times` converts the
# result into the edge's `base_time`.
ROAD_SPEEDS_KMPH = {
    "motorway":       65.0,
    "motorway_link":  50.0,
    "trunk":          45.0,
    "trunk_link":     35.0,
    "primary":        25.0,
    "primary_link":   20.0,
    "secondary":      20.0,
    "secondary_link": 18.0,
    "tertiary":       18.0,
    "tertiary_link":  15.0,
    "residential":    14.0,
    "living_street":  10.0,
    "service":        10.0,
    "unclassified":   16.0,
}
# Speed `_road_speed` returns when the road type is not a key of the table above.
DEFAULT_SPEED_KMPH = 18.0

# Per-road-type traffic volume weights. Nothing in the visible part of this
# module reads this table.
# NOTE(review): units/scale are not stated here — presumably a relative
# multiplier consumed by another module; confirm against the caller.
ROAD_TRAFFIC_VOLUME = {
    "motorway":       0.7,
    "motorway_link":  0.6,
    "trunk":          0.9,
    "trunk_link":     0.8,
    "primary":        1.8,
    "primary_link":   1.5,
    "secondary":      1.5,
    "secondary_link": 1.3,
    "tertiary":       1.1,
    "tertiary_link":  1.0,
    "residential":    0.6,
    "living_street":  0.4,
    "service":        0.4,
    "unclassified":   0.8,
}
# NOTE(review): presumably the fallback for road types missing from
# ROAD_TRAFFIC_VOLUME (mirroring DEFAULT_SPEED_KMPH) — not used in the visible
# source, confirm against the caller.
DEFAULT_TRAFFIC_VOLUME = 0.9

# Road types treated as "major": `_compute_edge_times` gives edges of these
# types a `road_penalty` of 0.0 and every other edge a penalty of 0.8.
MAJOR_ROAD_TYPES = {
    "motorway", "motorway_link",
    "trunk", "trunk_link",
    "primary", "primary_link",
    "secondary", "secondary_link",
}


def _road_speed(data: dict) -> float:
    """Return the fallback speed (km/h) for an edge's attribute dict.

    Args:
        data: Edge attributes. Only the ``"highway"`` key is read; its value
            may be a single road-type string or a list of them.

    Returns:
        The ``ROAD_SPEEDS_KMPH`` entry for the road type, or
        ``DEFAULT_SPEED_KMPH`` when the type is missing, empty or not in
        the table.
    """
    road_type = data.get("highway", "")
    if isinstance(road_type, list):
        # Multi-valued tag: the first entry decides. An empty list used to
        # raise IndexError here; treat it like a missing tag instead.
        road_type = road_type[0] if road_type else ""
    return ROAD_SPEEDS_KMPH.get(road_type, DEFAULT_SPEED_KMPH)


def _compute_edge_times(G):
    """Recompute the time-related attributes of every edge of ``G`` in place.

    For each edge the following keys are written:
      * ``length``             - metres as float (0.0 if missing or invalid)
      * ``base_time``          - minutes to traverse the edge at the fallback
                                 speed for its road type, rounded to 6 places
      * ``traffic_factor``     - the existing value as float, else 1.0
      * ``road_penalty``       - 0.0 for MAJOR_ROAD_TYPES, 0.8 otherwise
      * ``time_with_behavior`` - ``base_time + road_penalty``, rounded to 6

    Args:
        G: Graph whose ``edges(keys=True, data=True)`` yields mutable
            per-edge attribute dicts.

    Returns:
        The same graph object, after mutation.
    """
    for _u, _v, _k, data in G.edges(keys=True, data=True):
        # A missing/None/empty length already counted as 0. An unparseable
        # value now gets the same treatment instead of raising half-way
        # through the loop and leaving the graph partially updated.
        try:
            length_m = float(data.get("length") or 0)
        except (ValueError, TypeError):
            length_m = 0.0
        length_km = length_m / 1000.0

        data["length"] = length_m

        speed = _road_speed(data)
        # km / (km/h) = hours; * 60 = minutes. max() floors the speed at
        # 1 km/h so a zero or negative table entry cannot divide by zero.
        data["base_time"] = round((length_km / max(speed, 1.0)) * 60.0, 6)

        # Missing, empty or non-numeric traffic factors all fall back to 1.0.
        try:
            data["traffic_factor"] = float(data["traffic_factor"])
        except (KeyError, ValueError, TypeError):
            data["traffic_factor"] = 1.0

        road_type = data.get("highway", "")
        if isinstance(road_type, list):
            # First entry decides; an empty list counts as "no road type".
            road_type = road_type[0] if road_type else ""

        data["road_penalty"] = 0.0 if road_type in MAJOR_ROAD_TYPES else 0.8
        data["time_with_behavior"] = round(
            data["base_time"] + data["road_penalty"], 6
        )

    return G


def sanitize_loaded_graph(G):
    """Coerce numeric edge attributes to float, then recompute edge times.

    Every attribute named in the list below that is present on an edge is
    converted with ``float()``. A value that cannot be converted is removed
    from the edge. The graph is then passed through ``_compute_edge_times``.

    Args:
        G: Graph whose ``edges(keys=True, data=True)`` yields mutable
            per-edge attribute dicts.

    Returns:
        Whatever ``_compute_edge_times`` returns for ``G``.
    """
    numeric_attrs = (
        "length",
        "base_time",
        "traffic_factor",
        "road_penalty",
        "time_with_behavior",
        "signal_delay",
        "time_with_signal",
        "live_time",
        "pollution_delay",
        "pollution_exposure",
        "congestion_ratio",
    )
    for *_, attrs in G.edges(keys=True, data=True):
        for name in numeric_attrs:
            if name not in attrs:
                continue
            try:
                attrs[name] = float(attrs[name])
            except (ValueError, TypeError):
                # Unusable value: drop it rather than keep a non-number.
                attrs.pop(name, None)

    return _compute_edge_times(G)


def prepare_graph(G):
    """Annotate a graph's edges with travel-time attributes.

    Thin wrapper that delegates to ``_compute_edge_times`` and returns its
    result unchanged.
    """
    prepared = _compute_edge_times(G)
    return prepared


def _write_pickle_cache(G, pickle_path):
    """Best-effort, atomic write of the pickle cache; return True on success.

    The graph is dumped to a temporary sibling file which is then moved over
    ``pickle_path`` with ``os.replace``, so an interrupted run never leaves a
    truncated cache behind. A failure is logged and swallowed: the pickle is
    only a speed-up, and the caller already holds a usable graph.
    """
    tmp_path = f"{pickle_path}.{os.getpid()}.tmp"
    try:
        with open(tmp_path, "wb") as f:
            pickle.dump(G, f, protocol=5)
        os.replace(tmp_path, pickle_path)
        return True
    except (OSError, pickle.PicklingError) as e:
        logger.warning(
            f"[Graph] Could not write pickle cache {pickle_path} ({e}); "
            "continuing without it."
        )
        try:
            os.remove(tmp_path)
        except OSError:
            # Temp file was never created or is already gone.
            pass
        return False


def build_graph(
    place_name="Indore, Madhya Pradesh, India",
    save=True,
    load_if_exists=True,
    filepath="indore.graphml",
):
    """Load the road-network graph from cache, or download and build it.

    Sources are tried in this order:
      1. the pickle next to ``filepath`` (same name, ``.pkl`` extension),
      2. the graphml file at ``filepath``,
      3. a fresh download via ``ox.graph_from_place``.

    Args:
        place_name: Place query passed to ``ox.graph_from_place`` when a
            download is needed.
        save: If true, write the pickle cache after a graphml load, and
            write both graphml and pickle after a download.
        load_if_exists: If false, skip steps 1 and 2 and always download.
        filepath: Path of the graphml file; the pickle path is derived
            from it.

    Returns:
        The graph with edge time attributes computed.

    Note:
        A failure to write the pickle cache is logged and ignored. Errors
        from loading or saving the graphml, or from the download, propagate.
    """
    # ── Derive pickle path from graphml path ──────────────────────────────────
    # e.g. "indore.graphml" → "indore.pkl"
    pickle_path = os.path.splitext(filepath)[0] + ".pkl"

    # ── 1. Try pickle first (fastest) ────────────────────────────────────────
    if load_if_exists and os.path.exists(pickle_path):
        logger.info(f"[Graph] Loading from pickle: {pickle_path}")
        try:
            # SECURITY NOTE: unpickling can execute arbitrary code. The cache
            # must live somewhere untrusted parties cannot write to.
            with open(pickle_path, "rb") as f:
                G = pickle.load(f)
            # sanitize still runs to apply current speed table
            # but skips the XML parsing entirely
            G = sanitize_loaded_graph(G)
            logger.info(f"[Graph] Loaded from pickle. Nodes: {len(G.nodes)}  Edges: {len(G.edges)}")
            return G
        except Exception as e:
            # Any failure (corrupt file, incompatible pickle, bad contents)
            # just means the cache is unusable; fall through to graphml.
            logger.warning(f"[Graph] Pickle load failed ({e}), falling back to graphml...")

    # ── 2. Try graphml (slower, but portable) ────────────────────────────────
    if load_if_exists and os.path.exists(filepath):
        logger.info(f"[Graph] Loading from graphml: {filepath}")
        G = ox.load_graphml(filepath)
        G = sanitize_loaded_graph(G)
        logger.info(f"[Graph] Loaded from graphml. Nodes: {len(G.nodes)}  Edges: {len(G.edges)}")

        # Save pickle now so next startup is fast
        if save:
            logger.info(f"[Graph] Saving pickle for fast future loads: {pickle_path}")
            _write_pickle_cache(G, pickle_path)

        return G

    # ── 3. Download fresh from OSM ────────────────────────────────────────────
    logger.info(f"[Graph] Downloading road network for {place_name}...")
    G = ox.graph_from_place(place_name, network_type="drive", simplify=True)
    logger.info(f"[Graph] Download complete. Nodes: {len(G.nodes)}  Edges: {len(G.edges)}")

    G = prepare_graph(G)

    if save:
        ox.save_graphml(G, filepath)
        logger.info(f"[Graph] Saved graphml: {filepath}")
        if _write_pickle_cache(G, pickle_path):
            logger.info(f"[Graph] Saved pickle: {pickle_path}")

    return G


if __name__ == "__main__":
    # Smoke test: build or load the graph, pick one "primary" edge longer than
    # 100 m and print the speed implied by its length and base_time.
    G = build_graph()
    sample = next(
        (d for u, v, d in G.edges(data=True)
         if d.get("highway") == "primary" and d.get("length", 0) > 100), None
    )
    if sample:
        km   = sample["length"] / 1000
        mins = sample["base_time"]
        spd  = (km / mins) * 60
        # The arrows were mojibake ("β†’") in the original message.
        print(f"Primary edge check: {km:.3f}km → {mins:.2f}min → {spd:.1f}km/h")