File size: 9,047 Bytes
efd7cfc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
"""
Script to extract mapping data from vietnamadminunits package
and generate data/mapping.json for standalone use.

Usage:
    uv run python scripts/build_mapping.py
"""

import json
from pathlib import Path


def _load_json(path):
    """Parse the JSON file at *path*, always decoding it as UTF-8.

    The encoding is passed explicitly so the result does not depend on the
    platform's locale default (the data contains Vietnamese names).
    """
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def _province_info(provinces):
    """Map each province key to its display name, short name and code."""
    return {
        key: {
            "name": info["province"],
            "short": info["provinceShort"],
            "code": info["provinceCode"],
        }
        for key, info in provinces.items()
    }


def _ward_info(info):
    """Extract the display fields of a single ward entry."""
    return {
        "name": info["ward"],
        "short": info["wardShort"],
        "type": info["wardType"],
        "code": info["wardCode"],
    }


def _old_unit_fields(compound_key, old_province_names, old_district_names, old_ward_names):
    """Resolve an old compound key into the ``old_*`` fields of a record.

    Returns a ``(fields, ward_info)`` tuple, or ``None`` when the key has fewer
    than two ``_``-separated parts. ``ward_info`` is the raw old-ward entry
    (possibly empty) so callers can compare names without the ``""`` default.
    """
    # NOTE(review): splitting on the first two underscores assumes province and
    # district keys contain no underscore themselves - confirm against the data.
    parts = compound_key.split("_", 2)
    if len(parts) < 2:
        return None

    prov_key, dist_key = parts[0], parts[1]
    ward_key = parts[2] if len(parts) == 3 else ""
    dist_info = old_district_names.get(prov_key, {}).get(dist_key, {})
    # Old wards are indexed by the full compound key, not by the bare ward key.
    ward_info = old_ward_names.get(prov_key, {}).get(compound_key, {})

    fields = {
        "old_province": old_province_names.get(prov_key, {}).get("name", ""),
        "old_province_key": prov_key,
        "old_district": dist_info.get("name", ""),
        "old_district_key": dist_key,
        "old_ward": ward_info.get("name", ""),
        "old_ward_key": ward_key,
    }
    return fields, ward_info


def _new_unit_fields(new_prov_key, new_prov_info, new_ward_key, new_ward_info):
    """Build the ``new_*`` fields of a ward mapping record."""
    return {
        "new_province": new_prov_info.get("name", ""),
        "new_province_key": new_prov_key,
        "new_ward": new_ward_info.get("name", ""),
        "new_ward_key": new_ward_key,
    }


def build_mapping():
    """Generate ``data/mapping.json`` from the vietnamadminunits package data.

    Reads ``converter_2025.json``, ``parser_legacy.json`` and
    ``parser_from_2025.json`` from the installed package's ``data`` directory,
    builds the old->new province mapping, the province name tables and one
    ward mapping record per (old ward, new ward) pair, then writes the result
    as UTF-8 JSON to ``<parent of this script's directory>/data/mapping.json``
    and prints a short summary.

    Raises:
        ImportError: if ``vietnamadminunits`` is not installed.
        KeyError: if a required section or field is missing from the data.
    """
    import vietnamadminunits
    from collections import Counter

    data_dir = Path(vietnamadminunits.__file__).parent / "data"

    # Load source data
    converter = _load_json(data_dir / "converter_2025.json")
    legacy = _load_json(data_dir / "parser_legacy.json")
    new_parser = _load_json(data_dir / "parser_from_2025.json")

    # === Province mapping: old_key -> new_key ===
    # converter DICT_PROVINCE: {new_key: [old_key1, old_key2, ...]}
    province_mapping = {
        old_key: new_key
        for new_key, old_keys in converter["DICT_PROVINCE"].items()
        for old_key in old_keys
    }

    # === Province info: key -> display name ===
    province_names = _province_info(new_parser["DICT_PROVINCE"])
    old_province_names = _province_info(legacy["DICT_PROVINCE"])

    # === New ward info: province_key -> ward_key -> display name ===
    new_ward_names = {
        prov_key: {ward_key: _ward_info(info) for ward_key, info in wards.items()}
        for prov_key, wards in new_parser["DICT_PROVINCE_WARD_NO_ACCENTED"].items()
    }

    # === Old ward info: province_key -> "prov_dist_ward" compound key -> display name ===
    old_ward_names = {
        prov_key: {
            f"{prov_key}_{dist_key}_{ward_key}": _ward_info(info)
            for dist_key, wards in districts.items()
            for ward_key, info in wards.items()
        }
        for prov_key, districts in legacy["DICT_PROVINCE_DISTRICT_WARD_NO_ACCENTED"].items()
    }

    # === Old district info (section and fields are optional) ===
    old_district_names = {
        prov_key: {
            dist_key: {
                "name": info.get("district", ""),
                "short": info.get("districtShort", ""),
                "type": info.get("districtType", ""),
            }
            for dist_key, info in districts.items()
        }
        for prov_key, districts in legacy.get("DICT_PROVINCE_DISTRICT", {}).items()
    }

    # === Ward mapping records ===
    ward_mapping = []

    # NO_DIVIDED: each new ward maps to one or more old wards (unchanged or renamed/merged)
    for new_prov_key, wards in converter["DICT_PROVINCE_WARD_NO_DIVIDED"].items():
        new_prov_info = province_names.get(new_prov_key, {})
        new_prov_wards = new_ward_names.get(new_prov_key, {})

        for new_ward_key, old_compound_keys in wards.items():
            new_ward_info = new_prov_wards.get(new_ward_key, {})
            new_fields = _new_unit_fields(
                new_prov_key, new_prov_info, new_ward_key, new_ward_info
            )
            # Several old wards feeding one new ward is a merge.
            is_merge = len(old_compound_keys) != 1

            for old_compound_key in old_compound_keys:
                resolved = _old_unit_fields(
                    old_compound_key,
                    old_province_names,
                    old_district_names,
                    old_ward_names,
                )
                if resolved is None:
                    continue
                old_fields, old_ward_info = resolved

                if is_merge:
                    mapping_type = "merged"
                elif old_ward_info.get("name") == new_ward_info.get("name"):
                    mapping_type = "unchanged"
                else:
                    mapping_type = "renamed"

                ward_mapping.append(
                    {**old_fields, **new_fields, "mapping_type": mapping_type}
                )

    # DIVIDED: old wards split into multiple new wards
    for new_prov_key, old_wards in converter["DICT_PROVINCE_WARD_DIVIDED"].items():
        new_prov_info = province_names.get(new_prov_key, {})
        new_prov_wards = new_ward_names.get(new_prov_key, {})

        for old_compound_key, new_ward_options in old_wards.items():
            resolved = _old_unit_fields(
                old_compound_key,
                old_province_names,
                old_district_names,
                old_ward_names,
            )
            if resolved is None:
                continue
            old_fields, _ = resolved

            for option in new_ward_options:
                new_ward_key = option["newWardKey"]
                new_ward_info = new_prov_wards.get(new_ward_key, {})
                ward_mapping.append(
                    {
                        **old_fields,
                        **_new_unit_fields(
                            new_prov_key, new_prov_info, new_ward_key, new_ward_info
                        ),
                        "mapping_type": "divided",
                        "is_default": option.get("isDefaultNewWard", False),
                    }
                )

    # Build final mapping
    mapping = {
        "metadata": {
            "source": "vietnamadminunits",
            # NOTE(review): hard-coded; confirm it matches the installed
            # vietnamadminunits release the data was extracted from.
            "version": "1.0.4",
            "effective_date": "2025-07-01",
            "old_provinces": len(old_province_names),
            "new_provinces": len(province_names),
            "total_records": len(ward_mapping),
        },
        "province_mapping": province_mapping,
        "province_names": province_names,
        "old_province_names": old_province_names,
        "ward_mapping": ward_mapping,
    }

    output = Path(__file__).parent.parent / "data" / "mapping.json"
    output.parent.mkdir(parents=True, exist_ok=True)
    with open(output, "w", encoding="utf-8") as f:
        json.dump(mapping, f, ensure_ascii=False, indent=2)

    print(f"Generated {output}")
    print(f"  Province mappings: {len(province_mapping)} old -> {len(province_names)} new")
    print(f"  Ward mapping records: {len(ward_mapping)}")

    # Stats: number of records per mapping type, in alphabetical order
    type_counts = Counter(record["mapping_type"] for record in ward_mapping)
    for mapping_type, count in sorted(type_counts.items()):
        print(f"    {mapping_type}: {count}")

# Generate the mapping only when this file is executed as a script, not on import.
if __name__ == "__main__":
    build_mapping()