File size: 9,047 Bytes
efd7cfc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 |
"""
Script to extract mapping data from vietnamadminunits package
and generate data/mapping.json for standalone use.
Usage:
uv run python scripts/build_mapping.py
"""
import json
from pathlib import Path
def build_mapping(pkg_dir=None, output=None):
    """Generate the standalone ``mapping.json`` from vietnamadminunits data.

    Reads ``converter_2025.json``, ``parser_legacy.json`` and
    ``parser_from_2025.json`` from ``<pkg_dir>/data``, flattens them into
    province-level and ward-level mapping records, writes the result as
    UTF-8 JSON and prints a short summary to stdout.

    Args:
        pkg_dir: Directory that contains the ``data`` folder holding the
            three source JSON files. When ``None`` (the default) the
            directory of the installed ``vietnamadminunits`` package is used.
        output: Destination path of the generated file. When ``None`` (the
            default) the file is written to ``data/mapping.json`` one level
            above the directory that holds this script.

    Raises:
        FileNotFoundError: If one of the source JSON files is missing.
        KeyError: If a source file lacks one of the required tables/fields.
    """
    from collections import Counter

    if pkg_dir is None:
        # Imported lazily so the third-party package is only required when
        # its bundled data is actually the input.
        import vietnamadminunits

        pkg_dir = Path(vietnamadminunits.__file__).parent
    data_dir = Path(pkg_dir) / "data"

    # Load source data
    converter = _load_json(data_dir / "converter_2025.json")
    legacy = _load_json(data_dir / "parser_legacy.json")
    new_parser = _load_json(data_dir / "parser_from_2025.json")

    # === Province mapping: old_key -> new_key ===
    # converter DICT_PROVINCE: {new_key: [old_key1, old_key2, ...]}
    province_mapping = {
        old_key: new_key
        for new_key, old_keys in converter["DICT_PROVINCE"].items()
        for old_key in old_keys
    }

    # === Province info: key -> display name ===
    province_names = {
        key: _province_info(info)
        for key, info in new_parser["DICT_PROVINCE"].items()
    }
    old_province_names = {
        key: _province_info(info)
        for key, info in legacy["DICT_PROVINCE"].items()
    }

    # === New ward info: province_key -> ward_key -> display name ===
    new_ward_names = {
        prov_key: {
            ward_key: _ward_info(info) for ward_key, info in wards.items()
        }
        for prov_key, wards in new_parser["DICT_PROVINCE_WARD_NO_ACCENTED"].items()
    }

    # === Old ward info: province_key -> compound key -> display name ===
    # Wards are indexed by the full "province_district_ward" compound key,
    # which is the form the converter tables use to reference them.
    old_ward_names = {
        prov_key: {
            f"{prov_key}_{dist_key}_{ward_key}": _ward_info(info)
            for dist_key, wards in districts.items()
            for ward_key, info in wards.items()
        }
        for prov_key, districts in legacy[
            "DICT_PROVINCE_DISTRICT_WARD_NO_ACCENTED"
        ].items()
    }

    # === Old district info ===
    # This table and its fields are optional: missing entries become "".
    old_district_names = {
        prov_key: {
            dist_key: {
                "name": info.get("district", ""),
                "short": info.get("districtShort", ""),
                "type": info.get("districtType", ""),
            }
            for dist_key, info in districts.items()
        }
        for prov_key, districts in legacy.get("DICT_PROVINCE_DISTRICT", {}).items()
    }

    # === Ward mapping records ===
    ward_mapping = []
    skipped_keys = []  # compound keys without a district part

    # NO_DIVIDED: each new ward maps to one or more old wards
    # (unchanged or renamed/merged)
    for new_prov_key, wards in converter["DICT_PROVINCE_WARD_NO_DIVIDED"].items():
        new_province = province_names.get(new_prov_key, {}).get("name", "")
        new_wards = new_ward_names.get(new_prov_key, {})
        for new_ward_key, old_compound_keys in wards.items():
            new_ward_info = new_wards.get(new_ward_key, {})
            for old_compound_key in old_compound_keys:
                resolved = _resolve_old_unit(
                    old_compound_key,
                    old_province_names,
                    old_district_names,
                    old_ward_names,
                )
                if resolved is None:
                    skipped_keys.append(old_compound_key)
                    continue
                old_fields, old_ward_info = resolved

                if len(old_compound_keys) > 1:
                    mapping_type = "merged"
                elif old_ward_info.get("name") == new_ward_info.get("name"):
                    # Only one old ward maps to this new ward, same name.
                    mapping_type = "unchanged"
                else:
                    mapping_type = "renamed"

                ward_mapping.append(
                    {
                        **old_fields,
                        "new_province": new_province,
                        "new_province_key": new_prov_key,
                        "new_ward": new_ward_info.get("name", ""),
                        "new_ward_key": new_ward_key,
                        "mapping_type": mapping_type,
                    }
                )

    # DIVIDED: old wards split into multiple new wards
    for new_prov_key, old_wards in converter["DICT_PROVINCE_WARD_DIVIDED"].items():
        new_province = province_names.get(new_prov_key, {}).get("name", "")
        new_wards = new_ward_names.get(new_prov_key, {})
        for old_compound_key, new_ward_options in old_wards.items():
            resolved = _resolve_old_unit(
                old_compound_key,
                old_province_names,
                old_district_names,
                old_ward_names,
            )
            if resolved is None:
                skipped_keys.append(old_compound_key)
                continue
            old_fields, _ = resolved

            for option in new_ward_options:
                new_ward_key = option["newWardKey"]
                new_ward_info = new_wards.get(new_ward_key, {})
                ward_mapping.append(
                    {
                        **old_fields,
                        "new_province": new_province,
                        "new_province_key": new_prov_key,
                        "new_ward": new_ward_info.get("name", ""),
                        "new_ward_key": new_ward_key,
                        "mapping_type": "divided",
                        "is_default": option.get("isDefaultNewWard", False),
                    }
                )

    # Build final mapping
    mapping = {
        "metadata": {
            "source": "vietnamadminunits",
            # NOTE(review): hard-coded, not read from the installed package;
            # confirm it matches the version the data was taken from.
            "version": "1.0.4",
            "effective_date": "2025-07-01",
            "old_provinces": len(old_province_names),
            "new_provinces": len(province_names),
            "total_records": len(ward_mapping),
        },
        "province_mapping": province_mapping,
        "province_names": province_names,
        "old_province_names": old_province_names,
        "ward_mapping": ward_mapping,
    }

    if output is None:
        output = Path(__file__).parent.parent / "data" / "mapping.json"
    output = Path(output)
    output.parent.mkdir(parents=True, exist_ok=True)
    # ensure_ascii=False keeps non-ASCII names readable in the output file.
    with open(output, "w", encoding="utf-8") as f:
        json.dump(mapping, f, ensure_ascii=False, indent=2)

    print(f"Generated {output}")
    print(f" Province mappings: {len(province_mapping)} old -> {len(province_names)} new")
    print(f" Ward mapping records: {len(ward_mapping)}")
    if skipped_keys:
        # Surface dropped input instead of discarding it silently.
        print(f" Skipped malformed old ward keys: {len(skipped_keys)}")

    # Stats
    type_counts = Counter(record["mapping_type"] for record in ward_mapping)
    for mapping_type, count in sorted(type_counts.items()):
        print(f" {mapping_type}: {count}")


def _load_json(path):
    """Read and parse a JSON file, always decoding it as UTF-8."""
    # JSON is UTF-8 by specification; relying on the platform default
    # encoding (e.g. cp1252 on Windows) can fail or garble non-ASCII names.
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def _province_info(info):
    """Reduce a parser province entry to its name/short/code display fields."""
    return {
        "name": info["province"],
        "short": info["provinceShort"],
        "code": info["provinceCode"],
    }


def _ward_info(info):
    """Reduce a parser ward entry to its name/short/type/code display fields."""
    return {
        "name": info["ward"],
        "short": info["wardShort"],
        "type": info["wardType"],
        "code": info["wardCode"],
    }


def _resolve_old_unit(compound_key, province_names, district_names, ward_names):
    """Resolve the ``old_*`` record fields for a legacy compound key.

    ``compound_key`` is split on the first two underscores into province,
    district and (optionally) ward parts.

    Returns:
        ``(fields, ward_info)`` where ``fields`` holds the six ``old_*``
        record entries and ``ward_info`` is the looked-up old ward entry
        (``{}`` when unknown), or ``None`` when the key has no district part.
    """
    parts = compound_key.split("_", 2)
    if len(parts) < 2:
        return None
    prov_key, dist_key = parts[0], parts[1]
    ward_key = parts[2] if len(parts) == 3 else ""

    # Old wards are indexed by the full compound key within their province.
    ward_info = ward_names.get(prov_key, {}).get(compound_key, {})
    dist_info = district_names.get(prov_key, {}).get(dist_key, {})
    fields = {
        "old_province": province_names.get(prov_key, {}).get("name", ""),
        "old_province_key": prov_key,
        "old_district": dist_info.get("name", ""),
        "old_district_key": dist_key,
        "old_ward": ward_info.get("name", ""),
        "old_ward_key": ward_key,
    }
    return fields, ward_info
# Script entry point: regenerate the mapping file only when this module is
# executed directly, not when it is imported.
if __name__ == "__main__":
    build_mapping()
|