"""
Script to extract mapping data from the vietnamadminunits package
and generate data/mapping.json for standalone use.

Usage:
    uv run python scripts/build_mapping.py
"""

import json
from collections import Counter
from pathlib import Path


def build_mapping(*, data_dir=None, output=None):
    """Build the old -> new administrative-unit mapping and write it as JSON.

    Reads ``converter_2025.json``, ``parser_legacy.json`` and
    ``parser_from_2025.json`` from *data_dir*, flattens them into province
    lookups plus a list of ward-level records, writes the result to *output*
    and prints a short summary (including a count per ``mapping_type``).

    Every ward record carries the ``old_*`` / ``new_*`` names and keys plus a
    ``mapping_type``:

    * ``"unchanged"`` / ``"renamed"`` - the new ward lists exactly one old
      ward, whose name is equal / not equal to the new ward's name;
    * ``"merged"`` - the new ward lists several old wards;
    * ``"divided"`` - one record per candidate new ward listed for an old
      ward under ``DICT_PROVINCE_WARD_DIVIDED``; only these records carry an
      ``is_default`` flag.

    Old compound keys that contain no ``_`` are skipped.

    Args:
        data_dir: Directory holding the three input JSON files. Defaults to
            the ``data`` directory of the installed ``vietnamadminunits``
            package.
        output: Destination file. Defaults to ``data/mapping.json`` under the
            parent of this script's directory. Missing parent directories
            are created.

    Raises:
        KeyError: If a required key is missing from the input data.
        OSError, json.JSONDecodeError: Propagated from reading/writing files.
    """
    if data_dir is None:
        # Imported lazily: the package is only needed to locate its bundled
        # data files when no explicit directory is given.
        import vietnamadminunits

        data_dir = Path(vietnamadminunits.__file__).parent / "data"
    data_dir = Path(data_dir)

    converter = _load_json(data_dir / "converter_2025.json")
    legacy = _load_json(data_dir / "parser_legacy.json")
    new_parser = _load_json(data_dir / "parser_from_2025.json")

    # Invert "new province -> [old provinces]" into "old province -> new".
    province_mapping = {
        old_key: new_key
        for new_key, old_keys in converter["DICT_PROVINCE"].items()
        for old_key in old_keys
    }

    province_names = _province_names(new_parser["DICT_PROVINCE"])
    old_province_names = _province_names(legacy["DICT_PROVINCE"])

    new_ward_names = {
        prov_key: {ward_key: _ward_entry(info) for ward_key, info in wards.items()}
        for prov_key, wards in new_parser["DICT_PROVINCE_WARD_NO_ACCENTED"].items()
    }

    # Legacy wards are indexed by the full "province_district_ward" compound
    # key, which is the key format used by the converter tables below.
    old_ward_names = {
        prov_key: {
            f"{prov_key}_{dist_key}_{ward_key}": _ward_entry(info)
            for dist_key, wards in districts.items()
            for ward_key, info in wards.items()
        }
        for prov_key, districts in legacy[
            "DICT_PROVINCE_DISTRICT_WARD_NO_ACCENTED"
        ].items()
    }

    # The district table (and each of its fields) is optional in the input.
    old_district_names = {
        prov_key: {
            dist_key: {
                "name": info.get("district", ""),
                "short": info.get("districtShort", ""),
                "type": info.get("districtType", ""),
            }
            for dist_key, info in districts.items()
        }
        for prov_key, districts in legacy.get("DICT_PROVINCE_DISTRICT", {}).items()
    }

    ward_mapping = []

    # Wards that were kept, renamed or merged: new ward -> [old compound keys].
    for new_prov_key, wards in converter["DICT_PROVINCE_WARD_NO_DIVIDED"].items():
        new_prov_name = province_names.get(new_prov_key, {}).get("name", "")
        for new_ward_key, old_compound_keys in wards.items():
            new_ward_info = new_ward_names.get(new_prov_key, {}).get(new_ward_key, {})
            is_merge = len(old_compound_keys) != 1
            for old_compound_key in old_compound_keys:
                described = _describe_old_unit(
                    old_compound_key,
                    old_province_names,
                    old_district_names,
                    old_ward_names,
                )
                if described is None:
                    continue
                old_fields, old_ward_info = described

                if is_merge:
                    mapping_type = "merged"
                elif old_ward_info.get("name") == new_ward_info.get("name"):
                    mapping_type = "unchanged"
                else:
                    mapping_type = "renamed"

                ward_mapping.append(
                    {
                        **old_fields,
                        "new_province": new_prov_name,
                        "new_province_key": new_prov_key,
                        "new_ward": new_ward_info.get("name", ""),
                        "new_ward_key": new_ward_key,
                        "mapping_type": mapping_type,
                    }
                )

    # Wards that were split: old compound key -> [candidate new wards].
    for new_prov_key, old_wards in converter["DICT_PROVINCE_WARD_DIVIDED"].items():
        new_prov_name = province_names.get(new_prov_key, {}).get("name", "")
        for old_compound_key, new_ward_options in old_wards.items():
            described = _describe_old_unit(
                old_compound_key,
                old_province_names,
                old_district_names,
                old_ward_names,
            )
            if described is None:
                continue
            old_fields, _ = described

            for option in new_ward_options:
                new_ward_key = option["newWardKey"]
                new_ward_info = new_ward_names.get(new_prov_key, {}).get(
                    new_ward_key, {}
                )
                ward_mapping.append(
                    {
                        **old_fields,
                        "new_province": new_prov_name,
                        "new_province_key": new_prov_key,
                        "new_ward": new_ward_info.get("name", ""),
                        "new_ward_key": new_ward_key,
                        "mapping_type": "divided",
                        "is_default": option.get("isDefaultNewWard", False),
                    }
                )

    mapping = {
        "metadata": {
            "source": "vietnamadminunits",
            # NOTE(review): hard-coded; presumably the vietnamadminunits
            # release the data was extracted from - confirm and keep in sync.
            "version": "1.0.4",
            "effective_date": "2025-07-01",
            "old_provinces": len(old_province_names),
            "new_provinces": len(province_names),
            "total_records": len(ward_mapping),
        },
        "province_mapping": province_mapping,
        "province_names": province_names,
        "old_province_names": old_province_names,
        "ward_mapping": ward_mapping,
    }

    if output is None:
        output = Path(__file__).parent.parent / "data" / "mapping.json"
    output = Path(output)
    output.parent.mkdir(parents=True, exist_ok=True)
    with open(output, "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps the Vietnamese names readable in the file.
        json.dump(mapping, f, ensure_ascii=False, indent=2)

    print(f"Generated {output}")
    print(f" Province mappings: {len(province_mapping)} old -> {len(province_names)} new")
    print(f" Ward mapping records: {len(ward_mapping)}")

    type_counts = Counter(record["mapping_type"] for record in ward_mapping)
    for mapping_type, count in sorted(type_counts.items()):
        print(f" {mapping_type}: {count}")


def _load_json(path):
    """Load a JSON file, always decoding it as UTF-8 (not the locale default)."""
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def _province_names(provinces):
    """Reduce a ``DICT_PROVINCE`` table to ``{key: {name, short, code}}``."""
    return {
        key: {
            "name": info["province"],
            "short": info["provinceShort"],
            "code": info["provinceCode"],
        }
        for key, info in provinces.items()
    }


def _ward_entry(info):
    """Reduce one parser ward entry to ``{name, short, type, code}``."""
    return {
        "name": info["ward"],
        "short": info["wardShort"],
        "type": info["wardType"],
        "code": info["wardCode"],
    }


def _describe_old_unit(compound_key, province_names, district_names, ward_names):
    """Resolve a legacy compound key into the ``old_*`` fields of a record.

    Returns ``(fields, ward_info)``, where ``ward_info`` is the legacy ward
    entry (``{}`` when unknown), or ``None`` when *compound_key* contains no
    ``_`` and therefore cannot be split into province and district.
    """
    # At most three parts: province, district and (optionally) ward.
    # NOTE(review): assumes province/district keys contain no "_" themselves.
    parts = compound_key.split("_", 2)
    if len(parts) < 2:
        return None

    prov_key, dist_key = parts[0], parts[1]
    ward_info = ward_names.get(prov_key, {}).get(compound_key, {})
    dist_info = district_names.get(prov_key, {}).get(dist_key, {})
    fields = {
        "old_province": province_names.get(prov_key, {}).get("name", ""),
        "old_province_key": prov_key,
        "old_district": dist_info.get("name", ""),
        "old_district_key": dist_key,
        "old_ward": ward_info.get("name", ""),
        "old_ward_key": parts[2] if len(parts) == 3 else "",
    }
    return fields, ward_info


# Run the build only when executed as a script, not when imported.
if __name__ == "__main__":
    build_mapping()