Spaces:
Sleeping
Sleeping
| import xml.etree.ElementTree as ET | |
| import json | |
| import sys | |
| import os | |
def _collect_codes(root):
    """Return a ``{code: description}`` dict for every usable <diag> under *root*."""
    codes = {}
    # Element.iter() walks the whole subtree, so nested <diag> children
    # (sub-codes) are visited without explicit recursion.
    for diag in root.iter("diag"):
        # find() only looks at direct children, so the <name>/<desc> of a
        # nested <diag> is never attributed to its parent.
        name_elem = diag.find("name")
        desc_elem = diag.find("desc")
        if name_elem is None or desc_elem is None:
            continue
        # An empty element (<name/> or <desc/>) has text None; treat it as
        # an empty string so it is skipped by the check below.
        code = (name_elem.text or "").strip()
        description = (desc_elem.text or "").strip()
        if code and description:
            # A code that appears more than once keeps its last description.
            codes[code] = description
    return codes


def main(xml_path, out_path="icd_to_description.json"):
    """Flatten the <diag> entries of a tabular XML file into a JSON mapping.

    Every <diag> element in the document is inspected, at any depth. Its
    <name> text (the ICD-10 code) becomes a key and its <desc> text (the
    human-readable description) becomes the value. Entries whose name or
    description is missing, empty, or whitespace-only are skipped.

    Args:
        xml_path: Path of the XML file to read.
        out_path: Path of the JSON file to write. Defaults to
            "icd_to_description.json" in the current working directory.

    Raises:
        SystemExit: With status 1 when *xml_path* is not an existing file
            or does not contain well-formed XML. The reason is printed to
            stderr and no output file is written.
    """
    if not os.path.isfile(xml_path):
        print(f"ERROR: cannot find tabular XML at '{xml_path}'", file=sys.stderr)
        sys.exit(1)

    try:
        root = ET.parse(xml_path).getroot()
    except ET.ParseError as err:
        # Report malformed input the same way as a missing file instead of
        # letting a traceback escape.
        print(f"ERROR: '{xml_path}' is not well-formed XML: {err}", file=sys.stderr)
        sys.exit(1)

    icd_to_description = _collect_codes(root)

    # Write out a flat JSON mapping code -> description. ensure_ascii=False
    # keeps non-ASCII characters readable in the UTF-8 output.
    with open(out_path, "w", encoding="utf-8") as fp:
        json.dump(icd_to_description, fp, indent=2, ensure_ascii=False)
    print(f"Wrote {len(icd_to_description)} code entries to {out_path}")
if __name__ == "__main__":
    # Script entry point: exactly one positional argument is expected,
    # the path of the tabular XML file to convert.
    cli_args = sys.argv[1:]
    if len(cli_args) == 1:
        main(cli_args[0])
    else:
        print("Usage: python parse_tabular.py <path/to/icd10cm_tabular_2025.xml>")
        sys.exit(1)