import json
import os
import argparse
from typing import Dict, Any


def _collect_pairs(items: Any, target: Dict[str, Any]) -> None:
    """Record ``str(id) -> name`` in *target* for each well-formed entry.

    Anything that is not a list (e.g. a JSON ``null``) is ignored, as are
    entries that are not dicts or that lack an ``"id"`` or ``"name"`` key.
    A later entry with the same id overwrites an earlier one.
    """
    if not isinstance(items, list):
        return
    for item in items:
        if isinstance(item, dict) and "id" in item and "name" in item:
            target[str(item["id"])] = item["name"]


def extract_mappings(input_file: str, output_file: str) -> None:
    """
    Extracts unique ID -> Name mappings for Skills, Domains, and Careers
    from mentor profiles.

    Args:
        input_file: Path to a JSON file holding a collection of mentor
            objects. Each mentor may carry a ``"career"`` object and
            ``"skills"`` / ``"domains"`` lists whose entries have ``"id"``
            and ``"name"`` keys.
        output_file: Path of the JSON file to write. Its parent directory
            is created when missing; a bare filename is written to the
            current working directory.

    Returns:
        None. If *input_file* does not exist, an error message is printed
        and nothing is written.
    """
    if not os.path.exists(input_file):
        print(f"Error: Input file {input_file} not found.")
        return

    print(f"Reading from {input_file}...")
    with open(input_file, 'r', encoding='utf-8') as f:
        mentors = json.load(f)

    mappings: Dict[str, Dict[str, Any]] = {
        "skills": {},
        "domains": {},
        "careers": {},
    }

    print(f"Processing {len(mentors)} mentors...")

    for mentor in mentors:
        # Skip malformed entries instead of crashing on ``.get``.
        if not isinstance(mentor, dict):
            continue

        # A mentor has a single career object; wrap it so the same helper
        # validates it exactly like a skill or domain entry.
        _collect_pairs([mentor.get("career")], mappings["careers"])
        _collect_pairs(mentor.get("skills"), mappings["skills"])
        _collect_pairs(mentor.get("domains"), mappings["domains"])

    # Ensure output directory exists. ``dirname`` is empty for a bare
    # filename, and ``os.makedirs("")`` raises FileNotFoundError.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    print(f"Writing mappings to {output_file}...")
    print(f"  - Found {len(mappings['careers'])} careers")
    print(f"  - Found {len(mappings['skills'])} skills")
    print(f"  - Found {len(mappings['domains'])} domains")

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(mappings, f, ensure_ascii=False, indent=2)

    print("Done!")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Extract Master Data Mappings")
    parser.add_argument("input_file", help="Path to mentor profiles JSON file")

    # Default output is data/master_data.json relative to project root
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    default_output = os.path.join(project_root, "data", "master_data.json")

    parser.add_argument("--output", default=default_output,
                        help="Path to output JSON file")

    args = parser.parse_args()

    extract_mappings(args.input_file, args.output)