Spaces:
Sleeping
Sleeping
| import json | |
| import os | |
| import argparse | |
| from typing import Dict, Any | |
def _record_mapping(target: Dict[str, Any], entry: Any) -> None:
    """Store ``str(entry["id"]) -> entry["name"]`` in *target* when *entry* is a dict with both keys."""
    if isinstance(entry, dict) and "id" in entry and "name" in entry:
        target[str(entry["id"])] = entry["name"]


def extract_mappings(input_file: str, output_file: str) -> None:
    """Extract unique ID -> Name mappings for Skills, Domains, and Careers from mentor profiles.

    Reads a JSON list of mentor objects from *input_file*, collects the
    ``id``/``name`` pairs found under each mentor's ``career`` (a single
    object), ``skills`` and ``domains`` (lists of objects), and writes the
    result to *output_file* as JSON with the top-level keys ``skills``,
    ``domains`` and ``careers``. IDs are stringified; when the same ID occurs
    more than once, the last name seen wins.

    Malformed pieces of the input (non-dict mentors, ``null`` or non-list
    ``skills``/``domains``, entries lacking ``id`` or ``name``) are skipped
    rather than aborting the whole run.

    Args:
        input_file: Path to the mentor profiles JSON file.
        output_file: Path of the JSON file to write. Missing parent
            directories are created; a bare filename is written to the
            current working directory.

    Returns:
        None. Progress and errors are reported on stdout.

    Raises:
        json.JSONDecodeError: If *input_file* does not contain valid JSON.
        OSError: If the input cannot be read or the output cannot be written.
    """
    if not os.path.exists(input_file):
        print(f"Error: Input file {input_file} not found.")
        return

    print(f"Reading from {input_file}...")
    with open(input_file, 'r', encoding='utf-8') as f:
        mentors = json.load(f)

    if not isinstance(mentors, list):
        # Iterating a dict/str/number here would either crash or silently
        # yield nothing useful, so report the problem explicitly instead.
        print(f"Error: Expected a JSON list of mentors in {input_file}.")
        return

    mappings: Dict[str, Dict[str, Any]] = {
        "skills": {},
        "domains": {},
        "careers": {},
    }

    print(f"Processing {len(mentors)} mentors...")
    for mentor in mentors:
        if not isinstance(mentor, dict):
            continue

        # Career is a single object, not a list.
        _record_mapping(mappings["careers"], mentor.get("career"))

        # Skills and domains are lists of objects; the key may be absent or
        # explicitly null in the JSON, so only iterate real lists.
        for field in ("skills", "domains"):
            entries = mentor.get(field)
            if isinstance(entries, list):
                for entry in entries:
                    _record_mapping(mappings[field], entry)

    # Ensure output directory exists. dirname() is "" for a bare filename and
    # os.makedirs("") raises FileNotFoundError, so only create real paths.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    print(f"Writing mappings to {output_file}...")
    print(f" - Found {len(mappings['careers'])} careers")
    print(f" - Found {len(mappings['skills'])} skills")
    print(f" - Found {len(mappings['domains'])} domains")
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(mappings, f, ensure_ascii=False, indent=2)
    print("Done!")
if __name__ == "__main__":
    # Default output is data/master_data.json relative to the project root,
    # where the project root is the parent of the directory holding this script.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    fallback_output = os.path.join(
        os.path.dirname(script_dir), "data", "master_data.json"
    )

    cli = argparse.ArgumentParser(description="Extract Master Data Mappings")
    cli.add_argument("input_file", help="Path to mentor profiles JSON file")
    cli.add_argument("--output", default=fallback_output, help="Path to output JSON file")
    options = cli.parse_args()

    extract_mappings(options.input_file, options.output)