# mentorme858 / scripts / extract_mappings.py
# Nguyễn Thanh Tùng
# Improve prompt with semantic names
# 1b7ef16
import json
import os
import argparse
from typing import Dict, Any
def _collect_mappings(entries: Any, target: Dict[str, Any]) -> None:
    """Record ``str(id) -> name`` in *target* for every well-formed entry in *entries*."""
    # The field may be absent, an explicit JSON null, or some other non-list
    # value; only real lists are iterated so malformed profiles are skipped
    # instead of aborting the whole run.
    if not isinstance(entries, list):
        return
    for entry in entries:
        if isinstance(entry, dict) and "id" in entry and "name" in entry:
            # IDs are stringified so they are valid JSON object keys; a later
            # entry with the same ID overwrites the earlier name.
            target[str(entry["id"])] = entry["name"]


def extract_mappings(input_file: str, output_file: str) -> None:
    """
    Extract unique ID -> Name mappings for Skills, Domains, and Careers from mentor profiles.

    The input is read as a JSON array of mentor objects. For each mentor the
    ``career`` object and every object in ``skills`` and ``domains`` that has
    both an ``id`` and a ``name`` key is recorded. The result is written to
    ``output_file`` as JSON with the top-level keys ``skills``, ``domains``
    and ``careers``.

    Args:
        input_file: Path to the mentor profiles JSON file.
        output_file: Path of the JSON file to write. Its parent directory is
            created if it does not exist.

    Returns:
        None. If the input file is missing or is not a JSON array, an error
        message is printed and nothing is written.

    Raises:
        json.JSONDecodeError: If the input file does not contain valid JSON.
        OSError: If the input cannot be read or the output cannot be written.
    """
    if not os.path.exists(input_file):
        print(f"Error: Input file {input_file} not found.")
        return

    print(f"Reading from {input_file}...")
    with open(input_file, 'r', encoding='utf-8') as f:
        mentors = json.load(f)

    if not isinstance(mentors, list):
        print(f"Error: Expected a JSON array of mentor profiles in {input_file}.")
        return

    mappings: Dict[str, Dict[str, Any]] = {
        "skills": {},
        "domains": {},
        "careers": {},
    }

    print(f"Processing {len(mentors)} mentors...")
    for mentor in mentors:
        if not isinstance(mentor, dict):
            continue
        # The career is a single object; wrap it so it shares the list path.
        _collect_mappings([mentor.get("career")], mappings["careers"])
        _collect_mappings(mentor.get("skills"), mappings["skills"])
        _collect_mappings(mentor.get("domains"), mappings["domains"])

    # Ensure output directory exists. dirname() is "" for a bare filename,
    # and os.makedirs("") raises FileNotFoundError, so only create it when
    # there is a directory component.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    print(f"Writing mappings to {output_file}...")
    print(f" - Found {len(mappings['careers'])} careers")
    print(f" - Found {len(mappings['skills'])} skills")
    print(f" - Found {len(mappings['domains'])} domains")
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(mappings, f, ensure_ascii=False, indent=2)
    print("Done!")
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and run the extraction.
    # The default output is data/master_data.json under the project root,
    # taken to be the parent of the directory that contains this script.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    fallback_output = os.path.join(
        os.path.dirname(script_dir), "data", "master_data.json"
    )

    cli = argparse.ArgumentParser(description="Extract Master Data Mappings")
    cli.add_argument("input_file", help="Path to mentor profiles JSON file")
    cli.add_argument(
        "--output",
        default=fallback_output,
        help="Path to output JSON file",
    )

    options = cli.parse_args()
    extract_mappings(options.input_file, options.output)