File size: 2,443 Bytes
1b7ef16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import json
import os
import argparse
from typing import Dict, Any

def extract_mappings(input_file: str, output_file: str) -> None:
    """
    Extract unique ID -> Name mappings for Skills, Domains, and Careers from mentor profiles.

    The JSON document at ``input_file`` is loaded and iterated entry by entry.
    For each entry, the ``career`` object and every item of the ``skills`` and
    ``domains`` lists that carries both an ``id`` and a ``name`` key is
    recorded as ``str(id) -> name``. When the same ID appears more than once,
    the name seen last wins. The result is written to ``output_file`` as
    indented UTF-8 JSON with the top-level keys ``skills``, ``domains`` and
    ``careers``.

    Args:
        input_file: Path to the mentor profiles JSON file.
        output_file: Path of the JSON file to write. Missing parent
            directories are created; a bare filename is written to the
            current working directory.

    Returns:
        None. If ``input_file`` does not exist, an error message is printed
        and nothing is written.
    """
    if not os.path.exists(input_file):
        print(f"Error: Input file {input_file} not found.")
        return

    print(f"Reading from {input_file}...")
    with open(input_file, 'r', encoding='utf-8') as f:
        mentors = json.load(f)

    mappings: Dict[str, Dict[str, Any]] = {
        "skills": {},
        "domains": {},
        "careers": {},
    }

    print(f"Processing {len(mentors)} mentors...")

    for mentor in mentors:
        # Skip malformed entries (e.g. null or a bare string) instead of
        # crashing on the .get() calls below.
        if not isinstance(mentor, dict):
            continue

        # Extract Career
        career = mentor.get("career")
        if isinstance(career, dict) and "id" in career and "name" in career:
            mappings["careers"][str(career["id"])] = career["name"]

        # Extract Skills and Domains (both share the same item layout)
        for category in ("skills", "domains"):
            entries = mentor.get(category)
            # The key may be absent or explicitly null in the JSON; only a
            # real list can contain id/name objects.
            if not isinstance(entries, list):
                continue
            for entry in entries:
                if isinstance(entry, dict) and "id" in entry and "name" in entry:
                    mappings[category][str(entry["id"])] = entry["name"]

    # Ensure output directory exists. dirname() is "" for a bare filename,
    # and os.makedirs("") raises FileNotFoundError, so only create it when
    # there actually is a directory component.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    print(f"Writing mappings to {output_file}...")
    print(f"  - Found {len(mappings['careers'])} careers")
    print(f"  - Found {len(mappings['skills'])} skills")
    print(f"  - Found {len(mappings['domains'])} domains")

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(mappings, f, ensure_ascii=False, indent=2)

    print("Done!")

if __name__ == "__main__":
    # Default output is data/master_data.json relative to the project root,
    # taken here as the parent of the directory containing this script.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    default_output = os.path.join(
        os.path.dirname(script_dir), "data", "master_data.json"
    )

    # Command line: one required input path and an optional --output override.
    parser = argparse.ArgumentParser(description="Extract Master Data Mappings")
    parser.add_argument("input_file", help="Path to mentor profiles JSON file")
    parser.add_argument("--output", default=default_output, help="Path to output JSON file")

    cli_args = parser.parse_args()
    extract_mappings(cli_args.input_file, cli_args.output)