File size: 4,647 Bytes
dfb3d07
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import json
import logging
import re
from pathlib import Path
from typing import List, Dict, Optional

import networkx as nx

class SkillGraphEngine:
    """Directed graph of skills loaded from a JSON file, with path queries.

    The JSON document is expected to hold two optional lists:

    * ``"nodes"``: objects with a required ``"id"`` and optional ``"label"``,
      ``"domain"``, ``"level"`` and ``"tags"``.
    * ``"edges"``: objects with required ``"source"`` and ``"target"`` and
      optional ``"type"`` (default ``"PREREQUISITE"``), ``"weight"``
      (default ``1.0``) and ``"hop_cost"`` (default ``1``).

    Edges of type ``"COUSIN"`` are stored in both directions; every other
    edge type is stored only as ``source -> target``.
    """

    # "weeks" value attached to a step in path_to_steps, keyed by edge type;
    # any edge type not listed here gets _DEFAULT_STEP_WEEKS.
    _DEFAULT_STEP_WEEKS = 1.5
    _STEP_WEEKS_BY_EDGE_TYPE = {"COUSIN": 0.75, "BRIDGES": 2.5}

    # Runs of characters that are not lowercase ASCII letters or digits.
    _NON_SLUG_CHARS = re.compile(r"[^a-z0-9]+")

    def __init__(self, graph_path: str = "backend/knowledge_graph/skills_graph.json"):
        """Build the graph from ``graph_path``.

        A missing file is not an error: the engine starts with an empty graph
        and a warning is logged, so every query returns its "not found"
        result. Malformed JSON, or a node/edge entry lacking a required key,
        still raises.

        Args:
            graph_path: Path of the JSON graph file. The default is relative,
                so it is resolved against the current working directory.
        """
        self.graph = nx.DiGraph()
        path = Path(graph_path)
        if not path.exists():
            # Without this warning a wrong working directory shows up only as
            # "no skill is ever found", which is hard to trace back to here.
            logging.getLogger(__name__).warning(
                "Skill graph file %s not found; starting with an empty graph", path
            )
            return

        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        for node in data.get("nodes", []):
            self.graph.add_node(
                node["id"],
                label=node.get("label", node["id"]),
                domain=node.get("domain", "unknown"),
                level=node.get("level", "intermediate"),
                tags=node.get("tags", []),
            )

        for edge in data.get("edges", []):
            attrs = {
                "type": edge.get("type", "PREREQUISITE"),
                "weight": edge.get("weight", 1.0),
                "hop_cost": edge.get("hop_cost", 1),
            }
            self.graph.add_edge(edge["source"], edge["target"], **attrs)
            if attrs["type"] == "COUSIN":
                # COUSIN is symmetric: mirror the edge with the same attributes.
                self.graph.add_edge(edge["target"], edge["source"], **attrs)

    def find_shortest_path(self, source_skills: List[str], target_skill: str, max_hops: int = 4) -> Optional[List[str]]:
        """Return the cheapest path from any of ``source_skills`` to ``target_skill``.

        For each source present in the graph, the minimum-``hop_cost`` path to
        the target is computed. That path is kept only if it has at most
        ``max_hops`` edges. Among the kept paths the one with the lowest total
        ``hop_cost`` wins; on a tie the earlier source wins.

        Note that ``max_hops`` is applied to the cheapest path only: a costlier
        path that uses fewer edges is not searched for.

        Args:
            source_skills: Candidate starting skill ids; unknown ids are skipped.
            target_skill: Skill id to reach.
            max_hops: Maximum number of edges allowed in the returned path.

        Returns:
            The list of skill ids from source to target (inclusive), or
            ``None`` if the target is unknown or no source yields a qualifying
            path.
        """
        if not self.graph.has_node(target_skill):
            return None

        best_path = None
        best_cost = float('inf')
        for source in source_skills:
            if not self.graph.has_node(source):
                continue
            try:
                path = nx.shortest_path(self.graph, source=source, target=target_skill, weight="hop_cost")
            except nx.NetworkXNoPath:
                continue
            if len(path) - 1 > max_hops:
                continue
            cost = sum(self.graph[u][v]['hop_cost'] for u, v in zip(path, path[1:]))
            if cost < best_cost:
                best_cost = cost
                best_path = path
        return best_path

    def get_adjacent(self, skill_id: str, hops: int = 2) -> List[Dict]:
        """List the skills reachable from ``skill_id`` within a cost budget.

        ``hops`` is passed to Dijkstra as the cutoff on the summed
        ``hop_cost`` of a path, so it equals an edge count only when every
        edge on the path has ``hop_cost`` 1.

        Args:
            skill_id: Starting skill id.
            hops: Maximum total ``hop_cost`` of a path from ``skill_id``.

        Returns:
            One dict per reachable skill (the start itself excluded) with keys
            ``"skill"``, ``"path"`` (skill ids from start to that skill),
            ``"edge_types"`` (the type of each edge along the path) and
            ``"total_cost"`` (float), sorted by ascending ``"total_cost"``.
            An empty list if ``skill_id`` is not in the graph.
        """
        if not self.graph.has_node(skill_id):
            return []

        # A single Dijkstra run yields both the distances and the paths.
        costs, paths = nx.single_source_dijkstra(
            self.graph, skill_id, cutoff=hops, weight="hop_cost"
        )

        results = []
        for target, cost in costs.items():
            if target == skill_id:
                continue
            path = paths[target]
            edge_types = [self.graph[u][v]['type'] for u, v in zip(path, path[1:])]
            results.append({
                "skill": target,
                "path": path,
                "edge_types": edge_types,
                "total_cost": float(cost),
            })
        results.sort(key=lambda entry: entry["total_cost"])
        return results

    def get_prerequisites(self, skill_id: str) -> List[str]:
        """Return the skills with a ``PREREQUISITE`` edge pointing at ``skill_id``.

        Only direct predecessors are considered. Returns an empty list if
        ``skill_id`` is not in the graph.
        """
        if not self.graph.has_node(skill_id):
            return []
        return [p for p in self.graph.predecessors(skill_id) if self.graph[p][skill_id]['type'] == 'PREREQUISITE']

    def get_domain(self, skill_id: str) -> str:
        """Return the ``domain`` attribute of ``skill_id``.

        Returns ``"unknown"`` when the skill is not in the graph or when its
        node carries no ``domain`` attribute (for instance a node that exists
        only because an edge referenced it).
        """
        if not self.graph.has_node(skill_id):
            return "unknown"
        return self.graph.nodes[skill_id].get("domain", "unknown")

    def path_to_steps(self, path: Optional[List[str]]) -> List[Dict]:
        """Expand a node path into one step dict per edge.

        Args:
            path: Skill ids in traversal order, e.g. the result of
                ``find_shortest_path``. ``None`` (no path found), an empty
                list and a single-node path all produce no steps.

        Returns:
            A list of dicts with keys ``"from"``, ``"to"``, ``"edge_type"``
            and ``"weeks"``. ``"weeks"`` is 0.75 for ``COUSIN`` edges, 2.5 for
            ``BRIDGES`` edges and 1.5 for every other edge type.

        Raises:
            KeyError: If two consecutive ids in ``path`` are not joined by an
                edge in the graph.
        """
        # find_shortest_path returns None when nothing qualifies; accept that
        # result directly instead of failing on it.
        if not path:
            return []

        steps = []
        for source, target in zip(path, path[1:]):
            edge_type = self.graph[source][target]['type']
            steps.append({
                "from": source,
                "to": target,
                "edge_type": edge_type,
                "weeks": self._STEP_WEEKS_BY_EDGE_TYPE.get(edge_type, self._DEFAULT_STEP_WEEKS),
            })
        return steps

    def normalize_skill_id(self, label: str) -> str:
        """Turn a free-text label into a lowercase, underscore-separated id.

        The label is lowercased, every run of characters other than ``a-z``
        and ``0-9`` becomes a single ``_``, and leading/trailing underscores
        are removed (``"Node.js"`` -> ``"node_js"``).
        """
        return self._NON_SLUG_CHARS.sub("_", label.lower()).strip("_")

# Process-wide engine instance; stays None until get_engine() is first called.
_engine: Optional[SkillGraphEngine] = None


def get_engine() -> SkillGraphEngine:
    """Return the shared SkillGraphEngine, constructing it on first use.

    The engine is built with its default arguments and stored in the
    module-level ``_engine``; later calls return that same object.
    """
    global _engine
    if _engine is not None:
        return _engine
    _engine = SkillGraphEngine()
    return _engine