Spaces:
Sleeping
Sleeping
| import networkx as nx | |
| import json | |
| import re | |
| from pathlib import Path | |
| from typing import List, Dict, Optional | |
| class SkillGraphEngine: | |
| def __init__(self, graph_path: str = "backend/knowledge_graph/skills_graph.json"): | |
| self.graph = nx.DiGraph() | |
| path = Path(graph_path) | |
| if path.exists(): | |
| with open(path, 'r', encoding='utf-8') as f: | |
| data = json.load(f) | |
| for node in data.get("nodes", []): | |
| self.graph.add_node( | |
| node["id"], | |
| label=node.get("label", node["id"]), | |
| domain=node.get("domain", "unknown"), | |
| level=node.get("level", "intermediate"), | |
| tags=node.get("tags", []) | |
| ) | |
| for edge in data.get("edges", []): | |
| self.graph.add_edge( | |
| edge["source"], | |
| edge["target"], | |
| type=edge.get("type", "PREREQUISITE"), | |
| weight=edge.get("weight", 1.0), | |
| hop_cost=edge.get("hop_cost", 1) | |
| ) | |
| if edge.get("type") == "COUSIN": | |
| # Bidirectional | |
| self.graph.add_edge( | |
| edge["target"], | |
| edge["source"], | |
| type=edge.get("type", "COUSIN"), | |
| weight=edge.get("weight", 1.0), | |
| hop_cost=edge.get("hop_cost", 1) | |
| ) | |
| def find_shortest_path(self, source_skills: List[str], target_skill: str, max_hops: int = 4) -> Optional[List[str]]: | |
| best_path = None | |
| best_cost = float('inf') | |
| if not self.graph.has_node(target_skill): | |
| return None | |
| for source in source_skills: | |
| if not self.graph.has_node(source): | |
| continue | |
| try: | |
| path = nx.shortest_path(self.graph, source=source, target=target_skill, weight="hop_cost") | |
| if len(path) - 1 <= max_hops: | |
| cost = sum(self.graph[path[i]][path[i+1]]['hop_cost'] for i in range(len(path)-1)) | |
| if cost < best_cost: | |
| best_cost = cost | |
| best_path = path | |
| except nx.NetworkXNoPath: | |
| continue | |
| return best_path | |
| def get_adjacent(self, skill_id: str, hops: int = 2) -> List[Dict]: | |
| if not self.graph.has_node(skill_id): | |
| return [] | |
| lengths = nx.single_source_dijkstra_path_length(self.graph, skill_id, cutoff=hops, weight="hop_cost") | |
| paths = nx.single_source_dijkstra_path(self.graph, skill_id, cutoff=hops, weight="hop_cost") | |
| results = [] | |
| for target, cost in lengths.items(): | |
| if target == skill_id: | |
| continue | |
| path = paths[target] | |
| edge_types = [self.graph[path[i]][path[i+1]]['type'] for i in range(len(path)-1)] | |
| results.append({ | |
| "skill": target, | |
| "path": path, | |
| "edge_types": edge_types, | |
| "total_cost": float(cost) | |
| }) | |
| results.sort(key=lambda x: x["total_cost"]) | |
| return results | |
| def get_prerequisites(self, skill_id: str) -> List[str]: | |
| if not self.graph.has_node(skill_id): | |
| return [] | |
| return [p for p in self.graph.predecessors(skill_id) if self.graph[p][skill_id]['type'] == 'PREREQUISITE'] | |
| def get_domain(self, skill_id: str) -> str: | |
| if not self.graph.has_node(skill_id): | |
| return "unknown" | |
| return self.graph.nodes[skill_id].get("domain", "unknown") | |
| def path_to_steps(self, path: List[str]) -> List[Dict]: | |
| steps = [] | |
| for i in range(len(path) - 1): | |
| source = path[i] | |
| target = path[i+1] | |
| edge_type = self.graph[source][target]['type'] | |
| weeks = 1.5 | |
| if edge_type == 'COUSIN': | |
| weeks = 0.75 | |
| elif edge_type == 'BRIDGES': | |
| weeks = 2.5 | |
| steps.append({ | |
| "from": source, | |
| "to": target, | |
| "edge_type": edge_type, | |
| "weeks": weeks | |
| }) | |
| return steps | |
| def normalize_skill_id(self, label: str) -> str: | |
| return re.sub(r"[^a-z0-9]+", "_", label.lower()).strip("_") | |
| _engine = None | |
| def get_engine() -> SkillGraphEngine: | |
| global _engine | |
| if _engine is None: | |
| _engine = SkillGraphEngine() | |
| return _engine | |