# Page banner captured with this file: "Spaces: Sleeping" (hosting status, not program code).
import json
import math
import os
from collections import Counter, defaultdict

import gradio as gr
import networkx as nx
from pyvis.network import Network
# Relative path tried by the loader when the user uploads nothing.
DEFAULT_JSON = "job_position_skill_graph.json"

# Hex colour used for skill nodes, keyed by skill cluster. The graph builder
# falls back to the same grey as "other" for clusters not listed here.
CLUSTER_COLORS = dict(
    programming="#1f77b4",
    databases="#ff7f0e",
    cloud="#2ca02c",
    devops="#d62728",
    version_control="#9467bd",
    data_processing="#8c564b",
    ml_ai="#e377c2",
    web_backend="#7f7f7f",
    web_frontend="#bcbd22",
    security="#17becf",
    networking="#1b9e77",
    mobile="#d95f02",
    analytics_bi="#7570b3",
    testing_qc="#e7298a",
    infra_sys="#66a61e",
    other="#999999",
)
def _load_json(file_obj):
    """Load the graph JSON from an upload, falling back to ``DEFAULT_JSON``.

    Args:
        file_obj: Value delivered by the ``gr.File`` input. May be ``None``
            (nothing uploaded), a filesystem path (``str`` / ``os.PathLike``),
            or an object that exposes the path as ``.name``.

    Returns:
        The decoded JSON document.

    Raises:
        gr.Error: If nothing was uploaded and ``DEFAULT_JSON`` does not exist,
            if the upload's path cannot be determined, or if the chosen file
            cannot be read or does not contain valid JSON.
    """
    if file_obj is not None:
        # Accept both a plain path and a file-like wrapper carrying `.name`.
        if isinstance(file_obj, (str, os.PathLike)):
            path = file_obj
        else:
            path = getattr(file_obj, "name", None)
        if not path:
            raise gr.Error("Could not determine the path of the uploaded file.")
    elif os.path.exists(DEFAULT_JSON):
        path = DEFAULT_JSON
    else:
        raise gr.Error("No JSON provided and default file not found. Upload or place job_position_skill_graph.json in repo root.")

    try:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except (OSError, ValueError) as exc:
        # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
        shown = os.path.basename(os.fspath(path))
        raise gr.Error(f"Could not read JSON from {shown}: {exc}") from exc
def _position_from_entry(key, value):
    """Build one internal position record from a ``name -> value`` mapping entry."""
    name = str(key)
    if isinstance(value, dict):
        return {
            "name": value.get("name") or name,
            "skills": _coerce_skills(value.get("skills", {})),
        }
    if isinstance(value, list):
        # A bare list is a flat skill list; file it under the "other" cluster.
        return {"name": name, "skills": _coerce_skills({"other": value})}
    return {"name": name, "skills": {}}


def _normalize_schema(data):
    """
    Accepts multiple schemas and converts to internal format:
    {
      "positions": [{"name": "...","skills": {"cluster":[{"name":"...","count":N}, ...], ...}}, ...],
      "edges": [{"source":"...","target":"...","weight":0.2,"shared_skills":[...]}]
    }
    Supported inputs:
    A) Internal format: "positions" is a list. Other top-level keys are kept;
       each entry's skills are coerced, a plain string entry becomes a
       position without skills, and any other entry type is dropped.
    B) "positions" is a dict mapping position_name -> entry.
    C) Top-level dict mapping position_name -> {"name": "...", "skills": {...}}
       or a flat list of skills.
    D) Top-level dict mapping position_name -> {"skills": {"cluster":[...]}} (no edges).

    In every case "edges" is returned as a list; a missing or non-list value
    becomes [].

    Raises:
        gr.Error: If the JSON root is not an object, or no positions can be
            located in it.
    """
    if not isinstance(data, dict):
        raise gr.Error("JSON root must be an object.")

    edges = data.get("edges")
    if not isinstance(edges, list):
        edges = []

    positions = data.get("positions")

    # Case A: positions already a list -> normalise each entry in a copy.
    if isinstance(positions, list):
        normalized = []
        for entry in positions:
            if isinstance(entry, dict):
                normalized.append({**entry, "skills": _coerce_skills(entry.get("skills", {}))})
            elif isinstance(entry, str) and entry.strip():
                normalized.append({"name": entry.strip(), "skills": {}})
        return {**data, "positions": normalized, "edges": edges}

    # Case B: positions is a dict keyed by position name.
    if isinstance(positions, dict):
        return {
            "positions": [_position_from_entry(k, v) for k, v in positions.items()],
            "edges": edges,
        }

    # Case C/D: every top-level key other than the reserved ones is a position.
    reserved = {"positions", "edges"}
    candidates = {k: v for k, v in data.items() if k not in reserved}
    if candidates:
        return {
            "positions": [_position_from_entry(k, v) for k, v in candidates.items()],
            "edges": edges,
        }

    raise gr.Error("Unrecognized JSON schema. Include 'positions' or a mapping of position names.")
def _coerce_skills(skills):
    """
    Ensure skills structure is {cluster: [{"name":..., "count": int}, ...], ...}
    Accepts:
    - dict of cluster -> list of dicts with name/count
    - dict of cluster -> list of strings (count=1)
    - list of strings or dicts -> treated as {'other': [...]}

    Items without a usable name (missing, None, or blank) are dropped. A count
    that cannot be converted to int falls back to 1. Clusters left with no
    items are omitted, and an empty cluster key is merged into "other".
    Any other input type yields {}.
    """

    def _as_item(raw):
        # Return a normalized skill item, or None when it has no usable name.
        if isinstance(raw, dict):
            name, count = raw.get("name"), raw.get("count", 1)
        else:
            name, count = raw, 1
        if name is None:
            return None
        name = str(name).strip()
        if not name:
            return None
        try:
            count = int(count)
        except (TypeError, ValueError, OverflowError):
            count = 1
        return {"name": name, "count": count}

    if isinstance(skills, list):
        skills = {"other": skills}
    if not isinstance(skills, dict):
        return {}

    out = {}
    for cluster, items in skills.items():
        if not isinstance(items, list):
            continue
        normalized = [item for item in map(_as_item, items) if item is not None]
        if normalized:
            # extend (not assign) so "" and "other" do not overwrite each other
            out.setdefault(cluster or "other", []).extend(normalized)
    return out
def _aggregate_skill_totals(data):
    """Sum each skill's count across all positions.

    Args:
        data: Document in the internal format; only ``data["positions"]`` and
            each position's ``"skills"`` mapping are read.

    Returns:
        ``{skill_name: {"total": int, "clusters": set, "cluster": str}}`` where
        ``"clusters"`` holds every cluster the skill appeared under and
        ``"cluster"`` is the one contributing the largest summed count
        (ties resolved alphabetically, so the result is stable between runs).
    """
    totals = {}
    weight_by_cluster = defaultdict(Counter)  # skill -> Counter({cluster: summed count})

    for pos in data.get("positions", []):
        for cluster, items in (pos.get("skills") or {}).items():
            cluster = cluster or "other"
            for it in items:
                if not isinstance(it, dict):
                    continue
                name = it.get("name")
                if not name:
                    continue
                try:
                    cnt = int(it.get("count", 0))
                except (TypeError, ValueError):
                    cnt = 0
                entry = totals.setdefault(name, {"total": 0, "clusters": set()})
                entry["total"] += cnt
                entry["clusters"].add(cluster)
                weight_by_cluster[name][cluster] += cnt

    for name, entry in totals.items():
        weights = weight_by_cluster[name]
        # Highest weight first; the cluster name breaks ties deterministically.
        entry["cluster"] = min(weights, key=lambda c: (-weights[c], str(c)))
    return totals
def _build_graph(data, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min):
    """Build the position/skill graph from a document in the internal format.

    Node ids are prefixed (``pos::`` / ``skill::``) so a position and a skill
    with the same label never collide.

    Args:
        data: Document with ``"positions"`` and optionally ``"edges"``.
        min_skill_count: A skill is attached to a position only when its count
            for that position is at least this value.
        top_k_per_position: Keep only the K highest-count skills per position;
            0 (or a falsy value) keeps all of them.
        include_pos_pos_edges: When true, also add position-to-position edges
            taken from ``data["edges"]``.
        pos_pos_weight_min: Minimum ``weight`` for such an edge to be added.

    Returns:
        An undirected ``nx.Graph`` whose nodes carry ``label``, ``kind``,
        ``size``, ``color`` and ``title`` attributes and whose edges carry
        ``weight`` and ``title`` (plus ``color`` and ``dashes`` on
        position-to-position edges).
    """
    # The value may arrive as a float; slicing below needs an int.
    top_k = int(top_k_per_position or 0)

    positions = [
        p for p in data.get("positions", [])
        if isinstance(p, dict) and p.get("name")
    ]

    G = nx.Graph()

    # Pass 1: one node per named position, sized by how many skills it lists.
    for pos in positions:
        pos_name = pos["name"]
        skills = pos.get("skills") or {}
        total_skills = sum(len(v) for v in skills.values())
        G.add_node(
            f"pos::{pos_name}",
            label=pos_name,
            kind="position",
            size=max(15, min(60, 10 + 2 * total_skills)),
            color="#333333",
            title=f"<b>{pos_name}</b><br/>skills groups: {list(skills.keys())}",
        )

    skill_totals = _aggregate_skill_totals(data)

    # Pass 2: skill nodes and position -> skill edges.
    for pos in positions:
        pos_name = pos["name"]
        flat = []
        for cluster, items in (pos.get("skills") or {}).items():
            for it in items:
                if not isinstance(it, dict):
                    continue
                skill = it.get("name")
                if not skill:
                    continue
                # Read the count once; a missing or invalid count is 0.
                try:
                    cnt = int(it.get("count", 0))
                except (TypeError, ValueError):
                    cnt = 0
                if cnt >= min_skill_count:
                    flat.append((cluster or "other", skill, cnt))

        if top_k > 0:
            flat = sorted(flat, key=lambda x: -x[2])[:top_k]

        for cluster, skill, cnt in flat:
            node_id = f"skill::{skill}"
            if node_id not in G:
                # Colour and tooltip come from the first position that adds the skill.
                total = skill_totals.get(skill, {}).get("total", cnt)
                G.add_node(
                    node_id,
                    label=skill,
                    kind="skill",
                    size=max(8, min(50, 6 + math.sqrt(max(total, 0)) * 2)),
                    color=CLUSTER_COLORS.get(cluster, "#999999"),
                    title=f"<b>{skill}</b><br/>cluster: {cluster}<br/>total: {total}",
                )
            G.add_edge(
                f"pos::{pos_name}",
                node_id,
                weight=cnt,
                title=f"{pos_name} → {skill}: {cnt}",
            )

    # Optional pass 3: similarity edges between positions already in the graph.
    if include_pos_pos_edges:
        for e in data.get("edges") or []:
            if not isinstance(e, dict):
                continue
            try:
                w = float(e.get("weight", 0.0))
            except (TypeError, ValueError):
                continue
            # NaN/inf cannot be scaled to an integer edge weight below.
            if not math.isfinite(w) or w < pos_pos_weight_min:
                continue
            a = f"pos::{e.get('source')}"
            b = f"pos::{e.get('target')}"
            if a in G and b in G:
                G.add_edge(a, b, weight=max(1, int(w * 10)), color="#555555", dashes=True, title=f"similarity: {w}")
    return G
def _nx_to_pyvis_html(G, physics, layout, height_px):
    """Render a graph as a standalone interactive HTML document via PyVis.

    Args:
        G: Graph whose nodes may carry ``label``, ``color``, ``title``,
            ``size`` and ``kind``, and whose edges may carry ``title``,
            ``weight``, ``color`` and ``dashes``.
        physics: Truthy to enable the physics simulation.
        layout: ``"hierarchical (positions → skills)"`` selects the
            hierarchical layout; any other value selects the free layout.
        height_px: Canvas height in pixels.

    Returns:
        The HTML produced by ``Network.generate_html()``.
    """
    physics_on = bool(physics)
    net = Network(
        height=f"{int(height_px)}px",
        width="100%",
        bgcolor="#ffffff",
        font_color="#222222",
        directed=False,
        notebook=False,
    )

    # PyVis expects pure JSON (no 'var options ='). set_options replaces the
    # whole options object, so the solver has to be named here: a
    # force_atlas_2based() call made beforehand would simply be discarded.
    if layout == "hierarchical (positions → skills)":
        options = {
            "layout": {
                "hierarchical": {
                    "enabled": True,
                    "levelSeparation": 180,
                    "nodeSpacing": 170,
                    "treeSpacing": 200,
                    "direction": "UD",
                    "sortMethod": "hubsize",
                }
            },
            "physics": {"enabled": physics_on},
        }
    else:
        physics_options = {
            "enabled": physics_on,
            "stabilization": {"iterations": 150},
        }
        if physics_on:
            physics_options["solver"] = "forceAtlas2Based"
        options = {"physics": physics_options}
    net.set_options(json.dumps(options))

    for n, attrs in G.nodes(data=True):
        net.add_node(
            n,
            label=attrs.get("label", n),
            color=attrs.get("color", "#97c2fc"),
            title=attrs.get("title", ""),
            size=attrs.get("size", 15),
            shape="dot" if attrs.get("kind") == "skill" else "ellipse",
        )

    for u, v, edata in G.edges(data=True):
        edge_options = {
            "title": edata.get("title", ""),
            "value": edata.get("weight", 1),
        }
        # Forward optional styling only when set, so plain edges keep the
        # default colour and dashed edges actually render dashed.
        for key in ("color", "dashes"):
            if edata.get(key) is not None:
                edge_options[key] = edata[key]
        net.add_edge(u, v, **edge_options)

    return net.generate_html()
def run(json_file, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min, physics, layout, height_px):
    """Button callback: load the JSON, normalise it, build the graph, return its HTML.

    The arguments are the current values of the UI controls, in the order they
    are listed in the click handler's ``inputs``.
    """
    normalized = _normalize_schema(_load_json(json_file))
    graph = _build_graph(
        normalized,
        min_skill_count,
        top_k_per_position,
        include_pos_pos_edges,
        pos_pos_weight_min,
    )
    return _nx_to_pyvis_html(graph, physics=physics, layout=layout, height_px=height_px)
# UI definition: controls in the left column, rendered network in the right.
with gr.Blocks(title="Job Positions ↔ Hard Skills — Network Diagram") as demo:
    # Real newlines are required here: with a literal backslash-n the heading
    # and the bullet list collapse into a single rendered line.
    gr.Markdown(
        "# Network Diagram: Positions ↔ Skills\n"
        "Upload `job_position_skill_graph.json` or place it in the repo root.\n"
        "- **Black ovals** = Job positions\n"
        "- **Colored dots** = Skills (color by cluster)\n"
        "- Edge weight = frequency of skill in that position"
    )
    with gr.Row():
        with gr.Column(scale=1):
            json_file = gr.File(label="Upload job_position_skill_graph.json (optional)", file_count="single", file_types=[".json"])
            min_skill_count = gr.Slider(0, 50, value=5, step=1, label="Minimum skill count per position (filter noise)")
            top_k_per_position = gr.Slider(0, 100, value=20, step=1, label="Top-K skills per position (0 = all)")
            include_pos_pos_edges = gr.Checkbox(value=False, label="Include position↔position similarity edges")
            pos_pos_weight_min = gr.Slider(0.0, 1.0, value=0.2, step=0.05, label="Min similarity (if enabled)")
            physics = gr.Checkbox(value=True, label="Enable physics (force layout)")
            layout = gr.Dropdown(choices=["free (force layout)", "hierarchical (positions → skills)"], value="free (force layout)", label="Layout")
            height_px = gr.Slider(500, 1400, value=900, step=50, label="Canvas height (px)")
            btn = gr.Button("Build Network", variant="primary")
        with gr.Column(scale=1):
            out_html = gr.HTML(label="Interactive Network")
    # The order of `inputs` must match the parameter order of `run`.
    btn.click(
        fn=run,
        inputs=[json_file, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min, physics, layout, height_px],
        outputs=[out_html],
    )

if __name__ == "__main__":
    demo.launch()