"""Gradio app that draws a job-position <-> hard-skill network with PyVis.

The app loads a JSON description of positions and their skills (several
input schemas are accepted, see ``_normalize_schema``), builds a NetworkX
graph of position and skill nodes, and renders it as an interactive
vis-network page embedded in the Gradio UI.
"""

import html
import json
import math
import os

import gradio as gr
import networkx as nx
from pyvis.network import Network

DEFAULT_JSON = "job_position_skill_graph.json"

# Choices of the "Layout" dropdown; the hierarchical one is matched by value.
LAYOUT_FREE = "free (force layout)"
LAYOUT_HIERARCHICAL = "hierarchical (positions → skills)"

# Fallback colour for clusters that are not listed in CLUSTER_COLORS.
DEFAULT_SKILL_COLOR = "#999999"

CLUSTER_COLORS = {
    "programming": "#1f77b4",
    "databases": "#ff7f0e",
    "cloud": "#2ca02c",
    "devops": "#d62728",
    "version_control": "#9467bd",
    "data_processing": "#8c564b",
    "ml_ai": "#e377c2",
    "web_backend": "#7f7f7f",
    "web_frontend": "#bcbd22",
    "security": "#17becf",
    "networking": "#1b9e77",
    "mobile": "#d95f02",
    "analytics_bi": "#7570b3",
    "testing_qc": "#e7298a",
    "infra_sys": "#66a61e",
    "other": "#999999",
}


def _to_count(value, default=1):
    """Convert *value* to an int, returning *default* when it is not numeric."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        pass
    try:
        # Accept numeric strings such as "3.0".
        return int(float(value))
    except (TypeError, ValueError, OverflowError):
        return default


def _read_json_file(path):
    """Parse the JSON file at *path*, reporting bad JSON as a ``gr.Error``."""
    try:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except json.JSONDecodeError as err:
        raise gr.Error(f"Could not parse JSON from {path}: {err}") from err


def _load_json(file_obj):
    """Load the graph description from an upload or from ``DEFAULT_JSON``.

    Args:
        file_obj: Value of the ``gr.File`` input. May be ``None`` (nothing
            uploaded), a filesystem path, or an object exposing the path as
            ``.name``; both upload shapes are accepted because the value
            type differs between Gradio versions.

    Returns:
        The decoded JSON document.

    Raises:
        gr.Error: If no upload is given and ``DEFAULT_JSON`` does not exist,
            if the upload carries no usable path, or if the file is not
            valid JSON.
    """
    if file_obj is not None:
        if isinstance(file_obj, (str, os.PathLike)):
            path = file_obj
        else:
            path = getattr(file_obj, "name", None)
        if not path:
            raise gr.Error("Uploaded file has no readable path.")
        return _read_json_file(path)
    if os.path.exists(DEFAULT_JSON):
        return _read_json_file(DEFAULT_JSON)
    raise gr.Error("No JSON provided and default file not found. Upload or place job_position_skill_graph.json in repo root.")


def _position_entry(pos_name, pos_val):
    """Build one normalized position record from a ``name -> value`` pair."""
    if isinstance(pos_val, dict):
        name = pos_val.get("name") or pos_name
        skills = pos_val.get("skills", {})
    elif isinstance(pos_val, list):
        # A bare list is a flat skills list; it ends up in the "other" cluster.
        name, skills = pos_name, pos_val
    else:
        name, skills = pos_name, {}
    return {"name": name, "skills": _coerce_skills(skills)}


def _normalize_schema(data):
    """
    Accepts multiple schemas and converts to internal format:
    {
      "positions": [{"name": "...","skills": {"cluster":[{"name":"...","count":N}, ...], ...}}, ...],
      "edges": [{"source":"...","target":"...","weight":0.2,"shared_skills":[...]}]
    }
    Supported inputs:
    A) Internal format (positions as a list; skills are still coerced)
    B) positions as a dict -> convert to list
    C) Top-level dict mapping position_name -> {"name": "...", "skills": {...}} or a flat skills list
    D) Top-level dict mapping position_name -> {"skills": {"cluster":[...]} } (no edges)

    Raises:
        gr.Error: If the root is not an object or no positions can be found.
    """
    if not isinstance(data, dict):
        raise gr.Error("JSON root must be an object.")

    raw_edges = data.get("edges", [])
    norm = {
        "positions": [],
        "edges": raw_edges if isinstance(raw_edges, list) else [],
    }
    positions = data.get("positions")

    # Case A: positions already a list. Entries are still validated so that
    # malformed skills or edges cannot crash the graph builder later on.
    if isinstance(positions, list):
        for entry in positions:
            if not isinstance(entry, dict) or not entry.get("name"):
                continue
            norm["positions"].append(
                {"name": entry["name"], "skills": _coerce_skills(entry.get("skills"))}
            )
        return norm

    # Case B: positions is a dict keyed by position name.
    if isinstance(positions, dict):
        for pos_name, pos_val in positions.items():
            norm["positions"].append(_position_entry(pos_name, pos_val))
        return norm

    # Case C/D: every top-level key other than the known ones is a position.
    excluded = {"positions", "edges"}
    candidates = {k: v for k, v in data.items() if k not in excluded}
    if candidates:
        for pos_name, pos_val in candidates.items():
            norm["positions"].append(_position_entry(pos_name, pos_val))
        return norm

    raise gr.Error("Unrecognized JSON schema. Include 'positions' or a mapping of position names.")


def _coerce_skills(skills):
    """
    Ensure skills structure is {cluster: [{"name":..., "count": int}, ...], ...}
    Accepts:
      - dict of cluster -> list of dicts with name/count
      - dict of cluster -> list of strings (count=1)
      - list of strings or name/count dicts -> wrapped into {'other': [...]}
    Items with an empty or missing name are dropped, non-numeric counts fall
    back to 1, and clusters left without items are omitted. Any other input
    yields an empty dict.
    """
    if isinstance(skills, list):
        skills = {"other": skills}
    if not isinstance(skills, dict):
        return {}

    out = {}
    for cluster, items in skills.items():
        if not isinstance(items, list):
            continue
        norm_items = []
        for item in items:
            if isinstance(item, dict):
                raw_name = item.get("name")
                count = _to_count(item.get("count", 1), default=1)
            else:
                raw_name, count = item, 1
            # A JSON null must not turn into a skill called "None".
            name = "" if raw_name is None else str(raw_name).strip()
            if name:
                norm_items.append({"name": name, "count": count})
        if norm_items:
            # Merge rather than overwrite when two keys map to one cluster.
            out.setdefault(cluster or "other", []).extend(norm_items)
    return out


def _aggregate_skill_totals(data):
    """Sum each skill's count over all positions.

    Returns:
        Dict mapping skill name to ``{"total": int, "clusters": set,
        "cluster": str}``. ``cluster`` is the alphabetically first cluster
        the skill appears in, so the choice is stable between runs.
    """
    totals = {}
    for pos in data.get("positions", []):
        for cluster, items in (pos.get("skills") or {}).items():
            for item in items:
                name = item.get("name")
                if not name:
                    continue
                entry = totals.setdefault(name, {"total": 0, "clusters": set()})
                entry["total"] += _to_count(item.get("count", 0), default=0)
                entry["clusters"].add(cluster or "other")
    for entry in totals.values():
        entry["cluster"] = min(entry["clusters"]) if entry["clusters"] else "other"
    return totals


def _build_graph(data, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min):
    """Build the position/skill graph from normalized data.

    Args:
        data: Output of ``_normalize_schema``.
        min_skill_count: Skills with a per-position count below this are
            left out for that position.
        top_k_per_position: Keep only the K highest-count skills of each
            position; 0 (or less) keeps all of them.
        include_pos_pos_edges: Whether to add edges from ``data["edges"]``.
        pos_pos_weight_min: Minimum ``weight`` for such an edge to be added.

    Returns:
        An ``nx.Graph`` whose nodes are ``pos::<name>`` and ``skill::<name>``
        and carry ``label``, ``kind``, ``size``, ``color`` and ``title``.
    """
    # UI sliders may deliver floats; slicing below needs a real int.
    top_k = _to_count(top_k_per_position, default=0)
    positions = [p for p in data.get("positions", []) if p.get("name")]

    G = nx.Graph()
    for pos in positions:
        pos_name = pos["name"]
        skills = pos.get("skills") or {}
        total_skills = sum(len(items) for items in skills.values())
        G.add_node(
            f"pos::{pos_name}",
            label=pos_name,
            kind="position",
            size=max(15, min(60, 10 + 2 * total_skills)),
            color="#333333",
            title=f"{pos_name}\nskills groups: {list(skills.keys())}",
        )

    skill_totals = _aggregate_skill_totals(data)

    for pos in positions:
        pos_name = pos["name"]
        flat = []
        for cluster, items in (pos.get("skills") or {}).items():
            for item in items:
                name = item.get("name")
                count = _to_count(item.get("count", 0), default=0)
                if name and count >= min_skill_count:
                    flat.append((cluster or "other", name, count))
        if top_k > 0:
            # Stable sort: ties keep their input order.
            flat = sorted(flat, key=lambda entry: entry[2], reverse=True)[:top_k]

        for cluster, skill, cnt in flat:
            node_id = f"skill::{skill}"
            if node_id not in G:
                total = skill_totals.get(skill, {}).get("total", cnt)
                G.add_node(
                    node_id,
                    label=skill,
                    kind="skill",
                    # Clamp at 0 so a negative total cannot break sqrt.
                    size=max(8, min(50, 6 + math.sqrt(max(total, 0)) * 2)),
                    color=CLUSTER_COLORS.get(cluster, DEFAULT_SKILL_COLOR),
                    title=f"{skill}\ncluster: {cluster}\ntotal: {total}",
                )
            G.add_edge(
                f"pos::{pos_name}",
                node_id,
                weight=cnt,
                title=f"{pos_name} → {skill}: {cnt}",
            )

    if include_pos_pos_edges:
        for edge in data.get("edges", []):
            if not isinstance(edge, dict):
                continue
            try:
                w = float(edge.get("weight", 0.0))
            except (TypeError, ValueError):
                continue  # unusable weight: skip the edge instead of failing
            if w < pos_pos_weight_min:
                continue
            a = f"pos::{edge.get('source')}"
            b = f"pos::{edge.get('target')}"
            if a in G and b in G:
                G.add_edge(
                    a,
                    b,
                    weight=max(1, int(w * 10)),
                    color="#555555",
                    dashes=True,
                    title=f"similarity: {w}",
                )
    return G


def _nx_to_pyvis_html(G, physics, layout, height_px):
    """Render *G* as a standalone PyVis HTML page.

    Args:
        G: Graph produced by ``_build_graph``.
        physics: Whether the physics simulation is enabled.
        layout: Layout dropdown value; ``LAYOUT_HIERARCHICAL`` selects the
            hierarchical layout, anything else the free layout.
        height_px: Canvas height in pixels.

    Returns:
        The complete HTML document as a string.
    """
    net = Network(
        height=f"{int(height_px)}px",
        width="100%",
        bgcolor="#ffffff",
        font_color="#222222",
        directed=False,
        notebook=False,
    )

    # set_options() replaces the whole options object, so the solver has to
    # be part of this JSON; a force_atlas_2based() call made beforehand
    # would simply be discarded.
    if layout == LAYOUT_HIERARCHICAL:
        options = {
            "layout": {
                "hierarchical": {
                    "enabled": True,
                    "levelSeparation": 180,
                    "nodeSpacing": 170,
                    "treeSpacing": 200,
                    "direction": "UD",
                    "sortMethod": "hubsize",
                }
            },
            "physics": {"enabled": bool(physics)},
        }
    else:
        options = {
            "physics": {
                "enabled": bool(physics),
                "solver": "forceAtlas2Based",
                "stabilization": {"iterations": 150},
            }
        }
    # PyVis expects pure JSON (no 'var options =')
    net.set_options(json.dumps(options))

    for node_id, attrs in G.nodes(data=True):
        net.add_node(
            node_id,
            label=attrs.get("label", node_id),
            color=attrs.get("color", "#97c2fc"),
            title=attrs.get("title", ""),
            size=attrs.get("size", 15),
            shape="dot" if attrs.get("kind") == "skill" else "ellipse",
        )

    for u, v, attrs in G.edges(data=True):
        edge_opts = {
            "title": attrs.get("title", ""),
            "value": attrs.get("weight", 1),
        }
        # Forward optional styling only when it was set on the graph edge.
        for key in ("color", "dashes"):
            if attrs.get(key) is not None:
                edge_opts[key] = attrs[key]
        net.add_edge(u, v, **edge_opts)

    return net.generate_html()


def run(json_file, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min, physics, layout, height_px):
    """Gradio callback: load the data, build the graph, return embeddable HTML.

    Returns:
        An ``<iframe>`` snippet whose ``srcdoc`` holds the PyVis page. The
        page is wrapped because markup inserted into the HTML output does
        not get its ``<script>`` tags executed, which would leave the
        network blank.
    """
    data_raw = _load_json(json_file)
    data = _normalize_schema(data_raw)
    G = _build_graph(data, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min)
    page = _nx_to_pyvis_html(G, physics=physics, layout=layout, height_px=height_px)
    # Slightly taller than the canvas so the frame does not need a scrollbar.
    frame_height = int(height_px) + 50
    return (
        f'<iframe style="width: 100%; height: {frame_height}px; border: none;" '
        f'srcdoc="{html.escape(page, quote=True)}"></iframe>'
    )


with gr.Blocks(title="Job Positions ↔ Hard Skills — Network Diagram") as demo:
    gr.Markdown(
        "# Network Diagram: Positions ↔ Skills\n"
        "Upload `job_position_skill_graph.json` or place it in the repo root.\n"
        "- **Black ovals** = Job positions\n"
        "- **Colored dots** = Skills (color by cluster)\n"
        "- Edge weight = frequency of skill in that position"
    )
    with gr.Row():
        with gr.Column(scale=1):
            json_file = gr.File(label="Upload job_position_skill_graph.json (optional)", file_count="single", file_types=[".json"])
            min_skill_count = gr.Slider(0, 50, value=5, step=1, label="Minimum skill count per position (filter noise)")
            top_k_per_position = gr.Slider(0, 100, value=20, step=1, label="Top-K skills per position (0 = all)")
            include_pos_pos_edges = gr.Checkbox(value=False, label="Include position↔position similarity edges")
            pos_pos_weight_min = gr.Slider(0.0, 1.0, value=0.2, step=0.05, label="Min similarity (if enabled)")
            physics = gr.Checkbox(value=True, label="Enable physics (force layout)")
            layout = gr.Dropdown(choices=[LAYOUT_FREE, LAYOUT_HIERARCHICAL], value=LAYOUT_FREE, label="Layout")
            height_px = gr.Slider(500, 1400, value=900, step=50, label="Canvas height (px)")
            btn = gr.Button("Build Network", variant="primary")
        with gr.Column(scale=1):
            out_html = gr.HTML(label="Interactive Network")

    btn.click(
        fn=run,
        inputs=[json_file, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min, physics, layout, height_px],
        outputs=[out_html],
    )

if __name__ == "__main__":
    demo.launch()