\
import os
import json
import math
import gradio as gr
import networkx as nx
from pyvis.network import Network
# File name of the graph JSON that is read when the user uploads nothing.
DEFAULT_JSON = "job_position_skill_graph.json"

# Hex colour used for a skill node, keyed by the name of the skill's cluster.
CLUSTER_COLORS = {
    "programming": "#1f77b4",
    "databases": "#ff7f0e",
    "cloud": "#2ca02c",
    "devops": "#d62728",
    "version_control": "#9467bd",
    "data_processing": "#8c564b",
    "ml_ai": "#e377c2",
    "web_backend": "#7f7f7f",
    "web_frontend": "#bcbd22",
    "security": "#17becf",
    "networking": "#1b9e77",
    "mobile": "#d95f02",
    "analytics_bi": "#7570b3",
    "testing_qc": "#e7298a",
    "infra_sys": "#66a61e",
    "other": "#999999",
}
def _load_json(file_obj):
    """Load the graph JSON from an upload, falling back to ``DEFAULT_JSON``.

    Args:
        file_obj: ``None`` to use the default file, a filesystem path
            (``str`` or ``os.PathLike``), or an object that exposes the path
            as its ``.name`` attribute.

    Returns:
        The decoded JSON document.

    Raises:
        gr.Error: If no file is given and ``DEFAULT_JSON`` does not exist, or
            if the selected file cannot be decoded as UTF-8 JSON.
    """
    if file_obj is not None:
        # The upload may be handed over either as a bare path or as a wrapper
        # object carrying the path in ``.name``; accept both forms.
        if isinstance(file_obj, (str, os.PathLike)):
            path = os.fspath(file_obj)
        else:
            path = file_obj.name
    elif os.path.exists(DEFAULT_JSON):
        path = DEFAULT_JSON
    else:
        raise gr.Error("No JSON provided and default file not found. Upload or place job_position_skill_graph.json in repo root.")

    try:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        # Report malformed input as a user-facing error instead of a traceback.
        raise gr.Error(f"Could not parse {os.path.basename(path)} as JSON: {exc}") from exc
def _position_entry(pos_name, pos_val):
    """Turn one ``name -> value`` mapping entry into an internal position dict."""
    if isinstance(pos_val, dict):
        name = pos_val.get("name") or pos_name
        skills = pos_val.get("skills", {})
    elif isinstance(pos_val, list):
        # A bare list is a flat skill list; _coerce_skills files it under "other".
        name = pos_name
        skills = pos_val
    else:
        name = str(pos_name)
        skills = {}
    return {"name": name, "skills": _coerce_skills(skills)}


def _normalize_schema(data):
    """Convert any supported input schema to the internal graph format.

    Internal format::

        {
          "positions": [{"name": "...", "skills": {"cluster": [{"name": "...", "count": N}, ...], ...}}, ...],
          "edges": [{"source": "...", "target": "...", "weight": 0.2, "shared_skills": [...]}]
        }

    Supported inputs:
      A) ``"positions"`` is already a list: the input object is returned
         unchanged (no validation or coercion is applied).
      B) ``"positions"`` is a dict mapping position name -> position value.
      C/D) Any other top-level key (everything except ``"positions"`` and
         ``"edges"``) is treated as a position name mapped to
         ``{"name": ..., "skills": ...}``, ``{"skills": ...}`` or a flat list
         of skills.

    For B, C and D, ``"edges"`` is kept only if it is a list; otherwise it
    becomes ``[]``.

    Raises:
        gr.Error: If the root is not a JSON object, or no positions can be
            identified.
    """
    if not isinstance(data, dict):
        raise gr.Error("JSON root must be an object.")

    positions = data.get("positions")

    # Case A: already in internal format with positions as list.
    if isinstance(positions, list):
        return data

    edges = data.get("edges", [])
    norm = {"positions": [], "edges": edges if isinstance(edges, list) else []}

    # Case B: positions is a dict keyed by position name.
    if isinstance(positions, dict):
        norm["positions"] = [_position_entry(k, v) for k, v in positions.items()]
        return norm

    # Case C/D: top-level keys (excluding known keys) are positions.
    excluded = {"positions", "edges"}
    candidates = {k: v for k, v in data.items() if k not in excluded}
    if candidates:
        norm["positions"] = [_position_entry(k, v) for k, v in candidates.items()]
        return norm

    raise gr.Error("Unrecognized JSON schema. Include 'positions' or a mapping of position names.")
def _coerce_count(raw, default=1):
    """Return *raw* as an int, or *default* when it is missing or non-numeric."""
    if isinstance(raw, int):
        return int(raw)
    try:
        return int(float(raw))
    except (TypeError, ValueError, OverflowError):
        return default


def _coerce_skills(skills):
    """Normalize a skills value to ``{cluster: [{"name": str, "count": int}, ...]}``.

    Accepts:
      - dict of cluster -> list of dicts with ``name`` / ``count``
      - dict of cluster -> list of strings (each gets ``count=1``)
      - list of strings or dicts -> filed under the ``"other"`` cluster

    Items whose name is missing, ``None`` or blank are skipped. A missing or
    non-numeric ``count`` falls back to 1. Clusters whose value is not a list,
    or that end up with no items, are omitted. An empty cluster key is stored
    as ``"other"`` and merged with any existing ``"other"`` items. Any other
    input type yields ``{}``.
    """
    if isinstance(skills, list):
        # Reuse the dict path so flat lists get the same stripping/skipping.
        skills = {"other": skills}
    if not isinstance(skills, dict):
        return {}

    out = {}
    for cluster, items in skills.items():
        if not isinstance(items, list):
            continue
        norm_items = []
        for item in items:
            if isinstance(item, dict):
                raw_name = item.get("name")
                count = _coerce_count(item.get("count", 1))
            else:
                raw_name = item
                count = 1
            # Guard against None so it does not become the literal name "None".
            name = "" if raw_name is None else str(raw_name).strip()
            if name:
                norm_items.append({"name": name, "count": count})
        if norm_items:
            # extend (not assign) so "" and "other" do not overwrite each other.
            out.setdefault(cluster or "other", []).extend(norm_items)
    return out
def _aggregate_skill_totals(data):
    """Sum each skill's count over all positions.

    Args:
        data: Graph dict whose ``"positions"`` entries carry
            ``"skills": {cluster: [{"name": ..., "count": ...}, ...]}``.

    Returns:
        ``{skill_name: {"total": int, "clusters": set, "cluster": str}}``.
        ``"clusters"`` holds every cluster the skill was listed under (an
        empty cluster key counts as ``"other"``); ``"cluster"`` is one
        representative, chosen as the alphabetically first entry so the
        result does not depend on set iteration order.

    Malformed entries (non-dict positions or items, non-dict skills, items
    without a name) are skipped; a missing or non-numeric count adds 0.
    """
    totals = {}
    for pos in data.get("positions") or []:
        skills = pos.get("skills") if isinstance(pos, dict) else None
        if not isinstance(skills, dict):
            continue
        for cluster, items in skills.items():
            if not isinstance(items, list):
                continue
            for item in items:
                if not isinstance(item, dict):
                    continue
                name = item.get("name")
                if not name:
                    continue
                try:
                    count = int(item.get("count", 0))
                except (TypeError, ValueError):
                    count = 0
                entry = totals.setdefault(name, {"total": 0, "clusters": set()})
                entry["total"] += count
                entry["clusters"].add(cluster or "other")

    for entry in totals.values():
        entry["cluster"] = min(entry["clusters"], key=str) if entry["clusters"] else "other"
    return totals
def _build_graph(data, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min):
    """Build the position/skill graph from normalized data.

    Args:
        data: Graph dict with ``"positions"`` and optionally ``"edges"``.
        min_skill_count: A skill is linked to a position only if its count
            there is at least this value.
        top_k_per_position: If greater than 0, keep only this many
            highest-count skills per position; 0 or ``None`` keeps all.
        include_pos_pos_edges: Whether to add position-to-position edges from
            ``data["edges"]``.
        pos_pos_weight_min: Minimum ``weight`` for such an edge to be added.

    Returns:
        An undirected ``networkx.Graph``. Position nodes have id
        ``"pos::<name>"`` and skill nodes ``"skill::<name>"``; each node
        carries ``label``, ``kind``, ``size``, ``color`` and ``title``.
        A skill node takes the colour of the cluster it is first seen under.
    """

    def _skills_of(pos):
        """Return the position's skills mapping, or {} when it is not a dict."""
        skills = pos.get("skills")
        return skills if isinstance(skills, dict) else {}

    def _count_of(item):
        """Return the item's count as int, or 0 when missing/non-numeric."""
        try:
            return int(item.get("count", 0))
        except (TypeError, ValueError):
            return 0

    G = nx.Graph()
    positions = [
        pos for pos in (data.get("positions") or [])
        if isinstance(pos, dict) and pos.get("name")
    ]

    # Pass 1: add every position node first so they precede all skill nodes.
    for pos in positions:
        pos_name = pos["name"]
        skills = _skills_of(pos)
        total_skills = sum(len(v) for v in skills.values() if isinstance(v, list))
        G.add_node(
            f"pos::{pos_name}",
            label=pos_name,
            kind="position",
            # Size grows with the number of listed skills, clamped to [15, 60].
            size=max(15, min(60, 10 + 2 * total_skills)),
            color="#333333",
            title=f"{pos_name}\nskills groups: {list(skills.keys())}",
        )

    skill_totals = _aggregate_skill_totals(data)
    # The UI may deliver the top-k value as a float; slicing needs an int.
    top_k = int(top_k_per_position or 0)

    # Pass 2: add skill nodes and position -> skill edges.
    for pos in positions:
        pos_name = pos["name"]
        flat = []
        for cluster, items in _skills_of(pos).items():
            if not isinstance(items, list):
                continue
            for item in items:
                if not isinstance(item, dict) or not item.get("name"):
                    continue
                cnt = _count_of(item)
                if cnt >= min_skill_count:
                    flat.append((cluster or "other", item["name"], cnt))
        if top_k > 0:
            flat = sorted(flat, key=lambda x: -x[2])[:top_k]

        for cluster, skill, cnt in flat:
            node_id = f"skill::{skill}"
            if node_id not in G:
                total = skill_totals.get(skill, {}).get("total", cnt)
                # Square-root scaling, clamped to [8, 50]; max() guards sqrt
                # against a negative total.
                node_size = max(8, min(50, 6 + math.sqrt(max(total, 0)) * 2))
                G.add_node(
                    node_id,
                    label=skill,
                    kind="skill",
                    size=node_size,
                    color=CLUSTER_COLORS.get(cluster, "#999999"),
                    title=f"{skill}\ncluster: {cluster}\ntotal: {total}",
                )
            G.add_edge(
                f"pos::{pos_name}",
                node_id,
                weight=cnt,
                title=f"{pos_name} → {skill}: {cnt}",
            )

    if include_pos_pos_edges:
        for e in data.get("edges") or []:
            if not isinstance(e, dict):
                continue
            try:
                w = float(e.get("weight", 0.0))
            except (TypeError, ValueError):
                continue
            if not math.isfinite(w) or w < pos_pos_weight_min:
                continue
            a = f"pos::{e.get('source')}"
            b = f"pos::{e.get('target')}"
            # Only connect positions that actually exist as nodes.
            if a in G and b in G:
                G.add_edge(a, b, weight=max(1, int(w * 10)), color="#555555", dashes=True, title=f"similarity: {w}")
    return G
def _nx_to_pyvis_html(G, physics, layout, height_px):
    """Render a networkx graph as a pyvis HTML document.

    Args:
        G: Graph whose nodes may carry ``label``, ``color``, ``title``,
            ``size`` and ``kind``, and whose edges may carry ``title``,
            ``weight``, ``color`` and ``dashes``.
        physics: Truthy to enable the physics simulation.
        layout: ``"hierarchical (positions → skills)"`` selects the
            hierarchical layout; any other value uses the free layout.
        height_px: Canvas height in pixels.

    Returns:
        The HTML produced by ``Network.generate_html()``.
    """
    net = Network(
        height=f"{int(height_px)}px",
        width="100%",
        bgcolor="#ffffff",
        font_color="#222222",
        directed=False,
        notebook=False,
    )

    physics_on = bool(physics)
    # PyVis expects pure JSON (no 'var options =')
    if layout == "hierarchical (positions → skills)":
        options = {
            "layout": {
                "hierarchical": {
                    "enabled": True,
                    "levelSeparation": 180,
                    "nodeSpacing": 170,
                    "treeSpacing": 200,
                    "direction": "UD",
                    "sortMethod": "hubsize",
                }
            },
            "physics": {"enabled": physics_on},
        }
    else:
        physics_opts = {"enabled": physics_on, "stabilization": {"iterations": 150}}
        if physics_on:
            # set_options() replaces the whole option set, so a prior
            # net.force_atlas_2based() call would be discarded; request the
            # solver here instead.
            physics_opts["solver"] = "forceAtlas2Based"
        options = {"physics": physics_opts}
    net.set_options(json.dumps(options))

    for node_id, attrs in G.nodes(data=True):
        net.add_node(
            node_id,
            label=attrs.get("label", node_id),
            color=attrs.get("color", "#97c2fc"),
            title=attrs.get("title", ""),
            size=attrs.get("size", 15),
            # Skills are drawn as dots; everything else as an ellipse.
            shape="dot" if attrs.get("kind") == "skill" else "ellipse",
        )

    for u, v, attrs in G.edges(data=True):
        edge_opts = {"title": attrs.get("title", ""), "value": attrs.get("weight", 1)}
        # Forward optional styling only when set, so unset keys are not sent
        # as nulls and dashed edges keep their dashes.
        for key in ("color", "dashes"):
            if attrs.get(key) is not None:
                edge_opts[key] = attrs[key]
        net.add_edge(u, v, **edge_opts)

    return net.generate_html()
def run(json_file, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min, physics, layout, height_px):
    """Load the JSON, build the graph and return embeddable HTML for the UI.

    Args:
        json_file: Uploaded file (or ``None`` to use the default file).
        min_skill_count: Minimum per-position skill count to include.
        top_k_per_position: Per-position cap on skills (0 = all).
        include_pos_pos_edges: Whether to draw position-to-position edges.
        pos_pos_weight_min: Minimum weight for those edges.
        physics: Whether the physics simulation is enabled.
        layout: Layout choice string from the dropdown.
        height_px: Canvas height in pixels.

    Returns:
        An ``<iframe>`` element whose ``srcdoc`` is the generated pyvis page.
    """
    from html import escape

    data = _normalize_schema(_load_json(json_file))
    G = _build_graph(data, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min)
    page = _nx_to_pyvis_html(G, physics=physics, layout=layout, height_px=height_px)

    # The pyvis page draws itself with <script> tags, which an HTML component
    # that renders static markup only will not run; an iframe gives the page
    # its own document. The extra height leaves room for the page's margins.
    frame_height = int(height_px) + 50
    return (
        f'<iframe srcdoc="{escape(page, quote=True)}" '
        f'style="width: 100%; height: {frame_height}px; border: none;"></iframe>'
    )
# UI definition: controls in the left column, rendered network on the right.
with gr.Blocks(title="Job Positions ↔ Hard Skills — Network Diagram") as demo:
    # Real newlines (not literal backslash-n) so the heading, the paragraph
    # and the bullet list render as separate Markdown blocks.
    gr.Markdown(
        "# Network Diagram: Positions ↔ Skills\n"
        "Upload `job_position_skill_graph.json` or place it in the repo root.\n"
        "- **Black ovals** = Job positions\n"
        "- **Colored dots** = Skills (color by cluster)\n"
        "- Edge weight = frequency of skill in that position"
    )
    with gr.Row():
        with gr.Column(scale=1):
            json_file = gr.File(label="Upload job_position_skill_graph.json (optional)", file_count="single", file_types=[".json"])
            min_skill_count = gr.Slider(0, 50, value=5, step=1, label="Minimum skill count per position (filter noise)")
            top_k_per_position = gr.Slider(0, 100, value=20, step=1, label="Top-K skills per position (0 = all)")
            include_pos_pos_edges = gr.Checkbox(value=False, label="Include position↔position similarity edges")
            pos_pos_weight_min = gr.Slider(0.0, 1.0, value=0.2, step=0.05, label="Min similarity (if enabled)")
            physics = gr.Checkbox(value=True, label="Enable physics (force layout)")
            layout = gr.Dropdown(choices=["free (force layout)", "hierarchical (positions → skills)"], value="free (force layout)", label="Layout")
            height_px = gr.Slider(500, 1400, value=900, step=50, label="Canvas height (px)")
            btn = gr.Button("Build Network", variant="primary")
        with gr.Column(scale=1):
            out_html = gr.HTML(label="Interactive Network")
    # The input order must match the positional parameters of run().
    btn.click(
        fn=run,
        inputs=[json_file, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min, physics, layout, height_px],
        outputs=[out_html],
    )
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()