Spaces:

Nucha
/

NetworkChart

Sleeping

App Files Files Community

NetworkChart / app.py

Nucha

Upload 2 files

1048569 verified 5 months ago

raw

history blame contribute delete

11.6 kB

	\
	import os
	import json
	import math
	import gradio as gr

	import networkx as nx
	from pyvis.network import Network

	# Graph description read from the working directory when nothing is uploaded.
	DEFAULT_JSON = "job_position_skill_graph.json"

	# Hex colour used for skill nodes, keyed by skill-cluster name.
	CLUSTER_COLORS = {
	    # software engineering
	    "programming": "#1f77b4",
	    "databases": "#ff7f0e",
	    "cloud": "#2ca02c",
	    "devops": "#d62728",
	    "version_control": "#9467bd",
	    # data and machine learning
	    "data_processing": "#8c564b",
	    "ml_ai": "#e377c2",
	    # web
	    "web_backend": "#7f7f7f",
	    "web_frontend": "#bcbd22",
	    # platform and operations
	    "security": "#17becf",
	    "networking": "#1b9e77",
	    "mobile": "#d95f02",
	    "analytics_bi": "#7570b3",
	    "testing_qc": "#e7298a",
	    "infra_sys": "#66a61e",
	    # catch-all grey
	    "other": "#999999",
	}

	def _load_json(file_obj):
	    """Load the graph description from an upload or from the default file.

	    Args:
	        file_obj: Value delivered by the ``gr.File`` input. May be ``None``
	            (nothing uploaded), a filesystem path (``str`` or
	            ``os.PathLike``), or a wrapper object whose ``.name`` attribute
	            holds the path of the uploaded file.

	    Returns:
	        The decoded JSON document.

	    Raises:
	        gr.Error: If nothing was uploaded and ``DEFAULT_JSON`` does not
	            exist, or if the selected file cannot be decoded as JSON.
	    """
	    if file_obj is None:
	        if not os.path.exists(DEFAULT_JSON):
	            raise gr.Error("No JSON provided and default file not found. Upload or place job_position_skill_graph.json in repo root.")
	        path = DEFAULT_JSON
	    elif isinstance(file_obj, (str, os.PathLike)):
	        # A plain path has no usable ``.name`` (and ``Path.name`` is only the
	        # basename), so it must be used directly.
	        path = file_obj
	    else:
	        path = file_obj.name

	    try:
	        # "utf-8-sig" reads ordinary UTF-8 and also drops a leading BOM, which
	        # json.load would otherwise reject.
	        with open(path, "r", encoding="utf-8-sig") as f:
	            return json.load(f)
	    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
	        raise gr.Error(f"Could not parse the JSON file: {exc}") from exc

	def _normalize_schema(data):
	    """Convert any supported input schema to the internal format.

	    Internal format:
	        {
	          "positions": [{"name": "...", "skills": {"cluster": [{"name": "...", "count": N}, ...], ...}}, ...],
	          "edges": [{"source": "...", "target": "...", "weight": 0.2, "shared_skills": [...]}]
	        }

	    Supported inputs:
	        A) "positions" is a list: entries are kept (extra keys included), but
	           non-dict entries are dropped and each entry's skills are coerced.
	        B) "positions" is a dict mapping position name -> position value.
	        C) Top-level dict mapping position name -> {"name": ..., "skills": ...}
	           or -> flat list of skills (placed in the "other" cluster).
	        D) Top-level dict mapping position name -> {"skills": {...}} (no edges).

	    In every case "edges" is a list in the result; a missing or non-list
	    value becomes [].

	    Args:
	        data: Decoded JSON document.

	    Returns:
	        A dict in the internal format described above.

	    Raises:
	        gr.Error: If the root is not a JSON object, or no positions can be
	            found in it.
	    """
	    if not isinstance(data, dict):
	        raise gr.Error("JSON root must be an object.")

	    raw_edges = data.get("edges")
	    edges = raw_edges if isinstance(raw_edges, list) else []
	    positions = data.get("positions")

	    # Case A: already a list of positions. Sanitise rather than trust it, so
	    # later stages can rely on dict entries and coerced skills.
	    if isinstance(positions, list):
	        cleaned = [
	            {**pos, "skills": _coerce_skills(pos.get("skills") or {})}
	            for pos in positions
	            if isinstance(pos, dict)
	        ]
	        return {**data, "positions": cleaned, "edges": edges}

	    def _entry(key, value):
	        # Build one internal position record from a name -> value mapping item.
	        if isinstance(value, dict):
	            return {"name": value.get("name") or key, "skills": _coerce_skills(value.get("skills", {}))}
	        if isinstance(value, list):
	            # Flat skills list: file everything under the "other" cluster.
	            return {"name": key, "skills": _coerce_skills({"other": value})}
	        return {"name": key, "skills": {}}

	    if isinstance(positions, dict):
	        # Case B: positions keyed by name.
	        source = positions
	    else:
	        # Case C/D: every top-level key except the reserved ones is a position.
	        source = {k: v for k, v in data.items() if k not in ("positions", "edges")}
	        if not source:
	            raise gr.Error("Unrecognized JSON schema. Include 'positions' or a mapping of position names.")

	    return {"positions": [_entry(k, v) for k, v in source.items()], "edges": edges}

	def _coerce_skills(skills):
	    """Coerce a skills value to {cluster: [{"name": str, "count": int}, ...]}.

	    Accepted inputs:
	        - dict of cluster -> list of dicts carrying "name" and "count"
	        - dict of cluster -> list of plain values (each counted once)
	        - flat list of either of the above, filed under the "other" cluster

	    Entries whose name is missing, null or blank are dropped. A count that is
	    missing, null or not numeric becomes 1. Clusters with no usable entries
	    are omitted, and a falsy cluster key is merged into "other".

	    Args:
	        skills: Raw skills value taken from the input JSON.

	    Returns:
	        The coerced mapping; {} when ``skills`` is neither a list nor a dict.
	    """
	    def _clean_name(raw):
	        # str(None) would otherwise yield a skill literally called "None".
	        return "" if raw is None else str(raw).strip()

	    def _as_count(raw):
	        try:
	            return int(raw)
	        except (TypeError, ValueError, OverflowError):
	            pass
	        try:
	            return int(float(raw))  # tolerate strings such as "3.0"
	        except (TypeError, ValueError, OverflowError):
	            return 1

	    if isinstance(skills, list):
	        # Route flat lists through the same validation as clustered input.
	        skills = {"other": skills}
	    if not isinstance(skills, dict):
	        return {}

	    out = {}
	    for cluster, items in skills.items():
	        if not isinstance(items, list):
	            continue
	        cleaned = []
	        for item in items:
	            if isinstance(item, dict):
	                name = _clean_name(item.get("name"))
	                count = _as_count(item.get("count", 1))
	            else:
	                name = _clean_name(item)
	                count = 1
	            if name:
	                cleaned.append({"name": name, "count": count})
	        if cleaned:
	            # extend, not assign: "" and "other" both land on the same key.
	            out.setdefault(cluster or "other", []).extend(cleaned)
	    return out

	def _aggregate_skill_totals(data):
	    """Sum each skill's count over all positions.

	    Args:
	        data: Graph description whose "positions" entries carry a "skills"
	            mapping of cluster -> list of {"name", "count"} dicts.

	    Returns:
	        Dict mapping skill name to
	        {"total": int, "clusters": set of cluster names, "cluster": str}.
	        "cluster" is the cluster that contributed the largest summed count
	        for that skill; ties go to the cluster encountered first, so the
	        result does not depend on set iteration order.
	    """
	    totals = {}
	    # skill -> {cluster: summed count}; dicts keep first-seen order.
	    weight_by_cluster = {}

	    for pos in data.get("positions") or []:
	        for cluster, items in (pos.get("skills") or {}).items():
	            cluster = cluster or "other"
	            for it in items or []:
	                if not isinstance(it, dict):
	                    continue
	                name = it.get("name")
	                if not name:
	                    continue
	                cnt = int(it.get("count") or 0)  # a null count counts as 0
	                entry = totals.setdefault(name, {"total": 0, "clusters": set()})
	                entry["total"] += cnt
	                entry["clusters"].add(cluster)
	                per_cluster = weight_by_cluster.setdefault(name, {})
	                per_cluster[cluster] = per_cluster.get(cluster, 0) + cnt

	    for name, per_cluster in weight_by_cluster.items():
	        # max() returns the first maximal key in iteration order.
	        totals[name]["cluster"] = max(per_cluster, key=per_cluster.get)
	    return totals

	def _build_graph(data, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min):
	    """Build the position/skill graph as a ``networkx.Graph``.

	    Position nodes get the id "pos::<name>" and skill nodes "skill::<name>".
	    All position nodes are added before any skill node.

	    Args:
	        data: Graph description in the internal format ("positions" and
	            optionally "edges").
	        min_skill_count: A skill is linked to a position only if its count
	            for that position is at least this value.
	        top_k_per_position: Keep only the K highest-count skills of each
	            position; 0 or a falsy value keeps all of them.
	        include_pos_pos_edges: Whether to add the entries of data["edges"]
	            as position-to-position edges.
	        pos_pos_weight_min: Minimum "weight" an entry of data["edges"] needs
	            to be added.

	    Returns:
	        The populated graph.
	    """
	    G = nx.Graph()
	    positions = [
	        p for p in (data.get("positions") or [])
	        if isinstance(p, dict) and p.get("name")
	    ]

	    # NOTE(review): names from the input file are interpolated into the
	    # HTML-looking "title" strings below without escaping — confirm how the
	    # installed pyvis/vis-network renders titles before adding escaping.
	    for pos in positions:
	        pos_name = pos["name"]
	        skills = pos.get("skills") or {}
	        total_skills = sum(len(v) for v in skills.values())
	        G.add_node(
	            f"pos::{pos_name}",
	            label=pos_name,
	            kind="position",
	            size=max(15, min(60, 10 + 2 * total_skills)),
	            color="#333333",
	            title=f"<b>{pos_name}</b><br/>skills groups: {list(skills.keys())}",
	        )

	    skill_totals = _aggregate_skill_totals(data)

	    # Slider values may arrive as floats; slicing needs an int.
	    top_k = int(top_k_per_position or 0)

	    for pos in positions:
	        pos_name = pos["name"]
	        flat = []
	        for cluster, items in (pos.get("skills") or {}).items():
	            for it in items or []:
	                if not isinstance(it, dict):
	                    continue
	                skill = it.get("name")
	                if not skill:
	                    continue
	                # Read the count once with a default, so a missing key cannot
	                # raise KeyError when min_skill_count is 0.
	                cnt = int(it.get("count") or 0)
	                if cnt >= min_skill_count:
	                    flat.append((cluster or "other", skill, cnt))
	        if top_k > 0:
	            flat = sorted(flat, key=lambda entry: -entry[2])[:top_k]

	        for cluster, skill, cnt in flat:
	            node_id = f"skill::{skill}"
	            if node_id not in G:
	                total = skill_totals.get(skill, {}).get("total", cnt)
	                node_size = max(8, min(50, 6 + math.sqrt(max(0, total)) * 2))
	                color = CLUSTER_COLORS.get(cluster, "#999999")
	                G.add_node(
	                    node_id,
	                    label=skill,
	                    kind="skill",
	                    size=node_size,
	                    color=color,
	                    title=f"<b>{skill}</b><br/>cluster: {cluster}<br/>total: {total}",
	                )
	            G.add_edge(
	                f"pos::{pos_name}",
	                node_id,
	                weight=cnt,
	                title=f"{pos_name} → {skill}: {cnt}",
	            )

	    if include_pos_pos_edges:
	        for e in data.get("edges") or []:
	            if not isinstance(e, dict):
	                continue
	            try:
	                w = float(e.get("weight") or 0.0)
	            except (TypeError, ValueError):
	                continue  # unusable weight: skip the edge, keep the graph
	            if w < pos_pos_weight_min:
	                continue
	            a = f"pos::{e.get('source')}"
	            b = f"pos::{e.get('target')}"
	            if a in G and b in G:
	                G.add_edge(a, b, weight=max(1, int(w * 10)), color="#555555", dashes=True, title=f"similarity: {w}")
	    return G

	def _nx_to_pyvis_html(G, physics, layout, height_px):
	    """Render a graph as a pyvis HTML page.

	    Args:
	        G: Graph whose nodes may carry "label", "color", "title", "size" and
	            "kind", and whose edges may carry "title", "weight", "color" and
	            "dashes".
	        physics: Truthy to enable the physics simulation.
	        layout: Layout label; "hierarchical (positions → skills)" selects the
	            hierarchical layout, any other value the free layout.
	        height_px: Canvas height in pixels.

	    Returns:
	        The value of ``Network.generate_html()``.
	    """
	    net = Network(
	        height=f"{height_px}px",
	        width="100%",
	        bgcolor="#ffffff",
	        font_color="#222222",
	        directed=False,
	        notebook=False,
	    )
	    if physics:
	        # NOTE(review): set_options() below may replace the solver settings
	        # applied here — confirm against the installed pyvis version.
	        net.force_atlas_2based()

	    physics_enabled = bool(physics)
	    if layout == "hierarchical (positions → skills)":
	        options = {
	            "layout": {
	                "hierarchical": {
	                    "enabled": True,
	                    "levelSeparation": 180,
	                    "nodeSpacing": 170,
	                    "treeSpacing": 200,
	                    "direction": "UD",
	                    "sortMethod": "hubsize",
	                }
	            },
	            "physics": {"enabled": physics_enabled},
	        }
	    else:
	        options = {
	            "physics": {
	                "enabled": physics_enabled,
	                "stabilization": {"iterations": 150},
	            }
	        }
	    # PyVis expects pure JSON (no 'var options =').
	    net.set_options(json.dumps(options))

	    for n, attrs in G.nodes(data=True):
	        net.add_node(
	            n,
	            label=attrs.get("label", n),
	            color=attrs.get("color", "#97c2fc"),
	            title=attrs.get("title", ""),
	            size=attrs.get("size", 15),
	            shape="dot" if attrs.get("kind") == "skill" else "ellipse",
	        )

	    for u, v, edata in G.edges(data=True):
	        edge_opts = {
	            "title": edata.get("title", ""),
	            "value": edata.get("weight", 1),
	        }
	        # Forward optional styling only when it is set, so ordinary edges keep
	        # the default colour and dashed edges actually render dashed.
	        for key in ("color", "dashes"):
	            if edata.get(key) is not None:
	                edge_opts[key] = edata[key]
	        net.add_edge(u, v, **edge_opts)

	    return net.generate_html()

	def run(json_file, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min, physics, layout, height_px):
	    """Build the network for the current UI settings and return embeddable HTML.

	    Args:
	        json_file: Uploaded file value, or None to use the default file.
	        min_skill_count: Minimum per-position skill count to show.
	        top_k_per_position: Number of top skills kept per position (0 = all).
	        include_pos_pos_edges: Whether to draw position-to-position edges.
	        pos_pos_weight_min: Minimum similarity for those edges.
	        physics: Whether the physics simulation is enabled.
	        layout: Layout label chosen in the dropdown.
	        height_px: Canvas height in pixels.

	    Returns:
	        An ``<iframe>`` element whose ``srcdoc`` holds the generated page.
	    """
	    from html import escape  # function-scope import keeps this block self-contained

	    data = _normalize_schema(_load_json(json_file))
	    G = _build_graph(data, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min)
	    page = _nx_to_pyvis_html(G, physics=physics, layout=layout, height_px=height_px)

	    # The generated page draws the graph from <script> tags, and gr.HTML
	    # renders static HTML only. An iframe gives the page its own document so
	    # those scripts run.
	    frame_height = int(height_px) + 40  # small margin to avoid a scrollbar
	    return (
	        f'<iframe srcdoc="{escape(page, quote=True)}" '
	        f'style="width: 100%; height: {frame_height}px; border: none;"></iframe>'
	    )

	# UI definition: controls in the left column, rendered network in the right.
	with gr.Blocks(title="Job Positions ↔ Hard Skills — Network Diagram") as demo:
	    # Real newlines are required here: an escaped "\\n" puts a literal
	    # backslash-n into the text and collapses the heading and list into one line.
	    gr.Markdown(
	        "# Network Diagram: Positions ↔ Skills\n"
	        "Upload `job_position_skill_graph.json` or place it in the repo root.\n"
	        "- Black ovals = Job positions\n"
	        "- Colored dots = Skills (color by cluster)\n"
	        "- Edge weight = frequency of skill in that position"
	    )

	    with gr.Row():
	        with gr.Column(scale=1):
	            json_file = gr.File(label="Upload job_position_skill_graph.json (optional)", file_count="single", file_types=[".json"])
	            min_skill_count = gr.Slider(0, 50, value=5, step=1, label="Minimum skill count per position (filter noise)")
	            top_k_per_position = gr.Slider(0, 100, value=20, step=1, label="Top-K skills per position (0 = all)")
	            include_pos_pos_edges = gr.Checkbox(value=False, label="Include position↔position similarity edges")
	            pos_pos_weight_min = gr.Slider(0.0, 1.0, value=0.2, step=0.05, label="Min similarity (if enabled)")
	            physics = gr.Checkbox(value=True, label="Enable physics (force layout)")
	            layout = gr.Dropdown(choices=["free (force layout)", "hierarchical (positions → skills)"], value="free (force layout)", label="Layout")
	            height_px = gr.Slider(500, 1400, value=900, step=50, label="Canvas height (px)")
	            btn = gr.Button("Build Network", variant="primary")
	        with gr.Column(scale=1):
	            out_html = gr.HTML(label="Interactive Network")

	    # Input order must match the parameter order of run().
	    btn.click(
	        fn=run,
	        inputs=[json_file, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min, physics, layout, height_px],
	        outputs=[out_html],
	    )

	if __name__ == "__main__":
	    demo.launch()