Spaces:

Nucha
/

NetworkChart

Sleeping

App Files Files Community

NetworkChart / app.py

Nucha

Upload 2 files

844f88a verified 8 months ago

raw

history blame

7.1 kB

	\
	import os
	import json
	import math
	import gradio as gr
	from pyvis.network import Network

	# Graph file used when nothing is uploaded; it is looked up relative to the
	# working directory (place it at the repo root).
	DEFAULT_JSON = "job_position_skill_graph.json"

	# Node colours handed out to skill clusters. The palette is indexed modulo
	# its length, so colours repeat once there are more clusters than entries.
	CLUSTER_COLORS = [
	    "#1f77b4",
	    "#ff7f0e",
	    "#2ca02c",
	    "#d62728",
	    "#9467bd",
	    "#8c564b",
	    "#e377c2",
	    "#7f7f7f",
	    "#bcbd22",
	    "#17becf",
	]

	def load_graph_json(json_file):
	    """
	    Load the graph description from an uploaded file, or from DEFAULT_JSON.

	    Expected schema:
	    {
	      "positions": [{"name": "...", "skills": {"cluster": [{"name": "skill", "count": int}, ...], ...}}],
	      "edges": [{"source": "...", "target": "...", "weight": float, "shared_skills": [...]}]
	    }

	    json_file: None, a filesystem path, or an object whose ``name`` attribute
	        holds the path (gr.File may pass either form).

	    Returns the parsed top-level JSON object (a dict).

	    Raises gr.Error when no file is available, when the file cannot be read
	    or parsed, or when its top-level value is not a JSON object.
	    """
	    if json_file is not None:
	        # gr.File may pass a tempfile path string or a file object
	        path = json_file.name if hasattr(json_file, "name") else json_file
	    elif os.path.exists(DEFAULT_JSON):
	        path = DEFAULT_JSON
	    else:
	        raise gr.Error("No JSON provided and default file not found. Please upload job_position_skill_graph.json.")

	    try:
	        with open(path, "r", encoding="utf-8") as f:
	            data = json.load(f)
	    except (OSError, ValueError) as exc:
	        # ValueError covers json.JSONDecodeError and UnicodeDecodeError; surface
	        # them as a UI error instead of an unhandled traceback.
	        raise gr.Error(f"Could not read graph JSON from {path}: {exc}") from exc

	    if not isinstance(data, dict):
	        # Callers use data.get(...), so anything but an object is unusable.
	        raise gr.Error("Graph JSON must be an object with 'positions' and 'edges' keys.")
	    return data

	def _clamped_skill_count(item):
	    """Return the item's "count" as a non-negative int; missing or malformed values count as 0."""
	    try:
	        return max(0, int(item.get("count", 0)))
	    except (AttributeError, TypeError, ValueError, OverflowError):
	        return 0


	def infer_node_cluster_and_size(position, node_size_mode):
	    """
	    Infer the dominant cluster (used for colouring) and the node size.

	    position: dict with an optional "skills" mapping of
	        cluster name -> list of {"name": ..., "count": ...} items.
	    node_size_mode: 'skills-top10' sums only the 10 largest counts of each
	        cluster; any other value ('skills-total') sums every count.

	    Returns (dominant_cluster, size). The dominant cluster is the one with
	    the largest total count (first one wins on ties). Size is 10 when there
	    is nothing to count, otherwise 10 + min(50, 5 * sqrt(total)), i.e. a
	    value in the range 10..60. A position without any cluster yields
	    ("other", 10).
	    """
	    # "skills" (or a single cluster's list) may be present but null in the JSON.
	    skills_by_cluster = position.get("skills") or {}
	    counts_by_cluster = {
	        cluster: [_clamped_skill_count(item) for item in (items or [])]
	        for cluster, items in skills_by_cluster.items()
	    }
	    if not counts_by_cluster:
	        return ("other", 10)

	    cluster_scores = {cluster: sum(counts) for cluster, counts in counts_by_cluster.items()}
	    # max() returns the first maximal key, so ties resolve to file order.
	    dominant = max(cluster_scores, key=cluster_scores.get)

	    if node_size_mode == "skills-top10":
	        # Top 10 *per cluster*, using the same clamped counts as the total mode.
	        size = sum(
	            sum(sorted(counts, reverse=True)[:10])
	            for counts in counts_by_cluster.values()
	        )
	    else:
	        size = sum(cluster_scores.values())

	    # Map size to a reasonable node size (10..60)
	    if size <= 0:
	        return (dominant, 10)
	    # sqrt scale to compress big ranges
	    scaled = 10 + min(50, 5 * math.sqrt(size))
	    return (dominant, scaled)

	def build_tooltip(position, max_items_per_cluster=6):
	    """
	    Build the HTML tooltip listing the top skills of each cluster.

	    position: dict with optional "name" and "skills" entries, where "skills"
	        maps cluster name -> list of {"name": ..., "count": ...} items.
	    max_items_per_cluster: how many skills to list per cluster; coerced to
	        int so a float slider value is accepted.

	    Returns an HTML fragment: the position name in bold followed by one
	    <div> per non-empty cluster, skills ordered by descending count.
	    All text taken from the data is HTML-escaped.
	    """
	    # Local import keeps this function self-contained.
	    from html import escape

	    def count_of(item):
	        # Missing or malformed counts are shown as 0 rather than raising.
	        try:
	            return int(item.get("count", 0))
	        except (TypeError, ValueError, OverflowError):
	            return 0

	    limit = int(max_items_per_cluster)
	    name = position.get("name", "")
	    parts = [f"<b>{escape(str(name))}</b>"]
	    # "skills" may be present but null in the JSON.
	    for cluster, items in (position.get("skills") or {}).items():
	        if not items:
	            continue
	        # sorted() is stable, so skills with equal counts keep their file order.
	        ranked = sorted(((count_of(it), it) for it in items), key=lambda pair: -pair[0])[:limit]
	        inner = ", ".join(
	            f"{escape(str(it.get('name', '')))} ({count})" for count, it in ranked
	        )
	        parts.append(f"<div><b>{escape(str(cluster))}:</b> {inner}</div>")
	    return "<br/>".join(parts)

	def render_network(json_file, min_edge_weight, show_labels, physics, max_items_per_cluster, node_size_mode, filter_position, layout):
	    """
	    Build the pyvis network for the loaded graph and return it as HTML.

	    json_file: upload passed through to load_graph_json (None -> default file).
	    min_edge_weight: edges whose weight is below this threshold are dropped.
	    show_labels: whether node names are drawn next to the nodes.
	    physics: enable the physics layout; when False the layout is static.
	    max_items_per_cluster: forwarded to build_tooltip.
	    node_size_mode: forwarded to infer_node_cluster_and_size.
	    filter_position: optional case-insensitive substring filter on position names.
	    layout: "Barnes-Hut" selects net.barnes_hut(); anything else uses
	        net.force_atlas_2based(). Only consulted when physics is enabled.

	    Returns an HTML string: an <iframe> embedding the generated pyvis page,
	    or a short hint message when no position passes the filter.
	    """
	    # Local import keeps this function self-contained.
	    from html import escape

	    data = load_graph_json(json_file)
	    positions = data.get("positions") or []
	    edges = data.get("edges") or []

	    # Optional position name filter (substring, case-insensitive)
	    needle = (filter_position or "").strip().lower()
	    visible = [
	        pos for pos in positions
	        if not needle or needle in str(pos.get("name", "")).lower()
	    ]
	    # If graph ends up empty, hint the user
	    if not visible:
	        return "<h3>No nodes to show</h3><p>Loosen filters or upload a JSON.</p>"

	    # Prepare pyvis network
	    net = Network(height="720px", width="100%", bgcolor="#ffffff", font_color="#111111", directed=False, cdn_resources="in_line")
	    if not physics:
	        # pyvis parses set_options() input as JSON; the former JS-style
	        # snippet (unquoted keys, trailing ';') is not valid JSON, so use
	        # the dedicated switch instead.
	        net.toggle_physics(False)
	    elif layout == "Barnes-Hut":
	        net.barnes_hut()
	    else:
	        # ForceAtlas2Based may look nice for dense graphs
	        net.force_atlas_2based()

	    # Cluster -> colour, in order of first appearance across ALL positions
	    # (not only the filtered ones) so colours stay stable while filtering.
	    color_map = {}
	    for pos in positions:
	        for cluster in (pos.get("skills") or {}):
	            if cluster not in color_map:
	                color_map[cluster] = CLUSTER_COLORS[len(color_map) % len(CLUSTER_COLORS)]
	    color_map.setdefault("other", "#888888")

	    # Add nodes
	    node_ids = set()
	    for pos in visible:
	        name = pos.get("name", "")
	        dominant_cluster, size = infer_node_cluster_and_size(pos, node_size_mode)
	        net.add_node(
	            n_id=name,
	            # pyvis falls back to the node id when the label is falsy, so an
	            # empty string would still show the name; a blank space hides it.
	            label=name if show_labels else " ",
	            title=build_tooltip(pos, max_items_per_cluster=max_items_per_cluster),
	            color=color_map.get(dominant_cluster, color_map["other"]),
	            size=size,
	        )
	        node_ids.add(name)

	    # Add edges with threshold filter
	    threshold = float(min_edge_weight)
	    for edge in edges:
	        try:
	            weight = float(edge.get("weight") or 0)
	        except (TypeError, ValueError):
	            continue  # unusable weight: skip the edge instead of failing the render
	        if weight < threshold:
	            continue
	        src, tgt = edge.get("source"), edge.get("target")
	        if src not in node_ids or tgt not in node_ids:
	            continue
	        # Show at most 10 shared skills; names come from the upload, so escape them.
	        shared = ", ".join(escape(str(s)) for s in (edge.get("shared_skills") or [])[:10])
	        net.add_edge(src, tgt, value=weight, title=f"weight={weight:.2f} | shared: {shared}")

	    # gr.HTML renders static markup only, so the scripts of the generated page
	    # would not run if it were returned directly. Embedding the page through
	    # an iframe's srcdoc gives it its own document in which they execute.
	    page = net.generate_html()
	    return (
	        '<iframe style="width: 100%; height: 740px; border: 0;" '
	        f'srcdoc="{escape(page, quote=True)}"></iframe>'
	    )

	# Gradio UI: controls in the left column, rendered network on the right.
	with gr.Blocks(title="Job Position ↔ Hard Skills — Network") as demo:
	    gr.Markdown("# Job Position ↔ Hard Skills — Network Diagram\nUpload a JSON or place job_position_skill_graph.json in repo root.")

	    with gr.Row():
	        with gr.Column(scale=1):
	            json_file = gr.File(
	                label="Upload job_position_skill_graph.json (optional)",
	                file_count="single",
	                file_types=[".json"],
	            )
	            min_edge_weight = gr.Slider(
	                0.0, 1.0, value=0.15, step=0.01, label="Min edge weight (Jaccard)"
	            )
	            show_labels = gr.Checkbox(value=True, label="Show node labels")
	            physics = gr.Checkbox(value=True, label="Enable physics layout")
	            layout = gr.Radio(
	                choices=["Barnes-Hut", "ForceAtlas2Based"],
	                value="ForceAtlas2Based",
	                label="Layout algorithm",
	            )
	            node_size_mode = gr.Radio(
	                choices=["skills-total", "skills-top10"],
	                value="skills-total",
	                label="Node size scale by",
	            )
	            max_items_per_cluster = gr.Slider(
	                1, 20, value=6, step=1, label="Tooltip: max skills per cluster"
	            )
	            filter_position = gr.Textbox(value="", label="Filter by position name (substring)")
	            btn = gr.Button("Render", variant="primary")
	        with gr.Column(scale=1):
	            out_html = gr.HTML(label="Network")

	    # The inputs are listed in the same order as render_network's parameters.
	    btn.click(
	        fn=render_network,
	        inputs=[
	            json_file,
	            min_edge_weight,
	            show_labels,
	            physics,
	            max_items_per_cluster,
	            node_size_mode,
	            filter_position,
	            layout,
	        ],
	        outputs=[out_html],
	    )


	# Start the app only when the file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()