NetworkChart / app.py
Nucha's picture
Upload 2 files
844f88a verified
raw
history blame
7.1 kB
\
import os
import json
import math
import gradio as gr
from pyvis.network import Network
# Fallback graph file used by load_graph_json() when nothing is uploaded.
# The path is relative, so it is resolved against the working directory.
DEFAULT_JSON = "job_position_skill_graph.json"  # Put this file at repo root
# Color palette for clusters. render_network() assigns colors by cluster index
# modulo the palette length, so colors repeat once there are more than ten
# clusters.
CLUSTER_COLORS = [
    "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd",
    "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"
]
def load_graph_json(json_file):
    """
    Load the graph JSON from an uploaded file, or from DEFAULT_JSON if present.

    Expected schema:
    {
      "positions": [{"name": "...","skills": {"cluster":[{"name":"skill","count":int},...]...}}],
      "edges": [{"source":"...","target":"...","weight":float,"shared_skills":[...]}]
    }

    Args:
        json_file: Upload value — a path string, an object exposing the path
            as ``.name``, or None when nothing was uploaded.

    Returns:
        The parsed JSON object (a dict).

    Raises:
        gr.Error: If nothing was uploaded and DEFAULT_JSON does not exist, if
            the chosen file cannot be read or parsed, or if its top-level
            value is not a JSON object.
    """
    if json_file is not None:
        # gr.File may pass a tempfile path string or a file object
        path = json_file.name if hasattr(json_file, "name") else json_file
    elif os.path.exists(DEFAULT_JSON):
        path = DEFAULT_JSON
    else:
        raise gr.Error("No JSON provided and default file not found. Please upload job_position_skill_graph.json.")

    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as err:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError, so a
        # broken upload surfaces as a readable UI error, not a raw traceback.
        raise gr.Error(f"Could not load graph JSON from '{os.path.basename(str(path))}': {err}") from err

    # Callers read the result with .get(); reject lists/scalars up front.
    if not isinstance(data, dict):
        raise gr.Error("Graph JSON must be an object with 'positions' and 'edges' keys.")
    return data
def infer_node_cluster_and_size(position, node_size_mode):
    """
    Infer the dominant cluster (for coloring) and the display size of a node.

    Args:
        position: Position dict; ``position["skills"]`` maps a cluster name to
            a list of ``{"name": ..., "count": ...}`` items. A missing or null
            ``skills`` entry (or a null item list) is treated as empty.
        node_size_mode: 'skills-top10' sizes the node by the ten highest
            counts of *each* cluster; any other value (i.e. 'skills-total')
            uses the sum of all counts.

    Returns:
        ``(dominant_cluster, size)``. ``dominant_cluster`` is the cluster with
        the largest summed count (the first one wins ties), or "other" when
        the position has no clusters. ``size`` is 10 when there is nothing to
        count, otherwise a sqrt-scaled value in the range 10..60.
    """
    def clamped_count(item):
        # Missing/null counts are 0, and negative counts never reduce a score.
        # The same clamp feeds both size modes so they agree on the same data.
        return max(0, int(item.get("count") or 0))

    # `or {}` mirrors render_network, which also tolerates "skills": null
    skills_by_cluster = position.get("skills") or {}
    counts_by_cluster = {
        cl: [clamped_count(it) for it in (items or [])]
        for cl, items in skills_by_cluster.items()
    }
    if not counts_by_cluster:
        return ("other", 10)

    cluster_scores = {cl: sum(counts) for cl, counts in counts_by_cluster.items()}
    # max() returns the first maximal key, so ties go to the earliest cluster
    dominant = max(cluster_scores, key=cluster_scores.get)

    if node_size_mode == "skills-top10":
        size = sum(
            sum(sorted(counts, reverse=True)[:10])
            for counts in counts_by_cluster.values()
        )
    else:
        size = sum(cluster_scores.values())

    if size <= 0:
        return (dominant, 10)
    # sqrt scale to compress big ranges; the min() caps the result at 60
    scaled = 10 + min(50, 5 * math.sqrt(size))
    return (dominant, scaled)
def build_tooltip(position, max_items_per_cluster=6):
    """
    Build the HTML tooltip for a position.

    The tooltip is the position name in bold, followed by one line per
    non-empty cluster listing its highest-count skills as "name (count)".

    Args:
        position: Position dict with optional ``name`` and ``skills`` keys;
            ``skills`` maps a cluster name to a list of
            ``{"name": ..., "count": ...}`` items. A missing or null
            ``skills`` entry yields a tooltip with only the name.
        max_items_per_cluster: Maximum number of skills listed per cluster.
            Coerced to int, because UI sliders may deliver a float.

    Returns:
        An HTML fragment. All text taken from ``position`` is HTML-escaped,
        since it comes from a user-supplied JSON file.
    """
    # Local import keeps this function self-contained.
    from html import escape

    limit = int(max_items_per_cluster)

    def count_of(item):
        # Missing or null counts are treated as 0
        return int(item.get("count") or 0)

    parts = [f"<b>{escape(str(position.get('name', '')))}</b>"]
    for cl, items in (position.get("skills") or {}).items():
        if not items:
            continue
        # A reverse sort is stable, so equal counts keep their input order
        top = sorted(items, key=count_of, reverse=True)[:limit]
        inner = ", ".join(
            f"{escape(str(it.get('name', '')))} ({count_of(it)})" for it in top
        )
        parts.append(f"<div><b>{escape(str(cl))}:</b> {inner}</div>")
    return "<br/>".join(parts)
def render_network(json_file, min_edge_weight, show_labels, physics, max_items_per_cluster, node_size_mode, filter_position, layout):
    """
    Render the position graph as an HTML document produced by pyvis.

    Args:
        json_file: Uploaded file (or None); passed to load_graph_json.
        min_edge_weight: Edges whose weight is below this value are dropped.
        show_labels: Whether node names are drawn next to the nodes.
        physics: Whether the physics simulation is enabled.
        max_items_per_cluster: Skills listed per cluster in node tooltips.
        node_size_mode: 'skills-total' or 'skills-top10'; passed to
            infer_node_cluster_and_size.
        filter_position: Case-insensitive substring; only positions whose
            name contains it are shown. Empty or None shows all positions.
        layout: "Barnes-Hut" selects the Barnes-Hut solver; any other value
            selects ForceAtlas2Based. Only used when ``physics`` is true.

    Returns:
        The generated HTML, or a short hint when no position passes the
        filter.

    Raises:
        gr.Error: Propagated from load_graph_json.
    """
    data = load_graph_json(json_file)
    positions = data.get("positions") or []

    # Prepare pyvis network
    net = Network(height="720px", width="100%", bgcolor="#ffffff", font_color="#111111", directed=False, cdn_resources="in_line")

    if not physics:
        # toggle_physics() rather than set_options(): the latter parses its
        # argument as strict JSON, so a JS-style object literal (unquoted
        # keys, trailing ';') is rejected.
        net.toggle_physics(False)
    elif layout == "Barnes-Hut":
        net.barnes_hut()
    else:
        # ForceAtlas2Based may look nice for dense graphs
        net.force_atlas_2based()

    # Cluster -> color, in order of first appearance (dict keys keep order)
    cluster_names = dict.fromkeys(
        cl for pos in positions for cl in (pos.get("skills") or {})
    )
    color_map = {
        cl: CLUSTER_COLORS[idx % len(CLUSTER_COLORS)]
        for idx, cl in enumerate(cluster_names)
    }
    color_map.setdefault("other", "#888888")

    # Optional position name filter (substring, case-insensitive)
    filter_position = (filter_position or "").strip().lower()

    # Add nodes
    node_ids = set()
    for pos in positions:
        name = pos.get("name", "")
        if filter_position and filter_position not in str(name).lower():
            continue
        dominant_cluster, size = infer_node_cluster_and_size(pos, node_size_mode)
        tooltip = build_tooltip(pos, max_items_per_cluster=int(max_items_per_cluster))
        net.add_node(
            n_id=name,
            # pyvis substitutes the node id for a falsy label, so a single
            # space is what actually hides the text.
            label=name if show_labels else " ",
            title=tooltip,
            color=color_map.get(dominant_cluster, color_map["other"]),
            size=size,
        )
        node_ids.add(name)

    # If graph ends up empty, hint the user
    if not node_ids:
        return "<h3>No nodes to show</h3><p>Loosen filters or upload a JSON.</p>"

    # Add edges with threshold filter; both endpoints must be visible nodes
    threshold = float(min_edge_weight)
    for e in data.get("edges") or []:
        w = float(e.get("weight") or 0)
        if w < threshold:
            continue
        src, tgt = e.get("source"), e.get("target")
        if src in node_ids and tgt in node_ids:
            shared = e.get("shared_skills") or []
            title = f"weight={w:.2f} | shared: {', '.join(str(s) for s in shared[:10])}"
            net.add_edge(src, tgt, value=w, title=title)

    # NOTE(review): the result is a full HTML document whose graph is drawn by
    # <script> tags. Depending on the Gradio version, gr.HTML may not execute
    # them; if the graph stays blank, wrap this in an <iframe srcdoc=...>.
    # Confirm against the deployed Gradio version.
    return net.generate_html()
# UI definition: controls on the left, rendered network on the right.
with gr.Blocks(title="Job Position ↔ Hard Skills — Network") as demo:
    gr.Markdown("# Job Position ↔ Hard Skills — Network Diagram\nUpload a JSON or place **job_position_skill_graph.json** in repo root.")

    with gr.Row():
        # Left column: data source and rendering controls
        with gr.Column(scale=1):
            json_file = gr.File(
                label="Upload job_position_skill_graph.json (optional)",
                file_count="single",
                file_types=[".json"],
            )
            min_edge_weight = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                value=0.15,
                step=0.01,
                label="Min edge weight (Jaccard)",
            )
            show_labels = gr.Checkbox(value=True, label="Show node labels")
            physics = gr.Checkbox(value=True, label="Enable physics layout")
            layout = gr.Radio(
                choices=["Barnes-Hut", "ForceAtlas2Based"],
                value="ForceAtlas2Based",
                label="Layout algorithm",
            )
            node_size_mode = gr.Radio(
                choices=["skills-total", "skills-top10"],
                value="skills-total",
                label="Node size scale by",
            )
            max_items_per_cluster = gr.Slider(
                minimum=1,
                maximum=20,
                value=6,
                step=1,
                label="Tooltip: max skills per cluster",
            )
            filter_position = gr.Textbox(
                value="",
                label="Filter by position name (substring)",
            )
            btn = gr.Button("Render", variant="primary")

        # Right column: the rendered network
        with gr.Column(scale=1):
            out_html = gr.HTML(label="Network")

    # Inputs are passed positionally, so their order must match the
    # parameter order of render_network.
    btn.click(
        fn=render_network,
        inputs=[
            json_file,
            min_edge_weight,
            show_labels,
            physics,
            max_items_per_cluster,
            node_size_mode,
            filter_position,
            layout,
        ],
        outputs=[out_html],
    )

if __name__ == "__main__":
    demo.launch()