# NetworkChart / app.py
# Nucha's picture
# Upload 2 files
# dfba298 verified
# raw
# history blame
# 7.66 kB
import os
import json
import uuid # <-- FIX: added
from typing import Dict, Any, List, Tuple
import gradio as gr
from pyvis.network import Network
# Fallback graph file: _load_graph() reads this relative path when no JSON is uploaded.
DEFAULT_JSON = "job_skill_network.json"
def _load_graph(file_obj) -> Dict[str, Any]:
    """Read the graph description from an upload or from the default file.

    Args:
        file_obj: The uploaded file. May be an object exposing its path via a
            ``name`` attribute, a plain path, or ``None`` when nothing was
            uploaded.

    Returns:
        The parsed JSON content.

    Raises:
        gr.Error: If nothing was uploaded and ``DEFAULT_JSON`` does not exist.
    """
    if file_obj is None:
        # No upload: fall back to the default file, if it is present.
        if not os.path.exists(DEFAULT_JSON):
            raise gr.Error("No JSON provided and default file not found. Please upload job_skill_network.json.")
        source_path = DEFAULT_JSON
    else:
        # Upload objects carry their path in ``name``; anything else is
        # treated as the path itself.
        source_path = getattr(file_obj, "name", file_obj)

    with open(source_path, "r", encoding="utf-8") as handle:
        return json.load(handle)
def _split_nodes(nodes: List[Dict[str, Any]]):
    """Partition *nodes* into job nodes and skill nodes.

    The ``type`` field is compared case-insensitively; nodes whose type is
    neither ``"job"`` nor ``"skill"`` (or is missing) are dropped.

    Returns:
        A ``(jobs, skills)`` tuple of lists, each in input order.
    """
    buckets = {"job": [], "skill": []}
    for node in nodes:
        kind = str(node.get("type", "")).lower()
        if kind in buckets:
            buckets[kind].append(node)
    return buckets["job"], buckets["skill"]
def _index_nodes(nodes: List[Dict[str, Any]]):
    """Build a lookup table mapping each node's ``id`` to the node itself.

    If several nodes share an id, the last one wins. Every node must have an
    ``"id"`` key, otherwise ``KeyError`` is raised.
    """
    lookup = {}
    for node in nodes:
        lookup[node["id"]] = node
    return lookup
def _filter_graph(graph: Dict[str, Any],
                  include_requires: bool,
                  include_similar: bool,
                  min_weight: int,
                  top_n_jobs: int,
                  keep_outside_similar: bool,
                  include_job_nodes: bool,
                  include_skill_nodes: bool) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
    """Select the nodes and edges of *graph* that pass the UI filters.

    Args:
        graph: Mapping with optional ``"nodes"`` and ``"edges"`` lists.
        include_requires: Keep edges whose type is ``"requires"``.
        include_similar: Keep edges whose type is ``"similar"``.
        min_weight: Minimum edge ``weight`` (a missing weight counts as 1).
        top_n_jobs: Number of job nodes, ranked by ``postings``, that form
            the Top-N set. At least one job is always selected.
        keep_outside_similar: When true, a "similar" edge may pull in jobs
            outside the Top-N set. When false (and a Top-N set exists), a
            "similar" edge is kept only if both endpoints are Top-N jobs.
        include_job_nodes: Emit job nodes. When false there is no Top-N set.
        include_skill_nodes: Emit skill nodes.

    Returns:
        ``(final_nodes, final_edges)``. Nodes are in input order; every
        returned edge has both endpoints among the returned nodes.
    """
    nodes = graph.get("nodes", [])
    edges = graph.get("edges", [])
    jobs, _ = _split_nodes(nodes)

    def _postings(n):
        # Missing or malformed posting counts rank as 0 instead of aborting.
        try:
            return int(n.get("postings", 0))
        except (TypeError, ValueError, OverflowError):
            return 0

    if include_job_nodes:
        ranked = sorted(jobs, key=_postings, reverse=True)
        selected_job_ids = {n["id"] for n in ranked[:max(1, int(top_n_jobs))]}
    else:
        selected_job_ids = set()

    # Pass 1: keep edges by type and weight.
    threshold = int(min_weight)
    selected_edges = []
    for e in edges:
        et = str(e.get("type", "")).lower()
        if et == "requires":
            enabled = include_requires and (include_job_nodes or include_skill_nodes)
        elif et == "similar":
            enabled = include_similar
        else:
            enabled = False
        if enabled and int(e.get("weight", 1)) >= threshold:
            selected_edges.append(e)

    # Only restrict "similar" edges when there is a Top-N set to restrict to
    # and the caller asked to leave outside jobs out.
    restrict_similar = bool(selected_job_ids) and not keep_outside_similar

    def _both_top(s, t):
        return s in selected_job_ids and t in selected_job_ids

    # Pass 2: collect the node ids contributed by the kept edges.
    node_ids = set()
    for e in selected_edges:
        s, t = e.get("source"), e.get("target")
        et = str(e.get("type", "")).lower()
        if selected_job_ids and et == "requires":
            # A "requires" edge contributes nodes only if it touches a Top-N job.
            keep = s in selected_job_ids or t in selected_job_ids
        elif restrict_similar and et == "similar":
            keep = _both_top(s, t)
        else:
            keep = True
        if keep:
            node_ids.update((s, t))

    # Pass 3: resolve ids to nodes in input order (deterministic, unlike set
    # iteration order) and apply the node-type switches.
    final_nodes = []
    for nid, n in _index_nodes(nodes).items():
        if nid not in node_ids:
            continue
        ntype = str(n.get("type", "")).lower()
        if (ntype == "job" and include_job_nodes) or (ntype == "skill" and include_skill_nodes):
            final_nodes.append(n)

    # Pass 4: keep only edges whose endpoints both survived.
    final_ids = {n["id"] for n in final_nodes}
    final_edges = []
    for e in selected_edges:
        s, t = e.get("source"), e.get("target")
        if s not in final_ids or t not in final_ids:
            continue
        if restrict_similar and str(e.get("type", "")).lower() == "similar" and not _both_top(s, t):
            continue
        final_edges.append(e)
    return final_nodes, final_edges
def _build_pyvis_html(nodes, edges, physics: bool, hierarchical: bool):
    """Render the filtered graph with PyVis and return the generated HTML.

    Args:
        nodes: Node dicts; each needs ``"id"`` and may carry ``"label"``,
            ``"type"`` and ``"postings"``.
        edges: Edge dicts with ``"source"``, ``"target"`` and optionally
            ``"type"`` and ``"weight"`` (a missing weight counts as 1).
        physics: Whether the physics simulation is enabled in the options.
        hierarchical: Whether to add a top-down hierarchical layout. This
            also forces physics off.

    Returns:
        The string returned by ``Network.generate_html()``.
    """
    net = Network(height="720px", width="100%", directed=False, notebook=False)
    net.barnes_hut()

    for n in nodes:
        nid = n["id"]
        label = str(n.get("label", nid))
        ntype = str(n.get("type", "")).lower()
        title = f"{ntype.upper()} | {label}"
        size = 12
        shape = "dot"
        if ntype == "job":
            # _filter_graph ranks malformed posting counts as 0, so such nodes
            # reach this point; parse them the same way instead of crashing.
            try:
                postings = int(n.get("postings", 0))
            except (TypeError, ValueError, OverflowError):
                postings = 0
            size = 18 + postings * 0.1
            shape = "ellipse"
        elif ntype == "skill":
            size = 8
        net.add_node(nid, label=label, title=title, group=ntype, shape=shape, value=size)

    for e in edges:
        s, t = e.get("source"), e.get("target")
        et = str(e.get("type", ""))
        weight = int(e.get("weight", 1))
        title = f"{et} (w={weight})"
        net.add_edge(s, t, title=title, value=weight)

    options = {
        "physics": {"enabled": bool(physics)},
        "interaction": {"hover": True, "multiselect": True, "dragNodes": True},
        "nodes": {"font": {"size": 14}},
        "edges": {"smooth": {"type": "dynamic"}},
    }
    if hierarchical:
        options["layout"] = {
            "hierarchical": {
                "enabled": True,
                "levelSeparation": 120,
                "nodeSpacing": 120,
                "treeSpacing": 180,
                "direction": "UD",
                "sortMethod": "hubsize",
            }
        }
        # The hierarchical layout overrides the caller's physics choice.
        options["physics"]["enabled"] = False

    net.set_options(json.dumps(options))
    return net.generate_html()
def build_network(
    json_file,
    include_requires,
    include_similar,
    min_weight,
    top_n_jobs,
    keep_outside_similar,
    include_job_nodes,
    include_skill_nodes,
    physics,
    hierarchical
):
    """Gradio callback: load, filter and render the graph.

    The parameters mirror the UI controls wired up in ``btn.click``. Numeric
    and boolean values are coerced before being passed to ``_filter_graph``.

    Returns:
        A pair ``(update, path)``: a ``gr.update`` carrying the generated HTML
        and the name of the HTML file written for download.

    Raises:
        gr.Error: If filtering leaves no nodes or no edges.
    """
    kept_nodes, kept_edges = _filter_graph(
        _load_graph(json_file),
        include_requires,
        include_similar,
        int(min_weight),
        int(top_n_jobs),
        bool(keep_outside_similar),
        bool(include_job_nodes),
        bool(include_skill_nodes),
    )
    if not (kept_nodes and kept_edges):
        raise gr.Error("No nodes/edges remain after filtering. Try lowering the filter or including more edge types.")

    page = _build_pyvis_html(kept_nodes, kept_edges, physics, hierarchical)

    # A short random suffix keeps the files of successive builds apart.
    token = uuid.uuid4().hex[:8]
    download_name = f"network_{token}.html"
    with open(download_name, "w", encoding="utf-8") as sink:
        sink.write(page)
    return gr.update(value=page), download_name
# UI definition: controls in the left column, rendered network and download
# link in the right column.
with gr.Blocks(title="Job ↔ Hard Skill Network") as demo:
    gr.Markdown("# Job ↔ Hard Skill Network Diagram\nUpload `job_skill_network.json` or place it at repo root.")
    with gr.Row():
        with gr.Column(scale=1):
            json_file = gr.File(label="Upload JSON (optional)", file_count="single", file_types=[".json"])

            gr.Markdown("### Include Edge Types")
            include_requires = gr.Checkbox(value=True, label="Job–Skill edges (type='requires')")
            include_similar = gr.Checkbox(value=True, label="Job–Job edges (type='similar')")

            gr.Markdown("### Filters")
            min_weight = gr.Slider(0, 50, value=3, step=1, label="Minimum edge weight")
            top_n_jobs = gr.Slider(1, 100, value=30, step=1, label="Top-N job nodes by postings")
            keep_outside_similar = gr.Checkbox(value=True, label="Include similar jobs outside Top-N")

            gr.Markdown("### Node Types")
            include_job_nodes = gr.Checkbox(value=True, label="Include job nodes")
            include_skill_nodes = gr.Checkbox(value=True, label="Include skill nodes")

            gr.Markdown("### Layout")
            physics = gr.Checkbox(value=True, label="Enable physics")
            hierarchical = gr.Checkbox(value=False, label="Hierarchical layout (good for Job→Skill)")

            btn = gr.Button("Build Network", variant="primary")

        with gr.Column(scale=1):
            html_view = gr.HTML(label="Interactive Network (PyVis)")
            html_file = gr.File(label="Download HTML")

    # The order here must match the parameter order of build_network.
    build_inputs = [
        json_file,
        include_requires,
        include_similar,
        min_weight,
        top_n_jobs,
        keep_outside_similar,
        include_job_nodes,
        include_skill_nodes,
        physics,
        hierarchical,
    ]
    btn.click(fn=build_network, inputs=build_inputs, outputs=[html_view, html_file])

if __name__ == "__main__":
    demo.launch()