Spaces:

Nucha
/

NetworkChart

Sleeping

App Files Files Community

Nucha committed on Aug 29, 2025

Commit

844f88a

verified ·

1 Parent(s): dfba298

Upload 2 files

Browse files

Files changed (2) hide show

app.py +156 -181
requirements.txt +0 -1

app.py CHANGED Viewed

@@ -1,203 +1,178 @@
 \
 import os
 import json
-import uuid  # <-- FIX: added
-from typing import Dict, Any, List, Tuple
 import gradio as gr
 from pyvis.network import Network
-DEFAULT_JSON = "job_skill_network.json"
-def _load_graph(file_obj) -> Dict[str, Any]:
-    if file_obj is not None:
-        with open(file_obj.name if hasattr(file_obj, "name") else file_obj, "r", encoding="utf-8") as f:
             return json.load(f)
     if os.path.exists(DEFAULT_JSON):
         with open(DEFAULT_JSON, "r", encoding="utf-8") as f:
             return json.load(f)
-    raise gr.Error("No JSON provided and default file not found. Please upload job_skill_network.json.")
-def _split_nodes(nodes: List[Dict[str, Any]]):
-    jobs = [n for n in nodes if str(n.get("type","")).lower() == "job"]
-    skills = [n for n in nodes if str(n.get("type","")).lower() == "skill"]
-    return jobs, skills
-def _index_nodes(nodes: List[Dict[str, Any]]):
-    return {n["id"]: n for n in nodes}
-def _filter_graph(graph: Dict[str, Any],
-                  include_requires: bool,
-                  include_similar: bool,
-                  min_weight: int,
-                  top_n_jobs: int,
-                  keep_outside_similar: bool,
-                  include_job_nodes: bool,
-                  include_skill_nodes: bool):
-    nodes = graph.get("nodes", [])
-    edges = graph.get("edges", [])
-    jobs, skills = _split_nodes(nodes)
-    def _postings(n):
-        try:
-            return int(n.get("postings", 0))
-        except Exception:
-            return 0
-    jobs_sorted = sorted(jobs, key=_postings, reverse=True)
-    selected_job_ids = set([n["id"] for n in jobs_sorted[:max(1, int(top_n_jobs))]]) if include_job_nodes else set()
-    selected_edges = []
-    for e in edges:
-        et = str(e.get("type","")).lower()
-        if et == "requires" and include_requires and int(e.get("weight", 1)) >= int(min_weight):
-            if include_job_nodes or include_skill_nodes:
-                selected_edges.append(e)
-        elif et == "similar" and include_similar and int(e.get("weight", 1)) >= int(min_weight):
-            selected_edges.append(e)
-    node_ids = set()
-    for e in selected_edges:
-        s, t, et = e.get("source"), e.get("target"), str(e.get("type","")).lower()
-        if et == "requires" and selected_job_ids:
-            if (s in selected_job_ids) or (t in selected_job_ids):
-                node_ids.update([s, t])
         else:
-            node_ids.update([s, t])
-    if include_similar and keep_outside_similar and selected_job_ids:
-        for e in selected_edges:
-            if str(e.get("type","")).lower() != "similar":
-                continue
-            s, t = e.get("source"), e.get("target")
-            if (s in selected_job_ids) or (t in selected_job_ids):
-                node_ids.update([s, t])
-    node_map = _index_nodes(nodes)
-    final_nodes = []
-    for nid in list(node_ids):
-        n = node_map.get(nid)
-        if not n:
             continue
-        ntype = str(n.get("type","")).lower()
-        if (ntype == "job" and include_job_nodes) or (ntype == "skill" and include_skill_nodes):
-            final_nodes.append(n)
-    final_ids = set(n["id"] for n in final_nodes)
-    final_edges = [e for e in selected_edges if e.get("source") in final_ids and e.get("target") in final_ids]
-    return final_nodes, final_edges
-def _build_pyvis_html(nodes, edges, physics: bool, hierarchical: bool):
-    net = Network(height="720px", width="100%", directed=False, notebook=False)
-    net.barnes_hut()
-    for n in nodes:
-        nid = n["id"]
-        label = str(n.get("label", nid))
-        ntype = str(n.get("type","")).lower()
-        title = f"{ntype.upper()} | {label}"
-        size = 12
-        shape = "dot"
-        if ntype == "job":
-            size = 18 + int(n.get("postings", 0)) * 0.1
-            shape = "ellipse"
-        elif ntype == "skill":
-            size = 8
-        net.add_node(nid, label=label, title=title, group=ntype, shape=shape, value=size)
-    for e in edges:
-        s, t = e.get("source"), e.get("target")
-        et = str(e.get("type",""))
-        weight = int(e.get("weight", 1))
-        title = f"{et} (w={weight})"
-        net.add_edge(s, t, title=title, value=weight)
-    options = {
-      "physics": {"enabled": bool(physics)},
-      "interaction": {"hover": True, "multiselect": True, "dragNodes": True},
-      "nodes": {"font": {"size": 14}},
-      "edges": {"smooth": {"type": "dynamic"}}
-    }
-    if hierarchical:
-        options["layout"] = {
-            "hierarchical": {
-                "enabled": True,
-                "levelSeparation": 120,
-                "nodeSpacing": 120,
-                "treeSpacing": 180,
-                "direction": "UD",
-                "sortMethod": "hubsize"
-            }
-        }
-        options["physics"]["enabled"] = False
-    import json as _json
-    net.set_options(_json.dumps(options))
-    return net.generate_html()
-def build_network(
-    json_file,
-    include_requires,
-    include_similar,
-    min_weight,
-    top_n_jobs,
-    keep_outside_similar,
-    include_job_nodes,
-    include_skill_nodes,
-    physics,
-    hierarchical
-):
-    graph = _load_graph(json_file)
-    nodes, edges = _filter_graph(graph, include_requires, include_similar, int(min_weight), int(top_n_jobs),
-                                 bool(keep_outside_similar), bool(include_job_nodes), bool(include_skill_nodes))
-    if not nodes or not edges:
-        raise gr.Error("No nodes/edges remain after filtering. Try lowering the filter or including more edge types.")
-    html = _build_pyvis_html(nodes, edges, physics, hierarchical)
-    out_name = f"network_{uuid.uuid4().hex[:8]}.html"
-    with open(out_name, "w", encoding="utf-8") as f:
-        f.write(html)
-    return gr.update(value=html), out_name
-with gr.Blocks(title="Job ↔ Hard Skill Network") as demo:
-    gr.Markdown("# Job ↔ Hard Skill Network Diagram\nUpload `job_skill_network.json` or place it at repo root.")
-    with gr.Row():
-        with gr.Column(scale=1):
-            json_file = gr.File(label="Upload JSON (optional)", file_count="single", file_types=[".json"])
-            gr.Markdown("### Include Edge Types")
-            include_requires = gr.Checkbox(value=True, label="Job–Skill edges (type='requires')")
-            include_similar = gr.Checkbox(value=True, label="Job–Job edges (type='similar')")
-            gr.Markdown("### Filters")
-            min_weight = gr.Slider(0, 50, value=3, step=1, label="Minimum edge weight")
-            top_n_jobs = gr.Slider(1, 100, value=30, step=1, label="Top-N job nodes by postings")
-            keep_outside_similar = gr.Checkbox(value=True, label="Include similar jobs outside Top-N")
-            gr.Markdown("### Node Types")
-            include_job_nodes = gr.Checkbox(value=True, label="Include job nodes")
-            include_skill_nodes = gr.Checkbox(value=True, label="Include skill nodes")
-            gr.Markdown("### Layout")
-            physics = gr.Checkbox(value=True, label="Enable physics")
-            hierarchical = gr.Checkbox(value=False, label="Hierarchical layout (good for Job→Skill)")
-            btn = gr.Button("Build Network", variant="primary")
         with gr.Column(scale=1):
-            html_view = gr.HTML(label="Interactive Network (PyVis)")
-            html_file = gr.File(label="Download HTML")
     btn.click(
-        fn=build_network,
-        inputs=[json_file, include_requires, include_similar, min_weight, top_n_jobs, keep_outside_similar,
-                include_job_nodes, include_skill_nodes, physics, hierarchical],
-        outputs=[html_view, html_file]
     )
 if __name__ == "__main__":

 \
 import os
 import json
+import math
 import gradio as gr
 from pyvis.network import Network
+# Graph file used when nothing is uploaded; the path is relative, so it is
+# resolved against the process working directory.
+DEFAULT_JSON = "job_position_skill_graph.json"  # Put this file at repo root
+# Color palette for clusters. render_network hands colors out in order of first
+# cluster appearance and wraps around (index modulo palette length) when there
+# are more clusters than colors.
+CLUSTER_COLORS = [
+    "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd",
+    "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"
+]
+def load_graph_json(json_file):
+    """
+    Load the graph JSON from an uploaded file, falling back to DEFAULT_JSON.
+
+    Expected schema:
+    {
+      "positions": [{"name": "...","skills": {"cluster":[{"name":"skill","count":int},...]...}}],
+      "edges": [{"source":"...","target":"...","weight":float,"shared_skills":[...]}]
+    }
+
+    Args:
+        json_file: Value of the gr.File input: None when nothing was uploaded,
+            otherwise a path string or an object exposing the path as ``.name``.
+
+    Returns:
+        The parsed JSON document.
+
+    Raises:
+        gr.Error: If nothing was uploaded and DEFAULT_JSON does not exist, or
+            if the selected file cannot be read or is not valid JSON.
+    """
+    if json_file is not None:
+        # gr.File may pass a tempfile path string or a file object
+        path = json_file.name if hasattr(json_file, "name") else json_file
+    elif os.path.exists(DEFAULT_JSON):
+        path = DEFAULT_JSON
+    else:
+        raise gr.Error("No JSON provided and default file not found. Please upload job_position_skill_graph.json.")
+    try:
+        with open(path, "r", encoding="utf-8") as f:
+            return json.load(f)
+    except (OSError, ValueError) as exc:
+        # ValueError covers json.JSONDecodeError and UnicodeDecodeError; report
+        # them through Gradio instead of letting a raw traceback escape.
+        raise gr.Error(f"Could not read graph JSON from {path}: {exc}") from exc
+def _skill_count(item):
+    """Return a skill entry's "count" as a non-negative int (0 if missing or malformed)."""
+    try:
+        return max(0, int(item.get("count") or 0))
+    except (AttributeError, TypeError, ValueError):
+        return 0
+def infer_node_cluster_and_size(position, node_size_mode):
+    """
+    Infer the dominant skill cluster of a position and compute its node size.
+
+    Args:
+        position: One entry of the JSON "positions" list; its "skills" value
+            maps cluster name -> list of {"name", "count"} entries.
+        node_size_mode: 'skills-total' sums every skill count; 'skills-top10'
+            sums only the 10 largest counts taken across all clusters. Any
+            other value behaves like 'skills-total'.
+
+    Returns:
+        (dominant_cluster, size). dominant_cluster is the cluster with the
+        largest summed count (ties go to the cluster listed first), or "other"
+        when the position has no clusters. size is 10 when there is nothing to
+        count, otherwise 10 + min(50, 5 * sqrt(count_sum)), i.e. within 10..60.
+    """
+    # "skills" may be absent or null in the JSON; treat both as "no clusters"
+    skills_by_cluster = position.get("skills") or {}
+    # Normalize once so both size modes clamp negative/malformed counts alike
+    counts_by_cluster = {
+        cl: [_skill_count(it) for it in (items or [])]
+        for cl, items in skills_by_cluster.items()
+    }
+    if not counts_by_cluster:
+        return ("other", 10)
+    cluster_scores = {cl: sum(counts) for cl, counts in counts_by_cluster.items()}
+    # Dominant cluster (max() keeps the first cluster on ties)
+    dominant = max(cluster_scores, key=cluster_scores.get)
+    if node_size_mode == "skills-top10":
+        # Top 10 counts overall, not top 10 per cluster
+        all_counts = sorted(
+            (c for counts in counts_by_cluster.values() for c in counts),
+            reverse=True,
+        )
+        size = sum(all_counts[:10])
+    else:
+        size = sum(cluster_scores.values())
+    # Map size to a reasonable node size (10..60)
+    if size <= 0:
+        return (dominant, 10)
+    # sqrt scale to compress big ranges
+    scaled = 10 + min(50, 5 * math.sqrt(size))
+    return (dominant, scaled)
+def build_tooltip(position, max_items_per_cluster=6):
+    """
+    Build the HTML tooltip for a position, listing its top skills per cluster.
+
+    Position, cluster and skill names come from the loaded JSON, so they are
+    HTML-escaped before being placed inside the markup.
+    NOTE(review): assumes the tooltip string is rendered as HTML by the
+    network widget - confirm for the pyvis version in use.
+
+    Args:
+        position: One entry of the JSON "positions" list ("name" plus a
+            "skills" mapping of cluster name -> list of {"name", "count"}).
+        max_items_per_cluster: Maximum number of skills listed per cluster;
+            coerced to int because UI sliders may deliver a float.
+
+    Returns:
+        An HTML fragment: the bold position name followed by one <div> per
+        non-empty cluster, joined with "<br/>".
+    """
+    from html import escape  # local import keeps this function self-contained
+    def count_of(item):
+        # A null or non-numeric count is shown as 0 instead of crashing
+        try:
+            return int(item.get("count") or 0)
+        except (TypeError, ValueError):
+            return 0
+    limit = int(max_items_per_cluster)
+    name = position.get("name") or ""
+    parts = [f"<b>{escape(str(name))}</b>"]
+    for cl, items in (position.get("skills") or {}).items():
+        if not items:
+            continue
+        # Highest counts first; equal counts keep their original order
+        top = sorted(items, key=lambda it: -count_of(it))[:limit]
+        inner = ", ".join(
+            f"{escape(str(it.get('name') or ''))} ({count_of(it)})" for it in top
+        )
+        parts.append(f"<div><b>{escape(str(cl))}:</b> {inner}</div>")
+    return "<br/>".join(parts)
+def render_network(json_file, min_edge_weight, show_labels, physics, max_items_per_cluster, node_size_mode, filter_position, layout):
+    """
+    Build the position network with pyvis and return it as an HTML string.
+
+    Args:
+        json_file: Uploaded file (or None) forwarded to load_graph_json.
+        min_edge_weight: Edges whose weight is below this value are dropped.
+        show_labels: When false, nodes are added with an empty label.
+        physics: Enables the physics simulation; when false, layout is ignored.
+        max_items_per_cluster: Forwarded to build_tooltip.
+        node_size_mode: Forwarded to infer_node_cluster_and_size.
+        filter_position: Optional case-insensitive substring; only positions
+            whose name contains it become nodes.
+        layout: "Barnes-Hut" selects barnes_hut(); any other value selects
+            force_atlas_2based().
+
+    Returns:
+        The HTML produced by pyvis, or a short hint fragment when no position
+        passes the name filter.
+    """
+    data = load_graph_json(json_file)
+    positions = data.get("positions") or []
+    # Optional position name filter (substring, case-insensitive)
+    needle = (filter_position or "").strip().lower()
+    visible = []
+    for pos in positions:
+        name = pos.get("name") or ""  # a null name is treated as empty
+        if needle and needle not in str(name).lower():
+            continue
+        visible.append((name, pos))
+    # If graph ends up empty, hint the user before doing any pyvis work
+    if not visible:
+        return "<h3>No nodes to show</h3><p>Loosen filters or upload a JSON.</p>"
+    # Prepare pyvis network
+    net = Network(height="720px", width="100%", bgcolor="#ffffff", font_color="#111111", directed=False, cdn_resources="in_line")
+    # Physics options
+    if not physics:
+        # toggle_physics is the dedicated pyvis switch; it avoids relying on
+        # how set_options parses a JavaScript-style "var options = ..." string.
+        net.toggle_physics(False)
+    elif layout == "Barnes-Hut":
+        net.barnes_hut()
+    else:
+        # ForceAtlas2Based may look nice for dense graphs
+        net.force_atlas_2based()
+    # Cluster -> color map in order of first appearance over ALL positions, so
+    # a cluster keeps its color regardless of the name filter.
+    color_map = {}
+    for pos in positions:
+        for cl in (pos.get("skills") or {}):
+            if cl not in color_map:
+                color_map[cl] = CLUSTER_COLORS[len(color_map) % len(CLUSTER_COLORS)]
+    color_map.setdefault("other", "#888888")
+    # Add nodes
+    node_ids = set()
+    for name, pos in visible:
+        dominant_cluster, size = infer_node_cluster_and_size(pos, node_size_mode)
+        tooltip = build_tooltip(pos, max_items_per_cluster=max_items_per_cluster)
+        net.add_node(
+            n_id=name,
+            label=str(name) if show_labels else "",
+            title=tooltip,
+            color=color_map.get(dominant_cluster, color_map["other"]),
+            size=size
+        )
+        node_ids.add(name)
+    # Add edges with threshold filter
+    threshold = float(min_edge_weight)
+    for e in data.get("edges") or []:
+        try:
+            w = float(e.get("weight") or 0)
+        except (TypeError, ValueError):
+            continue  # skip edges whose weight is not numeric
+        if w < threshold:
+            continue
+        src, tgt = e.get("source"), e.get("target")
+        # Keep only edges whose both endpoints survived the node filter
+        if (src in node_ids) and (tgt in node_ids):
+            shared = ", ".join(str(s) for s in (e.get("shared_skills") or [])[:10])
+            net.add_edge(src, tgt, value=w, title=f"weight={w:.2f} | shared: {shared}")
+    # Generate HTML
+    # NOTE(review): the caller shows this full HTML document in gr.HTML, which
+    # may not execute the embedded <script> tags - confirm the graph renders.
+    return net.generate_html()
+# UI definition: the first column holds the controls, the second column holds
+# the rendered network. Components appear in the order they are created here.
+with gr.Blocks(title="Job Position ↔ Hard Skills — Network") as demo:
+    gr.Markdown("# Job Position ↔ Hard Skills — Network Diagram\nUpload a JSON or place **job_position_skill_graph.json** in repo root.")
+    with gr.Row():
+        with gr.Column(scale=1):
+            # Optional upload; render_network falls back to DEFAULT_JSON when empty
+            json_file = gr.File(label="Upload job_position_skill_graph.json (optional)", file_count="single", file_types=[".json"])
+            # Edge weights are labeled as Jaccard values, hence the 0..1 range
+            min_edge_weight = gr.Slider(0.0, 1.0, value=0.15, step=0.01, label="Min edge weight (Jaccard)")
+            show_labels = gr.Checkbox(value=True, label="Show node labels")
+            physics = gr.Checkbox(value=True, label="Enable physics layout")
+            # render_network only consults the layout choice when physics is enabled
+            layout = gr.Radio(choices=["Barnes-Hut", "ForceAtlas2Based"], value="ForceAtlas2Based", label="Layout algorithm")
+            node_size_mode = gr.Radio(choices=["skills-total", "skills-top10"], value="skills-total", label="Node size scale by")
+            max_items_per_cluster = gr.Slider(1, 20, value=6, step=1, label="Tooltip: max skills per cluster")
+            filter_position = gr.Textbox(value="", label="Filter by position name (substring)")
+            btn = gr.Button("Render", variant="primary")
         with gr.Column(scale=1):
+            out_html = gr.HTML(label="Network")
+    # inputs are passed positionally, so their order must match the parameter
+    # order of render_network.
     btn.click(
+        fn=render_network,
+        inputs=[json_file, min_edge_weight, show_labels, physics, max_items_per_cluster, node_size_mode, filter_position, layout],
+        outputs=[out_html]
     )
 if __name__ == "__main__":

requirements.txt CHANGED Viewed

@@ -1,3 +1,2 @@
 gradio>=4.26.0
 pyvis>=0.3.2
-networkx>=3.2


1	gradio>=4.26.0
2	pyvis>=0.3.2