Spaces:

Nucha
/

NetworkChart

Sleeping

App Files Files Community

Nucha committed on Aug 29, 2025

Commit

f728c11

verified ·

1 Parent(s): 844f88a

Upload 2 files

Browse files

Files changed (1) hide show

app.py +178 -148

app.py CHANGED Viewed

@@ -1,178 +1,208 @@
 \
 import os
 import json
-import math
 import gradio as gr
-from pyvis.network import Network
-DEFAULT_JSON = "job_position_skill_graph.json"  # Put this file at repo root
-# Color palette for clusters (fallback if more clusters appear)
-CLUSTER_COLORS = [
-    "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd",
-    "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"
-]
-def load_graph_json(json_file):
-    """
-    Load JSON either from uploaded file or from DEFAULT_JSON if present.
-    Expected schema:
-    {
-      "positions": [{"name": "...","skills": {"cluster":[{"name":"skill","count":int},...]...}}],
-      "edges": [{"source":"...","target":"...","weight":float,"shared_skills":[...]}]
-    }
-    """
     if json_file is not None:
-        # gr.File may pass a tempfile path string or a file object
         path = json_file.name if hasattr(json_file, "name") else json_file
         with open(path, "r", encoding="utf-8") as f:
-            return json.load(f)
     if os.path.exists(DEFAULT_JSON):
         with open(DEFAULT_JSON, "r", encoding="utf-8") as f:
-            return json.load(f)
     raise gr.Error("No JSON provided and default file not found. Please upload job_position_skill_graph.json.")
-def infer_node_cluster_and_size(position, node_size_mode):
-    """
-    Infer dominant cluster for coloring; compute base node size.
-    node_size_mode: 'skills-total' or 'skills-top10'
-    """
-    skills_by_cluster = position.get("skills", {})
-    # Aggregate counts per cluster
-    cluster_scores = {}
-    total_skills_count = 0
-    for cl, items in skills_by_cluster.items():
-        s = sum(max(0, int(it.get("count", 0))) for it in items)
-        cluster_scores[cl] = s
-        total_skills_count += s
-    if not cluster_scores:
-        return ("other", 10)
-    # Dominant cluster
-    dominant = max(cluster_scores.items(), key=lambda x: x[1])[0]
-    if node_size_mode == "skills-top10":
-        # Sum only top 10 across clusters
-        acc = 0
-        for cl, items in skills_by_cluster.items():
-            for it in sorted(items, key=lambda x: -int(x.get("count", 0)))[:10]:
-                acc += int(it.get("count", 0))
-        size = acc
-    else:
-        size = total_skills_count
-    # Map size to a reasonable node size (10..60)
-    if size <= 0:
-        return (dominant, 10)
-    # sqrt scale to compress big ranges
-    scaled = 10 + min(50, 5 * math.sqrt(size))
-    return (dominant, scaled)
-def build_tooltip(position, max_items_per_cluster=6):
     """
-    Build HTML tooltip listing top skills per cluster.
     """
-    name = position.get("name", "")
-    skills_by_cluster = position.get("skills", {})
-    parts = [f"<b>{name}</b>"]
-    for cl, items in skills_by_cluster.items():
-        if not items:
             continue
-        top = sorted(items, key=lambda x: -int(x.get('count', 0)))[:max_items_per_cluster]
-        inner = ", ".join([f"{it.get('name','')} ({int(it.get('count',0))})" for it in top])
-        parts.append(f"<div><b>{cl}:</b> {inner}</div>")
-    return "<br/>".join(parts)
-def render_network(json_file, min_edge_weight, show_labels, physics, max_items_per_cluster, node_size_mode, filter_position, layout):
-    data = load_graph_json(json_file)
-    # Prepare pyvis network
-    net = Network(height="720px", width="100%", bgcolor="#ffffff", font_color="#111111", directed=False, cdn_resources="in_line")
-    # Physics options
-    if physics:
-        if layout == "Barnes-Hut":
-            net.barnes_hut()
-        else:
-            # ForceAtlas2Based may look nice for dense graphs
-            net.force_atlas_2based()
-    else:
-        net.set_options("""
-        var options = { physics: { enabled: false } };
-        """)
-    # Build cluster -> color map based on encountered clusters
-    cluster_names = []
-    for pos in data.get("positions", []):
-        for cl in (pos.get("skills") or {}).keys():
-            if cl not in cluster_names:
-                cluster_names.append(cl)
-    color_map = {}
-    for idx, cl in enumerate(cluster_names):
-        color_map[cl] = CLUSTER_COLORS[idx % len(CLUSTER_COLORS)]
-    color_map.setdefault("other", "#888888")
-    # Optional position name filter (substring, case-insensitive)
-    filter_position = (filter_position or "").strip().lower()
-    # Add nodes
-    node_ids = set()
-    for pos in data.get("positions", []):
-        name = pos.get("name", "")
-        if filter_position and filter_position not in name.lower():
             continue
-        dominant_cluster, size = infer_node_cluster_and_size(pos, node_size_mode)
-        tooltip = build_tooltip(pos, max_items_per_cluster=max_items_per_cluster)
-        net.add_node(
-            n_id=name,
-            label=name if show_labels else "",
-            title=tooltip,
-            color=color_map.get(dominant_cluster, color_map["other"]),
-            size=size
-        )
-        node_ids.add(name)
-    # Add edges with threshold filter
-    kept_edges = 0
-    for e in data.get("edges", []):
-        w = float(e.get("weight", 0))
-        if w < float(min_edge_weight):
-            continue
-        src, tgt = e.get("source"), e.get("target")
-        if (src in node_ids) and (tgt in node_ids):
-            title = f"weight={w:.2f} | shared: {', '.join(e.get('shared_skills', [])[:10])}"
-            net.add_edge(src, tgt, value=w, title=title)
-            kept_edges += 1
-    # If graph ends up empty, hint the user
-    if len(node_ids) == 0:
-        html = "<h3>No nodes to show</h3><p>Loosen filters or upload a JSON.</p>"
-        return html
-    # Generate HTML
-    html = net.generate_html()
-    return html
-with gr.Blocks(title="Job Position ↔ Hard Skills — Network") as demo:
-    gr.Markdown("# Job Position ↔ Hard Skills — Network Diagram\nUpload a JSON or place **job_position_skill_graph.json** in repo root.")
     with gr.Row():
-        with gr.Column(scale=1):
             json_file = gr.File(label="Upload job_position_skill_graph.json (optional)", file_count="single", file_types=[".json"])
-            min_edge_weight = gr.Slider(0.0, 1.0, value=0.15, step=0.01, label="Min edge weight (Jaccard)")
-            show_labels = gr.Checkbox(value=True, label="Show node labels")
             physics = gr.Checkbox(value=True, label="Enable physics layout")
-            layout = gr.Radio(choices=["Barnes-Hut", "ForceAtlas2Based"], value="ForceAtlas2Based", label="Layout algorithm")
-            node_size_mode = gr.Radio(choices=["skills-total", "skills-top10"], value="skills-total", label="Node size scale by")
-            max_items_per_cluster = gr.Slider(1, 20, value=6, step=1, label="Tooltip: max skills per cluster")
-            filter_position = gr.Textbox(value="", label="Filter by position name (substring)")
-            btn = gr.Button("Render", variant="primary")
-        with gr.Column(scale=1):
-            out_html = gr.HTML(label="Network")
     btn.click(
-        fn=render_network,
-        inputs=[json_file, min_edge_weight, show_labels, physics, max_items_per_cluster, node_size_mode, filter_position, layout],
-        outputs=[out_html]
     )
 if __name__ == "__main__":

 \
 import os
 import json
+import re
+from collections import defaultdict
 import gradio as gr
+# Graph libs
+from pyvis.network import Network
+DEFAULT_JSON = "job_position_skill_graph.json"  # Put this file at the repo root
+def load_graph(json_file):
+    """Load JSON from upload or default file in repo root."""
     if json_file is not None:
         path = json_file.name if hasattr(json_file, "name") else json_file
         with open(path, "r", encoding="utf-8") as f:
+            data = json.load(f)
+        return data
     if os.path.exists(DEFAULT_JSON):
         with open(DEFAULT_JSON, "r", encoding="utf-8") as f:
+            data = json.load(f)
+        return data
     raise gr.Error("No JSON provided and default file not found. Please upload job_position_skill_graph.json.")
+def flatten_skills(positions):
+    """Return set of all skills and map skill->cluster (first seen)."""
+    skill2cluster = {}
+    for pos in positions:
+        grouped = pos.get("skills", {})
+        for cluster_name, items in grouped.items():
+            for it in items:
+                sk = str(it.get("name", "")).strip()
+                if not sk:
+                    continue
+                if sk not in skill2cluster:
+                    skill2cluster[sk] = cluster_name
+    return skill2cluster
+def build_edges(positions, min_count=1, max_skills_per_position=100, clusters_filter=None, positions_filter=None, skill_regex=None):
     """
+    Create bipartite edges Position -> Skill with weight by 'count'.
+    Apply filters: min_count, clusters_filter (set), positions_filter (set), regex.
     """
+    edges = []
+    skill_counts_global = defaultdict(int)
+    patt = None
+    if skill_regex:
+        try:
+            patt = re.compile(skill_regex, re.IGNORECASE)
+        except re.error as e:
+            raise gr.Error(f"Invalid regex: {e}")
+    for pos in positions:
+        pname = pos.get("name", "").strip()
+        if not pname:
             continue
+        if positions_filter and pname not in positions_filter:
             continue
+        grouped = pos.get("skills", {})
+        # flatten with filter by cluster and regex
+        flat = []
+        for cluster_name, items in grouped.items():
+            if clusters_filter and cluster_name not in clusters_filter:
+                continue
+            for it in items:
+                sk = str(it.get("name", "")).strip()
+                cnt = int(it.get("count", 0))
+                if not sk:
+                    continue
+                if cnt < min_count:
+                    continue
+                if patt and not patt.search(sk):
+                    continue
+                flat.append((cluster_name, sk, cnt))
+        # keep top-K for this position by count
+        flat.sort(key=lambda x: -x[2])
+        for cluster_name, sk, cnt in flat[:max_skills_per_position]:
+            edges.append((pname, sk, cnt, cluster_name))
+            skill_counts_global[sk] += cnt
+    return edges, skill_counts_global
+def build_pyvis_html(
+    data,
+    min_count=5,
+    max_skills_per_position=30,
+    selected_clusters=None,
+    selected_positions=None,
+    skill_regex="",
+    physics=True,
+    hierarchical=False
+):
+    positions = data.get("positions", [])
+    # Derive available clusters and positions for UI
+    all_clusters = sorted({cl for pos in positions for cl in pos.get("skills", {}).keys()})
+    all_positions = sorted({pos.get("name","") for pos in positions if pos.get("name","")})
+    clusters_filter = set(selected_clusters) if selected_clusters else set(all_clusters)
+    positions_filter = set(selected_positions) if selected_positions else None
+    edges, skill_counts_global = build_edges(
+        positions,
+        min_count=min_count,
+        max_skills_per_position=max_skills_per_position,
+        clusters_filter=clusters_filter,
+        positions_filter=positions_filter,
+        skill_regex=skill_regex.strip() or None
+    )
+    # Create network
+    net = Network(height="700px", width="100%", bgcolor="#ffffff", font_color="#222222", directed=False, notebook=False, cdn_resources="in_line")
+    # Add nodes
+    # Position nodes: group 'position', shape 'dot'
+    # Skill nodes: group by cluster for color
+    pos_added = set()
+    skill_added = set()
+    # Predefine some distinct groups for clusters (pyvis auto-colors groups)
+    # We'll assign group=cluster for skills, and "position" for positions.
+    for pname, sk, cnt, cluster_name in edges:
+        if pname not in pos_added:
+            net.add_node(f"pos::{pname}", label=pname, title=f"Position: {pname}", shape="dot", size=18, group="position")
+            pos_added.add(pname)
+        if sk not in skill_added:
+            net.add_node(f"sk::{sk}", label=sk, title=f"Skill: {sk}\\nCluster: {cluster_name}\\nGlobal count (approx.): {skill_counts_global.get(sk, 0)}", shape="box", group=cluster_name)
+            skill_added.add(sk)
+        # Edge with value influences thickness
+        net.add_edge(f"pos::{pname}", f"sk::{sk}", value=int(cnt), title=f"{pname} ↔ {sk} (count={cnt})")
+    # Physics / layout options
+    options = {
+      "physics": {
+        "enabled": bool(physics),
+        "barnesHut": {"gravitationalConstant": -8000, "centralGravity": 0.2, "springLength": 150, "springConstant": 0.04},
+        "stabilization": {"enabled": True, "iterations": 100}
+      }
+    }
+    if hierarchical:
+        options["layout"] = {"hierarchical": {"enabled": True, "direction": "LR", "sortMethod": "hubsize"}}
+    net.set_options(json.dumps(options))
+    # Render HTML
+    html_path = "network.html"
+    net.write_html(html_path)
+    with open(html_path, "r", encoding="utf-8") as f:
+        html = f.read()
+    # Build a small data preview (limit rows)
+    preview_rows = [{"position": p, "skill": s, "cluster": c, "count": cnt} for (p, s, cnt, c) in edges]
+    preview_rows = sorted(preview_rows, key=lambda x: (-x["count"], x["position"]))[:1000]  # cap
+    return html, all_clusters, all_positions, preview_rows
+def run(
+    json_file,
+    min_count,
+    max_skills_per_position,
+    selected_clusters,
+    selected_positions,
+    skill_regex,
+    physics,
+    hierarchical
+):
+    data = load_graph(json_file)
+    html, all_clusters, all_positions, preview_rows = build_pyvis_html(
+        data,
+        min_count=min_count,
+        max_skills_per_position=max_skills_per_position,
+        selected_clusters=selected_clusters,
+        selected_positions=selected_positions,
+        skill_regex=skill_regex,
+        physics=physics,
+        hierarchical=hierarchical
+    )
+    # Update choices if user hasn't selected yet
+    clusters_update = gr.update(choices=all_clusters, value=selected_clusters or all_clusters)
+    positions_update = gr.update(choices=all_positions, value=selected_positions or [])
+    return html, clusters_update, positions_update, preview_rows
+with gr.Blocks(title="Position–Skill Network (PyVis)") as demo:
+    gr.Markdown("# Position–Skill Network (PyVis)\nUpload `job_position_skill_graph.json` or place it in the repo root.")
     with gr.Row():
+        with gr.Column(scale=1, min_width=350):
             json_file = gr.File(label="Upload job_position_skill_graph.json (optional)", file_count="single", file_types=[".json"])
+            min_count = gr.Slider(1, 50, value=5, step=1, label="Minimum skill count (filter)")
+            max_skills_per_position = gr.Slider(5, 200, value=30, step=1, label="Max skills per position")
+            selected_clusters = gr.CheckboxGroup(choices=[], label="Clusters to include (blank = all)")
+            selected_positions = gr.CheckboxGroup(choices=[], label="Positions to include (blank = all)")
+            skill_regex = gr.Textbox(value="", label="Skill name filter (regex, optional)")
             physics = gr.Checkbox(value=True, label="Enable physics layout")
+            hierarchical = gr.Checkbox(value=False, label="Hierarchical layout (Left→Right)")
+            btn = gr.Button("Build Network", variant="primary")
+        with gr.Column(scale=2):
+            out_html = gr.HTML(label="Network Diagram")
+            out_table = gr.Dataframe(label="Edges preview (top)", wrap=True)
     btn.click(
+        fn=run,
+        inputs=[json_file, min_count, max_skills_per_position, selected_clusters, selected_positions, skill_regex, physics, hierarchical],
+        outputs=[out_html, selected_clusters, selected_positions, out_table]
     )
 if __name__ == "__main__":