Spaces:

Nucha
/

NetworkChart

Sleeping

App Files Community

Nucha committed on Aug 29, 2025

Commit

e99665e

verified ·

1 Parent(s): f728c11

Upload 2 files

Browse files

Files changed (2) hide show

app.py +172 -183
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -1,208 +1,197 @@
 \
 import os
 import json
-import re
-from collections import defaultdict
 import gradio as gr
-# Graph libs
 from pyvis.network import Network
-DEFAULT_JSON = "job_position_skill_graph.json"  # Put this file at the repo root
-def load_graph(json_file):
-    """Load JSON from upload or default file in repo root."""
-    if json_file is not None:
-        path = json_file.name if hasattr(json_file, "name") else json_file
-        with open(path, "r", encoding="utf-8") as f:
-            data = json.load(f)
-        return data
     if os.path.exists(DEFAULT_JSON):
-        with open(DEFAULT_JSON, "r", encoding="utf-8") as f:
-            data = json.load(f)
-        return data
-    raise gr.Error("No JSON provided and default file not found. Please upload job_position_skill_graph.json.")
-def flatten_skills(positions):
-    """Return set of all skills and map skill->cluster (first seen)."""
-    skill2cluster = {}
-    for pos in positions:
-        grouped = pos.get("skills", {})
-        for cluster_name, items in grouped.items():
             for it in items:
-                sk = str(it.get("name", "")).strip()
-                if not sk:
                     continue
-                if sk not in skill2cluster:
-                    skill2cluster[sk] = cluster_name
-    return skill2cluster
-def build_edges(positions, min_count=1, max_skills_per_position=100, clusters_filter=None, positions_filter=None, skill_regex=None):
-    """
-    Create bipartite edges Position -> Skill with weight by 'count'.
-    Apply filters: min_count, clusters_filter (set), positions_filter (set), regex.
-    """
-    edges = []
-    skill_counts_global = defaultdict(int)
-    patt = None
-    if skill_regex:
-        try:
-            patt = re.compile(skill_regex, re.IGNORECASE)
-        except re.error as e:
-            raise gr.Error(f"Invalid regex: {e}")
-    for pos in positions:
-        pname = pos.get("name", "").strip()
-        if not pname:
             continue
-        if positions_filter and pname not in positions_filter:
             continue
-        grouped = pos.get("skills", {})
-        # flatten with filter by cluster and regex
         flat = []
-        for cluster_name, items in grouped.items():
-            if clusters_filter and cluster_name not in clusters_filter:
-                continue
             for it in items:
-                sk = str(it.get("name", "")).strip()
-                cnt = int(it.get("count", 0))
-                if not sk:
-                    continue
-                if cnt < min_count:
-                    continue
-                if patt and not patt.search(sk):
-                    continue
-                flat.append((cluster_name, sk, cnt))
-        # keep top-K for this position by count
-        flat.sort(key=lambda x: -x[2])
-        for cluster_name, sk, cnt in flat[:max_skills_per_position]:
-            edges.append((pname, sk, cnt, cluster_name))
-            skill_counts_global[sk] += cnt
-    return edges, skill_counts_global
-def build_pyvis_html(
-    data,
-    min_count=5,
-    max_skills_per_position=30,
-    selected_clusters=None,
-    selected_positions=None,
-    skill_regex="",
-    physics=True,
-    hierarchical=False
-):
-    positions = data.get("positions", [])
-    # Derive available clusters and positions for UI
-    all_clusters = sorted({cl for pos in positions for cl in pos.get("skills", {}).keys()})
-    all_positions = sorted({pos.get("name","") for pos in positions if pos.get("name","")})
-    clusters_filter = set(selected_clusters) if selected_clusters else set(all_clusters)
-    positions_filter = set(selected_positions) if selected_positions else None
-    edges, skill_counts_global = build_edges(
-        positions,
-        min_count=min_count,
-        max_skills_per_position=max_skills_per_position,
-        clusters_filter=clusters_filter,
-        positions_filter=positions_filter,
-        skill_regex=skill_regex.strip() or None
-    )
-    # Create network
-    net = Network(height="700px", width="100%", bgcolor="#ffffff", font_color="#222222", directed=False, notebook=False, cdn_resources="in_line")
-    # Add nodes
-    # Position nodes: group 'position', shape 'dot'
-    # Skill nodes: group by cluster for color
-    pos_added = set()
-    skill_added = set()
-    # Predefine some distinct groups for clusters (pyvis auto-colors groups)
-    # We'll assign group=cluster for skills, and "position" for positions.
-    for pname, sk, cnt, cluster_name in edges:
-        if pname not in pos_added:
-            net.add_node(f"pos::{pname}", label=pname, title=f"Position: {pname}", shape="dot", size=18, group="position")
-            pos_added.add(pname)
-        if sk not in skill_added:
-            net.add_node(f"sk::{sk}", label=sk, title=f"Skill: {sk}\\nCluster: {cluster_name}\\nGlobal count (approx.): {skill_counts_global.get(sk, 0)}", shape="box", group=cluster_name)
-            skill_added.add(sk)
-        # Edge with value influences thickness
-        net.add_edge(f"pos::{pname}", f"sk::{sk}", value=int(cnt), title=f"{pname} ↔ {sk} (count={cnt})")
-    # Physics / layout options
-    options = {
-      "physics": {
-        "enabled": bool(physics),
-        "barnesHut": {"gravitationalConstant": -8000, "centralGravity": 0.2, "springLength": 150, "springConstant": 0.04},
-        "stabilization": {"enabled": True, "iterations": 100}
-      }
-    }
-    if hierarchical:
-        options["layout"] = {"hierarchical": {"enabled": True, "direction": "LR", "sortMethod": "hubsize"}}
-    net.set_options(json.dumps(options))
-    # Render HTML
-    html_path = "network.html"
-    net.write_html(html_path)
-    with open(html_path, "r", encoding="utf-8") as f:
-        html = f.read()
-    # Build a small data preview (limit rows)
-    preview_rows = [{"position": p, "skill": s, "cluster": c, "count": cnt} for (p, s, cnt, c) in edges]
-    preview_rows = sorted(preview_rows, key=lambda x: (-x["count"], x["position"]))[:1000]  # cap
-    return html, all_clusters, all_positions, preview_rows
-def run(
-    json_file,
-    min_count,
-    max_skills_per_position,
-    selected_clusters,
-    selected_positions,
-    skill_regex,
-    physics,
-    hierarchical
-):
-    data = load_graph(json_file)
-    html, all_clusters, all_positions, preview_rows = build_pyvis_html(
-        data,
-        min_count=min_count,
-        max_skills_per_position=max_skills_per_position,
-        selected_clusters=selected_clusters,
-        selected_positions=selected_positions,
-        skill_regex=skill_regex,
-        physics=physics,
-        hierarchical=hierarchical
     )
-    # Update choices if user hasn't selected yet
-    clusters_update = gr.update(choices=all_clusters, value=selected_clusters or all_clusters)
-    positions_update = gr.update(choices=all_positions, value=selected_positions or [])
-    return html, clusters_update, positions_update, preview_rows
-with gr.Blocks(title="Position–Skill Network (PyVis)") as demo:
-    gr.Markdown("# Position–Skill Network (PyVis)\nUpload `job_position_skill_graph.json` or place it in the repo root.")
     with gr.Row():
-        with gr.Column(scale=1, min_width=350):
             json_file = gr.File(label="Upload job_position_skill_graph.json (optional)", file_count="single", file_types=[".json"])
-            min_count = gr.Slider(1, 50, value=5, step=1, label="Minimum skill count (filter)")
-            max_skills_per_position = gr.Slider(5, 200, value=30, step=1, label="Max skills per position")
-            selected_clusters = gr.CheckboxGroup(choices=[], label="Clusters to include (blank = all)")
-            selected_positions = gr.CheckboxGroup(choices=[], label="Positions to include (blank = all)")
-            skill_regex = gr.Textbox(value="", label="Skill name filter (regex, optional)")
-            physics = gr.Checkbox(value=True, label="Enable physics layout")
-            hierarchical = gr.Checkbox(value=False, label="Hierarchical layout (Left→Right)")
             btn = gr.Button("Build Network", variant="primary")
-        with gr.Column(scale=2):
-            out_html = gr.HTML(label="Network Diagram")
-            out_table = gr.Dataframe(label="Edges preview (top)", wrap=True)
     btn.click(
         fn=run,
-        inputs=[json_file, min_count, max_skills_per_position, selected_clusters, selected_positions, skill_regex, physics, hierarchical],
-        outputs=[out_html, selected_clusters, selected_positions, out_table]
     )
 if __name__ == "__main__":

 \
 import os
 import json
+import math
 import gradio as gr
+import networkx as nx
 from pyvis.network import Network
+DEFAULT_JSON = "job_position_skill_graph.json"
+CLUSTER_COLORS = {
+    "programming": "#1f77b4",
+    "databases": "#ff7f0e",
+    "cloud": "#2ca02c",
+    "devops": "#d62728",
+    "version_control": "#9467bd",
+    "data_processing": "#8c564b",
+    "ml_ai": "#e377c2",
+    "web_backend": "#7f7f7f",
+    "web_frontend": "#bcbd22",
+    "security": "#17becf",
+    "networking": "#1b9e77",
+    "mobile": "#d95f02",
+    "analytics_bi": "#7570b3",
+    "testing_qc": "#e7298a",
+    "infra_sys": "#66a61e",
+    "other": "#999999",
+}
+def _load_json(file_obj):
+    if file_obj is not None:
+        return json.load(open(file_obj.name, "r", encoding="utf-8"))
     if os.path.exists(DEFAULT_JSON):
+        return json.load(open(DEFAULT_JSON, "r", encoding="utf-8"))
+    raise gr.Error("No JSON provided and default file not found. Upload or place job_position_skill_graph.json in repo root.")
+def _aggregate_skill_totals(data):
+    totals = {}
+    for pos in data.get("positions", []):
+        for cluster, items in pos.get("skills", {}).items():
             for it in items:
+                name, cnt = it.get("name"), int(it.get("count", 0))
+                if not name:
                     continue
+                if name not in totals:
+                    totals[name] = {"total": 0, "clusters": set()}
+                totals[name]["total"] += cnt
+                totals[name]["clusters"].add(cluster or "other")
+    for k, v in totals.items():
+        clusters = list(v["clusters"])
+        v["cluster"] = clusters[0] if clusters else "other"
+    return totals
+def _build_graph(data, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min):
+    G = nx.Graph()
+    for pos in data.get("positions", []):
+        pos_name = pos.get("name")
+        if not pos_name:
             continue
+        total_skills = sum(len(v) for v in pos.get("skills", {}).values())
+        G.add_node(
+            f"pos::{pos_name}",
+            label=pos_name,
+            kind="position",
+            size=max(15, min(60, 10 + 2*total_skills)),
+            color="#333333",
+            title=f"<b>{pos_name}</b><br/>skills groups: {list(pos.get('skills', {}).keys())}",
+        )
+    skill_totals = _aggregate_skill_totals(data)
+    for pos in data.get("positions", []):
+        pos_name = pos.get("name")
+        if not pos_name:
             continue
         flat = []
+        for cluster, items in pos.get("skills", {}).items():
             for it in items:
+                if int(it.get("count", 0)) >= min_skill_count:
+                    flat.append((cluster or "other", it["name"], int(it["count"])))
+        if top_k_per_position and top_k_per_position > 0:
+            flat = sorted(flat, key=lambda x: -x[2])[: top_k_per_position]
+        for cluster, skill, cnt in flat:
+            node_id = f"skill::{skill}"
+            if node_id not in G:
+                total = skill_totals.get(skill, {}).get("total", cnt)
+                node_size = max(8, min(50, 6 + math.sqrt(total)*2))
+                color = CLUSTER_COLORS.get(cluster, "#999999")
+                G.add_node(
+                    node_id,
+                    label=skill,
+                    kind="skill",
+                    size=node_size,
+                    color=color,
+                    title=f"<b>{skill}</b><br/>cluster: {cluster}<br/>total: {total}",
+                )
+            G.add_edge(
+                f"pos::{pos_name}",
+                node_id,
+                weight=cnt,
+                title=f"{pos_name} → {skill}: {cnt}",
+            )
+    if include_pos_pos_edges:
+        for e in data.get("edges", []):
+            w = float(e.get("weight", 0.0))
+            if w < pos_pos_weight_min:
+                continue
+            a = f"pos::{e.get('source')}"
+            b = f"pos::{e.get('target')}"
+            if a in G and b in G:
+                G.add_edge(a, b, weight=max(1, int(w*10)), color="#555555", dashes=True, title=f"similarity: {w}")
+    return G
+def _nx_to_pyvis_html(G, physics, layout, height_px):
+    net = Network(
+        height=f"{height_px}px",
+        width="100%",
+        bgcolor="#ffffff",
+        font_color="#222222",
+        directed=False,
+        notebook=False,
     )
+    if physics:
+        net.force_atlas_2based()
+    if layout == "hierarchical (positions → skills)":
+        net.set_options("""
+        var options = {
+          layout: {
+            hierarchical: {
+              enabled: true,
+              levelSeparation: 180,
+              nodeSpacing: 170,
+              treeSpacing: 200,
+              direction: 'UD',
+              sortMethod: 'hubsize'
+            }
+          },
+          physics: { enabled: %s }
+        }
+        """ % ('true' if physics else 'false'))
+    else:
+        net.set_options("""
+        var options = {
+          physics: { enabled: %s, stabilization: { iterations: 150 } }
+        }
+        """ % ('true' if physics else 'false'))
+    for n, data in G.nodes(data=True):
+        net.add_node(
+            n,
+            label=data.get("label", n),
+            color=data.get("color", "#97c2fc"),
+            title=data.get("title", ""),
+            size=data.get("size", 15),
+            shape="dot" if data.get("kind") == "skill" else "ellipse",
+        )
+    for u, v, edata in G.edges(data=True):
+        net.add_edge(u, v, title=edata.get("title", ""), value=edata.get("weight", 1), color=edata.get("color"))
+    return net.generate_html()
+def run(json_file, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min, physics, layout, height_px):
+    data = _load_json(json_file)
+    G = _build_graph(data, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min)
+    html = _nx_to_pyvis_html(G, physics=physics, layout=layout, height_px=height_px)
+    return html
+with gr.Blocks(title="Job Positions ↔ Hard Skills — Network Diagram") as demo:
+    gr.Markdown("# Network Diagram: Positions ↔ Skills\\nUpload `job_position_skill_graph.json` or place it in the repo root.\\n- **Black ovals** = Job positions\\n- **Colored dots** = Skills (color by cluster)\\n- Edge weight = frequency of skill in that position")
     with gr.Row():
+        with gr.Column(scale=1):
             json_file = gr.File(label="Upload job_position_skill_graph.json (optional)", file_count="single", file_types=[".json"])
+            min_skill_count = gr.Slider(0, 50, value=5, step=1, label="Minimum skill count per position (filter noise)")
+            top_k_per_position = gr.Slider(0, 100, value=20, step=1, label="Top-K skills per position (0 = all)")
+            include_pos_pos_edges = gr.Checkbox(value=False, label="Include position↔position similarity edges")
+            pos_pos_weight_min = gr.Slider(0.0, 1.0, value=0.2, step=0.05, label="Min similarity (if enabled)")
+            physics = gr.Checkbox(value=True, label="Enable physics (force layout)")
+            layout = gr.Dropdown(choices=["free (force layout)", "hierarchical (positions → skills)"], value="free (force layout)", label="Layout")
+            height_px = gr.Slider(500, 1400, value=900, step=50, label="Canvas height (px)")
             btn = gr.Button("Build Network", variant="primary")
+        with gr.Column(scale=1):
+            out_html = gr.HTML(label="Interactive Network")
     btn.click(
         fn=run,
+        inputs=[json_file, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min, physics, layout, height_px],
+        outputs=[out_html]
     )
 if __name__ == "__main__":

requirements.txt CHANGED Viewed

@@ -1,2 +1,3 @@
 gradio>=4.26.0
 pyvis>=0.3.2

 gradio>=4.26.0
+networkx>=3.2
 pyvis>=0.3.2