Spaces:

Nucha
/

NetworkChart

Sleeping

App Files Community

Nucha committed on Aug 29, 2025

Commit

1048569

verified ·

1 Parent(s): e99665e

Upload 2 files

Browse files

Files changed (1) hide show

app.py +118 -26

app.py CHANGED Viewed

@@ -30,15 +30,103 @@ CLUSTER_COLORS = {
 def _load_json(file_obj):
     if file_obj is not None:
-        return json.load(open(file_obj.name, "r", encoding="utf-8"))
     if os.path.exists(DEFAULT_JSON):
-        return json.load(open(DEFAULT_JSON, "r", encoding="utf-8"))
     raise gr.Error("No JSON provided and default file not found. Upload or place job_position_skill_graph.json in repo root.")
 def _aggregate_skill_totals(data):
     totals = {}
     for pos in data.get("positions", []):
-        for cluster, items in pos.get("skills", {}).items():
             for it in items:
                 name, cnt = it.get("name"), int(it.get("count", 0))
                 if not name:
@@ -59,14 +147,14 @@ def _build_graph(data, min_skill_count, top_k_per_position, include_pos_pos_edge
         pos_name = pos.get("name")
         if not pos_name:
             continue
-        total_skills = sum(len(v) for v in pos.get("skills", {}).values())
         G.add_node(
             f"pos::{pos_name}",
             label=pos_name,
             kind="position",
             size=max(15, min(60, 10 + 2*total_skills)),
             color="#333333",
-            title=f"<b>{pos_name}</b><br/>skills groups: {list(pos.get('skills', {}).keys())}",
         )
     skill_totals = _aggregate_skill_totals(data)
@@ -76,7 +164,7 @@ def _build_graph(data, min_skill_count, top_k_per_position, include_pos_pos_edge
         if not pos_name:
             continue
         flat = []
-        for cluster, items in pos.get("skills", {}).items():
             for it in items:
                 if int(it.get("count", 0)) >= min_skill_count:
                     flat.append((cluster or "other", it["name"], int(it["count"])))
@@ -106,6 +194,8 @@ def _build_graph(data, min_skill_count, top_k_per_position, include_pos_pos_edge
     if include_pos_pos_edges:
         for e in data.get("edges", []):
             w = float(e.get("weight", 0.0))
             if w < pos_pos_weight_min:
                 continue
@@ -113,7 +203,6 @@ def _build_graph(data, min_skill_count, top_k_per_position, include_pos_pos_edge
             b = f"pos::{e.get('target')}"
             if a in G and b in G:
                 G.add_edge(a, b, weight=max(1, int(w*10)), color="#555555", dashes=True, title=f"similarity: {w}")
     return G
 def _nx_to_pyvis_html(G, physics, layout, height_px):
@@ -128,28 +217,30 @@ def _nx_to_pyvis_html(G, physics, layout, height_px):
     if physics:
         net.force_atlas_2based()
     if layout == "hierarchical (positions → skills)":
-        net.set_options("""
-        var options = {
-          layout: {
-            hierarchical: {
-              enabled: true,
-              levelSeparation: 180,
-              nodeSpacing: 170,
-              treeSpacing: 200,
-              direction: 'UD',
-              sortMethod: 'hubsize'
-            }
-          },
-          physics: { enabled: %s }
         }
-        """ % ('true' if physics else 'false'))
     else:
-        net.set_options("""
-        var options = {
-          physics: { enabled: %s, stabilization: { iterations: 150 } }
         }
-        """ % ('true' if physics else 'false'))
     for n, data in G.nodes(data=True):
         net.add_node(
@@ -166,7 +257,8 @@ def _nx_to_pyvis_html(G, physics, layout, height_px):
     return net.generate_html()
 def run(json_file, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min, physics, layout, height_px):
-    data = _load_json(json_file)
     G = _build_graph(data, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min)
     html = _nx_to_pyvis_html(G, physics=physics, layout=layout, height_px=height_px)
     return html

 def _load_json(file_obj):
     """Load the graph description from an upload or from the default file.

     Args:
         file_obj: The uploaded file, or ``None`` when nothing was uploaded.
             May be an object exposing the file path as ``.name`` or a plain
             path string.

     Returns:
         The parsed JSON document.

     Raises:
         gr.Error: If nothing was uploaded and ``DEFAULT_JSON`` does not
             exist, or if the selected file cannot be read or parsed.
     """
     if file_obj is not None:
         # Fall back to the value itself so a bare path string also works.
         path = getattr(file_obj, "name", file_obj)
     elif os.path.exists(DEFAULT_JSON):
         path = DEFAULT_JSON
     else:
         raise gr.Error("No JSON provided and default file not found. Upload or place job_position_skill_graph.json in repo root.")
     try:
         with open(path, "r", encoding="utf-8") as f:
             return json.load(f)
     except (OSError, ValueError) as err:
         # json.JSONDecodeError and UnicodeDecodeError are ValueError
         # subclasses; report them the same way as the missing-file case.
         raise gr.Error(f"Could not read JSON from {os.path.basename(str(path))}: {err}") from err
def _normalize_schema(data):
    """Convert any supported input schema to the internal graph format.

    Internal format::

        {
          "positions": [{"name": "...",
                         "skills": {"cluster": [{"name": "...", "count": N}, ...], ...}}, ...],
          "edges": [{"source": "...", "target": "...", "weight": 0.2, "shared_skills": [...]}]
        }

    Supported inputs:
      A) ``positions`` is a list. Each position's ``skills`` is passed
         through ``_coerce_skills`` and non-dict entries are dropped; other
         keys are kept.
      B) ``positions`` is a dict mapping position name -> value.
      C/D) No usable ``positions`` key: every top-level key other than
         ``positions``/``edges`` is treated as a position name.

    For B/C/D a value may be a dict (optional ``name``, optional ``skills``),
    a list (taken as that position's skills), or anything else (a position
    with no skills).

    Args:
        data: The parsed JSON root.

    Returns:
        A new dict in the internal format; ``data`` itself is not modified.
        ``edges`` is always a list (empty when missing or not a list).

    Raises:
        gr.Error: If the root is not an object, or no positions can be found.
    """
    if not isinstance(data, dict):
        raise gr.Error("JSON root must be an object.")

    raw_edges = data.get("edges", [])
    edges = raw_edges if isinstance(raw_edges, list) else []

    def _entry(key, value):
        """Build one internal position record from a mapping item."""
        if isinstance(value, dict):
            name = value.get("name") or key
            skills = value.get("skills", {})
        elif isinstance(value, list):
            name = key
            # Hand the raw items over under one cluster so that both string
            # and {"name", "count"} items are interpreted, not stringified.
            skills = {"other": value}
        else:
            name = key
            skills = {}
        return {"name": name, "skills": _coerce_skills(skills)}

    positions = data.get("positions")

    # Case A: positions already a list. Still sanitize each entry, because
    # the graph-building code calls .get() on positions and .items() on skills.
    if isinstance(positions, list):
        cleaned = []
        for pos in positions:
            if not isinstance(pos, dict):
                continue
            fixed = dict(pos)
            fixed["skills"] = _coerce_skills(pos.get("skills"))
            cleaned.append(fixed)
        result = dict(data)
        result["positions"] = cleaned
        result["edges"] = edges
        return result

    if isinstance(positions, dict):
        # Case B: an empty mapping is accepted and yields no positions.
        mapping = positions
    else:
        # Case C/D: remaining top-level keys are the position names.
        mapping = {k: v for k, v in data.items() if k not in ("positions", "edges")}
        if not mapping:
            raise gr.Error("Unrecognized JSON schema. Include 'positions' or a mapping of position names.")

    return {
        "positions": [_entry(key, value) for key, value in mapping.items()],
        "edges": edges,
    }
+def _coerce_skills(skills):
+    """
+    Ensure skills structure is {cluster: [{"name":..., "count": int}, ...], ...}
+    Accepts:
+      - dict of cluster -> list of dicts with name/count
+      - dict of cluster -> list of strings (count=1)
+      - list of strings -> will be wrapped into {'other': [...]}
+    """
+    if isinstance(skills, list):
+        return {"other": [{"name": str(s), "count": 1} for s in skills]}
+    if isinstance(skills, dict):
+        out = {}
+        for cl, items in skills.items():
+            if isinstance(items, list):
+                norm_items = []
+                for it in items:
+                    if isinstance(it, dict):
+                        nm = str(it.get("name", "")).strip()
+                        if not nm:
+                            continue
+                        cnt = int(it.get("count", 1))
+                        norm_items.append({"name": nm, "count": cnt})
+                    else:
+                        nm = str(it).strip()
+                        if not nm:
+                            continue
+                        norm_items.append({"name": nm, "count": 1})
+                if norm_items:
+                    out[cl or "other"] = norm_items
+        return out
+    return {}
 def _aggregate_skill_totals(data):
     totals = {}
     for pos in data.get("positions", []):
+        for cluster, items in (pos.get("skills") or {}).items():
             for it in items:
                 name, cnt = it.get("name"), int(it.get("count", 0))
                 if not name:
         pos_name = pos.get("name")
         if not pos_name:
             continue
+        total_skills = sum(len(v) for v in (pos.get("skills") or {}).values())
         G.add_node(
             f"pos::{pos_name}",
             label=pos_name,
             kind="position",
             size=max(15, min(60, 10 + 2*total_skills)),
             color="#333333",
+            title=f"<b>{pos_name}</b><br/>skills groups: {list((pos.get('skills') or {}).keys())}",
         )
     skill_totals = _aggregate_skill_totals(data)
         if not pos_name:
             continue
         flat = []
+        for cluster, items in (pos.get("skills") or {}).items():
             for it in items:
                 if int(it.get("count", 0)) >= min_skill_count:
                     flat.append((cluster or "other", it["name"], int(it["count"])))
     if include_pos_pos_edges:
         for e in data.get("edges", []):
+            if not isinstance(e, dict):
+                continue
             w = float(e.get("weight", 0.0))
             if w < pos_pos_weight_min:
                 continue
             b = f"pos::{e.get('target')}"
             if a in G and b in G:
                 G.add_edge(a, b, weight=max(1, int(w*10)), color="#555555", dashes=True, title=f"similarity: {w}")
     return G
 def _nx_to_pyvis_html(G, physics, layout, height_px):
     if physics:
         net.force_atlas_2based()
+    # PyVis expects pure JSON (no 'var options =')
     if layout == "hierarchical (positions → skills)":
+        options = {
+            "layout": {
+                "hierarchical": {
+                    "enabled": True,
+                    "levelSeparation": 180,
+                    "nodeSpacing": 170,
+                    "treeSpacing": 200,
+                    "direction": "UD",
+                    "sortMethod": "hubsize"
+                }
+            },
+            "physics": {"enabled": bool(physics)}
         }
     else:
+        options = {
+            "physics": {
+                "enabled": bool(physics),
+                "stabilization": {"iterations": 150}
+            }
         }
+    import json as _json
+    net.set_options(_json.dumps(options))
     for n, data in G.nodes(data=True):
         net.add_node(
     return net.generate_html()
 def run(json_file, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min, physics, layout, height_px):
     """Run the whole pipeline: load JSON, normalize it, build the graph, render HTML.

     Returns:
         The HTML string produced by ``_nx_to_pyvis_html``.
     """
     normalized = _normalize_schema(_load_json(json_file))
     graph = _build_graph(
         normalized,
         min_skill_count,
         top_k_per_position,
         include_pos_pos_edges,
         pos_pos_weight_min,
     )
     return _nx_to_pyvis_html(graph, physics=physics, layout=layout, height_px=height_px)