Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files
app.py
CHANGED
|
@@ -30,15 +30,103 @@ CLUSTER_COLORS = {
|
|
| 30 |
|
| 31 |
def _load_json(file_obj):
|
| 32 |
if file_obj is not None:
|
| 33 |
-
|
|
|
|
| 34 |
if os.path.exists(DEFAULT_JSON):
|
| 35 |
-
|
|
|
|
| 36 |
raise gr.Error("No JSON provided and default file not found. Upload or place job_position_skill_graph.json in repo root.")
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
def _aggregate_skill_totals(data):
|
| 39 |
totals = {}
|
| 40 |
for pos in data.get("positions", []):
|
| 41 |
-
for cluster, items in pos.get("skills"
|
| 42 |
for it in items:
|
| 43 |
name, cnt = it.get("name"), int(it.get("count", 0))
|
| 44 |
if not name:
|
|
@@ -59,14 +147,14 @@ def _build_graph(data, min_skill_count, top_k_per_position, include_pos_pos_edge
|
|
| 59 |
pos_name = pos.get("name")
|
| 60 |
if not pos_name:
|
| 61 |
continue
|
| 62 |
-
total_skills = sum(len(v) for v in pos.get("skills"
|
| 63 |
G.add_node(
|
| 64 |
f"pos::{pos_name}",
|
| 65 |
label=pos_name,
|
| 66 |
kind="position",
|
| 67 |
size=max(15, min(60, 10 + 2*total_skills)),
|
| 68 |
color="#333333",
|
| 69 |
-
title=f"<b>{pos_name}</b><br/>skills groups: {list(pos.get('skills'
|
| 70 |
)
|
| 71 |
|
| 72 |
skill_totals = _aggregate_skill_totals(data)
|
|
@@ -76,7 +164,7 @@ def _build_graph(data, min_skill_count, top_k_per_position, include_pos_pos_edge
|
|
| 76 |
if not pos_name:
|
| 77 |
continue
|
| 78 |
flat = []
|
| 79 |
-
for cluster, items in pos.get("skills"
|
| 80 |
for it in items:
|
| 81 |
if int(it.get("count", 0)) >= min_skill_count:
|
| 82 |
flat.append((cluster or "other", it["name"], int(it["count"])))
|
|
@@ -106,6 +194,8 @@ def _build_graph(data, min_skill_count, top_k_per_position, include_pos_pos_edge
|
|
| 106 |
|
| 107 |
if include_pos_pos_edges:
|
| 108 |
for e in data.get("edges", []):
|
|
|
|
|
|
|
| 109 |
w = float(e.get("weight", 0.0))
|
| 110 |
if w < pos_pos_weight_min:
|
| 111 |
continue
|
|
@@ -113,7 +203,6 @@ def _build_graph(data, min_skill_count, top_k_per_position, include_pos_pos_edge
|
|
| 113 |
b = f"pos::{e.get('target')}"
|
| 114 |
if a in G and b in G:
|
| 115 |
G.add_edge(a, b, weight=max(1, int(w*10)), color="#555555", dashes=True, title=f"similarity: {w}")
|
| 116 |
-
|
| 117 |
return G
|
| 118 |
|
| 119 |
def _nx_to_pyvis_html(G, physics, layout, height_px):
|
|
@@ -128,28 +217,30 @@ def _nx_to_pyvis_html(G, physics, layout, height_px):
|
|
| 128 |
if physics:
|
| 129 |
net.force_atlas_2based()
|
| 130 |
|
|
|
|
| 131 |
if layout == "hierarchical (positions → skills)":
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
}
|
| 143 |
-
|
| 144 |
-
physics: { enabled: %s }
|
| 145 |
}
|
| 146 |
-
""" % ('true' if physics else 'false'))
|
| 147 |
else:
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
|
|
|
|
|
|
| 151 |
}
|
| 152 |
-
|
|
|
|
| 153 |
|
| 154 |
for n, data in G.nodes(data=True):
|
| 155 |
net.add_node(
|
|
@@ -166,7 +257,8 @@ def _nx_to_pyvis_html(G, physics, layout, height_px):
|
|
| 166 |
return net.generate_html()
|
| 167 |
|
| 168 |
def run(json_file, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min, physics, layout, height_px):
|
| 169 |
-
|
|
|
|
| 170 |
G = _build_graph(data, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min)
|
| 171 |
html = _nx_to_pyvis_html(G, physics=physics, layout=layout, height_px=height_px)
|
| 172 |
return html
|
|
|
|
| 30 |
|
| 31 |
def _load_json(file_obj):
    """Load the graph description from an upload or from the default file.

    Args:
        file_obj: The uploaded file, or ``None`` when nothing was uploaded.
            Either an object exposing the file path as ``.name`` or a plain
            path is accepted.
            NOTE(review): which of the two the UI passes depends on how the
            upload component is configured -- confirm against the caller.

    Returns:
        The parsed JSON document.

    Raises:
        gr.Error: If nothing was uploaded and ``DEFAULT_JSON`` does not exist.
    """
    if file_obj is not None:
        # Fall back to the value itself when there is no ``.name`` attribute,
        # so a bare path string works as well as a file wrapper.
        path = getattr(file_obj, "name", file_obj)
    elif os.path.exists(DEFAULT_JSON):
        path = DEFAULT_JSON
    else:
        raise gr.Error("No JSON provided and default file not found. Upload or place job_position_skill_graph.json in repo root.")

    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
|
| 39 |
|
| 40 |
+
def _position_entry(pos_name, pos_val):
    """Build one internal-format position record from a raw (key, value) pair."""
    if isinstance(pos_val, dict):
        name = pos_val.get("name") or pos_name
        skills = pos_val.get("skills", {})
    elif isinstance(pos_val, list):
        # Flat skills list: file everything under "other" with count=1.
        name = pos_name
        skills = {"other": [{"name": s, "count": 1} for s in pos_val]}
    else:
        # Unusable value: keep the position but give it no skills.
        name = str(pos_name)
        skills = {}
    return {"name": name, "skills": _coerce_skills(skills)}


def _normalize_schema(data):
    """
    Accepts multiple schemas and converts to internal format:
    {
      "positions": [{"name": "...","skills": {"cluster":[{"name":"...","count":N}, ...], ...}}, ...],
      "edges": [{"source":"...","target":"...","weight":0.2,"shared_skills":[...]}]
    }
    Supported inputs:
    A) Internal format (returned unchanged, without copying or validation)
    B) "positions" as a dict mapping position_name -> position value
    C/D) Top-level dict mapping position_name -> position value (every key
         other than "positions" and "edges" is treated as a position)

    In B and C/D a position value may be a dict with optional "name" and
    "skills", or a flat list of skill names (stored under the "other"
    cluster with count=1). Any other value yields a position without skills.
    "edges" is carried over only when it is a list; otherwise it becomes [].

    Raises:
        gr.Error: If the root is not an object, or no positions can be found.
    """
    if not isinstance(data, dict):
        raise gr.Error("JSON root must be an object.")

    positions = data.get("positions")

    # Case A: already in internal format with positions as list
    if isinstance(positions, list):
        return data

    edges = data.get("edges", [])
    norm = {"positions": [], "edges": edges if isinstance(edges, list) else []}

    if isinstance(positions, dict):
        # Case B: positions is a dict
        raw_positions = positions
    else:
        # Case C/D: top-level keys (excluding known keys) are positions
        excluded = {"positions", "edges"}
        raw_positions = {k: v for k, v in data.items() if k not in excluded}
        if not raw_positions:
            raise gr.Error("Unrecognized JSON schema. Include 'positions' or a mapping of position names.")

    norm["positions"] = [_position_entry(k, v) for k, v in raw_positions.items()]
    return norm
|
| 93 |
+
|
| 94 |
+
def _coerce_skills(skills):
    """
    Ensure skills structure is {cluster: [{"name":..., "count": int}, ...], ...}
    Accepts:
      - dict of cluster -> list of dicts with name/count
      - dict of cluster -> list of strings (count=1)
      - list of strings or name/count dicts -> wrapped into {'other': [...]}
    Anything else yields {}.

    Item rules (applied identically to every accepted shape):
      - names are stringified and stripped; missing, null or blank names are dropped
      - a missing, null or non-numeric count falls back to 1 instead of raising
    Cluster rules:
      - dict values that are not lists, and clusters left with no items, are omitted
      - a falsy cluster key is filed under "other" and merged with any
        existing "other" cluster rather than replacing it
      - a flat list always produces an "other" key, even when it ends up empty
    """

    def _item(raw):
        # Normalize one entry; returns None when it carries no usable name.
        if isinstance(raw, dict):
            name, count = raw.get("name"), raw.get("count", 1)
        else:
            name, count = raw, 1
        # Guard against null explicitly: str(None) would yield the skill "None".
        name = "" if name is None else str(name).strip()
        if not name:
            return None
        try:
            count = int(count)
        except (TypeError, ValueError, OverflowError):
            # One malformed count should not abort loading the whole file.
            count = 1
        return {"name": name, "count": count}

    def _items(raw_items):
        return [it for it in map(_item, raw_items) if it is not None]

    if isinstance(skills, list):
        return {"other": _items(skills)}
    if not isinstance(skills, dict):
        return {}

    out = {}
    for cluster, raw_items in skills.items():
        if not isinstance(raw_items, list):
            continue
        items = _items(raw_items)
        if items:
            out.setdefault(cluster or "other", []).extend(items)
    return out
|
| 125 |
+
|
| 126 |
def _aggregate_skill_totals(data):
|
| 127 |
totals = {}
|
| 128 |
for pos in data.get("positions", []):
|
| 129 |
+
for cluster, items in (pos.get("skills") or {}).items():
|
| 130 |
for it in items:
|
| 131 |
name, cnt = it.get("name"), int(it.get("count", 0))
|
| 132 |
if not name:
|
|
|
|
| 147 |
pos_name = pos.get("name")
|
| 148 |
if not pos_name:
|
| 149 |
continue
|
| 150 |
+
total_skills = sum(len(v) for v in (pos.get("skills") or {}).values())
|
| 151 |
G.add_node(
|
| 152 |
f"pos::{pos_name}",
|
| 153 |
label=pos_name,
|
| 154 |
kind="position",
|
| 155 |
size=max(15, min(60, 10 + 2*total_skills)),
|
| 156 |
color="#333333",
|
| 157 |
+
title=f"<b>{pos_name}</b><br/>skills groups: {list((pos.get('skills') or {}).keys())}",
|
| 158 |
)
|
| 159 |
|
| 160 |
skill_totals = _aggregate_skill_totals(data)
|
|
|
|
| 164 |
if not pos_name:
|
| 165 |
continue
|
| 166 |
flat = []
|
| 167 |
+
for cluster, items in (pos.get("skills") or {}).items():
|
| 168 |
for it in items:
|
| 169 |
if int(it.get("count", 0)) >= min_skill_count:
|
| 170 |
flat.append((cluster or "other", it["name"], int(it["count"])))
|
|
|
|
| 194 |
|
| 195 |
if include_pos_pos_edges:
|
| 196 |
for e in data.get("edges", []):
|
| 197 |
+
if not isinstance(e, dict):
|
| 198 |
+
continue
|
| 199 |
w = float(e.get("weight", 0.0))
|
| 200 |
if w < pos_pos_weight_min:
|
| 201 |
continue
|
|
|
|
| 203 |
b = f"pos::{e.get('target')}"
|
| 204 |
if a in G and b in G:
|
| 205 |
G.add_edge(a, b, weight=max(1, int(w*10)), color="#555555", dashes=True, title=f"similarity: {w}")
|
|
|
|
| 206 |
return G
|
| 207 |
|
| 208 |
def _nx_to_pyvis_html(G, physics, layout, height_px):
|
|
|
|
| 217 |
if physics:
|
| 218 |
net.force_atlas_2based()
|
| 219 |
|
| 220 |
+
# PyVis expects pure JSON (no 'var options =')
|
| 221 |
if layout == "hierarchical (positions → skills)":
|
| 222 |
+
options = {
|
| 223 |
+
"layout": {
|
| 224 |
+
"hierarchical": {
|
| 225 |
+
"enabled": True,
|
| 226 |
+
"levelSeparation": 180,
|
| 227 |
+
"nodeSpacing": 170,
|
| 228 |
+
"treeSpacing": 200,
|
| 229 |
+
"direction": "UD",
|
| 230 |
+
"sortMethod": "hubsize"
|
| 231 |
+
}
|
| 232 |
+
},
|
| 233 |
+
"physics": {"enabled": bool(physics)}
|
|
|
|
| 234 |
}
|
|
|
|
| 235 |
else:
|
| 236 |
+
options = {
|
| 237 |
+
"physics": {
|
| 238 |
+
"enabled": bool(physics),
|
| 239 |
+
"stabilization": {"iterations": 150}
|
| 240 |
+
}
|
| 241 |
}
|
| 242 |
+
import json as _json
|
| 243 |
+
net.set_options(_json.dumps(options))
|
| 244 |
|
| 245 |
for n, data in G.nodes(data=True):
|
| 246 |
net.add_node(
|
|
|
|
| 257 |
return net.generate_html()
|
| 258 |
|
| 259 |
def run(json_file, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min, physics, layout, height_px):
    """Load the JSON input, normalize it, build the graph and return it rendered as HTML."""
    normalized = _normalize_schema(_load_json(json_file))
    graph = _build_graph(
        normalized,
        min_skill_count,
        top_k_per_position,
        include_pos_pos_edges,
        pos_pos_weight_min,
    )
    return _nx_to_pyvis_html(graph, physics=physics, layout=layout, height_px=height_px)
|