Nucha committed on
Commit
1048569
·
verified ·
1 Parent(s): e99665e

Upload 2 files

Browse files
Files changed (1) hide show
  1. app.py +118 -26
app.py CHANGED
@@ -30,15 +30,103 @@ CLUSTER_COLORS = {
30
 
31
  def _load_json(file_obj):
32
  if file_obj is not None:
33
- return json.load(open(file_obj.name, "r", encoding="utf-8"))
 
34
  if os.path.exists(DEFAULT_JSON):
35
- return json.load(open(DEFAULT_JSON, "r", encoding="utf-8"))
 
36
  raise gr.Error("No JSON provided and default file not found. Upload or place job_position_skill_graph.json in repo root.")
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  def _aggregate_skill_totals(data):
39
  totals = {}
40
  for pos in data.get("positions", []):
41
- for cluster, items in pos.get("skills", {}).items():
42
  for it in items:
43
  name, cnt = it.get("name"), int(it.get("count", 0))
44
  if not name:
@@ -59,14 +147,14 @@ def _build_graph(data, min_skill_count, top_k_per_position, include_pos_pos_edge
59
  pos_name = pos.get("name")
60
  if not pos_name:
61
  continue
62
- total_skills = sum(len(v) for v in pos.get("skills", {}).values())
63
  G.add_node(
64
  f"pos::{pos_name}",
65
  label=pos_name,
66
  kind="position",
67
  size=max(15, min(60, 10 + 2*total_skills)),
68
  color="#333333",
69
- title=f"<b>{pos_name}</b><br/>skills groups: {list(pos.get('skills', {}).keys())}",
70
  )
71
 
72
  skill_totals = _aggregate_skill_totals(data)
@@ -76,7 +164,7 @@ def _build_graph(data, min_skill_count, top_k_per_position, include_pos_pos_edge
76
  if not pos_name:
77
  continue
78
  flat = []
79
- for cluster, items in pos.get("skills", {}).items():
80
  for it in items:
81
  if int(it.get("count", 0)) >= min_skill_count:
82
  flat.append((cluster or "other", it["name"], int(it["count"])))
@@ -106,6 +194,8 @@ def _build_graph(data, min_skill_count, top_k_per_position, include_pos_pos_edge
106
 
107
  if include_pos_pos_edges:
108
  for e in data.get("edges", []):
 
 
109
  w = float(e.get("weight", 0.0))
110
  if w < pos_pos_weight_min:
111
  continue
@@ -113,7 +203,6 @@ def _build_graph(data, min_skill_count, top_k_per_position, include_pos_pos_edge
113
  b = f"pos::{e.get('target')}"
114
  if a in G and b in G:
115
  G.add_edge(a, b, weight=max(1, int(w*10)), color="#555555", dashes=True, title=f"similarity: {w}")
116
-
117
  return G
118
 
119
  def _nx_to_pyvis_html(G, physics, layout, height_px):
@@ -128,28 +217,30 @@ def _nx_to_pyvis_html(G, physics, layout, height_px):
128
  if physics:
129
  net.force_atlas_2based()
130
 
 
131
  if layout == "hierarchical (positions → skills)":
132
- net.set_options("""
133
- var options = {
134
- layout: {
135
- hierarchical: {
136
- enabled: true,
137
- levelSeparation: 180,
138
- nodeSpacing: 170,
139
- treeSpacing: 200,
140
- direction: 'UD',
141
- sortMethod: 'hubsize'
142
- }
143
- },
144
- physics: { enabled: %s }
145
  }
146
- """ % ('true' if physics else 'false'))
147
  else:
148
- net.set_options("""
149
- var options = {
150
- physics: { enabled: %s, stabilization: { iterations: 150 } }
 
 
151
  }
152
- """ % ('true' if physics else 'false'))
 
153
 
154
  for n, data in G.nodes(data=True):
155
  net.add_node(
@@ -166,7 +257,8 @@ def _nx_to_pyvis_html(G, physics, layout, height_px):
166
  return net.generate_html()
167
 
168
  def run(json_file, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min, physics, layout, height_px):
169
- data = _load_json(json_file)
 
170
  G = _build_graph(data, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min)
171
  html = _nx_to_pyvis_html(G, physics=physics, layout=layout, height_px=height_px)
172
  return html
 
30
 
31
  def _load_json(file_obj):
32
  if file_obj is not None:
33
+ with open(file_obj.name, "r", encoding="utf-8") as f:
34
+ return json.load(f)
35
  if os.path.exists(DEFAULT_JSON):
36
+ with open(DEFAULT_JSON, "r", encoding="utf-8") as f:
37
+ return json.load(f)
38
  raise gr.Error("No JSON provided and default file not found. Upload or place job_position_skill_graph.json in repo root.")
39
 
40
+ def _normalize_schema(data):
41
+ """
42
+ Accepts multiple schemas and converts to internal format:
43
+ {
44
+ "positions": [{"name": "...","skills": {"cluster":[{"name":"...","count":N}, ...], ...}}, ...],
45
+ "edges": [{"source":"...","target":"...","weight":0.2,"shared_skills":[...]}]
46
+ }
47
+ Supported inputs:
48
+ A) Internal format (pass-through)
49
+ B) positions as a dict -> convert to list
50
+ C) Top-level dict mapping position_name -> {"name": "...", "skills": {...}} or {"skills":[...]}
51
+ D) Top-level dict mapping position_name -> {"skills": {"cluster":[...]} } (no edges)
52
+ """
53
+ if not isinstance(data, dict):
54
+ raise gr.Error("JSON root must be an object.")
55
+
56
+ # Case A: already in internal format with positions as list
57
+ if "positions" in data and isinstance(data["positions"], list):
58
+ return data
59
+
60
+ norm = {"positions": [], "edges": data.get("edges", []) if isinstance(data.get("edges", []), list) else []}
61
+
62
+ # Case B: positions is a dict
63
+ if "positions" in data and isinstance(data["positions"], dict):
64
+ for pos_name, pos_val in data["positions"].items():
65
+ if isinstance(pos_val, dict):
66
+ name = pos_val.get("name") or pos_name
67
+ skills = pos_val.get("skills", {})
68
+ else:
69
+ name = str(pos_name)
70
+ skills = {}
71
+ norm["positions"].append({"name": name, "skills": _coerce_skills(skills)})
72
+ return norm
73
+
74
+ # Case C/D: top-level keys (excluding known keys) are positions
75
+ excluded = {"positions", "edges"}
76
+ candidates = {k: v for k, v in data.items() if k not in excluded}
77
+ if candidates:
78
+ for pos_name, pos_val in candidates.items():
79
+ if isinstance(pos_val, dict):
80
+ name = pos_val.get("name") or pos_name
81
+ skills = pos_val.get("skills", {})
82
+ elif isinstance(pos_val, list):
83
+ # interpret as flat skills list -> put under "other" cluster with count=1
84
+ skills = {"other": [{"name": s, "count": 1} for s in pos_val]}
85
+ name = pos_name
86
+ else:
87
+ name = pos_name
88
+ skills = {}
89
+ norm["positions"].append({"name": name, "skills": _coerce_skills(skills)})
90
+ return norm
91
+
92
+ raise gr.Error("Unrecognized JSON schema. Include 'positions' or a mapping of position names.")
93
+
94
+ def _coerce_skills(skills):
95
+ """
96
+ Ensure skills structure is {cluster: [{"name":..., "count": int}, ...], ...}
97
+ Accepts:
98
+ - dict of cluster -> list of dicts with name/count
99
+ - dict of cluster -> list of strings (count=1)
100
+ - list of strings -> will be wrapped into {'other': [...]}
101
+ """
102
+ if isinstance(skills, list):
103
+ return {"other": [{"name": str(s), "count": 1} for s in skills]}
104
+ if isinstance(skills, dict):
105
+ out = {}
106
+ for cl, items in skills.items():
107
+ if isinstance(items, list):
108
+ norm_items = []
109
+ for it in items:
110
+ if isinstance(it, dict):
111
+ nm = str(it.get("name", "")).strip()
112
+ if not nm:
113
+ continue
114
+ cnt = int(it.get("count", 1))
115
+ norm_items.append({"name": nm, "count": cnt})
116
+ else:
117
+ nm = str(it).strip()
118
+ if not nm:
119
+ continue
120
+ norm_items.append({"name": nm, "count": 1})
121
+ if norm_items:
122
+ out[cl or "other"] = norm_items
123
+ return out
124
+ return {}
125
+
126
  def _aggregate_skill_totals(data):
127
  totals = {}
128
  for pos in data.get("positions", []):
129
+ for cluster, items in (pos.get("skills") or {}).items():
130
  for it in items:
131
  name, cnt = it.get("name"), int(it.get("count", 0))
132
  if not name:
 
147
  pos_name = pos.get("name")
148
  if not pos_name:
149
  continue
150
+ total_skills = sum(len(v) for v in (pos.get("skills") or {}).values())
151
  G.add_node(
152
  f"pos::{pos_name}",
153
  label=pos_name,
154
  kind="position",
155
  size=max(15, min(60, 10 + 2*total_skills)),
156
  color="#333333",
157
+ title=f"<b>{pos_name}</b><br/>skills groups: {list((pos.get('skills') or {}).keys())}",
158
  )
159
 
160
  skill_totals = _aggregate_skill_totals(data)
 
164
  if not pos_name:
165
  continue
166
  flat = []
167
+ for cluster, items in (pos.get("skills") or {}).items():
168
  for it in items:
169
  if int(it.get("count", 0)) >= min_skill_count:
170
  flat.append((cluster or "other", it["name"], int(it["count"])))
 
194
 
195
  if include_pos_pos_edges:
196
  for e in data.get("edges", []):
197
+ if not isinstance(e, dict):
198
+ continue
199
  w = float(e.get("weight", 0.0))
200
  if w < pos_pos_weight_min:
201
  continue
 
203
  b = f"pos::{e.get('target')}"
204
  if a in G and b in G:
205
  G.add_edge(a, b, weight=max(1, int(w*10)), color="#555555", dashes=True, title=f"similarity: {w}")
 
206
  return G
207
 
208
  def _nx_to_pyvis_html(G, physics, layout, height_px):
 
217
  if physics:
218
  net.force_atlas_2based()
219
 
220
+ # PyVis expects pure JSON (no 'var options =')
221
  if layout == "hierarchical (positions → skills)":
222
+ options = {
223
+ "layout": {
224
+ "hierarchical": {
225
+ "enabled": True,
226
+ "levelSeparation": 180,
227
+ "nodeSpacing": 170,
228
+ "treeSpacing": 200,
229
+ "direction": "UD",
230
+ "sortMethod": "hubsize"
231
+ }
232
+ },
233
+ "physics": {"enabled": bool(physics)}
 
234
  }
 
235
  else:
236
+ options = {
237
+ "physics": {
238
+ "enabled": bool(physics),
239
+ "stabilization": {"iterations": 150}
240
+ }
241
  }
242
+ import json as _json
243
+ net.set_options(_json.dumps(options))
244
 
245
  for n, data in G.nodes(data=True):
246
  net.add_node(
 
257
  return net.generate_html()
258
 
259
  def run(json_file, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min, physics, layout, height_px):
260
+ data_raw = _load_json(json_file)
261
+ data = _normalize_schema(data_raw)
262
  G = _build_graph(data, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min)
263
  html = _nx_to_pyvis_html(G, physics=physics, layout=layout, height_px=height_px)
264
  return html