Nucha committed on
Commit
f728c11
·
verified ·
1 Parent(s): 844f88a

Upload 2 files

Browse files
Files changed (1) hide show
  1. app.py +178 -148
app.py CHANGED
@@ -1,178 +1,208 @@
1
  \
2
  import os
3
  import json
4
- import math
 
 
5
  import gradio as gr
6
- from pyvis.network import Network
7
 
8
- DEFAULT_JSON = "job_position_skill_graph.json" # Put this file at repo root
 
9
 
10
- # Color palette for clusters (fallback if more clusters appear)
11
- CLUSTER_COLORS = [
12
- "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd",
13
- "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"
14
- ]
15
 
16
- def load_graph_json(json_file):
17
- """
18
- Load JSON either from uploaded file or from DEFAULT_JSON if present.
19
- Expected schema:
20
- {
21
- "positions": [{"name": "...","skills": {"cluster":[{"name":"skill","count":int},...]...}}],
22
- "edges": [{"source":"...","target":"...","weight":float,"shared_skills":[...]}]
23
- }
24
- """
25
  if json_file is not None:
26
- # gr.File may pass a tempfile path string or a file object
27
  path = json_file.name if hasattr(json_file, "name") else json_file
28
  with open(path, "r", encoding="utf-8") as f:
29
- return json.load(f)
 
30
  if os.path.exists(DEFAULT_JSON):
31
  with open(DEFAULT_JSON, "r", encoding="utf-8") as f:
32
- return json.load(f)
 
33
  raise gr.Error("No JSON provided and default file not found. Please upload job_position_skill_graph.json.")
34
 
35
- def infer_node_cluster_and_size(position, node_size_mode):
36
- """
37
- Infer dominant cluster for coloring; compute base node size.
38
- node_size_mode: 'skills-total' or 'skills-top10'
39
- """
40
- skills_by_cluster = position.get("skills", {})
41
- # Aggregate counts per cluster
42
- cluster_scores = {}
43
- total_skills_count = 0
44
- for cl, items in skills_by_cluster.items():
45
- s = sum(max(0, int(it.get("count", 0))) for it in items)
46
- cluster_scores[cl] = s
47
- total_skills_count += s
48
- if not cluster_scores:
49
- return ("other", 10)
50
-
51
- # Dominant cluster
52
- dominant = max(cluster_scores.items(), key=lambda x: x[1])[0]
53
-
54
- if node_size_mode == "skills-top10":
55
- # Sum only top 10 across clusters
56
- acc = 0
57
- for cl, items in skills_by_cluster.items():
58
- for it in sorted(items, key=lambda x: -int(x.get("count", 0)))[:10]:
59
- acc += int(it.get("count", 0))
60
- size = acc
61
- else:
62
- size = total_skills_count
63
-
64
- # Map size to a reasonable node size (10..60)
65
- if size <= 0:
66
- return (dominant, 10)
67
- # sqrt scale to compress big ranges
68
- scaled = 10 + min(50, 5 * math.sqrt(size))
69
- return (dominant, scaled)
70
-
71
- def build_tooltip(position, max_items_per_cluster=6):
72
  """
73
- Build HTML tooltip listing top skills per cluster.
 
74
  """
75
- name = position.get("name", "")
76
- skills_by_cluster = position.get("skills", {})
77
- parts = [f"<b>{name}</b>"]
78
- for cl, items in skills_by_cluster.items():
79
- if not items:
 
 
 
 
 
 
 
 
80
  continue
81
- top = sorted(items, key=lambda x: -int(x.get('count', 0)))[:max_items_per_cluster]
82
- inner = ", ".join([f"{it.get('name','')} ({int(it.get('count',0))})" for it in top])
83
- parts.append(f"<div><b>{cl}:</b> {inner}</div>")
84
- return "<br/>".join(parts)
85
-
86
- def render_network(json_file, min_edge_weight, show_labels, physics, max_items_per_cluster, node_size_mode, filter_position, layout):
87
- data = load_graph_json(json_file)
88
-
89
- # Prepare pyvis network
90
- net = Network(height="720px", width="100%", bgcolor="#ffffff", font_color="#111111", directed=False, cdn_resources="in_line")
91
- # Physics options
92
- if physics:
93
- if layout == "Barnes-Hut":
94
- net.barnes_hut()
95
- else:
96
- # ForceAtlas2Based may look nice for dense graphs
97
- net.force_atlas_2based()
98
- else:
99
- net.set_options("""
100
- var options = { physics: { enabled: false } };
101
- """)
102
-
103
- # Build cluster -> color map based on encountered clusters
104
- cluster_names = []
105
- for pos in data.get("positions", []):
106
- for cl in (pos.get("skills") or {}).keys():
107
- if cl not in cluster_names:
108
- cluster_names.append(cl)
109
- color_map = {}
110
- for idx, cl in enumerate(cluster_names):
111
- color_map[cl] = CLUSTER_COLORS[idx % len(CLUSTER_COLORS)]
112
- color_map.setdefault("other", "#888888")
113
-
114
- # Optional position name filter (substring, case-insensitive)
115
- filter_position = (filter_position or "").strip().lower()
116
-
117
- # Add nodes
118
- node_ids = set()
119
- for pos in data.get("positions", []):
120
- name = pos.get("name", "")
121
- if filter_position and filter_position not in name.lower():
122
  continue
123
- dominant_cluster, size = infer_node_cluster_and_size(pos, node_size_mode)
124
- tooltip = build_tooltip(pos, max_items_per_cluster=max_items_per_cluster)
125
- net.add_node(
126
- n_id=name,
127
- label=name if show_labels else "",
128
- title=tooltip,
129
- color=color_map.get(dominant_cluster, color_map["other"]),
130
- size=size
131
- )
132
- node_ids.add(name)
133
-
134
- # Add edges with threshold filter
135
- kept_edges = 0
136
- for e in data.get("edges", []):
137
- w = float(e.get("weight", 0))
138
- if w < float(min_edge_weight):
139
- continue
140
- src, tgt = e.get("source"), e.get("target")
141
- if (src in node_ids) and (tgt in node_ids):
142
- title = f"weight={w:.2f} | shared: {', '.join(e.get('shared_skills', [])[:10])}"
143
- net.add_edge(src, tgt, value=w, title=title)
144
- kept_edges += 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
- # If graph ends up empty, hint the user
147
- if len(node_ids) == 0:
148
- html = "<h3>No nodes to show</h3><p>Loosen filters or upload a JSON.</p>"
149
- return html
150
 
151
- # Generate HTML
152
- html = net.generate_html()
153
- return html
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
 
155
- with gr.Blocks(title="Job Position Hard Skills — Network") as demo:
156
- gr.Markdown("# Job Position ↔ Hard Skills — Network Diagram\nUpload a JSON or place **job_position_skill_graph.json** in repo root.")
157
 
158
  with gr.Row():
159
- with gr.Column(scale=1):
160
  json_file = gr.File(label="Upload job_position_skill_graph.json (optional)", file_count="single", file_types=[".json"])
161
- min_edge_weight = gr.Slider(0.0, 1.0, value=0.15, step=0.01, label="Min edge weight (Jaccard)")
162
- show_labels = gr.Checkbox(value=True, label="Show node labels")
 
 
 
163
  physics = gr.Checkbox(value=True, label="Enable physics layout")
164
- layout = gr.Radio(choices=["Barnes-Hut", "ForceAtlas2Based"], value="ForceAtlas2Based", label="Layout algorithm")
165
- node_size_mode = gr.Radio(choices=["skills-total", "skills-top10"], value="skills-total", label="Node size scale by")
166
- max_items_per_cluster = gr.Slider(1, 20, value=6, step=1, label="Tooltip: max skills per cluster")
167
- filter_position = gr.Textbox(value="", label="Filter by position name (substring)")
168
- btn = gr.Button("Render", variant="primary")
169
- with gr.Column(scale=1):
170
- out_html = gr.HTML(label="Network")
171
 
172
  btn.click(
173
- fn=render_network,
174
- inputs=[json_file, min_edge_weight, show_labels, physics, max_items_per_cluster, node_size_mode, filter_position, layout],
175
- outputs=[out_html]
176
  )
177
 
178
  if __name__ == "__main__":
 
1
  \
2
  import os
3
  import json
4
+ import re
5
+ from collections import defaultdict
6
+
7
  import gradio as gr
 
8
 
9
+ # Graph libs
10
+ from pyvis.network import Network
11
 
12
+ DEFAULT_JSON = "job_position_skill_graph.json" # Put this file at the repo root
 
 
 
 
13
 
14
+ def load_graph(json_file):
15
+ """Load JSON from upload or default file in repo root."""
 
 
 
 
 
 
 
16
  if json_file is not None:
 
17
  path = json_file.name if hasattr(json_file, "name") else json_file
18
  with open(path, "r", encoding="utf-8") as f:
19
+ data = json.load(f)
20
+ return data
21
  if os.path.exists(DEFAULT_JSON):
22
  with open(DEFAULT_JSON, "r", encoding="utf-8") as f:
23
+ data = json.load(f)
24
+ return data
25
  raise gr.Error("No JSON provided and default file not found. Please upload job_position_skill_graph.json.")
26
 
27
+ def flatten_skills(positions):
28
+ """Return set of all skills and map skill->cluster (first seen)."""
29
+ skill2cluster = {}
30
+ for pos in positions:
31
+ grouped = pos.get("skills", {})
32
+ for cluster_name, items in grouped.items():
33
+ for it in items:
34
+ sk = str(it.get("name", "")).strip()
35
+ if not sk:
36
+ continue
37
+ if sk not in skill2cluster:
38
+ skill2cluster[sk] = cluster_name
39
+ return skill2cluster
40
+
41
+ def build_edges(positions, min_count=1, max_skills_per_position=100, clusters_filter=None, positions_filter=None, skill_regex=None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  """
43
+ Create bipartite edges Position -> Skill with weight by 'count'.
44
+ Apply filters: min_count, clusters_filter (set), positions_filter (set), regex.
45
  """
46
+ edges = []
47
+ skill_counts_global = defaultdict(int)
48
+
49
+ patt = None
50
+ if skill_regex:
51
+ try:
52
+ patt = re.compile(skill_regex, re.IGNORECASE)
53
+ except re.error as e:
54
+ raise gr.Error(f"Invalid regex: {e}")
55
+
56
+ for pos in positions:
57
+ pname = pos.get("name", "").strip()
58
+ if not pname:
59
  continue
60
+ if positions_filter and pname not in positions_filter:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  continue
62
+ grouped = pos.get("skills", {})
63
+ # flatten with filter by cluster and regex
64
+ flat = []
65
+ for cluster_name, items in grouped.items():
66
+ if clusters_filter and cluster_name not in clusters_filter:
67
+ continue
68
+ for it in items:
69
+ sk = str(it.get("name", "")).strip()
70
+ cnt = int(it.get("count", 0))
71
+ if not sk:
72
+ continue
73
+ if cnt < min_count:
74
+ continue
75
+ if patt and not patt.search(sk):
76
+ continue
77
+ flat.append((cluster_name, sk, cnt))
78
+
79
+ # keep top-K for this position by count
80
+ flat.sort(key=lambda x: -x[2])
81
+ for cluster_name, sk, cnt in flat[:max_skills_per_position]:
82
+ edges.append((pname, sk, cnt, cluster_name))
83
+ skill_counts_global[sk] += cnt
84
+
85
+ return edges, skill_counts_global
86
+
87
+ def build_pyvis_html(
88
+ data,
89
+ min_count=5,
90
+ max_skills_per_position=30,
91
+ selected_clusters=None,
92
+ selected_positions=None,
93
+ skill_regex="",
94
+ physics=True,
95
+ hierarchical=False
96
+ ):
97
+ positions = data.get("positions", [])
98
+ # Derive available clusters and positions for UI
99
+ all_clusters = sorted({cl for pos in positions for cl in pos.get("skills", {}).keys()})
100
+ all_positions = sorted({pos.get("name","") for pos in positions if pos.get("name","")})
101
+
102
+ clusters_filter = set(selected_clusters) if selected_clusters else set(all_clusters)
103
+ positions_filter = set(selected_positions) if selected_positions else None
104
+
105
+ edges, skill_counts_global = build_edges(
106
+ positions,
107
+ min_count=min_count,
108
+ max_skills_per_position=max_skills_per_position,
109
+ clusters_filter=clusters_filter,
110
+ positions_filter=positions_filter,
111
+ skill_regex=skill_regex.strip() or None
112
+ )
113
 
114
+ # Create network
115
+ net = Network(height="700px", width="100%", bgcolor="#ffffff", font_color="#222222", directed=False, notebook=False, cdn_resources="in_line")
 
 
116
 
117
+ # Add nodes
118
+ # Position nodes: group 'position', shape 'dot'
119
+ # Skill nodes: group by cluster for color
120
+ pos_added = set()
121
+ skill_added = set()
122
+
123
+ # Predefine some distinct groups for clusters (pyvis auto-colors groups)
124
+ # We'll assign group=cluster for skills, and "position" for positions.
125
+ for pname, sk, cnt, cluster_name in edges:
126
+ if pname not in pos_added:
127
+ net.add_node(f"pos::{pname}", label=pname, title=f"Position: {pname}", shape="dot", size=18, group="position")
128
+ pos_added.add(pname)
129
+ if sk not in skill_added:
130
+ net.add_node(f"sk::{sk}", label=sk, title=f"Skill: {sk}\\nCluster: {cluster_name}\\nGlobal count (approx.): {skill_counts_global.get(sk, 0)}", shape="box", group=cluster_name)
131
+ skill_added.add(sk)
132
+ # Edge with value influences thickness
133
+ net.add_edge(f"pos::{pname}", f"sk::{sk}", value=int(cnt), title=f"{pname} ↔ {sk} (count={cnt})")
134
+
135
+ # Physics / layout options
136
+ options = {
137
+ "physics": {
138
+ "enabled": bool(physics),
139
+ "barnesHut": {"gravitationalConstant": -8000, "centralGravity": 0.2, "springLength": 150, "springConstant": 0.04},
140
+ "stabilization": {"enabled": True, "iterations": 100}
141
+ }
142
+ }
143
+ if hierarchical:
144
+ options["layout"] = {"hierarchical": {"enabled": True, "direction": "LR", "sortMethod": "hubsize"}}
145
+ net.set_options(json.dumps(options))
146
+
147
+ # Render HTML
148
+ html_path = "network.html"
149
+ net.write_html(html_path)
150
+ with open(html_path, "r", encoding="utf-8") as f:
151
+ html = f.read()
152
+
153
+ # Build a small data preview (limit rows)
154
+ preview_rows = [{"position": p, "skill": s, "cluster": c, "count": cnt} for (p, s, cnt, c) in edges]
155
+ preview_rows = sorted(preview_rows, key=lambda x: (-x["count"], x["position"]))[:1000] # cap
156
+ return html, all_clusters, all_positions, preview_rows
157
+
158
+ def run(
159
+ json_file,
160
+ min_count,
161
+ max_skills_per_position,
162
+ selected_clusters,
163
+ selected_positions,
164
+ skill_regex,
165
+ physics,
166
+ hierarchical
167
+ ):
168
+ data = load_graph(json_file)
169
+ html, all_clusters, all_positions, preview_rows = build_pyvis_html(
170
+ data,
171
+ min_count=min_count,
172
+ max_skills_per_position=max_skills_per_position,
173
+ selected_clusters=selected_clusters,
174
+ selected_positions=selected_positions,
175
+ skill_regex=skill_regex,
176
+ physics=physics,
177
+ hierarchical=hierarchical
178
+ )
179
+ # Update choices if user hasn't selected yet
180
+ clusters_update = gr.update(choices=all_clusters, value=selected_clusters or all_clusters)
181
+ positions_update = gr.update(choices=all_positions, value=selected_positions or [])
182
+ return html, clusters_update, positions_update, preview_rows
183
 
184
+ with gr.Blocks(title="Position–Skill Network (PyVis)") as demo:
185
+ gr.Markdown("# Position–Skill Network (PyVis)\nUpload `job_position_skill_graph.json` or place it in the repo root.")
186
 
187
  with gr.Row():
188
+ with gr.Column(scale=1, min_width=350):
189
  json_file = gr.File(label="Upload job_position_skill_graph.json (optional)", file_count="single", file_types=[".json"])
190
+ min_count = gr.Slider(1, 50, value=5, step=1, label="Minimum skill count (filter)")
191
+ max_skills_per_position = gr.Slider(5, 200, value=30, step=1, label="Max skills per position")
192
+ selected_clusters = gr.CheckboxGroup(choices=[], label="Clusters to include (blank = all)")
193
+ selected_positions = gr.CheckboxGroup(choices=[], label="Positions to include (blank = all)")
194
+ skill_regex = gr.Textbox(value="", label="Skill name filter (regex, optional)")
195
  physics = gr.Checkbox(value=True, label="Enable physics layout")
196
+ hierarchical = gr.Checkbox(value=False, label="Hierarchical layout (Left→Right)")
197
+ btn = gr.Button("Build Network", variant="primary")
198
+ with gr.Column(scale=2):
199
+ out_html = gr.HTML(label="Network Diagram")
200
+ out_table = gr.Dataframe(label="Edges preview (top)", wrap=True)
 
 
201
 
202
  btn.click(
203
+ fn=run,
204
+ inputs=[json_file, min_count, max_skills_per_position, selected_clusters, selected_positions, skill_regex, physics, hierarchical],
205
+ outputs=[out_html, selected_clusters, selected_positions, out_table]
206
  )
207
 
208
  if __name__ == "__main__":