Nucha committed on
Commit
e99665e
·
verified ·
1 Parent(s): f728c11

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +172 -183
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,208 +1,197 @@
1
  \
2
  import os
3
  import json
4
- import re
5
- from collections import defaultdict
6
-
7
  import gradio as gr
8
 
9
- # Graph libs
10
  from pyvis.network import Network
11
 
12
- DEFAULT_JSON = "job_position_skill_graph.json" # Put this file at the repo root
13
-
14
- def load_graph(json_file):
15
- """Load JSON from upload or default file in repo root."""
16
- if json_file is not None:
17
- path = json_file.name if hasattr(json_file, "name") else json_file
18
- with open(path, "r", encoding="utf-8") as f:
19
- data = json.load(f)
20
- return data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  if os.path.exists(DEFAULT_JSON):
22
- with open(DEFAULT_JSON, "r", encoding="utf-8") as f:
23
- data = json.load(f)
24
- return data
25
- raise gr.Error("No JSON provided and default file not found. Please upload job_position_skill_graph.json.")
26
-
27
- def flatten_skills(positions):
28
- """Return set of all skills and map skill->cluster (first seen)."""
29
- skill2cluster = {}
30
- for pos in positions:
31
- grouped = pos.get("skills", {})
32
- for cluster_name, items in grouped.items():
33
  for it in items:
34
- sk = str(it.get("name", "")).strip()
35
- if not sk:
36
  continue
37
- if sk not in skill2cluster:
38
- skill2cluster[sk] = cluster_name
39
- return skill2cluster
40
-
41
- def build_edges(positions, min_count=1, max_skills_per_position=100, clusters_filter=None, positions_filter=None, skill_regex=None):
42
- """
43
- Create bipartite edges Position -> Skill with weight by 'count'.
44
- Apply filters: min_count, clusters_filter (set), positions_filter (set), regex.
45
- """
46
- edges = []
47
- skill_counts_global = defaultdict(int)
48
-
49
- patt = None
50
- if skill_regex:
51
- try:
52
- patt = re.compile(skill_regex, re.IGNORECASE)
53
- except re.error as e:
54
- raise gr.Error(f"Invalid regex: {e}")
55
-
56
- for pos in positions:
57
- pname = pos.get("name", "").strip()
58
- if not pname:
59
  continue
60
- if positions_filter and pname not in positions_filter:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  continue
62
- grouped = pos.get("skills", {})
63
- # flatten with filter by cluster and regex
64
  flat = []
65
- for cluster_name, items in grouped.items():
66
- if clusters_filter and cluster_name not in clusters_filter:
67
- continue
68
  for it in items:
69
- sk = str(it.get("name", "")).strip()
70
- cnt = int(it.get("count", 0))
71
- if not sk:
72
- continue
73
- if cnt < min_count:
74
- continue
75
- if patt and not patt.search(sk):
76
- continue
77
- flat.append((cluster_name, sk, cnt))
78
-
79
- # keep top-K for this position by count
80
- flat.sort(key=lambda x: -x[2])
81
- for cluster_name, sk, cnt in flat[:max_skills_per_position]:
82
- edges.append((pname, sk, cnt, cluster_name))
83
- skill_counts_global[sk] += cnt
84
-
85
- return edges, skill_counts_global
86
-
87
- def build_pyvis_html(
88
- data,
89
- min_count=5,
90
- max_skills_per_position=30,
91
- selected_clusters=None,
92
- selected_positions=None,
93
- skill_regex="",
94
- physics=True,
95
- hierarchical=False
96
- ):
97
- positions = data.get("positions", [])
98
- # Derive available clusters and positions for UI
99
- all_clusters = sorted({cl for pos in positions for cl in pos.get("skills", {}).keys()})
100
- all_positions = sorted({pos.get("name","") for pos in positions if pos.get("name","")})
101
-
102
- clusters_filter = set(selected_clusters) if selected_clusters else set(all_clusters)
103
- positions_filter = set(selected_positions) if selected_positions else None
104
-
105
- edges, skill_counts_global = build_edges(
106
- positions,
107
- min_count=min_count,
108
- max_skills_per_position=max_skills_per_position,
109
- clusters_filter=clusters_filter,
110
- positions_filter=positions_filter,
111
- skill_regex=skill_regex.strip() or None
112
- )
113
-
114
- # Create network
115
- net = Network(height="700px", width="100%", bgcolor="#ffffff", font_color="#222222", directed=False, notebook=False, cdn_resources="in_line")
116
-
117
- # Add nodes
118
- # Position nodes: group 'position', shape 'dot'
119
- # Skill nodes: group by cluster for color
120
- pos_added = set()
121
- skill_added = set()
122
-
123
- # Predefine some distinct groups for clusters (pyvis auto-colors groups)
124
- # We'll assign group=cluster for skills, and "position" for positions.
125
- for pname, sk, cnt, cluster_name in edges:
126
- if pname not in pos_added:
127
- net.add_node(f"pos::{pname}", label=pname, title=f"Position: {pname}", shape="dot", size=18, group="position")
128
- pos_added.add(pname)
129
- if sk not in skill_added:
130
- net.add_node(f"sk::{sk}", label=sk, title=f"Skill: {sk}\\nCluster: {cluster_name}\\nGlobal count (approx.): {skill_counts_global.get(sk, 0)}", shape="box", group=cluster_name)
131
- skill_added.add(sk)
132
- # Edge with value influences thickness
133
- net.add_edge(f"pos::{pname}", f"sk::{sk}", value=int(cnt), title=f"{pname} ↔ {sk} (count={cnt})")
134
-
135
- # Physics / layout options
136
- options = {
137
- "physics": {
138
- "enabled": bool(physics),
139
- "barnesHut": {"gravitationalConstant": -8000, "centralGravity": 0.2, "springLength": 150, "springConstant": 0.04},
140
- "stabilization": {"enabled": True, "iterations": 100}
141
- }
142
- }
143
- if hierarchical:
144
- options["layout"] = {"hierarchical": {"enabled": True, "direction": "LR", "sortMethod": "hubsize"}}
145
- net.set_options(json.dumps(options))
146
-
147
- # Render HTML
148
- html_path = "network.html"
149
- net.write_html(html_path)
150
- with open(html_path, "r", encoding="utf-8") as f:
151
- html = f.read()
152
-
153
- # Build a small data preview (limit rows)
154
- preview_rows = [{"position": p, "skill": s, "cluster": c, "count": cnt} for (p, s, cnt, c) in edges]
155
- preview_rows = sorted(preview_rows, key=lambda x: (-x["count"], x["position"]))[:1000] # cap
156
- return html, all_clusters, all_positions, preview_rows
157
-
158
- def run(
159
- json_file,
160
- min_count,
161
- max_skills_per_position,
162
- selected_clusters,
163
- selected_positions,
164
- skill_regex,
165
- physics,
166
- hierarchical
167
- ):
168
- data = load_graph(json_file)
169
- html, all_clusters, all_positions, preview_rows = build_pyvis_html(
170
- data,
171
- min_count=min_count,
172
- max_skills_per_position=max_skills_per_position,
173
- selected_clusters=selected_clusters,
174
- selected_positions=selected_positions,
175
- skill_regex=skill_regex,
176
- physics=physics,
177
- hierarchical=hierarchical
178
  )
179
- # Update choices if user hasn't selected yet
180
- clusters_update = gr.update(choices=all_clusters, value=selected_clusters or all_clusters)
181
- positions_update = gr.update(choices=all_positions, value=selected_positions or [])
182
- return html, clusters_update, positions_update, preview_rows
183
-
184
- with gr.Blocks(title="Position–Skill Network (PyVis)") as demo:
185
- gr.Markdown("# Position–Skill Network (PyVis)\nUpload `job_position_skill_graph.json` or place it in the repo root.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
 
187
  with gr.Row():
188
- with gr.Column(scale=1, min_width=350):
189
  json_file = gr.File(label="Upload job_position_skill_graph.json (optional)", file_count="single", file_types=[".json"])
190
- min_count = gr.Slider(1, 50, value=5, step=1, label="Minimum skill count (filter)")
191
- max_skills_per_position = gr.Slider(5, 200, value=30, step=1, label="Max skills per position")
192
- selected_clusters = gr.CheckboxGroup(choices=[], label="Clusters to include (blank = all)")
193
- selected_positions = gr.CheckboxGroup(choices=[], label="Positions to include (blank = all)")
194
- skill_regex = gr.Textbox(value="", label="Skill name filter (regex, optional)")
195
- physics = gr.Checkbox(value=True, label="Enable physics layout")
196
- hierarchical = gr.Checkbox(value=False, label="Hierarchical layout (Left→Right)")
197
  btn = gr.Button("Build Network", variant="primary")
198
- with gr.Column(scale=2):
199
- out_html = gr.HTML(label="Network Diagram")
200
- out_table = gr.Dataframe(label="Edges preview (top)", wrap=True)
201
 
202
  btn.click(
203
  fn=run,
204
- inputs=[json_file, min_count, max_skills_per_position, selected_clusters, selected_positions, skill_regex, physics, hierarchical],
205
- outputs=[out_html, selected_clusters, selected_positions, out_table]
206
  )
207
 
208
  if __name__ == "__main__":
 
1
  \
2
  import os
3
  import json
4
+ import math
 
 
5
  import gradio as gr
6
 
7
+ import networkx as nx
8
  from pyvis.network import Network
9
 
10
+ DEFAULT_JSON = "job_position_skill_graph.json"
11
+
12
+ CLUSTER_COLORS = {
13
+ "programming": "#1f77b4",
14
+ "databases": "#ff7f0e",
15
+ "cloud": "#2ca02c",
16
+ "devops": "#d62728",
17
+ "version_control": "#9467bd",
18
+ "data_processing": "#8c564b",
19
+ "ml_ai": "#e377c2",
20
+ "web_backend": "#7f7f7f",
21
+ "web_frontend": "#bcbd22",
22
+ "security": "#17becf",
23
+ "networking": "#1b9e77",
24
+ "mobile": "#d95f02",
25
+ "analytics_bi": "#7570b3",
26
+ "testing_qc": "#e7298a",
27
+ "infra_sys": "#66a61e",
28
+ "other": "#999999",
29
+ }
30
+
31
def _load_json(file_obj):
    """Load the position/skill graph from an upload or from the default file.

    Args:
        file_obj: Value received from the file-upload input. ``None`` means
            nothing was uploaded; otherwise either an object exposing the
            path as ``.name`` or a plain path string.

    Returns:
        The decoded JSON document.

    Raises:
        gr.Error: If nothing was uploaded and ``DEFAULT_JSON`` does not exist.
    """
    if file_obj is not None:
        # Accept both upload objects (path in ``.name``) and bare path strings.
        path = getattr(file_obj, "name", file_obj)
        # ``with`` guarantees the handle is closed; json.load(open(...)) leaks it.
        with open(path, "r", encoding="utf-8") as fh:
            return json.load(fh)
    if os.path.exists(DEFAULT_JSON):
        with open(DEFAULT_JSON, "r", encoding="utf-8") as fh:
            return json.load(fh)
    raise gr.Error("No JSON provided and default file not found. Upload or place job_position_skill_graph.json in repo root.")
37
+
38
def _aggregate_skill_totals(data):
    """Sum every skill's count across all positions and record its clusters.

    Args:
        data: Parsed graph document. ``data["positions"]`` is read as a list
            of dicts whose ``"skills"`` entry maps a cluster name to a list of
            ``{"name": ..., "count": ...}`` items.

    Returns:
        dict mapping skill name -> ``{"total": int, "clusters": set,
        "cluster": str}``. ``"cluster"`` is the first cluster the skill was
        seen in, so the result is the same on every run (picking an element
        out of the set would depend on hash ordering).
    """
    totals = {}
    for pos in data.get("positions", []):
        for cluster, items in pos.get("skills", {}).items():
            # An empty/None cluster key is filed under "other".
            cluster = cluster or "other"
            for it in items:
                name = it.get("name")
                if not name:
                    continue
                entry = totals.setdefault(
                    name, {"total": 0, "clusters": set(), "cluster": cluster}
                )
                # ``or 0`` tolerates a missing or null count.
                entry["total"] += int(it.get("count") or 0)
                entry["clusters"].add(cluster)
    return totals
54
+
55
def _build_graph(data, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min):
    """Build the position/skill graph as a ``networkx.Graph``.

    Position nodes use ids of the form ``pos::<name>`` and skill nodes
    ``skill::<name>``. Every node carries ``label``, ``kind``, ``size``,
    ``color`` and ``title`` attributes; position-skill edges carry ``weight``
    (the skill's count for that position) and ``title``.

    Args:
        data: Parsed graph document with a ``"positions"`` list and,
            optionally, an ``"edges"`` list of
            ``{"source", "target", "weight"}`` dicts.
        min_skill_count: Items whose count is below this are dropped.
        top_k_per_position: Keep only the K highest-count skills of each
            position; ``0`` (or a falsy value) keeps all of them.
        include_pos_pos_edges: When true, also add the position-position
            edges listed in ``data["edges"]``.
        pos_pos_weight_min: Minimum ``weight`` for a position-position edge
            to be added.

    Returns:
        The populated ``networkx.Graph``.
    """
    G = nx.Graph()
    positions = data.get("positions", [])

    # Pass 1: one node per named position, sized by how many skill items it lists.
    for pos in positions:
        pos_name = pos.get("name")
        if not pos_name:
            continue
        skills = pos.get("skills", {})
        total_skills = sum(len(v) for v in skills.values())
        G.add_node(
            f"pos::{pos_name}",
            label=pos_name,
            kind="position",
            size=max(15, min(60, 10 + 2*total_skills)),
            color="#333333",
            title=f"<b>{pos_name}</b><br/>skills groups: {list(skills.keys())}",
        )

    skill_totals = _aggregate_skill_totals(data)
    # Defensive: a slice bound must be an int, whatever numeric type the caller passes.
    top_k = int(top_k_per_position or 0)

    # Pass 2: skill nodes and position-skill edges.
    for pos in positions:
        pos_name = pos.get("name")
        if not pos_name:
            continue

        flat = []
        for cluster, items in pos.get("skills", {}).items():
            for it in items:
                skill = it.get("name")
                # Nameless items are skipped, matching _aggregate_skill_totals.
                if not skill:
                    continue
                # Read the count once; a missing/null count is treated as 0
                # rather than raising when min_skill_count is 0.
                cnt = int(it.get("count") or 0)
                if cnt >= min_skill_count:
                    flat.append((cluster or "other", skill, cnt))
        if top_k > 0:
            flat = sorted(flat, key=lambda x: -x[2])[:top_k]

        for cluster, skill, cnt in flat:
            node_id = f"skill::{skill}"
            if node_id not in G:
                total = skill_totals.get(skill, {}).get("total", cnt)
                # Square-root scaling, clamped to the range [8, 50].
                node_size = max(8, min(50, 6 + math.sqrt(total)*2))
                color = CLUSTER_COLORS.get(cluster, "#999999")
                G.add_node(
                    node_id,
                    label=skill,
                    kind="skill",
                    size=node_size,
                    color=color,
                    title=f"<b>{skill}</b><br/>cluster: {cluster}<br/>total: {total}",
                )
            G.add_edge(
                f"pos::{pos_name}",
                node_id,
                weight=cnt,
                title=f"{pos_name} → {skill}: {cnt}",
            )

    if include_pos_pos_edges:
        for e in data.get("edges", []):
            w = float(e.get("weight") or 0.0)
            if w < pos_pos_weight_min:
                continue
            a = f"pos::{e.get('source')}"
            b = f"pos::{e.get('target')}"
            # Only connect positions that actually received a node above.
            if a in G and b in G:
                G.add_edge(a, b, weight=max(1, int(w*10)), color="#555555", dashes=True, title=f"similarity: {w}")

    return G
118
+
119
def _nx_to_pyvis_html(G, physics, layout, height_px):
    """Render graph *G* with PyVis and return the generated HTML.

    Args:
        G: Graph whose nodes may carry ``label``, ``color``, ``title``,
            ``size`` and ``kind`` attributes and whose edges may carry
            ``title``, ``weight``, ``color`` and ``dashes``.
        physics: Truthy to enable the physics simulation.
        layout: ``"hierarchical (positions → skills)"`` selects the
            hierarchical layout; any other value uses the free layout.
        height_px: Canvas height in pixels.

    Returns:
        The HTML string produced by ``Network.generate_html()``.
    """
    net = Network(
        height=f"{height_px}px",
        width="100%",
        bgcolor="#ffffff",
        font_color="#222222",
        directed=False,
        notebook=False,
    )

    # PyVis decodes the set_options() payload as JSON, so a JavaScript object
    # literal (unquoted keys, single quotes) is rejected. Build a dict and
    # serialise it instead.
    physics_opts = {"enabled": bool(physics)}
    if layout == "hierarchical (positions → skills)":
        options = {
            "layout": {
                "hierarchical": {
                    "enabled": True,
                    "levelSeparation": 180,
                    "nodeSpacing": 170,
                    "treeSpacing": 200,
                    "direction": "UD",
                    "sortMethod": "hubsize",
                }
            },
            "physics": physics_opts,
        }
    else:
        physics_opts["stabilization"] = {"iterations": 150}
        if physics:
            # set_options() replaces the whole options object, so the solver
            # that force_atlas_2based() would select is named here instead.
            physics_opts["solver"] = "forceAtlas2Based"
        options = {"physics": physics_opts}
    net.set_options(json.dumps(options))

    for node_id, attrs in G.nodes(data=True):
        net.add_node(
            node_id,
            label=attrs.get("label", node_id),
            color=attrs.get("color", "#97c2fc"),
            title=attrs.get("title", ""),
            size=attrs.get("size", 15),
            shape="dot" if attrs.get("kind") == "skill" else "ellipse",
        )
    for u, v, edata in G.edges(data=True):
        edge_opts = {"title": edata.get("title", ""), "value": edata.get("weight", 1)}
        # Forward optional styling only when present, so no null values are
        # emitted and the dashed style set on similarity edges is kept.
        for key in ("color", "dashes"):
            if edata.get(key) is not None:
                edge_opts[key] = edata[key]
        net.add_edge(u, v, **edge_opts)

    return net.generate_html()
167
+
168
def run(json_file, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min, physics, layout, height_px):
    """Button callback: load the JSON, build the graph, return its HTML."""
    graph = _build_graph(
        _load_json(json_file),
        min_skill_count,
        top_k_per_position,
        include_pos_pos_edges,
        pos_pos_weight_min,
    )
    return _nx_to_pyvis_html(graph, physics=physics, layout=layout, height_px=height_px)
173
+
174
# Gradio UI: filter/layout controls in the first column, rendered network in the second.
with gr.Blocks(title="Job Positions ↔ Hard Skills — Network Diagram") as demo:
    # Real newlines are required here; an escaped backslash-n would be shown
    # literally and collapse the heading, intro and bullet list into one line.
    gr.Markdown(
        "# Network Diagram: Positions ↔ Skills\n"
        "Upload `job_position_skill_graph.json` or place it in the repo root.\n"
        "- **Black ovals** = Job positions\n"
        "- **Colored dots** = Skills (color by cluster)\n"
        "- Edge weight = frequency of skill in that position"
    )

    with gr.Row():
        with gr.Column(scale=1):
            json_file = gr.File(label="Upload job_position_skill_graph.json (optional)", file_count="single", file_types=[".json"])
            min_skill_count = gr.Slider(0, 50, value=5, step=1, label="Minimum skill count per position (filter noise)")
            top_k_per_position = gr.Slider(0, 100, value=20, step=1, label="Top-K skills per position (0 = all)")
            include_pos_pos_edges = gr.Checkbox(value=False, label="Include position↔position similarity edges")
            pos_pos_weight_min = gr.Slider(0.0, 1.0, value=0.2, step=0.05, label="Min similarity (if enabled)")
            physics = gr.Checkbox(value=True, label="Enable physics (force layout)")
            layout = gr.Dropdown(choices=["free (force layout)", "hierarchical (positions → skills)"], value="free (force layout)", label="Layout")
            height_px = gr.Slider(500, 1400, value=900, step=50, label="Canvas height (px)")
            btn = gr.Button("Build Network", variant="primary")
        with gr.Column(scale=1):
            out_html = gr.HTML(label="Interactive Network")

    # The input order must match the positional parameters of run().
    btn.click(
        fn=run,
        inputs=[json_file, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min, physics, layout, height_px],
        outputs=[out_html]
    )
196
 
197
  if __name__ == "__main__":
requirements.txt CHANGED
@@ -1,2 +1,3 @@
1
  gradio>=4.26.0
 
2
  pyvis>=0.3.2
 
1
  gradio>=4.26.0
2
+ networkx>=3.2
3
  pyvis>=0.3.2