Nucha committed on
Commit
844f88a
·
verified ·
1 Parent(s): dfba298

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +156 -181
  2. requirements.txt +0 -1
app.py CHANGED
@@ -1,203 +1,178 @@
1
  \
2
  import os
3
  import json
4
- import uuid # <-- FIX: added
5
- from typing import Dict, Any, List, Tuple
6
  import gradio as gr
7
-
8
  from pyvis.network import Network
9
 
10
- DEFAULT_JSON = "job_skill_network.json"
11
-
12
- def _load_graph(file_obj) -> Dict[str, Any]:
13
- if file_obj is not None:
14
- with open(file_obj.name if hasattr(file_obj, "name") else file_obj, "r", encoding="utf-8") as f:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  return json.load(f)
16
  if os.path.exists(DEFAULT_JSON):
17
  with open(DEFAULT_JSON, "r", encoding="utf-8") as f:
18
  return json.load(f)
19
- raise gr.Error("No JSON provided and default file not found. Please upload job_skill_network.json.")
20
-
21
- def _split_nodes(nodes: List[Dict[str, Any]]):
22
- jobs = [n for n in nodes if str(n.get("type","")).lower() == "job"]
23
- skills = [n for n in nodes if str(n.get("type","")).lower() == "skill"]
24
- return jobs, skills
25
-
26
- def _index_nodes(nodes: List[Dict[str, Any]]):
27
- return {n["id"]: n for n in nodes}
28
-
29
- def _filter_graph(graph: Dict[str, Any],
30
- include_requires: bool,
31
- include_similar: bool,
32
- min_weight: int,
33
- top_n_jobs: int,
34
- keep_outside_similar: bool,
35
- include_job_nodes: bool,
36
- include_skill_nodes: bool):
37
- nodes = graph.get("nodes", [])
38
- edges = graph.get("edges", [])
39
- jobs, skills = _split_nodes(nodes)
40
-
41
- def _postings(n):
42
- try:
43
- return int(n.get("postings", 0))
44
- except Exception:
45
- return 0
46
-
47
- jobs_sorted = sorted(jobs, key=_postings, reverse=True)
48
- selected_job_ids = set([n["id"] for n in jobs_sorted[:max(1, int(top_n_jobs))]]) if include_job_nodes else set()
49
-
50
- selected_edges = []
51
- for e in edges:
52
- et = str(e.get("type","")).lower()
53
- if et == "requires" and include_requires and int(e.get("weight", 1)) >= int(min_weight):
54
- if include_job_nodes or include_skill_nodes:
55
- selected_edges.append(e)
56
- elif et == "similar" and include_similar and int(e.get("weight", 1)) >= int(min_weight):
57
- selected_edges.append(e)
58
-
59
- node_ids = set()
60
- for e in selected_edges:
61
- s, t, et = e.get("source"), e.get("target"), str(e.get("type","")).lower()
62
- if et == "requires" and selected_job_ids:
63
- if (s in selected_job_ids) or (t in selected_job_ids):
64
- node_ids.update([s, t])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  else:
66
- node_ids.update([s, t])
67
-
68
- if include_similar and keep_outside_similar and selected_job_ids:
69
- for e in selected_edges:
70
- if str(e.get("type","")).lower() != "similar":
71
- continue
72
- s, t = e.get("source"), e.get("target")
73
- if (s in selected_job_ids) or (t in selected_job_ids):
74
- node_ids.update([s, t])
75
-
76
- node_map = _index_nodes(nodes)
77
- final_nodes = []
78
- for nid in list(node_ids):
79
- n = node_map.get(nid)
80
- if not n:
 
 
 
 
 
 
 
 
 
 
 
81
  continue
82
- ntype = str(n.get("type","")).lower()
83
- if (ntype == "job" and include_job_nodes) or (ntype == "skill" and include_skill_nodes):
84
- final_nodes.append(n)
85
-
86
- final_ids = set(n["id"] for n in final_nodes)
87
- final_edges = [e for e in selected_edges if e.get("source") in final_ids and e.get("target") in final_ids]
88
-
89
- return final_nodes, final_edges
90
-
91
- def _build_pyvis_html(nodes, edges, physics: bool, hierarchical: bool):
92
- net = Network(height="720px", width="100%", directed=False, notebook=False)
93
- net.barnes_hut()
94
-
95
- for n in nodes:
96
- nid = n["id"]
97
- label = str(n.get("label", nid))
98
- ntype = str(n.get("type","")).lower()
99
- title = f"{ntype.upper()} | {label}"
100
- size = 12
101
- shape = "dot"
102
- if ntype == "job":
103
- size = 18 + int(n.get("postings", 0)) * 0.1
104
- shape = "ellipse"
105
- elif ntype == "skill":
106
- size = 8
107
- net.add_node(nid, label=label, title=title, group=ntype, shape=shape, value=size)
108
-
109
- for e in edges:
110
- s, t = e.get("source"), e.get("target")
111
- et = str(e.get("type",""))
112
- weight = int(e.get("weight", 1))
113
- title = f"{et} (w={weight})"
114
- net.add_edge(s, t, title=title, value=weight)
115
-
116
- options = {
117
- "physics": {"enabled": bool(physics)},
118
- "interaction": {"hover": True, "multiselect": True, "dragNodes": True},
119
- "nodes": {"font": {"size": 14}},
120
- "edges": {"smooth": {"type": "dynamic"}}
121
- }
122
- if hierarchical:
123
- options["layout"] = {
124
- "hierarchical": {
125
- "enabled": True,
126
- "levelSeparation": 120,
127
- "nodeSpacing": 120,
128
- "treeSpacing": 180,
129
- "direction": "UD",
130
- "sortMethod": "hubsize"
131
- }
132
- }
133
- options["physics"]["enabled"] = False
134
-
135
- import json as _json
136
- net.set_options(_json.dumps(options))
137
- return net.generate_html()
138
-
139
- def build_network(
140
- json_file,
141
- include_requires,
142
- include_similar,
143
- min_weight,
144
- top_n_jobs,
145
- keep_outside_similar,
146
- include_job_nodes,
147
- include_skill_nodes,
148
- physics,
149
- hierarchical
150
- ):
151
- graph = _load_graph(json_file)
152
- nodes, edges = _filter_graph(graph, include_requires, include_similar, int(min_weight), int(top_n_jobs),
153
- bool(keep_outside_similar), bool(include_job_nodes), bool(include_skill_nodes))
154
-
155
- if not nodes or not edges:
156
- raise gr.Error("No nodes/edges remain after filtering. Try lowering the filter or including more edge types.")
157
-
158
- html = _build_pyvis_html(nodes, edges, physics, hierarchical)
159
-
160
- out_name = f"network_{uuid.uuid4().hex[:8]}.html"
161
- with open(out_name, "w", encoding="utf-8") as f:
162
- f.write(html)
163
-
164
- return gr.update(value=html), out_name
165
-
166
- with gr.Blocks(title="Job ↔ Hard Skill Network") as demo:
167
- gr.Markdown("# Job ↔ Hard Skill Network Diagram\nUpload `job_skill_network.json` or place it at repo root.")
168
-
169
- with gr.Row():
170
- with gr.Column(scale=1):
171
- json_file = gr.File(label="Upload JSON (optional)", file_count="single", file_types=[".json"])
172
-
173
- gr.Markdown("### Include Edge Types")
174
- include_requires = gr.Checkbox(value=True, label="Job–Skill edges (type='requires')")
175
- include_similar = gr.Checkbox(value=True, label="Job–Job edges (type='similar')")
176
-
177
- gr.Markdown("### Filters")
178
- min_weight = gr.Slider(0, 50, value=3, step=1, label="Minimum edge weight")
179
- top_n_jobs = gr.Slider(1, 100, value=30, step=1, label="Top-N job nodes by postings")
180
- keep_outside_similar = gr.Checkbox(value=True, label="Include similar jobs outside Top-N")
181
 
182
- gr.Markdown("### Node Types")
183
- include_job_nodes = gr.Checkbox(value=True, label="Include job nodes")
184
- include_skill_nodes = gr.Checkbox(value=True, label="Include skill nodes")
 
185
 
186
- gr.Markdown("### Layout")
187
- physics = gr.Checkbox(value=True, label="Enable physics")
188
- hierarchical = gr.Checkbox(value=False, label="Hierarchical layout (good for Job→Skill)")
189
 
190
- btn = gr.Button("Build Network", variant="primary")
 
191
 
 
 
 
 
 
 
 
 
 
 
 
192
  with gr.Column(scale=1):
193
- html_view = gr.HTML(label="Interactive Network (PyVis)")
194
- html_file = gr.File(label="Download HTML")
195
 
196
  btn.click(
197
- fn=build_network,
198
- inputs=[json_file, include_requires, include_similar, min_weight, top_n_jobs, keep_outside_similar,
199
- include_job_nodes, include_skill_nodes, physics, hierarchical],
200
- outputs=[html_view, html_file]
201
  )
202
 
203
  if __name__ == "__main__":
 
1
  \
2
  import os
3
  import json
4
+ import math
 
5
  import gradio as gr
 
6
  from pyvis.network import Network
7
 
8
# Graph file used when nothing is uploaded; put this file at repo root.
DEFAULT_JSON = "job_position_skill_graph.json"

# Color palette for clusters. render_network indexes it modulo its length,
# so colors repeat when more clusters appear than there are entries here.
CLUSTER_COLORS = [
    "#1f77b4",
    "#ff7f0e",
    "#2ca02c",
    "#d62728",
    "#9467bd",
    "#8c564b",
    "#e377c2",
    "#7f7f7f",
    "#bcbd22",
    "#17becf",
]
15
+
16
def load_graph_json(json_file):
    """
    Load the graph JSON from an uploaded file, or from DEFAULT_JSON if present.

    Expected schema:
    {
      "positions": [{"name": "...", "skills": {"cluster": [{"name": "skill", "count": int}, ...], ...}}],
      "edges": [{"source": "...", "target": "...", "weight": float, "shared_skills": [...]}]
    }

    Args:
        json_file: Value received from the gr.File input. Either a path
            string or an object exposing the path as ``.name``; ``None``
            means "nothing uploaded" and triggers the DEFAULT_JSON fallback.

    Returns:
        The parsed JSON document (a dict).

    Raises:
        gr.Error: If no file is available, the file cannot be read or parsed,
            or the top-level JSON value is not an object.
    """
    if json_file is not None:
        # gr.File may pass a tempfile path string or a file object
        path = json_file.name if hasattr(json_file, "name") else json_file
    elif os.path.exists(DEFAULT_JSON):
        path = DEFAULT_JSON
    else:
        raise gr.Error(f"No JSON provided and default file not found. Please upload {DEFAULT_JSON}.")

    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, UnicodeDecodeError, json.JSONDecodeError) as err:
        # Surface a readable message in the UI instead of a raw traceback.
        raise gr.Error(f"Could not read graph JSON from {path}: {err}") from err

    # Callers use data.get(...), so anything but a JSON object is unusable.
    if not isinstance(data, dict):
        raise gr.Error("Graph JSON must be an object with 'positions' and 'edges' keys.")
    return data
34
+
35
def infer_node_cluster_and_size(position, node_size_mode):
    """
    Infer the dominant cluster of a position (for coloring) and its node size.

    Args:
        position: One entry of the "positions" list; its "skills" value maps
            a cluster name to a list of {"name": ..., "count": ...} items.
        node_size_mode: 'skills-top10' sizes the node by the sum of the ten
            largest skill counts across all clusters; any other value
            ('skills-total') sizes it by the sum of all skill counts.

    Returns:
        A ``(dominant_cluster, size)`` tuple. ``dominant_cluster`` is the
        cluster with the highest summed count (first one wins on ties), or
        "other" when the position has no clusters. ``size`` is 10 when there
        is nothing to count, otherwise a value in the range 10..60.
    """
    def _count(item):
        # Missing/None counts are read as 0 and negative counts are clamped,
        # identically in both sizing modes.
        return max(0, int(item.get("count", 0) or 0))

    # "skills" (or a cluster's item list) may be null in the JSON.
    skills_by_cluster = position.get("skills") or {}

    # Aggregate counts per cluster
    cluster_scores = {
        cluster: sum(_count(item) for item in (items or []))
        for cluster, items in skills_by_cluster.items()
    }
    if not cluster_scores:
        return ("other", 10)

    # Dominant cluster (max returns the first maximal key, i.e. input order)
    dominant = max(cluster_scores, key=cluster_scores.get)

    if node_size_mode == "skills-top10":
        # Sum only the top 10 counts across all clusters combined
        all_counts = [
            _count(item)
            for items in skills_by_cluster.values()
            for item in (items or [])
        ]
        size = sum(sorted(all_counts, reverse=True)[:10])
    else:
        size = sum(cluster_scores.values())

    # Map size to a reasonable node size (10..60)
    if size <= 0:
        return (dominant, 10)
    # sqrt scale to compress big ranges
    return (dominant, 10 + min(50, 5 * math.sqrt(size)))
70
+
71
def build_tooltip(position, max_items_per_cluster=6):
    """
    Build an HTML tooltip listing the top skills per cluster.

    Args:
        position: One entry of the "positions" list; its "skills" value maps
            a cluster name to a list of {"name": ..., "count": ...} items.
        max_items_per_cluster: Maximum number of skills listed per cluster,
            taken in descending order of count.

    Returns:
        An HTML string: the position name in bold, followed by one ``<div>``
        per non-empty cluster, joined with ``<br/>``.
    """
    def _count(item):
        # Missing/None counts are shown as 0 instead of raising.
        return int(item.get("count", 0) or 0)

    # A UI slider may deliver the limit as a float; slicing needs an int.
    limit = int(max_items_per_cluster)

    name = position.get("name", "")
    # "skills" may be null in the JSON.
    skills_by_cluster = position.get("skills") or {}

    # NOTE(review): names are interpolated into HTML unescaped. If the JSON
    # can come from untrusted uploads, consider html.escape here -- confirm
    # first how the tooltip is rendered.
    parts = [f"<b>{name}</b>"]
    for cluster, items in skills_by_cluster.items():
        if not items:
            continue
        # Stable sort: equal counts keep their input order.
        top = sorted(items, key=_count, reverse=True)[:limit]
        inner = ", ".join(f"{item.get('name', '')} ({_count(item)})" for item in top)
        parts.append(f"<div><b>{cluster}:</b> {inner}</div>")
    return "<br/>".join(parts)
85
+
86
def render_network(json_file, min_edge_weight, show_labels, physics, max_items_per_cluster, node_size_mode, filter_position, layout):
    """
    Build the position network with pyvis and return it as an HTML string.

    Args:
        json_file: Uploaded file (or None), forwarded to load_graph_json.
        min_edge_weight: Edges whose weight is below this value are dropped.
        show_labels: When false, nodes are added with an empty label.
        physics: Enables the physics layout; when false, physics is turned off.
        max_items_per_cluster: Per-cluster skill limit passed to build_tooltip.
        node_size_mode: Sizing mode passed to infer_node_cluster_and_size.
        filter_position: Optional case-insensitive substring; only positions
            whose name contains it are shown.
        layout: "Barnes-Hut" selects that solver; any other value selects
            ForceAtlas2Based. Only consulted when physics is enabled.

    Returns:
        The generated network HTML, or a short hint message when no position
        passes the filter.
    """
    data = load_graph_json(json_file)
    positions = data.get("positions") or []

    # Prepare pyvis network
    net = Network(height="720px", width="100%", bgcolor="#ffffff", font_color="#111111", directed=False, cdn_resources="in_line")
    # Physics options
    if physics:
        if layout == "Barnes-Hut":
            net.barnes_hut()
        else:
            # ForceAtlas2Based may look nice for dense graphs
            net.force_atlas_2based()
    else:
        # Use the dedicated toggle: set_options expects a JSON document, and
        # a JavaScript object literal (unquoted keys, trailing ';') is not one.
        net.toggle_physics(False)

    # Build cluster -> color map in order of first appearance
    color_map = {}
    for pos in positions:
        for cluster in (pos.get("skills") or {}):
            if cluster not in color_map:
                color_map[cluster] = CLUSTER_COLORS[len(color_map) % len(CLUSTER_COLORS)]
    color_map.setdefault("other", "#888888")

    # Optional position name filter (substring, case-insensitive)
    filter_position = (filter_position or "").strip().lower()

    # Add nodes
    node_ids = set()
    for pos in positions:
        name = pos.get("name", "")
        if filter_position and filter_position not in name.lower():
            continue
        dominant_cluster, size = infer_node_cluster_and_size(pos, node_size_mode)
        tooltip = build_tooltip(pos, max_items_per_cluster=int(max_items_per_cluster))
        net.add_node(
            n_id=name,
            label=name if show_labels else "",
            title=tooltip,
            color=color_map.get(dominant_cluster, color_map["other"]),
            size=size,
        )
        node_ids.add(name)

    # If graph ends up empty, hint the user
    if not node_ids:
        return "<h3>No nodes to show</h3><p>Loosen filters or upload a JSON.</p>"

    # Add edges with threshold filter; both endpoints must be visible nodes
    threshold = float(min_edge_weight)
    for edge in data.get("edges") or []:
        weight = float(edge.get("weight") or 0)
        if weight < threshold:
            continue
        src, tgt = edge.get("source"), edge.get("target")
        if src in node_ids and tgt in node_ids:
            # "shared_skills" may be missing or null; show at most 10 names
            shared = ", ".join(str(skill) for skill in (edge.get("shared_skills") or [])[:10])
            net.add_edge(src, tgt, value=weight, title=f"weight={weight:.2f} | shared: {shared}")

    # Generate HTML
    return net.generate_html()
154
 
155
# UI definition: controls in the left column, rendered network in the right one.
with gr.Blocks(title="Job Position ↔ Hard Skills — Network") as demo:
    gr.Markdown("# Job Position ↔ Hard Skills — Network Diagram\nUpload a JSON or place **job_position_skill_graph.json** in repo root.")

    with gr.Row():
        # Left column: data source, filters and layout controls
        with gr.Column(scale=1):
            json_file = gr.File(
                label="Upload job_position_skill_graph.json (optional)",
                file_count="single",
                file_types=[".json"],
            )
            min_edge_weight = gr.Slider(0.0, 1.0, value=0.15, step=0.01, label="Min edge weight (Jaccard)")
            show_labels = gr.Checkbox(value=True, label="Show node labels")
            physics = gr.Checkbox(value=True, label="Enable physics layout")
            layout = gr.Radio(
                choices=["Barnes-Hut", "ForceAtlas2Based"],
                value="ForceAtlas2Based",
                label="Layout algorithm",
            )
            node_size_mode = gr.Radio(
                choices=["skills-total", "skills-top10"],
                value="skills-total",
                label="Node size scale by",
            )
            max_items_per_cluster = gr.Slider(1, 20, value=6, step=1, label="Tooltip: max skills per cluster")
            filter_position = gr.Textbox(value="", label="Filter by position name (substring)")
            btn = gr.Button("Render", variant="primary")
        # Right column: the rendered network
        with gr.Column(scale=1):
            out_html = gr.HTML(label="Network")

    # The input order must match render_network's positional parameters.
    btn.click(
        fn=render_network,
        inputs=[
            json_file,
            min_edge_weight,
            show_labels,
            physics,
            max_items_per_cluster,
            node_size_mode,
            filter_position,
            layout,
        ],
        outputs=[out_html],
    )
177
 
178
  if __name__ == "__main__":
requirements.txt CHANGED
@@ -1,3 +1,2 @@
1
  gradio>=4.26.0
2
  pyvis>=0.3.2
3
- networkx>=3.2
 
1
  gradio>=4.26.0
2
  pyvis>=0.3.2