Nucha committed on
Commit
a29e2b2
·
verified ·
1 Parent(s): 98b2a97

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +226 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ \
2
+ import os
3
+ import json
4
+ from typing import Dict, Any, List, Tuple
5
+ import gradio as gr
6
+
7
+ # Network rendering
8
+ from pyvis.network import Network
9
+
10
+ DEFAULT_JSON = "job_skill_network.json" # Place this file in the Space repo root
11
+
12
def _load_graph(file_obj) -> Dict[str, Any]:
    """Load the graph JSON from an upload, falling back to DEFAULT_JSON.

    The rest of the module reads two keys from the result:
    'nodes': [{'id', 'label', 'type', ...}] and
    'edges': [{'source', 'target', 'type', 'weight', ...}].

    Args:
        file_obj: ``None`` (use DEFAULT_JSON), a filesystem path, or an object
            that exposes the path through a ``name`` attribute.

    Returns:
        The parsed top-level JSON object.

    Raises:
        gr.Error: If nothing was uploaded and DEFAULT_JSON does not exist, if
            the file cannot be read or parsed, or if the top-level JSON value
            is not an object.
    """
    if file_obj is not None:
        path = file_obj.name if hasattr(file_obj, "name") else file_obj
    elif os.path.exists(DEFAULT_JSON):
        path = DEFAULT_JSON
    else:
        raise gr.Error("No JSON provided and default file not found. Please upload job_skill_network.json.")

    try:
        with open(path, "r", encoding="utf-8") as f:
            graph = json.load(f)
    except (OSError, ValueError) as err:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses; report them in the UI instead of as a raw traceback.
        raise gr.Error(f"Could not read graph JSON: {err}") from err

    # Callers use graph.get(...), so anything but an object would crash later.
    if not isinstance(graph, dict):
        raise gr.Error("Graph JSON must be an object with 'nodes' and 'edges' keys.")
    return graph
24
+
25
+ def _split_nodes(nodes: List[Dict[str, Any]]):
26
+ jobs = [n for n in nodes if str(n.get("type","")).lower() == "job"]
27
+ skills = [n for n in nodes if str(n.get("type","")).lower() == "skill"]
28
+ return jobs, skills
29
+
30
+ def _index_nodes(nodes: List[Dict[str, Any]]):
31
+ return {n["id"]: n for n in nodes}
32
+
33
def _filter_graph(graph: Dict[str, Any],
                  include_requires: bool,
                  include_similar: bool,
                  min_weight: int,
                  top_n_jobs: int,
                  keep_outside_similar: bool,
                  include_job_nodes: bool,
                  include_skill_nodes: bool) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
    """Select the nodes and edges of ``graph`` that should be drawn.

    Args:
        graph: Parsed JSON with optional 'nodes' and 'edges' lists.
        include_requires: Keep edges whose type is "requires".
        include_similar: Keep edges whose type is "similar".
        min_weight: Minimum edge weight; a missing or unparsable weight
            counts as 1.
        top_n_jobs: Number of job nodes (ranked by 'postings', descending)
            that anchor the selection; values below 1 are treated as 1.
        keep_outside_similar: If true, a "similar" edge is kept when at least
            one endpoint is a top-N job; otherwise both endpoints must be
            top-N jobs.
        include_job_nodes: Emit job nodes. When false there is no top-N
            restriction on edges, but job nodes (and therefore every edge
            touching one) are removed from the result.
        include_skill_nodes: Emit skill nodes.

    Returns:
        ``(nodes, edges)``: the retained node dicts, in the order their ids
        first appear in ``graph['nodes']``, and the retained edge dicts whose
        two endpoints are both among the retained nodes.
    """
    def _to_int(value, default):
        # Tolerate missing, null, float or numeric-string values in the JSON.
        try:
            return int(float(value))
        except (TypeError, ValueError, OverflowError):
            return default

    nodes = graph.get("nodes", [])
    edges = graph.get("edges", [])
    jobs, _ = _split_nodes(nodes)

    # Rank jobs by postings (unparsable counts as 0) and keep the top N.
    ranked_jobs = sorted(jobs, key=lambda n: _to_int(n.get("postings", 0), 0), reverse=True)
    if include_job_nodes:
        selected_job_ids = {n["id"] for n in ranked_jobs[:max(1, int(top_n_jobs))]}
    else:
        selected_job_ids = set()

    enabled_types = set()
    if include_requires:
        enabled_types.add("requires")
    if include_similar:
        enabled_types.add("similar")
    threshold = int(min_weight)

    # Decide edge by edge, so that an edge rejected by the top-N rule cannot
    # slip back in merely because both of its endpoints were pulled in by
    # other edges.
    selected_edges = []
    for e in edges:
        et = str(e.get("type", "")).lower()
        if et not in enabled_types:
            continue
        if _to_int(e.get("weight", 1), 1) < threshold:
            continue
        if selected_job_ids:
            in_top = (e.get("source") in selected_job_ids,
                      e.get("target") in selected_job_ids)
            if et == "similar" and not keep_outside_similar:
                # Similar jobs outside the top N are not wanted.
                keep = all(in_top)
            else:
                # "requires": the job may be either endpoint.
                # "similar" with keep_outside_similar: one top-N job suffices.
                keep = any(in_top)
            if not keep:
                continue
        selected_edges.append(e)

    node_ids = set()
    for e in selected_edges:
        node_ids.update((e.get("source"), e.get("target")))

    # Keep referenced nodes whose type is enabled; iterating the index (not
    # the id set) gives a stable order.
    allowed_types = set()
    if include_job_nodes:
        allowed_types.add("job")
    if include_skill_nodes:
        allowed_types.add("skill")
    final_nodes = [
        n for nid, n in _index_nodes(nodes).items()
        if nid in node_ids and str(n.get("type", "")).lower() in allowed_types
    ]

    # Drop edges that lost an endpoint to the node-type filter.
    final_ids = {n["id"] for n in final_nodes}
    final_edges = [
        e for e in selected_edges
        if e.get("source") in final_ids and e.get("target") in final_ids
    ]

    return final_nodes, final_edges
102
+
103
def _build_pyvis_html(nodes: List[Dict[str, Any]], edges: List[Dict[str, Any]], physics: bool, hierarchical: bool):
    """Render the filtered graph as a PyVis HTML document.

    Args:
        nodes: Node dicts; 'id' is required, 'label', 'type' and 'postings'
            are optional.
        edges: Edge dicts with 'source' and 'target' ids that are present in
            ``nodes``, plus optional 'type' and 'weight'.
        physics: Enable the physics simulation (ignored when ``hierarchical``
            is true).
        hierarchical: Use a top-down hierarchical layout; this forces physics
            off.

    Returns:
        The HTML produced by ``Network.generate_html()``.
    """
    def _to_int(value, default):
        # Tolerate missing, null, float or numeric-string values in the JSON.
        try:
            return int(float(value))
        except (TypeError, ValueError, OverflowError):
            return default

    # cdn_resources="in_line" asks PyVis to embed its JS/CSS in the page so
    # the downloaded file does not depend on a sibling lib/ folder.
    net = Network(height="720px", width="100%", directed=False, notebook=False,
                  cdn_resources="in_line")
    # NOTE(review): set_options() below appears to replace the whole options
    # object, so this call may have no lasting effect; confirm before removing.
    net.barnes_hut()

    # Add nodes
    for n in nodes:
        nid = n["id"]
        label = str(n.get("label", nid))
        ntype = str(n.get("type", "")).lower()
        title = f"{ntype.upper()} | {label}"
        size = 12
        shape = "dot"
        if ntype == "job":
            # Scale by postings; unparsable postings count as 0.
            size = 18 + _to_int(n.get("postings", 0), 0) * 0.1
            shape = "ellipse"
        elif ntype == "skill":
            size = 8

        net.add_node(nid, label=label, title=title, group=ntype, shape=shape, value=size)

    # Add edges
    for e in edges:
        s, t = e.get("source"), e.get("target")
        et = str(e.get("type", ""))
        weight = _to_int(e.get("weight", 1), 1)
        title = f"{et} (w={weight})"
        net.add_edge(s, t, title=title, value=weight)

    # Options
    options = {
        "physics": {"enabled": bool(physics)},
        "interaction": {"hover": True, "multiselect": True, "dragNodes": True},
        "nodes": {"font": {"size": 14}},
        "edges": {"smooth": {"type": "dynamic"}},
    }
    if hierarchical:
        # Simple hierarchical layout (works better when dominated by job->skill edges)
        options["layout"] = {
            "hierarchical": {
                "enabled": True,
                "levelSeparation": 120,
                "nodeSpacing": 120,
                "treeSpacing": 180,
                "direction": "UD",
                "sortMethod": "hubsize",
            }
        }
        # When hierarchical, physics should usually be off to avoid jitter
        options["physics"]["enabled"] = False

    net.set_options(json.dumps(options))

    return net.generate_html()
158
+
159
def build_network(
    json_file,
    include_requires,
    include_similar,
    min_weight,
    top_n_jobs,
    keep_outside_similar,
    include_job_nodes,
    include_skill_nodes,
    physics,
    hierarchical
):
    """Gradio callback: load, filter and render the network.

    Args:
        json_file: Uploaded file (or ``None`` to use the default JSON).
        include_requires, include_similar: Edge-type checkboxes.
        min_weight, top_n_jobs: Slider values; converted with ``int``.
        keep_outside_similar, include_job_nodes, include_skill_nodes:
            Filter checkboxes; converted with ``bool``.
        physics, hierarchical: Layout checkboxes passed to the renderer.

    Returns:
        ``(update, path)``: a ``gr.update`` carrying the HTML for the viewer
        and the path of an ``.html`` file containing the same document.

    Raises:
        gr.Error: If filtering leaves no nodes or no edges (errors raised by
            the loader propagate unchanged).
    """
    # Function-scope import: the module's import block does not provide a
    # way to create uniquely named files (the original referenced `uuid`
    # without importing it, which raised NameError on every build).
    import tempfile

    graph = _load_graph(json_file)
    nodes, edges = _filter_graph(graph, include_requires, include_similar, int(min_weight), int(top_n_jobs),
                                 bool(keep_outside_similar), bool(include_job_nodes), bool(include_skill_nodes))

    if not nodes or not edges:
        raise gr.Error("No nodes/edges remain after filtering. Try lowering the filter or including more edge types.")

    # NOTE(review): this is a complete HTML document including <script> tags;
    # confirm gr.HTML executes it, otherwise wrap it in an <iframe srcdoc=...>.
    html = _build_pyvis_html(nodes, edges, physics, hierarchical)

    # Save to a uniquely named file in the system temp directory so users can
    # download it; delete=False keeps the file after the handle is closed.
    with tempfile.NamedTemporaryFile("w", encoding="utf-8", prefix="network_",
                                     suffix=".html", delete=False) as f:
        f.write(html)
        out_name = f.name

    return gr.update(value=html), out_name
186
+
187
# UI definition. Components are created in display order: a controls column
# on the left and the rendered network plus download link on the right.
with gr.Blocks(title="Job ↔ Hard Skill Network") as demo:
    gr.Markdown(
        "# Job ↔ Hard Skill Network Diagram\n"
        "Upload `job_skill_network.json` or place it at repo root."
    )

    with gr.Row():
        # Left column: data source, filters and layout switches.
        with gr.Column(scale=1):
            json_file = gr.File(
                label="Upload JSON (optional)",
                file_count="single",
                file_types=[".json"],
            )

            gr.Markdown("### Include Edge Types")
            include_requires = gr.Checkbox(label="Job–Skill edges (type='requires')", value=True)
            include_similar = gr.Checkbox(label="Job–Job edges (type='similar')", value=True)

            gr.Markdown("### Filters")
            min_weight = gr.Slider(
                minimum=0, maximum=50, value=3, step=1,
                label="Minimum edge weight",
            )
            top_n_jobs = gr.Slider(
                minimum=1, maximum=100, value=30, step=1,
                label="Top-N job nodes by postings",
            )
            keep_outside_similar = gr.Checkbox(label="Include similar jobs outside Top-N", value=True)

            gr.Markdown("### Node Types")
            include_job_nodes = gr.Checkbox(label="Include job nodes", value=True)
            include_skill_nodes = gr.Checkbox(label="Include skill nodes", value=True)

            gr.Markdown("### Layout")
            physics = gr.Checkbox(label="Enable physics", value=True)
            hierarchical = gr.Checkbox(label="Hierarchical layout (good for Job→Skill)", value=False)

            btn = gr.Button("Build Network", variant="primary")

        # Right column: rendered network and the downloadable HTML file.
        with gr.Column(scale=1):
            html_view = gr.HTML(label="Interactive Network (PyVis)")
            html_file = gr.File(label="Download HTML")

    # The input order must match build_network's positional parameters.
    btn.click(
        fn=build_network,
        inputs=[
            json_file,
            include_requires,
            include_similar,
            min_weight,
            top_n_jobs,
            keep_outside_similar,
            include_job_nodes,
            include_skill_nodes,
            physics,
            hierarchical,
        ],
        outputs=[html_view, html_file],
    )

if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio>=4.26.0
2
+ pyvis>=0.3.2
3
+ networkx>=3.2