File size: 11,557 Bytes
a29e2b2
 
 
e99665e
a29e2b2
 
e99665e
f728c11
844f88a
e99665e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1048569
 
a29e2b2
1048569
 
e99665e
 
1048569
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e99665e
 
 
1048569
f728c11
e99665e
 
f728c11
e99665e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
844f88a
1048569
e99665e
 
 
 
 
 
1048569
e99665e
 
 
 
 
 
 
a29e2b2
f728c11
1048569
f728c11
e99665e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1048569
 
e99665e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f728c11
e99665e
 
 
1048569
e99665e
1048569
 
 
 
 
 
 
 
 
 
 
 
e99665e
 
1048569
 
 
 
 
e99665e
1048569
 
e99665e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1048569
 
e99665e
 
 
 
 
 
a29e2b2
844f88a
e99665e
844f88a
e99665e
 
 
 
 
 
 
f728c11
e99665e
 
a29e2b2
 
f728c11
e99665e
 
a29e2b2
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
\
import os
import json
import math
import gradio as gr

import networkx as nx
from pyvis.network import Network

# Fallback graph file (relative path, so resolved against the working
# directory). _load_json reads it when no file has been uploaded.
DEFAULT_JSON = "job_position_skill_graph.json"

# Hex colour for skill nodes, keyed by skill-cluster name. _build_graph looks
# clusters up here and falls back to "#999999" (the same value as "other")
# for any cluster name that is not listed.
CLUSTER_COLORS = {
    "programming": "#1f77b4",
    "databases": "#ff7f0e",
    "cloud": "#2ca02c",
    "devops": "#d62728",
    "version_control": "#9467bd",
    "data_processing": "#8c564b",
    "ml_ai": "#e377c2",
    "web_backend": "#7f7f7f",
    "web_frontend": "#bcbd22",
    "security": "#17becf",
    "networking": "#1b9e77",
    "mobile": "#d95f02",
    "analytics_bi": "#7570b3",
    "testing_qc": "#e7298a",
    "infra_sys": "#66a61e",
    "other": "#999999",
}

def _load_json(file_obj):
    """Load the graph JSON from an upload, falling back to ``DEFAULT_JSON``.

    Args:
        file_obj: Value delivered by the ``gr.File`` input. May be ``None``
            (nothing uploaded), a filesystem path (``str`` / ``os.PathLike``),
            or an object exposing the path as ``.name``.

    Returns:
        The parsed JSON document.

    Raises:
        gr.Error: If nothing was uploaded and ``DEFAULT_JSON`` does not exist,
            or if the selected file is not valid UTF-8 JSON.
    """
    if file_obj is not None:
        # The File component may hand over either a plain path or a
        # tempfile-like wrapper, depending on how it is configured.
        if isinstance(file_obj, (str, os.PathLike)):
            path = file_obj
        else:
            path = file_obj.name
    elif os.path.exists(DEFAULT_JSON):
        path = DEFAULT_JSON
    else:
        raise gr.Error("No JSON provided and default file not found. Upload or place job_position_skill_graph.json in repo root.")

    try:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        # Show a readable message in the UI instead of a raw traceback.
        raise gr.Error(f"Could not parse JSON file: {exc}") from exc

def _normalize_schema(data):
    """
    Convert any supported input layout into the internal graph format:
    {
      "positions": [{"name": "...","skills": {"cluster":[{"name":"...","count":N}, ...], ...}}, ...],
      "edges": [{"source":"...","target":"...","weight":0.2,"shared_skills":[...]}]
    }
    Recognised layouts:
      A) "positions" is already a list -> the input is returned unchanged
      B) "positions" is a dict of position_name -> entry -> converted to a list
      C) Root dict maps position_name -> {"name": "...", "skills": {...}} or {"skills":[...]}
      D) Root dict maps position_name -> flat list of skills (filed under "other")
    Raises gr.Error when the root is not an object or no layout matches.
    """
    if not isinstance(data, dict):
        raise gr.Error("JSON root must be an object.")

    positions = data.get("positions")

    # Layout A: nothing to do.
    if isinstance(positions, list):
        return data

    raw_edges = data.get("edges", [])
    result = {
        "positions": [],
        "edges": raw_edges if isinstance(raw_edges, list) else [],
    }

    # Layout B: mapping stored under "positions".
    if isinstance(positions, dict):
        for key, entry in positions.items():
            if isinstance(entry, dict):
                label = entry.get("name") or key
                raw_skills = entry.get("skills", {})
            else:
                label = str(key)
                raw_skills = {}
            result["positions"].append({"name": label, "skills": _coerce_skills(raw_skills)})
        return result

    # Layouts C/D: every root key except the reserved ones names a position.
    reserved = ("positions", "edges")
    by_name = {key: entry for key, entry in data.items() if key not in reserved}
    if not by_name:
        raise gr.Error("Unrecognized JSON schema. Include 'positions' or a mapping of position names.")

    for key, entry in by_name.items():
        label = key
        raw_skills = {}
        if isinstance(entry, dict):
            label = entry.get("name") or key
            raw_skills = entry.get("skills", {})
        elif isinstance(entry, list):
            # A bare list is a flat skill list: file it under "other", count=1 each.
            raw_skills = {"other": [{"name": s, "count": 1} for s in entry]}
        result["positions"].append({"name": label, "skills": _coerce_skills(raw_skills)})
    return result

def _coerce_count(value, default=1):
    """Best-effort conversion of a raw ``count`` value to ``int``."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        pass
    try:
        # Numeric strings with a fractional part, e.g. "4.0".
        return int(float(value))
    except (TypeError, ValueError, OverflowError):
        return default


def _coerce_skill_items(items):
    """Normalize one cluster's raw items to ``[{"name": str, "count": int}, ...]``."""
    cleaned = []
    for item in items:
        if isinstance(item, dict):
            raw_name = item.get("name")
            count = _coerce_count(item.get("count", 1))
        else:
            raw_name, count = item, 1
        # A missing/None name must not turn into the literal string "None".
        name = "" if raw_name is None else str(raw_name).strip()
        if name:
            cleaned.append({"name": name, "count": count})
    return cleaned


def _coerce_skills(skills):
    """
    Ensure skills structure is {cluster: [{"name":..., "count": int}, ...], ...}

    Accepts:
      - dict of cluster -> list of dicts with name/count
      - dict of cluster -> list of strings (count=1)
      - list of strings and/or name/count dicts -> wrapped into {'other': [...]}

    Items with an empty or missing name are dropped. Counts that cannot be
    read as a number fall back to 1. In the dict form, clusters that are not
    lists or end up empty are omitted, and a falsy cluster key is merged into
    "other". Any other input type yields {}.
    """
    if isinstance(skills, list):
        return {"other": _coerce_skill_items(skills)}
    if not isinstance(skills, dict):
        return {}

    out = {}
    for cluster, items in skills.items():
        if not isinstance(items, list):
            continue
        cleaned = _coerce_skill_items(items)
        if cleaned:
            # Merge rather than assign so "" and "other" do not clobber each other.
            out.setdefault(cluster or "other", []).extend(cleaned)
    return out

def _aggregate_skill_totals(data):
    """Sum each skill's count across all positions.

    Args:
        data: Graph dict in the internal format; only ``data["positions"]``
            is read.

    Returns:
        Dict mapping skill name to
        ``{"total": int, "clusters": set, "cluster": str}``. ``clusters`` holds
        every cluster the skill was listed under, and ``cluster`` is one
        deterministic representative (the alphabetically first).

    Malformed entries (non-dict positions or items, nameless skills) are
    skipped. An unreadable count contributes 0.
    """
    totals = {}
    for pos in data.get("positions", []):
        if not isinstance(pos, dict):
            continue
        skills = pos.get("skills")
        if not isinstance(skills, dict):
            continue
        for cluster, items in skills.items():
            if not isinstance(items, (list, tuple)):
                continue
            for it in items:
                if not isinstance(it, dict):
                    continue
                name = it.get("name")
                if not name:
                    continue
                try:
                    cnt = int(it.get("count", 0))
                except (TypeError, ValueError, OverflowError):
                    cnt = 0
                entry = totals.setdefault(name, {"total": 0, "clusters": set()})
                entry["total"] += cnt
                entry["clusters"].add(cluster or "other")

    for entry in totals.values():
        # Set iteration order varies between runs; choose a stable representative.
        entry["cluster"] = min(entry["clusters"], key=str, default="other")
    return totals

def _count_as_int(value, default=0):
    """Convert a raw ``count`` value to ``int``, or return *default* if unusable."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return default


def _build_graph(data, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min):
    """Build the position/skill NetworkX graph from normalized data.

    Args:
        data: Graph dict in the internal format (``positions`` and optional
            ``edges``).
        min_skill_count: A skill is linked to a position only if its count for
            that position is at least this value.
        top_k_per_position: Keep only the K highest-count skills per position;
            0 (or any value below 1) keeps all of them.
        include_pos_pos_edges: When true, also add position-to-position edges
            from ``data["edges"]``.
        pos_pos_weight_min: Minimum ``weight`` for a position-to-position edge
            to be included.

    Returns:
        An undirected ``nx.Graph``. Position nodes are keyed ``pos::<name>``
        and skill nodes ``skill::<name>``. Nodes carry ``label``, ``kind``,
        ``size``, ``color`` and ``title``; edges carry ``weight`` and
        ``title``, and position-to-position edges also ``color`` and
        ``dashes``.
    """
    G = nx.Graph()
    positions = [
        pos for pos in data.get("positions", [])
        if isinstance(pos, dict) and pos.get("name")
    ]

    # Pass 1: position nodes, so they all exist before any edge is attached.
    for pos in positions:
        pos_name = pos["name"]
        skills = pos.get("skills") or {}
        total_skills = sum(len(v) for v in skills.values())
        G.add_node(
            f"pos::{pos_name}",
            label=pos_name,
            kind="position",
            size=max(15, min(60, 10 + 2*total_skills)),
            color="#333333",
            title=f"<b>{pos_name}</b><br/>skills groups: {list(skills.keys())}",
        )

    skill_totals = _aggregate_skill_totals(data)

    # Slider values may arrive as floats; a slice bound must be an int.
    top_k = _count_as_int(top_k_per_position)

    # Pass 2: skill nodes and position-skill edges.
    for pos in positions:
        pos_name = pos["name"]
        flat = []
        for cluster, items in (pos.get("skills") or {}).items():
            for it in items:
                if not isinstance(it, dict):
                    continue
                skill = it.get("name")
                if not skill:
                    continue
                # Parse the count once; a missing count is 0, not a KeyError.
                cnt = _count_as_int(it.get("count", 0))
                if cnt >= min_skill_count:
                    flat.append((cluster or "other", skill, cnt))
        if top_k > 0:
            flat = sorted(flat, key=lambda x: -x[2])[:top_k]

        for cluster, skill, cnt in flat:
            node_id = f"skill::{skill}"
            if node_id not in G:
                # Size uses the total across all positions; colour comes from
                # the cluster under which the skill is first encountered.
                total = skill_totals.get(skill, {}).get("total", cnt)
                node_size = max(8, min(50, 6 + math.sqrt(total)*2))
                color = CLUSTER_COLORS.get(cluster, "#999999")
                G.add_node(
                    node_id,
                    label=skill,
                    kind="skill",
                    size=node_size,
                    color=color,
                    title=f"<b>{skill}</b><br/>cluster: {cluster}<br/>total: {total}",
                )
            G.add_edge(
                f"pos::{pos_name}",
                node_id,
                weight=cnt,
                title=f"{pos_name} ↔ {skill}: {cnt}",
            )

    if include_pos_pos_edges:
        for e in data.get("edges", []):
            if not isinstance(e, dict):
                continue
            try:
                w = float(e.get("weight", 0.0))
            except (TypeError, ValueError):
                continue
            if w < pos_pos_weight_min:
                continue
            a = f"pos::{e.get('source')}"
            b = f"pos::{e.get('target')}"
            if a in G and b in G:
                # Scale the similarity to an integer weight of at least 1.
                G.add_edge(a, b, weight=max(1, int(w*10)), color="#555555", dashes=True, title=f"similarity: {w}")
    return G

def _nx_to_pyvis_html(G, physics, layout, height_px):
    """Convert the NetworkX graph to PyVis and return its generated HTML.

    Args:
        G: Graph whose nodes may carry ``label``, ``color``, ``title``,
            ``size`` and ``kind``, and whose edges may carry ``title``,
            ``weight``, ``color`` and ``dashes``.
        physics: Truthy to enable the physics simulation.
        layout: Layout label from the UI dropdown; the exact string
            "hierarchical (positions → skills)" selects the hierarchical
            layout, anything else the free layout.
        height_px: Canvas height in pixels.

    Returns:
        The string produced by ``Network.generate_html()``.
    """
    net = Network(
        height=f"{height_px}px",
        width="100%",
        bgcolor="#ffffff",
        font_color="#222222",
        directed=False,
        notebook=False,
    )
    if physics:
        # NOTE(review): set_options() below installs a fresh options object,
        # which presumably discards this solver selection. Confirm against
        # the installed pyvis version.
        net.force_atlas_2based()

    # PyVis expects pure JSON (no 'var options =')
    if layout == "hierarchical (positions → skills)":
        options = {
            "layout": {
                "hierarchical": {
                    "enabled": True,
                    "levelSeparation": 180,
                    "nodeSpacing": 170,
                    "treeSpacing": 200,
                    "direction": "UD",
                    "sortMethod": "hubsize",
                }
            },
            "physics": {"enabled": bool(physics)},
        }
    else:
        options = {
            "physics": {
                "enabled": bool(physics),
                "stabilization": {"iterations": 150},
            }
        }
    net.set_options(json.dumps(options))

    for node_id, attrs in G.nodes(data=True):
        net.add_node(
            node_id,
            label=attrs.get("label", node_id),
            color=attrs.get("color", "#97c2fc"),
            title=attrs.get("title", ""),
            size=attrs.get("size", 15),
            shape="dot" if attrs.get("kind") == "skill" else "ellipse",
        )

    for u, v, attrs in G.edges(data=True):
        edge_options = {
            "title": attrs.get("title", ""),
            "value": attrs.get("weight", 1),
        }
        # Forward optional styling only when the graph sets it, so that
        # position-to-position edges keep their colour and dashed stroke.
        for key in ("color", "dashes"):
            if attrs.get(key) is not None:
                edge_options[key] = attrs[key]
        net.add_edge(u, v, **edge_options)

    return net.generate_html()

def run(json_file, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min, physics, layout, height_px):
    """Gradio callback: load the JSON, build the graph and return embeddable HTML.

    Args:
        json_file: Value of the file-upload input (``None`` to use the default file).
        min_skill_count: Minimum per-position count for a skill to be shown.
        top_k_per_position: Maximum skills kept per position (0 = all).
        include_pos_pos_edges: Whether to draw position-to-position edges.
        pos_pos_weight_min: Minimum weight for position-to-position edges.
        physics: Whether the physics simulation is enabled.
        layout: Layout label selected in the dropdown.
        height_px: Canvas height in pixels.

    Returns:
        An ``<iframe>`` snippet whose ``srcdoc`` holds the generated network page.
    """
    # Local import: keeps this block self-contained without touching the
    # module's import list.
    from html import escape

    data = _normalize_schema(_load_json(json_file))
    G = _build_graph(data, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min)
    document = _nx_to_pyvis_html(G, physics=physics, layout=layout, height_px=height_px)

    # The generated page draws the network from <script> tags, and scripts
    # injected as an HTML fragment are not executed by the browser. Embedding
    # the page in an iframe gives it its own document so the scripts run.
    frame_height = int(height_px) + 40  # headroom for the page's own margins
    return (
        f'<iframe style="width: 100%; height: {frame_height}px; border: 0;" '
        f'srcdoc="{escape(document, quote=True)}"></iframe>'
    )

# UI definition: controls in the left column, rendered network in the right.
with gr.Blocks(title="Job Positions ↔ Hard Skills — Network Diagram") as demo:
    # Real newlines are required here: with a literal backslash-n the heading,
    # the paragraph and the bullet list all collapse into one heading line.
    gr.Markdown(
        "# Network Diagram: Positions ↔ Skills\n"
        "Upload `job_position_skill_graph.json` or place it in the repo root.\n"
        "- **Black ovals** = Job positions\n"
        "- **Colored dots** = Skills (color by cluster)\n"
        "- Edge weight = frequency of skill in that position"
    )

    with gr.Row():
        with gr.Column(scale=1):
            json_file = gr.File(label="Upload job_position_skill_graph.json (optional)", file_count="single", file_types=[".json"])
            min_skill_count = gr.Slider(0, 50, value=5, step=1, label="Minimum skill count per position (filter noise)")
            top_k_per_position = gr.Slider(0, 100, value=20, step=1, label="Top-K skills per position (0 = all)")
            include_pos_pos_edges = gr.Checkbox(value=False, label="Include position↔position similarity edges")
            pos_pos_weight_min = gr.Slider(0.0, 1.0, value=0.2, step=0.05, label="Min similarity (if enabled)")
            physics = gr.Checkbox(value=True, label="Enable physics (force layout)")
            # The second choice string is matched verbatim in _nx_to_pyvis_html.
            layout = gr.Dropdown(choices=["free (force layout)", "hierarchical (positions → skills)"], value="free (force layout)", label="Layout")
            height_px = gr.Slider(500, 1400, value=900, step=50, label="Canvas height (px)")
            btn = gr.Button("Build Network", variant="primary")
        with gr.Column(scale=1):
            out_html = gr.HTML(label="Interactive Network")

    # The input order must match the parameter order of run().
    btn.click(
        fn=run,
        inputs=[json_file, min_skill_count, top_k_per_position, include_pos_pos_edges, pos_pos_weight_min, physics, layout, height_px],
        outputs=[out_html]
    )

if __name__ == "__main__":
    demo.launch()