Spaces:
Sleeping
Sleeping
| \ | |
import html
import json
import math
import os

import gradio as gr
from pyvis.network import Network
# Graph file used when nothing is uploaded; put this file at the repo root.
DEFAULT_JSON = "job_position_skill_graph.json"

# Palette used to colour nodes by skill cluster. Entries are reused in a
# cycle when more clusters appear than there are colours.
CLUSTER_COLORS = [
    "#1f77b4",
    "#ff7f0e",
    "#2ca02c",
    "#d62728",
    "#9467bd",
    "#8c564b",
    "#e377c2",
    "#7f7f7f",
    "#bcbd22",
    "#17becf",
]
def load_graph_json(json_file):
    """
    Return the parsed graph JSON from an upload, falling back to DEFAULT_JSON.

    json_file may be None, a path string, or an object whose ``name``
    attribute holds the path (gr.File can hand over either form). When it is
    None, DEFAULT_JSON is read if that file exists; otherwise gr.Error is
    raised.

    Expected schema:
    {
      "positions": [{"name": "...","skills": {"cluster":[{"name":"skill","count":int},...]...}}],
      "edges": [{"source":"...","target":"...","weight":float,"shared_skills":[...]}]
    }
    """
    if json_file is None:
        # No upload: the bundled default file is the only remaining source.
        if not os.path.exists(DEFAULT_JSON):
            raise gr.Error("No JSON provided and default file not found. Please upload job_position_skill_graph.json.")
        source_path = DEFAULT_JSON
    else:
        # File-like uploads expose their temp path as .name; plain strings
        # are already the path.
        source_path = getattr(json_file, "name", json_file)

    with open(source_path, "r", encoding="utf-8") as handle:
        return json.load(handle)
def infer_node_cluster_and_size(position, node_size_mode):
    """
    Infer the dominant skill cluster of a position and compute its node size.

    Args:
        position: Position dict. ``position["skills"]`` maps a cluster name to
            a list of ``{"name": ..., "count": ...}`` items. A missing or null
            ``skills`` value, a null item list and a null ``count`` are all
            treated as empty / zero instead of raising.
        node_size_mode: ``"skills-top10"`` sizes the node by the ten largest
            skill counts of the position, taken across all clusters. Any
            other value (normally ``"skills-total"``) uses the sum of every
            count.

    Returns:
        A ``(dominant_cluster, size)`` tuple. ``dominant_cluster`` is the
        cluster with the highest summed count (the first one wins ties), or
        ``"other"`` when the position has no clusters. ``size`` is 10 when
        the chosen sum is not positive, otherwise
        ``10 + min(50, 5 * sqrt(sum))``, i.e. a value in the range 10..60.
    """
    # `or {}` mirrors render_network, which also accepts "skills": null.
    skills_by_cluster = position.get("skills") or {}

    # Normalise once so both size modes see the same values: negative counts
    # are clamped to zero and null counts count as zero.
    counts_by_cluster = {
        cluster: [max(0, int(item.get("count", 0) or 0)) for item in (items or [])]
        for cluster, items in skills_by_cluster.items()
    }
    if not counts_by_cluster:
        return ("other", 10)

    cluster_scores = {
        cluster: sum(counts) for cluster, counts in counts_by_cluster.items()
    }
    dominant = max(cluster_scores, key=cluster_scores.get)

    if node_size_mode == "skills-top10":
        # Top 10 over the whole position, not the top 10 of each cluster.
        ranked = sorted(
            (count for counts in counts_by_cluster.values() for count in counts),
            reverse=True,
        )
        basis = sum(ranked[:10])
    else:
        basis = sum(cluster_scores.values())

    if basis <= 0:
        return (dominant, 10)

    # sqrt scale compresses large ranges; the result is capped at 60.
    return (dominant, 10 + min(50, 5 * math.sqrt(basis)))
def build_tooltip(position, max_items_per_cluster=6):
    """
    Build an HTML tooltip listing the top skills of a position per cluster.

    Args:
        position: Position dict with an optional ``name`` and an optional
            ``skills`` mapping of cluster name to a list of
            ``{"name": ..., "count": ...}`` items. Null ``skills`` and null
            counts are treated as empty / zero.
        max_items_per_cluster: Maximum number of skills listed per cluster.
            It is converted with ``int()`` so a float slider value is
            accepted.

    Returns:
        An HTML string: the bold position name followed by one ``<div>`` per
        non-empty cluster, joined with ``<br/>``. Skills are ordered by
        descending count. Every value taken from the data is HTML-escaped.
    """
    limit = int(max_items_per_cluster)

    def count_of(item):
        return int(item.get("count", 0) or 0)

    # The data comes from a user-supplied JSON file, so all text is escaped
    # before it is interpolated into markup.
    title = html.escape(str(position.get("name") or ""))
    parts = [f"<b>{title}</b>"]

    for cluster, items in (position.get("skills") or {}).items():
        if not items:
            continue
        # Negated key (not reverse=True) keeps input order among equal counts.
        top = sorted(items, key=lambda item: -count_of(item))[:limit]
        listing = ", ".join(
            "{} ({})".format(html.escape(str(item.get("name", ""))), count_of(item))
            for item in top
        )
        parts.append(f"<div><b>{html.escape(str(cluster))}:</b> {listing}</div>")

    return "<br/>".join(parts)
def render_network(json_file, min_edge_weight, show_labels, physics, max_items_per_cluster, node_size_mode, filter_position, layout):
    """
    Render the position/skill graph with pyvis and return HTML for gr.HTML.

    Args:
        json_file: Upload passed through to load_graph_json (may be None).
        min_edge_weight: Edges whose weight is below this value are dropped.
        show_labels: When false, nodes are drawn with an empty label.
        physics: Enables the physics layout; when false physics is turned off.
        max_items_per_cluster: Tooltip limit forwarded to build_tooltip.
        node_size_mode: Size mode forwarded to infer_node_cluster_and_size.
        filter_position: Case-insensitive substring; only positions whose
            name contains it become nodes. Empty / None disables the filter.
        layout: "Barnes-Hut" selects barnes_hut(); anything else selects
            force_atlas_2based(). Only used when physics is enabled.

    Returns:
        A short HTML hint when no position passes the filter. Otherwise an
        ``<iframe>`` whose ``srcdoc`` holds the page produced by
        ``net.generate_html()``.

    Raises:
        gr.Error: propagated from load_graph_json when no JSON is available.
    """
    data = load_graph_json(json_file)
    positions = data.get("positions") or []
    edges = data.get("edges") or []

    # Prepare pyvis network
    net = Network(height="720px", width="100%", bgcolor="#ffffff", font_color="#111111", directed=False, cdn_resources="in_line")

    # Physics options
    if not physics:
        # set_options() expects a JSON options string, not a JavaScript
        # snippet; toggle_physics is the supported way to switch physics off.
        net.toggle_physics(False)
    elif layout == "Barnes-Hut":
        net.barnes_hut()
    else:
        # ForceAtlas2Based may look nice for dense graphs
        net.force_atlas_2based()

    # Cluster -> colour map in order of first appearance (dict keys keep
    # insertion order and de-duplicate in one pass).
    cluster_names = dict.fromkeys(
        cluster for pos in positions for cluster in (pos.get("skills") or {})
    )
    color_map = {
        cluster: CLUSTER_COLORS[idx % len(CLUSTER_COLORS)]
        for idx, cluster in enumerate(cluster_names)
    }
    color_map.setdefault("other", "#888888")

    # Optional position name filter (substring, case-insensitive)
    needle = (filter_position or "").strip().lower()
    # Sliders may deliver floats; the tooltip limit is used as a slice bound.
    tooltip_limit = int(max_items_per_cluster)

    # Add nodes
    node_ids = set()
    for pos in positions:
        name = str(pos.get("name") or "")
        if needle and needle not in name.lower():
            continue
        dominant_cluster, size = infer_node_cluster_and_size(pos, node_size_mode)
        net.add_node(
            n_id=name,
            label=name if show_labels else "",
            title=build_tooltip(pos, max_items_per_cluster=tooltip_limit),
            color=color_map.get(dominant_cluster, color_map["other"]),
            size=size,
        )
        node_ids.add(name)

    # If graph ends up empty, hint the user
    if not node_ids:
        return "<h3>No nodes to show</h3><p>Loosen filters or upload a JSON.</p>"

    # Add edges with threshold filter; both endpoints must be visible nodes.
    threshold = float(min_edge_weight)
    for edge in edges:
        weight = float(edge.get("weight") or 0)
        if weight < threshold:
            continue
        src, tgt = edge.get("source"), edge.get("target")
        if src not in node_ids or tgt not in node_ids:
            continue
        shared = ", ".join(str(skill) for skill in (edge.get("shared_skills") or [])[:10])
        net.add_edge(src, tgt, value=weight, title=f"weight={weight:.2f} | shared: {shared}")

    # gr.HTML only renders static markup, so the scripts in the pyvis page
    # would never run if it were returned directly. Embedding the page in an
    # iframe via srcdoc gives it its own document in which they execute.
    page = net.generate_html()
    return (
        '<iframe style="width: 100%; height: 760px; border: none;" '
        f'srcdoc="{html.escape(page, quote=True)}"></iframe>'
    )
# UI definition: controls in the left column, rendered network on the right.
with gr.Blocks(title="Job Position ↔ Hard Skills — Network") as demo:
    gr.Markdown(
        "# Job Position ↔ Hard Skills — Network Diagram\n"
        "Upload a JSON or place **job_position_skill_graph.json** in repo root."
    )

    with gr.Row():
        with gr.Column(scale=1):
            # Data source
            json_file = gr.File(
                label="Upload job_position_skill_graph.json (optional)",
                file_count="single",
                file_types=[".json"],
            )
            # Graph filtering and layout controls
            min_edge_weight = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                value=0.15,
                step=0.01,
                label="Min edge weight (Jaccard)",
            )
            show_labels = gr.Checkbox(value=True, label="Show node labels")
            physics = gr.Checkbox(value=True, label="Enable physics layout")
            layout = gr.Radio(
                choices=["Barnes-Hut", "ForceAtlas2Based"],
                value="ForceAtlas2Based",
                label="Layout algorithm",
            )
            node_size_mode = gr.Radio(
                choices=["skills-total", "skills-top10"],
                value="skills-total",
                label="Node size scale by",
            )
            max_items_per_cluster = gr.Slider(
                minimum=1,
                maximum=20,
                value=6,
                step=1,
                label="Tooltip: max skills per cluster",
            )
            filter_position = gr.Textbox(
                value="",
                label="Filter by position name (substring)",
            )
            btn = gr.Button(value="Render", variant="primary")

        with gr.Column(scale=1):
            out_html = gr.HTML(label="Network")

    # The input order must match render_network's positional parameters.
    btn.click(
        fn=render_network,
        inputs=[
            json_file,
            min_edge_weight,
            show_labels,
            physics,
            max_items_per_cluster,
            node_size_mode,
            filter_position,
            layout,
        ],
        outputs=[out_html],
    )


if __name__ == "__main__":
    demo.launch()