Spaces:
Running
Running
"""기술 스택 공동 출현 네트워크 분석 (NetworkX)."""
| import sqlite3 | |
| from itertools import combinations | |
| from collections import Counter | |
| import networkx as nx | |
| import pandas as pd | |
def build_cooccurrence_graph(
    conn: sqlite3.Connection,
    category: str | None = None,
    min_cooccur: int = 3,
) -> nx.Graph:
    """Build a skill co-occurrence graph from active job postings.

    Skills that appear together in the same posting become connected nodes.

    Args:
        conn: open SQLite connection with ``jobs`` / ``job_skills`` tables.
        category: job category filter; ``None`` selects all categories.
        min_cooccur: minimum number of joint appearances required for an edge.

    Returns:
        Graph whose nodes are skills (node attribute ``frequency`` = number of
        postings mentioning the skill) and whose edges carry
        ``weight`` = co-occurrence count; only pairs with
        weight >= ``min_cooccur`` get an edge.
    """
    where = "WHERE j.is_active = 1"
    params: list = []
    if category:
        # Category value is bound as a parameter; only static SQL is interpolated.
        where += " AND j.job_category = ?"
        params.append(category)

    rows = pd.read_sql_query(
        f"""
        SELECT js.job_id, js.skill_name
        FROM job_skills js
        JOIN jobs j ON js.job_id = j.id
        {where}
        """,
        conn,
        params=params,
    )

    G = nx.Graph()
    if rows.empty:
        # No matching postings -> empty graph.
        return G

    # Skill set per posting.
    job_skills: dict[int, set[str]] = (
        rows.groupby("job_id")["skill_name"].apply(set).to_dict()
    )

    # Count node frequency and pair co-occurrence in a single pass
    # (the original iterated job_skills twice).
    node_freq: Counter = Counter()
    cooccur: Counter = Counter()
    for skills in job_skills.values():
        node_freq.update(skills)
        # Sorting yields a canonical (a, b) key so the same pair from
        # different postings accumulates in one counter slot.
        for a, b in combinations(sorted(skills), 2):
            cooccur[(a, b)] += 1

    for skill, freq in node_freq.items():
        G.add_node(skill, frequency=freq)
    for (a, b), weight in cooccur.items():
        if weight >= min_cooccur:
            G.add_edge(a, b, weight=weight)
    return G
def get_top_central_skills(G: nx.Graph, top_n: int = 15) -> pd.DataFrame:
    """Extract the most central skills from a co-occurrence graph.

    Args:
        G: graph from ``build_cooccurrence_graph`` (edge ``weight`` =
           co-occurrence count, node ``frequency`` = posting count).
        top_n: number of rows to return.

    Returns:
        DataFrame with columns [skill, degree_centrality, betweenness,
        frequency], sorted by degree centrality descending, top ``top_n`` rows.
    """
    columns = ["skill", "degree_centrality", "betweenness", "frequency"]
    if len(G.nodes) == 0:
        return pd.DataFrame(columns=columns)

    degree_c = nx.degree_centrality(G)

    # BUG FIX: NetworkX interprets edge weight as *distance* (larger weight =
    # farther apart), but our weights are co-occurrence counts (similarity).
    # Passing weight="weight" inverted the semantics — strongly linked skills
    # were treated as weakly connected. Compute betweenness on inverse-weight
    # distances instead; use a copy so the caller's graph is not mutated.
    H = G.copy()
    for _, _, data in H.edges(data=True):
        data["distance"] = 1.0 / data.get("weight", 1)
    between_c = nx.betweenness_centrality(H, weight="distance")

    records = [
        {
            "skill": node,
            "degree_centrality": round(degree_c[node], 4),
            "betweenness": round(between_c[node], 4),
            "frequency": G.nodes[node].get("frequency", 0),
        }
        for node in G.nodes
    ]
    return (
        pd.DataFrame(records)
        .sort_values("degree_centrality", ascending=False)
        .head(top_n)
        .reset_index(drop=True)
    )
def graph_to_plotly_traces(G: nx.Graph) -> tuple[list, list]:
    """Convert the skill graph to Plotly scatter traces for the dashboard.

    Args:
        G: graph from ``build_cooccurrence_graph``.

    Returns:
        (edge_traces, node_traces) — lists of Plotly ``go.Scatter`` objects.
    """
    import plotly.graph_objects as go  # local import: only needed for the dashboard

    # Fixed seed keeps the layout stable between dashboard refreshes.
    pos = nx.spring_layout(G, seed=42, k=0.8)

    # Edges: a single trace; None entries break the line between segments.
    edge_x: list = []
    edge_y: list = []
    for u, v in G.edges():
        x0, y0 = pos[u]
        x1, y1 = pos[v]
        edge_x += [x0, x1, None]
        edge_y += [y0, y1, None]
    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        mode="lines",
        line=dict(width=0.5, color="#aaa"),
        hoverinfo="none",
        name="connections",
    )

    # Nodes: sized by posting frequency, colored by degree.
    node_freqs = [G.nodes[n].get("frequency", 1) for n in G.nodes]
    node_x = [pos[n][0] for n in G.nodes]
    node_y = [pos[n][1] for n in G.nodes]
    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode="markers+text",
        text=list(G.nodes),
        textposition="top center",
        marker=dict(
            # Sub-linear scaling so very common skills don't dominate the plot.
            size=[max(8, f ** 0.6) for f in node_freqs],
            color=[G.degree(n) for n in G.nodes],
            colorscale="Viridis",
            showscale=True,
            colorbar=dict(title="연결 수"),  # "number of connections"
        ),
        # BUG FIX: the hover previously labeled %{marker.size} — the scaled
        # marker size, not the posting count — as "공고 수". Expose the raw
        # frequency via customdata so the hover shows the true count.
        customdata=node_freqs,
        hovertemplate="<b>%{text}</b><br>공고 수: %{customdata}<extra></extra>",
        name="skills",
    )
    return [edge_trace], [node_trace]