|
|
"""Function for detecting communities based on Louvain Community Detection |
|
|
Algorithm""" |
|
|
|
|
|
from collections import defaultdict, deque |
|
|
|
|
|
import networkx as nx |
|
|
from networkx.algorithms.community import modularity |
|
|
from networkx.utils import py_random_state |
|
|
|
|
|
__all__ = ["louvain_communities", "louvain_partitions"] |
|
|
|
|
|
|
|
|
@py_random_state("seed") |
|
|
@nx._dispatch(edge_attrs="weight") |
|
|
def louvain_communities( |
|
|
G, weight="weight", resolution=1, threshold=0.0000001, seed=None |
|
|
): |
|
|
r"""Find the best partition of a graph using the Louvain Community Detection |
|
|
Algorithm. |
|
|
|
|
|
Louvain Community Detection Algorithm is a simple method to extract the community |
|
|
structure of a network. This is a heuristic method based on modularity optimization. [1]_ |
|
|
|
|
|
The algorithm works in 2 steps. On the first step it assigns every node to be |
|
|
in its own community and then for each node it tries to find the maximum positive |
|
|
modularity gain by moving each node to all of its neighbor communities. If no positive |
|
|
gain is achieved the node remains in its original community. |
|
|
|
|
|
The modularity gain obtained by moving an isolated node $i$ into a community $C$ can |
|
|
easily be calculated by the following formula (combining [1]_ [2]_ and some algebra): |
|
|
|
|
|
.. math:: |
|
|
\Delta Q = \frac{k_{i,in}}{2m} - \gamma\frac{ \Sigma_{tot} \cdot k_i}{2m^2} |
|
|
|
|
|
where $m$ is the size of the graph, $k_{i,in}$ is the sum of the weights of the links |
|
|
from $i$ to nodes in $C$, $k_i$ is the sum of the weights of the links incident to node $i$, |
|
|
$\Sigma_{tot}$ is the sum of the weights of the links incident to nodes in $C$ and $\gamma$ |
|
|
is the resolution parameter. |
|
|
|
|
|
For the directed case the modularity gain can be computed using this formula according to [3]_ |
|
|
|
|
|
.. math:: |
|
|
\Delta Q = \frac{k_{i,in}}{m} |
|
|
- \gamma\frac{k_i^{out} \cdot\Sigma_{tot}^{in} + k_i^{in} \cdot \Sigma_{tot}^{out}}{m^2} |
|
|
|
|
|
where $k_i^{out}$, $k_i^{in}$ are the outer and inner weighted degrees of node $i$ and |
|
|
$\Sigma_{tot}^{in}$, $\Sigma_{tot}^{out}$ are the sum of in-going and out-going links incident |
|
|
to nodes in $C$. |
|
|
|
|
|
The first phase continues until no individual move can improve the modularity. |
|
|
|
|
|
The second phase consists in building a new network whose nodes are now the communities |
|
|
found in the first phase. To do so, the weights of the links between the new nodes are given by |
|
|
the sum of the weight of the links between nodes in the corresponding two communities. Once this |
|
|
phase is complete it is possible to reapply the first phase creating bigger communities with |
|
|
increased modularity. |
|
|
|
|
|
The above two phases are executed until no modularity gain is achieved (or is less than |
|
|
the `threshold`). |
|
|
|
|
|
Be careful with self-loops in the input graph. These are treated as |
|
|
previously reduced communities -- as if the process had been started |
|
|
in the middle of the algorithm. Large self-loop edge weights thus |
|
|
represent strong communities and in practice may be hard to add |
|
|
other nodes to. If your input graph edge weights for self-loops |
|
|
do not represent already reduced communities you may want to remove |
|
|
the self-loops before inputting that graph. |
|
|
|
|
|
Parameters |
|
|
---------- |
|
|
G : NetworkX graph |
|
|
weight : string or None, optional (default="weight") |
|
|
The name of an edge attribute that holds the numerical value |
|
|
used as a weight. If None then each edge has weight 1. |
|
|
resolution : float, optional (default=1) |
|
|
If resolution is less than 1, the algorithm favors larger communities. |
|
|
Greater than 1 favors smaller communities |
|
|
threshold : float, optional (default=0.0000001) |
|
|
Modularity gain threshold for each level. If the gain of modularity |
|
|
between 2 levels of the algorithm is less than the given threshold |
|
|
then the algorithm stops and returns the resulting communities. |
|
|
seed : integer, random_state, or None (default) |
|
|
Indicator of random number generation state. |
|
|
See :ref:`Randomness<randomness>`. |
|
|
|
|
|
Returns |
|
|
------- |
|
|
list |
|
|
A list of sets (partition of `G`). Each set represents one community and contains |
|
|
all the nodes that constitute it. |
|
|
|
|
|
Examples |
|
|
-------- |
|
|
>>> import networkx as nx |
|
|
>>> G = nx.petersen_graph() |
|
|
>>> nx.community.louvain_communities(G, seed=123) |
|
|
[{0, 4, 5, 7, 9}, {1, 2, 3, 6, 8}] |
|
|
|
|
|
Notes |
|
|
----- |
|
|
The order in which the nodes are considered can affect the final output. In the algorithm |
|
|
the ordering happens using a random shuffle. |
|
|
|
|
|
References |
|
|
---------- |
|
|
.. [1] Blondel, V.D. et al. Fast unfolding of communities in |
|
|
large networks. J. Stat. Mech 10008, 1-12(2008). https://doi.org/10.1088/1742-5468/2008/10/P10008 |
|
|
.. [2] Traag, V.A., Waltman, L. & van Eck, N.J. From Louvain to Leiden: guaranteeing |
|
|
well-connected communities. Sci Rep 9, 5233 (2019). https://doi.org/10.1038/s41598-019-41695-z |
|
|
.. [3] Nicolas Dugué, Anthony Perez. Directed Louvain : maximizing modularity in directed networks. |
|
|
[Research Report] Université d’Orléans. 2015. hal-01231784. https://hal.archives-ouvertes.fr/hal-01231784 |
|
|
|
|
|
See Also |
|
|
-------- |
|
|
louvain_partitions |
|
|
""" |
|
|
|
|
|
d = louvain_partitions(G, weight, resolution, threshold, seed) |
|
|
q = deque(d, maxlen=1) |
|
|
return q.pop() |
|
|
|
|
|
|
|
|
@py_random_state("seed") |
|
|
@nx._dispatch(edge_attrs="weight") |
|
|
def louvain_partitions( |
|
|
G, weight="weight", resolution=1, threshold=0.0000001, seed=None |
|
|
): |
|
|
"""Yields partitions for each level of the Louvain Community Detection Algorithm |
|
|
|
|
|
Louvain Community Detection Algorithm is a simple method to extract the community |
|
|
structure of a network. This is a heuristic method based on modularity optimization. [1]_ |
|
|
|
|
|
The partitions at each level (step of the algorithm) form a dendrogram of communities. |
|
|
A dendrogram is a diagram representing a tree and each level represents |
|
|
a partition of the G graph. The top level contains the smallest communities |
|
|
and as you traverse to the bottom of the tree the communities get bigger |
|
|
and the overall modularity increases making the partition better. |
|
|
|
|
|
Each level is generated by executing the two phases of the Louvain Community |
|
|
Detection Algorithm. |
|
|
|
|
|
Be careful with self-loops in the input graph. These are treated as |
|
|
previously reduced communities -- as if the process had been started |
|
|
in the middle of the algorithm. Large self-loop edge weights thus |
|
|
represent strong communities and in practice may be hard to add |
|
|
other nodes to. If your input graph edge weights for self-loops |
|
|
do not represent already reduced communities you may want to remove |
|
|
the self-loops before inputting that graph. |
|
|
|
|
|
Parameters |
|
|
---------- |
|
|
G : NetworkX graph |
|
|
weight : string or None, optional (default="weight") |
|
|
The name of an edge attribute that holds the numerical value |
|
|
used as a weight. If None then each edge has weight 1. |
|
|
resolution : float, optional (default=1) |
|
|
If resolution is less than 1, the algorithm favors larger communities. |
|
|
Greater than 1 favors smaller communities |
|
|
threshold : float, optional (default=0.0000001) |
|
|
Modularity gain threshold for each level. If the gain of modularity |
|
|
between 2 levels of the algorithm is less than the given threshold |
|
|
then the algorithm stops and returns the resulting communities. |
|
|
seed : integer, random_state, or None (default) |
|
|
Indicator of random number generation state. |
|
|
See :ref:`Randomness<randomness>`. |
|
|
|
|
|
Yields |
|
|
------ |
|
|
list |
|
|
A list of sets (partition of `G`). Each set represents one community and contains |
|
|
all the nodes that constitute it. |
|
|
|
|
|
References |
|
|
---------- |
|
|
.. [1] Blondel, V.D. et al. Fast unfolding of communities in |
|
|
large networks. J. Stat. Mech 10008, 1-12(2008) |
|
|
|
|
|
See Also |
|
|
-------- |
|
|
louvain_communities |
|
|
""" |
|
|
|
|
|
partition = [{u} for u in G.nodes()] |
|
|
if nx.is_empty(G): |
|
|
yield partition |
|
|
return |
|
|
mod = modularity(G, partition, resolution=resolution, weight=weight) |
|
|
is_directed = G.is_directed() |
|
|
if G.is_multigraph(): |
|
|
graph = _convert_multigraph(G, weight, is_directed) |
|
|
else: |
|
|
graph = G.__class__() |
|
|
graph.add_nodes_from(G) |
|
|
graph.add_weighted_edges_from(G.edges(data=weight, default=1)) |
|
|
|
|
|
m = graph.size(weight="weight") |
|
|
partition, inner_partition, improvement = _one_level( |
|
|
graph, m, partition, resolution, is_directed, seed |
|
|
) |
|
|
improvement = True |
|
|
while improvement: |
|
|
|
|
|
yield [s.copy() for s in partition] |
|
|
new_mod = modularity( |
|
|
graph, inner_partition, resolution=resolution, weight="weight" |
|
|
) |
|
|
if new_mod - mod <= threshold: |
|
|
return |
|
|
mod = new_mod |
|
|
graph = _gen_graph(graph, inner_partition) |
|
|
partition, inner_partition, improvement = _one_level( |
|
|
graph, m, partition, resolution, is_directed, seed |
|
|
) |
|
|
|
|
|
|
|
|
def _one_level(G, m, partition, resolution=1, is_directed=False, seed=None): |
|
|
"""Calculate one level of the Louvain partitions tree |
|
|
|
|
|
Parameters |
|
|
---------- |
|
|
G : NetworkX Graph/DiGraph |
|
|
The graph from which to detect communities |
|
|
m : number |
|
|
The size of the graph `G`. |
|
|
partition : list of sets of nodes |
|
|
A valid partition of the graph `G` |
|
|
resolution : positive number |
|
|
The resolution parameter for computing the modularity of a partition |
|
|
is_directed : bool |
|
|
True if `G` is a directed graph. |
|
|
seed : integer, random_state, or None (default) |
|
|
Indicator of random number generation state. |
|
|
See :ref:`Randomness<randomness>`. |
|
|
|
|
|
""" |
|
|
node2com = {u: i for i, u in enumerate(G.nodes())} |
|
|
inner_partition = [{u} for u in G.nodes()] |
|
|
if is_directed: |
|
|
in_degrees = dict(G.in_degree(weight="weight")) |
|
|
out_degrees = dict(G.out_degree(weight="weight")) |
|
|
Stot_in = list(in_degrees.values()) |
|
|
Stot_out = list(out_degrees.values()) |
|
|
|
|
|
nbrs = {} |
|
|
for u in G: |
|
|
nbrs[u] = defaultdict(float) |
|
|
for _, n, wt in G.out_edges(u, data="weight"): |
|
|
if u != n: |
|
|
nbrs[u][n] += wt |
|
|
for n, _, wt in G.in_edges(u, data="weight"): |
|
|
if u != n: |
|
|
nbrs[u][n] += wt |
|
|
else: |
|
|
degrees = dict(G.degree(weight="weight")) |
|
|
Stot = list(degrees.values()) |
|
|
nbrs = {u: {v: data["weight"] for v, data in G[u].items() if v != u} for u in G} |
|
|
rand_nodes = list(G.nodes) |
|
|
seed.shuffle(rand_nodes) |
|
|
nb_moves = 1 |
|
|
improvement = False |
|
|
while nb_moves > 0: |
|
|
nb_moves = 0 |
|
|
for u in rand_nodes: |
|
|
best_mod = 0 |
|
|
best_com = node2com[u] |
|
|
weights2com = _neighbor_weights(nbrs[u], node2com) |
|
|
if is_directed: |
|
|
in_degree = in_degrees[u] |
|
|
out_degree = out_degrees[u] |
|
|
Stot_in[best_com] -= in_degree |
|
|
Stot_out[best_com] -= out_degree |
|
|
remove_cost = ( |
|
|
-weights2com[best_com] / m |
|
|
+ resolution |
|
|
* (out_degree * Stot_in[best_com] + in_degree * Stot_out[best_com]) |
|
|
/ m**2 |
|
|
) |
|
|
else: |
|
|
degree = degrees[u] |
|
|
Stot[best_com] -= degree |
|
|
remove_cost = -weights2com[best_com] / m + resolution * ( |
|
|
Stot[best_com] * degree |
|
|
) / (2 * m**2) |
|
|
for nbr_com, wt in weights2com.items(): |
|
|
if is_directed: |
|
|
gain = ( |
|
|
remove_cost |
|
|
+ wt / m |
|
|
- resolution |
|
|
* ( |
|
|
out_degree * Stot_in[nbr_com] |
|
|
+ in_degree * Stot_out[nbr_com] |
|
|
) |
|
|
/ m**2 |
|
|
) |
|
|
else: |
|
|
gain = ( |
|
|
remove_cost |
|
|
+ wt / m |
|
|
- resolution * (Stot[nbr_com] * degree) / (2 * m**2) |
|
|
) |
|
|
if gain > best_mod: |
|
|
best_mod = gain |
|
|
best_com = nbr_com |
|
|
if is_directed: |
|
|
Stot_in[best_com] += in_degree |
|
|
Stot_out[best_com] += out_degree |
|
|
else: |
|
|
Stot[best_com] += degree |
|
|
if best_com != node2com[u]: |
|
|
com = G.nodes[u].get("nodes", {u}) |
|
|
partition[node2com[u]].difference_update(com) |
|
|
inner_partition[node2com[u]].remove(u) |
|
|
partition[best_com].update(com) |
|
|
inner_partition[best_com].add(u) |
|
|
improvement = True |
|
|
nb_moves += 1 |
|
|
node2com[u] = best_com |
|
|
partition = list(filter(len, partition)) |
|
|
inner_partition = list(filter(len, inner_partition)) |
|
|
return partition, inner_partition, improvement |
|
|
|
|
|
|
|
|
def _neighbor_weights(nbrs, node2com): |
|
|
"""Calculate weights between node and its neighbor communities. |
|
|
|
|
|
Parameters |
|
|
---------- |
|
|
nbrs : dictionary |
|
|
Dictionary with nodes' neighbours as keys and their edge weight as value. |
|
|
node2com : dictionary |
|
|
Dictionary with all graph's nodes as keys and their community index as value. |
|
|
|
|
|
""" |
|
|
weights = defaultdict(float) |
|
|
for nbr, wt in nbrs.items(): |
|
|
weights[node2com[nbr]] += wt |
|
|
return weights |
|
|
|
|
|
|
|
|
def _gen_graph(G, partition): |
|
|
"""Generate a new graph based on the partitions of a given graph""" |
|
|
H = G.__class__() |
|
|
node2com = {} |
|
|
for i, part in enumerate(partition): |
|
|
nodes = set() |
|
|
for node in part: |
|
|
node2com[node] = i |
|
|
nodes.update(G.nodes[node].get("nodes", {node})) |
|
|
H.add_node(i, nodes=nodes) |
|
|
|
|
|
for node1, node2, wt in G.edges(data=True): |
|
|
wt = wt["weight"] |
|
|
com1 = node2com[node1] |
|
|
com2 = node2com[node2] |
|
|
temp = H.get_edge_data(com1, com2, {"weight": 0})["weight"] |
|
|
H.add_edge(com1, com2, weight=wt + temp) |
|
|
return H |
|
|
|
|
|
|
|
|
def _convert_multigraph(G, weight, is_directed): |
|
|
"""Convert a Multigraph to normal Graph""" |
|
|
if is_directed: |
|
|
H = nx.DiGraph() |
|
|
else: |
|
|
H = nx.Graph() |
|
|
H.add_nodes_from(G) |
|
|
for u, v, wt in G.edges(data=weight, default=1): |
|
|
if H.has_edge(u, v): |
|
|
H[u][v]["weight"] += wt |
|
|
else: |
|
|
H.add_edge(u, v, weight=wt) |
|
|
return H |
|
|
|