| | """Functions for measuring the quality of a partition (into |
| | communities). |
| | |
| | """ |
| |
|
| | from itertools import combinations |
| |
|
| | import networkx as nx |
| | from networkx import NetworkXError |
| | from networkx.algorithms.community.community_utils import is_partition |
| | from networkx.utils.decorators import argmap |
| |
|
| | __all__ = ["modularity", "partition_quality"] |
| |
|
| |
|
| | class NotAPartition(NetworkXError): |
| | """Raised if a given collection is not a partition.""" |
| |
|
| | def __init__(self, G, collection): |
| | msg = f"{collection} is not a valid partition of the graph {G}" |
| | super().__init__(msg) |
| |
|
| |
|
| | def _require_partition(G, partition): |
| | """Decorator to check that a valid partition is input to a function |
| | |
| | Raises :exc:`networkx.NetworkXError` if the partition is not valid. |
| | |
| | This decorator should be used on functions whose first two arguments |
| | are a graph and a partition of the nodes of that graph (in that |
| | order):: |
| | |
| | >>> @require_partition |
| | ... def foo(G, partition): |
| | ... print("partition is valid!") |
| | ... |
| | >>> G = nx.complete_graph(5) |
| | >>> partition = [{0, 1}, {2, 3}, {4}] |
| | >>> foo(G, partition) |
| | partition is valid! |
| | >>> partition = [{0}, {2, 3}, {4}] |
| | >>> foo(G, partition) |
| | Traceback (most recent call last): |
| | ... |
| | networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G |
| | >>> partition = [{0, 1}, {1, 2, 3}, {4}] |
| | >>> foo(G, partition) |
| | Traceback (most recent call last): |
| | ... |
| | networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G |
| | |
| | """ |
| | if is_partition(G, partition): |
| | return G, partition |
| | raise nx.NetworkXError("`partition` is not a valid partition of the nodes of G") |
| |
|
| |
|
| | require_partition = argmap(_require_partition, (0, 1)) |
| |
|
| |
|
| | @nx._dispatchable |
| | def intra_community_edges(G, partition): |
| | """Returns the number of intra-community edges for a partition of `G`. |
| | |
| | Parameters |
| | ---------- |
| | G : NetworkX graph. |
| | |
| | partition : iterable of sets of nodes |
| | This must be a partition of the nodes of `G`. |
| | |
| | The "intra-community edges" are those edges joining a pair of nodes |
| | in the same block of the partition. |
| | |
| | """ |
| | return sum(G.subgraph(block).size() for block in partition) |
| |
|
| |
|
| | @nx._dispatchable |
| | def inter_community_edges(G, partition): |
| | """Returns the number of inter-community edges for a partition of `G`. |
| | according to the given |
| | partition of the nodes of `G`. |
| | |
| | Parameters |
| | ---------- |
| | G : NetworkX graph. |
| | |
| | partition : iterable of sets of nodes |
| | This must be a partition of the nodes of `G`. |
| | |
| | The *inter-community edges* are those edges joining a pair of nodes |
| | in different blocks of the partition. |
| | |
| | Implementation note: this function creates an intermediate graph |
| | that may require the same amount of memory as that of `G`. |
| | |
| | """ |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | MG = nx.MultiDiGraph if G.is_directed() else nx.MultiGraph |
| | return nx.quotient_graph(G, partition, create_using=MG).size() |
| |
|
| |
|
| | @nx._dispatchable |
| | def inter_community_non_edges(G, partition): |
| | """Returns the number of inter-community non-edges according to the |
| | given partition of the nodes of `G`. |
| | |
| | Parameters |
| | ---------- |
| | G : NetworkX graph. |
| | |
| | partition : iterable of sets of nodes |
| | This must be a partition of the nodes of `G`. |
| | |
| | A *non-edge* is a pair of nodes (undirected if `G` is undirected) |
| | that are not adjacent in `G`. The *inter-community non-edges* are |
| | those non-edges on a pair of nodes in different blocks of the |
| | partition. |
| | |
| | Implementation note: this function creates two intermediate graphs, |
| | which may require up to twice the amount of memory as required to |
| | store `G`. |
| | |
| | """ |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | return inter_community_edges(nx.complement(G), partition) |
| |
|
| |
|
| | @nx._dispatchable(edge_attrs="weight") |
| | def modularity(G, communities, weight="weight", resolution=1): |
| | r"""Returns the modularity of the given partition of the graph. |
| | |
| | Modularity is defined in [1]_ as |
| | |
| | .. math:: |
| | Q = \frac{1}{2m} \sum_{ij} \left( A_{ij} - \gamma\frac{k_ik_j}{2m}\right) |
| | \delta(c_i,c_j) |
| | |
| | where $m$ is the number of edges (or sum of all edge weights as in [5]_), |
| | $A$ is the adjacency matrix of `G`, $k_i$ is the (weighted) degree of $i$, |
| | $\gamma$ is the resolution parameter, and $\delta(c_i, c_j)$ is 1 if $i$ and |
| | $j$ are in the same community else 0. |
| | |
| | According to [2]_ (and verified by some algebra) this can be reduced to |
| | |
| | .. math:: |
| | Q = \sum_{c=1}^{n} |
| | \left[ \frac{L_c}{m} - \gamma\left( \frac{k_c}{2m} \right) ^2 \right] |
| | |
| | where the sum iterates over all communities $c$, $m$ is the number of edges, |
| | $L_c$ is the number of intra-community links for community $c$, |
| | $k_c$ is the sum of degrees of the nodes in community $c$, |
| | and $\gamma$ is the resolution parameter. |
| | |
| | The resolution parameter sets an arbitrary tradeoff between intra-group |
| | edges and inter-group edges. More complex grouping patterns can be |
| | discovered by analyzing the same network with multiple values of gamma |
| | and then combining the results [3]_. That said, it is very common to |
| | simply use gamma=1. More on the choice of gamma is in [4]_. |
| | |
| | The second formula is the one actually used in calculation of the modularity. |
| | For directed graphs the second formula replaces $k_c$ with $k^{in}_c k^{out}_c$. |
| | |
| | Parameters |
| | ---------- |
| | G : NetworkX Graph |
| | |
| | communities : list or iterable of set of nodes |
| | These node sets must represent a partition of G's nodes. |
| | |
| | weight : string or None, optional (default="weight") |
| | The edge attribute that holds the numerical value used |
| | as a weight. If None or an edge does not have that attribute, |
| | then that edge has weight 1. |
| | |
| | resolution : float (default=1) |
| | If resolution is less than 1, modularity favors larger communities. |
| | Greater than 1 favors smaller communities. |
| | |
| | Returns |
| | ------- |
| | Q : float |
| | The modularity of the partition. |
| | |
| | Raises |
| | ------ |
| | NotAPartition |
| | If `communities` is not a partition of the nodes of `G`. |
| | |
| | Examples |
| | -------- |
| | >>> G = nx.barbell_graph(3, 0) |
| | >>> nx.community.modularity(G, [{0, 1, 2}, {3, 4, 5}]) |
| | 0.35714285714285715 |
| | >>> nx.community.modularity(G, nx.community.label_propagation_communities(G)) |
| | 0.35714285714285715 |
| | |
| | References |
| | ---------- |
| | .. [1] M. E. J. Newman "Networks: An Introduction", page 224. |
| | Oxford University Press, 2011. |
| | .. [2] Clauset, Aaron, Mark EJ Newman, and Cristopher Moore. |
| | "Finding community structure in very large networks." |
| | Phys. Rev. E 70.6 (2004). <https://arxiv.org/abs/cond-mat/0408187> |
| | .. [3] Reichardt and Bornholdt "Statistical Mechanics of Community Detection" |
| | Phys. Rev. E 74, 016110, 2006. https://doi.org/10.1103/PhysRevE.74.016110 |
| | .. [4] M. E. J. Newman, "Equivalence between modularity optimization and |
| | maximum likelihood methods for community detection" |
| | Phys. Rev. E 94, 052315, 2016. https://doi.org/10.1103/PhysRevE.94.052315 |
| | .. [5] Blondel, V.D. et al. "Fast unfolding of communities in large |
| | networks" J. Stat. Mech 10008, 1-12 (2008). |
| | https://doi.org/10.1088/1742-5468/2008/10/P10008 |
| | """ |
| | if not isinstance(communities, list): |
| | communities = list(communities) |
| | if not is_partition(G, communities): |
| | raise NotAPartition(G, communities) |
| |
|
| | directed = G.is_directed() |
| | if directed: |
| | out_degree = dict(G.out_degree(weight=weight)) |
| | in_degree = dict(G.in_degree(weight=weight)) |
| | m = sum(out_degree.values()) |
| | norm = 1 / m**2 |
| | else: |
| | out_degree = in_degree = dict(G.degree(weight=weight)) |
| | deg_sum = sum(out_degree.values()) |
| | m = deg_sum / 2 |
| | norm = 1 / deg_sum**2 |
| |
|
| | def community_contribution(community): |
| | comm = set(community) |
| | L_c = sum(wt for u, v, wt in G.edges(comm, data=weight, default=1) if v in comm) |
| |
|
| | out_degree_sum = sum(out_degree[u] for u in comm) |
| | in_degree_sum = sum(in_degree[u] for u in comm) if directed else out_degree_sum |
| |
|
| | return L_c / m - resolution * out_degree_sum * in_degree_sum * norm |
| |
|
| | return sum(map(community_contribution, communities)) |
| |
|
| |
|
| | @require_partition |
| | @nx._dispatchable |
| | def partition_quality(G, partition): |
| | """Returns the coverage and performance of a partition of G. |
| | |
| | The *coverage* of a partition is the ratio of the number of |
| | intra-community edges to the total number of edges in the graph. |
| | |
| | The *performance* of a partition is the number of |
| | intra-community edges plus inter-community non-edges divided by the total |
| | number of potential edges. |
| | |
| | This algorithm has complexity $O(C^2 + L)$ where C is the number of communities and L is the number of links. |
| | |
| | Parameters |
| | ---------- |
| | G : NetworkX graph |
| | |
| | partition : sequence |
| | Partition of the nodes of `G`, represented as a sequence of |
| | sets of nodes (blocks). Each block of the partition represents a |
| | community. |
| | |
| | Returns |
| | ------- |
| | (float, float) |
| | The (coverage, performance) tuple of the partition, as defined above. |
| | |
| | Raises |
| | ------ |
| | NetworkXError |
| | If `partition` is not a valid partition of the nodes of `G`. |
| | |
| | Notes |
| | ----- |
| | If `G` is a multigraph; |
| | - for coverage, the multiplicity of edges is counted |
| | - for performance, the result is -1 (total number of possible edges is not defined) |
| | |
| | References |
| | ---------- |
| | .. [1] Santo Fortunato. |
| | "Community Detection in Graphs". |
| | *Physical Reports*, Volume 486, Issue 3--5 pp. 75--174 |
| | <https://arxiv.org/abs/0906.0612> |
| | """ |
| |
|
| | node_community = {} |
| | for i, community in enumerate(partition): |
| | for node in community: |
| | node_community[node] = i |
| |
|
| | |
| | if not G.is_multigraph(): |
| | |
| | possible_inter_community_edges = sum( |
| | len(p1) * len(p2) for p1, p2 in combinations(partition, 2) |
| | ) |
| |
|
| | if G.is_directed(): |
| | possible_inter_community_edges *= 2 |
| | else: |
| | possible_inter_community_edges = 0 |
| |
|
| | |
| | |
| | n = len(G) |
| | total_pairs = n * (n - 1) |
| | if not G.is_directed(): |
| | total_pairs //= 2 |
| |
|
| | intra_community_edges = 0 |
| | inter_community_non_edges = possible_inter_community_edges |
| |
|
| | |
| | for e in G.edges(): |
| | if node_community[e[0]] == node_community[e[1]]: |
| | intra_community_edges += 1 |
| | else: |
| | inter_community_non_edges -= 1 |
| |
|
| | coverage = intra_community_edges / len(G.edges) |
| |
|
| | if G.is_multigraph(): |
| | performance = -1.0 |
| | else: |
| | performance = (intra_community_edges + inter_community_non_edges) / total_pairs |
| |
|
| | return coverage, performance |
| |
|