Add files using upload-large-folder tool

f911107 verified 3 months ago

11.9 kB

	"""Functions for measuring the quality of a partition (into
	communities).

	"""

	from itertools import combinations

	import networkx as nx
	from networkx import NetworkXError
	from networkx.algorithms.community.community_utils import is_partition
	from networkx.utils.decorators import argmap

	__all__ = ["modularity", "partition_quality"]


	class NotAPartition(NetworkXError):
	"""Raised if a given collection is not a partition."""

	def __init__(self, G, collection):
	msg = f"{collection} is not a valid partition of the graph {G}"
	super().__init__(msg)


	def _require_partition(G, partition):
	"""Decorator to check that a valid partition is input to a function

	Raises :exc:`networkx.NetworkXError` if the partition is not valid.

	This decorator should be used on functions whose first two arguments
	are a graph and a partition of the nodes of that graph (in that
	order)::

	>>> @require_partition
	... def foo(G, partition):
	... print("partition is valid!")
	...
	>>> G = nx.complete_graph(5)
	>>> partition = [{0, 1}, {2, 3}, {4}]
	>>> foo(G, partition)
	partition is valid!
	>>> partition = [{0}, {2, 3}, {4}]
	>>> foo(G, partition)
	Traceback (most recent call last):
	...
	networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G
	>>> partition = [{0, 1}, {1, 2, 3}, {4}]
	>>> foo(G, partition)
	Traceback (most recent call last):
	...
	networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G

	"""
	if is_partition(G, partition):
	return G, partition
	raise nx.NetworkXError("`partition` is not a valid partition of the nodes of G")


	require_partition = argmap(_require_partition, (0, 1))


	@nx._dispatch
	def intra_community_edges(G, partition):
	"""Returns the number of intra-community edges for a partition of `G`.

	Parameters
	----------
	G : NetworkX graph.

	partition : iterable of sets of nodes
	This must be a partition of the nodes of `G`.

	The "intra-community edges" are those edges joining a pair of nodes
	in the same block of the partition.

	"""
	return sum(G.subgraph(block).size() for block in partition)


	@nx._dispatch
	def inter_community_edges(G, partition):
	"""Returns the number of inter-community edges for a partition of `G`.
	according to the given
	partition of the nodes of `G`.

	Parameters
	----------
	G : NetworkX graph.

	partition : iterable of sets of nodes
	This must be a partition of the nodes of `G`.

	The inter-community edges are those edges joining a pair of nodes
	in different blocks of the partition.

	Implementation note: this function creates an intermediate graph
	that may require the same amount of memory as that of `G`.

	"""
	# Alternate implementation that does not require constructing a new
	# graph object (but does require constructing an affiliation
	# dictionary):
	#
	# aff = dict(chain.from_iterable(((v, block) for v in block)
	# for block in partition))
	# return sum(1 for u, v in G.edges() if aff[u] != aff[v])
	#
	MG = nx.MultiDiGraph if G.is_directed() else nx.MultiGraph
	return nx.quotient_graph(G, partition, create_using=MG).size()


	@nx._dispatch
	def inter_community_non_edges(G, partition):
	"""Returns the number of inter-community non-edges according to the
	given partition of the nodes of `G`.

	Parameters
	----------
	G : NetworkX graph.

	partition : iterable of sets of nodes
	This must be a partition of the nodes of `G`.

	A non-edge is a pair of nodes (undirected if `G` is undirected)
	that are not adjacent in `G`. The inter-community non-edges are
	those non-edges on a pair of nodes in different blocks of the
	partition.

	Implementation note: this function creates two intermediate graphs,
	which may require up to twice the amount of memory as required to
	store `G`.

	"""
	# Alternate implementation that does not require constructing two
	# new graph objects (but does require constructing an affiliation
	# dictionary):
	#
	# aff = dict(chain.from_iterable(((v, block) for v in block)
	# for block in partition))
	# return sum(1 for u, v in nx.non_edges(G) if aff[u] != aff[v])
	#
	return inter_community_edges(nx.complement(G), partition)


	@nx._dispatch(edge_attrs="weight")
	def modularity(G, communities, weight="weight", resolution=1):
	r"""Returns the modularity of the given partition of the graph.

	Modularity is defined in [1]_ as

	.. math::
	Q = \frac{1}{2m} \sum_{ij} \left( A_{ij} - \gamma\frac{k_ik_j}{2m}\right)
	\delta(c_i,c_j)

	where $m$ is the number of edges (or sum of all edge weights as in [5]_),
	$A$ is the adjacency matrix of `G`, $k_i$ is the (weighted) degree of $i$,
	$\gamma$ is the resolution parameter, and $\delta(c_i, c_j)$ is 1 if $i$ and
	$j$ are in the same community else 0.

	According to [2]_ (and verified by some algebra) this can be reduced to

	.. math::
	Q = \sum_{c=1}^{n}
	\left[ \frac{L_c}{m} - \gamma\left( \frac{k_c}{2m} \right) ^2 \right]

	where the sum iterates over all communities $c$, $m$ is the number of edges,
	$L_c$ is the number of intra-community links for community $c$,
	$k_c$ is the sum of degrees of the nodes in community $c$,
	and $\gamma$ is the resolution parameter.

	The resolution parameter sets an arbitrary tradeoff between intra-group
	edges and inter-group edges. More complex grouping patterns can be
	discovered by analyzing the same network with multiple values of gamma
	and then combining the results [3]_. That said, it is very common to
	simply use gamma=1. More on the choice of gamma is in [4]_.

	The second formula is the one actually used in calculation of the modularity.
	For directed graphs the second formula replaces $k_c$ with $k^{in}_c k^{out}_c$.

	Parameters
	----------
	G : NetworkX Graph

	communities : list or iterable of set of nodes
	These node sets must represent a partition of G's nodes.

	weight : string or None, optional (default="weight")
	The edge attribute that holds the numerical value used
	as a weight. If None or an edge does not have that attribute,
	then that edge has weight 1.

	resolution : float (default=1)
	If resolution is less than 1, modularity favors larger communities.
	Greater than 1 favors smaller communities.

	Returns
	-------
	Q : float
	The modularity of the partition.

	Raises
	------
	NotAPartition
	If `communities` is not a partition of the nodes of `G`.

	Examples
	--------
	>>> G = nx.barbell_graph(3, 0)
	>>> nx.community.modularity(G, [{0, 1, 2}, {3, 4, 5}])
	0.35714285714285715
	>>> nx.community.modularity(G, nx.community.label_propagation_communities(G))
	0.35714285714285715

	References
	----------
	.. [1] M. E. J. Newman "Networks: An Introduction", page 224.
	Oxford University Press, 2011.
	.. [2] Clauset, Aaron, Mark EJ Newman, and Cristopher Moore.
	"Finding community structure in very large networks."
	Phys. Rev. E 70.6 (2004). <https://arxiv.org/abs/cond-mat/0408187>
	.. [3] Reichardt and Bornholdt "Statistical Mechanics of Community Detection"
	Phys. Rev. E 74, 016110, 2006. https://doi.org/10.1103/PhysRevE.74.016110
	.. [4] M. E. J. Newman, "Equivalence between modularity optimization and
	maximum likelihood methods for community detection"
	Phys. Rev. E 94, 052315, 2016. https://doi.org/10.1103/PhysRevE.94.052315
	.. [5] Blondel, V.D. et al. "Fast unfolding of communities in large
	networks" J. Stat. Mech 10008, 1-12 (2008).
	https://doi.org/10.1088/1742-5468/2008/10/P10008
	"""
	if not isinstance(communities, list):
	communities = list(communities)
	if not is_partition(G, communities):
	raise NotAPartition(G, communities)

	directed = G.is_directed()
	if directed:
	out_degree = dict(G.out_degree(weight=weight))
	in_degree = dict(G.in_degree(weight=weight))
	m = sum(out_degree.values())
	norm = 1 / m**2
	else:
	out_degree = in_degree = dict(G.degree(weight=weight))
	deg_sum = sum(out_degree.values())
	m = deg_sum / 2
	norm = 1 / deg_sum**2

	def community_contribution(community):
	comm = set(community)
	L_c = sum(wt for u, v, wt in G.edges(comm, data=weight, default=1) if v in comm)

	out_degree_sum = sum(out_degree[u] for u in comm)
	in_degree_sum = sum(in_degree[u] for u in comm) if directed else out_degree_sum

	return L_c / m - resolution * out_degree_sum * in_degree_sum * norm

	return sum(map(community_contribution, communities))


	@require_partition
	@nx._dispatch
	def partition_quality(G, partition):
	"""Returns the coverage and performance of a partition of G.

	The coverage of a partition is the ratio of the number of
	intra-community edges to the total number of edges in the graph.

	The performance of a partition is the number of
	intra-community edges plus inter-community non-edges divided by the total
	number of potential edges.

	This algorithm has complexity $O(C^2 + L)$ where C is the number of communities and L is the number of links.

	Parameters
	----------
	G : NetworkX graph

	partition : sequence
	Partition of the nodes of `G`, represented as a sequence of
	sets of nodes (blocks). Each block of the partition represents a
	community.

	Returns
	-------
	(float, float)
	The (coverage, performance) tuple of the partition, as defined above.

	Raises
	------
	NetworkXError
	If `partition` is not a valid partition of the nodes of `G`.

	Notes
	-----
	If `G` is a multigraph;
	- for coverage, the multiplicity of edges is counted
	- for performance, the result is -1 (total number of possible edges is not defined)

	References
	----------
	.. [1] Santo Fortunato.
	"Community Detection in Graphs".
	Physical Reports, Volume 486, Issue 3--5 pp. 75--174
	<https://arxiv.org/abs/0906.0612>
	"""

	node_community = {}
	for i, community in enumerate(partition):
	for node in community:
	node_community[node] = i

	# `performance` is not defined for multigraphs
	if not G.is_multigraph():
	# Iterate over the communities, quadratic, to calculate `possible_inter_community_edges`
	possible_inter_community_edges = sum(
	len(p1) * len(p2) for p1, p2 in combinations(partition, 2)
	)

	if G.is_directed():
	possible_inter_community_edges *= 2
	else:
	possible_inter_community_edges = 0

	# Compute the number of edges in the complete graph -- `n` nodes,
	# directed or undirected, depending on `G`
	n = len(G)
	total_pairs = n * (n - 1)
	if not G.is_directed():
	total_pairs //= 2

	intra_community_edges = 0
	inter_community_non_edges = possible_inter_community_edges

	# Iterate over the links to count `intra_community_edges` and `inter_community_non_edges`
	for e in G.edges():
	if node_community[e[0]] == node_community[e[1]]:
	intra_community_edges += 1
	else:
	inter_community_non_edges -= 1

	coverage = intra_community_edges / len(G.edges)

	if G.is_multigraph():
	performance = -1.0
	else:
	performance = (intra_community_edges + inter_community_non_edges) / total_pairs

	return coverage, performance