Add files using upload-large-folder tool

f4c7019 verified 8 months ago

15.4 kB

	"""Function for detecting communities based on Louvain Community Detection
	Algorithm"""

	import itertools
	from collections import defaultdict, deque

	import networkx as nx
	from networkx.algorithms.community import modularity
	from networkx.utils import py_random_state

	__all__ = ["louvain_communities", "louvain_partitions"]


	@py_random_state("seed")
	@nx._dispatchable(edge_attrs="weight")
	def louvain_communities(
	G, weight="weight", resolution=1, threshold=0.0000001, max_level=None, seed=None
	):
	r"""Find the best partition of a graph using the Louvain Community Detection
	Algorithm.

	Louvain Community Detection Algorithm is a simple method to extract the community
	structure of a network. This is a heuristic method based on modularity optimization. [1]_

	The algorithm works in 2 steps. On the first step it assigns every node to be
	in its own community and then for each node it tries to find the maximum positive
	modularity gain by moving each node to all of its neighbor communities. If no positive
	gain is achieved the node remains in its original community.

	The modularity gain obtained by moving an isolated node $i$ into a community $C$ can
	easily be calculated by the following formula (combining [1]_ [2]_ and some algebra):

	.. math::
	\Delta Q = \frac{k_{i,in}}{2m} - \gamma\frac{ \Sigma_{tot} \cdot k_i}{2m^2}

	where $m$ is the size of the graph, $k_{i,in}$ is the sum of the weights of the links
	from $i$ to nodes in $C$, $k_i$ is the sum of the weights of the links incident to node $i$,
	$\Sigma_{tot}$ is the sum of the weights of the links incident to nodes in $C$ and $\gamma$
	is the resolution parameter.

	For the directed case the modularity gain can be computed using this formula according to [3]_

	.. math::
	\Delta Q = \frac{k_{i,in}}{m}
	- \gamma\frac{k_i^{out} \cdot\Sigma_{tot}^{in} + k_i^{in} \cdot \Sigma_{tot}^{out}}{m^2}

	where $k_i^{out}$, $k_i^{in}$ are the outer and inner weighted degrees of node $i$ and
	$\Sigma_{tot}^{in}$, $\Sigma_{tot}^{out}$ are the sum of in-going and out-going links incident
	to nodes in $C$.

	The first phase continues until no individual move can improve the modularity.

	The second phase consists in building a new network whose nodes are now the communities
	found in the first phase. To do so, the weights of the links between the new nodes are given by
	the sum of the weight of the links between nodes in the corresponding two communities. Once this
	phase is complete it is possible to reapply the first phase creating bigger communities with
	increased modularity.

	The above two phases are executed until no modularity gain is achieved (or is less than
	the `threshold`, or until `max_levels` is reached).

	Be careful with self-loops in the input graph. These are treated as
	previously reduced communities -- as if the process had been started
	in the middle of the algorithm. Large self-loop edge weights thus
	represent strong communities and in practice may be hard to add
	other nodes to. If your input graph edge weights for self-loops
	do not represent already reduced communities you may want to remove
	the self-loops before inputting that graph.

	Parameters
	----------
	G : NetworkX graph
	weight : string or None, optional (default="weight")
	The name of an edge attribute that holds the numerical value
	used as a weight. If None then each edge has weight 1.
	resolution : float, optional (default=1)
	If resolution is less than 1, the algorithm favors larger communities.
	Greater than 1 favors smaller communities
	threshold : float, optional (default=0.0000001)
	Modularity gain threshold for each level. If the gain of modularity
	between 2 levels of the algorithm is less than the given threshold
	then the algorithm stops and returns the resulting communities.
	max_level : int or None, optional (default=None)
	The maximum number of levels (steps of the algorithm) to compute.
	Must be a positive integer or None. If None, then there is no max
	level and the threshold parameter determines the stopping condition.
	seed : integer, random_state, or None (default)
	Indicator of random number generation state.
	See :ref:`Randomness<randomness>`.

	Returns
	-------
	list
	A list of sets (partition of `G`). Each set represents one community and contains
	all the nodes that constitute it.

	Examples
	--------
	>>> import networkx as nx
	>>> G = nx.petersen_graph()
	>>> nx.community.louvain_communities(G, seed=123)
	[{0, 4, 5, 7, 9}, {1, 2, 3, 6, 8}]

	Notes
	-----
	The order in which the nodes are considered can affect the final output. In the algorithm
	the ordering happens using a random shuffle.

	References
	----------
	.. [1] Blondel, V.D. et al. Fast unfolding of communities in
	large networks. J. Stat. Mech 10008, 1-12(2008). https://doi.org/10.1088/1742-5468/2008/10/P10008
	.. [2] Traag, V.A., Waltman, L. & van Eck, N.J. From Louvain to Leiden: guaranteeing
	well-connected communities. Sci Rep 9, 5233 (2019). https://doi.org/10.1038/s41598-019-41695-z
	.. [3] Nicolas Dugué, Anthony Perez. Directed Louvain : maximizing modularity in directed networks.
	[Research Report] Université d’Orléans. 2015. hal-01231784. https://hal.archives-ouvertes.fr/hal-01231784

	See Also
	--------
	louvain_partitions
	"""

	partitions = louvain_partitions(G, weight, resolution, threshold, seed)
	if max_level is not None:
	if max_level <= 0:
	raise ValueError("max_level argument must be a positive integer or None")
	partitions = itertools.islice(partitions, max_level)
	final_partition = deque(partitions, maxlen=1)
	return final_partition.pop()


	@py_random_state("seed")
	@nx._dispatchable(edge_attrs="weight")
	def louvain_partitions(
	G, weight="weight", resolution=1, threshold=0.0000001, seed=None
	):
	"""Yields partitions for each level of the Louvain Community Detection Algorithm

	Louvain Community Detection Algorithm is a simple method to extract the community
	structure of a network. This is a heuristic method based on modularity optimization. [1]_

	The partitions at each level (step of the algorithm) form a dendrogram of communities.
	A dendrogram is a diagram representing a tree and each level represents
	a partition of the G graph. The top level contains the smallest communities
	and as you traverse to the bottom of the tree the communities get bigger
	and the overall modularity increases making the partition better.

	Each level is generated by executing the two phases of the Louvain Community
	Detection Algorithm.

	Be careful with self-loops in the input graph. These are treated as
	previously reduced communities -- as if the process had been started
	in the middle of the algorithm. Large self-loop edge weights thus
	represent strong communities and in practice may be hard to add
	other nodes to. If your input graph edge weights for self-loops
	do not represent already reduced communities you may want to remove
	the self-loops before inputting that graph.

	Parameters
	----------
	G : NetworkX graph
	weight : string or None, optional (default="weight")
	The name of an edge attribute that holds the numerical value
	used as a weight. If None then each edge has weight 1.
	resolution : float, optional (default=1)
	If resolution is less than 1, the algorithm favors larger communities.
	Greater than 1 favors smaller communities
	threshold : float, optional (default=0.0000001)
	Modularity gain threshold for each level. If the gain of modularity
	between 2 levels of the algorithm is less than the given threshold
	then the algorithm stops and returns the resulting communities.
	seed : integer, random_state, or None (default)
	Indicator of random number generation state.
	See :ref:`Randomness<randomness>`.

	Yields
	------
	list
	A list of sets (partition of `G`). Each set represents one community and contains
	all the nodes that constitute it.

	References
	----------
	.. [1] Blondel, V.D. et al. Fast unfolding of communities in
	large networks. J. Stat. Mech 10008, 1-12(2008)

	See Also
	--------
	louvain_communities
	"""

	partition = [{u} for u in G.nodes()]
	if nx.is_empty(G):
	yield partition
	return
	mod = modularity(G, partition, resolution=resolution, weight=weight)
	is_directed = G.is_directed()
	if G.is_multigraph():
	graph = _convert_multigraph(G, weight, is_directed)
	else:
	graph = G.__class__()
	graph.add_nodes_from(G)
	graph.add_weighted_edges_from(G.edges(data=weight, default=1))

	m = graph.size(weight="weight")
	partition, inner_partition, improvement = _one_level(
	graph, m, partition, resolution, is_directed, seed
	)
	improvement = True
	while improvement:
	# gh-5901 protect the sets in the yielded list from further manipulation here
	yield [s.copy() for s in partition]
	new_mod = modularity(
	graph, inner_partition, resolution=resolution, weight="weight"
	)
	if new_mod - mod <= threshold:
	return
	mod = new_mod
	graph = _gen_graph(graph, inner_partition)
	partition, inner_partition, improvement = _one_level(
	graph, m, partition, resolution, is_directed, seed
	)


	def _one_level(G, m, partition, resolution=1, is_directed=False, seed=None):
	"""Calculate one level of the Louvain partitions tree

	Parameters
	----------
	G : NetworkX Graph/DiGraph
	The graph from which to detect communities
	m : number
	The size of the graph `G`.
	partition : list of sets of nodes
	A valid partition of the graph `G`
	resolution : positive number
	The resolution parameter for computing the modularity of a partition
	is_directed : bool
	True if `G` is a directed graph.
	seed : integer, random_state, or None (default)
	Indicator of random number generation state.
	See :ref:`Randomness<randomness>`.

	"""
	node2com = {u: i for i, u in enumerate(G.nodes())}
	inner_partition = [{u} for u in G.nodes()]
	if is_directed:
	in_degrees = dict(G.in_degree(weight="weight"))
	out_degrees = dict(G.out_degree(weight="weight"))
	Stot_in = list(in_degrees.values())
	Stot_out = list(out_degrees.values())
	# Calculate weights for both in and out neighbors without considering self-loops
	nbrs = {}
	for u in G:
	nbrs[u] = defaultdict(float)
	for _, n, wt in G.out_edges(u, data="weight"):
	if u != n:
	nbrs[u][n] += wt
	for n, _, wt in G.in_edges(u, data="weight"):
	if u != n:
	nbrs[u][n] += wt
	else:
	degrees = dict(G.degree(weight="weight"))
	Stot = list(degrees.values())
	nbrs = {u: {v: data["weight"] for v, data in G[u].items() if v != u} for u in G}
	rand_nodes = list(G.nodes)
	seed.shuffle(rand_nodes)
	nb_moves = 1
	improvement = False
	while nb_moves > 0:
	nb_moves = 0
	for u in rand_nodes:
	best_mod = 0
	best_com = node2com[u]
	weights2com = _neighbor_weights(nbrs[u], node2com)
	if is_directed:
	in_degree = in_degrees[u]
	out_degree = out_degrees[u]
	Stot_in[best_com] -= in_degree
	Stot_out[best_com] -= out_degree
	remove_cost = (
	-weights2com[best_com] / m
	+ resolution
	* (out_degree * Stot_in[best_com] + in_degree * Stot_out[best_com])
	/ m**2
	)
	else:
	degree = degrees[u]
	Stot[best_com] -= degree
	remove_cost = -weights2com[best_com] / m + resolution * (
	Stot[best_com] * degree
	) / (2 * m**2)
	for nbr_com, wt in weights2com.items():
	if is_directed:
	gain = (
	remove_cost
	+ wt / m
	- resolution
	* (
	out_degree * Stot_in[nbr_com]
	+ in_degree * Stot_out[nbr_com]
	)
	/ m**2
	)
	else:
	gain = (
	remove_cost
	+ wt / m
	- resolution * (Stot[nbr_com] * degree) / (2 * m**2)
	)
	if gain > best_mod:
	best_mod = gain
	best_com = nbr_com
	if is_directed:
	Stot_in[best_com] += in_degree
	Stot_out[best_com] += out_degree
	else:
	Stot[best_com] += degree
	if best_com != node2com[u]:
	com = G.nodes[u].get("nodes", {u})
	partition[node2com[u]].difference_update(com)
	inner_partition[node2com[u]].remove(u)
	partition[best_com].update(com)
	inner_partition[best_com].add(u)
	improvement = True
	nb_moves += 1
	node2com[u] = best_com
	partition = list(filter(len, partition))
	inner_partition = list(filter(len, inner_partition))
	return partition, inner_partition, improvement


	def _neighbor_weights(nbrs, node2com):
	"""Calculate weights between node and its neighbor communities.

	Parameters
	----------
	nbrs : dictionary
	Dictionary with nodes' neighbors as keys and their edge weight as value.
	node2com : dictionary
	Dictionary with all graph's nodes as keys and their community index as value.

	"""
	weights = defaultdict(float)
	for nbr, wt in nbrs.items():
	weights[node2com[nbr]] += wt
	return weights


	def _gen_graph(G, partition):
	"""Generate a new graph based on the partitions of a given graph"""
	H = G.__class__()
	node2com = {}
	for i, part in enumerate(partition):
	nodes = set()
	for node in part:
	node2com[node] = i
	nodes.update(G.nodes[node].get("nodes", {node}))
	H.add_node(i, nodes=nodes)

	for node1, node2, wt in G.edges(data=True):
	wt = wt["weight"]
	com1 = node2com[node1]
	com2 = node2com[node2]
	temp = H.get_edge_data(com1, com2, {"weight": 0})["weight"]
	H.add_edge(com1, com2, weight=wt + temp)
	return H


	def _convert_multigraph(G, weight, is_directed):
	"""Convert a Multigraph to normal Graph"""
	if is_directed:
	H = nx.DiGraph()
	else:
	H = nx.Graph()
	H.add_nodes_from(G)
	for u, v, wt in G.edges(data=weight, default=1):
	if H.has_edge(u, v):
	H[u][v]["weight"] += wt
	else:
	H.add_edge(u, v, weight=wt)
	return H