diff --git a/.gitattributes b/.gitattributes index 3c247a97dda9ef643cd46e38b91395ddcc32787e..0cefcd508f6c7a75e0e0076669b705ea0527264b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -301,3 +301,7 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cudnn/lib/ .venv/lib/python3.11/site-packages/torchaudio/lib/pybind11_prefixctc.so filter=lfs diff=lfs merge=lfs -text .venv/lib/python3.11/site-packages/torchaudio/lib/libctc_prefix_decoder.so filter=lfs diff=lfs merge=lfs -text .venv/lib/python3.11/site-packages/torchaudio/lib/libtorchaudio_sox.so filter=lfs diff=lfs merge=lfs -text +.venv/lib/python3.11/site-packages/torchaudio/lib/_torchaudio_sox.so filter=lfs diff=lfs merge=lfs -text +.venv/lib/python3.11/site-packages/torchaudio/transforms/__pycache__/_transforms.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text +.venv/lib/python3.11/site-packages/PIL/_imagingmath.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +.venv/lib/python3.11/site-packages/PIL/_imagingcms.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text diff --git a/.venv/lib/python3.11/site-packages/PIL/_imagingcms.cpython-311-x86_64-linux-gnu.so b/.venv/lib/python3.11/site-packages/PIL/_imagingcms.cpython-311-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..1de9f479c5c105300743a1cd05f1379fa9730850 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/PIL/_imagingcms.cpython-311-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e423efe7a4ff703ed5e70eff519697c0cf641e674f29d2446c4af895f0a4f1be +size 145401 diff --git a/.venv/lib/python3.11/site-packages/PIL/_imagingmath.cpython-311-x86_64-linux-gnu.so b/.venv/lib/python3.11/site-packages/PIL/_imagingmath.cpython-311-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..ca66bea5ffc778bed1ee9f955cf9ba01ee85edc7 --- /dev/null +++ 
b/.venv/lib/python3.11/site-packages/PIL/_imagingmath.cpython-311-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cc372f73a5562f6a0a364bfd1c42234e42e67403f814a83aab901136cce3a29 +size 149024 diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/asteroidal.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/asteroidal.py new file mode 100644 index 0000000000000000000000000000000000000000..3f9b2ab51bb6370d806ae1755587e2ae669ae2f8 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/asteroidal.py @@ -0,0 +1,171 @@ +""" +Algorithms for asteroidal triples and asteroidal numbers in graphs. + +An asteroidal triple in a graph G is a set of three non-adjacent vertices +u, v and w such that there exist a path between any two of them that avoids +closed neighborhood of the third. More formally, v_j, v_k belongs to the same +connected component of G - N[v_i], where N[v_i] denotes the closed neighborhood +of v_i. A graph which does not contain any asteroidal triples is called +an AT-free graph. The class of AT-free graphs is a graph class for which +many NP-complete problems are solvable in polynomial time. Amongst them, +independent set and coloring. +""" + +import networkx as nx +from networkx.utils import not_implemented_for + +__all__ = ["is_at_free", "find_asteroidal_triple"] + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +@nx._dispatchable +def find_asteroidal_triple(G): + r"""Find an asteroidal triple in the given graph. + + An asteroidal triple is a triple of non-adjacent vertices such that + there exists a path between any two of them which avoids the closed + neighborhood of the third. It checks all independent triples of vertices + and whether they are an asteroidal triple or not. This is done with the + help of a data structure called a component structure. 
+ A component structure encodes information about which vertices belongs to + the same connected component when the closed neighborhood of a given vertex + is removed from the graph. The algorithm used to check is the trivial + one, outlined in [1]_, which has a runtime of + :math:`O(|V||\overline{E} + |V||E|)`, where the second term is the + creation of the component structure. + + Parameters + ---------- + G : NetworkX Graph + The graph to check whether is AT-free or not + + Returns + ------- + list or None + An asteroidal triple is returned as a list of nodes. If no asteroidal + triple exists, i.e. the graph is AT-free, then None is returned. + The returned value depends on the certificate parameter. The default + option is a bool which is True if the graph is AT-free, i.e. the + given graph contains no asteroidal triples, and False otherwise, i.e. + if the graph contains at least one asteroidal triple. + + Notes + ----- + The component structure and the algorithm is described in [1]_. The current + implementation implements the trivial algorithm for simple graphs. + + References + ---------- + .. [1] Ekkehard Köhler, + "Recognizing Graphs without asteroidal triples", + Journal of Discrete Algorithms 2, pages 439-452, 2004. + https://www.sciencedirect.com/science/article/pii/S157086670400019X + """ + V = set(G.nodes) + + if len(V) < 6: + # An asteroidal triple cannot exist in a graph with 5 or less vertices. + return None + + component_structure = create_component_structure(G) + E_complement = set(nx.complement(G).edges) + + for e in E_complement: + u = e[0] + v = e[1] + u_neighborhood = set(G[u]).union([u]) + v_neighborhood = set(G[v]).union([v]) + union_of_neighborhoods = u_neighborhood.union(v_neighborhood) + for w in V - union_of_neighborhoods: + # Check for each pair of vertices whether they belong to the + # same connected component when the closed neighborhood of the + # third is removed. 
+ if ( + component_structure[u][v] == component_structure[u][w] + and component_structure[v][u] == component_structure[v][w] + and component_structure[w][u] == component_structure[w][v] + ): + return [u, v, w] + return None + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +@nx._dispatchable +def is_at_free(G): + """Check if a graph is AT-free. + + The method uses the `find_asteroidal_triple` method to recognize + an AT-free graph. If no asteroidal triple is found the graph is + AT-free and True is returned. If at least one asteroidal triple is + found the graph is not AT-free and False is returned. + + Parameters + ---------- + G : NetworkX Graph + The graph to check whether is AT-free or not. + + Returns + ------- + bool + True if G is AT-free and False otherwise. + + Examples + -------- + >>> G = nx.Graph([(0, 1), (0, 2), (1, 2), (1, 3), (1, 4), (4, 5)]) + >>> nx.is_at_free(G) + True + + >>> G = nx.cycle_graph(6) + >>> nx.is_at_free(G) + False + """ + return find_asteroidal_triple(G) is None + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +@nx._dispatchable +def create_component_structure(G): + r"""Create component structure for G. + + A *component structure* is an `nxn` array, denoted `c`, where `n` is + the number of vertices, where each row and column corresponds to a vertex. + + .. math:: + c_{uv} = \begin{cases} 0, if v \in N[u] \\ + k, if v \in component k of G \setminus N[u] \end{cases} + + Where `k` is an arbitrary label for each component. The structure is used + to simplify the detection of asteroidal triples. + + Parameters + ---------- + G : NetworkX Graph + Undirected, simple graph. + + Returns + ------- + component_structure : dictionary + A dictionary of dictionaries, keyed by pairs of vertices. 
+ + """ + V = set(G.nodes) + component_structure = {} + for v in V: + label = 0 + closed_neighborhood = set(G[v]).union({v}) + row_dict = {} + for u in closed_neighborhood: + row_dict[u] = 0 + + G_reduced = G.subgraph(set(G.nodes) - closed_neighborhood) + for cc in nx.connected_components(G_reduced): + label += 1 + for u in cc: + row_dict[u] = label + + component_structure[v] = row_dict + + return component_structure diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/boundary.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/boundary.py new file mode 100644 index 0000000000000000000000000000000000000000..ba05d803037d8812bfff83df5382e8ea942711b2 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/boundary.py @@ -0,0 +1,168 @@ +"""Routines to find the boundary of a set of nodes. + +An edge boundary is a set of edges, each of which has exactly one +endpoint in a given set of nodes (or, in the case of directed graphs, +the set of edges whose source node is in the set). + +A node boundary of a set *S* of nodes is the set of (out-)neighbors of +nodes in *S* that are outside *S*. + +""" + +from itertools import chain + +import networkx as nx + +__all__ = ["edge_boundary", "node_boundary"] + + +@nx._dispatchable(edge_attrs={"data": "default"}, preserve_edge_attrs="data") +def edge_boundary(G, nbunch1, nbunch2=None, data=False, keys=False, default=None): + """Returns the edge boundary of `nbunch1`. + + The *edge boundary* of a set *S* with respect to a set *T* is the + set of edges (*u*, *v*) such that *u* is in *S* and *v* is in *T*. + If *T* is not specified, it is assumed to be the set of all nodes + not in *S*. + + Parameters + ---------- + G : NetworkX graph + + nbunch1 : iterable + Iterable of nodes in the graph representing the set of nodes + whose edge boundary will be returned. (This is the set *S* from + the definition above.) 
+ + nbunch2 : iterable + Iterable of nodes representing the target (or "exterior") set of + nodes. (This is the set *T* from the definition above.) If not + specified, this is assumed to be the set of all nodes in `G` + not in `nbunch1`. + + keys : bool + This parameter has the same meaning as in + :meth:`MultiGraph.edges`. + + data : bool or object + This parameter has the same meaning as in + :meth:`MultiGraph.edges`. + + default : object + This parameter has the same meaning as in + :meth:`MultiGraph.edges`. + + Returns + ------- + iterator + An iterator over the edges in the boundary of `nbunch1` with + respect to `nbunch2`. If `keys`, `data`, or `default` + are specified and `G` is a multigraph, then edges are returned + with keys and/or data, as in :meth:`MultiGraph.edges`. + + Examples + -------- + >>> G = nx.wheel_graph(6) + + When nbunch2=None: + + >>> list(nx.edge_boundary(G, (1, 3))) + [(1, 0), (1, 2), (1, 5), (3, 0), (3, 2), (3, 4)] + + When nbunch2 is given: + + >>> list(nx.edge_boundary(G, (1, 3), (2, 0))) + [(1, 0), (1, 2), (3, 0), (3, 2)] + + Notes + ----- + Any element of `nbunch` that is not in the graph `G` will be + ignored. + + `nbunch1` and `nbunch2` are usually meant to be disjoint, but in + the interest of speed and generality, that is not required here. + + """ + nset1 = {n for n in nbunch1 if n in G} + # Here we create an iterator over edges incident to nodes in the set + # `nset1`. The `Graph.edges()` method does not provide a guarantee + # on the orientation of the edges, so our algorithm below must + # handle the case in which exactly one orientation, either (u, v) or + # (v, u), appears in this iterable. + if G.is_multigraph(): + edges = G.edges(nset1, data=data, keys=keys, default=default) + else: + edges = G.edges(nset1, data=data, default=default) + # If `nbunch2` is not provided, then it is assumed to be the set + # complement of `nbunch1`. 
For the sake of efficiency, this is + # implemented by using the `not in` operator, instead of by creating + # an additional set and using the `in` operator. + if nbunch2 is None: + return (e for e in edges if (e[0] in nset1) ^ (e[1] in nset1)) + nset2 = set(nbunch2) + return ( + e + for e in edges + if (e[0] in nset1 and e[1] in nset2) or (e[1] in nset1 and e[0] in nset2) + ) + + +@nx._dispatchable +def node_boundary(G, nbunch1, nbunch2=None): + """Returns the node boundary of `nbunch1`. + + The *node boundary* of a set *S* with respect to a set *T* is the + set of nodes *v* in *T* such that for some *u* in *S*, there is an + edge joining *u* to *v*. If *T* is not specified, it is assumed to + be the set of all nodes not in *S*. + + Parameters + ---------- + G : NetworkX graph + + nbunch1 : iterable + Iterable of nodes in the graph representing the set of nodes + whose node boundary will be returned. (This is the set *S* from + the definition above.) + + nbunch2 : iterable + Iterable of nodes representing the target (or "exterior") set of + nodes. (This is the set *T* from the definition above.) If not + specified, this is assumed to be the set of all nodes in `G` + not in `nbunch1`. + + Returns + ------- + set + The node boundary of `nbunch1` with respect to `nbunch2`. + + Examples + -------- + >>> G = nx.wheel_graph(6) + + When nbunch2=None: + + >>> list(nx.node_boundary(G, (3, 4))) + [0, 2, 5] + + When nbunch2 is given: + + >>> list(nx.node_boundary(G, (3, 4), (0, 1, 5))) + [0, 5] + + Notes + ----- + Any element of `nbunch` that is not in the graph `G` will be + ignored. + + `nbunch1` and `nbunch2` are usually meant to be disjoint, but in + the interest of speed and generality, that is not required here. + + """ + nset1 = {n for n in nbunch1 if n in G} + bdy = set(chain.from_iterable(G[v] for v in nset1)) - nset1 + # If `nbunch2` is not specified, it is assumed to be the set + # complement of `nbunch1`. 
+ if nbunch2 is not None: + bdy &= set(nbunch2) + return bdy diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/bridges.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/bridges.py new file mode 100644 index 0000000000000000000000000000000000000000..eaa6fd3bd7ef881abf93682315b76dc3b11e40ce --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/bridges.py @@ -0,0 +1,205 @@ +"""Bridge-finding algorithms.""" + +from itertools import chain + +import networkx as nx +from networkx.utils import not_implemented_for + +__all__ = ["bridges", "has_bridges", "local_bridges"] + + +@not_implemented_for("directed") +@nx._dispatchable +def bridges(G, root=None): + """Generate all bridges in a graph. + + A *bridge* in a graph is an edge whose removal causes the number of + connected components of the graph to increase. Equivalently, a bridge is an + edge that does not belong to any cycle. Bridges are also known as cut-edges, + isthmuses, or cut arcs. + + Parameters + ---------- + G : undirected graph + + root : node (optional) + A node in the graph `G`. If specified, only the bridges in the + connected component containing this node will be returned. + + Yields + ------ + e : edge + An edge in the graph whose removal disconnects the graph (or + causes the number of connected components to increase). + + Raises + ------ + NodeNotFound + If `root` is not in the graph `G`. + + NetworkXNotImplemented + If `G` is a directed graph. + + Examples + -------- + The barbell graph with parameter zero has a single bridge: + + >>> G = nx.barbell_graph(10, 0) + >>> list(nx.bridges(G)) + [(9, 10)] + + Notes + ----- + This is an implementation of the algorithm described in [1]_. An edge is a + bridge if and only if it is not contained in any chain. Chains are found + using the :func:`networkx.chain_decomposition` function. + + The algorithm described in [1]_ requires a simple graph. 
If the provided + graph is a multigraph, we convert it to a simple graph and verify that any + bridges discovered by the chain decomposition algorithm are not multi-edges. + + Ignoring polylogarithmic factors, the worst-case time complexity is the + same as the :func:`networkx.chain_decomposition` function, + $O(m + n)$, where $n$ is the number of nodes in the graph and $m$ is + the number of edges. + + References + ---------- + .. [1] https://en.wikipedia.org/wiki/Bridge_%28graph_theory%29#Bridge-Finding_with_Chain_Decompositions + """ + multigraph = G.is_multigraph() + H = nx.Graph(G) if multigraph else G + chains = nx.chain_decomposition(H, root=root) + chain_edges = set(chain.from_iterable(chains)) + if root is not None: + H = H.subgraph(nx.node_connected_component(H, root)).copy() + for u, v in H.edges(): + if (u, v) not in chain_edges and (v, u) not in chain_edges: + if multigraph and len(G[u][v]) > 1: + continue + yield u, v + + +@not_implemented_for("directed") +@nx._dispatchable +def has_bridges(G, root=None): + """Decide whether a graph has any bridges. + + A *bridge* in a graph is an edge whose removal causes the number of + connected components of the graph to increase. + + Parameters + ---------- + G : undirected graph + + root : node (optional) + A node in the graph `G`. If specified, only the bridges in the + connected component containing this node will be considered. + + Returns + ------- + bool + Whether the graph (or the connected component containing `root`) + has any bridges. + + Raises + ------ + NodeNotFound + If `root` is not in the graph `G`. + + NetworkXNotImplemented + If `G` is a directed graph. 
+ + Examples + -------- + The barbell graph with parameter zero has a single bridge:: + + >>> G = nx.barbell_graph(10, 0) + >>> nx.has_bridges(G) + True + + On the other hand, the cycle graph has no bridges:: + + >>> G = nx.cycle_graph(5) + >>> nx.has_bridges(G) + False + + Notes + ----- + This implementation uses the :func:`networkx.bridges` function, so + it shares its worst-case time complexity, $O(m + n)$, ignoring + polylogarithmic factors, where $n$ is the number of nodes in the + graph and $m$ is the number of edges. + + """ + try: + next(bridges(G, root=root)) + except StopIteration: + return False + else: + return True + + +@not_implemented_for("multigraph") +@not_implemented_for("directed") +@nx._dispatchable(edge_attrs="weight") +def local_bridges(G, with_span=True, weight=None): + """Iterate over local bridges of `G` optionally computing the span + + A *local bridge* is an edge whose endpoints have no common neighbors. + That is, the edge is not part of a triangle in the graph. + + The *span* of a *local bridge* is the shortest path length between + the endpoints if the local bridge is removed. + + Parameters + ---------- + G : undirected graph + + with_span : bool + If True, yield a 3-tuple `(u, v, span)` + + weight : function, string or None (default: None) + If function, used to compute edge weights for the span. + If string, the edge data attribute used in calculating span. + If None, all edges have weight 1. + + Yields + ------ + e : edge + The local bridges as an edge 2-tuple of nodes `(u, v)` or + as a 3-tuple `(u, v, span)` when `with_span is True`. + + Raises + ------ + NetworkXNotImplemented + If `G` is a directed graph or multigraph. + + Examples + -------- + A cycle graph has every edge a local bridge with span N-1. 
+ + >>> G = nx.cycle_graph(9) + >>> (0, 8, 8) in set(nx.local_bridges(G)) + True + """ + if with_span is not True: + for u, v in G.edges: + if not (set(G[u]) & set(G[v])): + yield u, v + else: + wt = nx.weighted._weight_function(G, weight) + for u, v in G.edges: + if not (set(G[u]) & set(G[v])): + enodes = {u, v} + + def hide_edge(n, nbr, d): + if n not in enodes or nbr not in enodes: + return wt(n, nbr, d) + return None + + try: + span = nx.shortest_path_length(G, u, v, weight=hide_edge) + yield u, v, span + except nx.NetworkXNoPath: + yield u, v, float("inf") diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/chains.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/chains.py new file mode 100644 index 0000000000000000000000000000000000000000..ae342d9c8669acd832a3bdb4fe8eecf3e300464f --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/chains.py @@ -0,0 +1,172 @@ +"""Functions for finding chains in a graph.""" + +import networkx as nx +from networkx.utils import not_implemented_for + +__all__ = ["chain_decomposition"] + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +@nx._dispatchable +def chain_decomposition(G, root=None): + """Returns the chain decomposition of a graph. + + The *chain decomposition* of a graph with respect a depth-first + search tree is a set of cycles or paths derived from the set of + fundamental cycles of the tree in the following manner. Consider + each fundamental cycle with respect to the given tree, represented + as a list of edges beginning with the nontree edge oriented away + from the root of the tree. For each fundamental cycle, if it + overlaps with any previous fundamental cycle, just take the initial + non-overlapping segment, which is a path instead of a cycle. Each + cycle or path is called a *chain*. For more information, see [1]_. + + Parameters + ---------- + G : undirected graph + + root : node (optional) + A node in the graph `G`. 
If specified, only the chain + decomposition for the connected component containing this node + will be returned. This node indicates the root of the depth-first + search tree. + + Yields + ------ + chain : list + A list of edges representing a chain. There is no guarantee on + the orientation of the edges in each chain (for example, if a + chain includes the edge joining nodes 1 and 2, the chain may + include either (1, 2) or (2, 1)). + + Raises + ------ + NodeNotFound + If `root` is not in the graph `G`. + + Examples + -------- + >>> G = nx.Graph([(0, 1), (1, 4), (3, 4), (3, 5), (4, 5)]) + >>> list(nx.chain_decomposition(G)) + [[(4, 5), (5, 3), (3, 4)]] + + Notes + ----- + The worst-case running time of this implementation is linear in the + number of nodes and number of edges [1]_. + + References + ---------- + .. [1] Jens M. Schmidt (2013). "A simple test on 2-vertex- + and 2-edge-connectivity." *Information Processing Letters*, + 113, 241–244. Elsevier. + + """ + + def _dfs_cycle_forest(G, root=None): + """Builds a directed graph composed of cycles from the given graph. + + `G` is an undirected simple graph. `root` is a node in the graph + from which the depth-first search is started. + + This function returns both the depth-first search cycle graph + (as a :class:`~networkx.DiGraph`) and the list of nodes in + depth-first preorder. The depth-first search cycle graph is a + directed graph whose edges are the edges of `G` oriented toward + the root if the edge is a tree edge and away from the root if + the edge is a non-tree edge. If `root` is not specified, this + performs a depth-first search on each connected component of `G` + and returns a directed forest instead. + + If `root` is not in the graph, this raises :exc:`KeyError`. + + """ + # Create a directed graph from the depth-first search tree with + # root node `root` in which tree edges are directed toward the + # root and nontree edges are directed away from the root. 
For + # each node with an incident nontree edge, this creates a + # directed cycle starting with the nontree edge and returning to + # that node. + # + # The `parent` node attribute stores the parent of each node in + # the DFS tree. The `nontree` edge attribute indicates whether + # the edge is a tree edge or a nontree edge. + # + # We also store the order of the nodes found in the depth-first + # search in the `nodes` list. + H = nx.DiGraph() + nodes = [] + for u, v, d in nx.dfs_labeled_edges(G, source=root): + if d == "forward": + # `dfs_labeled_edges()` yields (root, root, 'forward') + # if it is beginning the search on a new connected + # component. + if u == v: + H.add_node(v, parent=None) + nodes.append(v) + else: + H.add_node(v, parent=u) + H.add_edge(v, u, nontree=False) + nodes.append(v) + # `dfs_labeled_edges` considers nontree edges in both + # orientations, so we need to not add the edge if it its + # other orientation has been added. + elif d == "nontree" and v not in H[u]: + H.add_edge(v, u, nontree=True) + else: + # Do nothing on 'reverse' edges; we only care about + # forward and nontree edges. + pass + return H, nodes + + def _build_chain(G, u, v, visited): + """Generate the chain starting from the given nontree edge. + + `G` is a DFS cycle graph as constructed by + :func:`_dfs_cycle_graph`. The edge (`u`, `v`) is a nontree edge + that begins a chain. `visited` is a set representing the nodes + in `G` that have already been visited. + + This function yields the edges in an initial segment of the + fundamental cycle of `G` starting with the nontree edge (`u`, + `v`) that includes all the edges up until the first node that + appears in `visited`. The tree edges are given by the 'parent' + node attribute. The `visited` set is updated to add each node in + an edge yielded by this function. + + """ + while v not in visited: + yield u, v + visited.add(v) + u, v = v, G.nodes[v]["parent"] + yield u, v + + # Check if the root is in the graph G. 
If not, raise NodeNotFound + if root is not None and root not in G: + raise nx.NodeNotFound(f"Root node {root} is not in graph") + + # Create a directed version of H that has the DFS edges directed + # toward the root and the nontree edges directed away from the root + # (in each connected component). + H, nodes = _dfs_cycle_forest(G, root) + + # Visit the nodes again in DFS order. For each node, and for each + # nontree edge leaving that node, compute the fundamental cycle for + # that nontree edge starting with that edge. If the fundamental + # cycle overlaps with any visited nodes, just take the prefix of the + # cycle up to the point of visited nodes. + # + # We repeat this process for each connected component (implicitly, + # since `nodes` already has a list of the nodes grouped by connected + # component). + visited = set() + for u in nodes: + visited.add(u) + # For each nontree edge going out of node u... + edges = ((u, v) for u, v, d in H.out_edges(u, data="nontree") if d) + for u, v in edges: + # Create the cycle or cycle prefix starting with the + # nontree edge. + chain = list(_build_chain(H, u, v, visited)) + yield chain diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/chordal.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/chordal.py new file mode 100644 index 0000000000000000000000000000000000000000..ab71c243f314d02b74eac9a7b0b4e601ed7e484d --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/chordal.py @@ -0,0 +1,443 @@ +""" +Algorithms for chordal graphs. + +A graph is chordal if every cycle of length at least 4 has a chord +(an edge joining two nodes not adjacent in the cycle). 
+https://en.wikipedia.org/wiki/Chordal_graph +""" + +import sys + +import networkx as nx +from networkx.algorithms.components import connected_components +from networkx.utils import arbitrary_element, not_implemented_for + +__all__ = [ + "is_chordal", + "find_induced_nodes", + "chordal_graph_cliques", + "chordal_graph_treewidth", + "NetworkXTreewidthBoundExceeded", + "complete_to_chordal_graph", +] + + +class NetworkXTreewidthBoundExceeded(nx.NetworkXException): + """Exception raised when a treewidth bound has been provided and it has + been exceeded""" + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +@nx._dispatchable +def is_chordal(G): + """Checks whether G is a chordal graph. + + A graph is chordal if every cycle of length at least 4 has a chord + (an edge joining two nodes not adjacent in the cycle). + + Parameters + ---------- + G : graph + A NetworkX graph. + + Returns + ------- + chordal : bool + True if G is a chordal graph and False otherwise. + + Raises + ------ + NetworkXNotImplemented + The algorithm does not support DiGraph, MultiGraph and MultiDiGraph. + + Examples + -------- + >>> e = [ + ... (1, 2), + ... (1, 3), + ... (2, 3), + ... (2, 4), + ... (3, 4), + ... (3, 5), + ... (3, 6), + ... (4, 5), + ... (4, 6), + ... (5, 6), + ... ] + >>> G = nx.Graph(e) + >>> nx.is_chordal(G) + True + + Notes + ----- + The routine tries to go through every node following maximum cardinality + search. It returns False when it finds that the separator for any node + is not a clique. Based on the algorithms in [1]_. + + Self loops are ignored. + + References + ---------- + .. [1] R. E. Tarjan and M. Yannakakis, Simple linear-time algorithms + to test chordality of graphs, test acyclicity of hypergraphs, and + selectively reduce acyclic hypergraphs, SIAM J. Comput., 13 (1984), + pp. 566–579. 
+ """ + if len(G.nodes) <= 3: + return True + return len(_find_chordality_breaker(G)) == 0 + + +@nx._dispatchable +def find_induced_nodes(G, s, t, treewidth_bound=sys.maxsize): + """Returns the set of induced nodes in the path from s to t. + + Parameters + ---------- + G : graph + A chordal NetworkX graph + s : node + Source node to look for induced nodes + t : node + Destination node to look for induced nodes + treewidth_bound: float + Maximum treewidth acceptable for the graph H. The search + for induced nodes will end as soon as the treewidth_bound is exceeded. + + Returns + ------- + induced_nodes : Set of nodes + The set of induced nodes in the path from s to t in G + + Raises + ------ + NetworkXError + The algorithm does not support DiGraph, MultiGraph and MultiDiGraph. + If the input graph is an instance of one of these classes, a + :exc:`NetworkXError` is raised. + The algorithm can only be applied to chordal graphs. If the input + graph is found to be non-chordal, a :exc:`NetworkXError` is raised. + + Examples + -------- + >>> G = nx.Graph() + >>> G = nx.generators.classic.path_graph(10) + >>> induced_nodes = nx.find_induced_nodes(G, 1, 9, 2) + >>> sorted(induced_nodes) + [1, 2, 3, 4, 5, 6, 7, 8, 9] + + Notes + ----- + G must be a chordal graph and (s,t) an edge that is not in G. + + If a treewidth_bound is provided, the search for induced nodes will end + as soon as the treewidth_bound is exceeded. + + The algorithm is inspired by Algorithm 4 in [1]_. + A formal definition of induced node can also be found on that reference. + + Self Loops are ignored + + References + ---------- + .. [1] Learning Bounded Treewidth Bayesian Networks. + Gal Elidan, Stephen Gould; JMLR, 9(Dec):2699--2731, 2008. 
+ http://jmlr.csail.mit.edu/papers/volume9/elidan08a/elidan08a.pdf + """ + if not is_chordal(G): + raise nx.NetworkXError("Input graph is not chordal.") + + H = nx.Graph(G) + H.add_edge(s, t) + induced_nodes = set() + triplet = _find_chordality_breaker(H, s, treewidth_bound) + while triplet: + (u, v, w) = triplet + induced_nodes.update(triplet) + for n in triplet: + if n != s: + H.add_edge(s, n) + triplet = _find_chordality_breaker(H, s, treewidth_bound) + if induced_nodes: + # Add t and the second node in the induced path from s to t. + induced_nodes.add(t) + for u in G[s]: + if len(induced_nodes & set(G[u])) == 2: + induced_nodes.add(u) + break + return induced_nodes + + +@nx._dispatchable +def chordal_graph_cliques(G): + """Returns all maximal cliques of a chordal graph. + + The algorithm breaks the graph in connected components and performs a + maximum cardinality search in each component to get the cliques. + + Parameters + ---------- + G : graph + A NetworkX graph + + Yields + ------ + frozenset of nodes + Maximal cliques, each of which is a frozenset of + nodes in `G`. The order of cliques is arbitrary. + + Raises + ------ + NetworkXError + The algorithm does not support DiGraph, MultiGraph and MultiDiGraph. + The algorithm can only be applied to chordal graphs. If the input + graph is found to be non-chordal, a :exc:`NetworkXError` is raised. + + Examples + -------- + >>> e = [ + ... (1, 2), + ... (1, 3), + ... (2, 3), + ... (2, 4), + ... (3, 4), + ... (3, 5), + ... (3, 6), + ... (4, 5), + ... (4, 6), + ... (5, 6), + ... (7, 8), + ... 
] + >>> G = nx.Graph(e) + >>> G.add_node(9) + >>> cliques = [c for c in chordal_graph_cliques(G)] + >>> cliques[0] + frozenset({1, 2, 3}) + """ + for C in (G.subgraph(c).copy() for c in connected_components(G)): + if C.number_of_nodes() == 1: + if nx.number_of_selfloops(C) > 0: + raise nx.NetworkXError("Input graph is not chordal.") + yield frozenset(C.nodes()) + else: + unnumbered = set(C.nodes()) + v = arbitrary_element(C) + unnumbered.remove(v) + numbered = {v} + clique_wanna_be = {v} + while unnumbered: + v = _max_cardinality_node(C, unnumbered, numbered) + unnumbered.remove(v) + numbered.add(v) + new_clique_wanna_be = set(C.neighbors(v)) & numbered + sg = C.subgraph(clique_wanna_be) + if _is_complete_graph(sg): + new_clique_wanna_be.add(v) + if not new_clique_wanna_be >= clique_wanna_be: + yield frozenset(clique_wanna_be) + clique_wanna_be = new_clique_wanna_be + else: + raise nx.NetworkXError("Input graph is not chordal.") + yield frozenset(clique_wanna_be) + + +@nx._dispatchable +def chordal_graph_treewidth(G): + """Returns the treewidth of the chordal graph G. + + Parameters + ---------- + G : graph + A NetworkX graph + + Returns + ------- + treewidth : int + The size of the largest clique in the graph minus one. + + Raises + ------ + NetworkXError + The algorithm does not support DiGraph, MultiGraph and MultiDiGraph. + The algorithm can only be applied to chordal graphs. If the input + graph is found to be non-chordal, a :exc:`NetworkXError` is raised. + + Examples + -------- + >>> e = [ + ... (1, 2), + ... (1, 3), + ... (2, 3), + ... (2, 4), + ... (3, 4), + ... (3, 5), + ... (3, 6), + ... (4, 5), + ... (4, 6), + ... (5, 6), + ... (7, 8), + ... ] + >>> G = nx.Graph(e) + >>> G.add_node(9) + >>> nx.chordal_graph_treewidth(G) + 3 + + References + ---------- + .. 
[1] https://en.wikipedia.org/wiki/Tree_decomposition#Treewidth + """ + if not is_chordal(G): + raise nx.NetworkXError("Input graph is not chordal.") + + max_clique = -1 + for clique in nx.chordal_graph_cliques(G): + max_clique = max(max_clique, len(clique)) + return max_clique - 1 + + +def _is_complete_graph(G): + """Returns True if G is a complete graph.""" + if nx.number_of_selfloops(G) > 0: + raise nx.NetworkXError("Self loop found in _is_complete_graph()") + n = G.number_of_nodes() + if n < 2: + return True + e = G.number_of_edges() + max_edges = (n * (n - 1)) / 2 + return e == max_edges + + +def _find_missing_edge(G): + """Given a non-complete graph G, returns a missing edge.""" + nodes = set(G) + for u in G: + missing = nodes - set(list(G[u].keys()) + [u]) + if missing: + return (u, missing.pop()) + + +def _max_cardinality_node(G, choices, wanna_connect): + """Returns a the node in choices that has more connections in G + to nodes in wanna_connect. + """ + max_number = -1 + for x in choices: + number = len([y for y in G[x] if y in wanna_connect]) + if number > max_number: + max_number = number + max_cardinality_node = x + return max_cardinality_node + + +def _find_chordality_breaker(G, s=None, treewidth_bound=sys.maxsize): + """Given a graph G, starts a max cardinality search + (starting from s if s is given and from an arbitrary node otherwise) + trying to find a non-chordal cycle. + + If it does find one, it returns (u,v,w) where u,v,w are the three + nodes that together with s are involved in the cycle. + + It ignores any self loops. 
+ """ + if len(G) == 0: + raise nx.NetworkXPointlessConcept("Graph has no nodes.") + unnumbered = set(G) + if s is None: + s = arbitrary_element(G) + unnumbered.remove(s) + numbered = {s} + current_treewidth = -1 + while unnumbered: # and current_treewidth <= treewidth_bound: + v = _max_cardinality_node(G, unnumbered, numbered) + unnumbered.remove(v) + numbered.add(v) + clique_wanna_be = set(G[v]) & numbered + sg = G.subgraph(clique_wanna_be) + if _is_complete_graph(sg): + # The graph seems to be chordal by now. We update the treewidth + current_treewidth = max(current_treewidth, len(clique_wanna_be)) + if current_treewidth > treewidth_bound: + raise nx.NetworkXTreewidthBoundExceeded( + f"treewidth_bound exceeded: {current_treewidth}" + ) + else: + # sg is not a clique, + # look for an edge that is not included in sg + (u, w) = _find_missing_edge(sg) + return (u, v, w) + return () + + +@not_implemented_for("directed") +@nx._dispatchable(returns_graph=True) +def complete_to_chordal_graph(G): + """Return a copy of G completed to a chordal graph + + Adds edges to a copy of G to create a chordal graph. A graph G=(V,E) is + called chordal if for each cycle with length bigger than 3, there exist + two non-adjacent nodes connected by an edge (called a chord). + + Parameters + ---------- + G : NetworkX graph + Undirected graph + + Returns + ------- + H : NetworkX graph + The chordal enhancement of G + alpha : Dictionary + The elimination ordering of nodes of G + + Notes + ----- + There are different approaches to calculate the chordal + enhancement of a graph. The algorithm used here is called + MCS-M and gives at least minimal (local) triangulation of graph. Note + that this triangulation is not necessarily a global minimum. + + https://en.wikipedia.org/wiki/Chordal_graph + + References + ---------- + .. [1] Berry, Anne & Blair, Jean & Heggernes, Pinar & Peyton, Barry. (2004) + Maximum Cardinality Search for Computing Minimal Triangulations of + Graphs. Algorithmica. 39. 
287-298. 10.1007/s00453-004-1084-3. + + Examples + -------- + >>> from networkx.algorithms.chordal import complete_to_chordal_graph + >>> G = nx.wheel_graph(10) + >>> H, alpha = complete_to_chordal_graph(G) + """ + H = G.copy() + alpha = {node: 0 for node in H} + if nx.is_chordal(H): + return H, alpha + chords = set() + weight = {node: 0 for node in H.nodes()} + unnumbered_nodes = list(H.nodes()) + for i in range(len(H.nodes()), 0, -1): + # get the node in unnumbered_nodes with the maximum weight + z = max(unnumbered_nodes, key=lambda node: weight[node]) + unnumbered_nodes.remove(z) + alpha[z] = i + update_nodes = [] + for y in unnumbered_nodes: + if G.has_edge(y, z): + update_nodes.append(y) + else: + # y_weight will be bigger than node weights between y and z + y_weight = weight[y] + lower_nodes = [ + node for node in unnumbered_nodes if weight[node] < y_weight + ] + if nx.has_path(H.subgraph(lower_nodes + [z, y]), y, z): + update_nodes.append(y) + chords.add((z, y)) + # during calculation of paths the weights should not be updated + for node in update_nodes: + weight[node] += 1 + H.add_edges_from(chords) + return H, alpha diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/clique.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/clique.py new file mode 100644 index 0000000000000000000000000000000000000000..57b588ae350943636d7c0648c2d1b7d327f0d071 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/clique.py @@ -0,0 +1,755 @@ +"""Functions for finding and manipulating cliques. + +Finding the largest clique in a graph is NP-complete problem, so most of +these algorithms have an exponential running time; for more information, +see the Wikipedia article on the clique problem [1]_. + +.. 
[1] clique problem:: https://en.wikipedia.org/wiki/Clique_problem + +""" + +from collections import defaultdict, deque +from itertools import chain, combinations, islice + +import networkx as nx +from networkx.utils import not_implemented_for + +__all__ = [ + "find_cliques", + "find_cliques_recursive", + "make_max_clique_graph", + "make_clique_bipartite", + "node_clique_number", + "number_of_cliques", + "enumerate_all_cliques", + "max_weight_clique", +] + + +@not_implemented_for("directed") +@nx._dispatchable +def enumerate_all_cliques(G): + """Returns all cliques in an undirected graph. + + This function returns an iterator over cliques, each of which is a + list of nodes. The iteration is ordered by cardinality of the + cliques: first all cliques of size one, then all cliques of size + two, etc. + + Parameters + ---------- + G : NetworkX graph + An undirected graph. + + Returns + ------- + iterator + An iterator over cliques, each of which is a list of nodes in + `G`. The cliques are ordered according to size. + + Notes + ----- + To obtain a list of all cliques, use + `list(enumerate_all_cliques(G))`. However, be aware that in the + worst-case, the length of this list can be exponential in the number + of nodes in the graph (for example, when the graph is the complete + graph). This function avoids storing all cliques in memory by only + keeping current candidate node lists in memory during its search. + + The implementation is adapted from the algorithm by Zhang, et + al. (2005) [1]_ to output all cliques discovered. + + This algorithm ignores self-loops and parallel edges, since cliques + are not conventionally defined with such edges. + + References + ---------- + .. [1] Yun Zhang, Abu-Khzam, F.N., Baldwin, N.E., Chesler, E.J., + Langston, M.A., Samatova, N.F., + "Genome-Scale Computational Approaches to Memory-Intensive + Applications in Systems Biology". + *Supercomputing*, 2005. Proceedings of the ACM/IEEE SC 2005 + Conference, pp. 12, 12--18 Nov. 2005. + . 
+ + """ + index = {} + nbrs = {} + for u in G: + index[u] = len(index) + # Neighbors of u that appear after u in the iteration order of G. + nbrs[u] = {v for v in G[u] if v not in index} + + queue = deque(([u], sorted(nbrs[u], key=index.__getitem__)) for u in G) + # Loop invariants: + # 1. len(base) is nondecreasing. + # 2. (base + cnbrs) is sorted with respect to the iteration order of G. + # 3. cnbrs is a set of common neighbors of nodes in base. + while queue: + base, cnbrs = map(list, queue.popleft()) + yield base + for i, u in enumerate(cnbrs): + # Use generators to reduce memory consumption. + queue.append( + ( + chain(base, [u]), + filter(nbrs[u].__contains__, islice(cnbrs, i + 1, None)), + ) + ) + + +@not_implemented_for("directed") +@nx._dispatchable +def find_cliques(G, nodes=None): + """Returns all maximal cliques in an undirected graph. + + For each node *n*, a *maximal clique for n* is a largest complete + subgraph containing *n*. The largest maximal clique is sometimes + called the *maximum clique*. + + This function returns an iterator over cliques, each of which is a + list of nodes. It is an iterative implementation, so should not + suffer from recursion depth issues. + + This function accepts a list of `nodes` and only the maximal cliques + containing all of these `nodes` are returned. It can considerably speed up + the running time if some specific cliques are desired. + + Parameters + ---------- + G : NetworkX graph + An undirected graph. + + nodes : list, optional (default=None) + If provided, only yield *maximal cliques* containing all nodes in `nodes`. + If `nodes` isn't a clique itself, a ValueError is raised. + + Returns + ------- + iterator + An iterator over maximal cliques, each of which is a list of + nodes in `G`. If `nodes` is provided, only the maximal cliques + containing all the nodes in `nodes` are returned. The order of + cliques is arbitrary. + + Raises + ------ + ValueError + If `nodes` is not a clique. 
+ + Examples + -------- + >>> from pprint import pprint # For nice dict formatting + >>> G = nx.karate_club_graph() + >>> sum(1 for c in nx.find_cliques(G)) # The number of maximal cliques in G + 36 + >>> max(nx.find_cliques(G), key=len) # The largest maximal clique in G + [0, 1, 2, 3, 13] + + The size of the largest maximal clique is known as the *clique number* of + the graph, which can be found directly with: + + >>> max(len(c) for c in nx.find_cliques(G)) + 5 + + One can also compute the number of maximal cliques in `G` that contain a given + node. The following produces a dictionary keyed by node whose + values are the number of maximal cliques in `G` that contain the node: + + >>> pprint({n: sum(1 for c in nx.find_cliques(G) if n in c) for n in G}) + {0: 13, + 1: 6, + 2: 7, + 3: 3, + 4: 2, + 5: 3, + 6: 3, + 7: 1, + 8: 3, + 9: 2, + 10: 2, + 11: 1, + 12: 1, + 13: 2, + 14: 1, + 15: 1, + 16: 1, + 17: 1, + 18: 1, + 19: 2, + 20: 1, + 21: 1, + 22: 1, + 23: 3, + 24: 2, + 25: 2, + 26: 1, + 27: 3, + 28: 2, + 29: 2, + 30: 2, + 31: 4, + 32: 9, + 33: 14} + + Or, similarly, the maximal cliques in `G` that contain a given node. + For example, the 4 maximal cliques that contain node 31: + + >>> [c for c in nx.find_cliques(G) if 31 in c] + [[0, 31], [33, 32, 31], [33, 28, 31], [24, 25, 31]] + + See Also + -------- + find_cliques_recursive + A recursive version of the same algorithm. + + Notes + ----- + To obtain a list of all maximal cliques, use + `list(find_cliques(G))`. However, be aware that in the worst-case, + the length of this list can be exponential in the number of nodes in + the graph. This function avoids storing all cliques in memory by + only keeping current candidate node lists in memory during its search. + + This implementation is based on the algorithm published by Bron and + Kerbosch (1973) [1]_, as adapted by Tomita, Tanaka and Takahashi + (2006) [2]_ and discussed in Cazals and Karande (2008) [3]_. 
It + essentially unrolls the recursion used in the references to avoid + issues of recursion stack depth (for a recursive implementation, see + :func:`find_cliques_recursive`). + + This algorithm ignores self-loops and parallel edges, since cliques + are not conventionally defined with such edges. + + References + ---------- + .. [1] Bron, C. and Kerbosch, J. + "Algorithm 457: finding all cliques of an undirected graph". + *Communications of the ACM* 16, 9 (Sep. 1973), 575--577. + + + .. [2] Etsuji Tomita, Akira Tanaka, Haruhisa Takahashi, + "The worst-case time complexity for generating all maximal + cliques and computational experiments", + *Theoretical Computer Science*, Volume 363, Issue 1, + Computing and Combinatorics, + 10th Annual International Conference on + Computing and Combinatorics (COCOON 2004), 25 October 2006, Pages 28--42 + + + .. [3] F. Cazals, C. Karande, + "A note on the problem of reporting maximal cliques", + *Theoretical Computer Science*, + Volume 407, Issues 1--3, 6 November 2008, Pages 564--568, + + + """ + if len(G) == 0: + return + + adj = {u: {v for v in G[u] if v != u} for u in G} + + # Initialize Q with the given nodes and subg, cand with their nbrs + Q = nodes[:] if nodes is not None else [] + cand = set(G) + for node in Q: + if node not in cand: + raise ValueError(f"The given `nodes` {nodes} do not form a clique") + cand &= adj[node] + + if not cand: + yield Q[:] + return + + subg = cand.copy() + stack = [] + Q.append(None) + + u = max(subg, key=lambda u: len(cand & adj[u])) + ext_u = cand - adj[u] + + try: + while True: + if ext_u: + q = ext_u.pop() + cand.remove(q) + Q[-1] = q + adj_q = adj[q] + subg_q = subg & adj_q + if not subg_q: + yield Q[:] + else: + cand_q = cand & adj_q + if cand_q: + stack.append((subg, cand, ext_u)) + Q.append(None) + subg = subg_q + cand = cand_q + u = max(subg, key=lambda u: len(cand & adj[u])) + ext_u = cand - adj[u] + else: + Q.pop() + subg, cand, ext_u = stack.pop() + except IndexError: + pass + + 
+# TODO Should this also be not implemented for directed graphs? +@nx._dispatchable +def find_cliques_recursive(G, nodes=None): + """Returns all maximal cliques in a graph. + + For each node *v*, a *maximal clique for v* is a largest complete + subgraph containing *v*. The largest maximal clique is sometimes + called the *maximum clique*. + + This function returns an iterator over cliques, each of which is a + list of nodes. It is a recursive implementation, so may suffer from + recursion depth issues, but is included for pedagogical reasons. + For a non-recursive implementation, see :func:`find_cliques`. + + This function accepts a list of `nodes` and only the maximal cliques + containing all of these `nodes` are returned. It can considerably speed up + the running time if some specific cliques are desired. + + Parameters + ---------- + G : NetworkX graph + + nodes : list, optional (default=None) + If provided, only yield *maximal cliques* containing all nodes in `nodes`. + If `nodes` isn't a clique itself, a ValueError is raised. + + Returns + ------- + iterator + An iterator over maximal cliques, each of which is a list of + nodes in `G`. If `nodes` is provided, only the maximal cliques + containing all the nodes in `nodes` are yielded. The order of + cliques is arbitrary. + + Raises + ------ + ValueError + If `nodes` is not a clique. + + See Also + -------- + find_cliques + An iterative version of the same algorithm. See docstring for examples. + + Notes + ----- + To obtain a list of all maximal cliques, use + `list(find_cliques_recursive(G))`. However, be aware that in the + worst-case, the length of this list can be exponential in the number + of nodes in the graph. This function avoids storing all cliques in memory + by only keeping current candidate node lists in memory during its search. 
+ + This implementation is based on the algorithm published by Bron and + Kerbosch (1973) [1]_, as adapted by Tomita, Tanaka and Takahashi + (2006) [2]_ and discussed in Cazals and Karande (2008) [3]_. For a + non-recursive implementation, see :func:`find_cliques`. + + This algorithm ignores self-loops and parallel edges, since cliques + are not conventionally defined with such edges. + + References + ---------- + .. [1] Bron, C. and Kerbosch, J. + "Algorithm 457: finding all cliques of an undirected graph". + *Communications of the ACM* 16, 9 (Sep. 1973), 575--577. + + + .. [2] Etsuji Tomita, Akira Tanaka, Haruhisa Takahashi, + "The worst-case time complexity for generating all maximal + cliques and computational experiments", + *Theoretical Computer Science*, Volume 363, Issue 1, + Computing and Combinatorics, + 10th Annual International Conference on + Computing and Combinatorics (COCOON 2004), 25 October 2006, Pages 28--42 + + + .. [3] F. Cazals, C. Karande, + "A note on the problem of reporting maximal cliques", + *Theoretical Computer Science*, + Volume 407, Issues 1--3, 6 November 2008, Pages 564--568, + + + """ + if len(G) == 0: + return iter([]) + + adj = {u: {v for v in G[u] if v != u} for u in G} + + # Initialize Q with the given nodes and subg, cand with their nbrs + Q = nodes[:] if nodes is not None else [] + cand_init = set(G) + for node in Q: + if node not in cand_init: + raise ValueError(f"The given `nodes` {nodes} do not form a clique") + cand_init &= adj[node] + + if not cand_init: + return iter([Q]) + + subg_init = cand_init.copy() + + def expand(subg, cand): + u = max(subg, key=lambda u: len(cand & adj[u])) + for q in cand - adj[u]: + cand.remove(q) + Q.append(q) + adj_q = adj[q] + subg_q = subg & adj_q + if not subg_q: + yield Q[:] + else: + cand_q = cand & adj_q + if cand_q: + yield from expand(subg_q, cand_q) + Q.pop() + + return expand(subg_init, cand_init) + + +@nx._dispatchable(returns_graph=True) +def make_max_clique_graph(G, 
create_using=None): + """Returns the maximal clique graph of the given graph. + + The nodes of the maximal clique graph of `G` are the cliques of + `G` and an edge joins two cliques if the cliques are not disjoint. + + Parameters + ---------- + G : NetworkX graph + + create_using : NetworkX graph constructor, optional (default=nx.Graph) + Graph type to create. If graph instance, then cleared before populated. + + Returns + ------- + NetworkX graph + A graph whose nodes are the cliques of `G` and whose edges + join two cliques if they are not disjoint. + + Notes + ----- + This function behaves like the following code:: + + import networkx as nx + + G = nx.make_clique_bipartite(G) + cliques = [v for v in G.nodes() if G.nodes[v]["bipartite"] == 0] + G = nx.bipartite.projected_graph(G, cliques) + G = nx.relabel_nodes(G, {-v: v - 1 for v in G}) + + It should be faster, though, since it skips all the intermediate + steps. + + """ + if create_using is None: + B = G.__class__() + else: + B = nx.empty_graph(0, create_using) + cliques = list(enumerate(set(c) for c in find_cliques(G))) + # Add a numbered node for each clique. + B.add_nodes_from(i for i, c in cliques) + # Join cliques by an edge if they share a node. + clique_pairs = combinations(cliques, 2) + B.add_edges_from((i, j) for (i, c1), (j, c2) in clique_pairs if c1 & c2) + return B + + +@nx._dispatchable(returns_graph=True) +def make_clique_bipartite(G, fpos=None, create_using=None, name=None): + """Returns the bipartite clique graph corresponding to `G`. + + In the returned bipartite graph, the "bottom" nodes are the nodes of + `G` and the "top" nodes represent the maximal cliques of `G`. + There is an edge from node *v* to clique *C* in the returned graph + if and only if *v* is an element of *C*. + + Parameters + ---------- + G : NetworkX graph + An undirected graph. 
+ + fpos : bool + If True or not None, the returned graph will have an + additional attribute, `pos`, a dictionary mapping node to + position in the Euclidean plane. + + create_using : NetworkX graph constructor, optional (default=nx.Graph) + Graph type to create. If graph instance, then cleared before populated. + + Returns + ------- + NetworkX graph + A bipartite graph whose "bottom" set is the nodes of the graph + `G`, whose "top" set is the cliques of `G`, and whose edges + join nodes of `G` to the cliques that contain them. + + The nodes of the graph `G` have the node attribute + 'bipartite' set to 1 and the nodes representing cliques + have the node attribute 'bipartite' set to 0, as is the + convention for bipartite graphs in NetworkX. + + """ + B = nx.empty_graph(0, create_using) + B.clear() + # The "bottom" nodes in the bipartite graph are the nodes of the + # original graph, G. + B.add_nodes_from(G, bipartite=1) + for i, cl in enumerate(find_cliques(G)): + # The "top" nodes in the bipartite graph are the cliques. These + # nodes get negative numbers as labels. + name = -i - 1 + B.add_node(name, bipartite=0) + B.add_edges_from((v, name) for v in cl) + return B + + +@nx._dispatchable +def node_clique_number(G, nodes=None, cliques=None, separate_nodes=False): + """Returns the size of the largest maximal clique containing each given node. + + Returns a single or list depending on input nodes. + An optional list of cliques can be input if already computed. + + Parameters + ---------- + G : NetworkX graph + An undirected graph. + + cliques : list, optional (default=None) + A list of cliques, each of which is itself a list of nodes. + If not specified, the list of all cliques will be computed + using :func:`find_cliques`. + + Returns + ------- + int or dict + If `nodes` is a single node, returns the size of the + largest maximal clique in `G` containing that node. 
+ Otherwise return a dict keyed by node to the size + of the largest maximal clique containing that node. + + See Also + -------- + find_cliques + find_cliques yields the maximal cliques of G. + It accepts a `nodes` argument which restricts consideration to + maximal cliques containing all the given `nodes`. + The search for the cliques is optimized for `nodes`. + """ + if cliques is None: + if nodes is not None: + # Use ego_graph to decrease size of graph + # check for single node + if nodes in G: + return max(len(c) for c in find_cliques(nx.ego_graph(G, nodes))) + # handle multiple nodes + return { + n: max(len(c) for c in find_cliques(nx.ego_graph(G, n))) for n in nodes + } + + # nodes is None--find all cliques + cliques = list(find_cliques(G)) + + # single node requested + if nodes in G: + return max(len(c) for c in cliques if nodes in c) + + # multiple nodes requested + # preprocess all nodes (faster than one at a time for even 2 nodes) + size_for_n = defaultdict(int) + for c in cliques: + size_of_c = len(c) + for n in c: + if size_for_n[n] < size_of_c: + size_for_n[n] = size_of_c + if nodes is None: + return size_for_n + return {n: size_for_n[n] for n in nodes} + + +def number_of_cliques(G, nodes=None, cliques=None): + """Returns the number of maximal cliques for each node. + + Returns a single or list depending on input nodes. + Optional list of cliques can be input if already computed. + """ + if cliques is None: + cliques = list(find_cliques(G)) + + if nodes is None: + nodes = list(G.nodes()) # none, get entire graph + + if not isinstance(nodes, list): # check for a list + v = nodes + # assume it is a single value + numcliq = len([1 for c in cliques if v in c]) + else: + numcliq = {} + for v in nodes: + numcliq[v] = len([1 for c in cliques if v in c]) + return numcliq + + +class MaxWeightClique: + """A class for the maximum weight clique algorithm. + + This class is a helper for the `max_weight_clique` function. 
The class + should not normally be used directly. + + Parameters + ---------- + G : NetworkX graph + The undirected graph for which a maximum weight clique is sought + weight : string or None, optional (default='weight') + The node attribute that holds the integer value used as a weight. + If None, then each node has weight 1. + + Attributes + ---------- + G : NetworkX graph + The undirected graph for which a maximum weight clique is sought + node_weights: dict + The weight of each node + incumbent_nodes : list + The nodes of the incumbent clique (the best clique found so far) + incumbent_weight: int + The weight of the incumbent clique + """ + + def __init__(self, G, weight): + self.G = G + self.incumbent_nodes = [] + self.incumbent_weight = 0 + + if weight is None: + self.node_weights = {v: 1 for v in G.nodes()} + else: + for v in G.nodes(): + if weight not in G.nodes[v]: + errmsg = f"Node {v!r} does not have the requested weight field." + raise KeyError(errmsg) + if not isinstance(G.nodes[v][weight], int): + errmsg = f"The {weight!r} field of node {v!r} is not an integer." + raise ValueError(errmsg) + self.node_weights = {v: G.nodes[v][weight] for v in G.nodes()} + + def update_incumbent_if_improved(self, C, C_weight): + """Update the incumbent if the node set C has greater weight. + + C is assumed to be a clique. 
+ """ + if C_weight > self.incumbent_weight: + self.incumbent_nodes = C[:] + self.incumbent_weight = C_weight + + def greedily_find_independent_set(self, P): + """Greedily find an independent set of nodes from a set of + nodes P.""" + independent_set = [] + P = P[:] + while P: + v = P[0] + independent_set.append(v) + P = [w for w in P if v != w and not self.G.has_edge(v, w)] + return independent_set + + def find_branching_nodes(self, P, target): + """Find a set of nodes to branch on.""" + residual_wt = {v: self.node_weights[v] for v in P} + total_wt = 0 + P = P[:] + while P: + independent_set = self.greedily_find_independent_set(P) + min_wt_in_class = min(residual_wt[v] for v in independent_set) + total_wt += min_wt_in_class + if total_wt > target: + break + for v in independent_set: + residual_wt[v] -= min_wt_in_class + P = [v for v in P if residual_wt[v] != 0] + return P + + def expand(self, C, C_weight, P): + """Look for the best clique that contains all the nodes in C and zero or + more of the nodes in P, backtracking if it can be shown that no such + clique has greater weight than the incumbent. + """ + self.update_incumbent_if_improved(C, C_weight) + branching_nodes = self.find_branching_nodes(P, self.incumbent_weight - C_weight) + while branching_nodes: + v = branching_nodes.pop() + P.remove(v) + new_C = C + [v] + new_C_weight = C_weight + self.node_weights[v] + new_P = [w for w in P if self.G.has_edge(v, w)] + self.expand(new_C, new_C_weight, new_P) + + def find_max_weight_clique(self): + """Find a maximum weight clique.""" + # Sort nodes in reverse order of degree for speed + nodes = sorted(self.G.nodes(), key=lambda v: self.G.degree(v), reverse=True) + nodes = [v for v in nodes if self.node_weights[v] > 0] + self.expand([], 0, nodes) + + +@not_implemented_for("directed") +@nx._dispatchable(node_attrs="weight") +def max_weight_clique(G, weight="weight"): + """Find a maximum weight clique in G. 
+ + A *clique* in a graph is a set of nodes such that every two distinct nodes + are adjacent. The *weight* of a clique is the sum of the weights of its + nodes. A *maximum weight clique* of graph G is a clique C in G such that + no clique in G has weight greater than the weight of C. + + Parameters + ---------- + G : NetworkX graph + Undirected graph + weight : string or None, optional (default='weight') + The node attribute that holds the integer value used as a weight. + If None, then each node has weight 1. + + Returns + ------- + clique : list + the nodes of a maximum weight clique + weight : int + the weight of a maximum weight clique + + Notes + ----- + The implementation is recursive, and therefore it may run into recursion + depth issues if G contains a clique whose number of nodes is close to the + recursion depth limit. + + At each search node, the algorithm greedily constructs a weighted + independent set cover of part of the graph in order to find a small set of + nodes on which to branch. The algorithm is very similar to the algorithm + of Tavares et al. [1]_, other than the fact that the NetworkX version does + not use bitsets. This style of algorithm for maximum weight clique (and + maximum weight independent set, which is the same problem but on the + complement graph) has a decades-long history. See Algorithm B of Warren + and Hicks [2]_ and the references in that paper. + + References + ---------- + .. [1] Tavares, W.A., Neto, M.B.C., Rodrigues, C.D., Michelon, P.: Um + algoritmo de branch and bound para o problema da clique máxima + ponderada. Proceedings of XLVII SBPO 1 (2015). + + .. [2] Warren, Jeffrey S, Hicks, Illya V.: Combinatorial Branch-and-Bound + for the Maximum Weight Independent Set Problem. Technical Report, + Texas A&M University (2016). 
+ """ + + mwc = MaxWeightClique(G, weight) + mwc.find_max_weight_clique() + return mwc.incumbent_nodes, mwc.incumbent_weight diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/cluster.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/cluster.py new file mode 100644 index 0000000000000000000000000000000000000000..6c91ad28135059fb47b6b65373d4489d038f9eae --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/cluster.py @@ -0,0 +1,609 @@ +"""Algorithms to characterize the number of triangles in a graph.""" + +from collections import Counter +from itertools import chain, combinations + +import networkx as nx +from networkx.utils import not_implemented_for + +__all__ = [ + "triangles", + "average_clustering", + "clustering", + "transitivity", + "square_clustering", + "generalized_degree", +] + + +@not_implemented_for("directed") +@nx._dispatchable +def triangles(G, nodes=None): + """Compute the number of triangles. + + Finds the number of triangles that include a node as one vertex. + + Parameters + ---------- + G : graph + A networkx graph + + nodes : node, iterable of nodes, or None (default=None) + If a singleton node, return the number of triangles for that node. + If an iterable, compute the number of triangles for each of those nodes. + If `None` (the default) compute the number of triangles for all nodes in `G`. + + Returns + ------- + out : dict or int + If `nodes` is a container of nodes, returns number of triangles keyed by node (dict). + If `nodes` is a specific node, returns number of triangles for the node (int). + + Examples + -------- + >>> G = nx.complete_graph(5) + >>> print(nx.triangles(G, 0)) + 6 + >>> print(nx.triangles(G)) + {0: 6, 1: 6, 2: 6, 3: 6, 4: 6} + >>> print(list(nx.triangles(G, [0, 1]).values())) + [6, 6] + + Notes + ----- + Self loops are ignored. 
+ + """ + if nodes is not None: + # If `nodes` represents a single node, return only its number of triangles + if nodes in G: + return next(_triangles_and_degree_iter(G, nodes))[2] // 2 + + # if `nodes` is a container of nodes, then return a + # dictionary mapping node to number of triangles. + return {v: t // 2 for v, d, t, _ in _triangles_and_degree_iter(G, nodes)} + + # if nodes is None, then compute triangles for the complete graph + + # dict used to avoid visiting the same nodes twice + # this allows calculating/counting each triangle only once + later_nbrs = {} + + # iterate over the nodes in a graph + for node, neighbors in G.adjacency(): + later_nbrs[node] = {n for n in neighbors if n not in later_nbrs and n != node} + + # instantiate Counter for each node to include isolated nodes + # add 1 to the count if a nodes neighbor's neighbor is also a neighbor + triangle_counts = Counter(dict.fromkeys(G, 0)) + for node1, neighbors in later_nbrs.items(): + for node2 in neighbors: + third_nodes = neighbors & later_nbrs[node2] + m = len(third_nodes) + triangle_counts[node1] += m + triangle_counts[node2] += m + triangle_counts.update(third_nodes) + + return dict(triangle_counts) + + +@not_implemented_for("multigraph") +def _triangles_and_degree_iter(G, nodes=None): + """Return an iterator of (node, degree, triangles, generalized degree). + + This double counts triangles so you may want to divide by 2. + See degree(), triangles() and generalized_degree() for definitions + and details. 
+ + """ + if nodes is None: + nodes_nbrs = G.adj.items() + else: + nodes_nbrs = ((n, G[n]) for n in G.nbunch_iter(nodes)) + + for v, v_nbrs in nodes_nbrs: + vs = set(v_nbrs) - {v} + gen_degree = Counter(len(vs & (set(G[w]) - {w})) for w in vs) + ntriangles = sum(k * val for k, val in gen_degree.items()) + yield (v, len(vs), ntriangles, gen_degree) + + +@not_implemented_for("multigraph") +def _weighted_triangles_and_degree_iter(G, nodes=None, weight="weight"): + """Return an iterator of (node, degree, weighted_triangles). + + Used for weighted clustering. + Note: this returns the geometric average weight of edges in the triangle. + Also, each triangle is counted twice (each direction). + So you may want to divide by 2. + + """ + import numpy as np + + if weight is None or G.number_of_edges() == 0: + max_weight = 1 + else: + max_weight = max(d.get(weight, 1) for u, v, d in G.edges(data=True)) + if nodes is None: + nodes_nbrs = G.adj.items() + else: + nodes_nbrs = ((n, G[n]) for n in G.nbunch_iter(nodes)) + + def wt(u, v): + return G[u][v].get(weight, 1) / max_weight + + for i, nbrs in nodes_nbrs: + inbrs = set(nbrs) - {i} + weighted_triangles = 0 + seen = set() + for j in inbrs: + seen.add(j) + # This avoids counting twice -- we double at the end. + jnbrs = set(G[j]) - seen + # Only compute the edge weight once, before the inner inner + # loop. + wij = wt(i, j) + weighted_triangles += np.cbrt( + [(wij * wt(j, k) * wt(k, i)) for k in inbrs & jnbrs] + ).sum() + yield (i, len(inbrs), 2 * float(weighted_triangles)) + + +@not_implemented_for("multigraph") +def _directed_triangles_and_degree_iter(G, nodes=None): + """Return an iterator of + (node, total_degree, reciprocal_degree, directed_triangles). + + Used for directed clustering. + Note that unlike `_triangles_and_degree_iter()`, this function counts + directed triangles so does not count triangles twice. 
+ + """ + nodes_nbrs = ((n, G._pred[n], G._succ[n]) for n in G.nbunch_iter(nodes)) + + for i, preds, succs in nodes_nbrs: + ipreds = set(preds) - {i} + isuccs = set(succs) - {i} + + directed_triangles = 0 + for j in chain(ipreds, isuccs): + jpreds = set(G._pred[j]) - {j} + jsuccs = set(G._succ[j]) - {j} + directed_triangles += sum( + 1 + for k in chain( + (ipreds & jpreds), + (ipreds & jsuccs), + (isuccs & jpreds), + (isuccs & jsuccs), + ) + ) + dtotal = len(ipreds) + len(isuccs) + dbidirectional = len(ipreds & isuccs) + yield (i, dtotal, dbidirectional, directed_triangles) + + +@not_implemented_for("multigraph") +def _directed_weighted_triangles_and_degree_iter(G, nodes=None, weight="weight"): + """Return an iterator of + (node, total_degree, reciprocal_degree, directed_weighted_triangles). + + Used for directed weighted clustering. + Note that unlike `_weighted_triangles_and_degree_iter()`, this function counts + directed triangles so does not count triangles twice. + + """ + import numpy as np + + if weight is None or G.number_of_edges() == 0: + max_weight = 1 + else: + max_weight = max(d.get(weight, 1) for u, v, d in G.edges(data=True)) + + nodes_nbrs = ((n, G._pred[n], G._succ[n]) for n in G.nbunch_iter(nodes)) + + def wt(u, v): + return G[u][v].get(weight, 1) / max_weight + + for i, preds, succs in nodes_nbrs: + ipreds = set(preds) - {i} + isuccs = set(succs) - {i} + + directed_triangles = 0 + for j in ipreds: + jpreds = set(G._pred[j]) - {j} + jsuccs = set(G._succ[j]) - {j} + directed_triangles += np.cbrt( + [(wt(j, i) * wt(k, i) * wt(k, j)) for k in ipreds & jpreds] + ).sum() + directed_triangles += np.cbrt( + [(wt(j, i) * wt(k, i) * wt(j, k)) for k in ipreds & jsuccs] + ).sum() + directed_triangles += np.cbrt( + [(wt(j, i) * wt(i, k) * wt(k, j)) for k in isuccs & jpreds] + ).sum() + directed_triangles += np.cbrt( + [(wt(j, i) * wt(i, k) * wt(j, k)) for k in isuccs & jsuccs] + ).sum() + + for j in isuccs: + jpreds = set(G._pred[j]) - {j} + jsuccs = 
set(G._succ[j]) - {j} + directed_triangles += np.cbrt( + [(wt(i, j) * wt(k, i) * wt(k, j)) for k in ipreds & jpreds] + ).sum() + directed_triangles += np.cbrt( + [(wt(i, j) * wt(k, i) * wt(j, k)) for k in ipreds & jsuccs] + ).sum() + directed_triangles += np.cbrt( + [(wt(i, j) * wt(i, k) * wt(k, j)) for k in isuccs & jpreds] + ).sum() + directed_triangles += np.cbrt( + [(wt(i, j) * wt(i, k) * wt(j, k)) for k in isuccs & jsuccs] + ).sum() + + dtotal = len(ipreds) + len(isuccs) + dbidirectional = len(ipreds & isuccs) + yield (i, dtotal, dbidirectional, float(directed_triangles)) + + +@nx._dispatchable(edge_attrs="weight") +def average_clustering(G, nodes=None, weight=None, count_zeros=True): + r"""Compute the average clustering coefficient for the graph G. + + The clustering coefficient for the graph is the average, + + .. math:: + + C = \frac{1}{n}\sum_{v \in G} c_v, + + where :math:`n` is the number of nodes in `G`. + + Parameters + ---------- + G : graph + + nodes : container of nodes, optional (default=all nodes in G) + Compute average clustering for nodes in this container. + + weight : string or None, optional (default=None) + The edge attribute that holds the numerical value used as a weight. + If None, then each edge has weight 1. + + count_zeros : bool + If False include only the nodes with nonzero clustering in the average. + + Returns + ------- + avg : float + Average clustering + + Examples + -------- + >>> G = nx.complete_graph(5) + >>> print(nx.average_clustering(G)) + 1.0 + + Notes + ----- + This is a space saving routine; it might be faster + to use the clustering function to get a list and then take the average. + + Self loops are ignored. + + References + ---------- + .. [1] Generalizations of the clustering coefficient to weighted + complex networks by J. Saramäki, M. Kivelä, J.-P. Onnela, + K. Kaski, and J. Kertész, Physical Review E, 75 027105 (2007). + http://jponnela.com/web_documents/a9.pdf + .. 
[2] Marcus Kaiser, Mean clustering coefficients: the role of isolated + nodes and leafs on clustering measures for small-world networks. + https://arxiv.org/abs/0802.2512 + """ + c = clustering(G, nodes, weight=weight).values() + if not count_zeros: + c = [v for v in c if abs(v) > 0] + return sum(c) / len(c) + + +@nx._dispatchable(edge_attrs="weight") +def clustering(G, nodes=None, weight=None): + r"""Compute the clustering coefficient for nodes. + + For unweighted graphs, the clustering of a node :math:`u` + is the fraction of possible triangles through that node that exist, + + .. math:: + + c_u = \frac{2 T(u)}{deg(u)(deg(u)-1)}, + + where :math:`T(u)` is the number of triangles through node :math:`u` and + :math:`deg(u)` is the degree of :math:`u`. + + For weighted graphs, there are several ways to define clustering [1]_. + the one used here is defined + as the geometric average of the subgraph edge weights [2]_, + + .. math:: + + c_u = \frac{1}{deg(u)(deg(u)-1))} + \sum_{vw} (\hat{w}_{uv} \hat{w}_{uw} \hat{w}_{vw})^{1/3}. + + The edge weights :math:`\hat{w}_{uv}` are normalized by the maximum weight + in the network :math:`\hat{w}_{uv} = w_{uv}/\max(w)`. + + The value of :math:`c_u` is assigned to 0 if :math:`deg(u) < 2`. + + Additionally, this weighted definition has been generalized to support negative edge weights [3]_. + + For directed graphs, the clustering is similarly defined as the fraction + of all possible directed triangles or geometric average of the subgraph + edge weights for unweighted and weighted directed graph respectively [4]_. + + .. math:: + + c_u = \frac{T(u)}{2(deg^{tot}(u)(deg^{tot}(u)-1) - 2deg^{\leftrightarrow}(u))}, + + where :math:`T(u)` is the number of directed triangles through node + :math:`u`, :math:`deg^{tot}(u)` is the sum of in degree and out degree of + :math:`u` and :math:`deg^{\leftrightarrow}(u)` is the reciprocal degree of + :math:`u`. 
+ + + Parameters + ---------- + G : graph + + nodes : node, iterable of nodes, or None (default=None) + If a singleton node, return the number of triangles for that node. + If an iterable, compute the number of triangles for each of those nodes. + If `None` (the default) compute the number of triangles for all nodes in `G`. + + weight : string or None, optional (default=None) + The edge attribute that holds the numerical value used as a weight. + If None, then each edge has weight 1. + + Returns + ------- + out : float, or dictionary + Clustering coefficient at specified nodes + + Examples + -------- + >>> G = nx.complete_graph(5) + >>> print(nx.clustering(G, 0)) + 1.0 + >>> print(nx.clustering(G)) + {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0} + + Notes + ----- + Self loops are ignored. + + References + ---------- + .. [1] Generalizations of the clustering coefficient to weighted + complex networks by J. Saramäki, M. Kivelä, J.-P. Onnela, + K. Kaski, and J. Kertész, Physical Review E, 75 027105 (2007). + http://jponnela.com/web_documents/a9.pdf + .. [2] Intensity and coherence of motifs in weighted complex + networks by J. P. Onnela, J. Saramäki, J. Kertész, and K. Kaski, + Physical Review E, 71(6), 065103 (2005). + .. [3] Generalization of Clustering Coefficients to Signed Correlation Networks + by G. Costantini and M. Perugini, PloS one, 9(2), e88669 (2014). + .. [4] Clustering in complex directed networks by G. Fagiolo, + Physical Review E, 76(2), 026107 (2007). 
+ """ + if G.is_directed(): + if weight is not None: + td_iter = _directed_weighted_triangles_and_degree_iter(G, nodes, weight) + clusterc = { + v: 0 if t == 0 else t / ((dt * (dt - 1) - 2 * db) * 2) + for v, dt, db, t in td_iter + } + else: + td_iter = _directed_triangles_and_degree_iter(G, nodes) + clusterc = { + v: 0 if t == 0 else t / ((dt * (dt - 1) - 2 * db) * 2) + for v, dt, db, t in td_iter + } + else: + # The formula 2*T/(d*(d-1)) from docs is t/(d*(d-1)) here b/c t==2*T + if weight is not None: + td_iter = _weighted_triangles_and_degree_iter(G, nodes, weight) + clusterc = {v: 0 if t == 0 else t / (d * (d - 1)) for v, d, t in td_iter} + else: + td_iter = _triangles_and_degree_iter(G, nodes) + clusterc = {v: 0 if t == 0 else t / (d * (d - 1)) for v, d, t, _ in td_iter} + if nodes in G: + # Return the value of the sole entry in the dictionary. + return clusterc[nodes] + return clusterc + + +@nx._dispatchable +def transitivity(G): + r"""Compute graph transitivity, the fraction of all possible triangles + present in G. + + Possible triangles are identified by the number of "triads" + (two edges with a shared vertex). + + The transitivity is + + .. math:: + + T = 3\frac{\#triangles}{\#triads}. + + Parameters + ---------- + G : graph + + Returns + ------- + out : float + Transitivity + + Notes + ----- + Self loops are ignored. + + Examples + -------- + >>> G = nx.complete_graph(5) + >>> print(nx.transitivity(G)) + 1.0 + """ + triangles_contri = [ + (t, d * (d - 1)) for v, d, t, _ in _triangles_and_degree_iter(G) + ] + # If the graph is empty + if len(triangles_contri) == 0: + return 0 + triangles, contri = map(sum, zip(*triangles_contri)) + return 0 if triangles == 0 else triangles / contri + + +@nx._dispatchable +def square_clustering(G, nodes=None): + r"""Compute the squares clustering coefficient for nodes. + + For each node return the fraction of possible squares that exist at + the node [1]_ + + .. 
math:: + C_4(v) = \frac{ \sum_{u=1}^{k_v} + \sum_{w=u+1}^{k_v} q_v(u,w) }{ \sum_{u=1}^{k_v} + \sum_{w=u+1}^{k_v} [a_v(u,w) + q_v(u,w)]}, + + where :math:`q_v(u,w)` are the number of common neighbors of :math:`u` and + :math:`w` other than :math:`v` (ie squares), and :math:`a_v(u,w) = (k_u - + (1+q_v(u,w)+\theta_{uv})) + (k_w - (1+q_v(u,w)+\theta_{uw}))`, where + :math:`\theta_{uw} = 1` if :math:`u` and :math:`w` are connected and 0 + otherwise. [2]_ + + Parameters + ---------- + G : graph + + nodes : container of nodes, optional (default=all nodes in G) + Compute clustering for nodes in this container. + + Returns + ------- + c4 : dictionary + A dictionary keyed by node with the square clustering coefficient value. + + Examples + -------- + >>> G = nx.complete_graph(5) + >>> print(nx.square_clustering(G, 0)) + 1.0 + >>> print(nx.square_clustering(G)) + {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0} + + Notes + ----- + While :math:`C_3(v)` (triangle clustering) gives the probability that + two neighbors of node v are connected with each other, :math:`C_4(v)` is + the probability that two neighbors of node v share a common + neighbor different from v. This algorithm can be applied to both + bipartite and unipartite networks. + + References + ---------- + .. [1] Pedro G. Lind, Marta C. González, and Hans J. Herrmann. 2005 + Cycles and clustering in bipartite networks. + Physical Review E (72) 056127. + .. [2] Zhang, Peng et al. Clustering Coefficient and Community Structure of + Bipartite Networks. Physica A: Statistical Mechanics and its Applications 387.27 (2008): 6869–6875. 
+ https://arxiv.org/abs/0710.0117v1 + """ + if nodes is None: + node_iter = G + else: + node_iter = G.nbunch_iter(nodes) + clustering = {} + for v in node_iter: + clustering[v] = 0 + potential = 0 + for u, w in combinations(G[v], 2): + squares = len((set(G[u]) & set(G[w])) - {v}) + clustering[v] += squares + degm = squares + 1 + if w in G[u]: + degm += 1 + potential += (len(G[u]) - degm) + (len(G[w]) - degm) + squares + if potential > 0: + clustering[v] /= potential + if nodes in G: + # Return the value of the sole entry in the dictionary. + return clustering[nodes] + return clustering + + +@not_implemented_for("directed") +@nx._dispatchable +def generalized_degree(G, nodes=None): + r"""Compute the generalized degree for nodes. + + For each node, the generalized degree shows how many edges of given + triangle multiplicity the node is connected to. The triangle multiplicity + of an edge is the number of triangles an edge participates in. The + generalized degree of node :math:`i` can be written as a vector + :math:`\mathbf{k}_i=(k_i^{(0)}, \dotsc, k_i^{(N-2)})` where + :math:`k_i^{(j)}` is the number of edges attached to node :math:`i` that + participate in :math:`j` triangles. + + Parameters + ---------- + G : graph + + nodes : container of nodes, optional (default=all nodes in G) + Compute the generalized degree for nodes in this container. + + Returns + ------- + out : Counter, or dictionary of Counters + Generalized degree of specified nodes. The Counter is keyed by edge + triangle multiplicity. + + Examples + -------- + >>> G = nx.complete_graph(5) + >>> print(nx.generalized_degree(G, 0)) + Counter({3: 4}) + >>> print(nx.generalized_degree(G)) + {0: Counter({3: 4}), 1: Counter({3: 4}), 2: Counter({3: 4}), 3: Counter({3: 4}), 4: Counter({3: 4})} + + To recover the number of triangles attached to a node: + + >>> k1 = nx.generalized_degree(G, 0) + >>> sum([k * v for k, v in k1.items()]) / 2 == nx.triangles(G, 0) + True + + Notes + ----- + Self loops are ignored. 
+ + In a network of N nodes, the highest triangle multiplicity an edge can have + is N-2. + + The return value does not include a `zero` entry if no edges of a + particular triangle multiplicity are present. + + The number of triangles node :math:`i` is attached to can be recovered from + the generalized degree :math:`\mathbf{k}_i=(k_i^{(0)}, \dotsc, + k_i^{(N-2)})` by :math:`(k_i^{(1)}+2k_i^{(2)}+\dotsc +(N-2)k_i^{(N-2)})/2`. + + References + ---------- + .. [1] Networks with arbitrary edge multiplicities by V. Zlatić, + D. Garlaschelli and G. Caldarelli, EPL (Europhysics Letters), + Volume 97, Number 2 (2012). + https://iopscience.iop.org/article/10.1209/0295-5075/97/28005 + """ + if nodes in G: + return next(_triangles_and_degree_iter(G, nodes))[3] + return {v: gd for v, d, t, gd in _triangles_and_degree_iter(G, nodes)} diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/communicability_alg.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/communicability_alg.py new file mode 100644 index 0000000000000000000000000000000000000000..dea156b633a2b367c184f4bf31ab465812de68b4 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/communicability_alg.py @@ -0,0 +1,163 @@ +""" +Communicability. +""" + +import networkx as nx +from networkx.utils import not_implemented_for + +__all__ = ["communicability", "communicability_exp"] + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +@nx._dispatchable +def communicability(G): + r"""Returns communicability between all pairs of nodes in G. + + The communicability between pairs of nodes in G is the sum of + walks of different lengths starting at node u and ending at node v. + + Parameters + ---------- + G: graph + + Returns + ------- + comm: dictionary of dictionaries + Dictionary of dictionaries keyed by nodes with communicability + as the value. + + Raises + ------ + NetworkXError + If the graph is not undirected and simple. 
+ + See Also + -------- + communicability_exp: + Communicability between all pairs of nodes in G using spectral + decomposition. + communicability_betweenness_centrality: + Communicability betweenness centrality for each node in G. + + Notes + ----- + This algorithm uses a spectral decomposition of the adjacency matrix. + Let G=(V,E) be a simple undirected graph. Using the connection between + the powers of the adjacency matrix and the number of walks in the graph, + the communicability between nodes `u` and `v` based on the graph spectrum + is [1]_ + + .. math:: + C(u,v)=\sum_{j=1}^{n}\phi_{j}(u)\phi_{j}(v)e^{\lambda_{j}}, + + where `\phi_{j}(u)` is the `u\rm{th}` element of the `j\rm{th}` orthonormal + eigenvector of the adjacency matrix associated with the eigenvalue + `\lambda_{j}`. + + References + ---------- + .. [1] Ernesto Estrada, Naomichi Hatano, + "Communicability in complex networks", + Phys. Rev. E 77, 036111 (2008). + https://arxiv.org/abs/0707.0756 + + Examples + -------- + >>> G = nx.Graph([(0, 1), (1, 2), (1, 5), (5, 4), (2, 4), (2, 3), (4, 3), (3, 6)]) + >>> c = nx.communicability(G) + """ + import numpy as np + + nodelist = list(G) # ordering of nodes in matrix + A = nx.to_numpy_array(G, nodelist) + # convert to 0-1 matrix + A[A != 0.0] = 1 + w, vec = np.linalg.eigh(A) + expw = np.exp(w) + mapping = dict(zip(nodelist, range(len(nodelist)))) + c = {} + # computing communicabilities + for u in G: + c[u] = {} + for v in G: + s = 0 + p = mapping[u] + q = mapping[v] + for j in range(len(nodelist)): + s += vec[:, j][p] * vec[:, j][q] * expw[j] + c[u][v] = float(s) + return c + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +@nx._dispatchable +def communicability_exp(G): + r"""Returns communicability between all pairs of nodes in G. + + Communicability between pair of node (u,v) of node in G is the sum of + walks of different lengths starting at node u and ending at node v. 
+ + Parameters + ---------- + G: graph + + Returns + ------- + comm: dictionary of dictionaries + Dictionary of dictionaries keyed by nodes with communicability + as the value. + + Raises + ------ + NetworkXError + If the graph is not undirected and simple. + + See Also + -------- + communicability: + Communicability between pairs of nodes in G. + communicability_betweenness_centrality: + Communicability betweenness centrality for each node in G. + + Notes + ----- + This algorithm uses matrix exponentiation of the adjacency matrix. + + Let G=(V,E) be a simple undirected graph. Using the connection between + the powers of the adjacency matrix and the number of walks in the graph, + the communicability between nodes u and v is [1]_, + + .. math:: + C(u,v) = (e^A)_{uv}, + + where `A` is the adjacency matrix of G. + + References + ---------- + .. [1] Ernesto Estrada, Naomichi Hatano, + "Communicability in complex networks", + Phys. Rev. E 77, 036111 (2008). + https://arxiv.org/abs/0707.0756 + + Examples + -------- + >>> G = nx.Graph([(0, 1), (1, 2), (1, 5), (5, 4), (2, 4), (2, 3), (4, 3), (3, 6)]) + >>> c = nx.communicability_exp(G) + """ + import scipy as sp + + nodelist = list(G) # ordering of nodes in matrix + A = nx.to_numpy_array(G, nodelist) + # convert to 0-1 matrix + A[A != 0.0] = 1 + # communicability matrix + expA = sp.linalg.expm(A) + mapping = dict(zip(nodelist, range(len(nodelist)))) + c = {} + for u in G: + c[u] = {} + for v in G: + c[u][v] = float(expA[mapping[u], mapping[v]]) + return c diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/core.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/core.py new file mode 100644 index 0000000000000000000000000000000000000000..6acfb49952409818d0cf173dff29a09fb7b3595a --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/core.py @@ -0,0 +1,649 @@ +""" +Find the k-cores of a graph. + +The k-core is found by recursively pruning nodes with degrees less than k. 
+ +See the following references for details: + +An O(m) Algorithm for Cores Decomposition of Networks +Vladimir Batagelj and Matjaz Zaversnik, 2003. +https://arxiv.org/abs/cs.DS/0310049 + +Generalized Cores +Vladimir Batagelj and Matjaz Zaversnik, 2002. +https://arxiv.org/pdf/cs/0202039 + +For directed graphs a more general notion is that of D-cores which +looks at (k, l) restrictions on (in, out) degree. The (k, k) D-core +is the k-core. + +D-cores: Measuring Collaboration of Directed Graphs Based on Degeneracy +Christos Giatsidis, Dimitrios M. Thilikos, Michalis Vazirgiannis, ICDM 2011. +http://www.graphdegeneracy.org/dcores_ICDM_2011.pdf + +Multi-scale structure and topological anomaly detection via a new network \ +statistic: The onion decomposition +L. Hébert-Dufresne, J. A. Grochow, and A. Allard +Scientific Reports 6, 31708 (2016) +http://doi.org/10.1038/srep31708 + +""" + +import networkx as nx + +__all__ = [ + "core_number", + "k_core", + "k_shell", + "k_crust", + "k_corona", + "k_truss", + "onion_layers", +] + + +@nx.utils.not_implemented_for("multigraph") +@nx._dispatchable +def core_number(G): + """Returns the core number for each node. + + A k-core is a maximal subgraph that contains nodes of degree k or more. + + The core number of a node is the largest value k of a k-core containing + that node. + + Parameters + ---------- + G : NetworkX graph + An undirected or directed graph + + Returns + ------- + core_number : dictionary + A dictionary keyed by node to the core number. + + Raises + ------ + NetworkXNotImplemented + If `G` is a multigraph or contains self loops. + + Notes + ----- + For directed graphs the node degree is defined to be the + in-degree + out-degree. 
+ + Examples + -------- + >>> degrees = [0, 1, 2, 2, 2, 2, 3] + >>> H = nx.havel_hakimi_graph(degrees) + >>> nx.core_number(H) + {0: 1, 1: 2, 2: 2, 3: 2, 4: 1, 5: 2, 6: 0} + >>> G = nx.DiGraph() + >>> G.add_edges_from([(1, 2), (2, 1), (2, 3), (2, 4), (3, 4), (4, 3)]) + >>> nx.core_number(G) + {1: 2, 2: 2, 3: 2, 4: 2} + + References + ---------- + .. [1] An O(m) Algorithm for Cores Decomposition of Networks + Vladimir Batagelj and Matjaz Zaversnik, 2003. + https://arxiv.org/abs/cs.DS/0310049 + """ + if nx.number_of_selfloops(G) > 0: + msg = ( + "Input graph has self loops which is not permitted; " + "Consider using G.remove_edges_from(nx.selfloop_edges(G))." + ) + raise nx.NetworkXNotImplemented(msg) + degrees = dict(G.degree()) + # Sort nodes by degree. + nodes = sorted(degrees, key=degrees.get) + bin_boundaries = [0] + curr_degree = 0 + for i, v in enumerate(nodes): + if degrees[v] > curr_degree: + bin_boundaries.extend([i] * (degrees[v] - curr_degree)) + curr_degree = degrees[v] + node_pos = {v: pos for pos, v in enumerate(nodes)} + # The initial guess for the core number of a node is its degree. + core = degrees + nbrs = {v: list(nx.all_neighbors(G, v)) for v in G} + for v in nodes: + for u in nbrs[v]: + if core[u] > core[v]: + nbrs[u].remove(v) + pos = node_pos[u] + bin_start = bin_boundaries[core[u]] + node_pos[u] = bin_start + node_pos[nodes[bin_start]] = pos + nodes[bin_start], nodes[pos] = nodes[pos], nodes[bin_start] + bin_boundaries[core[u]] += 1 + core[u] -= 1 + return core + + +def _core_subgraph(G, k_filter, k=None, core=None): + """Returns the subgraph induced by nodes passing filter `k_filter`. + + Parameters + ---------- + G : NetworkX graph + The graph or directed graph to process + k_filter : filter function + This function filters the nodes chosen. It takes three inputs: + A node of G, the filter's cutoff, and the core dict of the graph. + The function should return a Boolean value. + k : int, optional + The order of the core. 
If not specified use the max core number. + This value is used as the cutoff for the filter. + core : dict, optional + Precomputed core numbers keyed by node for the graph `G`. + If not specified, the core numbers will be computed from `G`. + + """ + if core is None: + core = core_number(G) + if k is None: + k = max(core.values()) + nodes = (v for v in core if k_filter(v, k, core)) + return G.subgraph(nodes).copy() + + +@nx._dispatchable(preserve_all_attrs=True, returns_graph=True) +def k_core(G, k=None, core_number=None): + """Returns the k-core of G. + + A k-core is a maximal subgraph that contains nodes of degree `k` or more. + + .. deprecated:: 3.3 + `k_core` will not accept `MultiGraph` objects in version 3.5. + + Parameters + ---------- + G : NetworkX graph + A graph or directed graph + k : int, optional + The order of the core. If not specified return the main core. + core_number : dictionary, optional + Precomputed core numbers for the graph G. + + Returns + ------- + G : NetworkX graph + The k-core subgraph + + Raises + ------ + NetworkXNotImplemented + The k-core is not defined for multigraphs or graphs with self loops. + + Notes + ----- + The main core is the core with `k` as the largest core_number. + + For directed graphs the node degree is defined to be the + in-degree + out-degree. + + Graph, node, and edge attributes are copied to the subgraph. + + Examples + -------- + >>> degrees = [0, 1, 2, 2, 2, 2, 3] + >>> H = nx.havel_hakimi_graph(degrees) + >>> H.degree + DegreeView({0: 1, 1: 2, 2: 2, 3: 2, 4: 2, 5: 3, 6: 0}) + >>> nx.k_core(H).nodes + NodeView((1, 2, 3, 5)) + + See Also + -------- + core_number + + References + ---------- + .. [1] An O(m) Algorithm for Cores Decomposition of Networks + Vladimir Batagelj and Matjaz Zaversnik, 2003. 
+ https://arxiv.org/abs/cs.DS/0310049 + """ + + import warnings + + if G.is_multigraph(): + warnings.warn( + ( + "\n\n`k_core` will not accept `MultiGraph` objects in version 3.5.\n" + "Convert it to an undirected graph instead, using::\n\n" + "\tG = nx.Graph(G)\n" + ), + category=DeprecationWarning, + stacklevel=5, + ) + + def k_filter(v, k, c): + return c[v] >= k + + return _core_subgraph(G, k_filter, k, core_number) + + +@nx._dispatchable(preserve_all_attrs=True, returns_graph=True) +def k_shell(G, k=None, core_number=None): + """Returns the k-shell of G. + + The k-shell is the subgraph induced by nodes with core number k. + That is, nodes in the k-core that are not in the (k+1)-core. + + .. deprecated:: 3.3 + `k_shell` will not accept `MultiGraph` objects in version 3.5. + + Parameters + ---------- + G : NetworkX graph + A graph or directed graph. + k : int, optional + The order of the shell. If not specified return the outer shell. + core_number : dictionary, optional + Precomputed core numbers for the graph G. + + + Returns + ------- + G : NetworkX graph + The k-shell subgraph + + Raises + ------ + NetworkXNotImplemented + The k-shell is not implemented for multigraphs or graphs with self loops. + + Notes + ----- + This is similar to k_corona but in that case only neighbors in the + k-core are considered. + + For directed graphs the node degree is defined to be the + in-degree + out-degree. + + Graph, node, and edge attributes are copied to the subgraph. + + Examples + -------- + >>> degrees = [0, 1, 2, 2, 2, 2, 3] + >>> H = nx.havel_hakimi_graph(degrees) + >>> H.degree + DegreeView({0: 1, 1: 2, 2: 2, 3: 2, 4: 2, 5: 3, 6: 0}) + >>> nx.k_shell(H, k=1).nodes + NodeView((0, 4)) + + See Also + -------- + core_number + k_corona + + + References + ---------- + .. [1] A model of Internet topology using k-shell decomposition + Shai Carmi, Shlomo Havlin, Scott Kirkpatrick, Yuval Shavitt, + and Eran Shir, PNAS July 3, 2007 vol. 104 no. 
27 11150-11154 + http://www.pnas.org/content/104/27/11150.full + """ + + import warnings + + if G.is_multigraph(): + warnings.warn( + ( + "\n\n`k_shell` will not accept `MultiGraph` objects in version 3.5.\n" + "Convert it to an undirected graph instead, using::\n\n" + "\tG = nx.Graph(G)\n" + ), + category=DeprecationWarning, + stacklevel=5, + ) + + def k_filter(v, k, c): + return c[v] == k + + return _core_subgraph(G, k_filter, k, core_number) + + +@nx._dispatchable(preserve_all_attrs=True, returns_graph=True) +def k_crust(G, k=None, core_number=None): + """Returns the k-crust of G. + + The k-crust is the graph G with the edges of the k-core removed + and isolated nodes found after the removal of edges are also removed. + + .. deprecated:: 3.3 + `k_crust` will not accept `MultiGraph` objects in version 3.5. + + Parameters + ---------- + G : NetworkX graph + A graph or directed graph. + k : int, optional + The order of the shell. If not specified return the main crust. + core_number : dictionary, optional + Precomputed core numbers for the graph G. + + Returns + ------- + G : NetworkX graph + The k-crust subgraph + + Raises + ------ + NetworkXNotImplemented + The k-crust is not implemented for multigraphs or graphs with self loops. + + Notes + ----- + This definition of k-crust is different than the definition in [1]_. + The k-crust in [1]_ is equivalent to the k+1 crust of this algorithm. + + For directed graphs the node degree is defined to be the + in-degree + out-degree. + + Graph, node, and edge attributes are copied to the subgraph. + + Examples + -------- + >>> degrees = [0, 1, 2, 2, 2, 2, 3] + >>> H = nx.havel_hakimi_graph(degrees) + >>> H.degree + DegreeView({0: 1, 1: 2, 2: 2, 3: 2, 4: 2, 5: 3, 6: 0}) + >>> nx.k_crust(H, k=1).nodes + NodeView((0, 4, 6)) + + See Also + -------- + core_number + + References + ---------- + .. 
[1] A model of Internet topology using k-shell decomposition + Shai Carmi, Shlomo Havlin, Scott Kirkpatrick, Yuval Shavitt, + and Eran Shir, PNAS July 3, 2007 vol. 104 no. 27 11150-11154 + http://www.pnas.org/content/104/27/11150.full + """ + + import warnings + + if G.is_multigraph(): + warnings.warn( + ( + "\n\n`k_crust` will not accept `MultiGraph` objects in version 3.5.\n" + "Convert it to an undirected graph instead, using::\n\n" + "\tG = nx.Graph(G)\n" + ), + category=DeprecationWarning, + stacklevel=5, + ) + + # Default for k is one less than in _core_subgraph, so just inline. + # Filter is c[v] <= k + if core_number is None: + core_number = nx.core_number(G) + if k is None: + k = max(core_number.values()) - 1 + nodes = (v for v in core_number if core_number[v] <= k) + return G.subgraph(nodes).copy() + + +@nx._dispatchable(preserve_all_attrs=True, returns_graph=True) +def k_corona(G, k, core_number=None): + """Returns the k-corona of G. + + The k-corona is the subgraph of nodes in the k-core which have + exactly k neighbors in the k-core. + + .. deprecated:: 3.3 + `k_corona` will not accept `MultiGraph` objects in version 3.5. + + Parameters + ---------- + G : NetworkX graph + A graph or directed graph + k : int + The order of the corona. + core_number : dictionary, optional + Precomputed core numbers for the graph G. + + Returns + ------- + G : NetworkX graph + The k-corona subgraph + + Raises + ------ + NetworkXNotImplemented + The k-corona is not defined for multigraphs or graphs with self loops. + + Notes + ----- + For directed graphs the node degree is defined to be the + in-degree + out-degree. + + Graph, node, and edge attributes are copied to the subgraph. 
+ + Examples + -------- + >>> degrees = [0, 1, 2, 2, 2, 2, 3] + >>> H = nx.havel_hakimi_graph(degrees) + >>> H.degree + DegreeView({0: 1, 1: 2, 2: 2, 3: 2, 4: 2, 5: 3, 6: 0}) + >>> nx.k_corona(H, k=2).nodes + NodeView((1, 2, 3, 5)) + + See Also + -------- + core_number + + References + ---------- + .. [1] k -core (bootstrap) percolation on complex networks: + Critical phenomena and nonlocal effects, + A. V. Goltsev, S. N. Dorogovtsev, and J. F. F. Mendes, + Phys. Rev. E 73, 056101 (2006) + http://link.aps.org/doi/10.1103/PhysRevE.73.056101 + """ + + import warnings + + if G.is_multigraph(): + warnings.warn( + ( + "\n\n`k_corona` will not accept `MultiGraph` objects in version 3.5.\n" + "Convert it to an undirected graph instead, using::\n\n" + "\tG = nx.Graph(G)\n" + ), + category=DeprecationWarning, + stacklevel=5, + ) + + def func(v, k, c): + return c[v] == k and k == sum(1 for w in G[v] if c[w] >= k) + + return _core_subgraph(G, func, k, core_number) + + +@nx.utils.not_implemented_for("directed") +@nx.utils.not_implemented_for("multigraph") +@nx._dispatchable(preserve_all_attrs=True, returns_graph=True) +def k_truss(G, k): + """Returns the k-truss of `G`. + + The k-truss is the maximal induced subgraph of `G` which contains at least + three vertices where every edge is incident to at least `k-2` triangles. + + Parameters + ---------- + G : NetworkX graph + An undirected graph + k : int + The order of the truss + + Returns + ------- + H : NetworkX graph + The k-truss subgraph + + Raises + ------ + NetworkXNotImplemented + If `G` is a multigraph or directed graph or if it contains self loops. + + Notes + ----- + A k-clique is a (k-2)-truss and a k-truss is a (k+1)-core. + + Graph, node, and edge attributes are copied to the subgraph. + + K-trusses were originally defined in [2] which states that the k-truss + is the maximal induced subgraph where each edge belongs to at least + `k-2` triangles. 
A more recent paper, [1], uses a slightly different + definition requiring that each edge belong to at least `k` triangles. + This implementation uses the original definition of `k-2` triangles. + + Examples + -------- + >>> degrees = [0, 1, 2, 2, 2, 2, 3] + >>> H = nx.havel_hakimi_graph(degrees) + >>> H.degree + DegreeView({0: 1, 1: 2, 2: 2, 3: 2, 4: 2, 5: 3, 6: 0}) + >>> nx.k_truss(H, k=2).nodes + NodeView((0, 1, 2, 3, 4, 5)) + + References + ---------- + .. [1] Bounds and Algorithms for k-truss. Paul Burkhardt, Vance Faber, + David G. Harris, 2018. https://arxiv.org/abs/1806.05523v2 + .. [2] Trusses: Cohesive Subgraphs for Social Network Analysis. Jonathan + Cohen, 2005. + """ + if nx.number_of_selfloops(G) > 0: + msg = ( + "Input graph has self loops which is not permitted; " + "Consider using G.remove_edges_from(nx.selfloop_edges(G))." + ) + raise nx.NetworkXNotImplemented(msg) + + H = G.copy() + + n_dropped = 1 + while n_dropped > 0: + n_dropped = 0 + to_drop = [] + seen = set() + for u in H: + nbrs_u = set(H[u]) + seen.add(u) + new_nbrs = [v for v in nbrs_u if v not in seen] + for v in new_nbrs: + if len(nbrs_u & set(H[v])) < (k - 2): + to_drop.append((u, v)) + H.remove_edges_from(to_drop) + n_dropped = len(to_drop) + H.remove_nodes_from(list(nx.isolates(H))) + + return H + + +@nx.utils.not_implemented_for("multigraph") +@nx.utils.not_implemented_for("directed") +@nx._dispatchable +def onion_layers(G): + """Returns the layer of each vertex in an onion decomposition of the graph. + + The onion decomposition refines the k-core decomposition by providing + information on the internal organization of each k-shell. It is usually + used alongside the `core numbers`. + + Parameters + ---------- + G : NetworkX graph + An undirected graph without self loops. + + Returns + ------- + od_layers : dictionary + A dictionary keyed by node to the onion layer. The layers are + contiguous integers starting at 1. 
+ + Raises + ------ + NetworkXNotImplemented + If `G` is a multigraph or directed graph or if it contains self loops. + + Examples + -------- + >>> degrees = [0, 1, 2, 2, 2, 2, 3] + >>> H = nx.havel_hakimi_graph(degrees) + >>> H.degree + DegreeView({0: 1, 1: 2, 2: 2, 3: 2, 4: 2, 5: 3, 6: 0}) + >>> nx.onion_layers(H) + {6: 1, 0: 2, 4: 3, 1: 4, 2: 4, 3: 4, 5: 4} + + See Also + -------- + core_number + + References + ---------- + .. [1] Multi-scale structure and topological anomaly detection via a new + network statistic: The onion decomposition + L. Hébert-Dufresne, J. A. Grochow, and A. Allard + Scientific Reports 6, 31708 (2016) + http://doi.org/10.1038/srep31708 + .. [2] Percolation and the effective structure of complex networks + A. Allard and L. Hébert-Dufresne + Physical Review X 9, 011023 (2019) + http://doi.org/10.1103/PhysRevX.9.011023 + """ + if nx.number_of_selfloops(G) > 0: + msg = ( + "Input graph contains self loops which is not permitted; " + "Consider using G.remove_edges_from(nx.selfloop_edges(G))." + ) + raise nx.NetworkXNotImplemented(msg) + # Dictionaries to register the k-core/onion decompositions. + od_layers = {} + # Adjacency list + neighbors = {v: list(nx.all_neighbors(G, v)) for v in G} + # Effective degree of nodes. + degrees = dict(G.degree()) + # Performs the onion decomposition. + current_core = 1 + current_layer = 1 + # Sets vertices of degree 0 to layer 1, if any. + isolated_nodes = list(nx.isolates(G)) + if len(isolated_nodes) > 0: + for v in isolated_nodes: + od_layers[v] = current_layer + degrees.pop(v) + current_layer = 2 + # Finds the layer for the remaining nodes. + while len(degrees) > 0: + # Sets the order for looking at nodes. + nodes = sorted(degrees, key=degrees.get) + # Sets properly the current core. + min_degree = degrees[nodes[0]] + if min_degree > current_core: + current_core = min_degree + # Identifies vertices in the current layer. 
+ this_layer = [] + for n in nodes: + if degrees[n] > current_core: + break + this_layer.append(n) + # Identifies the core/layer of the vertices in the current layer. + for v in this_layer: + od_layers[v] = current_layer + for n in neighbors[v]: + neighbors[n].remove(v) + degrees[n] = degrees[n] - 1 + degrees.pop(v) + # Updates the layer count. + current_layer = current_layer + 1 + # Returns the dictionaries containing the onion layer of each vertices. + return od_layers diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/covering.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/covering.py new file mode 100644 index 0000000000000000000000000000000000000000..a0e15dd335dcf52d06a5a470239ab47548b2a819 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/covering.py @@ -0,0 +1,142 @@ +"""Functions related to graph covers.""" + +from functools import partial +from itertools import chain + +import networkx as nx +from networkx.utils import arbitrary_element, not_implemented_for + +__all__ = ["min_edge_cover", "is_edge_cover"] + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +@nx._dispatchable +def min_edge_cover(G, matching_algorithm=None): + """Returns the min cardinality edge cover of the graph as a set of edges. + + A smallest edge cover can be found in polynomial time by finding + a maximum matching and extending it greedily so that all nodes + are covered. This function follows that process. A maximum matching + algorithm can be specified for the first step of the algorithm. + The resulting set may return a set with one 2-tuple for each edge, + (the usual case) or with both 2-tuples `(u, v)` and `(v, u)` for + each edge. The latter is only done when a bipartite matching algorithm + is specified as `matching_algorithm`. + + Parameters + ---------- + G : NetworkX graph + An undirected graph. + + matching_algorithm : function + A function that returns a maximum cardinality matching for `G`. 
+ The function must take one input, the graph `G`, and return + either a set of edges (with only one direction for the pair of nodes) + or a dictionary mapping each node to its mate. If not specified, + :func:`~networkx.algorithms.matching.max_weight_matching` is used. + Common bipartite matching functions include + :func:`~networkx.algorithms.bipartite.matching.hopcroft_karp_matching` + or + :func:`~networkx.algorithms.bipartite.matching.eppstein_matching`. + + Returns + ------- + min_cover : set + + A set of the edges in a minimum edge cover in the form of tuples. + It contains only one of the equivalent 2-tuples `(u, v)` and `(v, u)` + for each edge. If a bipartite method is used to compute the matching, + the returned set contains both the 2-tuples `(u, v)` and `(v, u)` + for each edge of a minimum edge cover. + + Examples + -------- + >>> G = nx.Graph([(0, 1), (0, 2), (0, 3), (1, 2), (1, 3)]) + >>> sorted(nx.min_edge_cover(G)) + [(2, 1), (3, 0)] + + Notes + ----- + An edge cover of a graph is a set of edges such that every node of + the graph is incident to at least one edge of the set. + The minimum edge cover is an edge covering of smallest cardinality. + + Due to its implementation, the worst-case running time of this algorithm + is bounded by the worst-case running time of the function + ``matching_algorithm``. + + Minimum edge cover for `G` can also be found using the `min_edge_covering` + function in :mod:`networkx.algorithms.bipartite.covering` which is + simply this function with a default matching algorithm of + :func:`~networkx.algorithms.bipartite.matching.hopcraft_karp_matching` + """ + if len(G) == 0: + return set() + if nx.number_of_isolates(G) > 0: + # ``min_cover`` does not exist as there is an isolated node + raise nx.NetworkXException( + "Graph has a node with no edge incident on it, so no edge cover exists." 
+ ) + if matching_algorithm is None: + matching_algorithm = partial(nx.max_weight_matching, maxcardinality=True) + maximum_matching = matching_algorithm(G) + # ``min_cover`` is superset of ``maximum_matching`` + try: + # bipartite matching algs return dict so convert if needed + min_cover = set(maximum_matching.items()) + bipartite_cover = True + except AttributeError: + min_cover = maximum_matching + bipartite_cover = False + # iterate for uncovered nodes + uncovered_nodes = set(G) - {v for u, v in min_cover} - {u for u, v in min_cover} + for v in uncovered_nodes: + # Since `v` is uncovered, each edge incident to `v` will join it + # with a covered node (otherwise, if there were an edge joining + # uncovered nodes `u` and `v`, the maximum matching algorithm + # would have found it), so we can choose an arbitrary edge + # incident to `v`. (This applies only in a simple graph, not a + # multigraph.) + u = arbitrary_element(G[v]) + min_cover.add((u, v)) + if bipartite_cover: + min_cover.add((v, u)) + return min_cover + + +@not_implemented_for("directed") +@nx._dispatchable +def is_edge_cover(G, cover): + """Decides whether a set of edges is a valid edge cover of the graph. + + Given a set of edges, whether it is an edge covering can + be decided if we just check whether all nodes of the graph + has an edge from the set, incident on it. + + Parameters + ---------- + G : NetworkX graph + An undirected bipartite graph. + + cover : set + Set of edges to be checked. + + Returns + ------- + bool + Whether the set of edges is a valid edge cover of the graph. + + Examples + -------- + >>> G = nx.Graph([(0, 1), (0, 2), (0, 3), (1, 2), (1, 3)]) + >>> cover = {(2, 1), (3, 0)} + >>> nx.is_edge_cover(G, cover) + True + + Notes + ----- + An edge cover of a graph is a set of edges such that every node of + the graph is incident to at least one edge of the set. 
+ """ + return set(G) <= set(chain.from_iterable(cover)) diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/cuts.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/cuts.py new file mode 100644 index 0000000000000000000000000000000000000000..e9514312765c25bfc64041165f8afda84d92e3e2 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/cuts.py @@ -0,0 +1,398 @@ +"""Functions for finding and evaluating cuts in a graph.""" + +from itertools import chain + +import networkx as nx + +__all__ = [ + "boundary_expansion", + "conductance", + "cut_size", + "edge_expansion", + "mixing_expansion", + "node_expansion", + "normalized_cut_size", + "volume", +] + + +# TODO STILL NEED TO UPDATE ALL THE DOCUMENTATION! + + +@nx._dispatchable(edge_attrs="weight") +def cut_size(G, S, T=None, weight=None): + """Returns the size of the cut between two sets of nodes. + + A *cut* is a partition of the nodes of a graph into two sets. The + *cut size* is the sum of the weights of the edges "between" the two + sets of nodes. + + Parameters + ---------- + G : NetworkX graph + + S : collection + A collection of nodes in `G`. + + T : collection + A collection of nodes in `G`. If not specified, this is taken to + be the set complement of `S`. + + weight : object + Edge attribute key to use as weight. If not specified, edges + have weight one. + + Returns + ------- + number + Total weight of all edges from nodes in set `S` to nodes in + set `T` (and, in the case of directed graphs, all edges from + nodes in `T` to nodes in `S`). 
+ + Examples + -------- + In the graph with two cliques joined by a single edges, the natural + bipartition of the graph into two blocks, one for each clique, + yields a cut of weight one:: + + >>> G = nx.barbell_graph(3, 0) + >>> S = {0, 1, 2} + >>> T = {3, 4, 5} + >>> nx.cut_size(G, S, T) + 1 + + Each parallel edge in a multigraph is counted when determining the + cut size:: + + >>> G = nx.MultiGraph(["ab", "ab"]) + >>> S = {"a"} + >>> T = {"b"} + >>> nx.cut_size(G, S, T) + 2 + + Notes + ----- + In a multigraph, the cut size is the total weight of edges including + multiplicity. + + """ + edges = nx.edge_boundary(G, S, T, data=weight, default=1) + if G.is_directed(): + edges = chain(edges, nx.edge_boundary(G, T, S, data=weight, default=1)) + return sum(weight for u, v, weight in edges) + + +@nx._dispatchable(edge_attrs="weight") +def volume(G, S, weight=None): + """Returns the volume of a set of nodes. + + The *volume* of a set *S* is the sum of the (out-)degrees of nodes + in *S* (taking into account parallel edges in multigraphs). [1] + + Parameters + ---------- + G : NetworkX graph + + S : collection + A collection of nodes in `G`. + + weight : object + Edge attribute key to use as weight. If not specified, edges + have weight one. + + Returns + ------- + number + The volume of the set of nodes represented by `S` in the graph + `G`. + + See also + -------- + conductance + cut_size + edge_expansion + edge_boundary + normalized_cut_size + + References + ---------- + .. [1] David Gleich. + *Hierarchical Directed Spectral Graph Partitioning*. + + + """ + degree = G.out_degree if G.is_directed() else G.degree + return sum(d for v, d in degree(S, weight=weight)) + + +@nx._dispatchable(edge_attrs="weight") +def normalized_cut_size(G, S, T=None, weight=None): + """Returns the normalized size of the cut between two sets of nodes. + + The *normalized cut size* is the cut size times the sum of the + reciprocal sizes of the volumes of the two sets. 
[1] + + Parameters + ---------- + G : NetworkX graph + + S : collection + A collection of nodes in `G`. + + T : collection + A collection of nodes in `G`. + + weight : object + Edge attribute key to use as weight. If not specified, edges + have weight one. + + Returns + ------- + number + The normalized cut size between the two sets `S` and `T`. + + Notes + ----- + In a multigraph, the cut size is the total weight of edges including + multiplicity. + + See also + -------- + conductance + cut_size + edge_expansion + volume + + References + ---------- + .. [1] David Gleich. + *Hierarchical Directed Spectral Graph Partitioning*. + + + """ + if T is None: + T = set(G) - set(S) + num_cut_edges = cut_size(G, S, T=T, weight=weight) + volume_S = volume(G, S, weight=weight) + volume_T = volume(G, T, weight=weight) + return num_cut_edges * ((1 / volume_S) + (1 / volume_T)) + + +@nx._dispatchable(edge_attrs="weight") +def conductance(G, S, T=None, weight=None): + """Returns the conductance of two sets of nodes. + + The *conductance* is the quotient of the cut size and the smaller of + the volumes of the two sets. [1] + + Parameters + ---------- + G : NetworkX graph + + S : collection + A collection of nodes in `G`. + + T : collection + A collection of nodes in `G`. + + weight : object + Edge attribute key to use as weight. If not specified, edges + have weight one. + + Returns + ------- + number + The conductance between the two sets `S` and `T`. + + See also + -------- + cut_size + edge_expansion + normalized_cut_size + volume + + References + ---------- + .. [1] David Gleich. + *Hierarchical Directed Spectral Graph Partitioning*. 
+ + + """ + if T is None: + T = set(G) - set(S) + num_cut_edges = cut_size(G, S, T, weight=weight) + volume_S = volume(G, S, weight=weight) + volume_T = volume(G, T, weight=weight) + return num_cut_edges / min(volume_S, volume_T) + + +@nx._dispatchable(edge_attrs="weight") +def edge_expansion(G, S, T=None, weight=None): + """Returns the edge expansion between two node sets. + + The *edge expansion* is the quotient of the cut size and the smaller + of the cardinalities of the two sets. [1] + + Parameters + ---------- + G : NetworkX graph + + S : collection + A collection of nodes in `G`. + + T : collection + A collection of nodes in `G`. + + weight : object + Edge attribute key to use as weight. If not specified, edges + have weight one. + + Returns + ------- + number + The edge expansion between the two sets `S` and `T`. + + See also + -------- + boundary_expansion + mixing_expansion + node_expansion + + References + ---------- + .. [1] Fan Chung. + *Spectral Graph Theory*. + (CBMS Regional Conference Series in Mathematics, No. 92), + American Mathematical Society, 1997, ISBN 0-8218-0315-8 + + + """ + if T is None: + T = set(G) - set(S) + num_cut_edges = cut_size(G, S, T=T, weight=weight) + return num_cut_edges / min(len(S), len(T)) + + +@nx._dispatchable(edge_attrs="weight") +def mixing_expansion(G, S, T=None, weight=None): + """Returns the mixing expansion between two node sets. + + The *mixing expansion* is the quotient of the cut size and twice the + number of edges in the graph. [1] + + Parameters + ---------- + G : NetworkX graph + + S : collection + A collection of nodes in `G`. + + T : collection + A collection of nodes in `G`. + + weight : object + Edge attribute key to use as weight. If not specified, edges + have weight one. + + Returns + ------- + number + The mixing expansion between the two sets `S` and `T`. + + See also + -------- + boundary_expansion + edge_expansion + node_expansion + + References + ---------- + .. [1] Vadhan, Salil P. 
+ "Pseudorandomness." + *Foundations and Trends + in Theoretical Computer Science* 7.1–3 (2011): 1–336. + + + """ + num_cut_edges = cut_size(G, S, T=T, weight=weight) + num_total_edges = G.number_of_edges() + return num_cut_edges / (2 * num_total_edges) + + +# TODO What is the generalization to two arguments, S and T? Does the +# denominator become `min(len(S), len(T))`? +@nx._dispatchable +def node_expansion(G, S): + """Returns the node expansion of the set `S`. + + The *node expansion* is the quotient of the size of the node + boundary of *S* and the cardinality of *S*. [1] + + Parameters + ---------- + G : NetworkX graph + + S : collection + A collection of nodes in `G`. + + Returns + ------- + number + The node expansion of the set `S`. + + See also + -------- + boundary_expansion + edge_expansion + mixing_expansion + + References + ---------- + .. [1] Vadhan, Salil P. + "Pseudorandomness." + *Foundations and Trends + in Theoretical Computer Science* 7.1–3 (2011): 1–336. + + + """ + neighborhood = set(chain.from_iterable(G.neighbors(v) for v in S)) + return len(neighborhood) / len(S) + + +# TODO What is the generalization to two arguments, S and T? Does the +# denominator become `min(len(S), len(T))`? +@nx._dispatchable +def boundary_expansion(G, S): + """Returns the boundary expansion of the set `S`. + + The *boundary expansion* is the quotient of the size + of the node boundary and the cardinality of *S*. [1] + + Parameters + ---------- + G : NetworkX graph + + S : collection + A collection of nodes in `G`. + + Returns + ------- + number + The boundary expansion of the set `S`. + + See also + -------- + edge_expansion + mixing_expansion + node_expansion + + References + ---------- + .. [1] Vadhan, Salil P. + "Pseudorandomness." + *Foundations and Trends in Theoretical Computer Science* + 7.1–3 (2011): 1–336. 
+ + + """ + return len(nx.node_boundary(G, S)) / len(S) diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/cycles.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/cycles.py new file mode 100644 index 0000000000000000000000000000000000000000..975462a73312ad456abbbfaf419295628d02910c --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/cycles.py @@ -0,0 +1,1230 @@ +""" +======================== +Cycle finding algorithms +======================== +""" + +from collections import Counter, defaultdict +from itertools import combinations, product +from math import inf + +import networkx as nx +from networkx.utils import not_implemented_for, pairwise + +__all__ = [ + "cycle_basis", + "simple_cycles", + "recursive_simple_cycles", + "find_cycle", + "minimum_cycle_basis", + "chordless_cycles", + "girth", +] + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +@nx._dispatchable +def cycle_basis(G, root=None): + """Returns a list of cycles which form a basis for cycles of G. + + A basis for cycles of a network is a minimal collection of + cycles such that any cycle in the network can be written + as a sum of cycles in the basis. Here summation of cycles + is defined as "exclusive or" of the edges. Cycle bases are + useful, e.g. when deriving equations for electric circuits + using Kirchhoff's Laws. + + Parameters + ---------- + G : NetworkX Graph + root : node, optional + Specify starting node for basis. + + Returns + ------- + A list of cycle lists. Each cycle list is a list of nodes + which forms a cycle (loop) in G. + + Examples + -------- + >>> G = nx.Graph() + >>> nx.add_cycle(G, [0, 1, 2, 3]) + >>> nx.add_cycle(G, [0, 3, 4, 5]) + >>> nx.cycle_basis(G, 0) + [[3, 4, 5, 0], [1, 2, 3, 0]] + + Notes + ----- + This is adapted from algorithm CACM 491 [1]_. + + References + ---------- + .. [1] Paton, K. An algorithm for finding a fundamental set of + cycles of a graph. Comm. ACM 12, 9 (Sept 1969), 514-518. 
+ + See Also + -------- + simple_cycles + minimum_cycle_basis + """ + gnodes = dict.fromkeys(G) # set-like object that maintains node order + cycles = [] + while gnodes: # loop over connected components + if root is None: + root = gnodes.popitem()[0] + stack = [root] + pred = {root: root} + used = {root: set()} + while stack: # walk the spanning tree finding cycles + z = stack.pop() # use last-in so cycles easier to find + zused = used[z] + for nbr in G[z]: + if nbr not in used: # new node + pred[nbr] = z + stack.append(nbr) + used[nbr] = {z} + elif nbr == z: # self loops + cycles.append([z]) + elif nbr not in zused: # found a cycle + pn = used[nbr] + cycle = [nbr, z] + p = pred[z] + while p not in pn: + cycle.append(p) + p = pred[p] + cycle.append(p) + cycles.append(cycle) + used[nbr].add(z) + for node in pred: + gnodes.pop(node, None) + root = None + return cycles + + +@nx._dispatchable +def simple_cycles(G, length_bound=None): + """Find simple cycles (elementary circuits) of a graph. + + A "simple cycle", or "elementary circuit", is a closed path where + no node appears twice. In a directed graph, two simple cycles are distinct + if they are not cyclic permutations of each other. In an undirected graph, + two simple cycles are distinct if they are not cyclic permutations of each + other nor of the other's reversal. + + Optionally, the cycles are bounded in length. In the unbounded case, we use + a nonrecursive, iterator/generator version of Johnson's algorithm [1]_. In + the bounded case, we use a version of the algorithm of Gupta and + Suzumura [2]_. There may be better algorithms for some cases [3]_ [4]_ [5]_. + + The algorithms of Johnson, and Gupta and Suzumura, are enhanced by some + well-known preprocessing techniques. When `G` is directed, we restrict our + attention to strongly connected components of `G`, generate all simple cycles + containing a certain node, remove that node, and further decompose the + remainder into strongly connected components. 
When `G` is undirected, we + restrict our attention to biconnected components, generate all simple cycles + containing a particular edge, remove that edge, and further decompose the + remainder into biconnected components. + + Note that multigraphs are supported by this function -- and in undirected + multigraphs, a pair of parallel edges is considered a cycle of length 2. + Likewise, self-loops are considered to be cycles of length 1. We define + cycles as sequences of nodes; so the presence of loops and parallel edges + does not change the number of simple cycles in a graph. + + Parameters + ---------- + G : NetworkX Graph + A networkx graph. Undirected, directed, and multigraphs are all supported. + + length_bound : int or None, optional (default=None) + If `length_bound` is an int, generate all simple cycles of `G` with length at + most `length_bound`. Otherwise, generate all simple cycles of `G`. + + Yields + ------ + list of nodes + Each cycle is represented by a list of nodes along the cycle. + + Examples + -------- + >>> G = nx.DiGraph([(0, 0), (0, 1), (0, 2), (1, 2), (2, 0), (2, 1), (2, 2)]) + >>> sorted(nx.simple_cycles(G)) + [[0], [0, 1, 2], [0, 2], [1, 2], [2]] + + To filter the cycles so that they don't include certain nodes or edges, + copy your graph and eliminate those nodes or edges before calling. + For example, to exclude self-loops from the above example: + + >>> H = G.copy() + >>> H.remove_edges_from(nx.selfloop_edges(G)) + >>> sorted(nx.simple_cycles(H)) + [[0, 1, 2], [0, 2], [1, 2]] + + Notes + ----- + When `length_bound` is None, the time complexity is $O((n+e)(c+1))$ for $n$ + nodes, $e$ edges and $c$ simple circuits. Otherwise, when ``length_bound > 1``, + the time complexity is $O((c+n)(k-1)d^k)$ where $d$ is the average degree of + the nodes of `G` and $k$ = `length_bound`. + + Raises + ------ + ValueError + when ``length_bound < 0``. + + References + ---------- + .. [1] Finding all the elementary circuits of a directed graph. + D. B. 
Johnson, SIAM Journal on Computing 4, no. 1, 77-84, 1975. + https://doi.org/10.1137/0204007 + .. [2] Finding All Bounded-Length Simple Cycles in a Directed Graph + A. Gupta and T. Suzumura https://arxiv.org/abs/2105.10094 + .. [3] Enumerating the cycles of a digraph: a new preprocessing strategy. + G. Loizou and P. Thanish, Information Sciences, v. 27, 163-182, 1982. + .. [4] A search strategy for the elementary cycles of a directed graph. + J.L. Szwarcfiter and P.E. Lauer, BIT NUMERICAL MATHEMATICS, + v. 16, no. 2, 192-204, 1976. + .. [5] Optimal Listing of Cycles and st-Paths in Undirected Graphs + R. Ferreira and R. Grossi and A. Marino and N. Pisanti and R. Rizzi and + G. Sacomoto https://arxiv.org/abs/1205.2766 + + See Also + -------- + cycle_basis + chordless_cycles + """ + + if length_bound is not None: + if length_bound == 0: + return + elif length_bound < 0: + raise ValueError("length bound must be non-negative") + + directed = G.is_directed() + yield from ([v] for v, Gv in G.adj.items() if v in Gv) + + if length_bound is not None and length_bound == 1: + return + + if G.is_multigraph() and not directed: + visited = set() + for u, Gu in G.adj.items(): + multiplicity = ((v, len(Guv)) for v, Guv in Gu.items() if v in visited) + yield from ([u, v] for v, m in multiplicity if m > 1) + visited.add(u) + + # explicitly filter out loops; implicitly filter out parallel edges + if directed: + G = nx.DiGraph((u, v) for u, Gu in G.adj.items() for v in Gu if v != u) + else: + G = nx.Graph((u, v) for u, Gu in G.adj.items() for v in Gu if v != u) + + # this case is not strictly necessary but improves performance + if length_bound is not None and length_bound == 2: + if directed: + visited = set() + for u, Gu in G.adj.items(): + yield from ( + [v, u] for v in visited.intersection(Gu) if G.has_edge(v, u) + ) + visited.add(u) + return + + if directed: + yield from _directed_cycle_search(G, length_bound) + else: + yield from _undirected_cycle_search(G, length_bound) + + +def 
_directed_cycle_search(G, length_bound): + """A dispatch function for `simple_cycles` for directed graphs. + + We generate all cycles of G through binary partition. + + 1. Pick a node v in G which belongs to at least one cycle + a. Generate all cycles of G which contain the node v. + b. Recursively generate all cycles of G \\ v. + + This is accomplished through the following: + + 1. Compute the strongly connected components SCC of G. + 2. Select and remove a biconnected component C from BCC. Select a + non-tree edge (u, v) of a depth-first search of G[C]. + 3. For each simple cycle P containing v in G[C], yield P. + 4. Add the biconnected components of G[C \\ v] to BCC. + + If the parameter length_bound is not None, then step 3 will be limited to + simple cycles of length at most length_bound. + + Parameters + ---------- + G : NetworkX DiGraph + A directed graph + + length_bound : int or None + If length_bound is an int, generate all simple cycles of G with length at most length_bound. + Otherwise, generate all simple cycles of G. + + Yields + ------ + list of nodes + Each cycle is represented by a list of nodes along the cycle. + """ + + scc = nx.strongly_connected_components + components = [c for c in scc(G) if len(c) >= 2] + while components: + c = components.pop() + Gc = G.subgraph(c) + v = next(iter(c)) + if length_bound is None: + yield from _johnson_cycle_search(Gc, [v]) + else: + yield from _bounded_cycle_search(Gc, [v], length_bound) + # delete v after searching G, to make sure we can find v + G.remove_node(v) + components.extend(c for c in scc(Gc) if len(c) >= 2) + + +def _undirected_cycle_search(G, length_bound): + """A dispatch function for `simple_cycles` for undirected graphs. + + We generate all cycles of G through binary partition. + + 1. Pick an edge (u, v) in G which belongs to at least one cycle + a. Generate all cycles of G which contain the edge (u, v) + b. 
Recursively generate all cycles of G \\ (u, v) + + This is accomplished through the following: + + 1. Compute the biconnected components BCC of G. + 2. Select and remove a biconnected component C from BCC. Select a + non-tree edge (u, v) of a depth-first search of G[C]. + 3. For each (v -> u) path P remaining in G[C] \\ (u, v), yield P. + 4. Add the biconnected components of G[C] \\ (u, v) to BCC. + + If the parameter length_bound is not None, then step 3 will be limited to simple paths + of length at most length_bound. + + Parameters + ---------- + G : NetworkX Graph + An undirected graph + + length_bound : int or None + If length_bound is an int, generate all simple cycles of G with length at most length_bound. + Otherwise, generate all simple cycles of G. + + Yields + ------ + list of nodes + Each cycle is represented by a list of nodes along the cycle. + """ + + bcc = nx.biconnected_components + components = [c for c in bcc(G) if len(c) >= 3] + while components: + c = components.pop() + Gc = G.subgraph(c) + uv = list(next(iter(Gc.edges))) + G.remove_edge(*uv) + # delete (u, v) before searching G, to avoid fake 3-cycles [u, v, u] + if length_bound is None: + yield from _johnson_cycle_search(Gc, uv) + else: + yield from _bounded_cycle_search(Gc, uv, length_bound) + components.extend(c for c in bcc(Gc) if len(c) >= 3) + + +class _NeighborhoodCache(dict): + """Very lightweight graph wrapper which caches neighborhoods as list. + + This dict subclass uses the __missing__ functionality to query graphs for + their neighborhoods, and store the result as a list. This is used to avoid + the performance penalty incurred by subgraph views. + """ + + def __init__(self, G): + self.G = G + + def __missing__(self, v): + Gv = self[v] = list(self.G[v]) + return Gv + + +def _johnson_cycle_search(G, path): + """The main loop of the cycle-enumeration algorithm of Johnson. + + Parameters + ---------- + G : NetworkX Graph or DiGraph + A graph + + path : list + A cycle prefix. 
All cycles generated will begin with this prefix. + + Yields + ------ + list of nodes + Each cycle is represented by a list of nodes along the cycle. + + References + ---------- + .. [1] Finding all the elementary circuits of a directed graph. + D. B. Johnson, SIAM Journal on Computing 4, no. 1, 77-84, 1975. + https://doi.org/10.1137/0204007 + + """ + + G = _NeighborhoodCache(G) + blocked = set(path) + B = defaultdict(set) # graph portions that yield no elementary circuit + start = path[0] + stack = [iter(G[path[-1]])] + closed = [False] + while stack: + nbrs = stack[-1] + for w in nbrs: + if w == start: + yield path[:] + closed[-1] = True + elif w not in blocked: + path.append(w) + closed.append(False) + stack.append(iter(G[w])) + blocked.add(w) + break + else: # no more nbrs + stack.pop() + v = path.pop() + if closed.pop(): + if closed: + closed[-1] = True + unblock_stack = {v} + while unblock_stack: + u = unblock_stack.pop() + if u in blocked: + blocked.remove(u) + unblock_stack.update(B[u]) + B[u].clear() + else: + for w in G[v]: + B[w].add(v) + + +def _bounded_cycle_search(G, path, length_bound): + """The main loop of the cycle-enumeration algorithm of Gupta and Suzumura. + + Parameters + ---------- + G : NetworkX Graph or DiGraph + A graph + + path : list + A cycle prefix. All cycles generated will begin with this prefix. + + length_bound: int + A length bound. All cycles generated will have length at most length_bound. + + Yields + ------ + list of nodes + Each cycle is represented by a list of nodes along the cycle. + + References + ---------- + .. [1] Finding All Bounded-Length Simple Cycles in a Directed Graph + A. Gupta and T. 
Suzumura https://arxiv.org/abs/2105.10094 + + """ + G = _NeighborhoodCache(G) + lock = {v: 0 for v in path} + B = defaultdict(set) + start = path[0] + stack = [iter(G[path[-1]])] + blen = [length_bound] + while stack: + nbrs = stack[-1] + for w in nbrs: + if w == start: + yield path[:] + blen[-1] = 1 + elif len(path) < lock.get(w, length_bound): + path.append(w) + blen.append(length_bound) + lock[w] = len(path) + stack.append(iter(G[w])) + break + else: + stack.pop() + v = path.pop() + bl = blen.pop() + if blen: + blen[-1] = min(blen[-1], bl) + if bl < length_bound: + relax_stack = [(bl, v)] + while relax_stack: + bl, u = relax_stack.pop() + if lock.get(u, length_bound) < length_bound - bl + 1: + lock[u] = length_bound - bl + 1 + relax_stack.extend((bl + 1, w) for w in B[u].difference(path)) + else: + for w in G[v]: + B[w].add(v) + + +@nx._dispatchable +def chordless_cycles(G, length_bound=None): + """Find simple chordless cycles of a graph. + + A `simple cycle` is a closed path where no node appears twice. In a simple + cycle, a `chord` is an additional edge between two nodes in the cycle. A + `chordless cycle` is a simple cycle without chords. Said differently, a + chordless cycle is a cycle C in a graph G where the number of edges in the + induced graph G[C] is equal to the length of `C`. + + Note that some care must be taken in the case that G is not a simple graph + nor a simple digraph. Some authors limit the definition of chordless cycles + to have a prescribed minimum length; we do not. + + 1. We interpret self-loops to be chordless cycles, except in multigraphs + with multiple loops in parallel. Likewise, in a chordless cycle of + length greater than 1, there can be no nodes with self-loops. + + 2. We interpret directed two-cycles to be chordless cycles, except in + multi-digraphs when any edge in a two-cycle has a parallel copy. + + 3. 
We interpret parallel pairs of undirected edges as two-cycles, except + when a third (or more) parallel edge exists between the two nodes. + + 4. Generalizing the above, edges with parallel clones may not occur in + chordless cycles. + + In a directed graph, two chordless cycles are distinct if they are not + cyclic permutations of each other. In an undirected graph, two chordless + cycles are distinct if they are not cyclic permutations of each other nor of + the other's reversal. + + Optionally, the cycles are bounded in length. + + We use an algorithm strongly inspired by that of Dias et al [1]_. It has + been modified in the following ways: + + 1. Recursion is avoided, per Python's limitations + + 2. The labeling function is not necessary, because the starting paths + are chosen (and deleted from the host graph) to prevent multiple + occurrences of the same path + + 3. The search is optionally bounded at a specified length + + 4. Support for directed graphs is provided by extending cycles along + forward edges, and blocking nodes along forward and reverse edges + + 5. Support for multigraphs is provided by omitting digons from the set + of forward edges + + Parameters + ---------- + G : NetworkX DiGraph + A directed graph + + length_bound : int or None, optional (default=None) + If length_bound is an int, generate all simple cycles of G with length at + most length_bound. Otherwise, generate all simple cycles of G. + + Yields + ------ + list of nodes + Each cycle is represented by a list of nodes along the cycle. + + Examples + -------- + >>> sorted(list(nx.chordless_cycles(nx.complete_graph(4)))) + [[1, 0, 2], [1, 0, 3], [2, 0, 3], [2, 1, 3]] + + Notes + ----- + When length_bound is None, and the graph is simple, the time complexity is + $O((n+e)(c+1))$ for $n$ nodes, $e$ edges and $c$ chordless cycles. + + Raises + ------ + ValueError + when length_bound < 0. + + References + ---------- + .. [1] Efficient enumeration of chordless cycles + E. Dias and D. 
Castonguay and H. Longo and W.A.R. Jradi + https://arxiv.org/abs/1309.1051 + + See Also + -------- + simple_cycles + """ + + if length_bound is not None: + if length_bound == 0: + return + elif length_bound < 0: + raise ValueError("length bound must be non-negative") + + directed = G.is_directed() + multigraph = G.is_multigraph() + + if multigraph: + yield from ([v] for v, Gv in G.adj.items() if len(Gv.get(v, ())) == 1) + else: + yield from ([v] for v, Gv in G.adj.items() if v in Gv) + + if length_bound is not None and length_bound == 1: + return + + # Nodes with loops cannot belong to longer cycles. Let's delete them here. + # also, we implicitly reduce the multiplicity of edges down to 1 in the case + # of multiedges. + if directed: + F = nx.DiGraph((u, v) for u, Gu in G.adj.items() if u not in Gu for v in Gu) + B = F.to_undirected(as_view=False) + else: + F = nx.Graph((u, v) for u, Gu in G.adj.items() if u not in Gu for v in Gu) + B = None + + # If we're given a multigraph, we have a few cases to consider with parallel + # edges. + # + # 1. If we have 2 or more edges in parallel between the nodes (u, v), we + # must not construct longer cycles along (u, v). + # 2. If G is not directed, then a pair of parallel edges between (u, v) is a + # chordless cycle unless there exists a third (or more) parallel edge. + # 3. If G is directed, then parallel edges do not form cycles, but do + # preclude back-edges from forming cycles (handled in the next section), + # Thus, if an edge (u, v) is duplicated and the reverse (v, u) is also + # present, then we remove both from F. + # + # In directed graphs, we need to consider both directions that edges can + # take, so iterate over all edges (u, v) and possibly (v, u). In undirected + # graphs, we need to be a little careful to only consider every edge once, + # so we use a "visited" set to emulate node-order comparisons. 
+ + if multigraph: + if not directed: + B = F.copy() + visited = set() + for u, Gu in G.adj.items(): + if directed: + multiplicity = ((v, len(Guv)) for v, Guv in Gu.items()) + for v, m in multiplicity: + if m > 1: + F.remove_edges_from(((u, v), (v, u))) + else: + multiplicity = ((v, len(Guv)) for v, Guv in Gu.items() if v in visited) + for v, m in multiplicity: + if m == 2: + yield [u, v] + if m > 1: + F.remove_edge(u, v) + visited.add(u) + + # If we're given a directed graphs, we need to think about digons. If we + # have two edges (u, v) and (v, u), then that's a two-cycle. If either edge + # was duplicated above, then we removed both from F. So, any digons we find + # here are chordless. After finding digons, we remove their edges from F + # to avoid traversing them in the search for chordless cycles. + if directed: + for u, Fu in F.adj.items(): + digons = [[u, v] for v in Fu if F.has_edge(v, u)] + yield from digons + F.remove_edges_from(digons) + F.remove_edges_from(e[::-1] for e in digons) + + if length_bound is not None and length_bound == 2: + return + + # Now, we prepare to search for cycles. We have removed all cycles of + # lengths 1 and 2, so F is a simple graph or simple digraph. We repeatedly + # separate digraphs into their strongly connected components, and undirected + # graphs into their biconnected components. For each component, we pick a + # node v, search for chordless cycles based at each "stem" (u, v, w), and + # then remove v from that component before separating the graph again. + if directed: + separate = nx.strongly_connected_components + + # Directed stems look like (u -> v -> w), so we use the product of + # predecessors of v with successors of v. 
+ def stems(C, v): + for u, w in product(C.pred[v], C.succ[v]): + if not G.has_edge(u, w): # omit stems with acyclic chords + yield [u, v, w], F.has_edge(w, u) + + else: + separate = nx.biconnected_components + + # Undirected stems look like (u ~ v ~ w), but we must not also search + # (w ~ v ~ u), so we use combinations of v's neighbors of length 2. + def stems(C, v): + yield from (([u, v, w], F.has_edge(w, u)) for u, w in combinations(C[v], 2)) + + components = [c for c in separate(F) if len(c) > 2] + while components: + c = components.pop() + v = next(iter(c)) + Fc = F.subgraph(c) + Fcc = Bcc = None + for S, is_triangle in stems(Fc, v): + if is_triangle: + yield S + else: + if Fcc is None: + Fcc = _NeighborhoodCache(Fc) + Bcc = Fcc if B is None else _NeighborhoodCache(B.subgraph(c)) + yield from _chordless_cycle_search(Fcc, Bcc, S, length_bound) + + components.extend(c for c in separate(F.subgraph(c - {v})) if len(c) > 2) + + +def _chordless_cycle_search(F, B, path, length_bound): + """The main loop for chordless cycle enumeration. + + This algorithm is strongly inspired by that of Dias et al [1]_. It has been + modified in the following ways: + + 1. Recursion is avoided, per Python's limitations + + 2. The labeling function is not necessary, because the starting paths + are chosen (and deleted from the host graph) to prevent multiple + occurrences of the same path + + 3. The search is optionally bounded at a specified length + + 4. Support for directed graphs is provided by extending cycles along + forward edges, and blocking nodes along forward and reverse edges + + 5. Support for multigraphs is provided by omitting digons from the set + of forward edges + + Parameters + ---------- + F : _NeighborhoodCache + A graph of forward edges to follow in constructing cycles + + B : _NeighborhoodCache + A graph of blocking edges to prevent the production of chordless cycles + + path : list + A cycle prefix. All cycles generated will begin with this prefix. 
+ + length_bound : int + A length bound. All cycles generated will have length at most length_bound. + + + Yields + ------ + list of nodes + Each cycle is represented by a list of nodes along the cycle. + + References + ---------- + .. [1] Efficient enumeration of chordless cycles + E. Dias and D. Castonguay and H. Longo and W.A.R. Jradi + https://arxiv.org/abs/1309.1051 + + """ + blocked = defaultdict(int) + target = path[0] + blocked[path[1]] = 1 + for w in path[1:]: + for v in B[w]: + blocked[v] += 1 + + stack = [iter(F[path[2]])] + while stack: + nbrs = stack[-1] + for w in nbrs: + if blocked[w] == 1 and (length_bound is None or len(path) < length_bound): + Fw = F[w] + if target in Fw: + yield path + [w] + else: + Bw = B[w] + if target in Bw: + continue + for v in Bw: + blocked[v] += 1 + path.append(w) + stack.append(iter(Fw)) + break + else: + stack.pop() + for v in B[path.pop()]: + blocked[v] -= 1 + + +@not_implemented_for("undirected") +@nx._dispatchable(mutates_input=True) +def recursive_simple_cycles(G): + """Find simple cycles (elementary circuits) of a directed graph. + + A `simple cycle`, or `elementary circuit`, is a closed path where + no node appears twice. Two elementary circuits are distinct if they + are not cyclic permutations of each other. + + This version uses a recursive algorithm to build a list of cycles. + You should probably use the iterator version called simple_cycles(). + Warning: This recursive version uses lots of RAM! + It appears in NetworkX for pedagogical value. + + Parameters + ---------- + G : NetworkX DiGraph + A directed graph + + Returns + ------- + A list of cycles, where each cycle is represented by a list of nodes + along the cycle. + + Example: + + >>> edges = [(0, 0), (0, 1), (0, 2), (1, 2), (2, 0), (2, 1), (2, 2)] + >>> G = nx.DiGraph(edges) + >>> nx.recursive_simple_cycles(G) + [[0], [2], [0, 1, 2], [0, 2], [1, 2]] + + Notes + ----- + The implementation follows pp. 79-80 in [1]_. 
+ + The time complexity is $O((n+e)(c+1))$ for $n$ nodes, $e$ edges and $c$ + elementary circuits. + + References + ---------- + .. [1] Finding all the elementary circuits of a directed graph. + D. B. Johnson, SIAM Journal on Computing 4, no. 1, 77-84, 1975. + https://doi.org/10.1137/0204007 + + See Also + -------- + simple_cycles, cycle_basis + """ + + # Jon Olav Vik, 2010-08-09 + def _unblock(thisnode): + """Recursively unblock and remove nodes from B[thisnode].""" + if blocked[thisnode]: + blocked[thisnode] = False + while B[thisnode]: + _unblock(B[thisnode].pop()) + + def circuit(thisnode, startnode, component): + closed = False # set to True if elementary path is closed + path.append(thisnode) + blocked[thisnode] = True + for nextnode in component[thisnode]: # direct successors of thisnode + if nextnode == startnode: + result.append(path[:]) + closed = True + elif not blocked[nextnode]: + if circuit(nextnode, startnode, component): + closed = True + if closed: + _unblock(thisnode) + else: + for nextnode in component[thisnode]: + if thisnode not in B[nextnode]: # TODO: use set for speedup? + B[nextnode].append(thisnode) + path.pop() # remove thisnode from path + return closed + + path = [] # stack of nodes in current path + blocked = defaultdict(bool) # vertex: blocked from search? + B = defaultdict(list) # graph portions that yield no elementary circuit + result = [] # list to accumulate the circuits found + + # Johnson's algorithm exclude self cycle edges like (v, v) + # To be backward compatible, we record those cycles in advance + # and then remove from subG + for v in G: + if G.has_edge(v, v): + result.append([v]) + G.remove_edge(v, v) + + # Johnson's algorithm requires some ordering of the nodes. + # They might not be sortable so we assign an arbitrary ordering. 
+ ordering = dict(zip(G, range(len(G)))) + for s in ordering: + # Build the subgraph induced by s and following nodes in the ordering + subgraph = G.subgraph(node for node in G if ordering[node] >= ordering[s]) + # Find the strongly connected component in the subgraph + # that contains the least node according to the ordering + strongcomp = nx.strongly_connected_components(subgraph) + mincomp = min(strongcomp, key=lambda ns: min(ordering[n] for n in ns)) + component = G.subgraph(mincomp) + if len(component) > 1: + # smallest node in the component according to the ordering + startnode = min(component, key=ordering.__getitem__) + for node in component: + blocked[node] = False + B[node][:] = [] + dummy = circuit(startnode, startnode, component) + return result + + +@nx._dispatchable +def find_cycle(G, source=None, orientation=None): + """Returns a cycle found via depth-first traversal. + + The cycle is a list of edges indicating the cyclic path. + Orientation of directed edges is controlled by `orientation`. + + Parameters + ---------- + G : graph + A directed/undirected graph/multigraph. + + source : node, list of nodes + The node from which the traversal begins. If None, then a source + is chosen arbitrarily and repeatedly until all edges from each node in + the graph are searched. + + orientation : None | 'original' | 'reverse' | 'ignore' (default: None) + For directed graphs and directed multigraphs, edge traversals need not + respect the original orientation of the edges. + When set to 'reverse' every edge is traversed in the reverse direction. + When set to 'ignore', every edge is treated as undirected. + When set to 'original', every edge is treated as directed. + In all three cases, the yielded edge tuples add a last entry to + indicate the direction in which that edge was traversed. + If orientation is None, the yielded edge has no direction indicated. + The direction is respected, but not reported. 
+ + Returns + ------- + edges : directed edges + A list of directed edges indicating the path taken for the loop. + If no cycle is found, then an exception is raised. + For graphs, an edge is of the form `(u, v)` where `u` and `v` + are the tail and head of the edge as determined by the traversal. + For multigraphs, an edge is of the form `(u, v, key)`, where `key` is + the key of the edge. When the graph is directed, then `u` and `v` + are always in the order of the actual directed edge. + If orientation is not None then the edge tuple is extended to include + the direction of traversal ('forward' or 'reverse') on that edge. + + Raises + ------ + NetworkXNoCycle + If no cycle was found. + + Examples + -------- + In this example, we construct a DAG and find, in the first call, that there + are no directed cycles, and so an exception is raised. In the second call, + we ignore edge orientations and find that there is an undirected cycle. + Note that the second call finds a directed cycle while effectively + traversing an undirected graph, and so, we found an "undirected cycle". + This means that this DAG structure does not form a directed tree (which + is also known as a polytree). + + >>> G = nx.DiGraph([(0, 1), (0, 2), (1, 2)]) + >>> nx.find_cycle(G, orientation="original") + Traceback (most recent call last): + ... + networkx.exception.NetworkXNoCycle: No cycle found. 
+ >>> list(nx.find_cycle(G, orientation="ignore")) + [(0, 1, 'forward'), (1, 2, 'forward'), (0, 2, 'reverse')] + + See Also + -------- + simple_cycles + """ + if not G.is_directed() or orientation in (None, "original"): + + def tailhead(edge): + return edge[:2] + + elif orientation == "reverse": + + def tailhead(edge): + return edge[1], edge[0] + + elif orientation == "ignore": + + def tailhead(edge): + if edge[-1] == "reverse": + return edge[1], edge[0] + return edge[:2] + + explored = set() + cycle = [] + final_node = None + for start_node in G.nbunch_iter(source): + if start_node in explored: + # No loop is possible. + continue + + edges = [] + # All nodes seen in this iteration of edge_dfs + seen = {start_node} + # Nodes in active path. + active_nodes = {start_node} + previous_head = None + + for edge in nx.edge_dfs(G, start_node, orientation): + # Determine if this edge is a continuation of the active path. + tail, head = tailhead(edge) + if head in explored: + # Then we've already explored it. No loop is possible. + continue + if previous_head is not None and tail != previous_head: + # This edge results from backtracking. + # Pop until we get a node whose head equals the current tail. + # So for example, we might have: + # (0, 1), (1, 2), (2, 3), (1, 4) + # which must become: + # (0, 1), (1, 4) + while True: + try: + popped_edge = edges.pop() + except IndexError: + edges = [] + active_nodes = {tail} + break + else: + popped_head = tailhead(popped_edge)[1] + active_nodes.remove(popped_head) + + if edges: + last_head = tailhead(edges[-1])[1] + if tail == last_head: + break + edges.append(edge) + + if head in active_nodes: + # We have a loop! + cycle.extend(edges) + final_node = head + break + else: + seen.add(head) + active_nodes.add(head) + previous_head = head + + if cycle: + break + else: + explored.update(seen) + + else: + assert len(cycle) == 0 + raise nx.exception.NetworkXNoCycle("No cycle found.") + + # We now have a list of edges which ends on a cycle. 
@not_implemented_for("directed")
@not_implemented_for("multigraph")
@nx._dispatchable(edge_attrs="weight")
def minimum_cycle_basis(G, weight=None):
    """Returns a minimum weight cycle basis for G

    Minimum weight means a cycle basis for which the total weight
    (length for unweighted graphs) of all the cycles is minimum.

    Parameters
    ----------
    G : NetworkX Graph
    weight: string
        name of the edge attribute to use for edge weights

    Returns
    -------
    A list of cycle lists.  Each cycle list is a list of nodes
    which forms a cycle (loop) in G. Note that the nodes are not
    necessarily returned in the order in which they appear in the cycle

    Examples
    --------
    >>> G = nx.Graph()
    >>> nx.add_cycle(G, [0, 1, 2, 3])
    >>> nx.add_cycle(G, [0, 3, 4, 5])
    >>> nx.minimum_cycle_basis(G)
    [[5, 4, 3, 0], [3, 2, 1, 0]]

    References
    ----------
    .. [1] Kavitha, Telikepalli, et al. "An O(m^2n) Algorithm for
       Minimum Cycle Basis of Graphs."
       http://link.springer.com/article/10.1007/s00453-007-9064-z
    .. [2] de Pina, J. 1995. Applications of shortest path methods.
       Ph.D. thesis, University of Amsterdam, Netherlands

    See Also
    --------
    simple_cycles, cycle_basis
    """
    # Each connected component is handled independently and the per-component
    # bases are concatenated in component order.  A flat comprehension is used
    # instead of ``sum(lists, [])``, which re-copies the accumulated list for
    # every component and is therefore quadratic in the number of cycles.
    return [
        cycle
        for component in nx.connected_components(G)
        for cycle in _min_cycle_basis(G.subgraph(component), weight)
    ]
Depending on implementation, it may be faster as well + tree_edges = list(nx.minimum_spanning_edges(G, weight=None, data=False)) + chords = G.edges - tree_edges - {(v, u) for u, v in tree_edges} + + # We maintain a set of vectors orthogonal to sofar found cycles + set_orth = [{edge} for edge in chords] + while set_orth: + base = set_orth.pop() + # kth cycle is "parallel" to kth vector in set_orth + cycle_edges = _min_cycle(G, base, weight) + cb.append([v for u, v in cycle_edges]) + + # now update set_orth so that k+1,k+2... th elements are + # orthogonal to the newly found cycle, as per [p. 336, 1] + set_orth = [ + ( + {e for e in orth if e not in base if e[::-1] not in base} + | {e for e in base if e not in orth if e[::-1] not in orth} + ) + if sum((e in orth or e[::-1] in orth) for e in cycle_edges) % 2 + else orth + for orth in set_orth + ] + return cb + + +def _min_cycle(G, orth, weight): + """ + Computes the minimum weight cycle in G, + orthogonal to the vector orth as per [p. 338, 1] + Use (u, 1) to indicate the lifted copy of u (denoted u' in paper). + """ + Gi = nx.Graph() + + # Add 2 copies of each edge in G to Gi. 
@not_implemented_for("directed")
@not_implemented_for("multigraph")
@nx._dispatchable
def girth(G):
    """Returns the girth of the graph.

    The girth of a graph is the length of its shortest cycle, or infinity if
    the graph is acyclic. A breadth-first search is run from every node, as
    described on the Wikipedia page [1]_; the running time is O(mn) on a graph
    with m edges and n nodes.

    Parameters
    ----------
    G : NetworkX Graph

    Returns
    -------
    int or math.inf

    Examples
    --------
    All examples below (except P_5) can easily be checked using Wikipedia,
    which has a page for each of these famous graphs.

    >>> nx.girth(nx.chvatal_graph())
    4
    >>> nx.girth(nx.tutte_graph())
    4
    >>> nx.girth(nx.petersen_graph())
    5
    >>> nx.girth(nx.heawood_graph())
    6
    >>> nx.girth(nx.pappus_graph())
    6
    >>> nx.girth(nx.path_graph(5))
    inf

    References
    ----------
    .. [1] `Wikipedia: Girth <https://en.wikipedia.org/wiki/Girth_(graph_theory)>`_

    """
    bfs = nx.algorithms.traversal.breadth_first_search
    tree_edge, level_edge = bfs.TREE_EDGE, bfs.LEVEL_EDGE

    shortest = cutoff = inf
    for root in G:
        # BFS from `root`, recording distances.  Only the shortest cycle is
        # wanted, so edges leaving nodes deeper than `cutoff` are never needed.
        dist = {root: 0}
        for u, v, kind in nx.bfs_labeled_edges(G, root):
            d = dist[u]
            if d > cutoff:
                break
            if kind is tree_edge:
                dist[v] = d + 1
                continue
            # A level edge joins two nodes at depth d: odd cycle, 2d + 1.
            # Any other non-tree edge reaches depth d + 1: even cycle, 2d + 2.
            is_level = kind is level_edge
            cycle_len = 2 * d + (1 if is_level else 2)
            if cycle_len < shortest:
                shortest = cycle_len
                cutoff = d - 1 if is_level else d

    return shortest
+ +The functional interface in NetworkX consists of three functions: + +- `find_minimal_d_separator` returns a minimal d-separator set ``z``. + That is, removing any node or nodes from it makes it no longer a d-separator. +- `is_d_separator` checks if a given set is a d-separator. +- `is_minimal_d_separator` checks if a given set is a minimal d-separator. + +D-separators +------------ + +Here, we provide a brief overview of d-separation and related concepts that +are relevant for understanding it: + +The ideas of d-separation and d-connection relate to paths being open or blocked. + +- A "path" is a sequence of nodes connected in order by edges. Unlike for most + graph theory analysis, the direction of the edges is ignored. Thus the path + can be thought of as a traditional path on the undirected version of the graph. +- A "candidate d-separator" ``z`` is a set of nodes being considered as + possibly blocking all paths between two prescribed sets ``x`` and ``y`` of nodes. + We refer to each node in the candidate d-separator as "known". +- A "collider" node on a path is a node that is a successor of its two neighbor + nodes on the path. That is, ``c`` is a collider if the edge directions + along the path look like ``... u -> c <- v ...``. +- If a collider node or any of its descendants are "known", the collider + is called an "open collider". Otherwise it is a "blocking collider". +- Any path can be "blocked" in two ways. If the path contains a "known" node + that is not a collider, the path is blocked. Also, if the path contains a + collider that is not an "open collider", the path is blocked. +- A path is "open" if it is not blocked. That is, it is open if every node is + either an open collider or not a "known". Said another way, every + "known" in the path is a collider and every collider is open (has a + "known" as an inclusive descendant).
The concept of "open path" is meant to + demonstrate a probabilistic conditional dependence between two nodes given + prescribed knowledge ("known" nodes). +- Two sets ``x`` and ``y`` of nodes are "d-separated" by a set of nodes ``z`` + if all paths between nodes in ``x`` and nodes in ``y`` are blocked. That is, + if there are no open paths from any node in ``x`` to any node in ``y``. + Such a set ``z`` is a "d-separator" of ``x`` and ``y``. +- A "minimal d-separator" is a d-separator ``z`` for which no node or subset + of nodes can be removed with it still being a d-separator. + +The d-separator blocks some paths between ``x`` and ``y`` but opens others. +Nodes in the d-separator block paths if the nodes are not colliders. +But if a collider or its descendant nodes are in the d-separation set, the +colliders are open, allowing a path through that collider. + +Illustration of D-separation with examples +------------------------------------------ + +A pair of two nodes, ``u`` and ``v``, are d-connected if there is a path +from ``u`` to ``v`` that is not blocked. That means, there is an open +path from ``u`` to ``v``. + +For example, if the d-separating set is the empty set, then the following paths are +open between ``u`` and ``v``: + +- u <- n -> v +- u -> w -> ... -> n -> v + +If on the other hand, ``n`` is in the d-separating set, then ``n`` blocks +those paths between ``u`` and ``v``. + +Colliders block a path if they and their descendants are not included +in the d-separating set. An example of a path that is blocked when the +d-separating set is empty is: + +- u -> w -> ... -> n <- v + +The node ``n`` is a collider in this path and is not in the d-separating set. +So ``n`` blocks this path. However, if ``n`` or a descendant of ``n`` is +included in the d-separating set, then the path through the collider +at ``n`` (... -> n <- ...) is "open". + +D-separation is concerned with blocking all paths between nodes from ``x`` to ``y``. 
+A d-separating set between ``x`` and ``y`` is one where all paths are blocked. + +D-separation and its applications in probability +------------------------------------------------ + +D-separation is commonly used in probabilistic causal-graph models. D-separation +connects the idea of probabilistic "dependence" with separation in a graph. If +one assumes the causal Markov condition [5]_, (every node is conditionally +independent of its non-descendants, given its parents) then d-separation implies +conditional independence in probability distributions. +Symmetrically, d-connection implies dependence. + +The intuition is as follows. The edges on a causal graph indicate which nodes +influence the outcome of other nodes directly. An edge from ``u`` to ``v`` +implies that the outcome of event ``u`` influences the probabilities for +the outcome of event ``v``. Certainly knowing ``u`` changes predictions for ``v``. +But also knowing ``v`` changes predictions for ``u``. The outcomes are dependent. +Furthermore, an edge from ``v`` to ``w`` would mean that ``w`` and ``v`` are dependent +and thus that ``u`` could indirectly influence ``w``. + +Without any knowledge about the system (candidate d-separating set is empty) +a causal graph ``u -> v -> w`` allows all three nodes to be dependent. But +if we know the outcome of ``v``, the conditional probabilities of outcomes for +``u`` and ``w`` are independent of each other. That is, once we know the outcome +for ``v``, the probabilities for ``w`` do not depend on the outcome for ``u``. +This is the idea behind ``v`` blocking the path if it is "known" (in the candidate +d-separating set). + +The same argument works when the edges are both +left-going and when both arrows head out from the middle. Having a "known" +node on a path blocks the collider-free path because those relationships +make the conditional probabilities independent. +
+ +The direction of the causal edges does impact dependence precisely in the +case of a collider e.g. ``u -> v <- w``. In that situation, both ``u`` and ``w`` +influence ``v``. But they do not directly influence each other. So without any +knowledge of any outcomes, ``u`` and ``w`` are independent. That is the idea behind +colliders blocking the path. But, if ``v`` is known, the conditional probabilities +of ``u`` and ``w`` can be dependent. This is the heart of Berkson's Paradox [6]_. +For example, suppose ``u`` and ``w`` are boolean events (they either happen or do not) +and ``v`` represents the outcome "at least one of ``u`` and ``w`` occur". Then knowing +``v`` is true makes the conditional probabilities of ``u`` and ``w`` dependent. +Essentially, knowing that at least one of them is true raises the probability of +each. But further knowledge that ``w`` is true (or false) changes the conditional +probability of ``u`` to either the original value or 1. So the conditional +probability of ``u`` depends on the outcome of ``w`` even though there is no +causal relationship between them. When a collider is known, dependence can +occur across paths through that collider. This is the reason open colliders +do not block paths. + +Furthermore, even if ``v`` is not "known", if one of its descendants is "known" +we can use that information to know more about ``v`` which again makes +``u`` and ``w`` potentially dependent. Suppose the chance of ``n`` occurring +is much higher when ``v`` occurs ("at least one of ``u`` and ``w`` occur"). +Then if we know ``n`` occurred, it is more likely that ``v`` occurred and that +makes the chance of ``u`` and ``w`` dependent. This is the idea behind why +a collider does not block a path if any descendant of the collider is "known".
+ +When two sets of nodes ``x`` and ``y`` are d-separated by a set ``z``, +it means that given the outcomes of the nodes in ``z``, the probabilities +of outcomes of the nodes in ``x`` are independent of the outcomes of the +nodes in ``y`` and vice versa. + +Examples +-------- +A Hidden Markov Model with 5 observed states and 5 hidden states +where the hidden states have causal relationships resulting in +a path results in the following causal network. We check that +early states along the path are separated from late state in +the path by the d-separator of the middle hidden state. +Thus if we condition on the middle hidden state, the early +state probabilities are independent of the late state outcomes. + +>>> G = nx.DiGraph() +>>> G.add_edges_from( +... [ +... ("H1", "H2"), +... ("H2", "H3"), +... ("H3", "H4"), +... ("H4", "H5"), +... ("H1", "O1"), +... ("H2", "O2"), +... ("H3", "O3"), +... ("H4", "O4"), +... ("H5", "O5"), +... ] +... ) +>>> x, y, z = ({"H1", "O1"}, {"H5", "O5"}, {"H3"}) +>>> nx.is_d_separator(G, x, y, z) +True +>>> nx.is_minimal_d_separator(G, x, y, z) +True +>>> nx.is_minimal_d_separator(G, x, y, z | {"O3"}) +False +>>> z = nx.find_minimal_d_separator(G, x | y, {"O2", "O3", "O4"}) +>>> z == {"H2", "H4"} +True + +If no minimal_d_separator exists, `None` is returned + +>>> other_z = nx.find_minimal_d_separator(G, x | y, {"H2", "H3"}) +>>> other_z is None +True + + +References +---------- + +.. [1] Pearl, J. (2009). Causality. Cambridge: Cambridge University Press. + +.. [2] Darwiche, A. (2009). Modeling and reasoning with Bayesian networks. + Cambridge: Cambridge University Press. + +.. [3] Shachter, Ross D. "Bayes-ball: The rational pastime (for + determining irrelevance and requisite information in belief networks + and influence diagrams)." In Proceedings of the Fourteenth Conference + on Uncertainty in Artificial Intelligence (UAI), (pp. 480–487). 1998. + +.. [4] Koller, D., & Friedman, N. (2009). 
+ Probabilistic graphical models: principles and techniques. The MIT Press. + +.. [5] https://en.wikipedia.org/wiki/Causal_Markov_condition + +.. [6] https://en.wikipedia.org/wiki/Berkson%27s_paradox + +""" + +from collections import deque +from itertools import chain + +import networkx as nx +from networkx.utils import UnionFind, not_implemented_for + +__all__ = [ + "is_d_separator", + "is_minimal_d_separator", + "find_minimal_d_separator", + "d_separated", + "minimal_d_separator", +] + + +@not_implemented_for("undirected") +@nx._dispatchable +def is_d_separator(G, x, y, z): + """Return whether node sets `x` and `y` are d-separated by `z`. + + Parameters + ---------- + G : nx.DiGraph + A NetworkX DAG. + + x : node or set of nodes + First node or set of nodes in `G`. + + y : node or set of nodes + Second node or set of nodes in `G`. + + z : node or set of nodes + Potential separator (set of conditioning nodes in `G`). Can be empty set. + + Returns + ------- + b : bool + A boolean that is true if `x` is d-separated from `y` given `z` in `G`. + + Raises + ------ + NetworkXError + The *d-separation* test is commonly used on disjoint sets of + nodes in acyclic directed graphs. Accordingly, the algorithm + raises a :exc:`NetworkXError` if the node sets are not + disjoint or if the input graph is not a DAG. + + NodeNotFound + If any of the input nodes are not found in the graph, + a :exc:`NodeNotFound` exception is raised + + Notes + ----- + A d-separating set in a DAG is a set of nodes that + blocks all paths between the two sets. Nodes in `z` + block a path if they are part of the path and are not a collider, + or a descendant of a collider. Also colliders that are not in `z` + block a path. A collider structure along a path + is ``... -> c <- ...`` where ``c`` is the collider node. 
+ + https://en.wikipedia.org/wiki/Bayesian_network#d-separation + """ + try: + x = {x} if x in G else x + y = {y} if y in G else y + z = {z} if z in G else z + + intersection = x & y or x & z or y & z + if intersection: + raise nx.NetworkXError( + f"The sets are not disjoint, with intersection {intersection}" + ) + + set_v = x | y | z + if set_v - G.nodes: + raise nx.NodeNotFound(f"The node(s) {set_v - G.nodes} are not found in G") + except TypeError: + raise nx.NodeNotFound("One of x, y, or z is not a node or a set of nodes in G") + + if not nx.is_directed_acyclic_graph(G): + raise nx.NetworkXError("graph should be directed acyclic") + + # contains -> and <-> edges from starting node T + forward_deque = deque([]) + forward_visited = set() + + # contains <- and - edges from starting node T + backward_deque = deque(x) + backward_visited = set() + + ancestors_or_z = set().union(*[nx.ancestors(G, node) for node in x]) | z | x + + while forward_deque or backward_deque: + if backward_deque: + node = backward_deque.popleft() + backward_visited.add(node) + if node in y: + return False + if node in z: + continue + + # add <- edges to backward deque + backward_deque.extend(G.pred[node].keys() - backward_visited) + # add -> edges to forward deque + forward_deque.extend(G.succ[node].keys() - forward_visited) + + if forward_deque: + node = forward_deque.popleft() + forward_visited.add(node) + if node in y: + return False + + # Consider if -> node <- is opened due to ancestor of node in z + if node in ancestors_or_z: + # add <- edges to backward deque + backward_deque.extend(G.pred[node].keys() - backward_visited) + if node not in z: + # add -> edges to forward deque + forward_deque.extend(G.succ[node].keys() - forward_visited) + + return True + + +@not_implemented_for("undirected") +@nx._dispatchable +def find_minimal_d_separator(G, x, y, *, included=None, restricted=None): + """Returns a minimal d-separating set between `x` and `y` if possible + + A d-separating set in a DAG 
is a set of nodes that blocks all + paths between the two sets of nodes, `x` and `y`. This function + constructs a d-separating set that is "minimal", meaning no nodes can + be removed without it losing the d-separating property for `x` and `y`. + If no d-separating sets exist for `x` and `y`, this returns `None`. + + In a DAG there may be more than one minimal d-separator between two + sets of nodes. Minimal d-separators are not always unique. This function + returns one minimal d-separator, or `None` if no d-separator exists. + + Uses the algorithm presented in [1]_. The complexity of the algorithm + is :math:`O(m)`, where :math:`m` stands for the number of edges in + the subgraph of G consisting of only the ancestors of `x` and `y`. + For full details, see [1]_. + + Parameters + ---------- + G : graph + A networkx DAG. + x : set | node + A node or set of nodes in the graph. + y : set | node + A node or set of nodes in the graph. + included : set | node | None + A node or set of nodes which must be included in the found separating set, + default is None, which means the empty set. + restricted : set | node | None + Restricted node or set of nodes to consider. Only these nodes can be in + the found separating set, default is None meaning all nodes in ``G``. + + Returns + ------- + z : set | None + The minimal d-separating set, if at least one d-separating set exists, + otherwise None. + + Raises + ------ + NetworkXError + Raises a :exc:`NetworkXError` if the input graph is not a DAG + or if node sets `x`, `y`, and `included` are not disjoint. + + NodeNotFound + If any of the input nodes are not found in the graph, + a :exc:`NodeNotFound` exception is raised. + + References + ---------- + .. [1] van der Zander, Benito, and Maciej Liśkiewicz. "Finding + minimal d-separators in linear time and applications." In + Uncertainty in Artificial Intelligence, pp. 637-647. PMLR, 2020. 
+ """ + if not nx.is_directed_acyclic_graph(G): + raise nx.NetworkXError("graph should be directed acyclic") + + try: + x = {x} if x in G else x + y = {y} if y in G else y + + if included is None: + included = set() + elif included in G: + included = {included} + + if restricted is None: + restricted = set(G) + elif restricted in G: + restricted = {restricted} + + set_y = x | y | included | restricted + if set_y - G.nodes: + raise nx.NodeNotFound(f"The node(s) {set_y - G.nodes} are not found in G") + except TypeError: + raise nx.NodeNotFound( + "One of x, y, included or restricted is not a node or set of nodes in G" + ) + + if not included <= restricted: + raise nx.NetworkXError( + f"Included nodes {included} must be in restricted nodes {restricted}" + ) + + intersection = x & y or x & included or y & included + if intersection: + raise nx.NetworkXError( + f"The sets x, y, included are not disjoint. Overlap: {intersection}" + ) + + nodeset = x | y | included + ancestors_x_y_included = nodeset.union(*[nx.ancestors(G, node) for node in nodeset]) + + z_init = restricted & (ancestors_x_y_included - (x | y)) + + x_closure = _reachable(G, x, ancestors_x_y_included, z_init) + if x_closure & y: + return None + + z_updated = z_init & (x_closure | included) + y_closure = _reachable(G, y, ancestors_x_y_included, z_updated) + return z_updated & (y_closure | included) + + +@not_implemented_for("undirected") +@nx._dispatchable +def is_minimal_d_separator(G, x, y, z, *, included=None, restricted=None): + """Determine if `z` is a minimal d-separator for `x` and `y`. + + A d-separator, `z`, in a DAG is a set of nodes that blocks + all paths from nodes in set `x` to nodes in set `y`. + A minimal d-separator is a d-separator `z` such that removing + any subset of nodes makes it no longer a d-separator. + + Note: This function checks whether `z` is a d-separator AND is + minimal. One can use the function `is_d_separator` to only check if + `z` is a d-separator. See examples below. 
+ + Parameters + ---------- + G : nx.DiGraph + A NetworkX DAG. + x : node | set + A node or set of nodes in the graph. + y : node | set + A node or set of nodes in the graph. + z : node | set + The node or set of nodes to check if it is a minimal d-separating set. + The function :func:`is_d_separator` is called inside this function + to verify that `z` is in fact a d-separator. + included : set | node | None + A node or set of nodes which must be included in the found separating set, + default is ``None``, which means the empty set. + restricted : set | node | None + Restricted node or set of nodes to consider. Only these nodes can be in + the found separating set, default is ``None`` meaning all nodes in ``G``. + + Returns + ------- + bool + Whether or not the set `z` is a minimal d-separator subject to + `restricted` nodes and `included` node constraints. + + Examples + -------- + >>> G = nx.path_graph([0, 1, 2, 3], create_using=nx.DiGraph) + >>> G.add_node(4) + >>> nx.is_minimal_d_separator(G, 0, 2, {1}) + True + >>> # since {1} is the minimal d-separator, {1, 3, 4} is not minimal + >>> nx.is_minimal_d_separator(G, 0, 2, {1, 3, 4}) + False + >>> # alternatively, if we only want to check that {1, 3, 4} is a d-separator + >>> nx.is_d_separator(G, 0, 2, {1, 3, 4}) + True + + Raises + ------ + NetworkXError + Raises a :exc:`NetworkXError` if the input graph is not a DAG. + + NodeNotFound + If any of the input nodes are not found in the graph, + a :exc:`NodeNotFound` exception is raised. + + References + ---------- + .. [1] van der Zander, Benito, and Maciej Liśkiewicz. "Finding + minimal d-separators in linear time and applications." In + Uncertainty in Artificial Intelligence, pp. 637-647. PMLR, 2020. + + Notes + ----- + This function works on verifying that a set is minimal and + d-separating between two nodes. Uses criterion (a), (b), (c) on + page 4 of [1]_. a) closure(`x`) and `y` are disjoint. 
b) `z` contains + all nodes from `included` and is contained in the `restricted` + nodes and in the union of ancestors of `x`, `y`, and `included`. + c) the nodes in `z` not in `included` are contained in both + closure(x) and closure(y). The closure of a set is the set of nodes + connected to the set by a directed path in G. + + The complexity is :math:`O(m)`, where :math:`m` stands for the + number of edges in the subgraph of G consisting of only the + ancestors of `x` and `y`. + + For full details, see [1]_. + """ + if not nx.is_directed_acyclic_graph(G): + raise nx.NetworkXError("graph should be directed acyclic") + + try: + x = {x} if x in G else x + y = {y} if y in G else y + z = {z} if z in G else z + + if included is None: + included = set() + elif included in G: + included = {included} + + if restricted is None: + restricted = set(G) + elif restricted in G: + restricted = {restricted} + + set_y = x | y | included | restricted + if set_y - G.nodes: + raise nx.NodeNotFound(f"The node(s) {set_y - G.nodes} are not found in G") + except TypeError: + raise nx.NodeNotFound( + "One of x, y, z, included or restricted is not a node or set of nodes in G" + ) + + if not included <= z: + raise nx.NetworkXError( + f"Included nodes {included} must be in proposed separating set z {x}" + ) + if not z <= restricted: + raise nx.NetworkXError( + f"Separating set {z} must be contained in restricted set {restricted}" + ) + + intersection = x.intersection(y) or x.intersection(z) or y.intersection(z) + if intersection: + raise nx.NetworkXError( + f"The sets are not disjoint, with intersection {intersection}" + ) + + nodeset = x | y | included + ancestors_x_y_included = nodeset.union(*[nx.ancestors(G, n) for n in nodeset]) + + # criterion (a) -- check that z is actually a separator + x_closure = _reachable(G, x, ancestors_x_y_included, z) + if x_closure & y: + return False + + # criterion (b) -- basic constraint; included and restricted already checked above + if not (z <= 
ancestors_x_y_included): + return False + + # criterion (c) -- check that z is minimal + y_closure = _reachable(G, y, ancestors_x_y_included, z) + if not ((z - included) <= (x_closure & y_closure)): + return False + return True + + +@not_implemented_for("undirected") +def _reachable(G, x, a, z): + """Modified Bayes-Ball algorithm for finding d-connected nodes. + + Find all nodes in `a` that are d-connected to those in `x` by + those in `z`. This is an implementation of the function + `REACHABLE` in [1]_ (which is itself a modification of the + Bayes-Ball algorithm [2]_) when restricted to DAGs. + + Parameters + ---------- + G : nx.DiGraph + A NetworkX DAG. + x : node | set + A node in the DAG, or a set of nodes. + a : node | set + A (set of) node(s) in the DAG containing the ancestors of `x`. + z : node | set + The node or set of nodes conditioned on when checking d-connectedness. + + Returns + ------- + w : set + The closure of `x` in `a` with respect to d-connectedness + given `z`. + + References + ---------- + .. [1] van der Zander, Benito, and Maciej Liśkiewicz. "Finding + minimal d-separators in linear time and applications." In + Uncertainty in Artificial Intelligence, pp. 637-647. PMLR, 2020. + + .. [2] Shachter, Ross D. "Bayes-ball: The rational pastime + (for determining irrelevance and requisite information in + belief networks and influence diagrams)." In Proceedings of the + Fourteenth Conference on Uncertainty in Artificial Intelligence + (UAI), (pp. 480–487). 1998. + """ + + def _pass(e, v, f, n): + """Whether a ball entering node `v` along edge `e` passes to `n` along `f`. + + Boolean function defined on page 6 of [1]_. + + Parameters + ---------- + e : bool + Directed edge by which the ball got to node `v`; `True` iff directed into `v`. + v : node + Node where the ball is. + f : bool + Directed edge connecting nodes `v` and `n`; `True` iff directed `n`. + n : node + Checking whether the ball passes to this node. 
+ + Returns + ------- + b : bool + Whether the ball passes or not. + + References + ---------- + .. [1] van der Zander, Benito, and Maciej Liśkiewicz. "Finding + minimal d-separators in linear time and applications." In + Uncertainty in Artificial Intelligence, pp. 637-647. PMLR, 2020. + """ + is_element_of_A = n in a + # almost_definite_status = True # always true for DAGs; not so for RCGs + collider_if_in_Z = v not in z or (e and not f) + return is_element_of_A and collider_if_in_Z # and almost_definite_status + + queue = deque([]) + for node in x: + if bool(G.pred[node]): + queue.append((True, node)) + if bool(G.succ[node]): + queue.append((False, node)) + processed = queue.copy() + + while any(queue): + e, v = queue.popleft() + preds = ((False, n) for n in G.pred[v]) + succs = ((True, n) for n in G.succ[v]) + f_n_pairs = chain(preds, succs) + for f, n in f_n_pairs: + if (f, n) not in processed and _pass(e, v, f, n): + queue.append((f, n)) + processed.append((f, n)) + + return {w for (_, w) in processed} + + +# Deprecated functions: +def d_separated(G, x, y, z): + """Return whether nodes sets ``x`` and ``y`` are d-separated by ``z``. + + .. deprecated:: 3.3 + + This function is deprecated and will be removed in NetworkX v3.5. + Please use `is_d_separator(G, x, y, z)`. + + """ + import warnings + + warnings.warn( + "d_separated is deprecated and will be removed in NetworkX v3.5." + "Please use `is_d_separator(G, x, y, z)`.", + category=DeprecationWarning, + stacklevel=2, + ) + return nx.is_d_separator(G, x, y, z) + + +def minimal_d_separator(G, u, v): + """Returns a minimal_d-separating set between `x` and `y` if possible + + .. deprecated:: 3.3 + + minimal_d_separator is deprecated and will be removed in NetworkX v3.5. + Please use `find_minimal_d_separator(G, x, y)`. + + """ + import warnings + + warnings.warn( + ( + "This function is deprecated and will be removed in NetworkX v3.5." + "Please use `is_d_separator(G, x, y)`." 
+ ), + category=DeprecationWarning, + stacklevel=2, + ) + return nx.find_minimal_d_separator(G, u, v) diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/dag.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/dag.py new file mode 100644 index 0000000000000000000000000000000000000000..c757afb96f1398d64ae63a5f682e46031a38ff8d --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/dag.py @@ -0,0 +1,1418 @@ +"""Algorithms for directed acyclic graphs (DAGs). + +Note that most of these functions are only guaranteed to work for DAGs. +In general, these functions do not check for acyclic-ness, so it is up +to the user to check for that. +""" + +import heapq +from collections import deque +from functools import partial +from itertools import chain, combinations, product, starmap +from math import gcd + +import networkx as nx +from networkx.utils import arbitrary_element, not_implemented_for, pairwise + +__all__ = [ + "descendants", + "ancestors", + "topological_sort", + "lexicographical_topological_sort", + "all_topological_sorts", + "topological_generations", + "is_directed_acyclic_graph", + "is_aperiodic", + "transitive_closure", + "transitive_closure_dag", + "transitive_reduction", + "antichains", + "dag_longest_path", + "dag_longest_path_length", + "dag_to_branching", + "compute_v_structures", +] + +chaini = chain.from_iterable + + +@nx._dispatchable +def descendants(G, source): + """Returns all nodes reachable from `source` in `G`. + + Parameters + ---------- + G : NetworkX Graph + source : node in `G` + + Returns + ------- + set() + The descendants of `source` in `G` + + Raises + ------ + NetworkXError + If node `source` is not in `G`. 
+ + Examples + -------- + >>> DG = nx.path_graph(5, create_using=nx.DiGraph) + >>> sorted(nx.descendants(DG, 2)) + [3, 4] + + The `source` node is not a descendant of itself, but can be included manually: + + >>> sorted(nx.descendants(DG, 2) | {2}) + [2, 3, 4] + + See also + -------- + ancestors + """ + return {child for parent, child in nx.bfs_edges(G, source)} + + +@nx._dispatchable +def ancestors(G, source): + """Returns all nodes having a path to `source` in `G`. + + Parameters + ---------- + G : NetworkX Graph + source : node in `G` + + Returns + ------- + set() + The ancestors of `source` in `G` + + Raises + ------ + NetworkXError + If node `source` is not in `G`. + + Examples + -------- + >>> DG = nx.path_graph(5, create_using=nx.DiGraph) + >>> sorted(nx.ancestors(DG, 2)) + [0, 1] + + The `source` node is not an ancestor of itself, but can be included manually: + + >>> sorted(nx.ancestors(DG, 2) | {2}) + [0, 1, 2] + + See also + -------- + descendants + """ + return {child for parent, child in nx.bfs_edges(G, source, reverse=True)} + + +@nx._dispatchable +def has_cycle(G): + """Decides whether the directed graph has a cycle.""" + try: + # Feed the entire iterator into a zero-length deque. + deque(topological_sort(G), maxlen=0) + except nx.NetworkXUnfeasible: + return True + else: + return False + + +@nx._dispatchable +def is_directed_acyclic_graph(G): + """Returns True if the graph `G` is a directed acyclic graph (DAG) or + False if not. 
+ + Parameters + ---------- + G : NetworkX graph + + Returns + ------- + bool + True if `G` is a DAG, False otherwise + + Examples + -------- + Undirected graph:: + + >>> G = nx.Graph([(1, 2), (2, 3)]) + >>> nx.is_directed_acyclic_graph(G) + False + + Directed graph with cycle:: + + >>> G = nx.DiGraph([(1, 2), (2, 3), (3, 1)]) + >>> nx.is_directed_acyclic_graph(G) + False + + Directed acyclic graph:: + + >>> G = nx.DiGraph([(1, 2), (2, 3)]) + >>> nx.is_directed_acyclic_graph(G) + True + + See also + -------- + topological_sort + """ + return G.is_directed() and not has_cycle(G) + + +@nx._dispatchable +def topological_generations(G): + """Stratifies a DAG into generations. + + A topological generation is node collection in which ancestors of a node in each + generation are guaranteed to be in a previous generation, and any descendants of + a node are guaranteed to be in a following generation. Nodes are guaranteed to + be in the earliest possible generation that they can belong to. + + Parameters + ---------- + G : NetworkX digraph + A directed acyclic graph (DAG) + + Yields + ------ + sets of nodes + Yields sets of nodes representing each generation. + + Raises + ------ + NetworkXError + Generations are defined for directed graphs only. If the graph + `G` is undirected, a :exc:`NetworkXError` is raised. + + NetworkXUnfeasible + If `G` is not a directed acyclic graph (DAG) no topological generations + exist and a :exc:`NetworkXUnfeasible` exception is raised. This can also + be raised if `G` is changed while the returned iterator is being processed + + RuntimeError + If `G` is changed while the returned iterator is being processed. + + Examples + -------- + >>> DG = nx.DiGraph([(2, 1), (3, 1)]) + >>> [sorted(generation) for generation in nx.topological_generations(DG)] + [[2, 3], [1]] + + Notes + ----- + The generation in which a node resides can also be determined by taking the + max-path-distance from the node to the farthest leaf node. 
That value can + be obtained with this function using `enumerate(topological_generations(G))`. + + See also + -------- + topological_sort + """ + if not G.is_directed(): + raise nx.NetworkXError("Topological sort not defined on undirected graphs.") + + multigraph = G.is_multigraph() + indegree_map = {v: d for v, d in G.in_degree() if d > 0} + zero_indegree = [v for v, d in G.in_degree() if d == 0] + + while zero_indegree: + this_generation = zero_indegree + zero_indegree = [] + for node in this_generation: + if node not in G: + raise RuntimeError("Graph changed during iteration") + for child in G.neighbors(node): + try: + indegree_map[child] -= len(G[node][child]) if multigraph else 1 + except KeyError as err: + raise RuntimeError("Graph changed during iteration") from err + if indegree_map[child] == 0: + zero_indegree.append(child) + del indegree_map[child] + yield this_generation + + if indegree_map: + raise nx.NetworkXUnfeasible( + "Graph contains a cycle or graph changed during iteration" + ) + + +@nx._dispatchable +def topological_sort(G): + """Returns a generator of nodes in topologically sorted order. + + A topological sort is a nonunique permutation of the nodes of a + directed graph such that an edge from u to v implies that u + appears before v in the topological sort order. This ordering is + valid only if the graph has no directed cycles. + + Parameters + ---------- + G : NetworkX digraph + A directed acyclic graph (DAG) + + Yields + ------ + nodes + Yields the nodes in topological sorted order. + + Raises + ------ + NetworkXError + Topological sort is defined for directed graphs only. If the graph `G` + is undirected, a :exc:`NetworkXError` is raised. + + NetworkXUnfeasible + If `G` is not a directed acyclic graph (DAG) no topological sort exists + and a :exc:`NetworkXUnfeasible` exception is raised. 
This can also be + raised if `G` is changed while the returned iterator is being processed + + RuntimeError + If `G` is changed while the returned iterator is being processed. + + Examples + -------- + To get the reverse order of the topological sort: + + >>> DG = nx.DiGraph([(1, 2), (2, 3)]) + >>> list(reversed(list(nx.topological_sort(DG)))) + [3, 2, 1] + + If your DiGraph naturally has the edges representing tasks/inputs + and nodes representing people/processes that initiate tasks, then + topological_sort is not quite what you need. You will have to change + the tasks to nodes with dependence reflected by edges. The result is + a kind of topological sort of the edges. This can be done + with :func:`networkx.line_graph` as follows: + + >>> list(nx.topological_sort(nx.line_graph(DG))) + [(1, 2), (2, 3)] + + Notes + ----- + This algorithm is based on a description and proof in + "Introduction to Algorithms: A Creative Approach" [1]_ . + + See also + -------- + is_directed_acyclic_graph, lexicographical_topological_sort + + References + ---------- + .. [1] Manber, U. (1989). + *Introduction to Algorithms - A Creative Approach.* Addison-Wesley. + """ + for generation in nx.topological_generations(G): + yield from generation + + +@nx._dispatchable +def lexicographical_topological_sort(G, key=None): + """Generate the nodes in the unique lexicographical topological sort order. + + Generates a unique ordering of nodes by first sorting topologically (for which there are often + multiple valid orderings) and then additionally by sorting lexicographically. + + A topological sort arranges the nodes of a directed graph so that the + upstream node of each directed edge precedes the downstream node. + It is always possible to find a solution for directed graphs that have no cycles. + There may be more than one valid solution. + + Lexicographical sorting is just sorting alphabetically. 
It is used here to break ties in the + topological sort and to determine a single, unique ordering. This can be useful in comparing + sort results. + + The lexicographical order can be customized by providing a function to the `key=` parameter. + The definition of the key function is the same as used in python's built-in `sort()`. + The function takes a single argument and returns a key to use for sorting purposes. + + Lexicographical sorting can fail if the node names are un-sortable. See the example below. + The solution is to provide a function to the `key=` argument that returns sortable keys. + + + Parameters + ---------- + G : NetworkX digraph + A directed acyclic graph (DAG) + + key : function, optional + A function of one argument that converts a node name to a comparison key. + It defines and resolves ambiguities in the sort order. Defaults to the identity function. + + Yields + ------ + nodes + Yields the nodes of G in lexicographical topological sort order. + + Raises + ------ + NetworkXError + Topological sort is defined for directed graphs only. If the graph `G` + is undirected, a :exc:`NetworkXError` is raised. + + NetworkXUnfeasible + If `G` is not a directed acyclic graph (DAG) no topological sort exists + and a :exc:`NetworkXUnfeasible` exception is raised. This can also be + raised if `G` is changed while the returned iterator is being processed + + RuntimeError + If `G` is changed while the returned iterator is being processed. + + TypeError + Results from un-sortable node names. + Consider using `key=` parameter to resolve ambiguities in the sort order. + + Examples + -------- + >>> DG = nx.DiGraph([(2, 1), (2, 5), (1, 3), (1, 4), (5, 4)]) + >>> list(nx.lexicographical_topological_sort(DG)) + [2, 1, 3, 5, 4] + >>> list(nx.lexicographical_topological_sort(DG, key=lambda x: -x)) + [2, 5, 1, 4, 3] + + The sort will fail for any graph with integer and string nodes. Comparison of integer to strings + is not defined in python. 
Is 3 greater or less than 'red'? + + >>> DG = nx.DiGraph([(1, "red"), (3, "red"), (1, "green"), (2, "blue")]) + >>> list(nx.lexicographical_topological_sort(DG)) + Traceback (most recent call last): + ... + TypeError: '<' not supported between instances of 'str' and 'int' + ... + + Incomparable nodes can be resolved using a `key` function. This example function + allows comparison of integers and strings by returning a tuple where the first + element is True for `str`, False otherwise. The second element is the node name. + This groups the strings and integers separately so they can be compared only among themselves. + + >>> key = lambda node: (isinstance(node, str), node) + >>> list(nx.lexicographical_topological_sort(DG, key=key)) + [1, 2, 3, 'blue', 'green', 'red'] + + Notes + ----- + This algorithm is based on a description and proof in + "Introduction to Algorithms: A Creative Approach" [1]_ . + + See also + -------- + topological_sort + + References + ---------- + .. [1] Manber, U. (1989). + *Introduction to Algorithms - A Creative Approach.* Addison-Wesley. + """ + if not G.is_directed(): + msg = "Topological sort not defined on undirected graphs." + raise nx.NetworkXError(msg) + + if key is None: + + def key(node): + return node + + nodeid_map = {n: i for i, n in enumerate(G)} + + def create_tuple(node): + return key(node), nodeid_map[node], node + + indegree_map = {v: d for v, d in G.in_degree() if d > 0} + # These nodes have zero indegree and ready to be returned. 
+ zero_indegree = [create_tuple(v) for v, d in G.in_degree() if d == 0] + heapq.heapify(zero_indegree) + + while zero_indegree: + _, _, node = heapq.heappop(zero_indegree) + + if node not in G: + raise RuntimeError("Graph changed during iteration") + for _, child in G.edges(node): + try: + indegree_map[child] -= 1 + except KeyError as err: + raise RuntimeError("Graph changed during iteration") from err + if indegree_map[child] == 0: + try: + heapq.heappush(zero_indegree, create_tuple(child)) + except TypeError as err: + raise TypeError( + f"{err}\nConsider using `key=` parameter to resolve ambiguities in the sort order." + ) + del indegree_map[child] + + yield node + + if indegree_map: + msg = "Graph contains a cycle or graph changed during iteration" + raise nx.NetworkXUnfeasible(msg) + + +@not_implemented_for("undirected") +@nx._dispatchable +def all_topological_sorts(G): + """Returns a generator of _all_ topological sorts of the directed graph G. + + A topological sort is a nonunique permutation of the nodes such that an + edge from u to v implies that u appears before v in the topological sort + order. + + Parameters + ---------- + G : NetworkX DiGraph + A directed graph + + Yields + ------ + topological_sort_order : list + a list of nodes in `G`, representing one of the topological sort orders + + Raises + ------ + NetworkXNotImplemented + If `G` is not directed + NetworkXUnfeasible + If `G` is not acyclic + + Examples + -------- + To enumerate all topological sorts of directed graph: + + >>> DG = nx.DiGraph([(1, 2), (2, 3), (2, 4)]) + >>> list(nx.all_topological_sorts(DG)) + [[1, 2, 4, 3], [1, 2, 3, 4]] + + Notes + ----- + Implements an iterative version of the algorithm given in [1]. + + References + ---------- + .. [1] Knuth, Donald E., Szwarcfiter, Jayme L. (1974). 
+ "A Structured Program to Generate All Topological Sorting Arrangements" + Information Processing Letters, Volume 2, Issue 6, 1974, Pages 153-157, + ISSN 0020-0190, + https://doi.org/10.1016/0020-0190(74)90001-5. + Elsevier (North-Holland), Amsterdam + """ + if not G.is_directed(): + raise nx.NetworkXError("Topological sort not defined on undirected graphs.") + + # the names of count and D are chosen to match the global variables in [1] + # number of edges originating in a vertex v + count = dict(G.in_degree()) + # vertices with indegree 0 + D = deque([v for v, d in G.in_degree() if d == 0]) + # stack of first value chosen at a position k in the topological sort + bases = [] + current_sort = [] + + # do-while construct + while True: + assert all(count[v] == 0 for v in D) + + if len(current_sort) == len(G): + yield list(current_sort) + + # clean-up stack + while len(current_sort) > 0: + assert len(bases) == len(current_sort) + q = current_sort.pop() + + # "restores" all edges (q, x) + # NOTE: it is important to iterate over edges instead + # of successors, so count is updated correctly in multigraphs + for _, j in G.out_edges(q): + count[j] += 1 + assert count[j] >= 0 + # remove entries from D + while len(D) > 0 and count[D[-1]] > 0: + D.pop() + + # corresponds to a circular shift of the values in D + # if the first value chosen (the base) is in the first + # position of D again, we are done and need to consider the + # previous condition + D.appendleft(q) + if D[-1] == bases[-1]: + # all possible values have been chosen at current position + # remove corresponding marker + bases.pop() + else: + # there are still elements that have not been fixed + # at the current position in the topological sort + # stop removing elements, escape inner loop + break + + else: + if len(D) == 0: + raise nx.NetworkXUnfeasible("Graph contains a cycle.") + + # choose next node + q = D.pop() + # "erase" all edges (q, x) + # NOTE: it is important to iterate over edges instead + # of 
successors, so count is updated correctly in multigraphs + for _, j in G.out_edges(q): + count[j] -= 1 + assert count[j] >= 0 + if count[j] == 0: + D.append(j) + current_sort.append(q) + + # base for current position might _not_ be fixed yet + if len(bases) < len(current_sort): + bases.append(q) + + if len(bases) == 0: + break + + +@nx._dispatchable +def is_aperiodic(G): + """Returns True if `G` is aperiodic. + + A directed graph is aperiodic if there is no integer k > 1 that + divides the length of every cycle in the graph. + + Parameters + ---------- + G : NetworkX DiGraph + A directed graph + + Returns + ------- + bool + True if the graph is aperiodic False otherwise + + Raises + ------ + NetworkXError + If `G` is not directed + + Examples + -------- + A graph consisting of one cycle, the length of which is 2. Therefore ``k = 2`` + divides the length of every cycle in the graph and thus the graph + is *not aperiodic*:: + + >>> DG = nx.DiGraph([(1, 2), (2, 1)]) + >>> nx.is_aperiodic(DG) + False + + A graph consisting of two cycles: one of length 2 and the other of length 3. + The cycle lengths are coprime, so there is no single value of k where ``k > 1`` + that divides each cycle length and therefore the graph is *aperiodic*:: + + >>> DG = nx.DiGraph([(1, 2), (2, 3), (3, 1), (1, 4), (4, 1)]) + >>> nx.is_aperiodic(DG) + True + + A graph consisting of two cycles: one of length 2 and the other of length 4. + The lengths of the cycles share a common factor ``k = 2``, and therefore + the graph is *not aperiodic*:: + + >>> DG = nx.DiGraph([(1, 2), (2, 1), (3, 4), (4, 5), (5, 6), (6, 3)]) + >>> nx.is_aperiodic(DG) + False + + An acyclic graph, therefore the graph is *not aperiodic*:: + + >>> DG = nx.DiGraph([(1, 2), (2, 3)]) + >>> nx.is_aperiodic(DG) + False + + Notes + ----- + This uses the method outlined in [1]_, which runs in $O(m)$ time + given $m$ edges in `G`. Note that a graph is not aperiodic if it is + acyclic as every integer trivial divides length 0 cycles. 
+ + References + ---------- + .. [1] Jarvis, J. P.; Shier, D. R. (1996), + "Graph-theoretic analysis of finite Markov chains," + in Shier, D. R.; Wallenius, K. T., Applied Mathematical Modeling: + A Multidisciplinary Approach, CRC Press. + """ + if not G.is_directed(): + raise nx.NetworkXError("is_aperiodic not defined for undirected graphs") + if len(G) == 0: + raise nx.NetworkXPointlessConcept("Graph has no nodes.") + s = arbitrary_element(G) + levels = {s: 0} + this_level = [s] + g = 0 + lev = 1 + while this_level: + next_level = [] + for u in this_level: + for v in G[u]: + if v in levels: # Non-Tree Edge + g = gcd(g, levels[u] - levels[v] + 1) + else: # Tree Edge + next_level.append(v) + levels[v] = lev + this_level = next_level + lev += 1 + if len(levels) == len(G): # All nodes in tree + return g == 1 + else: + return g == 1 and nx.is_aperiodic(G.subgraph(set(G) - set(levels))) + + +@nx._dispatchable(preserve_all_attrs=True, returns_graph=True) +def transitive_closure(G, reflexive=False): + """Returns transitive closure of a graph + + The transitive closure of G = (V,E) is a graph G+ = (V,E+) such that + for all v, w in V there is an edge (v, w) in E+ if and only if there + is a path from v to w in G. + + Handling of paths from v to v has some flexibility within this definition. + A reflexive transitive closure creates a self-loop for the path + from v to v of length 0. The usual transitive closure creates a + self-loop only if a cycle exists (a path from v to v with length > 0). + We also allow an option for no self-loops. + + Parameters + ---------- + G : NetworkX Graph + A directed/undirected graph/multigraph. + reflexive : Bool or None, optional (default: False) + Determines when cycles create self-loops in the Transitive Closure. + If True, trivial cycles (length 0) create self-loops. The result + is a reflexive transitive closure of G. + If False (the default) non-trivial cycles create self-loops. + If None, self-loops are not created. 
+ + Returns + ------- + NetworkX graph + The transitive closure of `G` + + Raises + ------ + NetworkXError + If `reflexive` not in `{None, True, False}` + + Examples + -------- + The treatment of trivial (i.e. length 0) cycles is controlled by the + `reflexive` parameter. + + Trivial (i.e. length 0) cycles do not create self-loops when + ``reflexive=False`` (the default):: + + >>> DG = nx.DiGraph([(1, 2), (2, 3)]) + >>> TC = nx.transitive_closure(DG, reflexive=False) + >>> TC.edges() + OutEdgeView([(1, 2), (1, 3), (2, 3)]) + + However, nontrivial (i.e. length greater than 0) cycles create self-loops + when ``reflexive=False`` (the default):: + + >>> DG = nx.DiGraph([(1, 2), (2, 3), (3, 1)]) + >>> TC = nx.transitive_closure(DG, reflexive=False) + >>> TC.edges() + OutEdgeView([(1, 2), (1, 3), (1, 1), (2, 3), (2, 1), (2, 2), (3, 1), (3, 2), (3, 3)]) + + Trivial cycles (length 0) create self-loops when ``reflexive=True``:: + + >>> DG = nx.DiGraph([(1, 2), (2, 3)]) + >>> TC = nx.transitive_closure(DG, reflexive=True) + >>> TC.edges() + OutEdgeView([(1, 2), (1, 1), (1, 3), (2, 3), (2, 2), (3, 3)]) + + And the third option is not to create self-loops at all when ``reflexive=None``:: + + >>> DG = nx.DiGraph([(1, 2), (2, 3), (3, 1)]) + >>> TC = nx.transitive_closure(DG, reflexive=None) + >>> TC.edges() + OutEdgeView([(1, 2), (1, 3), (2, 3), (2, 1), (3, 1), (3, 2)]) + + References + ---------- + .. 
[1] https://www.ics.uci.edu/~eppstein/PADS/PartialOrder.py + """ + TC = G.copy() + + if reflexive not in {None, True, False}: + raise nx.NetworkXError("Incorrect value for the parameter `reflexive`") + + for v in G: + if reflexive is None: + TC.add_edges_from((v, u) for u in nx.descendants(G, v) if u not in TC[v]) + elif reflexive is True: + TC.add_edges_from( + (v, u) for u in nx.descendants(G, v) | {v} if u not in TC[v] + ) + elif reflexive is False: + TC.add_edges_from((v, e[1]) for e in nx.edge_bfs(G, v) if e[1] not in TC[v]) + + return TC + + +@not_implemented_for("undirected") +@nx._dispatchable(preserve_all_attrs=True, returns_graph=True) +def transitive_closure_dag(G, topo_order=None): + """Returns the transitive closure of a directed acyclic graph. + + This function is faster than the function `transitive_closure`, but fails + if the graph has a cycle. + + The transitive closure of G = (V,E) is a graph G+ = (V,E+) such that + for all v, w in V there is an edge (v, w) in E+ if and only if there + is a non-null path from v to w in G. + + Parameters + ---------- + G : NetworkX DiGraph + A directed acyclic graph (DAG) + + topo_order: list or tuple, optional + A topological order for G (if None, the function will compute one) + + Returns + ------- + NetworkX DiGraph + The transitive closure of `G` + + Raises + ------ + NetworkXNotImplemented + If `G` is not directed + NetworkXUnfeasible + If `G` has a cycle + + Examples + -------- + >>> DG = nx.DiGraph([(1, 2), (2, 3)]) + >>> TC = nx.transitive_closure_dag(DG) + >>> TC.edges() + OutEdgeView([(1, 2), (1, 3), (2, 3)]) + + Notes + ----- + This algorithm is probably simple enough to be well-known but I didn't find + a mention in the literature. 
+ """ + if topo_order is None: + topo_order = list(topological_sort(G)) + + TC = G.copy() + + # idea: traverse vertices following a reverse topological order, connecting + # each vertex to its descendants at distance 2 as we go + for v in reversed(topo_order): + TC.add_edges_from((v, u) for u in nx.descendants_at_distance(TC, v, 2)) + + return TC + + +@not_implemented_for("undirected") +@nx._dispatchable(returns_graph=True) +def transitive_reduction(G): + """Returns transitive reduction of a directed graph + + The transitive reduction of G = (V,E) is a graph G- = (V,E-) such that + for all v,w in V there is an edge (v,w) in E- if and only if (v,w) is + in E and there is no path from v to w in G with length greater than 1. + + Parameters + ---------- + G : NetworkX DiGraph + A directed acyclic graph (DAG) + + Returns + ------- + NetworkX DiGraph + The transitive reduction of `G` + + Raises + ------ + NetworkXError + If `G` is not a directed acyclic graph (DAG) transitive reduction is + not uniquely defined and a :exc:`NetworkXError` exception is raised. + + Examples + -------- + To perform transitive reduction on a DiGraph: + + >>> DG = nx.DiGraph([(1, 2), (2, 3), (1, 3)]) + >>> TR = nx.transitive_reduction(DG) + >>> list(TR.edges) + [(1, 2), (2, 3)] + + To avoid unnecessary data copies, this implementation does not return a + DiGraph with node/edge data. 
+ To perform transitive reduction on a DiGraph and transfer node/edge data: + + >>> DG = nx.DiGraph() + >>> DG.add_edges_from([(1, 2), (2, 3), (1, 3)], color="red") + >>> TR = nx.transitive_reduction(DG) + >>> TR.add_nodes_from(DG.nodes(data=True)) + >>> TR.add_edges_from((u, v, DG.edges[u, v]) for u, v in TR.edges) + >>> list(TR.edges(data=True)) + [(1, 2, {'color': 'red'}), (2, 3, {'color': 'red'})] + + References + ---------- + https://en.wikipedia.org/wiki/Transitive_reduction + + """ + if not is_directed_acyclic_graph(G): + msg = "Directed Acyclic Graph required for transitive_reduction" + raise nx.NetworkXError(msg) + TR = nx.DiGraph() + TR.add_nodes_from(G.nodes()) + descendants = {} + # count before removing set stored in descendants + check_count = dict(G.in_degree) + for u in G: + u_nbrs = set(G[u]) + for v in G[u]: + if v in u_nbrs: + if v not in descendants: + descendants[v] = {y for x, y in nx.dfs_edges(G, v)} + u_nbrs -= descendants[v] + check_count[v] -= 1 + if check_count[v] == 0: + del descendants[v] + TR.add_edges_from((u, v) for v in u_nbrs) + return TR + + +@not_implemented_for("undirected") +@nx._dispatchable +def antichains(G, topo_order=None): + """Generates antichains from a directed acyclic graph (DAG). + + An antichain is a subset of a partially ordered set such that any + two elements in the subset are incomparable. 
+ + Parameters + ---------- + G : NetworkX DiGraph + A directed acyclic graph (DAG) + + topo_order: list or tuple, optional + A topological order for G (if None, the function will compute one) + + Yields + ------ + antichain : list + a list of nodes in `G` representing an antichain + + Raises + ------ + NetworkXNotImplemented + If `G` is not directed + + NetworkXUnfeasible + If `G` contains a cycle + + Examples + -------- + >>> DG = nx.DiGraph([(1, 2), (1, 3)]) + >>> list(nx.antichains(DG)) + [[], [3], [2], [2, 3], [1]] + + Notes + ----- + This function was originally developed by Peter Jipsen and Franco Saliola + for the SAGE project. It's included in NetworkX with permission from the + authors. Original SAGE code at: + + https://github.com/sagemath/sage/blob/master/src/sage/combinat/posets/hasse_diagram.py + + References + ---------- + .. [1] Free Lattices, by R. Freese, J. Jezek and J. B. Nation, + AMS, Vol 42, 1995, p. 226. + """ + if topo_order is None: + topo_order = list(nx.topological_sort(G)) + + TC = nx.transitive_closure_dag(G, topo_order) + antichains_stacks = [([], list(reversed(topo_order)))] + + while antichains_stacks: + (antichain, stack) = antichains_stacks.pop() + # Invariant: + # - the elements of antichain are independent + # - the elements of stack are independent from those of antichain + yield antichain + while stack: + x = stack.pop() + new_antichain = antichain + [x] + new_stack = [t for t in stack if not ((t in TC[x]) or (x in TC[t]))] + antichains_stacks.append((new_antichain, new_stack)) + + +@not_implemented_for("undirected") +@nx._dispatchable(edge_attrs={"weight": "default_weight"}) +def dag_longest_path(G, weight="weight", default_weight=1, topo_order=None): + """Returns the longest path in a directed acyclic graph (DAG). + + If `G` has edges with `weight` attribute the edge data are used as + weight values. 
+ + Parameters + ---------- + G : NetworkX DiGraph + A directed acyclic graph (DAG) + + weight : str, optional + Edge data key to use for weight + + default_weight : int, optional + The weight of edges that do not have a weight attribute + + topo_order: list or tuple, optional + A topological order for `G` (if None, the function will compute one) + + Returns + ------- + list + Longest path + + Raises + ------ + NetworkXNotImplemented + If `G` is not directed + + Examples + -------- + >>> DG = nx.DiGraph( + ... [(0, 1, {"cost": 1}), (1, 2, {"cost": 1}), (0, 2, {"cost": 42})] + ... ) + >>> list(nx.all_simple_paths(DG, 0, 2)) + [[0, 1, 2], [0, 2]] + >>> nx.dag_longest_path(DG) + [0, 1, 2] + >>> nx.dag_longest_path(DG, weight="cost") + [0, 2] + + In the case where multiple valid topological orderings exist, `topo_order` + can be used to specify a specific ordering: + + >>> DG = nx.DiGraph([(0, 1), (0, 2)]) + >>> sorted(nx.all_topological_sorts(DG)) # Valid topological orderings + [[0, 1, 2], [0, 2, 1]] + >>> nx.dag_longest_path(DG, topo_order=[0, 1, 2]) + [0, 1] + >>> nx.dag_longest_path(DG, topo_order=[0, 2, 1]) + [0, 2] + + See also + -------- + dag_longest_path_length + + """ + if not G: + return [] + + if topo_order is None: + topo_order = nx.topological_sort(G) + + dist = {} # stores {v : (length, u)} + for v in topo_order: + us = [ + ( + dist[u][0] + + ( + max(data.values(), key=lambda x: x.get(weight, default_weight)) + if G.is_multigraph() + else data + ).get(weight, default_weight), + u, + ) + for u, data in G.pred[v].items() + ] + + # Use the best predecessor if there is one and its distance is + # non-negative, otherwise terminate. 
+ maxu = max(us, key=lambda x: x[0]) if us else (0, v) + dist[v] = maxu if maxu[0] >= 0 else (0, v) + + u = None + v = max(dist, key=lambda x: dist[x][0]) + path = [] + while u != v: + path.append(v) + u = v + v = dist[v][1] + + path.reverse() + return path + + +@not_implemented_for("undirected") +@nx._dispatchable(edge_attrs={"weight": "default_weight"}) +def dag_longest_path_length(G, weight="weight", default_weight=1): + """Returns the longest path length in a DAG + + Parameters + ---------- + G : NetworkX DiGraph + A directed acyclic graph (DAG) + + weight : string, optional + Edge data key to use for weight + + default_weight : int, optional + The weight of edges that do not have a weight attribute + + Returns + ------- + int + Longest path length + + Raises + ------ + NetworkXNotImplemented + If `G` is not directed + + Examples + -------- + >>> DG = nx.DiGraph( + ... [(0, 1, {"cost": 1}), (1, 2, {"cost": 1}), (0, 2, {"cost": 42})] + ... ) + >>> list(nx.all_simple_paths(DG, 0, 2)) + [[0, 1, 2], [0, 2]] + >>> nx.dag_longest_path_length(DG) + 2 + >>> nx.dag_longest_path_length(DG, weight="cost") + 42 + + See also + -------- + dag_longest_path + """ + path = nx.dag_longest_path(G, weight, default_weight) + path_length = 0 + if G.is_multigraph(): + for u, v in pairwise(path): + i = max(G[u][v], key=lambda x: G[u][v][x].get(weight, default_weight)) + path_length += G[u][v][i].get(weight, default_weight) + else: + for u, v in pairwise(path): + path_length += G[u][v].get(weight, default_weight) + + return path_length + + +@nx._dispatchable +def root_to_leaf_paths(G): + """Yields root-to-leaf paths in a directed acyclic graph. + + `G` must be a directed acyclic graph. If not, the behavior of this + function is undefined. A "root" in this graph is a node of in-degree + zero and a "leaf" a node of out-degree zero. + + When invoked, this function iterates over each path from any root to + any leaf. A path is a list of nodes. 
+ + """ + roots = (v for v, d in G.in_degree() if d == 0) + leaves = (v for v, d in G.out_degree() if d == 0) + all_paths = partial(nx.all_simple_paths, G) + # TODO In Python 3, this would be better as `yield from ...`. + return chaini(starmap(all_paths, product(roots, leaves))) + + +@not_implemented_for("multigraph") +@not_implemented_for("undirected") +@nx._dispatchable(returns_graph=True) +def dag_to_branching(G): + """Returns a branching representing all (overlapping) paths from + root nodes to leaf nodes in the given directed acyclic graph. + + As described in :mod:`networkx.algorithms.tree.recognition`, a + *branching* is a directed forest in which each node has at most one + parent. In other words, a branching is a disjoint union of + *arborescences*. For this function, each node of in-degree zero in + `G` becomes a root of one of the arborescences, and there will be + one leaf node for each distinct path from that root to a leaf node + in `G`. + + Each node `v` in `G` with *k* parents becomes *k* distinct nodes in + the returned branching, one for each parent, and the sub-DAG rooted + at `v` is duplicated for each copy. The algorithm then recurses on + the children of each copy of `v`. + + Parameters + ---------- + G : NetworkX graph + A directed acyclic graph. + + Returns + ------- + DiGraph + The branching in which there is a bijection between root-to-leaf + paths in `G` (in which multiple paths may share the same leaf) + and root-to-leaf paths in the branching (in which there is a + unique path from a root to a leaf). + + Each node has an attribute 'source' whose value is the original + node to which this node corresponds. No other graph, node, or + edge attributes are copied into this new graph. + + Raises + ------ + NetworkXNotImplemented + If `G` is not directed, or if `G` is a multigraph. + + HasACycle + If `G` is not acyclic. 
+ + Examples + -------- + To examine which nodes in the returned branching were produced by + which original node in the directed acyclic graph, we can collect + the mapping from source node to new nodes into a dictionary. For + example, consider the directed diamond graph:: + + >>> from collections import defaultdict + >>> from operator import itemgetter + >>> + >>> G = nx.DiGraph(nx.utils.pairwise("abd")) + >>> G.add_edges_from(nx.utils.pairwise("acd")) + >>> B = nx.dag_to_branching(G) + >>> + >>> sources = defaultdict(set) + >>> for v, source in B.nodes(data="source"): + ... sources[source].add(v) + >>> len(sources["a"]) + 1 + >>> len(sources["d"]) + 2 + + To copy node attributes from the original graph to the new graph, + you can use a dictionary like the one constructed in the above + example:: + + >>> for source, nodes in sources.items(): + ... for v in nodes: + ... B.nodes[v].update(G.nodes[source]) + + Notes + ----- + This function is not idempotent in the sense that the node labels in + the returned branching may be uniquely generated each time the + function is invoked. In fact, the node labels may not be integers; + in order to relabel the nodes to be more readable, you can use the + :func:`networkx.convert_node_labels_to_integers` function. + + The current implementation of this function uses + :func:`networkx.prefix_tree`, so it is subject to the limitations of + that function. + + """ + if has_cycle(G): + msg = "dag_to_branching is only defined for acyclic graphs" + raise nx.HasACycle(msg) + paths = root_to_leaf_paths(G) + B = nx.prefix_tree(paths) + # Remove the synthetic `root`(0) and `NIL`(-1) nodes from the tree + B.remove_node(0) + B.remove_node(-1) + return B + + +@not_implemented_for("undirected") +@nx._dispatchable +def compute_v_structures(G): + """Yields 3-node tuples that represent the v-structures in `G`. + + .. deprecated:: 3.4 + + `compute_v_structures` actually yields colliders. It will be removed in + version 3.6. 
Use `nx.dag.v_structures` or `nx.dag.colliders` instead. + + Colliders are triples in the directed acyclic graph (DAG) where two parent nodes + point to the same child node. V-structures are colliders where the two parent + nodes are not adjacent. In a causal graph setting, the parents do not directly + depend on each other, but conditioning on the child node provides an association. + + Parameters + ---------- + G : graph + A networkx `~networkx.DiGraph`. + + Yields + ------ + A 3-tuple representation of a v-structure + Each v-structure is a 3-tuple with the parent, collider, and other parent. + + Raises + ------ + NetworkXNotImplemented + If `G` is an undirected graph. + + Examples + -------- + >>> G = nx.DiGraph([(1, 2), (0, 4), (3, 1), (2, 4), (0, 5), (4, 5), (1, 5)]) + >>> nx.is_directed_acyclic_graph(G) + True + >>> list(nx.compute_v_structures(G)) + [(0, 4, 2), (0, 5, 4), (0, 5, 1), (4, 5, 1)] + + See Also + -------- + v_structures + colliders + + Notes + ----- + This function was written to be used on DAGs, however it works on cyclic graphs + too. Since colliders are referred to in the cyclic causal graph literature + [2]_ we allow cyclic graphs in this function. It is suggested that you test if + your input graph is acyclic as in the example if you want that property. + + References + ---------- + .. [1] `Pearl's PRIMER `_ + Ch-2 page 50: v-structures def. + .. [2] A Hyttinen, P.O. Hoyer, F. Eberhardt, M J ̈arvisalo, (2013) + "Discovering cyclic causal models with latent variables: + a general SAT-based procedure", UAI'13: Proceedings of the Twenty-Ninth + Conference on Uncertainty in Artificial Intelligence, pg 301–310, + `doi:10.5555/3023638.3023669 `_ + """ + import warnings + + warnings.warn( + ( + "\n\n`compute_v_structures` actually yields colliders. It will be\n" + "removed in version 3.6. 
Use `nx.dag.v_structures` or `nx.dag.colliders`\n" + "instead.\n" + ), + category=DeprecationWarning, + stacklevel=5, + ) + + return colliders(G) + + +@not_implemented_for("undirected") +@nx._dispatchable +def v_structures(G): + """Yields 3-node tuples that represent the v-structures in `G`. + + Colliders are triples in the directed acyclic graph (DAG) where two parent nodes + point to the same child node. V-structures are colliders where the two parent + nodes are not adjacent. In a causal graph setting, the parents do not directly + depend on each other, but conditioning on the child node provides an association. + + Parameters + ---------- + G : graph + A networkx `~networkx.DiGraph`. + + Yields + ------ + A 3-tuple representation of a v-structure + Each v-structure is a 3-tuple with the parent, collider, and other parent. + + Raises + ------ + NetworkXNotImplemented + If `G` is an undirected graph. + + Examples + -------- + >>> G = nx.DiGraph([(1, 2), (0, 4), (3, 1), (2, 4), (0, 5), (4, 5), (1, 5)]) + >>> nx.is_directed_acyclic_graph(G) + True + >>> list(nx.dag.v_structures(G)) + [(0, 4, 2), (0, 5, 1), (4, 5, 1)] + + See Also + -------- + colliders + + Notes + ----- + This function was written to be used on DAGs, however it works on cyclic graphs + too. Since colliders are referred to in the cyclic causal graph literature + [2]_ we allow cyclic graphs in this function. It is suggested that you test if + your input graph is acyclic as in the example if you want that property. + + References + ---------- + .. [1] `Pearl's PRIMER `_ + Ch-2 page 50: v-structures def. + .. [2] A Hyttinen, P.O. Hoyer, F. 
Eberhardt, M J ̈arvisalo, (2013) + "Discovering cyclic causal models with latent variables: + a general SAT-based procedure", UAI'13: Proceedings of the Twenty-Ninth + Conference on Uncertainty in Artificial Intelligence, pg 301–310, + `doi:10.5555/3023638.3023669 `_ + """ + for p1, c, p2 in colliders(G): + if not (G.has_edge(p1, p2) or G.has_edge(p2, p1)): + yield (p1, c, p2) + + +@not_implemented_for("undirected") +@nx._dispatchable +def colliders(G): + """Yields 3-node tuples that represent the colliders in `G`. + + In a Directed Acyclic Graph (DAG), if you have three nodes A, B, and C, and + there are edges from A to C and from B to C, then C is a collider [1]_ . In + a causal graph setting, this means that both events A and B are "causing" C, + and conditioning on C provide an association between A and B even if + no direct causal relationship exists between A and B. + + Parameters + ---------- + G : graph + A networkx `~networkx.DiGraph`. + + Yields + ------ + A 3-tuple representation of a collider + Each collider is a 3-tuple with the parent, collider, and other parent. + + Raises + ------ + NetworkXNotImplemented + If `G` is an undirected graph. + + Examples + -------- + >>> G = nx.DiGraph([(1, 2), (0, 4), (3, 1), (2, 4), (0, 5), (4, 5), (1, 5)]) + >>> nx.is_directed_acyclic_graph(G) + True + >>> list(nx.dag.colliders(G)) + [(0, 4, 2), (0, 5, 4), (0, 5, 1), (4, 5, 1)] + + See Also + -------- + v_structures + + Notes + ----- + This function was written to be used on DAGs, however it works on cyclic graphs + too. Since colliders are referred to in the cyclic causal graph literature + [2]_ we allow cyclic graphs in this function. It is suggested that you test if + your input graph is acyclic as in the example if you want that property. + + References + ---------- + .. [1] `Wikipedia: Collider in causal graphs `_ + .. [2] A Hyttinen, P.O. Hoyer, F. 
Eberhardt, M J ̈arvisalo, (2013) + "Discovering cyclic causal models with latent variables: + a general SAT-based procedure", UAI'13: Proceedings of the Twenty-Ninth + Conference on Uncertainty in Artificial Intelligence, pg 301–310, + `doi:10.5555/3023638.3023669 `_ + """ + for node in G.nodes: + for p1, p2 in combinations(G.predecessors(node), 2): + yield (p1, node, p2) diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/distance_measures.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/distance_measures.py new file mode 100644 index 0000000000000000000000000000000000000000..8e15bf8d9205a96c1faaf73ee0a0d005541a7840 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/distance_measures.py @@ -0,0 +1,1022 @@ +"""Graph diameter, radius, eccentricity and other properties.""" + +import math + +import networkx as nx +from networkx.utils import not_implemented_for + +__all__ = [ + "eccentricity", + "diameter", + "harmonic_diameter", + "radius", + "periphery", + "center", + "barycenter", + "resistance_distance", + "kemeny_constant", + "effective_graph_resistance", +] + + +def _extrema_bounding(G, compute="diameter", weight=None): + """Compute requested extreme distance metric of undirected graph G + + Computation is based on smart lower and upper bounds, and in practice + linear in the number of nodes, rather than quadratic (except for some + border cases such as complete graphs or circle shaped graphs). 
+ + Parameters + ---------- + G : NetworkX graph + An undirected graph + + compute : string denoting the requesting metric + "diameter" for the maximal eccentricity value, + "radius" for the minimal eccentricity value, + "periphery" for the set of nodes with eccentricity equal to the diameter, + "center" for the set of nodes with eccentricity equal to the radius, + "eccentricities" for the maximum distance from each node to all other nodes in G + + weight : string, function, or None + If this is a string, then edge weights will be accessed via the + edge attribute with this key (that is, the weight of the edge + joining `u` to `v` will be ``G.edges[u, v][weight]``). If no + such edge attribute exists, the weight of the edge is assumed to + be one. + + If this is a function, the weight of an edge is the value + returned by the function. The function must accept exactly three + positional arguments: the two endpoints of an edge and the + dictionary of edge attributes for that edge. The function must + return a number. + + If this is None, every edge has weight/distance/cost 1. + + Weights stored as floating point values can lead to small round-off + errors in distances. Use integer weights to avoid this. + + Weights should be positive, since they are distances. + + Returns + ------- + value : value of the requested metric + int for "diameter" and "radius" or + list of nodes for "center" and "periphery" or + dictionary of eccentricity values keyed by node for "eccentricities" + + Raises + ------ + NetworkXError + If the graph consists of multiple components + ValueError + If `compute` is not one of "diameter", "radius", "periphery", "center", or "eccentricities". + + Notes + ----- + This algorithm was proposed in [1]_ and discussed further in [2]_ and [3]_. + + References + ---------- + .. [1] F. W. Takes, W. A. Kosters, + "Determining the diameter of small world networks." 
+ Proceedings of the 20th ACM international conference on Information and knowledge management, 2011 + https://dl.acm.org/doi/abs/10.1145/2063576.2063748 + .. [2] F. W. Takes, W. A. Kosters, + "Computing the Eccentricity Distribution of Large Graphs." + Algorithms, 2013 + https://www.mdpi.com/1999-4893/6/1/100 + .. [3] M. Borassi, P. Crescenzi, M. Habib, W. A. Kosters, A. Marino, F. W. Takes, + "Fast diameter and radius BFS-based computation in (weakly connected) real-world graphs: With an application to the six degrees of separation games. " + Theoretical Computer Science, 2015 + https://www.sciencedirect.com/science/article/pii/S0304397515001644 + """ + # init variables + degrees = dict(G.degree()) # start with the highest degree node + minlowernode = max(degrees, key=degrees.get) + N = len(degrees) # number of nodes + # alternate between smallest lower and largest upper bound + high = False + # status variables + ecc_lower = dict.fromkeys(G, 0) + ecc_upper = dict.fromkeys(G, N) + candidates = set(G) + + # (re)set bound extremes + minlower = N + maxlower = 0 + minupper = N + maxupper = 0 + + # repeat the following until there are no more candidates + while candidates: + if high: + current = maxuppernode # select node with largest upper bound + else: + current = minlowernode # select node with smallest lower bound + high = not high + + # get distances from/to current node and derive eccentricity + dist = nx.shortest_path_length(G, source=current, weight=weight) + + if len(dist) != N: + msg = "Cannot compute metric because graph is not connected." 
+ raise nx.NetworkXError(msg) + current_ecc = max(dist.values()) + + # print status update + # print ("ecc of " + str(current) + " (" + str(ecc_lower[current]) + "/" + # + str(ecc_upper[current]) + ", deg: " + str(dist[current]) + ") is " + # + str(current_ecc)) + # print(ecc_upper) + + # (re)set bound extremes + maxuppernode = None + minlowernode = None + + # update node bounds + for i in candidates: + # update eccentricity bounds + d = dist[i] + ecc_lower[i] = low = max(ecc_lower[i], max(d, (current_ecc - d))) + ecc_upper[i] = upp = min(ecc_upper[i], current_ecc + d) + + # update min/max values of lower and upper bounds + minlower = min(ecc_lower[i], minlower) + maxlower = max(ecc_lower[i], maxlower) + minupper = min(ecc_upper[i], minupper) + maxupper = max(ecc_upper[i], maxupper) + + # update candidate set + if compute == "diameter": + ruled_out = { + i + for i in candidates + if ecc_upper[i] <= maxlower and 2 * ecc_lower[i] >= maxupper + } + elif compute == "radius": + ruled_out = { + i + for i in candidates + if ecc_lower[i] >= minupper and ecc_upper[i] + 1 <= 2 * minlower + } + elif compute == "periphery": + ruled_out = { + i + for i in candidates + if ecc_upper[i] < maxlower + and (maxlower == maxupper or ecc_lower[i] > maxupper) + } + elif compute == "center": + ruled_out = { + i + for i in candidates + if ecc_lower[i] > minupper + and (minlower == minupper or ecc_upper[i] + 1 < 2 * minlower) + } + elif compute == "eccentricities": + ruled_out = set() + else: + msg = "compute must be one of 'diameter', 'radius', 'periphery', 'center', 'eccentricities'" + raise ValueError(msg) + + ruled_out.update(i for i in candidates if ecc_lower[i] == ecc_upper[i]) + candidates -= ruled_out + + # for i in ruled_out: + # print("removing %g: ecc_u: %g maxl: %g ecc_l: %g maxu: %g"% + # (i,ecc_upper[i],maxlower,ecc_lower[i],maxupper)) + # print("node %g: ecc_u: %g maxl: %g ecc_l: %g maxu: %g"% + # (4,ecc_upper[4],maxlower,ecc_lower[4],maxupper)) + # print("NODE 4: 
%g"%(ecc_upper[4] <= maxlower)) + # print("NODE 4: %g"%(2 * ecc_lower[4] >= maxupper)) + # print("NODE 4: %g"%(ecc_upper[4] <= maxlower + # and 2 * ecc_lower[4] >= maxupper)) + + # updating maxuppernode and minlowernode for selection in next round + for i in candidates: + if ( + minlowernode is None + or ( + ecc_lower[i] == ecc_lower[minlowernode] + and degrees[i] > degrees[minlowernode] + ) + or (ecc_lower[i] < ecc_lower[minlowernode]) + ): + minlowernode = i + + if ( + maxuppernode is None + or ( + ecc_upper[i] == ecc_upper[maxuppernode] + and degrees[i] > degrees[maxuppernode] + ) + or (ecc_upper[i] > ecc_upper[maxuppernode]) + ): + maxuppernode = i + + # print status update + # print (" min=" + str(minlower) + "/" + str(minupper) + + # " max=" + str(maxlower) + "/" + str(maxupper) + + # " candidates: " + str(len(candidates))) + # print("cand:",candidates) + # print("ecc_l",ecc_lower) + # print("ecc_u",ecc_upper) + # wait = input("press Enter to continue") + + # return the correct value of the requested metric + if compute == "diameter": + return maxlower + if compute == "radius": + return minupper + if compute == "periphery": + p = [v for v in G if ecc_lower[v] == maxlower] + return p + if compute == "center": + c = [v for v in G if ecc_upper[v] == minupper] + return c + if compute == "eccentricities": + return ecc_lower + return None + + +@nx._dispatchable(edge_attrs="weight") +def eccentricity(G, v=None, sp=None, weight=None): + """Returns the eccentricity of nodes in G. + + The eccentricity of a node v is the maximum distance from v to + all other nodes in G. 
+ + Parameters + ---------- + G : NetworkX graph + A graph + + v : node, optional + Return value of specified node + + sp : dict of dicts, optional + All pairs shortest path lengths as a dictionary of dictionaries + + weight : string, function, or None (default=None) + If this is a string, then edge weights will be accessed via the + edge attribute with this key (that is, the weight of the edge + joining `u` to `v` will be ``G.edges[u, v][weight]``). If no + such edge attribute exists, the weight of the edge is assumed to + be one. + + If this is a function, the weight of an edge is the value + returned by the function. The function must accept exactly three + positional arguments: the two endpoints of an edge and the + dictionary of edge attributes for that edge. The function must + return a number. + + If this is None, every edge has weight/distance/cost 1. + + Weights stored as floating point values can lead to small round-off + errors in distances. Use integer weights to avoid this. + + Weights should be positive, since they are distances. + + Returns + ------- + ecc : dictionary + A dictionary of eccentricity values keyed by node. + + Examples + -------- + >>> G = nx.Graph([(1, 2), (1, 3), (1, 4), (3, 4), (3, 5), (4, 5)]) + >>> dict(nx.eccentricity(G)) + {1: 2, 2: 3, 3: 2, 4: 2, 5: 3} + + >>> dict( + ... nx.eccentricity(G, v=[1, 5]) + ... 
) # This returns the eccentricity of node 1 & 5 + {1: 2, 5: 3} + + """ + # if v is None: # none, use entire graph + # nodes=G.nodes() + # elif v in G: # is v a single node + # nodes=[v] + # else: # assume v is a container of nodes + # nodes=v + order = G.order() + e = {} + for n in G.nbunch_iter(v): + if sp is None: + length = nx.shortest_path_length(G, source=n, weight=weight) + + L = len(length) + else: + try: + length = sp[n] + L = len(length) + except TypeError as err: + raise nx.NetworkXError('Format of "sp" is invalid.') from err + if L != order: + if G.is_directed(): + msg = ( + "Found infinite path length because the digraph is not" + " strongly connected" + ) + else: + msg = "Found infinite path length because the graph is not" " connected" + raise nx.NetworkXError(msg) + + e[n] = max(length.values()) + + if v in G: + return e[v] # return single value + return e + + +@nx._dispatchable(edge_attrs="weight") +def diameter(G, e=None, usebounds=False, weight=None): + """Returns the diameter of the graph G. + + The diameter is the maximum eccentricity. + + Parameters + ---------- + G : NetworkX graph + A graph + + e : eccentricity dictionary, optional + A precomputed dictionary of eccentricities. + + weight : string, function, or None + If this is a string, then edge weights will be accessed via the + edge attribute with this key (that is, the weight of the edge + joining `u` to `v` will be ``G.edges[u, v][weight]``). If no + such edge attribute exists, the weight of the edge is assumed to + be one. + + If this is a function, the weight of an edge is the value + returned by the function. The function must accept exactly three + positional arguments: the two endpoints of an edge and the + dictionary of edge attributes for that edge. The function must + return a number. + + If this is None, every edge has weight/distance/cost 1. + + Weights stored as floating point values can lead to small round-off + errors in distances. Use integer weights to avoid this. 
+ + Weights should be positive, since they are distances. + + Returns + ------- + d : integer + Diameter of graph + + Examples + -------- + >>> G = nx.Graph([(1, 2), (1, 3), (1, 4), (3, 4), (3, 5), (4, 5)]) + >>> nx.diameter(G) + 3 + + See Also + -------- + eccentricity + """ + if usebounds is True and e is None and not G.is_directed(): + return _extrema_bounding(G, compute="diameter", weight=weight) + if e is None: + e = eccentricity(G, weight=weight) + return max(e.values()) + + +@nx._dispatchable +def harmonic_diameter(G, sp=None): + """Returns the harmonic diameter of the graph G. + + The harmonic diameter of a graph is the harmonic mean of the distances + between all pairs of distinct vertices. Graphs that are not strongly + connected have infinite diameter and mean distance, making such + measures not useful. Restricting the diameter or mean distance to + finite distances yields paradoxical values (e.g., a perfect match + would have diameter one). The harmonic mean handles gracefully + infinite distances (e.g., a perfect match has harmonic diameter equal + to the number of vertices minus one), making it possible to assign a + meaningful value to all graphs. + + Note that in [1] the harmonic diameter is called "connectivity length": + however, "harmonic diameter" is a more standard name from the + theory of metric spaces. The name "harmonic mean distance" is perhaps + a more descriptive name, but is not used in the literature, so we use the + name "harmonic diameter" here. + + Parameters + ---------- + G : NetworkX graph + A graph + + sp : dict of dicts, optional + All-pairs shortest path lengths as a dictionary of dictionaries + + Returns + ------- + hd : float + Harmonic diameter of graph + + References + ---------- + .. [1] Massimo Marchiori and Vito Latora, "Harmony in the small-world". + *Physica A: Statistical Mechanics and Its Applications* + 285(3-4), pages 539-546, 2000. 
+ + """ + order = G.order() + + sum_invd = 0 + for n in G: + if sp is None: + length = nx.single_source_shortest_path_length(G, n) + else: + try: + length = sp[n] + L = len(length) + except TypeError as err: + raise nx.NetworkXError('Format of "sp" is invalid.') from err + + for d in length.values(): + # Note that this will skip the zero distance from n to itself, + # as it should be, but also zero-weight paths in weighted graphs. + if d != 0: + sum_invd += 1 / d + + if sum_invd != 0: + return order * (order - 1) / sum_invd + if order > 1: + return math.inf + return math.nan + + +@nx._dispatchable(edge_attrs="weight") +def periphery(G, e=None, usebounds=False, weight=None): + """Returns the periphery of the graph G. + + The periphery is the set of nodes with eccentricity equal to the diameter. + + Parameters + ---------- + G : NetworkX graph + A graph + + e : eccentricity dictionary, optional + A precomputed dictionary of eccentricities. + + weight : string, function, or None + If this is a string, then edge weights will be accessed via the + edge attribute with this key (that is, the weight of the edge + joining `u` to `v` will be ``G.edges[u, v][weight]``). If no + such edge attribute exists, the weight of the edge is assumed to + be one. + + If this is a function, the weight of an edge is the value + returned by the function. The function must accept exactly three + positional arguments: the two endpoints of an edge and the + dictionary of edge attributes for that edge. The function must + return a number. + + If this is None, every edge has weight/distance/cost 1. + + Weights stored as floating point values can lead to small round-off + errors in distances. Use integer weights to avoid this. + + Weights should be positive, since they are distances. 
+ + Returns + ------- + p : list + List of nodes in periphery + + Examples + -------- + >>> G = nx.Graph([(1, 2), (1, 3), (1, 4), (3, 4), (3, 5), (4, 5)]) + >>> nx.periphery(G) + [2, 5] + + See Also + -------- + barycenter + center + """ + if usebounds is True and e is None and not G.is_directed(): + return _extrema_bounding(G, compute="periphery", weight=weight) + if e is None: + e = eccentricity(G, weight=weight) + diameter = max(e.values()) + p = [v for v in e if e[v] == diameter] + return p + + +@nx._dispatchable(edge_attrs="weight") +def radius(G, e=None, usebounds=False, weight=None): + """Returns the radius of the graph G. + + The radius is the minimum eccentricity. + + Parameters + ---------- + G : NetworkX graph + A graph + + e : eccentricity dictionary, optional + A precomputed dictionary of eccentricities. + + weight : string, function, or None + If this is a string, then edge weights will be accessed via the + edge attribute with this key (that is, the weight of the edge + joining `u` to `v` will be ``G.edges[u, v][weight]``). If no + such edge attribute exists, the weight of the edge is assumed to + be one. + + If this is a function, the weight of an edge is the value + returned by the function. The function must accept exactly three + positional arguments: the two endpoints of an edge and the + dictionary of edge attributes for that edge. The function must + return a number. + + If this is None, every edge has weight/distance/cost 1. + + Weights stored as floating point values can lead to small round-off + errors in distances. Use integer weights to avoid this. + + Weights should be positive, since they are distances. 
+ + Returns + ------- + r : integer + Radius of graph + + Examples + -------- + >>> G = nx.Graph([(1, 2), (1, 3), (1, 4), (3, 4), (3, 5), (4, 5)]) + >>> nx.radius(G) + 2 + + """ + if usebounds is True and e is None and not G.is_directed(): + return _extrema_bounding(G, compute="radius", weight=weight) + if e is None: + e = eccentricity(G, weight=weight) + return min(e.values()) + + +@nx._dispatchable(edge_attrs="weight") +def center(G, e=None, usebounds=False, weight=None): + """Returns the center of the graph G. + + The center is the set of nodes with eccentricity equal to radius. + + Parameters + ---------- + G : NetworkX graph + A graph + + e : eccentricity dictionary, optional + A precomputed dictionary of eccentricities. + + weight : string, function, or None + If this is a string, then edge weights will be accessed via the + edge attribute with this key (that is, the weight of the edge + joining `u` to `v` will be ``G.edges[u, v][weight]``). If no + such edge attribute exists, the weight of the edge is assumed to + be one. + + If this is a function, the weight of an edge is the value + returned by the function. The function must accept exactly three + positional arguments: the two endpoints of an edge and the + dictionary of edge attributes for that edge. The function must + return a number. + + If this is None, every edge has weight/distance/cost 1. + + Weights stored as floating point values can lead to small round-off + errors in distances. Use integer weights to avoid this. + + Weights should be positive, since they are distances. 
+ + Returns + ------- + c : list + List of nodes in center + + Examples + -------- + >>> G = nx.Graph([(1, 2), (1, 3), (1, 4), (3, 4), (3, 5), (4, 5)]) + >>> list(nx.center(G)) + [1, 3, 4] + + See Also + -------- + barycenter + periphery + """ + if usebounds is True and e is None and not G.is_directed(): + return _extrema_bounding(G, compute="center", weight=weight) + if e is None: + e = eccentricity(G, weight=weight) + radius = min(e.values()) + p = [v for v in e if e[v] == radius] + return p + + +@nx._dispatchable(edge_attrs="weight", mutates_input={"attr": 2}) +def barycenter(G, weight=None, attr=None, sp=None): + r"""Calculate barycenter of a connected graph, optionally with edge weights. + + The :dfn:`barycenter` a + :func:`connected ` graph + :math:`G` is the subgraph induced by the set of its nodes :math:`v` + minimizing the objective function + + .. math:: + + \sum_{u \in V(G)} d_G(u, v), + + where :math:`d_G` is the (possibly weighted) :func:`path length + `. + The barycenter is also called the :dfn:`median`. See [West01]_, p. 78. + + Parameters + ---------- + G : :class:`networkx.Graph` + The connected graph :math:`G`. + weight : :class:`str`, optional + Passed through to + :func:`~networkx.algorithms.shortest_paths.generic.shortest_path_length`. + attr : :class:`str`, optional + If given, write the value of the objective function to each node's + `attr` attribute. Otherwise do not store the value. + sp : dict of dicts, optional + All pairs shortest path lengths as a dictionary of dictionaries + + Returns + ------- + list + Nodes of `G` that induce the barycenter of `G`. + + Raises + ------ + NetworkXNoPath + If `G` is disconnected. `G` may appear disconnected to + :func:`barycenter` if `sp` is given but is missing shortest path + lengths for any pairs. + ValueError + If `sp` and `weight` are both given. 
+ + Examples + -------- + >>> G = nx.Graph([(1, 2), (1, 3), (1, 4), (3, 4), (3, 5), (4, 5)]) + >>> nx.barycenter(G) + [1, 3, 4] + + See Also + -------- + center + periphery + """ + if sp is None: + sp = nx.shortest_path_length(G, weight=weight) + else: + sp = sp.items() + if weight is not None: + raise ValueError("Cannot use both sp, weight arguments together") + smallest, barycenter_vertices, n = float("inf"), [], len(G) + for v, dists in sp: + if len(dists) < n: + raise nx.NetworkXNoPath( + f"Input graph {G} is disconnected, so every induced subgraph " + "has infinite barycentricity." + ) + barycentricity = sum(dists.values()) + if attr is not None: + G.nodes[v][attr] = barycentricity + if barycentricity < smallest: + smallest = barycentricity + barycenter_vertices = [v] + elif barycentricity == smallest: + barycenter_vertices.append(v) + if attr is not None: + nx._clear_cache(G) + return barycenter_vertices + + +@not_implemented_for("directed") +@nx._dispatchable(edge_attrs="weight") +def resistance_distance(G, nodeA=None, nodeB=None, weight=None, invert_weight=True): + """Returns the resistance distance between pairs of nodes in graph G. + + The resistance distance between two nodes of a graph is akin to treating + the graph as a grid of resistors with a resistance equal to the provided + weight [1]_, [2]_. + + If weight is not provided, then a weight of 1 is used for all edges. + + If two nodes are the same, the resistance distance is zero. + + Parameters + ---------- + G : NetworkX graph + A graph + + nodeA : node or None, optional (default=None) + A node within graph G. + If None, compute resistance distance using all nodes as source nodes. + + nodeB : node or None, optional (default=None) + A node within graph G. + If None, compute resistance distance using all nodes as target nodes. + + weight : string or None, optional (default=None) + The edge data key used to compute the resistance distance. + If None, then each edge has weight 1. 
+ + invert_weight : boolean (default=True) + Proper calculation of resistance distance requires building the + Laplacian matrix with the reciprocal of the weight. Not required + if the weight is already inverted. Weight cannot be zero. + + Returns + ------- + rd : dict or float + If `nodeA` and `nodeB` are given, resistance distance between `nodeA` + and `nodeB`. If `nodeA` or `nodeB` is unspecified (the default), a + dictionary of nodes with resistance distances as the value. + + Raises + ------ + NetworkXNotImplemented + If `G` is a directed graph. + + NetworkXError + If `G` is not connected, or contains no nodes, + or `nodeA` is not in `G` or `nodeB` is not in `G`. + + Examples + -------- + >>> G = nx.Graph([(1, 2), (1, 3), (1, 4), (3, 4), (3, 5), (4, 5)]) + >>> round(nx.resistance_distance(G, 1, 3), 10) + 0.625 + + Notes + ----- + The implementation is based on Theorem A in [2]_. Self-loops are ignored. + Multi-edges are contracted in one edge with weight equal to the harmonic sum of the weights. + + References + ---------- + .. [1] Wikipedia + "Resistance distance." + https://en.wikipedia.org/wiki/Resistance_distance + .. [2] D. J. Klein and M. Randic. + Resistance distance. + J. of Math. Chem. 12:81-95, 1993. 
+ """ + import numpy as np + + if len(G) == 0: + raise nx.NetworkXError("Graph G must contain at least one node.") + if not nx.is_connected(G): + raise nx.NetworkXError("Graph G must be strongly connected.") + if nodeA is not None and nodeA not in G: + raise nx.NetworkXError("Node A is not in graph G.") + if nodeB is not None and nodeB not in G: + raise nx.NetworkXError("Node B is not in graph G.") + + G = G.copy() + node_list = list(G) + + # Invert weights + if invert_weight and weight is not None: + if G.is_multigraph(): + for u, v, k, d in G.edges(keys=True, data=True): + d[weight] = 1 / d[weight] + else: + for u, v, d in G.edges(data=True): + d[weight] = 1 / d[weight] + + # Compute resistance distance using the Pseudo-inverse of the Laplacian + # Self-loops are ignored + L = nx.laplacian_matrix(G, weight=weight).todense() + Linv = np.linalg.pinv(L, hermitian=True) + + # Return relevant distances + if nodeA is not None and nodeB is not None: + i = node_list.index(nodeA) + j = node_list.index(nodeB) + return Linv.item(i, i) + Linv.item(j, j) - Linv.item(i, j) - Linv.item(j, i) + + elif nodeA is not None: + i = node_list.index(nodeA) + d = {} + for n in G: + j = node_list.index(n) + d[n] = Linv.item(i, i) + Linv.item(j, j) - Linv.item(i, j) - Linv.item(j, i) + return d + + elif nodeB is not None: + j = node_list.index(nodeB) + d = {} + for n in G: + i = node_list.index(n) + d[n] = Linv.item(i, i) + Linv.item(j, j) - Linv.item(i, j) - Linv.item(j, i) + return d + + else: + d = {} + for n in G: + i = node_list.index(n) + d[n] = {} + for n2 in G: + j = node_list.index(n2) + d[n][n2] = ( + Linv.item(i, i) + + Linv.item(j, j) + - Linv.item(i, j) + - Linv.item(j, i) + ) + return d + + +@not_implemented_for("directed") +@nx._dispatchable(edge_attrs="weight") +def effective_graph_resistance(G, weight=None, invert_weight=True): + """Returns the Effective graph resistance of G. + + Also known as the Kirchhoff index. 
+ + The effective graph resistance is defined as the sum + of the resistance distance of every node pair in G [1]_. + + If weight is not provided, then a weight of 1 is used for all edges. + + The effective graph resistance of a disconnected graph is infinite. + + Parameters + ---------- + G : NetworkX graph + A graph + + weight : string or None, optional (default=None) + The edge data key used to compute the effective graph resistance. + If None, then each edge has weight 1. + + invert_weight : boolean (default=True) + Proper calculation of resistance distance requires building the + Laplacian matrix with the reciprocal of the weight. Not required + if the weight is already inverted. Weight cannot be zero. + + Returns + ------- + RG : float + The effective graph resistance of `G`. + + Raises + ------ + NetworkXNotImplemented + If `G` is a directed graph. + + NetworkXError + If `G` does not contain any nodes. + + Examples + -------- + >>> G = nx.Graph([(1, 2), (1, 3), (1, 4), (3, 4), (3, 5), (4, 5)]) + >>> round(nx.effective_graph_resistance(G), 10) + 10.25 + + Notes + ----- + The implementation is based on Theorem 2.2 in [2]_. Self-loops are ignored. + Multi-edges are contracted in one edge with weight equal to the harmonic sum of the weights. + + References + ---------- + .. [1] Wolfram + "Kirchhoff Index." + https://mathworld.wolfram.com/KirchhoffIndex.html + .. [2] W. Ellens, F. M. Spieksma, P. Van Mieghem, A. Jamakovic, R. E. Kooij. + Effective graph resistance. + Lin. Alg. Appl. 435:2491-2506, 2011. 
+ """ + import numpy as np + + if len(G) == 0: + raise nx.NetworkXError("Graph G must contain at least one node.") + + # Disconnected graphs have infinite Effective graph resistance + if not nx.is_connected(G): + return float("inf") + + # Invert weights + G = G.copy() + if invert_weight and weight is not None: + if G.is_multigraph(): + for u, v, k, d in G.edges(keys=True, data=True): + d[weight] = 1 / d[weight] + else: + for u, v, d in G.edges(data=True): + d[weight] = 1 / d[weight] + + # Get Laplacian eigenvalues + mu = np.sort(nx.laplacian_spectrum(G, weight=weight)) + + # Compute Effective graph resistance based on spectrum of the Laplacian + # Self-loops are ignored + return float(np.sum(1 / mu[1:]) * G.number_of_nodes()) + + +@nx.utils.not_implemented_for("directed") +@nx._dispatchable(edge_attrs="weight") +def kemeny_constant(G, *, weight=None): + """Returns the Kemeny constant of the given graph. + + The *Kemeny constant* (or Kemeny's constant) of a graph `G` + can be computed by regarding the graph as a Markov chain. + The Kemeny constant is then the expected number of time steps + to transition from a starting state i to a random destination state + sampled from the Markov chain's stationary distribution. + The Kemeny constant is independent of the chosen initial state [1]_. + + The Kemeny constant measures the time needed for spreading + across a graph. Low values indicate a closely connected graph + whereas high values indicate a spread-out graph. + + If weight is not provided, then a weight of 1 is used for all edges. + + Since `G` represents a Markov chain, the weights must be positive. + + Parameters + ---------- + G : NetworkX graph + + weight : string or None, optional (default=None) + The edge data key used to compute the Kemeny constant. + If None, then each edge has weight 1. + + Returns + ------- + float + The Kemeny constant of the graph `G`. + + Raises + ------ + NetworkXNotImplemented + If the graph `G` is directed. 
+ + NetworkXError + If the graph `G` is not connected, or contains no nodes, + or has edges with negative weights. + + Examples + -------- + >>> G = nx.complete_graph(5) + >>> round(nx.kemeny_constant(G), 10) + 3.2 + + Notes + ----- + The implementation is based on equation (3.3) in [2]_. + Self-loops are allowed and indicate a Markov chain where + the state can remain the same. Multi-edges are contracted + in one edge with weight equal to the sum of the weights. + + References + ---------- + .. [1] Wikipedia + "Kemeny's constant." + https://en.wikipedia.org/wiki/Kemeny%27s_constant + .. [2] Lovász L. + Random walks on graphs: A survey. + Paul Erdös is Eighty, vol. 2, Bolyai Society, + Mathematical Studies, Keszthely, Hungary (1993), pp. 1-46 + """ + import numpy as np + import scipy as sp + + if len(G) == 0: + raise nx.NetworkXError("Graph G must contain at least one node.") + if not nx.is_connected(G): + raise nx.NetworkXError("Graph G must be connected.") + if nx.is_negatively_weighted(G, weight=weight): + raise nx.NetworkXError("The weights of graph G must be nonnegative.") + + # Compute matrix H = D^-1/2 A D^-1/2 + A = nx.adjacency_matrix(G, weight=weight) + n, m = A.shape + diags = A.sum(axis=1) + with np.errstate(divide="ignore"): + diags_sqrt = 1.0 / np.sqrt(diags) + diags_sqrt[np.isinf(diags_sqrt)] = 0 + DH = sp.sparse.csr_array(sp.sparse.spdiags(diags_sqrt, 0, m, n, format="csr")) + H = DH @ (A @ DH) + + # Compute eigenvalues of H + eig = np.sort(sp.linalg.eigvalsh(H.todense())) + + # Compute the Kemeny constant + return float(np.sum(1 / (1 - eig[:-1]))) diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/distance_regular.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/distance_regular.py new file mode 100644 index 0000000000000000000000000000000000000000..27b4d0216e427a03f6cc0b90d15f4debb2d52b56 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/distance_regular.py @@ -0,0 +1,238 @@ +""" 
+======================= +Distance-regular graphs +======================= +""" + +import networkx as nx +from networkx.utils import not_implemented_for + +from .distance_measures import diameter + +__all__ = [ + "is_distance_regular", + "is_strongly_regular", + "intersection_array", + "global_parameters", +] + + +@nx._dispatchable +def is_distance_regular(G): + """Returns True if the graph is distance regular, False otherwise. + + A connected graph G is distance-regular if for any nodes x,y + and any integers i,j=0,1,...,d (where d is the graph + diameter), the number of vertices at distance i from x and + distance j from y depends only on i,j and the graph distance + between x and y, independently of the choice of x and y. + + Parameters + ---------- + G: Networkx graph (undirected) + + Returns + ------- + bool + True if the graph is Distance Regular, False otherwise + + Examples + -------- + >>> G = nx.hypercube_graph(6) + >>> nx.is_distance_regular(G) + True + + See Also + -------- + intersection_array, global_parameters + + Notes + ----- + For undirected and simple graphs only + + References + ---------- + .. [1] Brouwer, A. E.; Cohen, A. M.; and Neumaier, A. + Distance-Regular Graphs. New York: Springer-Verlag, 1989. + .. [2] Weisstein, Eric W. "Distance-Regular Graph." + http://mathworld.wolfram.com/Distance-RegularGraph.html + + """ + try: + intersection_array(G) + return True + except nx.NetworkXError: + return False + + +def global_parameters(b, c): + """Returns global parameters for a given intersection array. + + Given a distance-regular graph G with integers b_i, c_i,i = 0,....,d + such that for any 2 vertices x,y in G at a distance i=d(x,y), there + are exactly c_i neighbors of y at a distance of i-1 from x and b_i + neighbors of y at a distance of i+1 from x. 
+ + Thus, a distance regular graph has the global parameters, + [[c_0,a_0,b_0],[c_1,a_1,b_1],......,[c_d,a_d,b_d]] for the + intersection array [b_0,b_1,.....b_{d-1};c_1,c_2,.....c_d] + where a_i+b_i+c_i=k , k= degree of every vertex. + + Parameters + ---------- + b : list + + c : list + + Returns + ------- + iterable + An iterable over three tuples. + + Examples + -------- + >>> G = nx.dodecahedral_graph() + >>> b, c = nx.intersection_array(G) + >>> list(nx.global_parameters(b, c)) + [(0, 0, 3), (1, 0, 2), (1, 1, 1), (1, 1, 1), (2, 0, 1), (3, 0, 0)] + + References + ---------- + .. [1] Weisstein, Eric W. "Global Parameters." + From MathWorld--A Wolfram Web Resource. + http://mathworld.wolfram.com/GlobalParameters.html + + See Also + -------- + intersection_array + """ + return ((y, b[0] - x - y, x) for x, y in zip(b + [0], [0] + c)) + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +@nx._dispatchable +def intersection_array(G): + """Returns the intersection array of a distance-regular graph. + + Given a distance-regular graph G with integers b_i, c_i,i = 0,....,d + such that for any 2 vertices x,y in G at a distance i=d(x,y), there + are exactly c_i neighbors of y at a distance of i-1 from x and b_i + neighbors of y at a distance of i+1 from x. + + A distance regular graph's intersection array is given by, + [b_0,b_1,.....b_{d-1};c_1,c_2,.....c_d] + + Parameters + ---------- + G: Networkx graph (undirected) + + Returns + ------- + b,c: tuple of lists + + Examples + -------- + >>> G = nx.icosahedral_graph() + >>> nx.intersection_array(G) + ([5, 2, 1], [1, 2, 5]) + + References + ---------- + .. [1] Weisstein, Eric W. "Intersection Array." + From MathWorld--A Wolfram Web Resource. 
+ http://mathworld.wolfram.com/IntersectionArray.html + + See Also + -------- + global_parameters + """ + # test for regular graph (all degrees must be equal) + if len(G) == 0: + raise nx.NetworkXPointlessConcept("Graph has no nodes.") + degree = iter(G.degree()) + (_, k) = next(degree) + for _, knext in degree: + if knext != k: + raise nx.NetworkXError("Graph is not distance regular.") + k = knext + path_length = dict(nx.all_pairs_shortest_path_length(G)) + diameter = max(max(path_length[n].values()) for n in path_length) + bint = {} # 'b' intersection array + cint = {} # 'c' intersection array + for u in G: + for v in G: + try: + i = path_length[u][v] + except KeyError as err: # graph must be connected + raise nx.NetworkXError("Graph is not distance regular.") from err + # number of neighbors of v at a distance of i-1 from u + c = len([n for n in G[v] if path_length[n][u] == i - 1]) + # number of neighbors of v at a distance of i+1 from u + b = len([n for n in G[v] if path_length[n][u] == i + 1]) + # b,c are independent of u and v + if cint.get(i, c) != c or bint.get(i, b) != b: + raise nx.NetworkXError("Graph is not distance regular") + bint[i] = b + cint[i] = c + return ( + [bint.get(j, 0) for j in range(diameter)], + [cint.get(j + 1, 0) for j in range(diameter)], + ) + + +# TODO There is a definition for directed strongly regular graphs. +@not_implemented_for("directed") +@not_implemented_for("multigraph") +@nx._dispatchable +def is_strongly_regular(G): + """Returns True if and only if the given graph is strongly + regular. + + An undirected graph is *strongly regular* if + + * it is regular, + * each pair of adjacent vertices has the same number of neighbors in + common, + * each pair of nonadjacent vertices has the same number of neighbors + in common. + + Each strongly regular graph is a distance-regular graph. + Conversely, if a distance-regular graph has diameter two, then it is + a strongly regular graph. 
For more information on distance-regular + graphs, see :func:`is_distance_regular`. + + Parameters + ---------- + G : NetworkX graph + An undirected graph. + + Returns + ------- + bool + Whether `G` is strongly regular. + + Examples + -------- + + The cycle graph on five vertices is strongly regular. It is + two-regular, each pair of adjacent vertices has no shared neighbors, + and each pair of nonadjacent vertices has one shared neighbor:: + + >>> G = nx.cycle_graph(5) + >>> nx.is_strongly_regular(G) + True + + """ + # Here is an alternate implementation based directly on the + # definition of strongly regular graphs: + # + # return (all_equal(G.degree().values()) + # and all_equal(len(common_neighbors(G, u, v)) + # for u, v in G.edges()) + # and all_equal(len(common_neighbors(G, u, v)) + # for u, v in non_edges(G))) + # + # We instead use the fact that a distance-regular graph of diameter + # two is strongly regular. + return is_distance_regular(G) and diameter(G) == 2 diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/dominance.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/dominance.py new file mode 100644 index 0000000000000000000000000000000000000000..30cb8115c3ad6924a523f015ea2161417d401679 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/dominance.py @@ -0,0 +1,135 @@ +""" +Dominance algorithms. +""" + +from functools import reduce + +import networkx as nx +from networkx.utils import not_implemented_for + +__all__ = ["immediate_dominators", "dominance_frontiers"] + + +@not_implemented_for("undirected") +@nx._dispatchable +def immediate_dominators(G, start): + """Returns the immediate dominators of all nodes of a directed graph. + + Parameters + ---------- + G : a DiGraph or MultiDiGraph + The graph where dominance is to be computed. + + start : node + The start node of dominance computation. 
+ + Returns + ------- + idom : dict keyed by nodes + A dict containing the immediate dominators of each node reachable from + `start`. + + Raises + ------ + NetworkXNotImplemented + If `G` is undirected. + + NetworkXError + If `start` is not in `G`. + + Notes + ----- + Except for `start`, the immediate dominators are the parents of their + corresponding nodes in the dominator tree. + + Examples + -------- + >>> G = nx.DiGraph([(1, 2), (1, 3), (2, 5), (3, 4), (4, 5)]) + >>> sorted(nx.immediate_dominators(G, 1).items()) + [(1, 1), (2, 1), (3, 1), (4, 3), (5, 1)] + + References + ---------- + .. [1] Cooper, Keith D., Harvey, Timothy J. and Kennedy, Ken. + "A simple, fast dominance algorithm." (2006). + https://hdl.handle.net/1911/96345 + """ + if start not in G: + raise nx.NetworkXError("start is not in G") + + idom = {start: start} + + order = list(nx.dfs_postorder_nodes(G, start)) + dfn = {u: i for i, u in enumerate(order)} + order.pop() + order.reverse() + + def intersect(u, v): + while u != v: + while dfn[u] < dfn[v]: + u = idom[u] + while dfn[u] > dfn[v]: + v = idom[v] + return u + + changed = True + while changed: + changed = False + for u in order: + new_idom = reduce(intersect, (v for v in G.pred[u] if v in idom)) + if u not in idom or idom[u] != new_idom: + idom[u] = new_idom + changed = True + + return idom + + +@nx._dispatchable +def dominance_frontiers(G, start): + """Returns the dominance frontiers of all nodes of a directed graph. + + Parameters + ---------- + G : a DiGraph or MultiDiGraph + The graph where dominance is to be computed. + + start : node + The start node of dominance computation. + + Returns + ------- + df : dict keyed by nodes + A dict containing the dominance frontiers of each node reachable from + `start` as lists. + + Raises + ------ + NetworkXNotImplemented + If `G` is undirected. + + NetworkXError + If `start` is not in `G`. 
+ + Examples + -------- + >>> G = nx.DiGraph([(1, 2), (1, 3), (2, 5), (3, 4), (4, 5)]) + >>> sorted((u, sorted(df)) for u, df in nx.dominance_frontiers(G, 1).items()) + [(1, []), (2, [5]), (3, [5]), (4, [5]), (5, [])] + + References + ---------- + .. [1] Cooper, Keith D., Harvey, Timothy J. and Kennedy, Ken. + "A simple, fast dominance algorithm." (2006). + https://hdl.handle.net/1911/96345 + """ + idom = nx.immediate_dominators(G, start) + + df = {u: set() for u in idom} + for u in idom: + if len(G.pred[u]) >= 2: + for v in G.pred[u]: + if v in idom: + while v != idom[u]: + df[v].add(u) + v = idom[v] + return df diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/efficiency_measures.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/efficiency_measures.py new file mode 100644 index 0000000000000000000000000000000000000000..b8e9d7a9e680e7db5d61b87e067c03a6d603c3af --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/efficiency_measures.py @@ -0,0 +1,167 @@ +"""Provides functions for computing the efficiency of nodes and graphs.""" + +import networkx as nx +from networkx.exception import NetworkXNoPath + +from ..utils import not_implemented_for + +__all__ = ["efficiency", "local_efficiency", "global_efficiency"] + + +@not_implemented_for("directed") +@nx._dispatchable +def efficiency(G, u, v): + """Returns the efficiency of a pair of nodes in a graph. + + The *efficiency* of a pair of nodes is the multiplicative inverse of the + shortest path distance between the nodes [1]_. Returns 0 if no path + between nodes. + + Parameters + ---------- + G : :class:`networkx.Graph` + An undirected graph for which to compute the average local efficiency. + u, v : node + Nodes in the graph ``G``. + + Returns + ------- + float + Multiplicative inverse of the shortest path distance between the nodes. 
+ + Examples + -------- + >>> G = nx.Graph([(0, 1), (0, 2), (0, 3), (1, 2), (1, 3)]) + >>> nx.efficiency(G, 2, 3) # this gives efficiency for node 2 and 3 + 0.5 + + Notes + ----- + Edge weights are ignored when computing the shortest path distances. + + See also + -------- + local_efficiency + global_efficiency + + References + ---------- + .. [1] Latora, Vito, and Massimo Marchiori. + "Efficient behavior of small-world networks." + *Physical Review Letters* 87.19 (2001): 198701. + + + """ + try: + eff = 1 / nx.shortest_path_length(G, u, v) + except NetworkXNoPath: + eff = 0 + return eff + + +@not_implemented_for("directed") +@nx._dispatchable +def global_efficiency(G): + """Returns the average global efficiency of the graph. + + The *efficiency* of a pair of nodes in a graph is the multiplicative + inverse of the shortest path distance between the nodes. The *average + global efficiency* of a graph is the average efficiency of all pairs of + nodes [1]_. + + Parameters + ---------- + G : :class:`networkx.Graph` + An undirected graph for which to compute the average global efficiency. + + Returns + ------- + float + The average global efficiency of the graph. + + Examples + -------- + >>> G = nx.Graph([(0, 1), (0, 2), (0, 3), (1, 2), (1, 3)]) + >>> round(nx.global_efficiency(G), 12) + 0.916666666667 + + Notes + ----- + Edge weights are ignored when computing the shortest path distances. + + See also + -------- + local_efficiency + + References + ---------- + .. [1] Latora, Vito, and Massimo Marchiori. + "Efficient behavior of small-world networks." + *Physical Review Letters* 87.19 (2001): 198701. 
+ + + """ + n = len(G) + denom = n * (n - 1) + if denom != 0: + lengths = nx.all_pairs_shortest_path_length(G) + g_eff = 0 + for source, targets in lengths: + for target, distance in targets.items(): + if distance > 0: + g_eff += 1 / distance + g_eff /= denom + # g_eff = sum(1 / d for s, tgts in lengths + # for t, d in tgts.items() if d > 0) / denom + else: + g_eff = 0 + # TODO This can be made more efficient by computing all pairs shortest + # path lengths in parallel. + return g_eff + + +@not_implemented_for("directed") +@nx._dispatchable +def local_efficiency(G): + """Returns the average local efficiency of the graph. + + The *efficiency* of a pair of nodes in a graph is the multiplicative + inverse of the shortest path distance between the nodes. The *local + efficiency* of a node in the graph is the average global efficiency of the + subgraph induced by the neighbors of the node. The *average local + efficiency* is the average of the local efficiencies of each node [1]_. + + Parameters + ---------- + G : :class:`networkx.Graph` + An undirected graph for which to compute the average local efficiency. + + Returns + ------- + float + The average local efficiency of the graph. + + Examples + -------- + >>> G = nx.Graph([(0, 1), (0, 2), (0, 3), (1, 2), (1, 3)]) + >>> nx.local_efficiency(G) + 0.9166666666666667 + + Notes + ----- + Edge weights are ignored when computing the shortest path distances. + + See also + -------- + global_efficiency + + References + ---------- + .. [1] Latora, Vito, and Massimo Marchiori. + "Efficient behavior of small-world networks." + *Physical Review Letters* 87.19 (2001): 198701. 
+ + + """ + efficiency_list = (global_efficiency(G.subgraph(G[v])) for v in G) + return sum(efficiency_list) / len(G) diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/euler.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/euler.py new file mode 100644 index 0000000000000000000000000000000000000000..2c308e380c774a6450d4ce275118ccffd65defaa --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/euler.py @@ -0,0 +1,470 @@ +""" +Eulerian circuits and graphs. +""" + +from itertools import combinations + +import networkx as nx + +from ..utils import arbitrary_element, not_implemented_for + +__all__ = [ + "is_eulerian", + "eulerian_circuit", + "eulerize", + "is_semieulerian", + "has_eulerian_path", + "eulerian_path", +] + + +@nx._dispatchable +def is_eulerian(G): + """Returns True if and only if `G` is Eulerian. + + A graph is *Eulerian* if it has an Eulerian circuit. An *Eulerian + circuit* is a closed walk that includes each edge of a graph exactly + once. + + Graphs with isolated vertices (i.e. vertices with zero degree) are not + considered to have Eulerian circuits. Therefore, if the graph is not + connected (or not strongly connected, for directed graphs), this function + returns False. + + Parameters + ---------- + G : NetworkX graph + A graph, either directed or undirected. + + Examples + -------- + >>> nx.is_eulerian(nx.DiGraph({0: [3], 1: [2], 2: [3], 3: [0, 1]})) + True + >>> nx.is_eulerian(nx.complete_graph(5)) + True + >>> nx.is_eulerian(nx.petersen_graph()) + False + + If you prefer to allow graphs with isolated vertices to have Eulerian circuits, + you can first remove such vertices and then call `is_eulerian` as below example shows. 
+ + >>> G = nx.Graph([(0, 1), (1, 2), (0, 2)]) + >>> G.add_node(3) + >>> nx.is_eulerian(G) + False + + >>> G.remove_nodes_from(list(nx.isolates(G))) + >>> nx.is_eulerian(G) + True + + + """ + if G.is_directed(): + # Every node must have equal in degree and out degree and the + # graph must be strongly connected + return all( + G.in_degree(n) == G.out_degree(n) for n in G + ) and nx.is_strongly_connected(G) + # An undirected Eulerian graph has no vertices of odd degree and + # must be connected. + return all(d % 2 == 0 for v, d in G.degree()) and nx.is_connected(G) + + +@nx._dispatchable +def is_semieulerian(G): + """Return True iff `G` is semi-Eulerian. + + G is semi-Eulerian if it has an Eulerian path but no Eulerian circuit. + + See Also + -------- + has_eulerian_path + is_eulerian + """ + return has_eulerian_path(G) and not is_eulerian(G) + + +def _find_path_start(G): + """Return a suitable starting vertex for an Eulerian path. + + If no path exists, return None. + """ + if not has_eulerian_path(G): + return None + + if is_eulerian(G): + return arbitrary_element(G) + + if G.is_directed(): + v1, v2 = (v for v in G if G.in_degree(v) != G.out_degree(v)) + # Determines which is the 'start' node (as opposed to the 'end') + if G.out_degree(v1) > G.in_degree(v1): + return v1 + else: + return v2 + + else: + # In an undirected graph randomly choose one of the possibilities + start = [v for v in G if G.degree(v) % 2 != 0][0] + return start + + +def _simplegraph_eulerian_circuit(G, source): + if G.is_directed(): + degree = G.out_degree + edges = G.out_edges + else: + degree = G.degree + edges = G.edges + vertex_stack = [source] + last_vertex = None + while vertex_stack: + current_vertex = vertex_stack[-1] + if degree(current_vertex) == 0: + if last_vertex is not None: + yield (last_vertex, current_vertex) + last_vertex = current_vertex + vertex_stack.pop() + else: + _, next_vertex = arbitrary_element(edges(current_vertex)) + vertex_stack.append(next_vertex) + 
G.remove_edge(current_vertex, next_vertex) + + +def _multigraph_eulerian_circuit(G, source): + if G.is_directed(): + degree = G.out_degree + edges = G.out_edges + else: + degree = G.degree + edges = G.edges + vertex_stack = [(source, None)] + last_vertex = None + last_key = None + while vertex_stack: + current_vertex, current_key = vertex_stack[-1] + if degree(current_vertex) == 0: + if last_vertex is not None: + yield (last_vertex, current_vertex, last_key) + last_vertex, last_key = current_vertex, current_key + vertex_stack.pop() + else: + triple = arbitrary_element(edges(current_vertex, keys=True)) + _, next_vertex, next_key = triple + vertex_stack.append((next_vertex, next_key)) + G.remove_edge(current_vertex, next_vertex, next_key) + + +@nx._dispatchable +def eulerian_circuit(G, source=None, keys=False): + """Returns an iterator over the edges of an Eulerian circuit in `G`. + + An *Eulerian circuit* is a closed walk that includes each edge of a + graph exactly once. + + Parameters + ---------- + G : NetworkX graph + A graph, either directed or undirected. + + source : node, optional + Starting node for circuit. + + keys : bool + If False, edges generated by this function will be of the form + ``(u, v)``. Otherwise, edges will be of the form ``(u, v, k)``. + This option is ignored unless `G` is a multigraph. + + Returns + ------- + edges : iterator + An iterator over edges in the Eulerian circuit. + + Raises + ------ + NetworkXError + If the graph is not Eulerian. + + See Also + -------- + is_eulerian + + Notes + ----- + This is a linear time implementation of an algorithm adapted from [1]_. + + For general information about Euler tours, see [2]_. + + References + ---------- + .. [1] J. Edmonds, E. L. Johnson. + Matching, Euler tours and the Chinese postman. + Mathematical programming, Volume 5, Issue 1 (1973), 111-114. + .. 
[2] https://en.wikipedia.org/wiki/Eulerian_path + + Examples + -------- + To get an Eulerian circuit in an undirected graph:: + + >>> G = nx.complete_graph(3) + >>> list(nx.eulerian_circuit(G)) + [(0, 2), (2, 1), (1, 0)] + >>> list(nx.eulerian_circuit(G, source=1)) + [(1, 2), (2, 0), (0, 1)] + + To get the sequence of vertices in an Eulerian circuit:: + + >>> [u for u, v in nx.eulerian_circuit(G)] + [0, 2, 1] + + """ + if not is_eulerian(G): + raise nx.NetworkXError("G is not Eulerian.") + if G.is_directed(): + G = G.reverse() + else: + G = G.copy() + if source is None: + source = arbitrary_element(G) + if G.is_multigraph(): + for u, v, k in _multigraph_eulerian_circuit(G, source): + if keys: + yield u, v, k + else: + yield u, v + else: + yield from _simplegraph_eulerian_circuit(G, source) + + +@nx._dispatchable +def has_eulerian_path(G, source=None): + """Return True iff `G` has an Eulerian path. + + An Eulerian path is a path in a graph which uses each edge of a graph + exactly once. If `source` is specified, then this function checks + whether an Eulerian path that starts at node `source` exists. + + A directed graph has an Eulerian path iff: + - at most one vertex has out_degree - in_degree = 1, + - at most one vertex has in_degree - out_degree = 1, + - every other vertex has equal in_degree and out_degree, + - and all of its vertices belong to a single connected + component of the underlying undirected graph. + + If `source` is not None, an Eulerian path starting at `source` exists if no + other node has out_degree - in_degree = 1. This is equivalent to either + there exists an Eulerian circuit or `source` has out_degree - in_degree = 1 + and the conditions above hold. + + An undirected graph has an Eulerian path iff: + - exactly zero or two vertices have odd degree, + - and all of its vertices belong to a single connected component. 
+ + If `source` is not None, an Eulerian path starting at `source` exists if + either there exists an Eulerian circuit or `source` has an odd degree and the + conditions above hold. + + Graphs with isolated vertices (i.e. vertices with zero degree) are not considered + to have an Eulerian path. Therefore, if the graph is not connected (or not strongly + connected, for directed graphs), this function returns False. + + Parameters + ---------- + G : NetworkX Graph + The graph to find an euler path in. + + source : node, optional + Starting node for path. + + Returns + ------- + Bool : True if G has an Eulerian path. + + Examples + -------- + If you prefer to allow graphs with isolated vertices to have Eulerian path, + you can first remove such vertices and then call `has_eulerian_path` as below example shows. + + >>> G = nx.Graph([(0, 1), (1, 2), (0, 2)]) + >>> G.add_node(3) + >>> nx.has_eulerian_path(G) + False + + >>> G.remove_nodes_from(list(nx.isolates(G))) + >>> nx.has_eulerian_path(G) + True + + See Also + -------- + is_eulerian + eulerian_path + """ + if nx.is_eulerian(G): + return True + + if G.is_directed(): + ins = G.in_degree + outs = G.out_degree + # Since we know it is not eulerian, outs - ins must be 1 for source + if source is not None and outs[source] - ins[source] != 1: + return False + + unbalanced_ins = 0 + unbalanced_outs = 0 + for v in G: + if ins[v] - outs[v] == 1: + unbalanced_ins += 1 + elif outs[v] - ins[v] == 1: + unbalanced_outs += 1 + elif ins[v] != outs[v]: + return False + + return ( + unbalanced_ins <= 1 and unbalanced_outs <= 1 and nx.is_weakly_connected(G) + ) + else: + # We know it is not eulerian, so degree of source must be odd. 
+ if source is not None and G.degree[source] % 2 != 1: + return False + + # Sum is 2 since we know it is not eulerian (which implies sum is 0) + return sum(d % 2 == 1 for v, d in G.degree()) == 2 and nx.is_connected(G) + + +@nx._dispatchable +def eulerian_path(G, source=None, keys=False): + """Return an iterator over the edges of an Eulerian path in `G`. + + Parameters + ---------- + G : NetworkX Graph + The graph in which to look for an eulerian path. + source : node or None (default: None) + The node at which to start the search. None means search over all + starting nodes. + keys : Bool (default: False) + Indicates whether to yield edge 3-tuples (u, v, edge_key). + The default yields edge 2-tuples + + Yields + ------ + Edge tuples along the eulerian path. + + Warning: If `source` provided is not the start node of an Euler path + will raise error even if an Euler Path exists. + """ + if not has_eulerian_path(G, source): + raise nx.NetworkXError("Graph has no Eulerian paths.") + if G.is_directed(): + G = G.reverse() + if source is None or nx.is_eulerian(G) is False: + source = _find_path_start(G) + if G.is_multigraph(): + for u, v, k in _multigraph_eulerian_circuit(G, source): + if keys: + yield u, v, k + else: + yield u, v + else: + yield from _simplegraph_eulerian_circuit(G, source) + else: + G = G.copy() + if source is None: + source = _find_path_start(G) + if G.is_multigraph(): + if keys: + yield from reversed( + [(v, u, k) for u, v, k in _multigraph_eulerian_circuit(G, source)] + ) + else: + yield from reversed( + [(v, u) for u, v, k in _multigraph_eulerian_circuit(G, source)] + ) + else: + yield from reversed( + [(v, u) for u, v in _simplegraph_eulerian_circuit(G, source)] + ) + + +@not_implemented_for("directed") +@nx._dispatchable(returns_graph=True) +def eulerize(G): + """Transforms a graph into an Eulerian graph. 
+ + If `G` is Eulerian the result is `G` as a MultiGraph, otherwise the result is a smallest + (in terms of the number of edges) multigraph whose underlying simple graph is `G`. + + Parameters + ---------- + G : NetworkX graph + An undirected graph + + Returns + ------- + G : NetworkX multigraph + + Raises + ------ + NetworkXError + If the graph is not connected. + + See Also + -------- + is_eulerian + eulerian_circuit + + References + ---------- + .. [1] J. Edmonds, E. L. Johnson. + Matching, Euler tours and the Chinese postman. + Mathematical programming, Volume 5, Issue 1 (1973), 111-114. + .. [2] https://en.wikipedia.org/wiki/Eulerian_path + .. [3] http://web.math.princeton.edu/math_alive/5/Notes1.pdf + + Examples + -------- + >>> G = nx.complete_graph(10) + >>> H = nx.eulerize(G) + >>> nx.is_eulerian(H) + True + + """ + if G.order() == 0: + raise nx.NetworkXPointlessConcept("Cannot Eulerize null graph") + if not nx.is_connected(G): + raise nx.NetworkXError("G is not connected") + odd_degree_nodes = [n for n, d in G.degree() if d % 2 == 1] + G = nx.MultiGraph(G) + if len(odd_degree_nodes) == 0: + return G + + # get all shortest paths between vertices of odd degree + odd_deg_pairs_paths = [ + (m, {n: nx.shortest_path(G, source=m, target=n)}) + for m, n in combinations(odd_degree_nodes, 2) + ] + + # use the number of vertices in a graph + 1 as an upper bound on + # the maximum length of a path in G + upper_bound_on_max_path_length = len(G) + 1 + + # use "len(G) + 1 - len(P)", + # where P is a shortest path between vertices n and m, + # as edge-weights in a new graph + # store the paths in the graph for easy indexing later + Gp = nx.Graph() + for n, Ps in odd_deg_pairs_paths: + for m, P in Ps.items(): + if n != m: + Gp.add_edge( + m, n, weight=upper_bound_on_max_path_length - len(P), path=P + ) + + # find the minimum weight matching of edges in the weighted graph + best_matching = nx.Graph(list(nx.max_weight_matching(Gp))) + + # duplicate each edge along each path 
in the set of paths in Gp + for m, n in best_matching.edges(): + path = Gp[m][n]["path"] + G.add_edges_from(nx.utils.pairwise(path)) + return G diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/flow/__init__.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/flow/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c5d19abed99501086359c87670edc31a680fe36c --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/flow/__init__.py @@ -0,0 +1,11 @@ +from .maxflow import * +from .mincost import * +from .boykovkolmogorov import * +from .dinitz_alg import * +from .edmondskarp import * +from .gomory_hu import * +from .preflowpush import * +from .shortestaugmentingpath import * +from .capacityscaling import * +from .networksimplex import * +from .utils import build_flow_dict, build_residual_network diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/flow/boykovkolmogorov.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/flow/boykovkolmogorov.py new file mode 100644 index 0000000000000000000000000000000000000000..30899c6c33e7ff508cfb13886a13ec96fef4ba44 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/flow/boykovkolmogorov.py @@ -0,0 +1,370 @@ +""" +Boykov-Kolmogorov algorithm for maximum flow problems. +""" + +from collections import deque +from operator import itemgetter + +import networkx as nx +from networkx.algorithms.flow.utils import build_residual_network + +__all__ = ["boykov_kolmogorov"] + + +@nx._dispatchable(edge_attrs={"capacity": float("inf")}, returns_graph=True) +def boykov_kolmogorov( + G, s, t, capacity="capacity", residual=None, value_only=False, cutoff=None +): + r"""Find a maximum single-commodity flow using Boykov-Kolmogorov algorithm. + + This function returns the residual network resulting after computing + the maximum flow. See below for details about the conventions + NetworkX uses for defining residual networks. 
+ + This algorithm has worse case complexity $O(n^2 m |C|)$ for $n$ nodes, $m$ + edges, and $|C|$ the cost of the minimum cut [1]_. This implementation + uses the marking heuristic defined in [2]_ which improves its running + time in many practical problems. + + Parameters + ---------- + G : NetworkX graph + Edges of the graph are expected to have an attribute called + 'capacity'. If this attribute is not present, the edge is + considered to have infinite capacity. + + s : node + Source node for the flow. + + t : node + Sink node for the flow. + + capacity : string + Edges of the graph G are expected to have an attribute capacity + that indicates how much flow the edge can support. If this + attribute is not present, the edge is considered to have + infinite capacity. Default value: 'capacity'. + + residual : NetworkX graph + Residual network on which the algorithm is to be executed. If None, a + new residual network is created. Default value: None. + + value_only : bool + If True compute only the value of the maximum flow. This parameter + will be ignored by this algorithm because it is not applicable. + + cutoff : integer, float + If specified, the algorithm will terminate when the flow value reaches + or exceeds the cutoff. In this case, it may be unable to immediately + determine a minimum cut. Default value: None. + + Returns + ------- + R : NetworkX DiGraph + Residual network after computing the maximum flow. + + Raises + ------ + NetworkXError + The algorithm does not support MultiGraph and MultiDiGraph. If + the input graph is an instance of one of these two classes, a + NetworkXError is raised. + + NetworkXUnbounded + If the graph has a path of infinite capacity, the value of a + feasible flow on the graph is unbounded above and the function + raises a NetworkXUnbounded. 
+ + See also + -------- + :meth:`maximum_flow` + :meth:`minimum_cut` + :meth:`preflow_push` + :meth:`shortest_augmenting_path` + + Notes + ----- + The residual network :samp:`R` from an input graph :samp:`G` has the + same nodes as :samp:`G`. :samp:`R` is a DiGraph that contains a pair + of edges :samp:`(u, v)` and :samp:`(v, u)` iff :samp:`(u, v)` is not a + self-loop, and at least one of :samp:`(u, v)` and :samp:`(v, u)` exists + in :samp:`G`. + + For each edge :samp:`(u, v)` in :samp:`R`, :samp:`R[u][v]['capacity']` + is equal to the capacity of :samp:`(u, v)` in :samp:`G` if it exists + in :samp:`G` or zero otherwise. If the capacity is infinite, + :samp:`R[u][v]['capacity']` will have a high arbitrary finite value + that does not affect the solution of the problem. This value is stored in + :samp:`R.graph['inf']`. For each edge :samp:`(u, v)` in :samp:`R`, + :samp:`R[u][v]['flow']` represents the flow function of :samp:`(u, v)` and + satisfies :samp:`R[u][v]['flow'] == -R[v][u]['flow']`. + + The flow value, defined as the total flow into :samp:`t`, the sink, is + stored in :samp:`R.graph['flow_value']`. If :samp:`cutoff` is not + specified, reachability to :samp:`t` using only edges :samp:`(u, v)` such + that :samp:`R[u][v]['flow'] < R[u][v]['capacity']` induces a minimum + :samp:`s`-:samp:`t` cut. + + Examples + -------- + >>> from networkx.algorithms.flow import boykov_kolmogorov + + The functions that implement flow algorithms and output a residual + network, such as this one, are not imported to the base NetworkX + namespace, so you have to explicitly import them from the flow package. 
+ + >>> G = nx.DiGraph() + >>> G.add_edge("x", "a", capacity=3.0) + >>> G.add_edge("x", "b", capacity=1.0) + >>> G.add_edge("a", "c", capacity=3.0) + >>> G.add_edge("b", "c", capacity=5.0) + >>> G.add_edge("b", "d", capacity=4.0) + >>> G.add_edge("d", "e", capacity=2.0) + >>> G.add_edge("c", "y", capacity=2.0) + >>> G.add_edge("e", "y", capacity=3.0) + >>> R = boykov_kolmogorov(G, "x", "y") + >>> flow_value = nx.maximum_flow_value(G, "x", "y") + >>> flow_value + 3.0 + >>> flow_value == R.graph["flow_value"] + True + + A nice feature of the Boykov-Kolmogorov algorithm is that a partition + of the nodes that defines a minimum cut can be easily computed based + on the search trees used during the algorithm. These trees are stored + in the graph attribute `trees` of the residual network. + + >>> source_tree, target_tree = R.graph["trees"] + >>> partition = (set(source_tree), set(G) - set(source_tree)) + + Or equivalently: + + >>> partition = (set(G) - set(target_tree), set(target_tree)) + + References + ---------- + .. [1] Boykov, Y., & Kolmogorov, V. (2004). An experimental comparison + of min-cut/max-flow algorithms for energy minimization in vision. + Pattern Analysis and Machine Intelligence, IEEE Transactions on, + 26(9), 1124-1137. + https://doi.org/10.1109/TPAMI.2004.60 + + .. [2] Vladimir Kolmogorov. Graph-based Algorithms for Multi-camera + Reconstruction Problem. PhD thesis, Cornell University, CS Department, + 2003. pp. 109-114. 
+ https://web.archive.org/web/20170809091249/https://pub.ist.ac.at/~vnk/papers/thesis.pdf + + """ + R = boykov_kolmogorov_impl(G, s, t, capacity, residual, cutoff) + R.graph["algorithm"] = "boykov_kolmogorov" + nx._clear_cache(R) + return R + + +def boykov_kolmogorov_impl(G, s, t, capacity, residual, cutoff): + if s not in G: + raise nx.NetworkXError(f"node {str(s)} not in graph") + if t not in G: + raise nx.NetworkXError(f"node {str(t)} not in graph") + if s == t: + raise nx.NetworkXError("source and sink are the same node") + + if residual is None: + R = build_residual_network(G, capacity) + else: + R = residual + + # Initialize/reset the residual network. + # This is way too slow + # nx.set_edge_attributes(R, 0, 'flow') + for u in R: + for e in R[u].values(): + e["flow"] = 0 + + # Use an arbitrary high value as infinite. It is computed + # when building the residual network. + INF = R.graph["inf"] + + if cutoff is None: + cutoff = INF + + R_succ = R.succ + R_pred = R.pred + + def grow(): + """Bidirectional breadth-first search for the growth stage. + + Returns a connecting edge, that is and edge that connects + a node from the source search tree with a node from the + target search tree. + The first node in the connecting edge is always from the + source tree and the last node from the target tree. 
+ """ + while active: + u = active[0] + if u in source_tree: + this_tree = source_tree + other_tree = target_tree + neighbors = R_succ + else: + this_tree = target_tree + other_tree = source_tree + neighbors = R_pred + for v, attr in neighbors[u].items(): + if attr["capacity"] - attr["flow"] > 0: + if v not in this_tree: + if v in other_tree: + return (u, v) if this_tree is source_tree else (v, u) + this_tree[v] = u + dist[v] = dist[u] + 1 + timestamp[v] = timestamp[u] + active.append(v) + elif v in this_tree and _is_closer(u, v): + this_tree[v] = u + dist[v] = dist[u] + 1 + timestamp[v] = timestamp[u] + _ = active.popleft() + return None, None + + def augment(u, v): + """Augmentation stage. + + Reconstruct path and determine its residual capacity. + We start from a connecting edge, which links a node + from the source tree to a node from the target tree. + The connecting edge is the output of the grow function + and the input of this function. + """ + attr = R_succ[u][v] + flow = min(INF, attr["capacity"] - attr["flow"]) + path = [u] + # Trace a path from u to s in source_tree. + w = u + while w != s: + n = w + w = source_tree[n] + attr = R_pred[n][w] + flow = min(flow, attr["capacity"] - attr["flow"]) + path.append(w) + path.reverse() + # Trace a path from v to t in target_tree. + path.append(v) + w = v + while w != t: + n = w + w = target_tree[n] + attr = R_succ[n][w] + flow = min(flow, attr["capacity"] - attr["flow"]) + path.append(w) + # Augment flow along the path and check for saturated edges. + it = iter(path) + u = next(it) + these_orphans = [] + for v in it: + R_succ[u][v]["flow"] += flow + R_succ[v][u]["flow"] -= flow + if R_succ[u][v]["flow"] == R_succ[u][v]["capacity"]: + if v in source_tree: + source_tree[v] = None + these_orphans.append(v) + if u in target_tree: + target_tree[u] = None + these_orphans.append(u) + u = v + orphans.extend(sorted(these_orphans, key=dist.get)) + return flow + + def adopt(): + """Adoption stage. 
+ + Reconstruct search trees by adopting or discarding orphans. + During augmentation stage some edges got saturated and thus + the source and target search trees broke down to forests, with + orphans as roots of some of its trees. We have to reconstruct + the search trees rooted to source and target before we can grow + them again. + """ + while orphans: + u = orphans.popleft() + if u in source_tree: + tree = source_tree + neighbors = R_pred + else: + tree = target_tree + neighbors = R_succ + nbrs = ((n, attr, dist[n]) for n, attr in neighbors[u].items() if n in tree) + for v, attr, d in sorted(nbrs, key=itemgetter(2)): + if attr["capacity"] - attr["flow"] > 0: + if _has_valid_root(v, tree): + tree[u] = v + dist[u] = dist[v] + 1 + timestamp[u] = time + break + else: + nbrs = ( + (n, attr, dist[n]) for n, attr in neighbors[u].items() if n in tree + ) + for v, attr, d in sorted(nbrs, key=itemgetter(2)): + if attr["capacity"] - attr["flow"] > 0: + if v not in active: + active.append(v) + if tree[v] == u: + tree[v] = None + orphans.appendleft(v) + if u in active: + active.remove(u) + del tree[u] + + def _has_valid_root(n, tree): + path = [] + v = n + while v is not None: + path.append(v) + if v in (s, t): + base_dist = 0 + break + elif timestamp[v] == time: + base_dist = dist[v] + break + v = tree[v] + else: + return False + length = len(path) + for i, u in enumerate(path, 1): + dist[u] = base_dist + length - i + timestamp[u] = time + return True + + def _is_closer(u, v): + return timestamp[v] <= timestamp[u] and dist[v] > dist[u] + 1 + + source_tree = {s: None} + target_tree = {t: None} + active = deque([s, t]) + orphans = deque() + flow_value = 0 + # data structures for the marking heuristic + time = 1 + timestamp = {s: time, t: time} + dist = {s: 0, t: 0} + while flow_value < cutoff: + # Growth stage + u, v = grow() + if u is None: + break + time += 1 + # Augmentation stage + flow_value += augment(u, v) + # Adoption stage + adopt() + + if flow_value * 2 > INF: + 
raise nx.NetworkXUnbounded("Infinite capacity path, flow unbounded above.") + + # Add source and target tree in a graph attribute. + # A partition that defines a minimum cut can be directly + # computed from the search trees as explained in the docstrings. + R.graph["trees"] = (source_tree, target_tree) + # Add the standard flow_value graph attribute. + R.graph["flow_value"] = flow_value + return R diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/flow/capacityscaling.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/flow/capacityscaling.py new file mode 100644 index 0000000000000000000000000000000000000000..bf68565c5486bb7b60e7ddcf6089e448bc6ddef1 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/flow/capacityscaling.py @@ -0,0 +1,407 @@ +""" +Capacity scaling minimum cost flow algorithm. +""" + +__all__ = ["capacity_scaling"] + +from itertools import chain +from math import log + +import networkx as nx + +from ...utils import BinaryHeap, arbitrary_element, not_implemented_for + + +def _detect_unboundedness(R): + """Detect infinite-capacity negative cycles.""" + G = nx.DiGraph() + G.add_nodes_from(R) + + # Value simulating infinity. + inf = R.graph["inf"] + # True infinity. + f_inf = float("inf") + for u in R: + for v, e in R[u].items(): + # Compute the minimum weight of infinite-capacity (u, v) edges. + w = f_inf + for k, e in e.items(): + if e["capacity"] == inf: + w = min(w, e["weight"]) + if w != f_inf: + G.add_edge(u, v, weight=w) + + if nx.negative_edge_cycle(G): + raise nx.NetworkXUnbounded( + "Negative cost cycle of infinite capacity found. " + "Min cost flow may be unbounded below." 
+ ) + + +@not_implemented_for("undirected") +def _build_residual_network(G, demand, capacity, weight): + """Build a residual network and initialize a zero flow.""" + if sum(G.nodes[u].get(demand, 0) for u in G) != 0: + raise nx.NetworkXUnfeasible("Sum of the demands should be 0.") + + R = nx.MultiDiGraph() + R.add_nodes_from( + (u, {"excess": -G.nodes[u].get(demand, 0), "potential": 0}) for u in G + ) + + inf = float("inf") + # Detect selfloops with infinite capacities and negative weights. + for u, v, e in nx.selfloop_edges(G, data=True): + if e.get(weight, 0) < 0 and e.get(capacity, inf) == inf: + raise nx.NetworkXUnbounded( + "Negative cost cycle of infinite capacity found. " + "Min cost flow may be unbounded below." + ) + + # Extract edges with positive capacities. Self loops excluded. + if G.is_multigraph(): + edge_list = [ + (u, v, k, e) + for u, v, k, e in G.edges(data=True, keys=True) + if u != v and e.get(capacity, inf) > 0 + ] + else: + edge_list = [ + (u, v, 0, e) + for u, v, e in G.edges(data=True) + if u != v and e.get(capacity, inf) > 0 + ] + # Simulate infinity with the larger of the sum of absolute node imbalances + # the sum of finite edge capacities or any positive value if both sums are + # zero. This allows the infinite-capacity edges to be distinguished for + # unboundedness detection and directly participate in residual capacity + # calculation. + inf = ( + max( + sum(abs(R.nodes[u]["excess"]) for u in R), + 2 + * sum( + e[capacity] + for u, v, k, e in edge_list + if capacity in e and e[capacity] != inf + ), + ) + or 1 + ) + for u, v, k, e in edge_list: + r = min(e.get(capacity, inf), inf) + w = e.get(weight, 0) + # Add both (u, v) and (v, u) into the residual network marked with the + # original key. (key[1] == True) indicates the (u, v) is in the + # original network. + R.add_edge(u, v, key=(k, True), capacity=r, weight=w, flow=0) + R.add_edge(v, u, key=(k, False), capacity=0, weight=-w, flow=0) + + # Record the value simulating infinity. 
+ R.graph["inf"] = inf + + _detect_unboundedness(R) + + return R + + +def _build_flow_dict(G, R, capacity, weight): + """Build a flow dictionary from a residual network.""" + inf = float("inf") + flow_dict = {} + if G.is_multigraph(): + for u in G: + flow_dict[u] = {} + for v, es in G[u].items(): + flow_dict[u][v] = { + # Always saturate negative selfloops. + k: ( + 0 + if ( + u != v or e.get(capacity, inf) <= 0 or e.get(weight, 0) >= 0 + ) + else e[capacity] + ) + for k, e in es.items() + } + for v, es in R[u].items(): + if v in flow_dict[u]: + flow_dict[u][v].update( + (k[0], e["flow"]) for k, e in es.items() if e["flow"] > 0 + ) + else: + for u in G: + flow_dict[u] = { + # Always saturate negative selfloops. + v: ( + 0 + if (u != v or e.get(capacity, inf) <= 0 or e.get(weight, 0) >= 0) + else e[capacity] + ) + for v, e in G[u].items() + } + flow_dict[u].update( + (v, e["flow"]) + for v, es in R[u].items() + for e in es.values() + if e["flow"] > 0 + ) + return flow_dict + + +@nx._dispatchable( + node_attrs="demand", edge_attrs={"capacity": float("inf"), "weight": 0} +) +def capacity_scaling( + G, demand="demand", capacity="capacity", weight="weight", heap=BinaryHeap +): + r"""Find a minimum cost flow satisfying all demands in digraph G. + + This is a capacity scaling successive shortest augmenting path algorithm. + + G is a digraph with edge costs and capacities and in which nodes + have demand, i.e., they want to send or receive some amount of + flow. A negative demand means that the node wants to send flow, a + positive demand means that the node want to receive flow. A flow on + the digraph G satisfies all demand if the net flow into each node + is equal to the demand of that node. + + Parameters + ---------- + G : NetworkX graph + DiGraph or MultiDiGraph on which a minimum cost flow satisfying all + demands is to be found. 
+ + demand : string + Nodes of the graph G are expected to have an attribute demand + that indicates how much flow a node wants to send (negative + demand) or receive (positive demand). Note that the sum of the + demands should be 0 otherwise the problem in not feasible. If + this attribute is not present, a node is considered to have 0 + demand. Default value: 'demand'. + + capacity : string + Edges of the graph G are expected to have an attribute capacity + that indicates how much flow the edge can support. If this + attribute is not present, the edge is considered to have + infinite capacity. Default value: 'capacity'. + + weight : string + Edges of the graph G are expected to have an attribute weight + that indicates the cost incurred by sending one unit of flow on + that edge. If not present, the weight is considered to be 0. + Default value: 'weight'. + + heap : class + Type of heap to be used in the algorithm. It should be a subclass of + :class:`MinHeap` or implement a compatible interface. + + If a stock heap implementation is to be used, :class:`BinaryHeap` is + recommended over :class:`PairingHeap` for Python implementations without + optimized attribute accesses (e.g., CPython) despite a slower + asymptotic running time. For Python implementations with optimized + attribute accesses (e.g., PyPy), :class:`PairingHeap` provides better + performance. Default value: :class:`BinaryHeap`. + + Returns + ------- + flowCost : integer + Cost of a minimum cost flow satisfying all demands. + + flowDict : dictionary + If G is a digraph, a dict-of-dicts keyed by nodes such that + flowDict[u][v] is the flow on edge (u, v). + If G is a MultiDiGraph, a dict-of-dicts-of-dicts keyed by nodes + so that flowDict[u][v][key] is the flow on edge (u, v, key). + + Raises + ------ + NetworkXError + This exception is raised if the input graph is not directed, + not connected. 
+ + NetworkXUnfeasible + This exception is raised in the following situations: + + * The sum of the demands is not zero. Then, there is no + flow satisfying all demands. + * There is no flow satisfying all demand. + + NetworkXUnbounded + This exception is raised if the digraph G has a cycle of + negative cost and infinite capacity. Then, the cost of a flow + satisfying all demands is unbounded below. + + Notes + ----- + This algorithm does not work if edge weights are floating-point numbers. + + See also + -------- + :meth:`network_simplex` + + Examples + -------- + A simple example of a min cost flow problem. + + >>> G = nx.DiGraph() + >>> G.add_node("a", demand=-5) + >>> G.add_node("d", demand=5) + >>> G.add_edge("a", "b", weight=3, capacity=4) + >>> G.add_edge("a", "c", weight=6, capacity=10) + >>> G.add_edge("b", "d", weight=1, capacity=9) + >>> G.add_edge("c", "d", weight=2, capacity=5) + >>> flowCost, flowDict = nx.capacity_scaling(G) + >>> flowCost + 24 + >>> flowDict + {'a': {'b': 4, 'c': 1}, 'd': {}, 'b': {'d': 4}, 'c': {'d': 1}} + + It is possible to change the name of the attributes used for the + algorithm. + + >>> G = nx.DiGraph() + >>> G.add_node("p", spam=-4) + >>> G.add_node("q", spam=2) + >>> G.add_node("a", spam=-2) + >>> G.add_node("d", spam=-1) + >>> G.add_node("t", spam=2) + >>> G.add_node("w", spam=3) + >>> G.add_edge("p", "q", cost=7, vacancies=5) + >>> G.add_edge("p", "a", cost=1, vacancies=4) + >>> G.add_edge("q", "d", cost=2, vacancies=3) + >>> G.add_edge("t", "q", cost=1, vacancies=2) + >>> G.add_edge("a", "t", cost=2, vacancies=4) + >>> G.add_edge("d", "w", cost=3, vacancies=4) + >>> G.add_edge("t", "w", cost=4, vacancies=1) + >>> flowCost, flowDict = nx.capacity_scaling( + ... G, demand="spam", capacity="vacancies", weight="cost" + ... 
) + >>> flowCost + 37 + >>> flowDict + {'p': {'q': 2, 'a': 2}, 'q': {'d': 1}, 'a': {'t': 4}, 'd': {'w': 2}, 't': {'q': 1, 'w': 1}, 'w': {}} + """ + R = _build_residual_network(G, demand, capacity, weight) + + inf = float("inf") + # Account cost of negative selfloops. + flow_cost = sum( + 0 + if e.get(capacity, inf) <= 0 or e.get(weight, 0) >= 0 + else e[capacity] * e[weight] + for u, v, e in nx.selfloop_edges(G, data=True) + ) + + # Determine the maximum edge capacity. + wmax = max(chain([-inf], (e["capacity"] for u, v, e in R.edges(data=True)))) + if wmax == -inf: + # Residual network has no edges. + return flow_cost, _build_flow_dict(G, R, capacity, weight) + + R_nodes = R.nodes + R_succ = R.succ + + delta = 2 ** int(log(wmax, 2)) + while delta >= 1: + # Saturate Δ-residual edges with negative reduced costs to achieve + # Δ-optimality. + for u in R: + p_u = R_nodes[u]["potential"] + for v, es in R_succ[u].items(): + for k, e in es.items(): + flow = e["capacity"] - e["flow"] + if e["weight"] - p_u + R_nodes[v]["potential"] < 0: + flow = e["capacity"] - e["flow"] + if flow >= delta: + e["flow"] += flow + R_succ[v][u][(k[0], not k[1])]["flow"] -= flow + R_nodes[u]["excess"] -= flow + R_nodes[v]["excess"] += flow + # Determine the Δ-active nodes. + S = set() + T = set() + S_add = S.add + S_remove = S.remove + T_add = T.add + T_remove = T.remove + for u in R: + excess = R_nodes[u]["excess"] + if excess >= delta: + S_add(u) + elif excess <= -delta: + T_add(u) + # Repeatedly augment flow from S to T along shortest paths until + # Δ-feasibility is achieved. + while S and T: + s = arbitrary_element(S) + t = None + # Search for a shortest path in terms of reduce costs from s to + # any t in T in the Δ-residual network. + d = {} + pred = {s: None} + h = heap() + h_insert = h.insert + h_get = h.get + h_insert(s, 0) + while h: + u, d_u = h.pop() + d[u] = d_u + if u in T: + # Path found. 
+ t = u + break + p_u = R_nodes[u]["potential"] + for v, es in R_succ[u].items(): + if v in d: + continue + wmin = inf + # Find the minimum-weighted (u, v) Δ-residual edge. + for k, e in es.items(): + if e["capacity"] - e["flow"] >= delta: + w = e["weight"] + if w < wmin: + wmin = w + kmin = k + emin = e + if wmin == inf: + continue + # Update the distance label of v. + d_v = d_u + wmin - p_u + R_nodes[v]["potential"] + if h_insert(v, d_v): + pred[v] = (u, kmin, emin) + if t is not None: + # Augment Δ units of flow from s to t. + while u != s: + v = u + u, k, e = pred[v] + e["flow"] += delta + R_succ[v][u][(k[0], not k[1])]["flow"] -= delta + # Account node excess and deficit. + R_nodes[s]["excess"] -= delta + R_nodes[t]["excess"] += delta + if R_nodes[s]["excess"] < delta: + S_remove(s) + if R_nodes[t]["excess"] > -delta: + T_remove(t) + # Update node potentials. + d_t = d[t] + for u, d_u in d.items(): + R_nodes[u]["potential"] -= d_u - d_t + else: + # Path not found. + S_remove(s) + delta //= 2 + + if any(R.nodes[u]["excess"] != 0 for u in R): + raise nx.NetworkXUnfeasible("No flow satisfying all demands.") + + # Calculate the flow cost. + for u in R: + for v, es in R_succ[u].items(): + for e in es.values(): + flow = e["flow"] + if flow > 0: + flow_cost += flow * e["weight"] + + return flow_cost, _build_flow_dict(G, R, capacity, weight) diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/flow/dinitz_alg.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/flow/dinitz_alg.py new file mode 100644 index 0000000000000000000000000000000000000000..f369642af2968094184741132a843f5dde81e428 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/flow/dinitz_alg.py @@ -0,0 +1,238 @@ +""" +Dinitz' algorithm for maximum flow problems. 
+""" + +from collections import deque + +import networkx as nx +from networkx.algorithms.flow.utils import build_residual_network +from networkx.utils import pairwise + +__all__ = ["dinitz"] + + +@nx._dispatchable(edge_attrs={"capacity": float("inf")}, returns_graph=True) +def dinitz(G, s, t, capacity="capacity", residual=None, value_only=False, cutoff=None): + """Find a maximum single-commodity flow using Dinitz' algorithm. + + This function returns the residual network resulting after computing + the maximum flow. See below for details about the conventions + NetworkX uses for defining residual networks. + + This algorithm has a running time of $O(n^2 m)$ for $n$ nodes and $m$ + edges [1]_. + + + Parameters + ---------- + G : NetworkX graph + Edges of the graph are expected to have an attribute called + 'capacity'. If this attribute is not present, the edge is + considered to have infinite capacity. + + s : node + Source node for the flow. + + t : node + Sink node for the flow. + + capacity : string + Edges of the graph G are expected to have an attribute capacity + that indicates how much flow the edge can support. If this + attribute is not present, the edge is considered to have + infinite capacity. Default value: 'capacity'. + + residual : NetworkX graph + Residual network on which the algorithm is to be executed. If None, a + new residual network is created. Default value: None. + + value_only : bool + If True compute only the value of the maximum flow. This parameter + will be ignored by this algorithm because it is not applicable. + + cutoff : integer, float + If specified, the algorithm will terminate when the flow value reaches + or exceeds the cutoff. In this case, it may be unable to immediately + determine a minimum cut. Default value: None. + + Returns + ------- + R : NetworkX DiGraph + Residual network after computing the maximum flow. + + Raises + ------ + NetworkXError + The algorithm does not support MultiGraph and MultiDiGraph. 
If + the input graph is an instance of one of these two classes, a + NetworkXError is raised. + + NetworkXUnbounded + If the graph has a path of infinite capacity, the value of a + feasible flow on the graph is unbounded above and the function + raises a NetworkXUnbounded. + + See also + -------- + :meth:`maximum_flow` + :meth:`minimum_cut` + :meth:`preflow_push` + :meth:`shortest_augmenting_path` + + Notes + ----- + The residual network :samp:`R` from an input graph :samp:`G` has the + same nodes as :samp:`G`. :samp:`R` is a DiGraph that contains a pair + of edges :samp:`(u, v)` and :samp:`(v, u)` iff :samp:`(u, v)` is not a + self-loop, and at least one of :samp:`(u, v)` and :samp:`(v, u)` exists + in :samp:`G`. + + For each edge :samp:`(u, v)` in :samp:`R`, :samp:`R[u][v]['capacity']` + is equal to the capacity of :samp:`(u, v)` in :samp:`G` if it exists + in :samp:`G` or zero otherwise. If the capacity is infinite, + :samp:`R[u][v]['capacity']` will have a high arbitrary finite value + that does not affect the solution of the problem. This value is stored in + :samp:`R.graph['inf']`. For each edge :samp:`(u, v)` in :samp:`R`, + :samp:`R[u][v]['flow']` represents the flow function of :samp:`(u, v)` and + satisfies :samp:`R[u][v]['flow'] == -R[v][u]['flow']`. + + The flow value, defined as the total flow into :samp:`t`, the sink, is + stored in :samp:`R.graph['flow_value']`. If :samp:`cutoff` is not + specified, reachability to :samp:`t` using only edges :samp:`(u, v)` such + that :samp:`R[u][v]['flow'] < R[u][v]['capacity']` induces a minimum + :samp:`s`-:samp:`t` cut. + + Examples + -------- + >>> from networkx.algorithms.flow import dinitz + + The functions that implement flow algorithms and output a residual + network, such as this one, are not imported to the base NetworkX + namespace, so you have to explicitly import them from the flow package. 
+ + >>> G = nx.DiGraph() + >>> G.add_edge("x", "a", capacity=3.0) + >>> G.add_edge("x", "b", capacity=1.0) + >>> G.add_edge("a", "c", capacity=3.0) + >>> G.add_edge("b", "c", capacity=5.0) + >>> G.add_edge("b", "d", capacity=4.0) + >>> G.add_edge("d", "e", capacity=2.0) + >>> G.add_edge("c", "y", capacity=2.0) + >>> G.add_edge("e", "y", capacity=3.0) + >>> R = dinitz(G, "x", "y") + >>> flow_value = nx.maximum_flow_value(G, "x", "y") + >>> flow_value + 3.0 + >>> flow_value == R.graph["flow_value"] + True + + References + ---------- + .. [1] Dinitz' Algorithm: The Original Version and Even's Version. + 2006. Yefim Dinitz. In Theoretical Computer Science. Lecture + Notes in Computer Science. Volume 3895. pp 218-240. + https://doi.org/10.1007/11685654_10 + + """ + R = dinitz_impl(G, s, t, capacity, residual, cutoff) + R.graph["algorithm"] = "dinitz" + nx._clear_cache(R) + return R + + +def dinitz_impl(G, s, t, capacity, residual, cutoff): + if s not in G: + raise nx.NetworkXError(f"node {str(s)} not in graph") + if t not in G: + raise nx.NetworkXError(f"node {str(t)} not in graph") + if s == t: + raise nx.NetworkXError("source and sink are the same node") + + if residual is None: + R = build_residual_network(G, capacity) + else: + R = residual + + # Initialize/reset the residual network. + for u in R: + for e in R[u].values(): + e["flow"] = 0 + + # Use an arbitrary high value as infinite. It is computed + # when building the residual network. 
+ INF = R.graph["inf"] + + if cutoff is None: + cutoff = INF + + R_succ = R.succ + R_pred = R.pred + + def breath_first_search(): + parents = {} + vertex_dist = {s: 0} + queue = deque([(s, 0)]) + # Record all the potential edges of shortest augmenting paths + while queue: + if t in parents: + break + u, dist = queue.popleft() + for v, attr in R_succ[u].items(): + if attr["capacity"] - attr["flow"] > 0: + if v in parents: + if vertex_dist[v] == dist + 1: + parents[v].append(u) + else: + parents[v] = deque([u]) + vertex_dist[v] = dist + 1 + queue.append((v, dist + 1)) + return parents + + def depth_first_search(parents): + # DFS to find all the shortest augmenting paths + """Build a path using DFS starting from the sink""" + total_flow = 0 + u = t + # path also functions as a stack + path = [u] + # The loop ends with no augmenting path left in the layered graph + while True: + if len(parents[u]) > 0: + v = parents[u][0] + path.append(v) + else: + path.pop() + if len(path) == 0: + break + v = path[-1] + parents[v].popleft() + # Augment the flow along the path found + if v == s: + flow = INF + for u, v in pairwise(path): + flow = min(flow, R_pred[u][v]["capacity"] - R_pred[u][v]["flow"]) + for u, v in pairwise(reversed(path)): + R_pred[v][u]["flow"] += flow + R_pred[u][v]["flow"] -= flow + # Find the proper node to continue the search + if R_pred[v][u]["capacity"] - R_pred[v][u]["flow"] == 0: + parents[v].popleft() + while path[-1] != v: + path.pop() + total_flow += flow + v = path[-1] + u = v + return total_flow + + flow_value = 0 + while flow_value < cutoff: + parents = breath_first_search() + if t not in parents: + break + this_flow = depth_first_search(parents) + if this_flow * 2 > INF: + raise nx.NetworkXUnbounded("Infinite capacity path, flow unbounded above.") + flow_value += this_flow + + R.graph["flow_value"] = flow_value + return R diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/flow/gomory_hu.py 
b/.venv/lib/python3.11/site-packages/networkx/algorithms/flow/gomory_hu.py new file mode 100644 index 0000000000000000000000000000000000000000..69913da904547b3a9fe682467b69e696e9c8e0dc --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/flow/gomory_hu.py @@ -0,0 +1,178 @@ +""" +Gomory-Hu tree of undirected Graphs. +""" + +import networkx as nx +from networkx.utils import not_implemented_for + +from .edmondskarp import edmonds_karp +from .utils import build_residual_network + +default_flow_func = edmonds_karp + +__all__ = ["gomory_hu_tree"] + + +@not_implemented_for("directed") +@nx._dispatchable(edge_attrs={"capacity": float("inf")}, returns_graph=True) +def gomory_hu_tree(G, capacity="capacity", flow_func=None): + r"""Returns the Gomory-Hu tree of an undirected graph G. + + A Gomory-Hu tree of an undirected graph with capacities is a + weighted tree that represents the minimum s-t cuts for all s-t + pairs in the graph. + + It only requires `n-1` minimum cut computations instead of the + obvious `n(n-1)/2`. The tree represents all s-t cuts as the + minimum cut value among any pair of nodes is the minimum edge + weight in the shortest path between the two nodes in the + Gomory-Hu tree. + + The Gomory-Hu tree also has the property that removing the + edge with the minimum weight in the shortest path between + any two nodes leaves two connected components that form + a partition of the nodes in G that defines the minimum s-t + cut. + + See Examples section below for details. + + Parameters + ---------- + G : NetworkX graph + Undirected graph + + capacity : string + Edges of the graph G are expected to have an attribute capacity + that indicates how much flow the edge can support. If this + attribute is not present, the edge is considered to have + infinite capacity. Default value: 'capacity'. + + flow_func : function + Function to perform the underlying flow computations. Default value + :func:`edmonds_karp`. 
This function performs better in sparse graphs + with right tailed degree distributions. + :func:`shortest_augmenting_path` will perform better in denser + graphs. + + Returns + ------- + Tree : NetworkX graph + A NetworkX graph representing the Gomory-Hu tree of the input graph. + + Raises + ------ + NetworkXNotImplemented + Raised if the input graph is directed. + + NetworkXError + Raised if the input graph is an empty Graph. + + Examples + -------- + >>> G = nx.karate_club_graph() + >>> nx.set_edge_attributes(G, 1, "capacity") + >>> T = nx.gomory_hu_tree(G) + >>> # The value of the minimum cut between any pair + ... # of nodes in G is the minimum edge weight in the + ... # shortest path between the two nodes in the + ... # Gomory-Hu tree. + ... def minimum_edge_weight_in_shortest_path(T, u, v): + ... path = nx.shortest_path(T, u, v, weight="weight") + ... return min((T[u][v]["weight"], (u, v)) for (u, v) in zip(path, path[1:])) + >>> u, v = 0, 33 + >>> cut_value, edge = minimum_edge_weight_in_shortest_path(T, u, v) + >>> cut_value + 10 + >>> nx.minimum_cut_value(G, u, v) + 10 + >>> # The Gomory-Hu tree also has the property that removing the + ... # edge with the minimum weight in the shortest path between + ... # any two nodes leaves two connected components that form + ... # a partition of the nodes in G that defines the minimum s-t + ... # cut. + ... cut_value, edge = minimum_edge_weight_in_shortest_path(T, u, v) + >>> T.remove_edge(*edge) + >>> U, V = list(nx.connected_components(T)) + >>> # Thus U and V form a partition that defines a minimum cut + ... # between u and v in G. You can compute the edge cut set, + ... # that is, the set of edges that if removed from G will + ... # disconnect u from v in G, with this information: + ... cutset = set() + >>> for x, nbrs in ((n, G[n]) for n in U): + ... cutset.update((x, y) for y in nbrs if y in V) + >>> # Because we have set the capacities of all edges to 1 + ... # the cutset contains ten edges + ... 
len(cutset) + 10 + >>> # You can use any maximum flow algorithm for the underlying + ... # flow computations using the argument flow_func + ... from networkx.algorithms import flow + >>> T = nx.gomory_hu_tree(G, flow_func=flow.boykov_kolmogorov) + >>> cut_value, edge = minimum_edge_weight_in_shortest_path(T, u, v) + >>> cut_value + 10 + >>> nx.minimum_cut_value(G, u, v, flow_func=flow.boykov_kolmogorov) + 10 + + Notes + ----- + This implementation is based on Gusfield approach [1]_ to compute + Gomory-Hu trees, which does not require node contractions and has + the same computational complexity than the original method. + + See also + -------- + :func:`minimum_cut` + :func:`maximum_flow` + + References + ---------- + .. [1] Gusfield D: Very simple methods for all pairs network flow analysis. + SIAM J Comput 19(1):143-155, 1990. + + """ + if flow_func is None: + flow_func = default_flow_func + + if len(G) == 0: # empty graph + msg = "Empty Graph does not have a Gomory-Hu tree representation" + raise nx.NetworkXError(msg) + + # Start the tree as a star graph with an arbitrary node at the center + tree = {} + labels = {} + iter_nodes = iter(G) + root = next(iter_nodes) + for n in iter_nodes: + tree[n] = root + + # Reuse residual network + R = build_residual_network(G, capacity) + + # For all the leaves in the star graph tree (that is n-1 nodes). 
+ for source in tree: + # Find neighbor in the tree + target = tree[source] + # compute minimum cut + cut_value, partition = nx.minimum_cut( + G, source, target, capacity=capacity, flow_func=flow_func, residual=R + ) + labels[(source, target)] = cut_value + # Update the tree + # Source will always be in partition[0] and target in partition[1] + for node in partition[0]: + if node != source and node in tree and tree[node] == target: + tree[node] = source + labels[node, source] = labels.get((node, target), cut_value) + # + if target != root and tree[target] in partition[0]: + labels[source, tree[target]] = labels[target, tree[target]] + labels[target, source] = cut_value + tree[source] = tree[target] + tree[target] = source + + # Build the tree + T = nx.Graph() + T.add_nodes_from(G) + T.add_weighted_edges_from(((u, v, labels[u, v]) for u, v in tree.items())) + return T diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/flow/maxflow.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/flow/maxflow.py new file mode 100644 index 0000000000000000000000000000000000000000..7993d87ba9ad8c3f3aa0639f82590f4c16f5f4b7 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/flow/maxflow.py @@ -0,0 +1,607 @@ +""" +Maximum flow (and minimum cut) algorithms on capacitated graphs. +""" + +import networkx as nx + +from .boykovkolmogorov import boykov_kolmogorov +from .dinitz_alg import dinitz +from .edmondskarp import edmonds_karp +from .preflowpush import preflow_push +from .shortestaugmentingpath import shortest_augmenting_path +from .utils import build_flow_dict + +# Define the default flow function for computing maximum flow. 
+default_flow_func = preflow_push + +__all__ = ["maximum_flow", "maximum_flow_value", "minimum_cut", "minimum_cut_value"] + + +@nx._dispatchable(graphs="flowG", edge_attrs={"capacity": float("inf")}) +def maximum_flow(flowG, _s, _t, capacity="capacity", flow_func=None, **kwargs): + """Find a maximum single-commodity flow. + + Parameters + ---------- + flowG : NetworkX graph + Edges of the graph are expected to have an attribute called + 'capacity'. If this attribute is not present, the edge is + considered to have infinite capacity. + + _s : node + Source node for the flow. + + _t : node + Sink node for the flow. + + capacity : string + Edges of the graph G are expected to have an attribute capacity + that indicates how much flow the edge can support. If this + attribute is not present, the edge is considered to have + infinite capacity. Default value: 'capacity'. + + flow_func : function + A function for computing the maximum flow among a pair of nodes + in a capacitated graph. The function has to accept at least three + parameters: a Graph or Digraph, a source node, and a target node. + And return a residual network that follows NetworkX conventions + (see Notes). If flow_func is None, the default maximum + flow function (:meth:`preflow_push`) is used. See below for + alternative algorithms. The choice of the default function may change + from version to version and should not be relied on. Default value: + None. + + kwargs : Any other keyword parameter is passed to the function that + computes the maximum flow. + + Returns + ------- + flow_value : integer, float + Value of the maximum flow, i.e., net outflow from the source. + + flow_dict : dict + A dictionary containing the value of the flow that went through + each edge. + + Raises + ------ + NetworkXError + The algorithm does not support MultiGraph and MultiDiGraph. If + the input graph is an instance of one of these two classes, a + NetworkXError is raised. 
+ + NetworkXUnbounded + If the graph has a path of infinite capacity, the value of a + feasible flow on the graph is unbounded above and the function + raises a NetworkXUnbounded. + + See also + -------- + :meth:`maximum_flow_value` + :meth:`minimum_cut` + :meth:`minimum_cut_value` + :meth:`edmonds_karp` + :meth:`preflow_push` + :meth:`shortest_augmenting_path` + + Notes + ----- + The function used in the flow_func parameter has to return a residual + network that follows NetworkX conventions: + + The residual network :samp:`R` from an input graph :samp:`G` has the + same nodes as :samp:`G`. :samp:`R` is a DiGraph that contains a pair + of edges :samp:`(u, v)` and :samp:`(v, u)` iff :samp:`(u, v)` is not a + self-loop, and at least one of :samp:`(u, v)` and :samp:`(v, u)` exists + in :samp:`G`. + + For each edge :samp:`(u, v)` in :samp:`R`, :samp:`R[u][v]['capacity']` + is equal to the capacity of :samp:`(u, v)` in :samp:`G` if it exists + in :samp:`G` or zero otherwise. If the capacity is infinite, + :samp:`R[u][v]['capacity']` will have a high arbitrary finite value + that does not affect the solution of the problem. This value is stored in + :samp:`R.graph['inf']`. For each edge :samp:`(u, v)` in :samp:`R`, + :samp:`R[u][v]['flow']` represents the flow function of :samp:`(u, v)` and + satisfies :samp:`R[u][v]['flow'] == -R[v][u]['flow']`. + + The flow value, defined as the total flow into :samp:`t`, the sink, is + stored in :samp:`R.graph['flow_value']`. Reachability to :samp:`t` using + only edges :samp:`(u, v)` such that + :samp:`R[u][v]['flow'] < R[u][v]['capacity']` induces a minimum + :samp:`s`-:samp:`t` cut. + + Specific algorithms may store extra data in :samp:`R`. + + The function should supports an optional boolean parameter value_only. When + True, it can optionally terminate the algorithm as soon as the maximum flow + value and the minimum cut can be determined. 
+ + Examples + -------- + >>> G = nx.DiGraph() + >>> G.add_edge("x", "a", capacity=3.0) + >>> G.add_edge("x", "b", capacity=1.0) + >>> G.add_edge("a", "c", capacity=3.0) + >>> G.add_edge("b", "c", capacity=5.0) + >>> G.add_edge("b", "d", capacity=4.0) + >>> G.add_edge("d", "e", capacity=2.0) + >>> G.add_edge("c", "y", capacity=2.0) + >>> G.add_edge("e", "y", capacity=3.0) + + maximum_flow returns both the value of the maximum flow and a + dictionary with all flows. + + >>> flow_value, flow_dict = nx.maximum_flow(G, "x", "y") + >>> flow_value + 3.0 + >>> print(flow_dict["x"]["b"]) + 1.0 + + You can also use alternative algorithms for computing the + maximum flow by using the flow_func parameter. + + >>> from networkx.algorithms.flow import shortest_augmenting_path + >>> flow_value == nx.maximum_flow(G, "x", "y", flow_func=shortest_augmenting_path)[ + ... 0 + ... ] + True + + """ + if flow_func is None: + if kwargs: + raise nx.NetworkXError( + "You have to explicitly set a flow_func if" + " you need to pass parameters via kwargs." + ) + flow_func = default_flow_func + + if not callable(flow_func): + raise nx.NetworkXError("flow_func has to be callable.") + + R = flow_func(flowG, _s, _t, capacity=capacity, value_only=False, **kwargs) + flow_dict = build_flow_dict(flowG, R) + + return (R.graph["flow_value"], flow_dict) + + +@nx._dispatchable(graphs="flowG", edge_attrs={"capacity": float("inf")}) +def maximum_flow_value(flowG, _s, _t, capacity="capacity", flow_func=None, **kwargs): + """Find the value of maximum single-commodity flow. + + Parameters + ---------- + flowG : NetworkX graph + Edges of the graph are expected to have an attribute called + 'capacity'. If this attribute is not present, the edge is + considered to have infinite capacity. + + _s : node + Source node for the flow. + + _t : node + Sink node for the flow. + + capacity : string + Edges of the graph G are expected to have an attribute capacity + that indicates how much flow the edge can support. 
If this + attribute is not present, the edge is considered to have + infinite capacity. Default value: 'capacity'. + + flow_func : function + A function for computing the maximum flow among a pair of nodes + in a capacitated graph. The function has to accept at least three + parameters: a Graph or Digraph, a source node, and a target node. + And return a residual network that follows NetworkX conventions + (see Notes). If flow_func is None, the default maximum + flow function (:meth:`preflow_push`) is used. See below for + alternative algorithms. The choice of the default function may change + from version to version and should not be relied on. Default value: + None. + + kwargs : Any other keyword parameter is passed to the function that + computes the maximum flow. + + Returns + ------- + flow_value : integer, float + Value of the maximum flow, i.e., net outflow from the source. + + Raises + ------ + NetworkXError + The algorithm does not support MultiGraph and MultiDiGraph. If + the input graph is an instance of one of these two classes, a + NetworkXError is raised. + + NetworkXUnbounded + If the graph has a path of infinite capacity, the value of a + feasible flow on the graph is unbounded above and the function + raises a NetworkXUnbounded. + + See also + -------- + :meth:`maximum_flow` + :meth:`minimum_cut` + :meth:`minimum_cut_value` + :meth:`edmonds_karp` + :meth:`preflow_push` + :meth:`shortest_augmenting_path` + + Notes + ----- + The function used in the flow_func parameter has to return a residual + network that follows NetworkX conventions: + + The residual network :samp:`R` from an input graph :samp:`G` has the + same nodes as :samp:`G`. :samp:`R` is a DiGraph that contains a pair + of edges :samp:`(u, v)` and :samp:`(v, u)` iff :samp:`(u, v)` is not a + self-loop, and at least one of :samp:`(u, v)` and :samp:`(v, u)` exists + in :samp:`G`. 
+ + For each edge :samp:`(u, v)` in :samp:`R`, :samp:`R[u][v]['capacity']` + is equal to the capacity of :samp:`(u, v)` in :samp:`G` if it exists + in :samp:`G` or zero otherwise. If the capacity is infinite, + :samp:`R[u][v]['capacity']` will have a high arbitrary finite value + that does not affect the solution of the problem. This value is stored in + :samp:`R.graph['inf']`. For each edge :samp:`(u, v)` in :samp:`R`, + :samp:`R[u][v]['flow']` represents the flow function of :samp:`(u, v)` and + satisfies :samp:`R[u][v]['flow'] == -R[v][u]['flow']`. + + The flow value, defined as the total flow into :samp:`t`, the sink, is + stored in :samp:`R.graph['flow_value']`. Reachability to :samp:`t` using + only edges :samp:`(u, v)` such that + :samp:`R[u][v]['flow'] < R[u][v]['capacity']` induces a minimum + :samp:`s`-:samp:`t` cut. + + Specific algorithms may store extra data in :samp:`R`. + + The function should supports an optional boolean parameter value_only. When + True, it can optionally terminate the algorithm as soon as the maximum flow + value and the minimum cut can be determined. + + Examples + -------- + >>> G = nx.DiGraph() + >>> G.add_edge("x", "a", capacity=3.0) + >>> G.add_edge("x", "b", capacity=1.0) + >>> G.add_edge("a", "c", capacity=3.0) + >>> G.add_edge("b", "c", capacity=5.0) + >>> G.add_edge("b", "d", capacity=4.0) + >>> G.add_edge("d", "e", capacity=2.0) + >>> G.add_edge("c", "y", capacity=2.0) + >>> G.add_edge("e", "y", capacity=3.0) + + maximum_flow_value computes only the value of the + maximum flow: + + >>> flow_value = nx.maximum_flow_value(G, "x", "y") + >>> flow_value + 3.0 + + You can also use alternative algorithms for computing the + maximum flow by using the flow_func parameter. + + >>> from networkx.algorithms.flow import shortest_augmenting_path + >>> flow_value == nx.maximum_flow_value( + ... G, "x", "y", flow_func=shortest_augmenting_path + ... 
) + True + + """ + if flow_func is None: + if kwargs: + raise nx.NetworkXError( + "You have to explicitly set a flow_func if" + " you need to pass parameters via kwargs." + ) + flow_func = default_flow_func + + if not callable(flow_func): + raise nx.NetworkXError("flow_func has to be callable.") + + R = flow_func(flowG, _s, _t, capacity=capacity, value_only=True, **kwargs) + + return R.graph["flow_value"] + + +@nx._dispatchable(graphs="flowG", edge_attrs={"capacity": float("inf")}) +def minimum_cut(flowG, _s, _t, capacity="capacity", flow_func=None, **kwargs): + """Compute the value and the node partition of a minimum (s, t)-cut. + + Use the max-flow min-cut theorem, i.e., the capacity of a minimum + capacity cut is equal to the flow value of a maximum flow. + + Parameters + ---------- + flowG : NetworkX graph + Edges of the graph are expected to have an attribute called + 'capacity'. If this attribute is not present, the edge is + considered to have infinite capacity. + + _s : node + Source node for the flow. + + _t : node + Sink node for the flow. + + capacity : string + Edges of the graph G are expected to have an attribute capacity + that indicates how much flow the edge can support. If this + attribute is not present, the edge is considered to have + infinite capacity. Default value: 'capacity'. + + flow_func : function + A function for computing the maximum flow among a pair of nodes + in a capacitated graph. The function has to accept at least three + parameters: a Graph or Digraph, a source node, and a target node. + And return a residual network that follows NetworkX conventions + (see Notes). If flow_func is None, the default maximum + flow function (:meth:`preflow_push`) is used. See below for + alternative algorithms. The choice of the default function may change + from version to version and should not be relied on. Default value: + None. + + kwargs : Any other keyword parameter is passed to the function that + computes the maximum flow. 
+ + Returns + ------- + cut_value : integer, float + Value of the minimum cut. + + partition : pair of node sets + A partitioning of the nodes that defines a minimum cut. + + Raises + ------ + NetworkXUnbounded + If the graph has a path of infinite capacity, all cuts have + infinite capacity and the function raises a NetworkXError. + + See also + -------- + :meth:`maximum_flow` + :meth:`maximum_flow_value` + :meth:`minimum_cut_value` + :meth:`edmonds_karp` + :meth:`preflow_push` + :meth:`shortest_augmenting_path` + + Notes + ----- + The function used in the flow_func parameter has to return a residual + network that follows NetworkX conventions: + + The residual network :samp:`R` from an input graph :samp:`G` has the + same nodes as :samp:`G`. :samp:`R` is a DiGraph that contains a pair + of edges :samp:`(u, v)` and :samp:`(v, u)` iff :samp:`(u, v)` is not a + self-loop, and at least one of :samp:`(u, v)` and :samp:`(v, u)` exists + in :samp:`G`. + + For each edge :samp:`(u, v)` in :samp:`R`, :samp:`R[u][v]['capacity']` + is equal to the capacity of :samp:`(u, v)` in :samp:`G` if it exists + in :samp:`G` or zero otherwise. If the capacity is infinite, + :samp:`R[u][v]['capacity']` will have a high arbitrary finite value + that does not affect the solution of the problem. This value is stored in + :samp:`R.graph['inf']`. For each edge :samp:`(u, v)` in :samp:`R`, + :samp:`R[u][v]['flow']` represents the flow function of :samp:`(u, v)` and + satisfies :samp:`R[u][v]['flow'] == -R[v][u]['flow']`. + + The flow value, defined as the total flow into :samp:`t`, the sink, is + stored in :samp:`R.graph['flow_value']`. Reachability to :samp:`t` using + only edges :samp:`(u, v)` such that + :samp:`R[u][v]['flow'] < R[u][v]['capacity']` induces a minimum + :samp:`s`-:samp:`t` cut. + + Specific algorithms may store extra data in :samp:`R`. + + The function should supports an optional boolean parameter value_only. 
When + True, it can optionally terminate the algorithm as soon as the maximum flow + value and the minimum cut can be determined. + + Examples + -------- + >>> G = nx.DiGraph() + >>> G.add_edge("x", "a", capacity=3.0) + >>> G.add_edge("x", "b", capacity=1.0) + >>> G.add_edge("a", "c", capacity=3.0) + >>> G.add_edge("b", "c", capacity=5.0) + >>> G.add_edge("b", "d", capacity=4.0) + >>> G.add_edge("d", "e", capacity=2.0) + >>> G.add_edge("c", "y", capacity=2.0) + >>> G.add_edge("e", "y", capacity=3.0) + + minimum_cut computes both the value of the + minimum cut and the node partition: + + >>> cut_value, partition = nx.minimum_cut(G, "x", "y") + >>> reachable, non_reachable = partition + + 'partition' here is a tuple with the two sets of nodes that define + the minimum cut. You can compute the cut set of edges that induce + the minimum cut as follows: + + >>> cutset = set() + >>> for u, nbrs in ((n, G[n]) for n in reachable): + ... cutset.update((u, v) for v in nbrs if v in non_reachable) + >>> print(sorted(cutset)) + [('c', 'y'), ('x', 'b')] + >>> cut_value == sum(G.edges[u, v]["capacity"] for (u, v) in cutset) + True + + You can also use alternative algorithms for computing the + minimum cut by using the flow_func parameter. + + >>> from networkx.algorithms.flow import shortest_augmenting_path + >>> cut_value == nx.minimum_cut(G, "x", "y", flow_func=shortest_augmenting_path)[0] + True + + """ + if flow_func is None: + if kwargs: + raise nx.NetworkXError( + "You have to explicitly set a flow_func if" + " you need to pass parameters via kwargs." 
+ ) + flow_func = default_flow_func + + if not callable(flow_func): + raise nx.NetworkXError("flow_func has to be callable.") + + if kwargs.get("cutoff") is not None and flow_func is preflow_push: + raise nx.NetworkXError("cutoff should not be specified.") + + R = flow_func(flowG, _s, _t, capacity=capacity, value_only=True, **kwargs) + # Remove saturated edges from the residual network + cutset = [(u, v, d) for u, v, d in R.edges(data=True) if d["flow"] == d["capacity"]] + R.remove_edges_from(cutset) + + # Then, reachable and non reachable nodes from source in the + # residual network form the node partition that defines + # the minimum cut. + non_reachable = set(dict(nx.shortest_path_length(R, target=_t))) + partition = (set(flowG) - non_reachable, non_reachable) + # Finally add again cutset edges to the residual network to make + # sure that it is reusable. + R.add_edges_from(cutset) + return (R.graph["flow_value"], partition) + + +@nx._dispatchable(graphs="flowG", edge_attrs={"capacity": float("inf")}) +def minimum_cut_value(flowG, _s, _t, capacity="capacity", flow_func=None, **kwargs): + """Compute the value of a minimum (s, t)-cut. + + Use the max-flow min-cut theorem, i.e., the capacity of a minimum + capacity cut is equal to the flow value of a maximum flow. + + Parameters + ---------- + flowG : NetworkX graph + Edges of the graph are expected to have an attribute called + 'capacity'. If this attribute is not present, the edge is + considered to have infinite capacity. + + _s : node + Source node for the flow. + + _t : node + Sink node for the flow. + + capacity : string + Edges of the graph G are expected to have an attribute capacity + that indicates how much flow the edge can support. If this + attribute is not present, the edge is considered to have + infinite capacity. Default value: 'capacity'. + + flow_func : function + A function for computing the maximum flow among a pair of nodes + in a capacitated graph. 
The function has to accept at least three + parameters: a Graph or Digraph, a source node, and a target node. + And return a residual network that follows NetworkX conventions + (see Notes). If flow_func is None, the default maximum + flow function (:meth:`preflow_push`) is used. See below for + alternative algorithms. The choice of the default function may change + from version to version and should not be relied on. Default value: + None. + + kwargs : Any other keyword parameter is passed to the function that + computes the maximum flow. + + Returns + ------- + cut_value : integer, float + Value of the minimum cut. + + Raises + ------ + NetworkXUnbounded + If the graph has a path of infinite capacity, all cuts have + infinite capacity and the function raises a NetworkXError. + + See also + -------- + :meth:`maximum_flow` + :meth:`maximum_flow_value` + :meth:`minimum_cut` + :meth:`edmonds_karp` + :meth:`preflow_push` + :meth:`shortest_augmenting_path` + + Notes + ----- + The function used in the flow_func parameter has to return a residual + network that follows NetworkX conventions: + + The residual network :samp:`R` from an input graph :samp:`G` has the + same nodes as :samp:`G`. :samp:`R` is a DiGraph that contains a pair + of edges :samp:`(u, v)` and :samp:`(v, u)` iff :samp:`(u, v)` is not a + self-loop, and at least one of :samp:`(u, v)` and :samp:`(v, u)` exists + in :samp:`G`. + + For each edge :samp:`(u, v)` in :samp:`R`, :samp:`R[u][v]['capacity']` + is equal to the capacity of :samp:`(u, v)` in :samp:`G` if it exists + in :samp:`G` or zero otherwise. If the capacity is infinite, + :samp:`R[u][v]['capacity']` will have a high arbitrary finite value + that does not affect the solution of the problem. This value is stored in + :samp:`R.graph['inf']`. For each edge :samp:`(u, v)` in :samp:`R`, + :samp:`R[u][v]['flow']` represents the flow function of :samp:`(u, v)` and + satisfies :samp:`R[u][v]['flow'] == -R[v][u]['flow']`. 
+ + The flow value, defined as the total flow into :samp:`t`, the sink, is + stored in :samp:`R.graph['flow_value']`. Reachability to :samp:`t` using + only edges :samp:`(u, v)` such that + :samp:`R[u][v]['flow'] < R[u][v]['capacity']` induces a minimum + :samp:`s`-:samp:`t` cut. + + Specific algorithms may store extra data in :samp:`R`. + + The function should supports an optional boolean parameter value_only. When + True, it can optionally terminate the algorithm as soon as the maximum flow + value and the minimum cut can be determined. + + Examples + -------- + >>> G = nx.DiGraph() + >>> G.add_edge("x", "a", capacity=3.0) + >>> G.add_edge("x", "b", capacity=1.0) + >>> G.add_edge("a", "c", capacity=3.0) + >>> G.add_edge("b", "c", capacity=5.0) + >>> G.add_edge("b", "d", capacity=4.0) + >>> G.add_edge("d", "e", capacity=2.0) + >>> G.add_edge("c", "y", capacity=2.0) + >>> G.add_edge("e", "y", capacity=3.0) + + minimum_cut_value computes only the value of the + minimum cut: + + >>> cut_value = nx.minimum_cut_value(G, "x", "y") + >>> cut_value + 3.0 + + You can also use alternative algorithms for computing the + minimum cut by using the flow_func parameter. + + >>> from networkx.algorithms.flow import shortest_augmenting_path + >>> cut_value == nx.minimum_cut_value( + ... G, "x", "y", flow_func=shortest_augmenting_path + ... ) + True + + """ + if flow_func is None: + if kwargs: + raise nx.NetworkXError( + "You have to explicitly set a flow_func if" + " you need to pass parameters via kwargs." 
+ ) + flow_func = default_flow_func + + if not callable(flow_func): + raise nx.NetworkXError("flow_func has to be callable.") + + if kwargs.get("cutoff") is not None and flow_func is preflow_push: + raise nx.NetworkXError("cutoff should not be specified.") + + R = flow_func(flowG, _s, _t, capacity=capacity, value_only=True, **kwargs) + + return R.graph["flow_value"] diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/flow/networksimplex.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/flow/networksimplex.py new file mode 100644 index 0000000000000000000000000000000000000000..a9822d968808eb0c7bb45794e13150ad659b311a --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/flow/networksimplex.py @@ -0,0 +1,666 @@ +""" +Minimum cost flow algorithms on directed connected graphs. +""" + +__all__ = ["network_simplex"] + +from itertools import chain, islice, repeat +from math import ceil, sqrt + +import networkx as nx +from networkx.utils import not_implemented_for + + +class _DataEssentialsAndFunctions: + def __init__( + self, G, multigraph, demand="demand", capacity="capacity", weight="weight" + ): + # Number all nodes and edges and hereafter reference them using ONLY their numbers + self.node_list = list(G) # nodes + self.node_indices = {u: i for i, u in enumerate(self.node_list)} # node indices + self.node_demands = [ + G.nodes[u].get(demand, 0) for u in self.node_list + ] # node demands + + self.edge_sources = [] # edge sources + self.edge_targets = [] # edge targets + if multigraph: + self.edge_keys = [] # edge keys + self.edge_indices = {} # edge indices + self.edge_capacities = [] # edge capacities + self.edge_weights = [] # edge weights + + if not multigraph: + edges = G.edges(data=True) + else: + edges = G.edges(data=True, keys=True) + + inf = float("inf") + edges = (e for e in edges if e[0] != e[1] and e[-1].get(capacity, inf) != 0) + for i, e in enumerate(edges): + self.edge_sources.append(self.node_indices[e[0]]) + 
self.edge_targets.append(self.node_indices[e[1]]) + if multigraph: + self.edge_keys.append(e[2]) + self.edge_indices[e[:-1]] = i + self.edge_capacities.append(e[-1].get(capacity, inf)) + self.edge_weights.append(e[-1].get(weight, 0)) + + # spanning tree specific data to be initialized + + self.edge_count = None # number of edges + self.edge_flow = None # edge flows + self.node_potentials = None # node potentials + self.parent = None # parent nodes + self.parent_edge = None # edges to parents + self.subtree_size = None # subtree sizes + self.next_node_dft = None # next nodes in depth-first thread + self.prev_node_dft = None # previous nodes in depth-first thread + self.last_descendent_dft = None # last descendants in depth-first thread + self._spanning_tree_initialized = ( + False # False until initialize_spanning_tree() is called + ) + + def initialize_spanning_tree(self, n, faux_inf): + self.edge_count = len(self.edge_indices) # number of edges + self.edge_flow = list( + chain(repeat(0, self.edge_count), (abs(d) for d in self.node_demands)) + ) # edge flows + self.node_potentials = [ + faux_inf if d <= 0 else -faux_inf for d in self.node_demands + ] # node potentials + self.parent = list(chain(repeat(-1, n), [None])) # parent nodes + self.parent_edge = list( + range(self.edge_count, self.edge_count + n) + ) # edges to parents + self.subtree_size = list(chain(repeat(1, n), [n + 1])) # subtree sizes + self.next_node_dft = list( + chain(range(1, n), [-1, 0]) + ) # next nodes in depth-first thread + self.prev_node_dft = list(range(-1, n)) # previous nodes in depth-first thread + self.last_descendent_dft = list( + chain(range(n), [n - 1]) + ) # last descendants in depth-first thread + self._spanning_tree_initialized = True # True only if all the assignments pass + + def find_apex(self, p, q): + """ + Find the lowest common ancestor of nodes p and q in the spanning tree. 
+ """ + size_p = self.subtree_size[p] + size_q = self.subtree_size[q] + while True: + while size_p < size_q: + p = self.parent[p] + size_p = self.subtree_size[p] + while size_p > size_q: + q = self.parent[q] + size_q = self.subtree_size[q] + if size_p == size_q: + if p != q: + p = self.parent[p] + size_p = self.subtree_size[p] + q = self.parent[q] + size_q = self.subtree_size[q] + else: + return p + + def trace_path(self, p, w): + """ + Returns the nodes and edges on the path from node p to its ancestor w. + """ + Wn = [p] + We = [] + while p != w: + We.append(self.parent_edge[p]) + p = self.parent[p] + Wn.append(p) + return Wn, We + + def find_cycle(self, i, p, q): + """ + Returns the nodes and edges on the cycle containing edge i == (p, q) + when the latter is added to the spanning tree. + + The cycle is oriented in the direction from p to q. + """ + w = self.find_apex(p, q) + Wn, We = self.trace_path(p, w) + Wn.reverse() + We.reverse() + if We != [i]: + We.append(i) + WnR, WeR = self.trace_path(q, w) + del WnR[-1] + Wn += WnR + We += WeR + return Wn, We + + def augment_flow(self, Wn, We, f): + """ + Augment f units of flow along a cycle represented by Wn and We. + """ + for i, p in zip(We, Wn): + if self.edge_sources[i] == p: + self.edge_flow[i] += f + else: + self.edge_flow[i] -= f + + def trace_subtree(self, p): + """ + Yield the nodes in the subtree rooted at a node p. + """ + yield p + l = self.last_descendent_dft[p] + while p != l: + p = self.next_node_dft[p] + yield p + + def remove_edge(self, s, t): + """ + Remove an edge (s, t) where parent[t] == s from the spanning tree. + """ + size_t = self.subtree_size[t] + prev_t = self.prev_node_dft[t] + last_t = self.last_descendent_dft[t] + next_last_t = self.next_node_dft[last_t] + # Remove (s, t). + self.parent[t] = None + self.parent_edge[t] = None + # Remove the subtree rooted at t from the depth-first thread. 
+ self.next_node_dft[prev_t] = next_last_t + self.prev_node_dft[next_last_t] = prev_t + self.next_node_dft[last_t] = t + self.prev_node_dft[t] = last_t + # Update the subtree sizes and last descendants of the (old) ancestors + # of t. + while s is not None: + self.subtree_size[s] -= size_t + if self.last_descendent_dft[s] == last_t: + self.last_descendent_dft[s] = prev_t + s = self.parent[s] + + def make_root(self, q): + """ + Make a node q the root of its containing subtree. + """ + ancestors = [] + while q is not None: + ancestors.append(q) + q = self.parent[q] + ancestors.reverse() + for p, q in zip(ancestors, islice(ancestors, 1, None)): + size_p = self.subtree_size[p] + last_p = self.last_descendent_dft[p] + prev_q = self.prev_node_dft[q] + last_q = self.last_descendent_dft[q] + next_last_q = self.next_node_dft[last_q] + # Make p a child of q. + self.parent[p] = q + self.parent[q] = None + self.parent_edge[p] = self.parent_edge[q] + self.parent_edge[q] = None + self.subtree_size[p] = size_p - self.subtree_size[q] + self.subtree_size[q] = size_p + # Remove the subtree rooted at q from the depth-first thread. + self.next_node_dft[prev_q] = next_last_q + self.prev_node_dft[next_last_q] = prev_q + self.next_node_dft[last_q] = q + self.prev_node_dft[q] = last_q + if last_p == last_q: + self.last_descendent_dft[p] = prev_q + last_p = prev_q + # Add the remaining parts of the subtree rooted at p as a subtree + # of q in the depth-first thread. + self.prev_node_dft[p] = last_q + self.next_node_dft[last_q] = p + self.next_node_dft[last_p] = q + self.prev_node_dft[q] = last_p + self.last_descendent_dft[q] = last_p + + def add_edge(self, i, p, q): + """ + Add an edge (p, q) to the spanning tree where q is the root of a subtree. + """ + last_p = self.last_descendent_dft[p] + next_last_p = self.next_node_dft[last_p] + size_q = self.subtree_size[q] + last_q = self.last_descendent_dft[q] + # Make q a child of p. 
+ self.parent[q] = p + self.parent_edge[q] = i + # Insert the subtree rooted at q into the depth-first thread. + self.next_node_dft[last_p] = q + self.prev_node_dft[q] = last_p + self.prev_node_dft[next_last_p] = last_q + self.next_node_dft[last_q] = next_last_p + # Update the subtree sizes and last descendants of the (new) ancestors + # of q. + while p is not None: + self.subtree_size[p] += size_q + if self.last_descendent_dft[p] == last_p: + self.last_descendent_dft[p] = last_q + p = self.parent[p] + + def update_potentials(self, i, p, q): + """ + Update the potentials of the nodes in the subtree rooted at a node + q connected to its parent p by an edge i. + """ + if q == self.edge_targets[i]: + d = self.node_potentials[p] - self.edge_weights[i] - self.node_potentials[q] + else: + d = self.node_potentials[p] + self.edge_weights[i] - self.node_potentials[q] + for q in self.trace_subtree(q): + self.node_potentials[q] += d + + def reduced_cost(self, i): + """Returns the reduced cost of an edge i.""" + c = ( + self.edge_weights[i] + - self.node_potentials[self.edge_sources[i]] + + self.node_potentials[self.edge_targets[i]] + ) + return c if self.edge_flow[i] == 0 else -c + + def find_entering_edges(self): + """Yield entering edges until none can be found.""" + if self.edge_count == 0: + return + + # Entering edges are found by combining Dantzig's rule and Bland's + # rule. The edges are cyclically grouped into blocks of size B. Within + # each block, Dantzig's rule is applied to find an entering edge. The + # blocks to search is determined following Bland's rule. + B = int(ceil(sqrt(self.edge_count))) # pivot block size + M = (self.edge_count + B - 1) // B # number of blocks needed to cover all edges + m = 0 # number of consecutive blocks without eligible + # entering edges + f = 0 # first edge in block + while m < M: + # Determine the next block of edges. 
+ l = f + B + if l <= self.edge_count: + edges = range(f, l) + else: + l -= self.edge_count + edges = chain(range(f, self.edge_count), range(l)) + f = l + # Find the first edge with the lowest reduced cost. + i = min(edges, key=self.reduced_cost) + c = self.reduced_cost(i) + if c >= 0: + # No entering edge found in the current block. + m += 1 + else: + # Entering edge found. + if self.edge_flow[i] == 0: + p = self.edge_sources[i] + q = self.edge_targets[i] + else: + p = self.edge_targets[i] + q = self.edge_sources[i] + yield i, p, q + m = 0 + # All edges have nonnegative reduced costs. The current flow is + # optimal. + + def residual_capacity(self, i, p): + """Returns the residual capacity of an edge i in the direction away + from its endpoint p. + """ + return ( + self.edge_capacities[i] - self.edge_flow[i] + if self.edge_sources[i] == p + else self.edge_flow[i] + ) + + def find_leaving_edge(self, Wn, We): + """Returns the leaving edge in a cycle represented by Wn and We.""" + j, s = min( + zip(reversed(We), reversed(Wn)), + key=lambda i_p: self.residual_capacity(*i_p), + ) + t = self.edge_targets[j] if self.edge_sources[j] == s else self.edge_sources[j] + return j, s, t + + +@not_implemented_for("undirected") +@nx._dispatchable( + node_attrs="demand", edge_attrs={"capacity": float("inf"), "weight": 0} +) +def network_simplex(G, demand="demand", capacity="capacity", weight="weight"): + r"""Find a minimum cost flow satisfying all demands in digraph G. + + This is a primal network simplex algorithm that uses the leaving + arc rule to prevent cycling. + + G is a digraph with edge costs and capacities and in which nodes + have demand, i.e., they want to send or receive some amount of + flow. A negative demand means that the node wants to send flow, a + positive demand means that the node want to receive flow. A flow on + the digraph G satisfies all demand if the net flow into each node + is equal to the demand of that node. 
+ + Parameters + ---------- + G : NetworkX graph + DiGraph on which a minimum cost flow satisfying all demands is + to be found. + + demand : string + Nodes of the graph G are expected to have an attribute demand + that indicates how much flow a node wants to send (negative + demand) or receive (positive demand). Note that the sum of the + demands should be 0 otherwise the problem in not feasible. If + this attribute is not present, a node is considered to have 0 + demand. Default value: 'demand'. + + capacity : string + Edges of the graph G are expected to have an attribute capacity + that indicates how much flow the edge can support. If this + attribute is not present, the edge is considered to have + infinite capacity. Default value: 'capacity'. + + weight : string + Edges of the graph G are expected to have an attribute weight + that indicates the cost incurred by sending one unit of flow on + that edge. If not present, the weight is considered to be 0. + Default value: 'weight'. + + Returns + ------- + flowCost : integer, float + Cost of a minimum cost flow satisfying all demands. + + flowDict : dictionary + Dictionary of dictionaries keyed by nodes such that + flowDict[u][v] is the flow edge (u, v). + + Raises + ------ + NetworkXError + This exception is raised if the input graph is not directed or + not connected. + + NetworkXUnfeasible + This exception is raised in the following situations: + + * The sum of the demands is not zero. Then, there is no + flow satisfying all demands. + * There is no flow satisfying all demand. + + NetworkXUnbounded + This exception is raised if the digraph G has a cycle of + negative cost and infinite capacity. Then, the cost of a flow + satisfying all demands is unbounded below. + + Notes + ----- + This algorithm is not guaranteed to work if edge weights or demands + are floating point numbers (overflows and roundoff errors can + cause problems). 
As a workaround you can use integer numbers by + multiplying the relevant edge attributes by a convenient + constant factor (eg 100). + + See also + -------- + cost_of_flow, max_flow_min_cost, min_cost_flow, min_cost_flow_cost + + Examples + -------- + A simple example of a min cost flow problem. + + >>> G = nx.DiGraph() + >>> G.add_node("a", demand=-5) + >>> G.add_node("d", demand=5) + >>> G.add_edge("a", "b", weight=3, capacity=4) + >>> G.add_edge("a", "c", weight=6, capacity=10) + >>> G.add_edge("b", "d", weight=1, capacity=9) + >>> G.add_edge("c", "d", weight=2, capacity=5) + >>> flowCost, flowDict = nx.network_simplex(G) + >>> flowCost + 24 + >>> flowDict + {'a': {'b': 4, 'c': 1}, 'd': {}, 'b': {'d': 4}, 'c': {'d': 1}} + + The mincost flow algorithm can also be used to solve shortest path + problems. To find the shortest path between two nodes u and v, + give all edges an infinite capacity, give node u a demand of -1 and + node v a demand a 1. Then run the network simplex. The value of a + min cost flow will be the distance between u and v and edges + carrying positive flow will indicate the path. + + >>> G = nx.DiGraph() + >>> G.add_weighted_edges_from( + ... [ + ... ("s", "u", 10), + ... ("s", "x", 5), + ... ("u", "v", 1), + ... ("u", "x", 2), + ... ("v", "y", 1), + ... ("x", "u", 3), + ... ("x", "v", 5), + ... ("x", "y", 2), + ... ("y", "s", 7), + ... ("y", "v", 6), + ... ] + ... ) + >>> G.add_node("s", demand=-1) + >>> G.add_node("v", demand=1) + >>> flowCost, flowDict = nx.network_simplex(G) + >>> flowCost == nx.shortest_path_length(G, "s", "v", weight="weight") + True + >>> sorted([(u, v) for u in flowDict for v in flowDict[u] if flowDict[u][v] > 0]) + [('s', 'x'), ('u', 'v'), ('x', 'u')] + >>> nx.shortest_path(G, "s", "v", weight="weight") + ['s', 'x', 'u', 'v'] + + It is possible to change the name of the attributes used for the + algorithm. 
+ + >>> G = nx.DiGraph() + >>> G.add_node("p", spam=-4) + >>> G.add_node("q", spam=2) + >>> G.add_node("a", spam=-2) + >>> G.add_node("d", spam=-1) + >>> G.add_node("t", spam=2) + >>> G.add_node("w", spam=3) + >>> G.add_edge("p", "q", cost=7, vacancies=5) + >>> G.add_edge("p", "a", cost=1, vacancies=4) + >>> G.add_edge("q", "d", cost=2, vacancies=3) + >>> G.add_edge("t", "q", cost=1, vacancies=2) + >>> G.add_edge("a", "t", cost=2, vacancies=4) + >>> G.add_edge("d", "w", cost=3, vacancies=4) + >>> G.add_edge("t", "w", cost=4, vacancies=1) + >>> flowCost, flowDict = nx.network_simplex( + ... G, demand="spam", capacity="vacancies", weight="cost" + ... ) + >>> flowCost + 37 + >>> flowDict + {'p': {'q': 2, 'a': 2}, 'q': {'d': 1}, 'a': {'t': 4}, 'd': {'w': 2}, 't': {'q': 1, 'w': 1}, 'w': {}} + + References + ---------- + .. [1] Z. Kiraly, P. Kovacs. + Efficient implementation of minimum-cost flow algorithms. + Acta Universitatis Sapientiae, Informatica 4(1):67--118. 2012. + .. [2] R. Barr, F. Glover, D. Klingman. + Enhancement of spanning tree labeling procedures for network + optimization. + INFOR 17(1):16--34. 1979. 
+ """ + ########################################################################### + # Problem essentials extraction and sanity check + ########################################################################### + + if len(G) == 0: + raise nx.NetworkXError("graph has no nodes") + + multigraph = G.is_multigraph() + + # extracting data essential to problem + DEAF = _DataEssentialsAndFunctions( + G, multigraph, demand=demand, capacity=capacity, weight=weight + ) + + ########################################################################### + # Quick Error Detection + ########################################################################### + + inf = float("inf") + for u, d in zip(DEAF.node_list, DEAF.node_demands): + if abs(d) == inf: + raise nx.NetworkXError(f"node {u!r} has infinite demand") + for e, w in zip(DEAF.edge_indices, DEAF.edge_weights): + if abs(w) == inf: + raise nx.NetworkXError(f"edge {e!r} has infinite weight") + if not multigraph: + edges = nx.selfloop_edges(G, data=True) + else: + edges = nx.selfloop_edges(G, data=True, keys=True) + for e in edges: + if abs(e[-1].get(weight, 0)) == inf: + raise nx.NetworkXError(f"edge {e[:-1]!r} has infinite weight") + + ########################################################################### + # Quick Infeasibility Detection + ########################################################################### + + if sum(DEAF.node_demands) != 0: + raise nx.NetworkXUnfeasible("total node demand is not zero") + for e, c in zip(DEAF.edge_indices, DEAF.edge_capacities): + if c < 0: + raise nx.NetworkXUnfeasible(f"edge {e!r} has negative capacity") + if not multigraph: + edges = nx.selfloop_edges(G, data=True) + else: + edges = nx.selfloop_edges(G, data=True, keys=True) + for e in edges: + if e[-1].get(capacity, inf) < 0: + raise nx.NetworkXUnfeasible(f"edge {e[:-1]!r} has negative capacity") + + ########################################################################### + # Initialization + 
########################################################################### + + # Add a dummy node -1 and connect all existing nodes to it with infinite- + # capacity dummy edges. Node -1 will serve as the root of the + # spanning tree of the network simplex method. The new edges will used to + # trivially satisfy the node demands and create an initial strongly + # feasible spanning tree. + for i, d in enumerate(DEAF.node_demands): + # Must be greater-than here. Zero-demand nodes must have + # edges pointing towards the root to ensure strong feasibility. + if d > 0: + DEAF.edge_sources.append(-1) + DEAF.edge_targets.append(i) + else: + DEAF.edge_sources.append(i) + DEAF.edge_targets.append(-1) + faux_inf = ( + 3 + * max( + chain( + [ + sum(c for c in DEAF.edge_capacities if c < inf), + sum(abs(w) for w in DEAF.edge_weights), + ], + (abs(d) for d in DEAF.node_demands), + ) + ) + or 1 + ) + + n = len(DEAF.node_list) # number of nodes + DEAF.edge_weights.extend(repeat(faux_inf, n)) + DEAF.edge_capacities.extend(repeat(faux_inf, n)) + + # Construct the initial spanning tree. + DEAF.initialize_spanning_tree(n, faux_inf) + + ########################################################################### + # Pivot loop + ########################################################################### + + for i, p, q in DEAF.find_entering_edges(): + Wn, We = DEAF.find_cycle(i, p, q) + j, s, t = DEAF.find_leaving_edge(Wn, We) + DEAF.augment_flow(Wn, We, DEAF.residual_capacity(j, s)) + # Do nothing more if the entering edge is the same as the leaving edge. + if i != j: + if DEAF.parent[t] != s: + # Ensure that s is the parent of t. + s, t = t, s + if We.index(i) > We.index(j): + # Ensure that q is in the subtree rooted at t. 
+ p, q = q, p + DEAF.remove_edge(s, t) + DEAF.make_root(q) + DEAF.add_edge(i, p, q) + DEAF.update_potentials(i, p, q) + + ########################################################################### + # Infeasibility and unboundedness detection + ########################################################################### + + if any(DEAF.edge_flow[i] != 0 for i in range(-n, 0)): + raise nx.NetworkXUnfeasible("no flow satisfies all node demands") + + if any(DEAF.edge_flow[i] * 2 >= faux_inf for i in range(DEAF.edge_count)) or any( + e[-1].get(capacity, inf) == inf and e[-1].get(weight, 0) < 0 + for e in nx.selfloop_edges(G, data=True) + ): + raise nx.NetworkXUnbounded("negative cycle with infinite capacity found") + + ########################################################################### + # Flow cost calculation and flow dict construction + ########################################################################### + + del DEAF.edge_flow[DEAF.edge_count :] + flow_cost = sum(w * x for w, x in zip(DEAF.edge_weights, DEAF.edge_flow)) + flow_dict = {n: {} for n in DEAF.node_list} + + def add_entry(e): + """Add a flow dict entry.""" + d = flow_dict[e[0]] + for k in e[1:-2]: + try: + d = d[k] + except KeyError: + t = {} + d[k] = t + d = t + d[e[-2]] = e[-1] + + DEAF.edge_sources = ( + DEAF.node_list[s] for s in DEAF.edge_sources + ) # Use original nodes. + DEAF.edge_targets = ( + DEAF.node_list[t] for t in DEAF.edge_targets + ) # Use original nodes. 
+ if not multigraph: + for e in zip(DEAF.edge_sources, DEAF.edge_targets, DEAF.edge_flow): + add_entry(e) + edges = G.edges(data=True) + else: + for e in zip( + DEAF.edge_sources, DEAF.edge_targets, DEAF.edge_keys, DEAF.edge_flow + ): + add_entry(e) + edges = G.edges(data=True, keys=True) + for e in edges: + if e[0] != e[1]: + if e[-1].get(capacity, inf) == 0: + add_entry(e[:-1] + (0,)) + else: + w = e[-1].get(weight, 0) + if w >= 0: + add_entry(e[:-1] + (0,)) + else: + c = e[-1][capacity] + flow_cost += w * c + add_entry(e[:-1] + (c,)) + + return flow_cost, flow_dict diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/flow/preflowpush.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/flow/preflowpush.py new file mode 100644 index 0000000000000000000000000000000000000000..42cadc2e2db6ecfb5a347499c89d5ae77f6af3d8 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/flow/preflowpush.py @@ -0,0 +1,425 @@ +""" +Highest-label preflow-push algorithm for maximum flow problems. 
def preflow_push_impl(G, s, t, capacity, residual, global_relabel_freq, value_only):
    """Implementation of the highest-label preflow-push algorithm.

    Parameters
    ----------
    G : graph
        Input graph. Must contain both ``s`` and ``t``.
    s, t : node
        Source and sink of the flow. They must be distinct.
    capacity : string
        Forwarded to ``build_residual_network`` when ``residual`` is None.
    residual : graph or None
        Residual network to run on. If None, a new one is built from ``G``.
        A supplied network is reset in place: every node's ``excess`` and
        every edge's ``flow`` is zeroed before the algorithm starts.
    global_relabel_freq : number or None
        Forwarded to ``GlobalRelabelThreshold``. None is treated as 0.
    value_only : bool
        If true, return right after phase 1 (maximum preflow) without running
        phase 2 (conversion of the preflow into a flow).

    Returns
    -------
    R : graph
        The residual network. ``R.graph["flow_value"]`` holds the excess
        accumulated at ``t``, or 0 when ``t`` cannot be reached from ``s``.
        Nodes carry ``excess``, ``height`` and ``curr_edge`` attributes and
        edges carry ``flow``.

    Raises
    ------
    NetworkXError
        If ``s`` or ``t`` is not in ``G``, if ``s == t``, or if
        ``global_relabel_freq`` is negative.

    Notes
    -----
    NOTE(review): ``detect_unboundedness`` is defined in ``.utils``; it
    presumably raises ``NetworkXUnbounded`` for infinite-capacity s-t paths.
    Confirm against that helper.
    """
    if s not in G:
        raise nx.NetworkXError(f"node {str(s)} not in graph")
    if t not in G:
        raise nx.NetworkXError(f"node {str(t)} not in graph")
    if s == t:
        raise nx.NetworkXError("source and sink are the same node")

    # None disables the heuristic by mapping to a frequency of zero.
    if global_relabel_freq is None:
        global_relabel_freq = 0
    if global_relabel_freq < 0:
        raise nx.NetworkXError("global_relabel_freq must be nonnegative.")

    if residual is None:
        R = build_residual_network(G, capacity)
    else:
        R = residual

    detect_unboundedness(R, s, t)

    # Bind the views once; they are used in every inner helper below.
    R_nodes = R.nodes
    R_pred = R.pred
    R_succ = R.succ

    # Initialize/reset the residual network.
    for u in R:
        R_nodes[u]["excess"] = 0
        for e in R_succ[u].values():
            e["flow"] = 0

    def reverse_bfs(src):
        """Perform a reverse breadth-first search from src in the residual
        network.

        Returns a dict mapping each node that can reach ``src`` through
        unsaturated edges to its BFS distance from ``src``.
        """
        heights = {src: 0}
        q = deque([(src, 0)])
        while q:
            u, height = q.popleft()
            height += 1
            for v, attr in R_pred[u].items():
                # Only follow edges (v, u) that still have spare capacity.
                if v not in heights and attr["flow"] < attr["capacity"]:
                    heights[v] = height
                    q.append((v, height))
        return heights

    # Initialize heights of the nodes.
    heights = reverse_bfs(t)

    if s not in heights:
        # t is not reachable from s in the residual network. The maximum flow
        # must be zero.
        R.graph["flow_value"] = 0
        return R

    n = len(R)
    # max_height represents the height of the highest level below level n with
    # at least one active node.
    max_height = max(heights[u] for u in heights if u != s)
    heights[s] = n

    grt = GlobalRelabelThreshold(n, R.size(), global_relabel_freq)

    # Initialize heights and 'current edge' data structures of the nodes.
    for u in R:
        # Nodes that cannot reach t start at height n + 1.
        R_nodes[u]["height"] = heights[u] if u in heights else n + 1
        R_nodes[u]["curr_edge"] = CurrentEdge(R_succ[u])

    def push(u, v, flow):
        """Push flow units of flow from u to v."""
        # Flow is kept antisymmetric across the (u, v) / (v, u) edge pair.
        R_succ[u][v]["flow"] += flow
        R_succ[v][u]["flow"] -= flow
        R_nodes[u]["excess"] -= flow
        R_nodes[v]["excess"] += flow

    # The maximum flow must be nonzero now. Initialize the preflow by
    # saturating all edges emanating from s.
    for u, attr in R_succ[s].items():
        flow = attr["capacity"]
        if flow > 0:
            push(s, u, flow)

    # Partition nodes into levels.
    # Node heights index into this list; s and t are never stored in a level.
    levels = [Level() for i in range(2 * n)]
    for u in R:
        if u != s and u != t:
            level = levels[R_nodes[u]["height"]]
            if R_nodes[u]["excess"] > 0:
                level.active.add(u)
            else:
                level.inactive.add(u)

    def activate(v):
        """Move a node from the inactive set to the active set of its level."""
        if v != s and v != t:
            level = levels[R_nodes[v]["height"]]
            if v in level.inactive:
                level.inactive.remove(v)
                level.active.add(v)

    def relabel(u):
        """Relabel a node to create an admissible edge.

        Returns the new height (one more than the lowest neighbor reachable
        through an unsaturated edge); the caller stores it on the node.
        """
        grt.add_work(len(R_succ[u]))
        return (
            min(
                R_nodes[v]["height"]
                for v, attr in R_succ[u].items()
                if attr["flow"] < attr["capacity"]
            )
            + 1
        )

    def discharge(u, is_phase1):
        """Discharge a node until it becomes inactive or, during phase 1 (see
        below), its height reaches at least n. The node is known to have the
        largest height among active nodes.
        """
        height = R_nodes[u]["height"]
        curr_edge = R_nodes[u]["curr_edge"]
        # next_height represents the next height to examine after discharging
        # the current node. During phase 1, it is capped to below n.
        next_height = height
        levels[height].active.remove(u)
        while True:
            v, attr = curr_edge.get()
            if height == R_nodes[v]["height"] + 1 and attr["flow"] < attr["capacity"]:
                flow = min(R_nodes[u]["excess"], attr["capacity"] - attr["flow"])
                push(u, v, flow)
                activate(v)
                if R_nodes[u]["excess"] == 0:
                    # The node has become inactive.
                    levels[height].inactive.add(u)
                    break
            try:
                curr_edge.move_to_next()
            except StopIteration:
                # We have run off the end of the adjacency list, and there can
                # be no more admissible edges. Relabel the node to create one.
                height = relabel(u)
                if is_phase1 and height >= n - 1:
                    # Although the node is still active, with a height at least
                    # n - 1, it is now known to be on the s side of the minimum
                    # s-t cut. Stop processing it until phase 2.
                    levels[height].active.add(u)
                    break
                # The first relabel operation after global relabeling may not
                # increase the height of the node since the 'current edge' data
                # structure is not rewound. Use height instead of (height - 1)
                # in case other active nodes at the same level are missed.
                next_height = height
        # Persist the (possibly relabeled) height only once, on exit.
        R_nodes[u]["height"] = height
        return next_height

    def gap_heuristic(height):
        """Apply the gap heuristic."""
        # max_height is read from the enclosing scope at call time.
        # Move all nodes at levels (height + 1) to max_height to level n + 1.
        for level in islice(levels, height + 1, max_height + 1):
            for u in level.active:
                R_nodes[u]["height"] = n + 1
            for u in level.inactive:
                R_nodes[u]["height"] = n + 1
            levels[n + 1].active.update(level.active)
            level.active.clear()
            levels[n + 1].inactive.update(level.inactive)
            level.inactive.clear()

    def global_relabel(from_sink):
        """Apply the global relabeling heuristic.

        Recomputes heights by BFS from t (``from_sink`` true) or from s
        (``from_sink`` false), moves nodes between levels accordingly and
        returns the largest recomputed height.
        """
        # heights and max_height below are locals of this helper; they do not
        # rebind the enclosing function's variables of the same name.
        src = t if from_sink else s
        heights = reverse_bfs(src)
        if not from_sink:
            # s must be reachable from t. Remove t explicitly.
            del heights[t]
        max_height = max(heights.values())
        if from_sink:
            # Also mark nodes from which t is unreachable for relabeling. This
            # serves the same purpose as the gap heuristic.
            for u in R:
                if u not in heights and R_nodes[u]["height"] < n:
                    heights[u] = n + 1
        else:
            # Shift the computed heights because the height of s is n.
            for u in heights:
                heights[u] += n
            max_height += n
        del heights[src]
        for u, new_height in heights.items():
            old_height = R_nodes[u]["height"]
            if new_height != old_height:
                if u in levels[old_height].active:
                    levels[old_height].active.remove(u)
                    levels[new_height].active.add(u)
                else:
                    levels[old_height].inactive.remove(u)
                    levels[new_height].inactive.add(u)
                R_nodes[u]["height"] = new_height
        return max_height

    # Phase 1: Find the maximum preflow by pushing as much flow as possible to
    # t.

    height = max_height
    while height > 0:
        # Discharge active nodes in the current level.
        while True:
            level = levels[height]
            if not level.active:
                # All active nodes in the current level have been discharged.
                # Move to the next lower level.
                height -= 1
                break
            # Record the old height and level for the gap heuristic.
            old_height = height
            old_level = level
            u = arbitrary_element(level.active)
            height = discharge(u, True)
            if grt.is_reached():
                # Global relabeling heuristic: Recompute the exact heights of
                # all nodes.
                height = global_relabel(True)
                max_height = height
                grt.clear_work()
            elif not old_level.active and not old_level.inactive:
                # Gap heuristic: If the level at old_height is empty (a 'gap'),
                # a minimum cut has been identified. All nodes with heights
                # above old_height can have their heights set to n + 1 and not
                # be further processed before a maximum preflow is found.
                gap_heuristic(old_height)
                height = old_height - 1
                max_height = height
            else:
                # Update the height of the highest level with at least one
                # active node.
                max_height = max(max_height, height)

    # A maximum preflow has been found. The excess at t is the maximum flow
    # value.
    if value_only:
        R.graph["flow_value"] = R_nodes[t]["excess"]
        return R

    # Phase 2: Convert the maximum preflow into a maximum flow by returning the
    # excess to s.

    # Relabel all nodes so that they have accurate heights.
    height = global_relabel(False)
    grt.clear_work()

    # Continue to discharge the active nodes.
    while height > n:
        # Discharge active nodes in the current level.
        while True:
            level = levels[height]
            if not level.active:
                # All active nodes in the current level have been discharged.
                # Move to the next lower level.
                height -= 1
                break
            u = arbitrary_element(level.active)
            height = discharge(u, False)
            if grt.is_reached():
                # Global relabeling heuristic.
                height = global_relabel(False)
                grt.clear_work()

    R.graph["flow_value"] = R_nodes[t]["excess"]
    return R
+ + residual : NetworkX graph + Residual network on which the algorithm is to be executed. If None, a + new residual network is created. Default value: None. + + global_relabel_freq : integer, float + Relative frequency of applying the global relabeling heuristic to speed + up the algorithm. If it is None, the heuristic is disabled. Default + value: 1. + + value_only : bool + If False, compute a maximum flow; otherwise, compute a maximum preflow + which is enough for computing the maximum flow value. Default value: + False. + + Returns + ------- + R : NetworkX DiGraph + Residual network after computing the maximum flow. + + Raises + ------ + NetworkXError + The algorithm does not support MultiGraph and MultiDiGraph. If + the input graph is an instance of one of these two classes, a + NetworkXError is raised. + + NetworkXUnbounded + If the graph has a path of infinite capacity, the value of a + feasible flow on the graph is unbounded above and the function + raises a NetworkXUnbounded. + + See also + -------- + :meth:`maximum_flow` + :meth:`minimum_cut` + :meth:`edmonds_karp` + :meth:`shortest_augmenting_path` + + Notes + ----- + The residual network :samp:`R` from an input graph :samp:`G` has the + same nodes as :samp:`G`. :samp:`R` is a DiGraph that contains a pair + of edges :samp:`(u, v)` and :samp:`(v, u)` iff :samp:`(u, v)` is not a + self-loop, and at least one of :samp:`(u, v)` and :samp:`(v, u)` exists + in :samp:`G`. For each node :samp:`u` in :samp:`R`, + :samp:`R.nodes[u]['excess']` represents the difference between flow into + :samp:`u` and flow out of :samp:`u`. + + For each edge :samp:`(u, v)` in :samp:`R`, :samp:`R[u][v]['capacity']` + is equal to the capacity of :samp:`(u, v)` in :samp:`G` if it exists + in :samp:`G` or zero otherwise. If the capacity is infinite, + :samp:`R[u][v]['capacity']` will have a high arbitrary finite value + that does not affect the solution of the problem. This value is stored in + :samp:`R.graph['inf']`. 
For each edge :samp:`(u, v)` in :samp:`R`, + :samp:`R[u][v]['flow']` represents the flow function of :samp:`(u, v)` and + satisfies :samp:`R[u][v]['flow'] == -R[v][u]['flow']`. + + The flow value, defined as the total flow into :samp:`t`, the sink, is + stored in :samp:`R.graph['flow_value']`. Reachability to :samp:`t` using + only edges :samp:`(u, v)` such that + :samp:`R[u][v]['flow'] < R[u][v]['capacity']` induces a minimum + :samp:`s`-:samp:`t` cut. + + Examples + -------- + >>> from networkx.algorithms.flow import preflow_push + + The functions that implement flow algorithms and output a residual + network, such as this one, are not imported to the base NetworkX + namespace, so you have to explicitly import them from the flow package. + + >>> G = nx.DiGraph() + >>> G.add_edge("x", "a", capacity=3.0) + >>> G.add_edge("x", "b", capacity=1.0) + >>> G.add_edge("a", "c", capacity=3.0) + >>> G.add_edge("b", "c", capacity=5.0) + >>> G.add_edge("b", "d", capacity=4.0) + >>> G.add_edge("d", "e", capacity=2.0) + >>> G.add_edge("c", "y", capacity=2.0) + >>> G.add_edge("e", "y", capacity=3.0) + >>> R = preflow_push(G, "x", "y") + >>> flow_value = nx.maximum_flow_value(G, "x", "y") + >>> flow_value == R.graph["flow_value"] + True + >>> # preflow_push also stores the maximum flow value + >>> # in the excess attribute of the sink node t + >>> flow_value == R.nodes["y"]["excess"] + True + >>> # For some problems, you might only want to compute a + >>> # maximum preflow. 
def _hash_label(label, digest_size):
    """Return the hex-encoded BLAKE2b digest of a text label.

    Parameters
    ----------
    label : str
        Text to hash.
    digest_size : int
        Digest length in bytes, forwarded to ``hashlib.blake2b``. The
        returned hex string has ``2 * digest_size`` characters.

    Returns
    -------
    str
        Hexadecimal digest of ``label``.

    Notes
    -----
    The label is encoded as UTF-8. For pure-ASCII labels this produces the
    same bytes, and therefore the same hashes, as ASCII encoding; labels
    containing non-ASCII characters (for example unicode node attributes)
    are hashed instead of raising ``UnicodeEncodeError``.
    """
    return blake2b(label.encode("utf-8"), digest_size=digest_size).hexdigest()
@nx.utils.not_implemented_for("multigraph")
@nx._dispatchable(edge_attrs={"edge_attr": None}, node_attrs="node_attr")
def weisfeiler_lehman_graph_hash(
    G, edge_attr=None, node_attr=None, iterations=3, digest_size=16
):
    """Return Weisfeiler Lehman (WL) graph hash.

    Each round replaces every node label with a hash of that label combined
    with the sorted labels of the node's neighbors. After every round the
    sorted histogram of node labels is recorded; the hash of all recorded
    histograms is the result.

    Isomorphic graphs always receive identical hashes, and there are strong
    guarantees that non-isomorphic graphs receive different ones.
    See [1]_ for details.

    If no node or edge attributes are provided, the degree of each node
    is used as its initial label.
    Otherwise, node and/or edge labels are used to compute the hash.

    Parameters
    ----------
    G : graph
        The graph to be hashed.
        Can have node and/or edge attributes. Can also have no attributes.
    edge_attr : string, optional (default=None)
        The key in edge attribute dictionary to be used for hashing.
        If None, edge labels are ignored.
    node_attr : string, optional (default=None)
        The key in node attribute dictionary to be used for hashing.
        If None, and no edge_attr given, use the degrees of the nodes as labels.
    iterations : int, optional (default=3)
        Number of neighbor aggregations to perform.
        Should be larger for larger graphs.
    digest_size : int, optional (default=16)
        Size (in bytes) of the blake2b hash digest used for hashing node
        labels. The returned hex string is twice this long.

    Returns
    -------
    h : string
        Hexadecimal string corresponding to hash of the input graph.

    Examples
    --------
    Two graphs with edge attributes that are isomorphic, except for
    differences in the edge labels.

    >>> G1 = nx.Graph()
    >>> G1.add_edges_from(
    ...     [
    ...         (1, 2, {"label": "A"}),
    ...         (2, 3, {"label": "A"}),
    ...         (3, 1, {"label": "A"}),
    ...         (1, 4, {"label": "B"}),
    ...     ]
    ... )
    >>> G2 = nx.Graph()
    >>> G2.add_edges_from(
    ...     [
    ...         (5, 6, {"label": "B"}),
    ...         (6, 7, {"label": "A"}),
    ...         (7, 5, {"label": "A"}),
    ...         (7, 8, {"label": "A"}),
    ...     ]
    ... )

    Omitting the `edge_attr` option, results in identical hashes.

    >>> nx.weisfeiler_lehman_graph_hash(G1)
    '7bc4dde9a09d0b94c5097b219891d81a'
    >>> nx.weisfeiler_lehman_graph_hash(G2)
    '7bc4dde9a09d0b94c5097b219891d81a'

    With edge labels, the graphs are no longer assigned
    the same hash digest.

    >>> nx.weisfeiler_lehman_graph_hash(G1, edge_attr="label")
    'c653d85538bcf041d88c011f4f905f10'
    >>> nx.weisfeiler_lehman_graph_hash(G2, edge_attr="label")
    '3dcd84af1ca855d0eff3c978d88e7ec7'

    Notes
    -----
    To return the WL hashes of each subgraph of a graph, use
    `weisfeiler_lehman_subgraph_hashes`

    Similarity between hashes does not imply similarity between graphs.

    References
    ----------
    .. [1] Shervashidze, Nino, Pascal Schweitzer, Erik Jan Van Leeuwen,
       Kurt Mehlhorn, and Karsten M. Borgwardt. Weisfeiler Lehman
       Graph Kernels. Journal of Machine Learning Research. 2011.
       http://www.jmlr.org/papers/volume12/shervashidze11a/shervashidze11a.pdf

    See also
    --------
    weisfeiler_lehman_subgraph_hashes
    """

    def relabel_all(current):
        """Run one WL round: hash every node's aggregated neighborhood."""
        return {
            node: _hash_label(
                _neighborhood_aggregate(G, node, current, edge_attr=edge_attr),
                digest_size,
            )
            for node in G.nodes()
        }

    # Starting labels come from node attributes, edge-only mode, or degrees.
    labels = _init_node_labels(G, edge_attr, node_attr)

    histogram = []
    for _ in range(iterations):
        labels = relabel_all(labels)
        tally = Counter(labels.values())
        # Sort by label so the result does not depend on node iteration order.
        histogram += sorted(tally.items(), key=lambda pair: pair[0])

    # The graph hash is the hash of the per-round label histograms.
    return _hash_label(str(tuple(histogram)), digest_size)
+ + To aggregate neighborhoods for a node $u$ at each step, all labels of + nodes adjacent to $u$ are concatenated. If the `edge_attr` parameter is set, + labels for each neighboring node are prefixed with the value of this attribute + along the connecting edge from this neighbor to node $u$. The resulting string + is then hashed to compress this information into a fixed digest size. + + Thus, at the $i$-th iteration, nodes within $i$ hops influence any given + hashed node label. We can therefore say that at depth $i$ for node $u$ + we have a hash for a subgraph induced by the $i$-hop neighborhood of $u$. + + The output can be used to create general Weisfeiler-Lehman graph kernels, + or generate features for graphs or nodes - for example to generate 'words' in + a graph as seen in the 'graph2vec' algorithm. + See [1]_ & [2]_ respectively for details. + + Hashes are identical for isomorphic subgraphs and there exist strong + guarantees that non-isomorphic graphs will get different hashes. + See [1]_ for details. + + If no node or edge attributes are provided, the degree of each node + is used as its initial label. + Otherwise, node and/or edge labels are used to compute the hash. + + Parameters + ---------- + G : graph + The graph to be hashed. + Can have node and/or edge attributes. Can also have no attributes. + edge_attr : string, optional (default=None) + The key in edge attribute dictionary to be used for hashing. + If None, edge labels are ignored. + node_attr : string, optional (default=None) + The key in node attribute dictionary to be used for hashing. + If None, and no edge_attr given, use the degrees of the nodes as labels. + If None, and edge_attr is given, each node starts with an identical label. + iterations : int, optional (default=3) + Number of neighbor aggregations to perform. + Should be larger for larger graphs. + digest_size : int, optional (default=16) + Size (in bits) of blake2b hash digest to use for hashing node labels. 
+ The default size is 16 bits. + include_initial_labels : bool, optional (default=False) + If True, include the hashed initial node label as the first subgraph + hash for each node. + + Returns + ------- + node_subgraph_hashes : dict + A dictionary with each key given by a node in G, and each value given + by the subgraph hashes in order of depth from the key node. + + Examples + -------- + Finding similar nodes in different graphs: + + >>> G1 = nx.Graph() + >>> G1.add_edges_from([(1, 2), (2, 3), (2, 4), (3, 5), (4, 6), (5, 7), (6, 7)]) + >>> G2 = nx.Graph() + >>> G2.add_edges_from([(1, 3), (2, 3), (1, 6), (1, 5), (4, 6)]) + >>> g1_hashes = nx.weisfeiler_lehman_subgraph_hashes( + ... G1, iterations=3, digest_size=8 + ... ) + >>> g2_hashes = nx.weisfeiler_lehman_subgraph_hashes( + ... G2, iterations=3, digest_size=8 + ... ) + + Even though G1 and G2 are not isomorphic (they have different numbers of edges), + the hash sequence of depth 3 for node 1 in G1 and node 5 in G2 are similar: + + >>> g1_hashes[1] + ['a93b64973cfc8897', 'db1b43ae35a1878f', '57872a7d2059c1c0'] + >>> g2_hashes[5] + ['a93b64973cfc8897', 'db1b43ae35a1878f', '1716d2a4012fa4bc'] + + The first 2 WL subgraph hashes match. From this we can conclude that it's very + likely the neighborhood of 2 hops around these nodes are isomorphic. + + However the 3-hop neighborhoods of ``G1`` and ``G2`` are not isomorphic since the + 3rd hashes in the lists above are not equal. + + These nodes may be candidates to be classified together since their local topology + is similar. + + Notes + ----- + To hash the full graph when subgraph hashes are not needed, use + `weisfeiler_lehman_graph_hash` for efficiency. + + Similarity between hashes does not imply similarity between graphs. + + References + ---------- + .. [1] Shervashidze, Nino, Pascal Schweitzer, Erik Jan Van Leeuwen, + Kurt Mehlhorn, and Karsten M. Borgwardt. Weisfeiler Lehman + Graph Kernels. Journal of Machine Learning Research. 2011. 
+ http://www.jmlr.org/papers/volume12/shervashidze11a/shervashidze11a.pdf + .. [2] Annamalai Narayanan, Mahinthan Chandramohan, Rajasekar Venkatesan, + Lihui Chen, Yang Liu and Shantanu Jaiswa. graph2vec: Learning + Distributed Representations of Graphs. arXiv. 2017 + https://arxiv.org/pdf/1707.05005.pdf + + See also + -------- + weisfeiler_lehman_graph_hash + """ + + def weisfeiler_lehman_step(G, labels, node_subgraph_hashes, edge_attr=None): + """ + Apply neighborhood aggregation to each node + in the graph. + Computes a dictionary with labels for each node. + Appends the new hashed label to the dictionary of subgraph hashes + originating from and indexed by each node in G + """ + new_labels = {} + for node in G.nodes(): + label = _neighborhood_aggregate(G, node, labels, edge_attr=edge_attr) + hashed_label = _hash_label(label, digest_size) + new_labels[node] = hashed_label + node_subgraph_hashes[node].append(hashed_label) + return new_labels + + node_labels = _init_node_labels(G, edge_attr, node_attr) + if include_initial_labels: + node_subgraph_hashes = { + k: [_hash_label(v, digest_size)] for k, v in node_labels.items() + } + else: + node_subgraph_hashes = defaultdict(list) + + for _ in range(iterations): + node_labels = weisfeiler_lehman_step( + G, node_labels, node_subgraph_hashes, edge_attr + ) + + return dict(node_subgraph_hashes) diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/graphical.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/graphical.py new file mode 100644 index 0000000000000000000000000000000000000000..d5d82dedda6f9810e3f51bc4c82a9a2b252fa998 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/graphical.py @@ -0,0 +1,483 @@ +"""Test sequences for graphiness.""" + +import heapq + +import networkx as nx + +__all__ = [ + "is_graphical", + "is_multigraphical", + "is_pseudographical", + "is_digraphical", + "is_valid_degree_sequence_erdos_gallai", + "is_valid_degree_sequence_havel_hakimi", +] + + 
@nx._dispatchable(graphs=None)
def is_graphical(sequence, method="eg"):
    """Returns True if sequence is a valid degree sequence.

    A degree sequence is valid if some graph can realize it.

    Parameters
    ----------
    sequence : list or iterable container
        A sequence of integer node degrees

    method : "eg" | "hh"  (default: 'eg')
        Which test to apply.
        "eg" selects the Erdős-Gallai algorithm
        [EG1960]_, [choudum1986]_, and
        "hh" selects the Havel-Hakimi algorithm
        [havel1955]_, [hakimi1962]_, [CL1996]_.

    Returns
    -------
    valid : bool
        True if the sequence is a valid degree sequence and False if not.

    Raises
    ------
    NetworkXException
        If `method` is neither "eg" nor "hh".

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> sequence = (d for n, d in G.degree())
    >>> nx.is_graphical(sequence)
    True

    To test a non-graphical sequence:
    >>> sequence_list = [d for n, d in G.degree()]
    >>> sequence_list[-1] += 1
    >>> nx.is_graphical(sequence_list)
    False

    References
    ----------
    .. [EG1960] Erdős and Gallai, Mat. Lapok 11 264, 1960.
    .. [choudum1986] S.A. Choudum. "A simple proof of the Erdős-Gallai theorem on
       graph sequences." Bulletin of the Australian Mathematical Society, 33,
       pp 67-70, 1986. https://doi.org/10.1017/S0004972700002872
    .. [havel1955] Havel, V. "A Remark on the Existence of Finite Graphs"
       Casopis Pest. Mat. 80, 477-480, 1955.
    .. [hakimi1962] Hakimi, S. "On the Realizability of a Set of Integers as
       Degrees of the Vertices of a Graph." SIAM J. Appl. Math. 10, 496-506, 1962.
    .. [CL1996] G. Chartrand and L. Lesniak, "Graphs and Digraphs",
       Chapman and Hall/CRC, 1996.
    """
    # The input is materialised only inside a recognised branch, so an
    # unknown method raises without consuming a generator argument.
    if method == "eg":
        return is_valid_degree_sequence_erdos_gallai(list(sequence))
    if method == "hh":
        return is_valid_degree_sequence_havel_hakimi(list(sequence))
    raise nx.NetworkXException("`method` must be 'eg' or 'hh'")
@nx._dispatchable(graphs=None)
def is_valid_degree_sequence_erdos_gallai(deg_sequence):
    r"""Returns True if deg_sequence can be realized by a simple graph.

    The validation is done using the Erdős-Gallai theorem [EG1960]_.

    Parameters
    ----------
    deg_sequence : list
        A list of integers

    Returns
    -------
    valid : bool
        True if deg_sequence is graphical and False if not.

    Examples
    --------
    >>> G = nx.Graph([(1, 2), (1, 3), (2, 3), (3, 4), (4, 2), (5, 1), (5, 4)])
    >>> sequence = (d for _, d in G.degree())
    >>> nx.is_valid_degree_sequence_erdos_gallai(sequence)
    True

    To test a non-valid sequence:
    >>> sequence_list = [d for _, d in G.degree()]
    >>> sequence_list[-1] += 1
    >>> nx.is_valid_degree_sequence_erdos_gallai(sequence_list)
    False

    Notes
    -----

    This implementation uses an equivalent form of the Erdős-Gallai criterion.
    Worst-case run time is $O(n)$ where $n$ is the length of the sequence.

    Specifically, a sequence d is graphical if and only if the
    sum of the sequence is even and for all strong indices k in the sequence,

     .. math::

       \sum_{i=1}^{k} d_i \leq k(k-1) + \sum_{j=k+1}^{n} \min(d_j,k)
             = k(n-1) - ( k \sum_{j=0}^{k-1} n_j - \sum_{j=0}^{k-1} j n_j )

    A strong index k is any index where d_k >= k and the value n_j is the
    number of occurrences of j in d. The maximal strong index is called the
    Durfee index.

    This particular rearrangement comes from the proof of Theorem 3 in [2]_.

    The ZZ condition says that for the sequence d if

    .. math::
        |d| >= \frac{(\max(d) + \min(d) + 1)^2}{4*\min(d)}

    then d is graphical.  This was shown in Theorem 6 in [2]_.

    References
    ----------
    .. [1] A. Tripathi and S. Vijay. "A note on a theorem of Erdős & Gallai",
       Discrete Mathematics, 265, pp. 417-420 (2003).
    .. [2] I.E. Zverovich and V.E. Zverovich. "Contributions to the theory
       of graphic sequences", Discrete Mathematics, 105, pp. 292-303 (1992).
    .. [EG1960] Erdős and Gallai, Mat. Lapok 11 264, 1960.
    """
    try:
        dmax, dmin, _, n, num_degs = _basic_graphical_tests(deg_sequence)
    except nx.NetworkXUnfeasible:
        return False

    # Trivially graphical: no positive degrees, or the ZZ condition holds.
    spread = dmax + dmin + 1
    if n == 0 or 4 * dmin * n >= spread * spread:
        return True

    # Erdős-Gallai inequalities in the Zverovich-Zverovich reformulation.
    k = 0  # how many of the largest degrees have been consumed
    head_sum = 0  # sum of those k largest degrees
    count_below = 0  # sum of n_j for j < k
    weighted_below = 0  # sum of j * n_j for j < k
    for degree in range(dmax, dmin - 1, -1):
        if degree <= k:
            # Already past the Durfee index; every inequality held.
            return True
        multiplicity = num_degs[degree]
        if multiplicity <= 0:
            continue
        # Take the whole run of equal degrees, clamped at the Durfee index.
        run = min(multiplicity, degree - k)
        head_sum += run * degree
        for j in range(k, k + run):
            occurrences = num_degs[j]
            count_below += occurrences
            weighted_below += j * occurrences
        k += run
        if head_sum > k * (n - 1) - k * count_below + weighted_below:
            return False
    return True
+ """ + try: + deg_sequence = nx.utils.make_list_of_ints(sequence) + except nx.NetworkXError: + return False + dsum, dmax = 0, 0 + for d in deg_sequence: + if d < 0: + return False + dsum, dmax = dsum + d, max(dmax, d) + if dsum % 2 or dsum < 2 * dmax: + return False + return True + + +@nx._dispatchable(graphs=None) +def is_pseudographical(sequence): + """Returns True if some pseudograph can realize the sequence. + + Every nonnegative integer sequence with an even sum is pseudographical + (see [1]_). + + Parameters + ---------- + sequence : list or iterable container + A sequence of integer node degrees + + Returns + ------- + valid : bool + True if the sequence is a pseudographic degree sequence and False if not. + + Examples + -------- + >>> G = nx.Graph([(1, 2), (1, 3), (2, 3), (3, 4), (4, 2), (5, 1), (5, 4)]) + >>> sequence = (d for _, d in G.degree()) + >>> nx.is_pseudographical(sequence) + True + + To test a non-pseudographical sequence: + >>> sequence_list = [d for _, d in G.degree()] + >>> sequence_list[-1] += 1 + >>> nx.is_pseudographical(sequence_list) + False + + Notes + ----- + The worst-case run time is $O(n)$ where n is the length of the sequence. + + References + ---------- + .. [1] F. Boesch and F. Harary. "Line removal algorithms for graphs + and their degree lists", IEEE Trans. Circuits and Systems, CAS-23(12), + pp. 778-782 (1976). + """ + try: + deg_sequence = nx.utils.make_list_of_ints(sequence) + except nx.NetworkXError: + return False + return sum(deg_sequence) % 2 == 0 and min(deg_sequence) >= 0 + + +@nx._dispatchable(graphs=None) +def is_digraphical(in_sequence, out_sequence): + r"""Returns True if some directed graph can realize the in- and out-degree + sequences. 
+ + Parameters + ---------- + in_sequence : list or iterable container + A sequence of integer node in-degrees + + out_sequence : list or iterable container + A sequence of integer node out-degrees + + Returns + ------- + valid : bool + True if in and out-sequences are digraphic False if not. + + Examples + -------- + >>> G = nx.DiGraph([(1, 2), (1, 3), (2, 3), (3, 4), (4, 2), (5, 1), (5, 4)]) + >>> in_seq = (d for n, d in G.in_degree()) + >>> out_seq = (d for n, d in G.out_degree()) + >>> nx.is_digraphical(in_seq, out_seq) + True + + To test a non-digraphical scenario: + >>> in_seq_list = [d for n, d in G.in_degree()] + >>> in_seq_list[-1] += 1 + >>> nx.is_digraphical(in_seq_list, out_seq) + False + + Notes + ----- + This algorithm is from Kleitman and Wang [1]_. + The worst case runtime is $O(s \times \log n)$ where $s$ and $n$ are the + sum and length of the sequences respectively. + + References + ---------- + .. [1] D.J. Kleitman and D.L. Wang + Algorithms for Constructing Graphs and Digraphs with Given Valences + and Factors, Discrete Mathematics, 6(1), pp. 
79-88 (1973) + """ + try: + in_deg_sequence = nx.utils.make_list_of_ints(in_sequence) + out_deg_sequence = nx.utils.make_list_of_ints(out_sequence) + except nx.NetworkXError: + return False + # Process the sequences and form two heaps to store degree pairs with + # either zero or non-zero out degrees + sumin, sumout, nin, nout = 0, 0, len(in_deg_sequence), len(out_deg_sequence) + maxn = max(nin, nout) + maxin = 0 + if maxn == 0: + return True + stubheap, zeroheap = [], [] + for n in range(maxn): + in_deg, out_deg = 0, 0 + if n < nout: + out_deg = out_deg_sequence[n] + if n < nin: + in_deg = in_deg_sequence[n] + if in_deg < 0 or out_deg < 0: + return False + sumin, sumout, maxin = sumin + in_deg, sumout + out_deg, max(maxin, in_deg) + if in_deg > 0: + stubheap.append((-1 * out_deg, -1 * in_deg)) + elif out_deg > 0: + zeroheap.append(-1 * out_deg) + if sumin != sumout: + return False + heapq.heapify(stubheap) + heapq.heapify(zeroheap) + + modstubs = [(0, 0)] * (maxin + 1) + # Successively reduce degree sequence by removing the maximum out degree + while stubheap: + # Take the first value in the sequence with non-zero in degree + (freeout, freein) = heapq.heappop(stubheap) + freein *= -1 + if freein > len(stubheap) + len(zeroheap): + return False + + # Attach out stubs to the nodes with the most in stubs + mslen = 0 + for i in range(freein): + if zeroheap and (not stubheap or stubheap[0][0] > zeroheap[0]): + stubout = heapq.heappop(zeroheap) + stubin = 0 + else: + (stubout, stubin) = heapq.heappop(stubheap) + if stubout == 0: + return False + # Check if target is now totally connected + if stubout + 1 < 0 or stubin < 0: + modstubs[mslen] = (stubout + 1, stubin) + mslen += 1 + + # Add back the nodes to the heap that still have available stubs + for i in range(mslen): + stub = modstubs[i] + if stub[1] < 0: + heapq.heappush(stubheap, stub) + else: + heapq.heappush(zeroheap, stub[0]) + if freeout < 0: + heapq.heappush(zeroheap, freeout) + return True diff --git 
a/.venv/lib/python3.11/site-packages/networkx/algorithms/hierarchy.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/hierarchy.py new file mode 100644 index 0000000000000000000000000000000000000000..d5a05525e7ddf1e98b1e07f120df0b0b5b52414b --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/hierarchy.py @@ -0,0 +1,57 @@ +""" +Flow Hierarchy. +""" + +import networkx as nx + +__all__ = ["flow_hierarchy"] + + +@nx._dispatchable(edge_attrs="weight") +def flow_hierarchy(G, weight=None): + """Returns the flow hierarchy of a directed network. + + Flow hierarchy is defined as the fraction of edges not participating + in cycles in a directed graph [1]_. + + Parameters + ---------- + G : DiGraph or MultiDiGraph + A directed graph + + weight : string, optional (default=None) + Attribute to use for edge weights. If None the weight defaults to 1. + + Returns + ------- + h : float + Flow hierarchy value + + Raises + ------ + NetworkXError + If `G` is not a directed graph or if `G` has no edges. + + Notes + ----- + The algorithm described in [1]_ computes the flow hierarchy through + exponentiation of the adjacency matrix. This function implements an + alternative approach that finds strongly connected components. + An edge is in a cycle if and only if it is in a strongly connected + component, which can be found in $O(m)$ time using Tarjan's algorithm. + + References + ---------- + .. [1] Luo, J.; Magee, C.L. (2011), + Detecting evolving patterns of self-organizing networks by flow + hierarchy measurement, Complexity, Volume 16 Issue 6 53-61. 
+ DOI: 10.1002/cplx.20368 + http://web.mit.edu/~cmagee/www/documents/28-DetectingEvolvingPatterns_FlowHierarchy.pdf + """ + # corner case: G has no edges + if nx.is_empty(G): + raise nx.NetworkXError("flow_hierarchy not applicable to empty graphs") + if not G.is_directed(): + raise nx.NetworkXError("G must be a digraph in flow_hierarchy") + scc = nx.strongly_connected_components(G) + return 1 - sum(G.subgraph(c).size(weight) for c in scc) / G.size(weight) diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/hybrid.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/hybrid.py new file mode 100644 index 0000000000000000000000000000000000000000..9d3dd3078cd25fb520a20f5866043ad977ef02f5 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/hybrid.py @@ -0,0 +1,196 @@ +""" +Provides functions for finding and testing for locally `(k, l)`-connected +graphs. + +""" + +import copy + +import networkx as nx + +__all__ = ["kl_connected_subgraph", "is_kl_connected"] + + +@nx._dispatchable(returns_graph=True) +def kl_connected_subgraph(G, k, l, low_memory=False, same_as_graph=False): + """Returns the maximum locally `(k, l)`-connected subgraph of `G`. + + A graph is locally `(k, l)`-connected if for each edge `(u, v)` in the + graph there are at least `l` edge-disjoint paths of length at most `k` + joining `u` to `v`. + + Parameters + ---------- + G : NetworkX graph + The graph in which to find a maximum locally `(k, l)`-connected + subgraph. + + k : integer + The maximum length of paths to consider. A higher number means a looser + connectivity requirement. + + l : integer + The number of edge-disjoint paths. A higher number means a stricter + connectivity requirement. + + low_memory : bool + If this is True, this function uses an algorithm that uses slightly + more time but less memory. 
+ + same_as_graph : bool + If True then return a tuple of the form `(H, is_same)`, + where `H` is the maximum locally `(k, l)`-connected subgraph and + `is_same` is a Boolean representing whether `G` is locally `(k, + l)`-connected (and hence, whether `H` is simply a copy of the input + graph `G`). + + Returns + ------- + NetworkX graph or two-tuple + If `same_as_graph` is True, then this function returns a + two-tuple as described above. Otherwise, it returns only the maximum + locally `(k, l)`-connected subgraph. + + See also + -------- + is_kl_connected + + References + ---------- + .. [1] Chung, Fan and Linyuan Lu. "The Small World Phenomenon in Hybrid + Power Law Graphs." *Complex Networks*. Springer Berlin Heidelberg, + 2004. 89--104. + + """ + H = copy.deepcopy(G) # subgraph we construct by removing from G + + graphOK = True + deleted_some = True # hack to start off the while loop + while deleted_some: + deleted_some = False + # We use `for edge in list(H.edges()):` instead of + # `for edge in H.edges():` because we edit the graph `H` in + # the loop. Hence using an iterator will result in + # `RuntimeError: dictionary changed size during iteration` + for edge in list(H.edges()): + (u, v) = edge + # Get copy of graph needed for this search + if low_memory: + verts = {u, v} + for i in range(k): + for w in verts.copy(): + verts.update(G[w]) + G2 = G.subgraph(verts).copy() + else: + G2 = copy.deepcopy(G) + ### + path = [u, v] + cnt = 0 + accept = 0 + while path: + cnt += 1 # Found a path + if cnt >= l: + accept = 1 + break + # record edges along this graph + prev = u + for w in path: + if prev != w: + G2.remove_edge(prev, w) + prev = w + # path = shortest_path(G2, u, v, k) # ??? should "Cutoff" be k+1? + try: + path = nx.shortest_path(G2, u, v) # ??? should "Cutoff" be k+1? 
+ except nx.NetworkXNoPath: + path = False + # No Other Paths + if accept == 0: + H.remove_edge(u, v) + deleted_some = True + if graphOK: + graphOK = False + # We looked through all edges and removed none of them. + # So, H is the maximal (k,l)-connected subgraph of G + if same_as_graph: + return (H, graphOK) + return H + + +@nx._dispatchable +def is_kl_connected(G, k, l, low_memory=False): + """Returns True if and only if `G` is locally `(k, l)`-connected. + + A graph is locally `(k, l)`-connected if for each edge `(u, v)` in the + graph there are at least `l` edge-disjoint paths of length at most `k` + joining `u` to `v`. + + Parameters + ---------- + G : NetworkX graph + The graph to test for local `(k, l)`-connectedness. + + k : integer + The maximum length of paths to consider. A higher number means a looser + connectivity requirement. + + l : integer + The number of edge-disjoint paths. A higher number means a stricter + connectivity requirement. + + low_memory : bool + If this is True, this function uses an algorithm that uses slightly + more time but less memory. + + Returns + ------- + bool + Whether the graph is locally `(k, l)`-connected subgraph. + + See also + -------- + kl_connected_subgraph + + References + ---------- + .. [1] Chung, Fan and Linyuan Lu. "The Small World Phenomenon in Hybrid + Power Law Graphs." *Complex Networks*. Springer Berlin Heidelberg, + 2004. 89--104. + + """ + graphOK = True + for edge in G.edges(): + (u, v) = edge + # Get copy of graph needed for this search + if low_memory: + verts = {u, v} + for i in range(k): + [verts.update(G.neighbors(w)) for w in verts.copy()] + G2 = G.subgraph(verts) + else: + G2 = copy.deepcopy(G) + ### + path = [u, v] + cnt = 0 + accept = 0 + while path: + cnt += 1 # Found a path + if cnt >= l: + accept = 1 + break + # record edges along this graph + prev = u + for w in path: + if w != prev: + G2.remove_edge(prev, w) + prev = w + # path = shortest_path(G2, u, v, k) # ??? should "Cutoff" be k+1? 
+ try: + path = nx.shortest_path(G2, u, v) # ??? should "Cutoff" be k+1? + except nx.NetworkXNoPath: + path = False + # No Other Paths + if accept == 0: + graphOK = False + break + # return status + return graphOK diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/isolate.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/isolate.py new file mode 100644 index 0000000000000000000000000000000000000000..1ea8abe9c8329c9f281059765aa8bfeb9487721f --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/isolate.py @@ -0,0 +1,107 @@ +""" +Functions for identifying isolate (degree zero) nodes. +""" + +import networkx as nx + +__all__ = ["is_isolate", "isolates", "number_of_isolates"] + + +@nx._dispatchable +def is_isolate(G, n): + """Determines whether a node is an isolate. + + An *isolate* is a node with no neighbors (that is, with degree + zero). For directed graphs, this means no in-neighbors and no + out-neighbors. + + Parameters + ---------- + G : NetworkX graph + + n : node + A node in `G`. + + Returns + ------- + is_isolate : bool + True if and only if `n` has no neighbors. + + Examples + -------- + >>> G = nx.Graph() + >>> G.add_edge(1, 2) + >>> G.add_node(3) + >>> nx.is_isolate(G, 2) + False + >>> nx.is_isolate(G, 3) + True + """ + return G.degree(n) == 0 + + +@nx._dispatchable +def isolates(G): + """Iterator over isolates in the graph. + + An *isolate* is a node with no neighbors (that is, with degree + zero). For directed graphs, this means no in-neighbors and no + out-neighbors. + + Parameters + ---------- + G : NetworkX graph + + Returns + ------- + iterator + An iterator over the isolates of `G`. 
+ + Examples + -------- + To get a list of all isolates of a graph, use the :class:`list` + constructor:: + + >>> G = nx.Graph() + >>> G.add_edge(1, 2) + >>> G.add_node(3) + >>> list(nx.isolates(G)) + [3] + + To remove all isolates in the graph, first create a list of the + isolates, then use :meth:`Graph.remove_nodes_from`:: + + >>> G.remove_nodes_from(list(nx.isolates(G))) + >>> list(G) + [1, 2] + + For digraphs, isolates have zero in-degree and zero out_degre:: + + >>> G = nx.DiGraph([(0, 1), (1, 2)]) + >>> G.add_node(3) + >>> list(nx.isolates(G)) + [3] + + """ + return (n for n, d in G.degree() if d == 0) + + +@nx._dispatchable +def number_of_isolates(G): + """Returns the number of isolates in the graph. + + An *isolate* is a node with no neighbors (that is, with degree + zero). For directed graphs, this means no in-neighbors and no + out-neighbors. + + Parameters + ---------- + G : NetworkX graph + + Returns + ------- + int + The number of degree zero nodes in the graph `G`. + + """ + return sum(1 for v in isolates(G)) diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/link_prediction.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/link_prediction.py new file mode 100644 index 0000000000000000000000000000000000000000..3615f26deb6d3c2f3c01e55f3fcf8ca3361968b3 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/link_prediction.py @@ -0,0 +1,687 @@ +""" +Link prediction algorithms. +""" + +from math import log + +import networkx as nx +from networkx.utils import not_implemented_for + +__all__ = [ + "resource_allocation_index", + "jaccard_coefficient", + "adamic_adar_index", + "preferential_attachment", + "cn_soundarajan_hopcroft", + "ra_index_soundarajan_hopcroft", + "within_inter_cluster", + "common_neighbor_centrality", +] + + +def _apply_prediction(G, func, ebunch=None): + """Applies the given function to each edge in the specified iterable + of edges. + + `G` is an instance of :class:`networkx.Graph`. 
+ + `func` is a function on two inputs, each of which is a node in the + graph. The function can return anything, but it should return a + value representing a prediction of the likelihood of a "link" + joining the two nodes. + + `ebunch` is an iterable of pairs of nodes. If not specified, all + non-edges in the graph `G` will be used. + + """ + if ebunch is None: + ebunch = nx.non_edges(G) + else: + for u, v in ebunch: + if u not in G: + raise nx.NodeNotFound(f"Node {u} not in G.") + if v not in G: + raise nx.NodeNotFound(f"Node {v} not in G.") + return ((u, v, func(u, v)) for u, v in ebunch) + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +@nx._dispatchable +def resource_allocation_index(G, ebunch=None): + r"""Compute the resource allocation index of all node pairs in ebunch. + + Resource allocation index of `u` and `v` is defined as + + .. math:: + + \sum_{w \in \Gamma(u) \cap \Gamma(v)} \frac{1}{|\Gamma(w)|} + + where $\Gamma(u)$ denotes the set of neighbors of $u$. + + Parameters + ---------- + G : graph + A NetworkX undirected graph. + + ebunch : iterable of node pairs, optional (default = None) + Resource allocation index will be computed for each pair of + nodes given in the iterable. The pairs must be given as + 2-tuples (u, v) where u and v are nodes in the graph. If ebunch + is None then all nonexistent edges in the graph will be used. + Default value: None. + + Returns + ------- + piter : iterator + An iterator of 3-tuples in the form (u, v, p) where (u, v) is a + pair of nodes and p is their resource allocation index. + + Raises + ------ + NetworkXNotImplemented + If `G` is a `DiGraph`, a `Multigraph` or a `MultiDiGraph`. + + NodeNotFound + If `ebunch` has a node that is not in `G`. + + Examples + -------- + >>> G = nx.complete_graph(5) + >>> preds = nx.resource_allocation_index(G, [(0, 1), (2, 3)]) + >>> for u, v, p in preds: + ... 
print(f"({u}, {v}) -> {p:.8f}") + (0, 1) -> 0.75000000 + (2, 3) -> 0.75000000 + + References + ---------- + .. [1] T. Zhou, L. Lu, Y.-C. Zhang. + Predicting missing links via local information. + Eur. Phys. J. B 71 (2009) 623. + https://arxiv.org/pdf/0901.0553.pdf + """ + + def predict(u, v): + return sum(1 / G.degree(w) for w in nx.common_neighbors(G, u, v)) + + return _apply_prediction(G, predict, ebunch) + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +@nx._dispatchable +def jaccard_coefficient(G, ebunch=None): + r"""Compute the Jaccard coefficient of all node pairs in ebunch. + + Jaccard coefficient of nodes `u` and `v` is defined as + + .. math:: + + \frac{|\Gamma(u) \cap \Gamma(v)|}{|\Gamma(u) \cup \Gamma(v)|} + + where $\Gamma(u)$ denotes the set of neighbors of $u$. + + Parameters + ---------- + G : graph + A NetworkX undirected graph. + + ebunch : iterable of node pairs, optional (default = None) + Jaccard coefficient will be computed for each pair of nodes + given in the iterable. The pairs must be given as 2-tuples + (u, v) where u and v are nodes in the graph. If ebunch is None + then all nonexistent edges in the graph will be used. + Default value: None. + + Returns + ------- + piter : iterator + An iterator of 3-tuples in the form (u, v, p) where (u, v) is a + pair of nodes and p is their Jaccard coefficient. + + Raises + ------ + NetworkXNotImplemented + If `G` is a `DiGraph`, a `Multigraph` or a `MultiDiGraph`. + + NodeNotFound + If `ebunch` has a node that is not in `G`. + + Examples + -------- + >>> G = nx.complete_graph(5) + >>> preds = nx.jaccard_coefficient(G, [(0, 1), (2, 3)]) + >>> for u, v, p in preds: + ... print(f"({u}, {v}) -> {p:.8f}") + (0, 1) -> 0.60000000 + (2, 3) -> 0.60000000 + + References + ---------- + .. [1] D. Liben-Nowell, J. Kleinberg. + The Link Prediction Problem for Social Networks (2004). 
+ http://www.cs.cornell.edu/home/kleinber/link-pred.pdf + """ + + def predict(u, v): + union_size = len(set(G[u]) | set(G[v])) + if union_size == 0: + return 0 + return len(nx.common_neighbors(G, u, v)) / union_size + + return _apply_prediction(G, predict, ebunch) + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +@nx._dispatchable +def adamic_adar_index(G, ebunch=None): + r"""Compute the Adamic-Adar index of all node pairs in ebunch. + + Adamic-Adar index of `u` and `v` is defined as + + .. math:: + + \sum_{w \in \Gamma(u) \cap \Gamma(v)} \frac{1}{\log |\Gamma(w)|} + + where $\Gamma(u)$ denotes the set of neighbors of $u$. + This index leads to zero-division for nodes only connected via self-loops. + It is intended to be used when no self-loops are present. + + Parameters + ---------- + G : graph + NetworkX undirected graph. + + ebunch : iterable of node pairs, optional (default = None) + Adamic-Adar index will be computed for each pair of nodes given + in the iterable. The pairs must be given as 2-tuples (u, v) + where u and v are nodes in the graph. If ebunch is None then all + nonexistent edges in the graph will be used. + Default value: None. + + Returns + ------- + piter : iterator + An iterator of 3-tuples in the form (u, v, p) where (u, v) is a + pair of nodes and p is their Adamic-Adar index. + + Raises + ------ + NetworkXNotImplemented + If `G` is a `DiGraph`, a `Multigraph` or a `MultiDiGraph`. + + NodeNotFound + If `ebunch` has a node that is not in `G`. + + Examples + -------- + >>> G = nx.complete_graph(5) + >>> preds = nx.adamic_adar_index(G, [(0, 1), (2, 3)]) + >>> for u, v, p in preds: + ... print(f"({u}, {v}) -> {p:.8f}") + (0, 1) -> 2.16404256 + (2, 3) -> 2.16404256 + + References + ---------- + .. [1] D. Liben-Nowell, J. Kleinberg. + The Link Prediction Problem for Social Networks (2004). 
+ http://www.cs.cornell.edu/home/kleinber/link-pred.pdf + """ + + def predict(u, v): + return sum(1 / log(G.degree(w)) for w in nx.common_neighbors(G, u, v)) + + return _apply_prediction(G, predict, ebunch) + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +@nx._dispatchable +def common_neighbor_centrality(G, ebunch=None, alpha=0.8): + r"""Return the CCPA score for each pair of nodes. + + Compute the Common Neighbor and Centrality based Parameterized Algorithm(CCPA) + score of all node pairs in ebunch. + + CCPA score of `u` and `v` is defined as + + .. math:: + + \alpha \cdot (|\Gamma (u){\cap }^{}\Gamma (v)|)+(1-\alpha )\cdot \frac{N}{{d}_{uv}} + + where $\Gamma(u)$ denotes the set of neighbors of $u$, $\Gamma(v)$ denotes the + set of neighbors of $v$, $\alpha$ is parameter varies between [0,1], $N$ denotes + total number of nodes in the Graph and ${d}_{uv}$ denotes shortest distance + between $u$ and $v$. + + This algorithm is based on two vital properties of nodes, namely the number + of common neighbors and their centrality. Common neighbor refers to the common + nodes between two nodes. Centrality refers to the prestige that a node enjoys + in a network. + + .. seealso:: + + :func:`common_neighbors` + + Parameters + ---------- + G : graph + NetworkX undirected graph. + + ebunch : iterable of node pairs, optional (default = None) + Preferential attachment score will be computed for each pair of + nodes given in the iterable. The pairs must be given as + 2-tuples (u, v) where u and v are nodes in the graph. If ebunch + is None then all nonexistent edges in the graph will be used. + Default value: None. + + alpha : Parameter defined for participation of Common Neighbor + and Centrality Algorithm share. Values for alpha should + normally be between 0 and 1. Default value set to 0.8 + because author found better performance at 0.8 for all the + dataset. 
+ Default value: 0.8 + + + Returns + ------- + piter : iterator + An iterator of 3-tuples in the form (u, v, p) where (u, v) is a + pair of nodes and p is their Common Neighbor and Centrality based + Parameterized Algorithm(CCPA) score. + + Raises + ------ + NetworkXNotImplemented + If `G` is a `DiGraph`, a `Multigraph` or a `MultiDiGraph`. + + NetworkXAlgorithmError + If self loops exist in `ebunch` or in `G` (if `ebunch` is `None`). + + NodeNotFound + If `ebunch` has a node that is not in `G`. + + Examples + -------- + >>> G = nx.complete_graph(5) + >>> preds = nx.common_neighbor_centrality(G, [(0, 1), (2, 3)]) + >>> for u, v, p in preds: + ... print(f"({u}, {v}) -> {p}") + (0, 1) -> 3.4000000000000004 + (2, 3) -> 3.4000000000000004 + + References + ---------- + .. [1] Ahmad, I., Akhtar, M.U., Noor, S. et al. + Missing Link Prediction using Common Neighbor and Centrality based Parameterized Algorithm. + Sci Rep 10, 364 (2020). + https://doi.org/10.1038/s41598-019-57304-y + """ + + # When alpha == 1, the CCPA score simplifies to the number of common neighbors. + if alpha == 1: + + def predict(u, v): + if u == v: + raise nx.NetworkXAlgorithmError("Self loops are not supported") + + return len(nx.common_neighbors(G, u, v)) + + else: + spl = dict(nx.shortest_path_length(G)) + inf = float("inf") + + def predict(u, v): + if u == v: + raise nx.NetworkXAlgorithmError("Self loops are not supported") + path_len = spl[u].get(v, inf) + + n_nbrs = len(nx.common_neighbors(G, u, v)) + return alpha * n_nbrs + (1 - alpha) * len(G) / path_len + + return _apply_prediction(G, predict, ebunch) + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +@nx._dispatchable +def preferential_attachment(G, ebunch=None): + r"""Compute the preferential attachment score of all node pairs in ebunch. + + Preferential attachment score of `u` and `v` is defined as + + .. math:: + + |\Gamma(u)| |\Gamma(v)| + + where $\Gamma(u)$ denotes the set of neighbors of $u$. 
+ + Parameters + ---------- + G : graph + NetworkX undirected graph. + + ebunch : iterable of node pairs, optional (default = None) + Preferential attachment score will be computed for each pair of + nodes given in the iterable. The pairs must be given as + 2-tuples (u, v) where u and v are nodes in the graph. If ebunch + is None then all nonexistent edges in the graph will be used. + Default value: None. + + Returns + ------- + piter : iterator + An iterator of 3-tuples in the form (u, v, p) where (u, v) is a + pair of nodes and p is their preferential attachment score. + + Raises + ------ + NetworkXNotImplemented + If `G` is a `DiGraph`, a `Multigraph` or a `MultiDiGraph`. + + NodeNotFound + If `ebunch` has a node that is not in `G`. + + Examples + -------- + >>> G = nx.complete_graph(5) + >>> preds = nx.preferential_attachment(G, [(0, 1), (2, 3)]) + >>> for u, v, p in preds: + ... print(f"({u}, {v}) -> {p}") + (0, 1) -> 16 + (2, 3) -> 16 + + References + ---------- + .. [1] D. Liben-Nowell, J. Kleinberg. + The Link Prediction Problem for Social Networks (2004). + http://www.cs.cornell.edu/home/kleinber/link-pred.pdf + """ + + def predict(u, v): + return G.degree(u) * G.degree(v) + + return _apply_prediction(G, predict, ebunch) + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +@nx._dispatchable(node_attrs="community") +def cn_soundarajan_hopcroft(G, ebunch=None, community="community"): + r"""Count the number of common neighbors of all node pairs in ebunch + using community information. + + For two nodes $u$ and $v$, this function computes the number of + common neighbors and bonus one for each common neighbor belonging to + the same community as $u$ and $v$. Mathematically, + + .. math:: + + |\Gamma(u) \cap \Gamma(v)| + \sum_{w \in \Gamma(u) \cap \Gamma(v)} f(w) + + where $f(w)$ equals 1 if $w$ belongs to the same community as $u$ + and $v$ or 0 otherwise and $\Gamma(u)$ denotes the set of + neighbors of $u$. 
+ + Parameters + ---------- + G : graph + A NetworkX undirected graph. + + ebunch : iterable of node pairs, optional (default = None) + The score will be computed for each pair of nodes given in the + iterable. The pairs must be given as 2-tuples (u, v) where u + and v are nodes in the graph. If ebunch is None then all + nonexistent edges in the graph will be used. + Default value: None. + + community : string, optional (default = 'community') + Nodes attribute name containing the community information. + G[u][community] identifies which community u belongs to. Each + node belongs to at most one community. Default value: 'community'. + + Returns + ------- + piter : iterator + An iterator of 3-tuples in the form (u, v, p) where (u, v) is a + pair of nodes and p is their score. + + Raises + ------ + NetworkXNotImplemented + If `G` is a `DiGraph`, a `Multigraph` or a `MultiDiGraph`. + + NetworkXAlgorithmError + If no community information is available for a node in `ebunch` or in `G` (if `ebunch` is `None`). + + NodeNotFound + If `ebunch` has a node that is not in `G`. + + Examples + -------- + >>> G = nx.path_graph(3) + >>> G.nodes[0]["community"] = 0 + >>> G.nodes[1]["community"] = 0 + >>> G.nodes[2]["community"] = 0 + >>> preds = nx.cn_soundarajan_hopcroft(G, [(0, 2)]) + >>> for u, v, p in preds: + ... print(f"({u}, {v}) -> {p}") + (0, 2) -> 2 + + References + ---------- + .. [1] Sucheta Soundarajan and John Hopcroft. + Using community information to improve the precision of link + prediction methods. + In Proceedings of the 21st international conference companion on + World Wide Web (WWW '12 Companion). ACM, New York, NY, USA, 607-608. 
+ http://doi.acm.org/10.1145/2187980.2188150 + """ + + def predict(u, v): + Cu = _community(G, u, community) + Cv = _community(G, v, community) + cnbors = nx.common_neighbors(G, u, v) + neighbors = ( + sum(_community(G, w, community) == Cu for w in cnbors) if Cu == Cv else 0 + ) + return len(cnbors) + neighbors + + return _apply_prediction(G, predict, ebunch) + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +@nx._dispatchable(node_attrs="community") +def ra_index_soundarajan_hopcroft(G, ebunch=None, community="community"): + r"""Compute the resource allocation index of all node pairs in + ebunch using community information. + + For two nodes $u$ and $v$, this function computes the resource + allocation index considering only common neighbors belonging to the + same community as $u$ and $v$. Mathematically, + + .. math:: + + \sum_{w \in \Gamma(u) \cap \Gamma(v)} \frac{f(w)}{|\Gamma(w)|} + + where $f(w)$ equals 1 if $w$ belongs to the same community as $u$ + and $v$ or 0 otherwise and $\Gamma(u)$ denotes the set of + neighbors of $u$. + + Parameters + ---------- + G : graph + A NetworkX undirected graph. + + ebunch : iterable of node pairs, optional (default = None) + The score will be computed for each pair of nodes given in the + iterable. The pairs must be given as 2-tuples (u, v) where u + and v are nodes in the graph. If ebunch is None then all + nonexistent edges in the graph will be used. + Default value: None. + + community : string, optional (default = 'community') + Nodes attribute name containing the community information. + G[u][community] identifies which community u belongs to. Each + node belongs to at most one community. Default value: 'community'. + + Returns + ------- + piter : iterator + An iterator of 3-tuples in the form (u, v, p) where (u, v) is a + pair of nodes and p is their score. + + Raises + ------ + NetworkXNotImplemented + If `G` is a `DiGraph`, a `Multigraph` or a `MultiDiGraph`. 
+ + NetworkXAlgorithmError + If no community information is available for a node in `ebunch` or in `G` (if `ebunch` is `None`). + + NodeNotFound + If `ebunch` has a node that is not in `G`. + + Examples + -------- + >>> G = nx.Graph() + >>> G.add_edges_from([(0, 1), (0, 2), (1, 3), (2, 3)]) + >>> G.nodes[0]["community"] = 0 + >>> G.nodes[1]["community"] = 0 + >>> G.nodes[2]["community"] = 1 + >>> G.nodes[3]["community"] = 0 + >>> preds = nx.ra_index_soundarajan_hopcroft(G, [(0, 3)]) + >>> for u, v, p in preds: + ... print(f"({u}, {v}) -> {p:.8f}") + (0, 3) -> 0.50000000 + + References + ---------- + .. [1] Sucheta Soundarajan and John Hopcroft. + Using community information to improve the precision of link + prediction methods. + In Proceedings of the 21st international conference companion on + World Wide Web (WWW '12 Companion). ACM, New York, NY, USA, 607-608. + http://doi.acm.org/10.1145/2187980.2188150 + """ + + def predict(u, v): + Cu = _community(G, u, community) + Cv = _community(G, v, community) + if Cu != Cv: + return 0 + cnbors = nx.common_neighbors(G, u, v) + return sum(1 / G.degree(w) for w in cnbors if _community(G, w, community) == Cu) + + return _apply_prediction(G, predict, ebunch) + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +@nx._dispatchable(node_attrs="community") +def within_inter_cluster(G, ebunch=None, delta=0.001, community="community"): + """Compute the ratio of within- and inter-cluster common neighbors + of all node pairs in ebunch. + + For two nodes `u` and `v`, if a common neighbor `w` belongs to the + same community as them, `w` is considered as within-cluster common + neighbor of `u` and `v`. Otherwise, it is considered as + inter-cluster common neighbor of `u` and `v`. The ratio between the + size of the set of within- and inter-cluster common neighbors is + defined as the WIC measure. [1]_ + + Parameters + ---------- + G : graph + A NetworkX undirected graph. 
+ + ebunch : iterable of node pairs, optional (default = None) + The WIC measure will be computed for each pair of nodes given in + the iterable. The pairs must be given as 2-tuples (u, v) where + u and v are nodes in the graph. If ebunch is None then all + nonexistent edges in the graph will be used. + Default value: None. + + delta : float, optional (default = 0.001) + Value to prevent division by zero in case there is no + inter-cluster common neighbor between two nodes. See [1]_ for + details. Default value: 0.001. + + community : string, optional (default = 'community') + Nodes attribute name containing the community information. + G[u][community] identifies which community u belongs to. Each + node belongs to at most one community. Default value: 'community'. + + Returns + ------- + piter : iterator + An iterator of 3-tuples in the form (u, v, p) where (u, v) is a + pair of nodes and p is their WIC measure. + + Raises + ------ + NetworkXNotImplemented + If `G` is a `DiGraph`, a `Multigraph` or a `MultiDiGraph`. + + NetworkXAlgorithmError + - If `delta` is less than or equal to zero. + - If no community information is available for a node in `ebunch` or in `G` (if `ebunch` is `None`). + + NodeNotFound + If `ebunch` has a node that is not in `G`. + + Examples + -------- + >>> G = nx.Graph() + >>> G.add_edges_from([(0, 1), (0, 2), (0, 3), (1, 4), (2, 4), (3, 4)]) + >>> G.nodes[0]["community"] = 0 + >>> G.nodes[1]["community"] = 1 + >>> G.nodes[2]["community"] = 0 + >>> G.nodes[3]["community"] = 0 + >>> G.nodes[4]["community"] = 0 + >>> preds = nx.within_inter_cluster(G, [(0, 4)]) + >>> for u, v, p in preds: + ... print(f"({u}, {v}) -> {p:.8f}") + (0, 4) -> 1.99800200 + >>> preds = nx.within_inter_cluster(G, [(0, 4)], delta=0.5) + >>> for u, v, p in preds: + ... print(f"({u}, {v}) -> {p:.8f}") + (0, 4) -> 1.33333333 + + References + ---------- + .. [1] Jorge Carlos Valverde-Rebaza and Alneu de Andrade Lopes. 
+ Link prediction in complex networks based on cluster information. + In Proceedings of the 21st Brazilian conference on Advances in + Artificial Intelligence (SBIA'12) + https://doi.org/10.1007/978-3-642-34459-6_10 + """ + if delta <= 0: + raise nx.NetworkXAlgorithmError("Delta must be greater than zero") + + def predict(u, v): + Cu = _community(G, u, community) + Cv = _community(G, v, community) + if Cu != Cv: + return 0 + cnbors = nx.common_neighbors(G, u, v) + within = {w for w in cnbors if _community(G, w, community) == Cu} + inter = cnbors - within + return len(within) / (len(inter) + delta) + + return _apply_prediction(G, predict, ebunch) + + +def _community(G, u, community): + """Get the community of the given node.""" + node_u = G.nodes[u] + try: + return node_u[community] + except KeyError as err: + raise nx.NetworkXAlgorithmError( + f"No community information available for Node {u}" + ) from err diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/lowest_common_ancestors.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/lowest_common_ancestors.py new file mode 100644 index 0000000000000000000000000000000000000000..d580018bd5c3916fe968fb476de76f444b351e29 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/lowest_common_ancestors.py @@ -0,0 +1,269 @@ +"""Algorithms for finding the lowest common ancestor of trees and DAGs.""" + +from collections import defaultdict +from collections.abc import Mapping, Set +from itertools import combinations_with_replacement + +import networkx as nx +from networkx.utils import UnionFind, arbitrary_element, not_implemented_for + +__all__ = [ + "all_pairs_lowest_common_ancestor", + "tree_all_pairs_lowest_common_ancestor", + "lowest_common_ancestor", +] + + +@not_implemented_for("undirected") +@nx._dispatchable +def all_pairs_lowest_common_ancestor(G, pairs=None): + """Return the lowest common ancestor of all pairs or the provided pairs + + Parameters + ---------- + G : NetworkX 
directed graph + + pairs : iterable of pairs of nodes, optional (default: all pairs) + The pairs of nodes of interest. + If None, will find the LCA of all pairs of nodes. + + Yields + ------ + ((node1, node2), lca) : 2-tuple + Where lca is least common ancestor of node1 and node2. + Note that for the default case, the order of the node pair is not considered, + e.g. you will not get both ``(a, b)`` and ``(b, a)`` + + Raises + ------ + NetworkXPointlessConcept + If `G` is null. + NetworkXError + If `G` is not a DAG. + + Examples + -------- + The default behavior is to yield the lowest common ancestor for all + possible combinations of nodes in `G`, including self-pairings: + + >>> G = nx.DiGraph([(0, 1), (0, 3), (1, 2)]) + >>> dict(nx.all_pairs_lowest_common_ancestor(G)) + {(0, 0): 0, (0, 1): 0, (0, 3): 0, (0, 2): 0, (1, 1): 1, (1, 3): 0, (1, 2): 1, (3, 3): 3, (3, 2): 0, (2, 2): 2} + + The pairs argument can be used to limit the output to only the + specified node pairings: + + >>> dict(nx.all_pairs_lowest_common_ancestor(G, pairs=[(1, 2), (2, 3)])) + {(1, 2): 1, (2, 3): 0} + + Notes + ----- + Only defined on non-null directed acyclic graphs. + + See Also + -------- + lowest_common_ancestor + """ + if not nx.is_directed_acyclic_graph(G): + raise nx.NetworkXError("LCA only defined on directed acyclic graphs.") + if len(G) == 0: + raise nx.NetworkXPointlessConcept("LCA meaningless on null graphs.") + + if pairs is None: + pairs = combinations_with_replacement(G, 2) + else: + # Convert iterator to iterable, if necessary. Trim duplicates. + pairs = dict.fromkeys(pairs) + # Verify that each of the nodes in the provided pairs is in G + nodeset = set(G) + for pair in pairs: + if set(pair) - nodeset: + raise nx.NodeNotFound( + f"Node(s) {set(pair) - nodeset} from pair {pair} not in G." 
+ ) + + # Once input validation is done, construct the generator + def generate_lca_from_pairs(G, pairs): + ancestor_cache = {} + + for v, w in pairs: + if v not in ancestor_cache: + ancestor_cache[v] = nx.ancestors(G, v) + ancestor_cache[v].add(v) + if w not in ancestor_cache: + ancestor_cache[w] = nx.ancestors(G, w) + ancestor_cache[w].add(w) + + common_ancestors = ancestor_cache[v] & ancestor_cache[w] + + if common_ancestors: + common_ancestor = next(iter(common_ancestors)) + while True: + successor = None + for lower_ancestor in G.successors(common_ancestor): + if lower_ancestor in common_ancestors: + successor = lower_ancestor + break + if successor is None: + break + common_ancestor = successor + yield ((v, w), common_ancestor) + + return generate_lca_from_pairs(G, pairs) + + +@not_implemented_for("undirected") +@nx._dispatchable +def lowest_common_ancestor(G, node1, node2, default=None): + """Compute the lowest common ancestor of the given pair of nodes. + + Parameters + ---------- + G : NetworkX directed graph + + node1, node2 : nodes in the graph. + + default : object + Returned if no common ancestor between `node1` and `node2` + + Returns + ------- + The lowest common ancestor of node1 and node2, + or default if they have no common ancestors. + + Examples + -------- + >>> G = nx.DiGraph() + >>> nx.add_path(G, (0, 1, 2, 3)) + >>> nx.add_path(G, (0, 4, 3)) + >>> nx.lowest_common_ancestor(G, 2, 4) + 0 + + See Also + -------- + all_pairs_lowest_common_ancestor""" + + ans = list(all_pairs_lowest_common_ancestor(G, pairs=[(node1, node2)])) + if ans: + assert len(ans) == 1 + return ans[0][1] + return default + + +@not_implemented_for("undirected") +@nx._dispatchable +def tree_all_pairs_lowest_common_ancestor(G, root=None, pairs=None): + r"""Yield the lowest common ancestor for sets of pairs in a tree. + + Parameters + ---------- + G : NetworkX directed graph (must be a tree) + + root : node, optional (default: None) + The root of the subtree to operate on. 
+ If None, assume the entire graph has exactly one source and use that. + + pairs : iterable or iterator of pairs of nodes, optional (default: None) + The pairs of interest. If None, Defaults to all pairs of nodes + under `root` that have a lowest common ancestor. + + Returns + ------- + lcas : generator of tuples `((u, v), lca)` where `u` and `v` are nodes + in `pairs` and `lca` is their lowest common ancestor. + + Examples + -------- + >>> import pprint + >>> G = nx.DiGraph([(1, 3), (2, 4), (1, 2)]) + >>> pprint.pprint(dict(nx.tree_all_pairs_lowest_common_ancestor(G))) + {(1, 1): 1, + (2, 1): 1, + (2, 2): 2, + (3, 1): 1, + (3, 2): 1, + (3, 3): 3, + (3, 4): 1, + (4, 1): 1, + (4, 2): 2, + (4, 4): 4} + + We can also use `pairs` argument to specify the pairs of nodes for which we + want to compute lowest common ancestors. Here is an example: + + >>> dict(nx.tree_all_pairs_lowest_common_ancestor(G, pairs=[(1, 4), (2, 3)])) + {(2, 3): 1, (1, 4): 1} + + Notes + ----- + Only defined on non-null trees represented with directed edges from + parents to children. Uses Tarjan's off-line lowest-common-ancestors + algorithm. Runs in time $O(4 \times (V + E + P))$ time, where 4 is the largest + value of the inverse Ackermann function likely to ever come up in actual + use, and $P$ is the number of pairs requested (or $V^2$ if all are needed). + + Tarjan, R. E. (1979), "Applications of path compression on balanced trees", + Journal of the ACM 26 (4): 690-715, doi:10.1145/322154.322161. + + See Also + -------- + all_pairs_lowest_common_ancestor: similar routine for general DAGs + lowest_common_ancestor: just a single pair for general DAGs + """ + if len(G) == 0: + raise nx.NetworkXPointlessConcept("LCA meaningless on null graphs.") + + # Index pairs of interest for efficient lookup from either side. + if pairs is not None: + pair_dict = defaultdict(set) + # See note on all_pairs_lowest_common_ancestor. 
+ if not isinstance(pairs, Mapping | Set): + pairs = set(pairs) + for u, v in pairs: + for n in (u, v): + if n not in G: + msg = f"The node {str(n)} is not in the digraph." + raise nx.NodeNotFound(msg) + pair_dict[u].add(v) + pair_dict[v].add(u) + + # If root is not specified, find the exactly one node with in degree 0 and + # use it. Raise an error if none are found, or more than one is. Also check + # for any nodes with in degree larger than 1, which would imply G is not a + # tree. + if root is None: + for n, deg in G.in_degree: + if deg == 0: + if root is not None: + msg = "No root specified and tree has multiple sources." + raise nx.NetworkXError(msg) + root = n + # checking deg>1 is not sufficient for MultiDiGraphs + elif deg > 1 and len(G.pred[n]) > 1: + msg = "Tree LCA only defined on trees; use DAG routine." + raise nx.NetworkXError(msg) + if root is None: + raise nx.NetworkXError("Graph contains a cycle.") + + # Iterative implementation of Tarjan's offline lca algorithm + # as described in CLRS on page 521 (2nd edition)/page 584 (3rd edition) + uf = UnionFind() + ancestors = {} + for node in G: + ancestors[node] = uf[node] + + colors = defaultdict(bool) + for node in nx.dfs_postorder_nodes(G, root): + colors[node] = True + for v in pair_dict[node] if pairs is not None else G: + if colors[v]: + # If the user requested both directions of a pair, give it. + # Otherwise, just give one. 
+ if pairs is not None and (node, v) in pairs: + yield (node, v), ancestors[uf[v]] + if pairs is None or (v, node) in pairs: + yield (v, node), ancestors[uf[v]] + if node != root: + parent = arbitrary_element(G.pred[node]) + uf.union(parent, node) + ancestors[uf[parent]] = parent diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/matching.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/matching.py new file mode 100644 index 0000000000000000000000000000000000000000..6cfb3c93f6aaa379acb01e5ea3b35b4f20bd40b6 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/matching.py @@ -0,0 +1,1152 @@ +"""Functions for computing and verifying matchings in a graph.""" + +from collections import Counter +from itertools import combinations, repeat + +import networkx as nx +from networkx.utils import not_implemented_for + +__all__ = [ + "is_matching", + "is_maximal_matching", + "is_perfect_matching", + "max_weight_matching", + "min_weight_matching", + "maximal_matching", +] + + +@not_implemented_for("multigraph") +@not_implemented_for("directed") +@nx._dispatchable +def maximal_matching(G): + r"""Find a maximal matching in the graph. + + A matching is a subset of edges in which no node occurs more than once. + A maximal matching cannot add more edges and still be a matching. + + Parameters + ---------- + G : NetworkX graph + Undirected graph + + Returns + ------- + matching : set + A maximal matching of the graph. + + Examples + -------- + >>> G = nx.Graph([(1, 2), (1, 3), (2, 3), (2, 4), (3, 5), (4, 5)]) + >>> sorted(nx.maximal_matching(G)) + [(1, 2), (3, 5)] + + Notes + ----- + The algorithm greedily selects a maximal matching M of the graph G + (i.e. no superset of M exists). It runs in $O(|E|)$ time. + """ + matching = set() + nodes = set() + for edge in G.edges(): + # If the edge isn't covered, add it to the matching + # then remove neighborhood of u and v from consideration. 
+ u, v = edge + if u not in nodes and v not in nodes and u != v: + matching.add(edge) + nodes.update(edge) + return matching + + +def matching_dict_to_set(matching): + """Converts matching dict format to matching set format + + Converts a dictionary representing a matching (as returned by + :func:`max_weight_matching`) to a set representing a matching (as + returned by :func:`maximal_matching`). + + In the definition of maximal matching adopted by NetworkX, + self-loops are not allowed, so the provided dictionary is expected + to never have any mapping from a key to itself. However, the + dictionary is expected to have mirrored key/value pairs, for + example, key ``u`` with value ``v`` and key ``v`` with value ``u``. + + """ + edges = set() + for edge in matching.items(): + u, v = edge + if (v, u) in edges or edge in edges: + continue + if u == v: + raise nx.NetworkXError(f"Selfloops cannot appear in matchings {edge}") + edges.add(edge) + return edges + + +@nx._dispatchable +def is_matching(G, matching): + """Return True if ``matching`` is a valid matching of ``G`` + + A *matching* in a graph is a set of edges in which no two distinct + edges share a common endpoint. Each node is incident to at most one + edge in the matching. The edges are said to be independent. + + Parameters + ---------- + G : NetworkX graph + + matching : dict or set + A dictionary or set representing a matching. If a dictionary, it + must have ``matching[u] == v`` and ``matching[v] == u`` for each + edge ``(u, v)`` in the matching. If a set, it must have elements + of the form ``(u, v)``, where ``(u, v)`` is an edge in the + matching. + + Returns + ------- + bool + Whether the given set or dictionary represents a valid matching + in the graph. + + Raises + ------ + NetworkXError + If the proposed matching has an edge to a node not in G. + Or if the matching is not a collection of 2-tuple edges. 
+ + Examples + -------- + >>> G = nx.Graph([(1, 2), (1, 3), (2, 3), (2, 4), (3, 5), (4, 5)]) + >>> nx.is_maximal_matching(G, {1: 3, 2: 4}) # using dict to represent matching + True + + >>> nx.is_matching(G, {(1, 3), (2, 4)}) # using set to represent matching + True + + """ + if isinstance(matching, dict): + matching = matching_dict_to_set(matching) + + nodes = set() + for edge in matching: + if len(edge) != 2: + raise nx.NetworkXError(f"matching has non-2-tuple edge {edge}") + u, v = edge + if u not in G or v not in G: + raise nx.NetworkXError(f"matching contains edge {edge} with node not in G") + if u == v: + return False + if not G.has_edge(u, v): + return False + if u in nodes or v in nodes: + return False + nodes.update(edge) + return True + + +@nx._dispatchable +def is_maximal_matching(G, matching): + """Return True if ``matching`` is a maximal matching of ``G`` + + A *maximal matching* in a graph is a matching in which adding any + edge would cause the set to no longer be a valid matching. + + Parameters + ---------- + G : NetworkX graph + + matching : dict or set + A dictionary or set representing a matching. If a dictionary, it + must have ``matching[u] == v`` and ``matching[v] == u`` for each + edge ``(u, v)`` in the matching. If a set, it must have elements + of the form ``(u, v)``, where ``(u, v)`` is an edge in the + matching. + + Returns + ------- + bool + Whether the given set or dictionary represents a valid maximal + matching in the graph. + + Examples + -------- + >>> G = nx.Graph([(1, 2), (1, 3), (2, 3), (3, 4), (3, 5)]) + >>> nx.is_maximal_matching(G, {(1, 2), (3, 4)}) + True + + """ + if isinstance(matching, dict): + matching = matching_dict_to_set(matching) + # If the given set is not a matching, then it is not a maximal matching. 
+ edges = set() + nodes = set() + for edge in matching: + if len(edge) != 2: + raise nx.NetworkXError(f"matching has non-2-tuple edge {edge}") + u, v = edge + if u not in G or v not in G: + raise nx.NetworkXError(f"matching contains edge {edge} with node not in G") + if u == v: + return False + if not G.has_edge(u, v): + return False + if u in nodes or v in nodes: + return False + nodes.update(edge) + edges.add(edge) + edges.add((v, u)) + # A matching is maximal if adding any new edge from G to it + # causes the resulting set to match some node twice. + # Be careful to check for adding selfloops + for u, v in G.edges: + if (u, v) not in edges: + # could add edge (u, v) to edges and have a bigger matching + if u not in nodes and v not in nodes and u != v: + return False + return True + + +@nx._dispatchable +def is_perfect_matching(G, matching): + """Return True if ``matching`` is a perfect matching for ``G`` + + A *perfect matching* in a graph is a matching in which exactly one edge + is incident upon each vertex. + + Parameters + ---------- + G : NetworkX graph + + matching : dict or set + A dictionary or set representing a matching. If a dictionary, it + must have ``matching[u] == v`` and ``matching[v] == u`` for each + edge ``(u, v)`` in the matching. If a set, it must have elements + of the form ``(u, v)``, where ``(u, v)`` is an edge in the + matching. + + Returns + ------- + bool + Whether the given set or dictionary represents a valid perfect + matching in the graph. 
+ + Examples + -------- + >>> G = nx.Graph([(1, 2), (1, 3), (2, 3), (2, 4), (3, 5), (4, 5), (4, 6)]) + >>> my_match = {1: 2, 3: 5, 4: 6} + >>> nx.is_perfect_matching(G, my_match) + True + + """ + if isinstance(matching, dict): + matching = matching_dict_to_set(matching) + + nodes = set() + for edge in matching: + if len(edge) != 2: + raise nx.NetworkXError(f"matching has non-2-tuple edge {edge}") + u, v = edge + if u not in G or v not in G: + raise nx.NetworkXError(f"matching contains edge {edge} with node not in G") + if u == v: + return False + if not G.has_edge(u, v): + return False + if u in nodes or v in nodes: + return False + nodes.update(edge) + return len(nodes) == len(G) + + +@not_implemented_for("multigraph") +@not_implemented_for("directed") +@nx._dispatchable(edge_attrs="weight") +def min_weight_matching(G, weight="weight"): + """Computing a minimum-weight maximal matching of G. + + Use the maximum-weight algorithm with edge weights subtracted + from the maximum weight of all edges. + + A matching is a subset of edges in which no node occurs more than once. + The weight of a matching is the sum of the weights of its edges. + A maximal matching cannot add more edges and still be a matching. + The cardinality of a matching is the number of matched edges. + + This method replaces the edge weights with 1 plus the maximum edge weight + minus the original edge weight. + + new_weight = (max_weight + 1) - edge_weight + + then runs :func:`max_weight_matching` with the new weights. + The max weight matching with these new weights corresponds + to the min weight matching using the original weights. + Adding 1 to the max edge weight keeps all edge weights positive + and as integers if they started as integers. + + You might worry that adding 1 to each weight would make the algorithm + favor matchings with more edges. 
But we use the parameter + `maxcardinality=True` in `max_weight_matching` to ensure that the + number of edges in the competing matchings are the same and thus + the optimum does not change due to changes in the number of edges. + + Read the documentation of `max_weight_matching` for more information. + + Parameters + ---------- + G : NetworkX graph + Undirected graph + + weight: string, optional (default='weight') + Edge data key corresponding to the edge weight. + If key not found, uses 1 as weight. + + Returns + ------- + matching : set + A minimal weight matching of the graph. + + See Also + -------- + max_weight_matching + """ + if len(G.edges) == 0: + return max_weight_matching(G, maxcardinality=True, weight=weight) + G_edges = G.edges(data=weight, default=1) + max_weight = 1 + max(w for _, _, w in G_edges) + InvG = nx.Graph() + edges = ((u, v, max_weight - w) for u, v, w in G_edges) + InvG.add_weighted_edges_from(edges, weight=weight) + return max_weight_matching(InvG, maxcardinality=True, weight=weight) + + +@not_implemented_for("multigraph") +@not_implemented_for("directed") +@nx._dispatchable(edge_attrs="weight") +def max_weight_matching(G, maxcardinality=False, weight="weight"): + """Compute a maximum-weighted matching of G. + + A matching is a subset of edges in which no node occurs more than once. + The weight of a matching is the sum of the weights of its edges. + A maximal matching cannot add more edges and still be a matching. + The cardinality of a matching is the number of matched edges. + + Parameters + ---------- + G : NetworkX graph + Undirected graph + + maxcardinality: bool, optional (default=False) + If maxcardinality is True, compute the maximum-cardinality matching + with maximum weight among all maximum-cardinality matchings. + + weight: string, optional (default='weight') + Edge data key corresponding to the edge weight. + If key not found, uses 1 as weight. + + + Returns + ------- + matching : set + A maximal matching of the graph. 
+ + Examples + -------- + >>> G = nx.Graph() + >>> edges = [(1, 2, 6), (1, 3, 2), (2, 3, 1), (2, 4, 7), (3, 5, 9), (4, 5, 3)] + >>> G.add_weighted_edges_from(edges) + >>> sorted(nx.max_weight_matching(G)) + [(2, 4), (5, 3)] + + Notes + ----- + If G has edges with weight attributes the edge data are used as + weight values else the weights are assumed to be 1. + + This function takes time O(number_of_nodes ** 3). + + If all edge weights are integers, the algorithm uses only integer + computations. If floating point weights are used, the algorithm + could return a slightly suboptimal matching due to numeric + precision errors. + + This method is based on the "blossom" method for finding augmenting + paths and the "primal-dual" method for finding a matching of maximum + weight, both methods invented by Jack Edmonds [1]_. + + Bipartite graphs can also be matched using the functions present in + :mod:`networkx.algorithms.bipartite.matching`. + + References + ---------- + .. [1] "Efficient Algorithms for Finding Maximum Matching in Graphs", + Zvi Galil, ACM Computing Surveys, 1986. + """ + # + # The algorithm is taken from "Efficient Algorithms for Finding Maximum + # Matching in Graphs" by Zvi Galil, ACM Computing Surveys, 1986. + # It is based on the "blossom" method for finding augmenting paths and + # the "primal-dual" method for finding a matching of maximum weight, both + # methods invented by Jack Edmonds. + # + # A C program for maximum weight matching by Ed Rothberg was used + # extensively to validate this new code. + # + # Many terms used in the code comments are explained in the paper + # by Galil. You will probably need the paper to make sense of this code. 
+ # + + class NoNode: + """Dummy value which is different from any node.""" + + class Blossom: + """Representation of a non-trivial blossom or sub-blossom.""" + + __slots__ = ["childs", "edges", "mybestedges"] + + # b.childs is an ordered list of b's sub-blossoms, starting with + # the base and going round the blossom. + + # b.edges is the list of b's connecting edges, such that + # b.edges[i] = (v, w) where v is a vertex in b.childs[i] + # and w is a vertex in b.childs[wrap(i+1)]. + + # If b is a top-level S-blossom, + # b.mybestedges is a list of least-slack edges to neighboring + # S-blossoms, or None if no such list has been computed yet. + # This is used for efficient computation of delta3. + + # Generate the blossom's leaf vertices. + def leaves(self): + stack = [*self.childs] + while stack: + t = stack.pop() + if isinstance(t, Blossom): + stack.extend(t.childs) + else: + yield t + + # Get a list of vertices. + gnodes = list(G) + if not gnodes: + return set() # don't bother with empty graphs + + # Find the maximum edge weight. + maxweight = 0 + allinteger = True + for i, j, d in G.edges(data=True): + wt = d.get(weight, 1) + if i != j and wt > maxweight: + maxweight = wt + allinteger = allinteger and (str(type(wt)).split("'")[1] in ("int", "long")) + + # If v is a matched vertex, mate[v] is its partner vertex. + # If v is a single vertex, v does not occur as a key in mate. + # Initially all vertices are single; updated during augmentation. + mate = {} + + # If b is a top-level blossom, + # label.get(b) is None if b is unlabeled (free), + # 1 if b is an S-blossom, + # 2 if b is a T-blossom. + # The label of a vertex is found by looking at the label of its top-level + # containing blossom. + # If v is a vertex inside a T-blossom, label[v] is 2 iff v is reachable + # from an S-vertex outside the blossom. + # Labels are assigned during a stage and reset after each augmentation. 
+ label = {} + + # If b is a labeled top-level blossom, + # labeledge[b] = (v, w) is the edge through which b obtained its label + # such that w is a vertex in b, or None if b's base vertex is single. + # If w is a vertex inside a T-blossom and label[w] == 2, + # labeledge[w] = (v, w) is an edge through which w is reachable from + # outside the blossom. + labeledge = {} + + # If v is a vertex, inblossom[v] is the top-level blossom to which v + # belongs. + # If v is a top-level vertex, inblossom[v] == v since v is itself + # a (trivial) top-level blossom. + # Initially all vertices are top-level trivial blossoms. + inblossom = dict(zip(gnodes, gnodes)) + + # If b is a sub-blossom, + # blossomparent[b] is its immediate parent (sub-)blossom. + # If b is a top-level blossom, blossomparent[b] is None. + blossomparent = dict(zip(gnodes, repeat(None))) + + # If b is a (sub-)blossom, + # blossombase[b] is its base VERTEX (i.e. recursive sub-blossom). + blossombase = dict(zip(gnodes, gnodes)) + + # If w is a free vertex (or an unreached vertex inside a T-blossom), + # bestedge[w] = (v, w) is the least-slack edge from an S-vertex, + # or None if there is no such edge. + # If b is a (possibly trivial) top-level S-blossom, + # bestedge[b] = (v, w) is the least-slack edge to a different S-blossom + # (v inside b), or None if there is no such edge. + # This is used for efficient computation of delta2 and delta3. + bestedge = {} + + # If v is a vertex, + # dualvar[v] = 2 * u(v) where u(v) is the v's variable in the dual + # optimization problem (if all edge weights are integers, multiplication + # by two ensures that all values remain integers throughout the algorithm). + # Initially, u(v) = maxweight / 2. + dualvar = dict(zip(gnodes, repeat(maxweight))) + + # If b is a non-trivial blossom, + # blossomdual[b] = z(b) where z(b) is b's variable in the dual + # optimization problem. 
+ blossomdual = {} + + # If (v, w) in allowedge or (w, v) in allowedg, then the edge + # (v, w) is known to have zero slack in the optimization problem; + # otherwise the edge may or may not have zero slack. + allowedge = {} + + # Queue of newly discovered S-vertices. + queue = [] + + # Return 2 * slack of edge (v, w) (does not work inside blossoms). + def slack(v, w): + return dualvar[v] + dualvar[w] - 2 * G[v][w].get(weight, 1) + + # Assign label t to the top-level blossom containing vertex w, + # coming through an edge from vertex v. + def assignLabel(w, t, v): + b = inblossom[w] + assert label.get(w) is None and label.get(b) is None + label[w] = label[b] = t + if v is not None: + labeledge[w] = labeledge[b] = (v, w) + else: + labeledge[w] = labeledge[b] = None + bestedge[w] = bestedge[b] = None + if t == 1: + # b became an S-vertex/blossom; add it(s vertices) to the queue. + if isinstance(b, Blossom): + queue.extend(b.leaves()) + else: + queue.append(b) + elif t == 2: + # b became a T-vertex/blossom; assign label S to its mate. + # (If b is a non-trivial blossom, its base is the only vertex + # with an external mate.) + base = blossombase[b] + assignLabel(mate[base], 1, base) + + # Trace back from vertices v and w to discover either a new blossom + # or an augmenting path. Return the base vertex of the new blossom, + # or NoNode if an augmenting path was found. + def scanBlossom(v, w): + # Trace back from v and w, placing breadcrumbs as we go. + path = [] + base = NoNode + while v is not NoNode: + # Look for a breadcrumb in v's blossom or put a new breadcrumb. + b = inblossom[v] + if label[b] & 4: + base = blossombase[b] + break + assert label[b] == 1 + path.append(b) + label[b] = 5 + # Trace one step back. + if labeledge[b] is None: + # The base of blossom b is single; stop tracing this path. 
+ assert blossombase[b] not in mate + v = NoNode + else: + assert labeledge[b][0] == mate[blossombase[b]] + v = labeledge[b][0] + b = inblossom[v] + assert label[b] == 2 + # b is a T-blossom; trace one more step back. + v = labeledge[b][0] + # Swap v and w so that we alternate between both paths. + if w is not NoNode: + v, w = w, v + # Remove breadcrumbs. + for b in path: + label[b] = 1 + # Return base vertex, if we found one. + return base + + # Construct a new blossom with given base, through S-vertices v and w. + # Label the new blossom as S; set its dual variable to zero; + # relabel its T-vertices to S and add them to the queue. + def addBlossom(base, v, w): + bb = inblossom[base] + bv = inblossom[v] + bw = inblossom[w] + # Create blossom. + b = Blossom() + blossombase[b] = base + blossomparent[b] = None + blossomparent[bb] = b + # Make list of sub-blossoms and their interconnecting edge endpoints. + b.childs = path = [] + b.edges = edgs = [(v, w)] + # Trace back from v to base. + while bv != bb: + # Add bv to the new blossom. + blossomparent[bv] = b + path.append(bv) + edgs.append(labeledge[bv]) + assert label[bv] == 2 or ( + label[bv] == 1 and labeledge[bv][0] == mate[blossombase[bv]] + ) + # Trace one step back. + v = labeledge[bv][0] + bv = inblossom[v] + # Add base sub-blossom; reverse lists. + path.append(bb) + path.reverse() + edgs.reverse() + # Trace back from w to base. + while bw != bb: + # Add bw to the new blossom. + blossomparent[bw] = b + path.append(bw) + edgs.append((labeledge[bw][1], labeledge[bw][0])) + assert label[bw] == 2 or ( + label[bw] == 1 and labeledge[bw][0] == mate[blossombase[bw]] + ) + # Trace one step back. + w = labeledge[bw][0] + bw = inblossom[w] + # Set label to S. + assert label[bb] == 1 + label[b] = 1 + labeledge[b] = labeledge[bb] + # Set dual variable to zero. + blossomdual[b] = 0 + # Relabel vertices. 
+ for v in b.leaves(): + if label[inblossom[v]] == 2: + # This T-vertex now turns into an S-vertex because it becomes + # part of an S-blossom; add it to the queue. + queue.append(v) + inblossom[v] = b + # Compute b.mybestedges. + bestedgeto = {} + for bv in path: + if isinstance(bv, Blossom): + if bv.mybestedges is not None: + # Walk this subblossom's least-slack edges. + nblist = bv.mybestedges + # The sub-blossom won't need this data again. + bv.mybestedges = None + else: + # This subblossom does not have a list of least-slack + # edges; get the information from the vertices. + nblist = [ + (v, w) for v in bv.leaves() for w in G.neighbors(v) if v != w + ] + else: + nblist = [(bv, w) for w in G.neighbors(bv) if bv != w] + for k in nblist: + (i, j) = k + if inblossom[j] == b: + i, j = j, i + bj = inblossom[j] + if ( + bj != b + and label.get(bj) == 1 + and ((bj not in bestedgeto) or slack(i, j) < slack(*bestedgeto[bj])) + ): + bestedgeto[bj] = k + # Forget about least-slack edge of the subblossom. + bestedge[bv] = None + b.mybestedges = list(bestedgeto.values()) + # Select bestedge[b]. + mybestedge = None + bestedge[b] = None + for k in b.mybestedges: + kslack = slack(*k) + if mybestedge is None or kslack < mybestslack: + mybestedge = k + mybestslack = kslack + bestedge[b] = mybestedge + + # Expand the given top-level blossom. + def expandBlossom(b, endstage): + # This is an obnoxiously complicated recursive function for the sake of + # a stack-transformation. So, we hack around the complexity by using + # a trampoline pattern. By yielding the arguments to each recursive + # call, we keep the actual callstack flat. + + def _recurse(b, endstage): + # Convert sub-blossoms into top-level blossoms. + for s in b.childs: + blossomparent[s] = None + if isinstance(s, Blossom): + if endstage and blossomdual[s] == 0: + # Recursively expand this sub-blossom. 
+ yield s + else: + for v in s.leaves(): + inblossom[v] = s + else: + inblossom[s] = s + # If we expand a T-blossom during a stage, its sub-blossoms must be + # relabeled. + if (not endstage) and label.get(b) == 2: + # Start at the sub-blossom through which the expanding + # blossom obtained its label, and relabel sub-blossoms untili + # we reach the base. + # Figure out through which sub-blossom the expanding blossom + # obtained its label initially. + entrychild = inblossom[labeledge[b][1]] + # Decide in which direction we will go round the blossom. + j = b.childs.index(entrychild) + if j & 1: + # Start index is odd; go forward and wrap. + j -= len(b.childs) + jstep = 1 + else: + # Start index is even; go backward. + jstep = -1 + # Move along the blossom until we get to the base. + v, w = labeledge[b] + while j != 0: + # Relabel the T-sub-blossom. + if jstep == 1: + p, q = b.edges[j] + else: + q, p = b.edges[j - 1] + label[w] = None + label[q] = None + assignLabel(w, 2, v) + # Step to the next S-sub-blossom and note its forward edge. + allowedge[(p, q)] = allowedge[(q, p)] = True + j += jstep + if jstep == 1: + v, w = b.edges[j] + else: + w, v = b.edges[j - 1] + # Step to the next T-sub-blossom. + allowedge[(v, w)] = allowedge[(w, v)] = True + j += jstep + # Relabel the base T-sub-blossom WITHOUT stepping through to + # its mate (so don't call assignLabel). + bw = b.childs[j] + label[w] = label[bw] = 2 + labeledge[w] = labeledge[bw] = (v, w) + bestedge[bw] = None + # Continue along the blossom until we get back to entrychild. + j += jstep + while b.childs[j] != entrychild: + # Examine the vertices of the sub-blossom to see whether + # it is reachable from a neighboring S-vertex outside the + # expanding blossom. + bv = b.childs[j] + if label.get(bv) == 1: + # This sub-blossom just got label S through one of its + # neighbors; leave it be. 
+ j += jstep + continue + if isinstance(bv, Blossom): + for v in bv.leaves(): + if label.get(v): + break + else: + v = bv + # If the sub-blossom contains a reachable vertex, assign + # label T to the sub-blossom. + if label.get(v): + assert label[v] == 2 + assert inblossom[v] == bv + label[v] = None + label[mate[blossombase[bv]]] = None + assignLabel(v, 2, labeledge[v][0]) + j += jstep + # Remove the expanded blossom entirely. + label.pop(b, None) + labeledge.pop(b, None) + bestedge.pop(b, None) + del blossomparent[b] + del blossombase[b] + del blossomdual[b] + + # Now, we apply the trampoline pattern. We simulate a recursive + # callstack by maintaining a stack of generators, each yielding a + # sequence of function arguments. We grow the stack by appending a call + # to _recurse on each argument tuple, and shrink the stack whenever a + # generator is exhausted. + stack = [_recurse(b, endstage)] + while stack: + top = stack[-1] + for s in top: + stack.append(_recurse(s, endstage)) + break + else: + stack.pop() + + # Swap matched/unmatched edges over an alternating path through blossom b + # between vertex v and the base vertex. Keep blossom bookkeeping + # consistent. + def augmentBlossom(b, v): + # This is an obnoxiously complicated recursive function for the sake of + # a stack-transformation. So, we hack around the complexity by using + # a trampoline pattern. By yielding the arguments to each recursive + # call, we keep the actual callstack flat. + + def _recurse(b, v): + # Bubble up through the blossom tree from vertex v to an immediate + # sub-blossom of b. + t = v + while blossomparent[t] != b: + t = blossomparent[t] + # Recursively deal with the first sub-blossom. + if isinstance(t, Blossom): + yield (t, v) + # Decide in which direction we will go round the blossom. + i = j = b.childs.index(t) + if i & 1: + # Start index is odd; go forward and wrap. + j -= len(b.childs) + jstep = 1 + else: + # Start index is even; go backward. 
+ jstep = -1 + # Move along the blossom until we get to the base. + while j != 0: + # Step to the next sub-blossom and augment it recursively. + j += jstep + t = b.childs[j] + if jstep == 1: + w, x = b.edges[j] + else: + x, w = b.edges[j - 1] + if isinstance(t, Blossom): + yield (t, w) + # Step to the next sub-blossom and augment it recursively. + j += jstep + t = b.childs[j] + if isinstance(t, Blossom): + yield (t, x) + # Match the edge connecting those sub-blossoms. + mate[w] = x + mate[x] = w + # Rotate the list of sub-blossoms to put the new base at the front. + b.childs = b.childs[i:] + b.childs[:i] + b.edges = b.edges[i:] + b.edges[:i] + blossombase[b] = blossombase[b.childs[0]] + assert blossombase[b] == v + + # Now, we apply the trampoline pattern. We simulate a recursive + # callstack by maintaining a stack of generators, each yielding a + # sequence of function arguments. We grow the stack by appending a call + # to _recurse on each argument tuple, and shrink the stack whenever a + # generator is exhausted. + stack = [_recurse(b, v)] + while stack: + top = stack[-1] + for args in top: + stack.append(_recurse(*args)) + break + else: + stack.pop() + + # Swap matched/unmatched edges over an alternating path between two + # single vertices. The augmenting path runs through S-vertices v and w. + def augmentMatching(v, w): + for s, j in ((v, w), (w, v)): + # Match vertex s to vertex j. Then trace back from s + # until we find a single vertex, swapping matched and unmatched + # edges as we go. + while 1: + bs = inblossom[s] + assert label[bs] == 1 + assert (labeledge[bs] is None and blossombase[bs] not in mate) or ( + labeledge[bs][0] == mate[blossombase[bs]] + ) + # Augment through the S-blossom from s to base. + if isinstance(bs, Blossom): + augmentBlossom(bs, s) + # Update mate[s] + mate[s] = j + # Trace one step back. + if labeledge[bs] is None: + # Reached single vertex; stop. 
+ break + t = labeledge[bs][0] + bt = inblossom[t] + assert label[bt] == 2 + # Trace one more step back. + s, j = labeledge[bt] + # Augment through the T-blossom from j to base. + assert blossombase[bt] == t + if isinstance(bt, Blossom): + augmentBlossom(bt, j) + # Update mate[j] + mate[j] = s + + # Verify that the optimum solution has been reached. + def verifyOptimum(): + if maxcardinality: + # Vertices may have negative dual; + # find a constant non-negative number to add to all vertex duals. + vdualoffset = max(0, -min(dualvar.values())) + else: + vdualoffset = 0 + # 0. all dual variables are non-negative + assert min(dualvar.values()) + vdualoffset >= 0 + assert len(blossomdual) == 0 or min(blossomdual.values()) >= 0 + # 0. all edges have non-negative slack and + # 1. all matched edges have zero slack; + for i, j, d in G.edges(data=True): + wt = d.get(weight, 1) + if i == j: + continue # ignore self-loops + s = dualvar[i] + dualvar[j] - 2 * wt + iblossoms = [i] + jblossoms = [j] + while blossomparent[iblossoms[-1]] is not None: + iblossoms.append(blossomparent[iblossoms[-1]]) + while blossomparent[jblossoms[-1]] is not None: + jblossoms.append(blossomparent[jblossoms[-1]]) + iblossoms.reverse() + jblossoms.reverse() + for bi, bj in zip(iblossoms, jblossoms): + if bi != bj: + break + s += 2 * blossomdual[bi] + assert s >= 0 + if mate.get(i) == j or mate.get(j) == i: + assert mate[i] == j and mate[j] == i + assert s == 0 + # 2. all single vertices have zero dual value; + for v in gnodes: + assert (v in mate) or dualvar[v] + vdualoffset == 0 + # 3. all blossoms with positive dual value are full. + for b in blossomdual: + if blossomdual[b] > 0: + assert len(b.edges) % 2 == 1 + for i, j in b.edges[1::2]: + assert mate[i] == j and mate[j] == i + # Ok. + + # Main loop: continue until no further improvement is possible. + while 1: + # Each iteration of this loop is a "stage". + # A stage finds an augmenting path and uses that to improve + # the matching. 
+ + # Remove labels from top-level blossoms/vertices. + label.clear() + labeledge.clear() + + # Forget all about least-slack edges. + bestedge.clear() + for b in blossomdual: + b.mybestedges = None + + # Loss of labeling means that we can not be sure that currently + # allowable edges remain allowable throughout this stage. + allowedge.clear() + + # Make queue empty. + queue[:] = [] + + # Label single blossoms/vertices with S and put them in the queue. + for v in gnodes: + if (v not in mate) and label.get(inblossom[v]) is None: + assignLabel(v, 1, None) + + # Loop until we succeed in augmenting the matching. + augmented = 0 + while 1: + # Each iteration of this loop is a "substage". + # A substage tries to find an augmenting path; + # if found, the path is used to improve the matching and + # the stage ends. If there is no augmenting path, the + # primal-dual method is used to pump some slack out of + # the dual variables. + + # Continue labeling until all vertices which are reachable + # through an alternating path have got a label. + while queue and not augmented: + # Take an S vertex from the queue. + v = queue.pop() + assert label[inblossom[v]] == 1 + + # Scan its neighbors: + for w in G.neighbors(v): + if w == v: + continue # ignore self-loops + # w is a neighbor to v + bv = inblossom[v] + bw = inblossom[w] + if bv == bw: + # this edge is internal to a blossom; ignore it + continue + if (v, w) not in allowedge: + kslack = slack(v, w) + if kslack <= 0: + # edge k has zero slack => it is allowable + allowedge[(v, w)] = allowedge[(w, v)] = True + if (v, w) in allowedge: + if label.get(bw) is None: + # (C1) w is a free vertex; + # label w with T and label its mate with S (R12). + assignLabel(w, 2, v) + elif label.get(bw) == 1: + # (C2) w is an S-vertex (not in the same blossom); + # follow back-links to discover either an + # augmenting path or a new blossom. 
+ base = scanBlossom(v, w) + if base is not NoNode: + # Found a new blossom; add it to the blossom + # bookkeeping and turn it into an S-blossom. + addBlossom(base, v, w) + else: + # Found an augmenting path; augment the + # matching and end this stage. + augmentMatching(v, w) + augmented = 1 + break + elif label.get(w) is None: + # w is inside a T-blossom, but w itself has not + # yet been reached from outside the blossom; + # mark it as reached (we need this to relabel + # during T-blossom expansion). + assert label[bw] == 2 + label[w] = 2 + labeledge[w] = (v, w) + elif label.get(bw) == 1: + # keep track of the least-slack non-allowable edge to + # a different S-blossom. + if bestedge.get(bv) is None or kslack < slack(*bestedge[bv]): + bestedge[bv] = (v, w) + elif label.get(w) is None: + # w is a free vertex (or an unreached vertex inside + # a T-blossom) but we can not reach it yet; + # keep track of the least-slack edge that reaches w. + if bestedge.get(w) is None or kslack < slack(*bestedge[w]): + bestedge[w] = (v, w) + + if augmented: + break + + # There is no augmenting path under these constraints; + # compute delta and reduce slack in the optimization problem. + # (Note that our vertex dual variables, edge slacks and delta's + # are pre-multiplied by two.) + deltatype = -1 + delta = deltaedge = deltablossom = None + + # Compute delta1: the minimum value of any vertex dual. + if not maxcardinality: + deltatype = 1 + delta = min(dualvar.values()) + + # Compute delta2: the minimum slack on any edge between + # an S-vertex and a free vertex. + for v in G.nodes(): + if label.get(inblossom[v]) is None and bestedge.get(v) is not None: + d = slack(*bestedge[v]) + if deltatype == -1 or d < delta: + delta = d + deltatype = 2 + deltaedge = bestedge[v] + + # Compute delta3: half the minimum slack on any edge between + # a pair of S-blossoms. 
+ for b in blossomparent: + if ( + blossomparent[b] is None + and label.get(b) == 1 + and bestedge.get(b) is not None + ): + kslack = slack(*bestedge[b]) + if allinteger: + assert (kslack % 2) == 0 + d = kslack // 2 + else: + d = kslack / 2.0 + if deltatype == -1 or d < delta: + delta = d + deltatype = 3 + deltaedge = bestedge[b] + + # Compute delta4: minimum z variable of any T-blossom. + for b in blossomdual: + if ( + blossomparent[b] is None + and label.get(b) == 2 + and (deltatype == -1 or blossomdual[b] < delta) + ): + delta = blossomdual[b] + deltatype = 4 + deltablossom = b + + if deltatype == -1: + # No further improvement possible; max-cardinality optimum + # reached. Do a final delta update to make the optimum + # verifiable. + assert maxcardinality + deltatype = 1 + delta = max(0, min(dualvar.values())) + + # Update dual variables according to delta. + for v in gnodes: + if label.get(inblossom[v]) == 1: + # S-vertex: 2*u = 2*u - 2*delta + dualvar[v] -= delta + elif label.get(inblossom[v]) == 2: + # T-vertex: 2*u = 2*u + 2*delta + dualvar[v] += delta + for b in blossomdual: + if blossomparent[b] is None: + if label.get(b) == 1: + # top-level S-blossom: z = z + 2*delta + blossomdual[b] += delta + elif label.get(b) == 2: + # top-level T-blossom: z = z - 2*delta + blossomdual[b] -= delta + + # Take action at the point where minimum delta occurred. + if deltatype == 1: + # No further improvement possible; optimum reached. + break + elif deltatype == 2: + # Use the least-slack edge to continue the search. + (v, w) = deltaedge + assert label[inblossom[v]] == 1 + allowedge[(v, w)] = allowedge[(w, v)] = True + queue.append(v) + elif deltatype == 3: + # Use the least-slack edge to continue the search. + (v, w) = deltaedge + allowedge[(v, w)] = allowedge[(w, v)] = True + assert label[inblossom[v]] == 1 + queue.append(v) + elif deltatype == 4: + # Expand the least-z blossom. + expandBlossom(deltablossom, False) + + # End of a this substage. 
+ + # Paranoia check that the matching is symmetric. + for v in mate: + assert mate[mate[v]] == v + + # Stop when no more augmenting path can be found. + if not augmented: + break + + # End of a stage; expand all S-blossoms which have zero dual. + for b in list(blossomdual.keys()): + if b not in blossomdual: + continue # already expanded + if blossomparent[b] is None and label.get(b) == 1 and blossomdual[b] == 0: + expandBlossom(b, True) + + # Verify that we reached the optimum solution (only for integer weights). + if allinteger: + verifyOptimum() + + return matching_dict_to_set(mate) diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/moral.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/moral.py new file mode 100644 index 0000000000000000000000000000000000000000..e2acf80f6c3715da57dfc92e4c2d2daf986b3c29 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/moral.py @@ -0,0 +1,59 @@ +r"""Function for computing the moral graph of a directed graph.""" + +import itertools + +import networkx as nx +from networkx.utils import not_implemented_for + +__all__ = ["moral_graph"] + + +@not_implemented_for("undirected") +@nx._dispatchable(returns_graph=True) +def moral_graph(G): + r"""Return the Moral Graph + + Returns the moralized graph of a given directed graph. + + Parameters + ---------- + G : NetworkX graph + Directed graph + + Returns + ------- + H : NetworkX graph + The undirected moralized graph of G + + Raises + ------ + NetworkXNotImplemented + If `G` is undirected. + + Examples + -------- + >>> G = nx.DiGraph([(1, 2), (2, 3), (2, 5), (3, 4), (4, 3)]) + >>> G_moral = nx.moral_graph(G) + >>> G_moral.edges() + EdgeView([(1, 2), (2, 3), (2, 5), (2, 4), (3, 4)]) + + Notes + ----- + A moral graph is an undirected graph H = (V, E) generated from a + directed Graph, where if a node has more than one parent node, edges + between these parent nodes are inserted and all directed edges become + undirected. 
+ + https://en.wikipedia.org/wiki/Moral_graph + + References + ---------- + .. [1] Wray L. Buntine. 1995. Chain graphs for learning. + In Proceedings of the Eleventh conference on Uncertainty + in artificial intelligence (UAI'95) + """ + H = G.to_undirected() + for preds in G.pred.values(): + predecessors_combinations = itertools.combinations(preds, r=2) + H.add_edges_from(predecessors_combinations) + return H diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/non_randomness.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/non_randomness.py new file mode 100644 index 0000000000000000000000000000000000000000..137991157747e3720f026481ff1b0b3b5e8af1cf --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/non_randomness.py @@ -0,0 +1,98 @@ +r"""Computation of graph non-randomness""" + +import math + +import networkx as nx +from networkx.utils import not_implemented_for + +__all__ = ["non_randomness"] + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +@nx._dispatchable(edge_attrs="weight") +def non_randomness(G, k=None, weight="weight"): + """Compute the non-randomness of graph G. + + The first returned value nr is the sum of non-randomness values of all + edges within the graph (where the non-randomness of an edge tends to be + small when the two nodes linked by that edge are from two different + communities). + + The second computed value nr_rd is a relative measure that indicates + to what extent graph G is different from random graphs in terms + of probability. When it is close to 0, the graph tends to be more + likely generated by an Erdos Renyi model. + + Parameters + ---------- + G : NetworkX graph + Graph must be symmetric, connected, and without self-loops. + + k : int + The number of communities in G. + If k is not set, the function will use a default community + detection algorithm to set it. 
+ + weight : string or None, optional (default=None) + The name of an edge attribute that holds the numerical value used + as a weight. If None, then each edge has weight 1, i.e., the graph is + binary. + + Returns + ------- + non-randomness : (float, float) tuple + Non-randomness, Relative non-randomness w.r.t. + Erdos Renyi random graphs. + + Raises + ------ + NetworkXException + if the input graph is not connected. + NetworkXError + if the input graph contains self-loops or if graph has no edges. + + Examples + -------- + >>> G = nx.karate_club_graph() + >>> nr, nr_rd = nx.non_randomness(G, 2) + >>> nr, nr_rd = nx.non_randomness(G, 2, "weight") + + Notes + ----- + This computes Eq. (4.4) and (4.5) in Ref. [1]_. + + If a weight field is passed, this algorithm will use the eigenvalues + of the weighted adjacency matrix to compute Eq. (4.4) and (4.5). + + References + ---------- + .. [1] Xiaowei Ying and Xintao Wu, + On Randomness Measures for Social Networks, + SIAM International Conference on Data Mining. 2009 + """ + import numpy as np + + # corner case: graph has no edges + if nx.is_empty(G): + raise nx.NetworkXError("non_randomness not applicable to empty graphs") + if not nx.is_connected(G): + raise nx.NetworkXException("Non connected graph.") + if len(list(nx.selfloop_edges(G))) > 0: + raise nx.NetworkXError("Graph must not contain self-loops") + + if k is None: + k = len(tuple(nx.community.label_propagation_communities(G))) + + # eq. 4.4 + eigenvalues = np.linalg.eigvals(nx.to_numpy_array(G, weight=weight)) + nr = float(np.real(np.sum(eigenvalues[:k]))) + + n = G.number_of_nodes() + m = G.number_of_edges() + p = (2 * k * m) / (n * (n - k)) + + # eq. 
4.5 + nr_rd = (nr - ((n - 2 * k) * p + k)) / math.sqrt(2 * k * p * (1 - p)) + + return nr, nr_rd diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/planar_drawing.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/planar_drawing.py new file mode 100644 index 0000000000000000000000000000000000000000..ea25809b6aeb198b23b44fe9878775d11b7e109c --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/planar_drawing.py @@ -0,0 +1,464 @@ +from collections import defaultdict + +import networkx as nx + +__all__ = ["combinatorial_embedding_to_pos"] + + +def combinatorial_embedding_to_pos(embedding, fully_triangulate=False): + """Assigns every node a (x, y) position based on the given embedding + + The algorithm iteratively inserts nodes of the input graph in a certain + order and rearranges previously inserted nodes so that the planar drawing + stays valid. This is done efficiently by only maintaining relative + positions during the node placements and calculating the absolute positions + at the end. For more information see [1]_. + + Parameters + ---------- + embedding : nx.PlanarEmbedding + This defines the order of the edges + + fully_triangulate : bool + If set to True the algorithm adds edges to a copy of the input + embedding and makes it chordal. + + Returns + ------- + pos : dict + Maps each node to a tuple that defines the (x, y) position + + References + ---------- + .. [1] M. Chrobak and T.H. 
Payne: + A Linear-time Algorithm for Drawing a Planar Graph on a Grid 1989 + http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.51.6677 + + """ + if len(embedding.nodes()) < 4: + # Position the node in any triangle + default_positions = [(0, 0), (2, 0), (1, 1)] + pos = {} + for i, v in enumerate(embedding.nodes()): + pos[v] = default_positions[i] + return pos + + embedding, outer_face = triangulate_embedding(embedding, fully_triangulate) + + # The following dicts map a node to another node + # If a node is not in the key set it means that the node is not yet in G_k + # If a node maps to None then the corresponding subtree does not exist + left_t_child = {} + right_t_child = {} + + # The following dicts map a node to an integer + delta_x = {} + y_coordinate = {} + + node_list = get_canonical_ordering(embedding, outer_face) + + # 1. Phase: Compute relative positions + + # Initialization + v1, v2, v3 = node_list[0][0], node_list[1][0], node_list[2][0] + + delta_x[v1] = 0 + y_coordinate[v1] = 0 + right_t_child[v1] = v3 + left_t_child[v1] = None + + delta_x[v2] = 1 + y_coordinate[v2] = 0 + right_t_child[v2] = None + left_t_child[v2] = None + + delta_x[v3] = 1 + y_coordinate[v3] = 1 + right_t_child[v3] = v2 + left_t_child[v3] = None + + for k in range(3, len(node_list)): + vk, contour_nbrs = node_list[k] + wp = contour_nbrs[0] + wp1 = contour_nbrs[1] + wq = contour_nbrs[-1] + wq1 = contour_nbrs[-2] + adds_mult_tri = len(contour_nbrs) > 2 + + # Stretch gaps: + delta_x[wp1] += 1 + delta_x[wq] += 1 + + delta_x_wp_wq = sum(delta_x[x] for x in contour_nbrs[1:]) + + # Adjust offsets + delta_x[vk] = (-y_coordinate[wp] + delta_x_wp_wq + y_coordinate[wq]) // 2 + y_coordinate[vk] = (y_coordinate[wp] + delta_x_wp_wq + y_coordinate[wq]) // 2 + delta_x[wq] = delta_x_wp_wq - delta_x[vk] + if adds_mult_tri: + delta_x[wp1] -= delta_x[vk] + + # Install v_k: + right_t_child[wp] = vk + right_t_child[vk] = wq + if adds_mult_tri: + left_t_child[vk] = wp1 + right_t_child[wq1] = None + 
else: + left_t_child[vk] = None + + # 2. Phase: Set absolute positions + pos = {} + pos[v1] = (0, y_coordinate[v1]) + remaining_nodes = [v1] + while remaining_nodes: + parent_node = remaining_nodes.pop() + + # Calculate position for left child + set_position( + parent_node, left_t_child, remaining_nodes, delta_x, y_coordinate, pos + ) + # Calculate position for right child + set_position( + parent_node, right_t_child, remaining_nodes, delta_x, y_coordinate, pos + ) + return pos + + +def set_position(parent, tree, remaining_nodes, delta_x, y_coordinate, pos): + """Helper method to calculate the absolute position of nodes.""" + child = tree[parent] + parent_node_x = pos[parent][0] + if child is not None: + # Calculate pos of child + child_x = parent_node_x + delta_x[child] + pos[child] = (child_x, y_coordinate[child]) + # Remember to calculate pos of its children + remaining_nodes.append(child) + + +def get_canonical_ordering(embedding, outer_face): + """Returns a canonical ordering of the nodes + + The canonical ordering of nodes (v1, ..., vn) must fulfill the following + conditions: + (See Lemma 1 in [2]_) + + - For the subgraph G_k of the input graph induced by v1, ..., vk it holds: + - 2-connected + - internally triangulated + - the edge (v1, v2) is part of the outer face + - For a node v(k+1) the following holds: + - The node v(k+1) is part of the outer face of G_k + - It has at least two neighbors in G_k + - All neighbors of v(k+1) in G_k lie consecutively on the outer face of + G_k (excluding the edge (v1, v2)). + + The algorithm used here starts with G_n (containing all nodes). It first + selects the nodes v1 and v2. And then tries to find the order of the other + nodes by checking which node can be removed in order to fulfill the + conditions mentioned above. This is done by calculating the number of + chords of nodes on the outer face. For more information see [1]_. 
+ + Parameters + ---------- + embedding : nx.PlanarEmbedding + The embedding must be triangulated + outer_face : list + The nodes on the outer face of the graph + + Returns + ------- + ordering : list + A list of tuples `(vk, wp_wq)`. Here `vk` is the node at this position + in the canonical ordering. The element `wp_wq` is a list of nodes that + make up the outer face of G_k. + + References + ---------- + .. [1] Steven Chaplick. + Canonical Orders of Planar Graphs and (some of) Their Applications 2015 + https://wuecampus2.uni-wuerzburg.de/moodle/pluginfile.php/545727/mod_resource/content/0/vg-ss15-vl03-canonical-orders-druckversion.pdf + .. [2] M. Chrobak and T.H. Payne: + A Linear-time Algorithm for Drawing a Planar Graph on a Grid 1989 + http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.51.6677 + + """ + v1 = outer_face[0] + v2 = outer_face[1] + chords = defaultdict(int) # Maps nodes to the number of their chords + marked_nodes = set() + ready_to_pick = set(outer_face) + + # Initialize outer_face_ccw_nbr (do not include v1 -> v2) + outer_face_ccw_nbr = {} + prev_nbr = v2 + for idx in range(2, len(outer_face)): + outer_face_ccw_nbr[prev_nbr] = outer_face[idx] + prev_nbr = outer_face[idx] + outer_face_ccw_nbr[prev_nbr] = v1 + + # Initialize outer_face_cw_nbr (do not include v2 -> v1) + outer_face_cw_nbr = {} + prev_nbr = v1 + for idx in range(len(outer_face) - 1, 0, -1): + outer_face_cw_nbr[prev_nbr] = outer_face[idx] + prev_nbr = outer_face[idx] + + def is_outer_face_nbr(x, y): + if x not in outer_face_ccw_nbr: + return outer_face_cw_nbr[x] == y + if x not in outer_face_cw_nbr: + return outer_face_ccw_nbr[x] == y + return outer_face_ccw_nbr[x] == y or outer_face_cw_nbr[x] == y + + def is_on_outer_face(x): + return x not in marked_nodes and (x in outer_face_ccw_nbr or x == v1) + + # Initialize number of chords + for v in outer_face: + for nbr in embedding.neighbors_cw_order(v): + if is_on_outer_face(nbr) and not is_outer_face_nbr(v, nbr): + chords[v] += 1 + 
ready_to_pick.discard(v) + + # Initialize canonical_ordering + canonical_ordering = [None] * len(embedding.nodes()) + canonical_ordering[0] = (v1, []) + canonical_ordering[1] = (v2, []) + ready_to_pick.discard(v1) + ready_to_pick.discard(v2) + + for k in range(len(embedding.nodes()) - 1, 1, -1): + # 1. Pick v from ready_to_pick + v = ready_to_pick.pop() + marked_nodes.add(v) + + # v has exactly two neighbors on the outer face (wp and wq) + wp = None + wq = None + # Iterate over neighbors of v to find wp and wq + nbr_iterator = iter(embedding.neighbors_cw_order(v)) + while True: + nbr = next(nbr_iterator) + if nbr in marked_nodes: + # Only consider nodes that are not yet removed + continue + if is_on_outer_face(nbr): + # nbr is either wp or wq + if nbr == v1: + wp = v1 + elif nbr == v2: + wq = v2 + else: + if outer_face_cw_nbr[nbr] == v: + # nbr is wp + wp = nbr + else: + # nbr is wq + wq = nbr + if wp is not None and wq is not None: + # We don't need to iterate any further + break + + # Obtain new nodes on outer face (neighbors of v from wp to wq) + wp_wq = [wp] + nbr = wp + while nbr != wq: + # Get next neighbor (clockwise on the outer face) + next_nbr = embedding[v][nbr]["ccw"] + wp_wq.append(next_nbr) + # Update outer face + outer_face_cw_nbr[nbr] = next_nbr + outer_face_ccw_nbr[next_nbr] = nbr + # Move to next neighbor of v + nbr = next_nbr + + if len(wp_wq) == 2: + # There was a chord between wp and wq, decrease number of chords + chords[wp] -= 1 + if chords[wp] == 0: + ready_to_pick.add(wp) + chords[wq] -= 1 + if chords[wq] == 0: + ready_to_pick.add(wq) + else: + # Update all chords involving w_(p+1) to w_(q-1) + new_face_nodes = set(wp_wq[1:-1]) + for w in new_face_nodes: + # If we do not find a chord for w later we can pick it next + ready_to_pick.add(w) + for nbr in embedding.neighbors_cw_order(w): + if is_on_outer_face(nbr) and not is_outer_face_nbr(w, nbr): + # There is a chord involving w + chords[w] += 1 + ready_to_pick.discard(w) + if nbr not in 
def triangulate_face(embedding, v1, v2):
    """Triangulate the face that contains the half-edge (v1, v2).

    The face is walked with a sliding window of three consecutive boundary
    nodes (plus one look-ahead node). Whenever the first and third node of
    the window are not adjacent yet, a pair of half-edges joining them is
    inserted. `embedding` is modified in place and nothing is returned.

    Parameters
    ----------
    embedding : nx.PlanarEmbedding
    v1 : node
        The half-edge (v1, v2) belongs to the face that gets triangulated
    v2 : node
    """
    _, v3 = embedding.next_face_half_edge(v1, v2)
    _, v4 = embedding.next_face_half_edge(v2, v3)
    if v1 in (v2, v3):
        # The component has less than 3 nodes
        return
    # Stop once the look-ahead node wraps around to the window start.
    while v1 != v4:
        # Add edge if not already present on other side
        if embedding.has_edge(v1, v3):
            # Cannot triangulate at this position
            v1, v2, v3 = v2, v3, v4
        else:
            # Add edge for triangulation
            embedding.add_half_edge(v1, v3, ccw=v2)
            embedding.add_half_edge(v3, v1, cw=v2)
            # Keep v1 fixed; the new diagonal (v1, v3) becomes the window base
            v1, v2, v3 = v1, v3, v4
        # Get next node
        _, v4 = embedding.next_face_half_edge(v2, v3)


def triangulate_embedding(embedding, fully_triangulate=True):
    """Triangulates the embedding.

    Traverses faces of the embedding and adds edges to a copy of the
    embedding to triangulate it.
    The method also ensures that the resulting graph is 2-connected by adding
    edges if the same vertex is contained twice on a path around a face.

    Parameters
    ----------
    embedding : nx.PlanarEmbedding
        The input graph must contain at least 3 nodes.

    fully_triangulate : bool
        If set to False the face with the most nodes is chosen as outer face.
        This outer face does not get triangulated.

    Returns
    -------
    (embedding, outer_face) : (nx.PlanarEmbedding, list) tuple
        The element `embedding` is a new embedding containing all edges from
        the input embedding and the additional edges to triangulate the graph.
        The element `outer_face` is a list of nodes that lie on the outer face.
        If the graph is fully triangulated these are three arbitrary connected
        nodes.

    Notes
    -----
    For an embedding with at most one node the input object itself (not a
    copy) is returned together with the list of its nodes.

    """
    if len(embedding.nodes) <= 1:
        # Nothing to triangulate; note that no copy is made on this path
        return embedding, list(embedding.nodes)
    # Work on a copy so the caller's embedding is left untouched
    embedding = nx.PlanarEmbedding(embedding)

    # Get a list with a node for each connected component
    component_nodes = [next(iter(x)) for x in nx.connected_components(embedding)]

    # 1. Make graph a single component (add edge between components)
    for i in range(len(component_nodes) - 1):
        v1 = component_nodes[i]
        v2 = component_nodes[i + 1]
        embedding.connect_components(v1, v2)

    # 2. Calculate faces, ensure 2-connectedness and determine outer face
    outer_face = []  # A face with the most number of nodes
    face_list = []
    edges_visited = set()  # Used to keep track of already visited faces
    for v in embedding.nodes():
        for w in embedding.neighbors_cw_order(v):
            # Returns [] when the half-edge (v, w) was already visited
            new_face = make_bi_connected(embedding, v, w, edges_visited)
            if new_face:
                # Found a new face
                face_list.append(new_face)
                if len(new_face) > len(outer_face):
                    # The face is a candidate to be the outer face
                    outer_face = new_face

    # 3. Triangulate (internal) faces
    for face in face_list:
        # Identity check: only the selected outer face object is skipped
        if face is not outer_face or fully_triangulate:
            # Triangulate this face
            triangulate_face(embedding, face[0], face[1])

    if fully_triangulate:
        # Report the outer face as the first two nodes of the selected face
        # plus the node stored as "ccw" of v1 in v2's rotation
        v1 = outer_face[0]
        v2 = outer_face[1]
        v3 = embedding[v2][v1]["ccw"]
        outer_face = [v1, v2, v3]

    return embedding, outer_face


def make_bi_connected(embedding, starting_node, outgoing_node, edges_counted):
    """Traverse a face and make it 2-connected

    Walks once around the face that contains the half-edge
    (starting_node, outgoing_node). Whenever a node is met a second time on
    that walk, a pair of half-edges is inserted that bypasses it.
    This method also adds all edges on the face to `edges_counted`.
    `embedding` is modified in place.

    Parameters
    ----------
    embedding: nx.PlanarEmbedding
        The embedding that defines the faces
    starting_node : node
        A node on the face
    outgoing_node : node
        A node such that the half edge (starting_node, outgoing_node) belongs
        to the face
    edges_counted: set
        Set of all half-edges that belong to a face that have been visited

    Returns
    -------
    face_nodes: list
        A list of all nodes at the border of this face. The list is empty
        if (starting_node, outgoing_node) is already in `edges_counted`.

    Raises
    ------
    nx.NetworkXException
        If the walk reaches a degenerate half-edge whose endpoints coincide.
    """

    # Check if the face has already been calculated
    if (starting_node, outgoing_node) in edges_counted:
        # This face was already counted
        return []
    edges_counted.add((starting_node, outgoing_node))

    # Add all edges to edges_counted which have this face to their left
    v1 = starting_node
    v2 = outgoing_node
    face_list = [starting_node]  # List of nodes around the face
    face_set = set(face_list)  # Set for faster queries
    _, v3 = embedding.next_face_half_edge(v1, v2)

    # Move the nodes v1, v2, v3 around the face:
    while v2 != starting_node or v3 != outgoing_node:
        if v1 == v2:
            raise nx.NetworkXException("Invalid half-edge")
        # cycle is not completed yet
        if v2 in face_set:
            # v2 encountered twice: Add edge to ensure 2-connectedness
            embedding.add_half_edge(v1, v3, ccw=v2)
            embedding.add_half_edge(v3, v1, cw=v2)
            edges_counted.add((v2, v3))
            edges_counted.add((v3, v1))
            # Continue the walk from v1 so that v2 is bypassed
            v2 = v1
        else:
            face_set.add(v2)
            face_list.append(v2)

        # set next edge
        v1 = v2
        v2, v3 = embedding.next_face_half_edge(v2, v3)

        # remember that this edge has been counted
        edges_counted.add((v1, v2))

    return face_list
@not_implemented_for("directed")
@nx._dispatchable
def tutte_polynomial(G):
    r"""Return the Tutte polynomial `T_G(x, y)` of the undirected graph `G`.

    The polynomial is obtained with an iterative (explicit stack) version of
    the deletion-contraction algorithm.

    The Tutte polynomial is a graph invariant in two variables that encodes
    information about the edge-connectivity of a graph; every
    deletion-contraction-expressible feature of a graph is a specialization
    of it [1]_ [2]_. Three equivalent definitions are:

    Rank-nullity expansion [3]_: with `V` and `E` the vertex and edge sets of
    `G`, `n(G)` the number of vertices, and `c(A)` the number of connected
    components of the graph with vertex set `V` and edge set `A`,

    .. math::

        T_G(x, y) = \sum_{A \subseteq E} (x-1)^{c(A) - c(E)} (y-1)^{c(A) + |A| - n(G)}

    Spanning tree expansion [4]_ [5]_: fix a strict linear order `L` on `E`.
    For a spanning tree `T`, let `i(T)` and `e(T)` be the number of edges
    that are internally, respectively externally, active with respect to `T`
    and `L`. Then

    .. math::

        T_G(x, y) = \sum_{T \text{ a spanning tree of } G} x^{i(T)} y^{e(T)}

    Deletion-contraction recurrence: with `G-e` and `G/e` the graphs obtained
    by deleting, respectively contracting, edge `e`, `k(G)` the number of
    cut-edges and `l(G)` the number of self-loops of `G`,

    .. math::
        T_G(x, y) = \begin{cases}
           x^{k(G)} y^{l(G)}, & \text{if all edges are cut-edges or self-loops} \\
           T_{G-e}(x, y) + T_{G/e}(x, y), & \text{otherwise, for an arbitrary edge $e$ not a cut-edge or loop}
        \end{cases}

    Parameters
    ----------
    G : NetworkX graph

    Returns
    -------
    instance of `sympy.core.add.Add`
        A Sympy expression representing the Tutte polynomial for `G`.

    Examples
    --------
    >>> C = nx.cycle_graph(5)
    >>> nx.tutte_polynomial(C)
    x**4 + x**3 + x**2 + x + y

    >>> D = nx.diamond_graph()
    >>> nx.tutte_polynomial(D)
    x**3 + 2*x**2 + 2*x*y + x + y**2 + y

    Notes
    -----
    Some specializations of the Tutte polynomial:

    - `T_G(1, 1)` counts the number of spanning trees of `G`
    - `T_G(1, 2)` counts the number of connected spanning subgraphs of `G`
    - `T_G(2, 1)` counts the number of spanning forests in `G`
    - `T_G(0, 2)` counts the number of strong orientations of `G`
    - `T_G(2, 0)` counts the number of acyclic orientations of `G`

    Edge contraction and deletion-contraction are introduced in [6]_, the
    combinatorial meaning of the coefficients in [7]_, and universality,
    properties, and applications in [8]_.

    Computing the polynomial once up front can pay off when
    edge-connectivity-related quantities are evaluated repeatedly.

    References
    ----------
    .. [1] M. Brandt,
       "The Tutte Polynomial."
       Talking About Combinatorial Objects Seminar, 2015
       https://math.berkeley.edu/~brandtm/talks/tutte.pdf
    .. [2] A. Björklund, T. Husfeldt, P. Kaski, M. Koivisto,
       "Computing the Tutte polynomial in vertex-exponential time"
       49th Annual IEEE Symposium on Foundations of Computer Science, 2008
       https://ieeexplore.ieee.org/abstract/document/4691000
    .. [3] Y. Shi, M. Dehmer, X. Li, I. Gutman,
       "Graph Polynomials," p. 14
    .. [4] Y. Shi, M. Dehmer, X. Li, I. Gutman,
       "Graph Polynomials," p. 46
    .. [5] A. Nešetril, J. Goodall,
       "Graph invariants, homomorphisms, and the Tutte polynomial"
       https://iuuk.mff.cuni.cz/~andrew/Tutte.pdf
    .. [6] D. B. West,
       "Introduction to Graph Theory," p. 84
    .. [7] G. Coutinho,
       "A brief introduction to the Tutte polynomial"
       Structural Analysis of Complex Networks, 2011
       https://homepages.dcc.ufmg.br/~gabriel/seminars/coutinho_tuttepolynomial_seminar.pdf
    .. [8] J. A. Ellis-Monaghan, C. Merino,
       "Graph polynomials and their applications I: The Tutte polynomial"
       Structural Analysis of Complex Networks, 2011
       https://arxiv.org/pdf/0803.3079.pdf
    """
    import sympy

    x, y = sympy.Symbol("x"), sympy.Symbol("y")

    # Work on a multigraph copy: contraction may create parallel edges/loops.
    pending = [nx.MultiGraph(G)]
    total = 0
    while pending:
        H = pending.pop()
        cut_edges = set(nx.bridges(H))

        # First edge that is neither a self-loop nor a bridge, if any.
        pivot = next(
            (
                edge
                for edge in H.edges
                if edge[0] != edge[1] and (edge[0], edge[1]) not in cut_edges
            ),
            None,
        )
        if pivot is None:
            # Base case: only cut-edges and self-loops are left.
            n_loops = sum(1 for _ in nx.selfloop_edges(H, keys=True))
            total += x ** len(cut_edges) * y**n_loops
            continue

        # deletion-contraction
        contracted = nx.contracted_edge(H, pivot, self_loops=True)
        # Drop the loop that the contracted edge itself turned into.
        contracted.remove_edge(pivot[0], pivot[0])
        H.remove_edge(*pivot)
        pending.append(H)
        pending.append(contracted)
    return sympy.simplify(total)
math:: + + X_G(x) = \sum_{S \subseteq E} (-1)^{|S|} x^{c(G(S))} + + + Def 2 (interpolating polynomial): + For `G` an undirected graph, `n(G)` the number of vertices of `G`, `k_0 = 0`, + and `k_i` the number of distinct ways to color the vertices of `G` with `i` + unique colors (for `i` a natural number at most `n(G)`), `X_G(x)` is the + unique Lagrange interpolating polynomial of degree `n(G)` through the points + `(0, k_0), (1, k_1), \dots, (n(G), k_{n(G)})` [2]_. + + + Def 3 (chromatic recurrence): + For `G` an undirected graph, `G-e` the graph obtained from `G` by deleting + edge `e`, `G/e` the graph obtained from `G` by contracting edge `e`, `n(G)` + the number of vertices of `G`, and `e(G)` the number of edges of `G` [3]_: + + .. math:: + X_G(x) = \begin{cases} + x^{n(G)}, & \text{if $e(G)=0$} \\ + X_{G-e}(x) - X_{G/e}(x), & \text{otherwise, for an arbitrary edge $e$} + \end{cases} + + This formulation is also known as the Fundamental Reduction Theorem [4]_. + + + Parameters + ---------- + G : NetworkX graph + + Returns + ------- + instance of `sympy.core.add.Add` + A Sympy expression representing the chromatic polynomial for `G`. + + Examples + -------- + >>> C = nx.cycle_graph(5) + >>> nx.chromatic_polynomial(C) + x**5 - 5*x**4 + 10*x**3 - 10*x**2 + 4*x + + >>> G = nx.complete_graph(4) + >>> nx.chromatic_polynomial(G) + x**4 - 6*x**3 + 11*x**2 - 6*x + + Notes + ----- + Interpretation of the coefficients is discussed in [5]_. Several special + cases are listed in [2]_. + + The chromatic polynomial is a specialization of the Tutte polynomial; in + particular, ``X_G(x) = T_G(x, 0)`` [6]_. + + The chromatic polynomial may take negative arguments, though evaluations + may not have chromatic interpretations. For instance, ``X_G(-1)`` enumerates + the acyclic orientations of `G` [7]_. + + References + ---------- + .. [1] D. B. West, + "Introduction to Graph Theory," p. 222 + .. [2] E. W. 
Weisstein + "Chromatic Polynomial" + MathWorld--A Wolfram Web Resource + https://mathworld.wolfram.com/ChromaticPolynomial.html + .. [3] D. B. West, + "Introduction to Graph Theory," p. 221 + .. [4] J. Zhang, J. Goodall, + "An Introduction to Chromatic Polynomials" + https://math.mit.edu/~apost/courses/18.204_2018/Julie_Zhang_paper.pdf + .. [5] R. C. Read, + "An Introduction to Chromatic Polynomials" + Journal of Combinatorial Theory, 1968 + https://math.berkeley.edu/~mrklug/ReadChromatic.pdf + .. [6] W. T. Tutte, + "Graph-polynomials" + Advances in Applied Mathematics, 2004 + https://www.sciencedirect.com/science/article/pii/S0196885803000411 + .. [7] R. P. Stanley, + "Acyclic orientations of graphs" + Discrete Mathematics, 2006 + https://math.mit.edu/~rstan/pubs/pubfiles/18.pdf + """ + import sympy + + x = sympy.Symbol("x") + stack = deque() + stack.append(nx.MultiGraph(G, contraction_idx=0)) + + polynomial = 0 + while stack: + G = stack.pop() + edges = list(G.edges) + if not edges: + polynomial += (-1) ** G.graph["contraction_idx"] * x ** len(G) + else: + e = edges[0] + C = nx.contracted_edge(G, e, self_loops=True) + C.graph["contraction_idx"] = G.graph["contraction_idx"] + 1 + C.remove_edge(e[0], e[0]) + G.remove_edge(*e) + stack.append(G) + stack.append(C) + return polynomial diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/reciprocity.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/reciprocity.py new file mode 100644 index 0000000000000000000000000000000000000000..5ea7ed2ce26ab973e07bcc6ec0d92aa4799d9a6a --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/reciprocity.py @@ -0,0 +1,98 @@ +"""Algorithms to calculate reciprocity in a directed graph.""" + +import networkx as nx +from networkx import NetworkXError + +from ..utils import not_implemented_for + +__all__ = ["reciprocity", "overall_reciprocity"] + + +@not_implemented_for("undirected", "multigraph") +@nx._dispatchable +def reciprocity(G, nodes=None): 
def _reciprocity_iter(G, nodes):
    """Yield a ``(node, reciprocity)`` pair for every node of `nodes` in `G`.

    The value is twice the number of nodes that are both a predecessor and
    a successor, divided by the number of predecessors plus the number of
    successors. Nodes that have neither are reported with ``None``.
    """
    for v in G.nbunch_iter(nodes):
        incoming = set(G.predecessors(v))
        outgoing = set(G.successors(v))
        degree_sum = len(incoming) + len(outgoing)

        if degree_sum:
            yield (v, 2 * len(incoming & outgoing) / degree_sum)
        else:
            # Reciprocity is not defined for isolated nodes.
            yield (v, None)
@nx._dispatchable
def is_regular(G):
    """Determines whether the graph ``G`` is a regular graph.

    An undirected graph is regular when every vertex has the same degree.
    A directed graph is reported as regular when all vertices share one
    in-degree and all vertices share one out-degree.

    Parameters
    ----------
    G : NetworkX graph

    Returns
    -------
    bool
        Whether the given graph or digraph is regular.

    Raises
    ------
    NetworkXPointlessConcept
        If `G` has no nodes.

    Examples
    --------
    >>> G = nx.DiGraph([(1, 2), (2, 3), (3, 4), (4, 1)])
    >>> nx.is_regular(G)
    True

    """
    if not len(G):
        raise nx.NetworkXPointlessConcept("Graph has no nodes.")

    # Any node can serve as the reference that all others are compared to.
    reference = nx.utils.arbitrary_element(G)

    if G.is_directed():
        ref_in = G.in_degree(reference)
        same_in = all(deg == ref_in for _, deg in G.in_degree)
        ref_out = G.out_degree(reference)
        same_out = all(deg == ref_out for _, deg in G.out_degree)
        return same_in and same_out

    ref_degree = G.degree(reference)
    return all(deg == ref_degree for _, deg in G.degree)
@not_implemented_for("directed")
@not_implemented_for("multigraph")
@nx._dispatchable(preserve_edge_attrs=True, returns_graph=True)
def k_factor(G, k, matching_weight="weight"):
    """Compute a k-factor of G

    A k-factor of a graph is a spanning k-regular subgraph.
    A spanning k-regular subgraph of G is a subgraph that contains
    each vertex of G and a subset of the edges of G such that each
    vertex has degree k.

    Every node is replaced by a gadget of auxiliary vertices, a
    maximum-cardinality max-weight matching is computed on the resulting
    graph, all unmatched edges are dropped, and the gadgets are collapsed
    back into the original nodes.

    Parameters
    ----------
    G : NetworkX graph
      Undirected graph

    k : int
      The degree every vertex must have in the returned subgraph.

    matching_weight: string, optional (default='weight')
       Edge data key corresponding to the edge weight.
       Used for finding the max-weighted perfect matching.
       If key not found, uses 1 as weight.

    Returns
    -------
    G2 : NetworkX graph
        A k-factor of G

    Raises
    ------
    NetworkXUnfeasible
        If some vertex of `G` has degree less than `k`, or if the gadget
        graph has no perfect matching.

    Examples
    --------
    >>> G = nx.Graph([(1, 2), (2, 3), (3, 4), (4, 1)])
    >>> G2 = nx.k_factor(G, k=1)
    >>> G2.edges()
    EdgeView([(1, 2), (3, 4)])

    References
    ----------
    .. [1] "An algorithm for computing simple k-factors.",
       Meijer, Henk, Yurai Núñez-Rodríguez, and David Rappaport,
       Information processing letters, 2009.
    """

    from networkx.algorithms.matching import is_perfect_matching, max_weight_matching

    class LargeKGadget:
        # Used when k >= degree / 2: `degree` outer vertices (one per
        # incident edge), each joined to all `degree - k` core vertices.
        def __init__(self, k, degree, node, g):
            self.original = node
            self.g = g
            self.k = k
            self.degree = degree

            self.outer_vertices = [(node, x) for x in range(degree)]
            self.core_vertices = [(node, x + degree) for x in range(degree - k)]

        def replace_node(self):
            # Snapshot the adjacency first: the graph is edited in the loop.
            incident = list(self.g[self.original].items())
            for outer, (neighbor, edge_attrs) in zip(self.outer_vertices, incident):
                self.g.add_edge(outer, neighbor, **edge_attrs)
            for core in self.core_vertices:
                for outer in self.outer_vertices:
                    self.g.add_edge(core, outer)
            self.g.remove_node(self.original)

        def restore_node(self):
            self.g.add_node(self.original)
            for outer in self.outer_vertices:
                adj_view = self.g[outer]
                for neighbor, edge_attrs in list(adj_view.items()):
                    if neighbor not in self.core_vertices:
                        self.g.add_edge(self.original, neighbor, **edge_attrs)
                        break
            # Use the gadget's own graph reference rather than a variable
            # captured from the enclosing function.
            self.g.remove_nodes_from(self.outer_vertices)
            self.g.remove_nodes_from(self.core_vertices)

    class SmallKGadget:
        # Used when k < degree / 2: `degree` outer vertices, each paired
        # with one inner vertex; every inner vertex is joined to all `k`
        # core vertices.
        def __init__(self, k, degree, node, g):
            self.original = node
            self.k = k
            self.degree = degree
            self.g = g

            self.outer_vertices = [(node, x) for x in range(degree)]
            self.inner_vertices = [(node, x + degree) for x in range(degree)]
            self.core_vertices = [(node, x + 2 * degree) for x in range(k)]

        def replace_node(self):
            # Snapshot the adjacency first: the graph is edited in the loop.
            incident = list(self.g[self.original].items())
            for outer, inner, (neighbor, edge_attrs) in zip(
                self.outer_vertices, self.inner_vertices, incident
            ):
                self.g.add_edge(outer, inner)
                self.g.add_edge(outer, neighbor, **edge_attrs)
            for core in self.core_vertices:
                for inner in self.inner_vertices:
                    self.g.add_edge(core, inner)
            self.g.remove_node(self.original)

        def restore_node(self):
            self.g.add_node(self.original)
            for outer in self.outer_vertices:
                adj_view = self.g[outer]
                for neighbor, edge_attrs in list(adj_view.items()):
                    if neighbor not in self.core_vertices:
                        # An edge added to an inner vertex here disappears
                        # again when the inner vertices are removed below.
                        self.g.add_edge(self.original, neighbor, **edge_attrs)
                        break
            self.g.remove_nodes_from(self.outer_vertices)
            self.g.remove_nodes_from(self.inner_vertices)
            self.g.remove_nodes_from(self.core_vertices)

    # Step 1: a vertex of degree < k can never reach degree k
    if any(d < k for _, d in G.degree):
        raise nx.NetworkXUnfeasible("Graph contains a vertex with degree less than k")
    g = G.copy()

    # Step 2: replace every node by the gadget matching its degree
    gadgets = []
    for node, degree in list(g.degree):
        if k < degree / 2.0:
            gadget = SmallKGadget(k, degree, node, g)
        else:
            gadget = LargeKGadget(k, degree, node, g)
        gadget.replace_node()
        gadgets.append(gadget)

    # Step 3: maximum-cardinality matching of maximum weight
    matching = max_weight_matching(g, maxcardinality=True, weight=matching_weight)

    # Step 4: a k-factor exists only if that matching is perfect
    if not is_perfect_matching(g, matching):
        raise nx.NetworkXUnfeasible(
            "Cannot find k-factor because no perfect matching exists"
        )

    # Keep matched edges only; iterate over a snapshot because edges are
    # removed from `g` inside the loop.
    for u, v in list(g.edges()):
        if (u, v) not in matching and (v, u) not in matching:
            g.remove_edge(u, v)

    for gadget in gadgets:
        gadget.restore_node()

    return g
+ + For each degree *k*, the *rich-club coefficient* is the ratio of the + number of actual to the number of potential edges for nodes with + degree greater than *k*: + + .. math:: + + \phi(k) = \frac{2 E_k}{N_k (N_k - 1)} + + where `N_k` is the number of nodes with degree larger than *k*, and + `E_k` is the number of edges among those nodes. + + Parameters + ---------- + G : NetworkX graph + Undirected graph with neither parallel edges nor self-loops. + normalized : bool (optional) + Normalize using randomized network as in [1]_ + Q : float (optional, default=100) + If `normalized` is True, perform `Q * m` double-edge + swaps, where `m` is the number of edges in `G`, to use as a + null-model for normalization. + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness`. + + Returns + ------- + rc : dictionary + A dictionary, keyed by degree, with rich-club coefficient values. + + Raises + ------ + NetworkXError + If `G` has fewer than four nodes and ``normalized=True``. + A randomly sampled graph for normalization cannot be generated in this case. + + Examples + -------- + >>> G = nx.Graph([(0, 1), (0, 2), (1, 2), (1, 3), (1, 4), (4, 5)]) + >>> rc = nx.rich_club_coefficient(G, normalized=False, seed=42) + >>> rc[0] + 0.4 + + Notes + ----- + The rich club definition and algorithm are found in [1]_. This + algorithm ignores any edge weights and is not defined for directed + graphs or graphs with parallel edges or self loops. + + Normalization is done by computing the rich club coefficient for a randomly + sampled graph with the same degree distribution as `G` by + repeatedly swapping the endpoints of existing edges. For graphs with fewer than 4 + nodes, it is not possible to generate a random graph with a prescribed + degree distribution, as the degree distribution fully determines the graph + (hence making the coefficients trivially normalized to 1). + This function raises an exception in this case. 
def _compute_rc(G):
    """Compute the (unnormalized) rich-club coefficient for each degree.

    `G` is an undirected graph without multiedges.

    The result is a dictionary that maps a degree `k` to
    ``2 * E_k / (N_k * (N_k - 1))``. Only degrees for which more than one
    node has a larger degree appear as keys; a graph without edges yields
    an empty dictionary.

    """
    histogram = nx.degree_histogram(G)
    n_nodes = sum(histogram)
    # N_k for k = 0, 1, ...: number of nodes whose degree exceeds k. Values
    # of 1 or 0 are left out, which also avoids a zero denominator below.
    richer_counts = (
        n_nodes - covered for covered in accumulate(histogram) if n_nodes - covered > 1
    )
    # Sorted endpoint-degree pairs, one per edge, in descending order so the
    # smallest pair can be taken with a cheap pop() from the right end.
    endpoint_degrees = sorted(
        (sorted(map(G.degree, edge)) for edge in G.edges()), reverse=True
    )
    edges_left = G.number_of_edges()
    if edges_left == 0:
        return {}

    # Smaller endpoint degree of the edge currently under consideration.
    low, _ = endpoint_degrees.pop()
    coefficients = {}
    for degree, n_rich in enumerate(richer_counts):
        # Discard edges having an endpoint whose degree is not above `degree`.
        while low <= degree:
            if not endpoint_degrees:
                edges_left = 0
                break
            low, _ = endpoint_degrees.pop()
            edges_left -= 1
        coefficients[degree] = 2 * edges_left / (n_rich * (n_rich - 1))
    return coefficients
def debug_print(*args, **kwargs):
    """Forward all positional and keyword arguments unchanged to `print`."""
    return print(*args, **kwargs)
+ + That is, the function will receive the edge attribute + dictionaries of the edges under consideration. + + Ignored if edge_subst_cost is specified. If neither + edge_match nor edge_subst_cost are specified then edge + attributes are not considered. + + node_subst_cost, node_del_cost, node_ins_cost : callable + Functions that return the costs of node substitution, node + deletion, and node insertion, respectively. + + The functions will be called like + + node_subst_cost(G1.nodes[n1], G2.nodes[n2]), + node_del_cost(G1.nodes[n1]), + node_ins_cost(G2.nodes[n2]). + + That is, the functions will receive the node attribute + dictionaries as inputs. The functions are expected to return + positive numeric values. + + Function node_subst_cost overrides node_match if specified. + If neither node_match nor node_subst_cost are specified then + default node substitution cost of 0 is used (node attributes + are not considered during matching). + + If node_del_cost is not specified then default node deletion + cost of 1 is used. If node_ins_cost is not specified then + default node insertion cost of 1 is used. + + edge_subst_cost, edge_del_cost, edge_ins_cost : callable + Functions that return the costs of edge substitution, edge + deletion, and edge insertion, respectively. + + The functions will be called like + + edge_subst_cost(G1[u1][v1], G2[u2][v2]), + edge_del_cost(G1[u1][v1]), + edge_ins_cost(G2[u2][v2]). + + That is, the functions will receive the edge attribute + dictionaries as inputs. The functions are expected to return + positive numeric values. + + Function edge_subst_cost overrides edge_match if specified. + If neither edge_match nor edge_subst_cost are specified then + default edge substitution cost of 0 is used (edge attributes + are not considered during matching). + + If edge_del_cost is not specified then default edge deletion + cost of 1 is used. If edge_ins_cost is not specified then + default edge insertion cost of 1 is used. 
+ + roots : 2-tuple + Tuple where first element is a node in G1 and the second + is a node in G2. + These nodes are forced to be matched in the comparison to + allow comparison between rooted graphs. + + upper_bound : numeric + Maximum edit distance to consider. Return None if no edit + distance under or equal to upper_bound exists. + + timeout : numeric + Maximum number of seconds to execute. + After timeout is met, the current best GED is returned. + + Examples + -------- + >>> G1 = nx.cycle_graph(6) + >>> G2 = nx.wheel_graph(7) + >>> nx.graph_edit_distance(G1, G2) + 7.0 + + >>> G1 = nx.star_graph(5) + >>> G2 = nx.star_graph(5) + >>> nx.graph_edit_distance(G1, G2, roots=(0, 0)) + 0.0 + >>> nx.graph_edit_distance(G1, G2, roots=(1, 0)) + 8.0 + + See Also + -------- + optimal_edit_paths, optimize_graph_edit_distance, + + is_isomorphic: test for graph edit distance of 0 + + References + ---------- + .. [1] Zeina Abu-Aisheh, Romain Raveaux, Jean-Yves Ramel, Patrick + Martineau. An Exact Graph Edit Distance Algorithm for Solving + Pattern Recognition Problems. 4th International Conference on + Pattern Recognition Applications and Methods 2015, Jan 2015, + Lisbon, Portugal. 2015, + <10.5220/0005209202710278>. + https://hal.archives-ouvertes.fr/hal-01168816 + + """ + bestcost = None + for _, _, cost in optimize_edit_paths( + G1, + G2, + node_match, + edge_match, + node_subst_cost, + node_del_cost, + node_ins_cost, + edge_subst_cost, + edge_del_cost, + edge_ins_cost, + upper_bound, + True, + roots, + timeout, + ): + # assert bestcost is None or cost < bestcost + bestcost = cost + return bestcost + + +@nx._dispatchable(graphs={"G1": 0, "G2": 1}) +def optimal_edit_paths( + G1, + G2, + node_match=None, + edge_match=None, + node_subst_cost=None, + node_del_cost=None, + node_ins_cost=None, + edge_subst_cost=None, + edge_del_cost=None, + edge_ins_cost=None, + upper_bound=None, +): + """Returns all minimum-cost edit paths transforming G1 to G2. 
+ + Graph edit path is a sequence of node and edge edit operations + transforming graph G1 to graph isomorphic to G2. Edit operations + include substitutions, deletions, and insertions. + + Parameters + ---------- + G1, G2: graphs + The two graphs G1 and G2 must be of the same type. + + node_match : callable + A function that returns True if node n1 in G1 and n2 in G2 + should be considered equal during matching. + + The function will be called like + + node_match(G1.nodes[n1], G2.nodes[n2]). + + That is, the function will receive the node attribute + dictionaries for n1 and n2 as inputs. + + Ignored if node_subst_cost is specified. If neither + node_match nor node_subst_cost are specified then node + attributes are not considered. + + edge_match : callable + A function that returns True if the edge attribute dictionaries + for the pair of nodes (u1, v1) in G1 and (u2, v2) in G2 should + be considered equal during matching. + + The function will be called like + + edge_match(G1[u1][v1], G2[u2][v2]). + + That is, the function will receive the edge attribute + dictionaries of the edges under consideration. + + Ignored if edge_subst_cost is specified. If neither + edge_match nor edge_subst_cost are specified then edge + attributes are not considered. + + node_subst_cost, node_del_cost, node_ins_cost : callable + Functions that return the costs of node substitution, node + deletion, and node insertion, respectively. + + The functions will be called like + + node_subst_cost(G1.nodes[n1], G2.nodes[n2]), + node_del_cost(G1.nodes[n1]), + node_ins_cost(G2.nodes[n2]). + + That is, the functions will receive the node attribute + dictionaries as inputs. The functions are expected to return + positive numeric values. + + Function node_subst_cost overrides node_match if specified. + If neither node_match nor node_subst_cost are specified then + default node substitution cost of 0 is used (node attributes + are not considered during matching). 
+ + If node_del_cost is not specified then default node deletion + cost of 1 is used. If node_ins_cost is not specified then + default node insertion cost of 1 is used. + + edge_subst_cost, edge_del_cost, edge_ins_cost : callable + Functions that return the costs of edge substitution, edge + deletion, and edge insertion, respectively. + + The functions will be called like + + edge_subst_cost(G1[u1][v1], G2[u2][v2]), + edge_del_cost(G1[u1][v1]), + edge_ins_cost(G2[u2][v2]). + + That is, the functions will receive the edge attribute + dictionaries as inputs. The functions are expected to return + positive numeric values. + + Function edge_subst_cost overrides edge_match if specified. + If neither edge_match nor edge_subst_cost are specified then + default edge substitution cost of 0 is used (edge attributes + are not considered during matching). + + If edge_del_cost is not specified then default edge deletion + cost of 1 is used. If edge_ins_cost is not specified then + default edge insertion cost of 1 is used. + + upper_bound : numeric + Maximum edit distance to consider. + + Returns + ------- + edit_paths : list of tuples (node_edit_path, edge_edit_path) + - node_edit_path : list of tuples ``(u, v)`` indicating node transformations + between `G1` and `G2`. ``u`` is `None` for insertion, ``v`` is `None` + for deletion. + - edge_edit_path : list of tuples ``((u1, v1), (u2, v2))`` indicating edge + transformations between `G1` and `G2`. ``(None, (u2,v2))`` for insertion + and ``((u1,v1), None)`` for deletion. + + cost : numeric + Optimal edit path cost (graph edit distance). When the cost + is zero, it indicates that `G1` and `G2` are isomorphic. + + Examples + -------- + >>> G1 = nx.cycle_graph(4) + >>> G2 = nx.wheel_graph(5) + >>> paths, cost = nx.optimal_edit_paths(G1, G2) + >>> len(paths) + 40 + >>> cost + 5.0 + + Notes + ----- + To transform `G1` into a graph isomorphic to `G2`, apply the node + and edge edits in the returned ``edit_paths``. 
+ In the case of isomorphic graphs, the cost is zero, and the paths + represent different isomorphic mappings (isomorphisms). That is, the + edits involve renaming nodes and edges to match the structure of `G2`. + + See Also + -------- + graph_edit_distance, optimize_edit_paths + + References + ---------- + .. [1] Zeina Abu-Aisheh, Romain Raveaux, Jean-Yves Ramel, Patrick + Martineau. An Exact Graph Edit Distance Algorithm for Solving + Pattern Recognition Problems. 4th International Conference on + Pattern Recognition Applications and Methods 2015, Jan 2015, + Lisbon, Portugal. 2015, + <10.5220/0005209202710278>. + https://hal.archives-ouvertes.fr/hal-01168816 + + """ + paths = [] + bestcost = None + for vertex_path, edge_path, cost in optimize_edit_paths( + G1, + G2, + node_match, + edge_match, + node_subst_cost, + node_del_cost, + node_ins_cost, + edge_subst_cost, + edge_del_cost, + edge_ins_cost, + upper_bound, + False, + ): + # assert bestcost is None or cost <= bestcost + if bestcost is not None and cost < bestcost: + paths = [] + paths.append((vertex_path, edge_path)) + bestcost = cost + return paths, bestcost + + +@nx._dispatchable(graphs={"G1": 0, "G2": 1}) +def optimize_graph_edit_distance( + G1, + G2, + node_match=None, + edge_match=None, + node_subst_cost=None, + node_del_cost=None, + node_ins_cost=None, + edge_subst_cost=None, + edge_del_cost=None, + edge_ins_cost=None, + upper_bound=None, +): + """Returns consecutive approximations of GED (graph edit distance) + between graphs G1 and G2. + + Graph edit distance is a graph similarity measure analogous to + Levenshtein distance for strings. It is defined as minimum cost + of edit path (sequence of node and edge edit operations) + transforming graph G1 to graph isomorphic to G2. + + Parameters + ---------- + G1, G2: graphs + The two graphs G1 and G2 must be of the same type. 
+ + node_match : callable + A function that returns True if node n1 in G1 and n2 in G2 + should be considered equal during matching. + + The function will be called like + + node_match(G1.nodes[n1], G2.nodes[n2]). + + That is, the function will receive the node attribute + dictionaries for n1 and n2 as inputs. + + Ignored if node_subst_cost is specified. If neither + node_match nor node_subst_cost are specified then node + attributes are not considered. + + edge_match : callable + A function that returns True if the edge attribute dictionaries + for the pair of nodes (u1, v1) in G1 and (u2, v2) in G2 should + be considered equal during matching. + + The function will be called like + + edge_match(G1[u1][v1], G2[u2][v2]). + + That is, the function will receive the edge attribute + dictionaries of the edges under consideration. + + Ignored if edge_subst_cost is specified. If neither + edge_match nor edge_subst_cost are specified then edge + attributes are not considered. + + node_subst_cost, node_del_cost, node_ins_cost : callable + Functions that return the costs of node substitution, node + deletion, and node insertion, respectively. + + The functions will be called like + + node_subst_cost(G1.nodes[n1], G2.nodes[n2]), + node_del_cost(G1.nodes[n1]), + node_ins_cost(G2.nodes[n2]). + + That is, the functions will receive the node attribute + dictionaries as inputs. The functions are expected to return + positive numeric values. + + Function node_subst_cost overrides node_match if specified. + If neither node_match nor node_subst_cost are specified then + default node substitution cost of 0 is used (node attributes + are not considered during matching). + + If node_del_cost is not specified then default node deletion + cost of 1 is used. If node_ins_cost is not specified then + default node insertion cost of 1 is used. 
+ + edge_subst_cost, edge_del_cost, edge_ins_cost : callable + Functions that return the costs of edge substitution, edge + deletion, and edge insertion, respectively. + + The functions will be called like + + edge_subst_cost(G1[u1][v1], G2[u2][v2]), + edge_del_cost(G1[u1][v1]), + edge_ins_cost(G2[u2][v2]). + + That is, the functions will receive the edge attribute + dictionaries as inputs. The functions are expected to return + positive numeric values. + + Function edge_subst_cost overrides edge_match if specified. + If neither edge_match nor edge_subst_cost are specified then + default edge substitution cost of 0 is used (edge attributes + are not considered during matching). + + If edge_del_cost is not specified then default edge deletion + cost of 1 is used. If edge_ins_cost is not specified then + default edge insertion cost of 1 is used. + + upper_bound : numeric + Maximum edit distance to consider. + + Returns + ------- + Generator of consecutive approximations of graph edit distance. + + Examples + -------- + >>> G1 = nx.cycle_graph(6) + >>> G2 = nx.wheel_graph(7) + >>> for v in nx.optimize_graph_edit_distance(G1, G2): + ... minv = v + >>> minv + 7.0 + + See Also + -------- + graph_edit_distance, optimize_edit_paths + + References + ---------- + .. [1] Zeina Abu-Aisheh, Romain Raveaux, Jean-Yves Ramel, Patrick + Martineau. An Exact Graph Edit Distance Algorithm for Solving + Pattern Recognition Problems. 4th International Conference on + Pattern Recognition Applications and Methods 2015, Jan 2015, + Lisbon, Portugal. 2015, + <10.5220/0005209202710278>. 
+ https://hal.archives-ouvertes.fr/hal-01168816 + """ + for _, _, cost in optimize_edit_paths( + G1, + G2, + node_match, + edge_match, + node_subst_cost, + node_del_cost, + node_ins_cost, + edge_subst_cost, + edge_del_cost, + edge_ins_cost, + upper_bound, + True, + ): + yield cost + + +@nx._dispatchable( + graphs={"G1": 0, "G2": 1}, preserve_edge_attrs=True, preserve_node_attrs=True +) +def optimize_edit_paths( + G1, + G2, + node_match=None, + edge_match=None, + node_subst_cost=None, + node_del_cost=None, + node_ins_cost=None, + edge_subst_cost=None, + edge_del_cost=None, + edge_ins_cost=None, + upper_bound=None, + strictly_decreasing=True, + roots=None, + timeout=None, +): + """GED (graph edit distance) calculation: advanced interface. + + Graph edit path is a sequence of node and edge edit operations + transforming graph G1 to graph isomorphic to G2. Edit operations + include substitutions, deletions, and insertions. + + Graph edit distance is defined as minimum cost of edit path. + + Parameters + ---------- + G1, G2: graphs + The two graphs G1 and G2 must be of the same type. + + node_match : callable + A function that returns True if node n1 in G1 and n2 in G2 + should be considered equal during matching. + + The function will be called like + + node_match(G1.nodes[n1], G2.nodes[n2]). + + That is, the function will receive the node attribute + dictionaries for n1 and n2 as inputs. + + Ignored if node_subst_cost is specified. If neither + node_match nor node_subst_cost are specified then node + attributes are not considered. + + edge_match : callable + A function that returns True if the edge attribute dictionaries + for the pair of nodes (u1, v1) in G1 and (u2, v2) in G2 should + be considered equal during matching. + + The function will be called like + + edge_match(G1[u1][v1], G2[u2][v2]). + + That is, the function will receive the edge attribute + dictionaries of the edges under consideration. + + Ignored if edge_subst_cost is specified. 
If neither + edge_match nor edge_subst_cost are specified then edge + attributes are not considered. + + node_subst_cost, node_del_cost, node_ins_cost : callable + Functions that return the costs of node substitution, node + deletion, and node insertion, respectively. + + The functions will be called like + + node_subst_cost(G1.nodes[n1], G2.nodes[n2]), + node_del_cost(G1.nodes[n1]), + node_ins_cost(G2.nodes[n2]). + + That is, the functions will receive the node attribute + dictionaries as inputs. The functions are expected to return + positive numeric values. + + Function node_subst_cost overrides node_match if specified. + If neither node_match nor node_subst_cost are specified then + default node substitution cost of 0 is used (node attributes + are not considered during matching). + + If node_del_cost is not specified then default node deletion + cost of 1 is used. If node_ins_cost is not specified then + default node insertion cost of 1 is used. + + edge_subst_cost, edge_del_cost, edge_ins_cost : callable + Functions that return the costs of edge substitution, edge + deletion, and edge insertion, respectively. + + The functions will be called like + + edge_subst_cost(G1[u1][v1], G2[u2][v2]), + edge_del_cost(G1[u1][v1]), + edge_ins_cost(G2[u2][v2]). + + That is, the functions will receive the edge attribute + dictionaries as inputs. The functions are expected to return + positive numeric values. + + Function edge_subst_cost overrides edge_match if specified. + If neither edge_match nor edge_subst_cost are specified then + default edge substitution cost of 0 is used (edge attributes + are not considered during matching). + + If edge_del_cost is not specified then default edge deletion + cost of 1 is used. If edge_ins_cost is not specified then + default edge insertion cost of 1 is used. + + upper_bound : numeric + Maximum edit distance to consider. + + strictly_decreasing : bool + If True, return consecutive approximations of strictly + decreasing cost. 
Otherwise, return all edit paths of cost + less than or equal to the previous minimum cost. + + roots : 2-tuple + Tuple where first element is a node in G1 and the second + is a node in G2. + These nodes are forced to be matched in the comparison to + allow comparison between rooted graphs. + + timeout : numeric + Maximum number of seconds to execute. + After timeout is met, the current best GED is returned. + + Returns + ------- + Generator of tuples (node_edit_path, edge_edit_path, cost) + node_edit_path : list of tuples (u, v) + edge_edit_path : list of tuples ((u1, v1), (u2, v2)) + cost : numeric + + See Also + -------- + graph_edit_distance, optimize_graph_edit_distance, optimal_edit_paths + + References + ---------- + .. [1] Zeina Abu-Aisheh, Romain Raveaux, Jean-Yves Ramel, Patrick + Martineau. An Exact Graph Edit Distance Algorithm for Solving + Pattern Recognition Problems. 4th International Conference on + Pattern Recognition Applications and Methods 2015, Jan 2015, + Lisbon, Portugal. 2015, + <10.5220/0005209202710278>. + https://hal.archives-ouvertes.fr/hal-01168816 + + """ + # TODO: support DiGraph + + import numpy as np + import scipy as sp + + @dataclass + class CostMatrix: + C: ... + lsa_row_ind: ... + lsa_col_ind: ... + ls: ... 
+ + def make_CostMatrix(C, m, n): + # assert(C.shape == (m + n, m + n)) + lsa_row_ind, lsa_col_ind = sp.optimize.linear_sum_assignment(C) + + # Fixup dummy assignments: + # each substitution i<->j should have dummy assignment m+j<->n+i + # NOTE: fast reduce of Cv relies on it + # Create masks for substitution and dummy indices + is_subst = (lsa_row_ind < m) & (lsa_col_ind < n) + is_dummy = (lsa_row_ind >= m) & (lsa_col_ind >= n) + + # Map dummy assignments to the correct indices + lsa_row_ind[is_dummy] = lsa_col_ind[is_subst] + m + lsa_col_ind[is_dummy] = lsa_row_ind[is_subst] + n + + return CostMatrix( + C, lsa_row_ind, lsa_col_ind, C[lsa_row_ind, lsa_col_ind].sum() + ) + + def extract_C(C, i, j, m, n): + # assert(C.shape == (m + n, m + n)) + row_ind = [k in i or k - m in j for k in range(m + n)] + col_ind = [k in j or k - n in i for k in range(m + n)] + return C[row_ind, :][:, col_ind] + + def reduce_C(C, i, j, m, n): + # assert(C.shape == (m + n, m + n)) + row_ind = [k not in i and k - m not in j for k in range(m + n)] + col_ind = [k not in j and k - n not in i for k in range(m + n)] + return C[row_ind, :][:, col_ind] + + def reduce_ind(ind, i): + # assert set(ind) == set(range(len(ind))) + rind = ind[[k not in i for k in ind]] + for k in set(i): + rind[rind >= k] -= 1 + return rind + + def match_edges(u, v, pending_g, pending_h, Ce, matched_uv=None): + """ + Parameters: + u, v: matched vertices, u=None or v=None for + deletion/insertion + pending_g, pending_h: lists of edges not yet mapped + Ce: CostMatrix of pending edge mappings + matched_uv: partial vertex edit path + list of tuples (u, v) of previously matched vertex + mappings u<->v, u=None or v=None for + deletion/insertion + + Returns: + list of (i, j): indices of edge mappings g<->h + localCe: local CostMatrix of edge mappings + (basically submatrix of Ce at cross of rows i, cols j) + """ + M = len(pending_g) + N = len(pending_h) + # assert Ce.C.shape == (M + N, M + N) + + # only attempt to match edges 
after one node match has been made + # this will stop self-edges on the first node being automatically deleted + # even when a substitution is the better option + if matched_uv is None or len(matched_uv) == 0: + g_ind = [] + h_ind = [] + else: + g_ind = [ + i + for i in range(M) + if pending_g[i][:2] == (u, u) + or any( + pending_g[i][:2] in ((p, u), (u, p), (p, p)) for p, q in matched_uv + ) + ] + h_ind = [ + j + for j in range(N) + if pending_h[j][:2] == (v, v) + or any( + pending_h[j][:2] in ((q, v), (v, q), (q, q)) for p, q in matched_uv + ) + ] + + m = len(g_ind) + n = len(h_ind) + + if m or n: + C = extract_C(Ce.C, g_ind, h_ind, M, N) + # assert C.shape == (m + n, m + n) + + # Forbid structurally invalid matches + # NOTE: inf remembered from Ce construction + for k, i in enumerate(g_ind): + g = pending_g[i][:2] + for l, j in enumerate(h_ind): + h = pending_h[j][:2] + if nx.is_directed(G1) or nx.is_directed(G2): + if any( + g == (p, u) and h == (q, v) or g == (u, p) and h == (v, q) + for p, q in matched_uv + ): + continue + else: + if any( + g in ((p, u), (u, p)) and h in ((q, v), (v, q)) + for p, q in matched_uv + ): + continue + if g == (u, u) or any(g == (p, p) for p, q in matched_uv): + continue + if h == (v, v) or any(h == (q, q) for p, q in matched_uv): + continue + C[k, l] = inf + + localCe = make_CostMatrix(C, m, n) + ij = [ + ( + g_ind[k] if k < m else M + h_ind[l], + h_ind[l] if l < n else N + g_ind[k], + ) + for k, l in zip(localCe.lsa_row_ind, localCe.lsa_col_ind) + if k < m or l < n + ] + + else: + ij = [] + localCe = CostMatrix(np.empty((0, 0)), [], [], 0) + + return ij, localCe + + def reduce_Ce(Ce, ij, m, n): + if len(ij): + i, j = zip(*ij) + m_i = m - sum(1 for t in i if t < m) + n_j = n - sum(1 for t in j if t < n) + return make_CostMatrix(reduce_C(Ce.C, i, j, m, n), m_i, n_j) + return Ce + + def get_edit_ops( + matched_uv, pending_u, pending_v, Cv, pending_g, pending_h, Ce, matched_cost + ): + """ + Parameters: + matched_uv: partial vertex 
edit path + list of tuples (u, v) of vertex mappings u<->v, + u=None or v=None for deletion/insertion + pending_u, pending_v: lists of vertices not yet mapped + Cv: CostMatrix of pending vertex mappings + pending_g, pending_h: lists of edges not yet mapped + Ce: CostMatrix of pending edge mappings + matched_cost: cost of partial edit path + + Returns: + sequence of + (i, j): indices of vertex mapping u<->v + Cv_ij: reduced CostMatrix of pending vertex mappings + (basically Cv with row i, col j removed) + list of (x, y): indices of edge mappings g<->h + Ce_xy: reduced CostMatrix of pending edge mappings + (basically Ce with rows x, cols y removed) + cost: total cost of edit operation + NOTE: most promising ops first + """ + m = len(pending_u) + n = len(pending_v) + # assert Cv.C.shape == (m + n, m + n) + + # 1) a vertex mapping from optimal linear sum assignment + i, j = min( + (k, l) for k, l in zip(Cv.lsa_row_ind, Cv.lsa_col_ind) if k < m or l < n + ) + xy, localCe = match_edges( + pending_u[i] if i < m else None, + pending_v[j] if j < n else None, + pending_g, + pending_h, + Ce, + matched_uv, + ) + Ce_xy = reduce_Ce(Ce, xy, len(pending_g), len(pending_h)) + # assert Ce.ls <= localCe.ls + Ce_xy.ls + if prune(matched_cost + Cv.ls + localCe.ls + Ce_xy.ls): + pass + else: + # get reduced Cv efficiently + Cv_ij = CostMatrix( + reduce_C(Cv.C, (i,), (j,), m, n), + reduce_ind(Cv.lsa_row_ind, (i, m + j)), + reduce_ind(Cv.lsa_col_ind, (j, n + i)), + Cv.ls - Cv.C[i, j], + ) + yield (i, j), Cv_ij, xy, Ce_xy, Cv.C[i, j] + localCe.ls + + # 2) other candidates, sorted by lower-bound cost estimate + other = [] + fixed_i, fixed_j = i, j + if m <= n: + candidates = ( + (t, fixed_j) + for t in range(m + n) + if t != fixed_i and (t < m or t == m + fixed_j) + ) + else: + candidates = ( + (fixed_i, t) + for t in range(m + n) + if t != fixed_j and (t < n or t == n + fixed_i) + ) + for i, j in candidates: + if prune(matched_cost + Cv.C[i, j] + Ce.ls): + continue + Cv_ij = 
make_CostMatrix( + reduce_C(Cv.C, (i,), (j,), m, n), + m - 1 if i < m else m, + n - 1 if j < n else n, + ) + # assert Cv.ls <= Cv.C[i, j] + Cv_ij.ls + if prune(matched_cost + Cv.C[i, j] + Cv_ij.ls + Ce.ls): + continue + xy, localCe = match_edges( + pending_u[i] if i < m else None, + pending_v[j] if j < n else None, + pending_g, + pending_h, + Ce, + matched_uv, + ) + if prune(matched_cost + Cv.C[i, j] + Cv_ij.ls + localCe.ls): + continue + Ce_xy = reduce_Ce(Ce, xy, len(pending_g), len(pending_h)) + # assert Ce.ls <= localCe.ls + Ce_xy.ls + if prune(matched_cost + Cv.C[i, j] + Cv_ij.ls + localCe.ls + Ce_xy.ls): + continue + other.append(((i, j), Cv_ij, xy, Ce_xy, Cv.C[i, j] + localCe.ls)) + + yield from sorted(other, key=lambda t: t[4] + t[1].ls + t[3].ls) + + def get_edit_paths( + matched_uv, + pending_u, + pending_v, + Cv, + matched_gh, + pending_g, + pending_h, + Ce, + matched_cost, + ): + """ + Parameters: + matched_uv: partial vertex edit path + list of tuples (u, v) of vertex mappings u<->v, + u=None or v=None for deletion/insertion + pending_u, pending_v: lists of vertices not yet mapped + Cv: CostMatrix of pending vertex mappings + matched_gh: partial edge edit path + list of tuples (g, h) of edge mappings g<->h, + g=None or h=None for deletion/insertion + pending_g, pending_h: lists of edges not yet mapped + Ce: CostMatrix of pending edge mappings + matched_cost: cost of partial edit path + + Returns: + sequence of (vertex_path, edge_path, cost) + vertex_path: complete vertex edit path + list of tuples (u, v) of vertex mappings u<->v, + u=None or v=None for deletion/insertion + edge_path: complete edge edit path + list of tuples (g, h) of edge mappings g<->h, + g=None or h=None for deletion/insertion + cost: total cost of edit path + NOTE: path costs are non-increasing + """ + # debug_print('matched-uv:', matched_uv) + # debug_print('matched-gh:', matched_gh) + # debug_print('matched-cost:', matched_cost) + # debug_print('pending-u:', pending_u) + # 
debug_print('pending-v:', pending_v) + # debug_print(Cv.C) + # assert list(sorted(G1.nodes)) == list(sorted(list(u for u, v in matched_uv if u is not None) + pending_u)) + # assert list(sorted(G2.nodes)) == list(sorted(list(v for u, v in matched_uv if v is not None) + pending_v)) + # debug_print('pending-g:', pending_g) + # debug_print('pending-h:', pending_h) + # debug_print(Ce.C) + # assert list(sorted(G1.edges)) == list(sorted(list(g for g, h in matched_gh if g is not None) + pending_g)) + # assert list(sorted(G2.edges)) == list(sorted(list(h for g, h in matched_gh if h is not None) + pending_h)) + # debug_print() + + if prune(matched_cost + Cv.ls + Ce.ls): + return + + if not max(len(pending_u), len(pending_v)): + # assert not len(pending_g) + # assert not len(pending_h) + # path completed! + # assert matched_cost <= maxcost_value + nonlocal maxcost_value + maxcost_value = min(maxcost_value, matched_cost) + yield matched_uv, matched_gh, matched_cost + + else: + edit_ops = get_edit_ops( + matched_uv, + pending_u, + pending_v, + Cv, + pending_g, + pending_h, + Ce, + matched_cost, + ) + for ij, Cv_ij, xy, Ce_xy, edit_cost in edit_ops: + i, j = ij + # assert Cv.C[i, j] + sum(Ce.C[t] for t in xy) == edit_cost + if prune(matched_cost + edit_cost + Cv_ij.ls + Ce_xy.ls): + continue + + # dive deeper + u = pending_u.pop(i) if i < len(pending_u) else None + v = pending_v.pop(j) if j < len(pending_v) else None + matched_uv.append((u, v)) + for x, y in xy: + len_g = len(pending_g) + len_h = len(pending_h) + matched_gh.append( + ( + pending_g[x] if x < len_g else None, + pending_h[y] if y < len_h else None, + ) + ) + sortedx = sorted(x for x, y in xy) + sortedy = sorted(y for x, y in xy) + G = [ + (pending_g.pop(x) if x < len(pending_g) else None) + for x in reversed(sortedx) + ] + H = [ + (pending_h.pop(y) if y < len(pending_h) else None) + for y in reversed(sortedy) + ] + + yield from get_edit_paths( + matched_uv, + pending_u, + pending_v, + Cv_ij, + matched_gh, + 
pending_g, + pending_h, + Ce_xy, + matched_cost + edit_cost, + ) + + # backtrack + if u is not None: + pending_u.insert(i, u) + if v is not None: + pending_v.insert(j, v) + matched_uv.pop() + for x, g in zip(sortedx, reversed(G)): + if g is not None: + pending_g.insert(x, g) + for y, h in zip(sortedy, reversed(H)): + if h is not None: + pending_h.insert(y, h) + for _ in xy: + matched_gh.pop() + + # Initialization + + pending_u = list(G1.nodes) + pending_v = list(G2.nodes) + + initial_cost = 0 + if roots: + root_u, root_v = roots + if root_u not in pending_u or root_v not in pending_v: + raise nx.NodeNotFound("Root node not in graph.") + + # remove roots from pending + pending_u.remove(root_u) + pending_v.remove(root_v) + + # cost matrix of vertex mappings + m = len(pending_u) + n = len(pending_v) + C = np.zeros((m + n, m + n)) + if node_subst_cost: + C[0:m, 0:n] = np.array( + [ + node_subst_cost(G1.nodes[u], G2.nodes[v]) + for u in pending_u + for v in pending_v + ] + ).reshape(m, n) + if roots: + initial_cost = node_subst_cost(G1.nodes[root_u], G2.nodes[root_v]) + elif node_match: + C[0:m, 0:n] = np.array( + [ + 1 - int(node_match(G1.nodes[u], G2.nodes[v])) + for u in pending_u + for v in pending_v + ] + ).reshape(m, n) + if roots: + initial_cost = 1 - node_match(G1.nodes[root_u], G2.nodes[root_v]) + else: + # all zeroes + pass + # assert not min(m, n) or C[0:m, 0:n].min() >= 0 + if node_del_cost: + del_costs = [node_del_cost(G1.nodes[u]) for u in pending_u] + else: + del_costs = [1] * len(pending_u) + # assert not m or min(del_costs) >= 0 + if node_ins_cost: + ins_costs = [node_ins_cost(G2.nodes[v]) for v in pending_v] + else: + ins_costs = [1] * len(pending_v) + # assert not n or min(ins_costs) >= 0 + inf = C[0:m, 0:n].sum() + sum(del_costs) + sum(ins_costs) + 1 + C[0:m, n : n + m] = np.array( + [del_costs[i] if i == j else inf for i in range(m) for j in range(m)] + ).reshape(m, m) + C[m : m + n, 0:n] = np.array( + [ins_costs[i] if i == j else inf for i in 
range(n) for j in range(n)] + ).reshape(n, n) + Cv = make_CostMatrix(C, m, n) + # debug_print(f"Cv: {m} x {n}") + # debug_print(Cv.C) + + pending_g = list(G1.edges) + pending_h = list(G2.edges) + + # cost matrix of edge mappings + m = len(pending_g) + n = len(pending_h) + C = np.zeros((m + n, m + n)) + if edge_subst_cost: + C[0:m, 0:n] = np.array( + [ + edge_subst_cost(G1.edges[g], G2.edges[h]) + for g in pending_g + for h in pending_h + ] + ).reshape(m, n) + elif edge_match: + C[0:m, 0:n] = np.array( + [ + 1 - int(edge_match(G1.edges[g], G2.edges[h])) + for g in pending_g + for h in pending_h + ] + ).reshape(m, n) + else: + # all zeroes + pass + # assert not min(m, n) or C[0:m, 0:n].min() >= 0 + if edge_del_cost: + del_costs = [edge_del_cost(G1.edges[g]) for g in pending_g] + else: + del_costs = [1] * len(pending_g) + # assert not m or min(del_costs) >= 0 + if edge_ins_cost: + ins_costs = [edge_ins_cost(G2.edges[h]) for h in pending_h] + else: + ins_costs = [1] * len(pending_h) + # assert not n or min(ins_costs) >= 0 + inf = C[0:m, 0:n].sum() + sum(del_costs) + sum(ins_costs) + 1 + C[0:m, n : n + m] = np.array( + [del_costs[i] if i == j else inf for i in range(m) for j in range(m)] + ).reshape(m, m) + C[m : m + n, 0:n] = np.array( + [ins_costs[i] if i == j else inf for i in range(n) for j in range(n)] + ).reshape(n, n) + Ce = make_CostMatrix(C, m, n) + # debug_print(f'Ce: {m} x {n}') + # debug_print(Ce.C) + # debug_print() + + maxcost_value = Cv.C.sum() + Ce.C.sum() + 1 + + if timeout is not None: + if timeout <= 0: + raise nx.NetworkXError("Timeout value must be greater than 0") + start = time.perf_counter() + + def prune(cost): + if timeout is not None: + if time.perf_counter() - start > timeout: + return True + if upper_bound is not None: + if cost > upper_bound: + return True + if cost > maxcost_value: + return True + if strictly_decreasing and cost >= maxcost_value: + return True + return False + + # Now go! 
+ + done_uv = [] if roots is None else [roots] + + for vertex_path, edge_path, cost in get_edit_paths( + done_uv, pending_u, pending_v, Cv, [], pending_g, pending_h, Ce, initial_cost + ): + # assert sorted(G1.nodes) == sorted(u for u, v in vertex_path if u is not None) + # assert sorted(G2.nodes) == sorted(v for u, v in vertex_path if v is not None) + # assert sorted(G1.edges) == sorted(g for g, h in edge_path if g is not None) + # assert sorted(G2.edges) == sorted(h for g, h in edge_path if h is not None) + # print(vertex_path, edge_path, cost, file = sys.stderr) + # assert cost == maxcost_value + yield list(vertex_path), list(edge_path), float(cost) + + +@nx._dispatchable +def simrank_similarity( + G, + source=None, + target=None, + importance_factor=0.9, + max_iterations=1000, + tolerance=1e-4, +): + """Returns the SimRank similarity of nodes in the graph ``G``. + + SimRank is a similarity metric that says "two objects are considered + to be similar if they are referenced by similar objects." [1]_. + + The pseudo-code definition from the paper is:: + + def simrank(G, u, v): + in_neighbors_u = G.predecessors(u) + in_neighbors_v = G.predecessors(v) + scale = C / (len(in_neighbors_u) * len(in_neighbors_v)) + return scale * sum( + simrank(G, w, x) for w, x in product(in_neighbors_u, in_neighbors_v) + ) + + where ``G`` is the graph, ``u`` is the source, ``v`` is the target, + and ``C`` is a float decay or importance factor between 0 and 1. + + The SimRank algorithm for determining node similarity is defined in + [2]_. + + Parameters + ---------- + G : NetworkX graph + A NetworkX graph + + source : node + If this is specified, the returned dictionary maps each node + ``v`` in the graph to the similarity between ``source`` and + ``v``. + + target : node + If both ``source`` and ``target`` are specified, the similarity + value between ``source`` and ``target`` is returned. If + ``target`` is specified but ``source`` is not, this argument is + ignored. 
+ + importance_factor : float + The relative importance of indirect neighbors with respect to + direct neighbors. + + max_iterations : integer + Maximum number of iterations. + + tolerance : float + Error tolerance used to check convergence. When an iteration of + the algorithm finds that no similarity value changes more than + this amount, the algorithm halts. + + Returns + ------- + similarity : dictionary or float + If ``source`` and ``target`` are both ``None``, this returns a + dictionary of dictionaries, where keys are node pairs and value + are similarity of the pair of nodes. + + If ``source`` is not ``None`` but ``target`` is, this returns a + dictionary mapping node to the similarity of ``source`` and that + node. + + If neither ``source`` nor ``target`` is ``None``, this returns + the similarity value for the given pair of nodes. + + Raises + ------ + ExceededMaxIterations + If the algorithm does not converge within ``max_iterations``. + + NodeNotFound + If either ``source`` or ``target`` is not in `G`. + + Examples + -------- + >>> G = nx.cycle_graph(2) + >>> nx.simrank_similarity(G) + {0: {0: 1.0, 1: 0.0}, 1: {0: 0.0, 1: 1.0}} + >>> nx.simrank_similarity(G, source=0) + {0: 1.0, 1: 0.0} + >>> nx.simrank_similarity(G, source=0, target=0) + 1.0 + + The result of this function can be converted to a numpy array + representing the SimRank matrix by using the node order of the + graph to determine which row and column represent each node. + Other ordering of nodes is also possible. + + >>> import numpy as np + >>> sim = nx.simrank_similarity(G) + >>> np.array([[sim[u][v] for v in G] for u in G]) + array([[1., 0.], + [0., 1.]]) + >>> sim_1d = nx.simrank_similarity(G, source=0) + >>> np.array([sim[0][v] for v in G]) + array([1., 0.]) + + References + ---------- + .. [1] https://en.wikipedia.org/wiki/SimRank + .. [2] G. Jeh and J. Widom. 
+ "SimRank: a measure of structural-context similarity", + In KDD'02: Proceedings of the Eighth ACM SIGKDD + International Conference on Knowledge Discovery and Data Mining, + pp. 538--543. ACM Press, 2002. + """ + import numpy as np + + nodelist = list(G) + if source is not None: + if source not in nodelist: + raise nx.NodeNotFound(f"Source node {source} not in G") + else: + s_indx = nodelist.index(source) + else: + s_indx = None + + if target is not None: + if target not in nodelist: + raise nx.NodeNotFound(f"Target node {target} not in G") + else: + t_indx = nodelist.index(target) + else: + t_indx = None + + x = _simrank_similarity_numpy( + G, s_indx, t_indx, importance_factor, max_iterations, tolerance + ) + + if isinstance(x, np.ndarray): + if x.ndim == 1: + return dict(zip(G, x.tolist())) + # else x.ndim == 2 + return {u: dict(zip(G, row)) for u, row in zip(G, x.tolist())} + return float(x) + + +def _simrank_similarity_python( + G, + source=None, + target=None, + importance_factor=0.9, + max_iterations=1000, + tolerance=1e-4, +): + """Returns the SimRank similarity of nodes in the graph ``G``. + + This pure Python version is provided for pedagogical purposes. + + Examples + -------- + >>> G = nx.cycle_graph(2) + >>> nx.similarity._simrank_similarity_python(G) + {0: {0: 1, 1: 0.0}, 1: {0: 0.0, 1: 1}} + >>> nx.similarity._simrank_similarity_python(G, source=0) + {0: 1, 1: 0.0} + >>> nx.similarity._simrank_similarity_python(G, source=0, target=0) + 1 + """ + # build up our similarity adjacency dictionary output + newsim = {u: {v: 1 if u == v else 0 for v in G} for u in G} + + # These functions compute the update to the similarity value of the nodes + # `u` and `v` with respect to the previous similarity values. 
+ def avg_sim(s): + return sum(newsim[w][x] for (w, x) in s) / len(s) if s else 0.0 + + Gadj = G.pred if G.is_directed() else G.adj + + def sim(u, v): + return importance_factor * avg_sim(list(product(Gadj[u], Gadj[v]))) + + for its in range(max_iterations): + oldsim = newsim + newsim = {u: {v: sim(u, v) if u != v else 1 for v in G} for u in G} + is_close = all( + all( + abs(newsim[u][v] - old) <= tolerance * (1 + abs(old)) + for v, old in nbrs.items() + ) + for u, nbrs in oldsim.items() + ) + if is_close: + break + + if its + 1 == max_iterations: + raise nx.ExceededMaxIterations( + f"simrank did not converge after {max_iterations} iterations." + ) + + if source is not None and target is not None: + return newsim[source][target] + if source is not None: + return newsim[source] + return newsim + + +def _simrank_similarity_numpy( + G, + source=None, + target=None, + importance_factor=0.9, + max_iterations=1000, + tolerance=1e-4, +): + """Calculate SimRank of nodes in ``G`` using matrices with ``numpy``. + + The SimRank algorithm for determining node similarity is defined in + [1]_. + + Parameters + ---------- + G : NetworkX graph + A NetworkX graph + + source : node + If this is specified, the returned dictionary maps each node + ``v`` in the graph to the similarity between ``source`` and + ``v``. + + target : node + If both ``source`` and ``target`` are specified, the similarity + value between ``source`` and ``target`` is returned. If + ``target`` is specified but ``source`` is not, this argument is + ignored. + + importance_factor : float + The relative importance of indirect neighbors with respect to + direct neighbors. + + max_iterations : integer + Maximum number of iterations. + + tolerance : float + Error tolerance used to check convergence. When an iteration of + the algorithm finds that no similarity value changes more than + this amount, the algorithm halts. 
+ + Returns + ------- + similarity : numpy array or float + If ``source`` and ``target`` are both ``None``, this returns a + 2D array containing SimRank scores of the nodes. + + If ``source`` is not ``None`` but ``target`` is, this returns an + 1D array containing SimRank scores of ``source`` and that + node. + + If neither ``source`` nor ``target`` is ``None``, this returns + the similarity value for the given pair of nodes. + + Examples + -------- + >>> G = nx.cycle_graph(2) + >>> nx.similarity._simrank_similarity_numpy(G) + array([[1., 0.], + [0., 1.]]) + >>> nx.similarity._simrank_similarity_numpy(G, source=0) + array([1., 0.]) + >>> nx.similarity._simrank_similarity_numpy(G, source=0, target=0) + 1.0 + + References + ---------- + .. [1] G. Jeh and J. Widom. + "SimRank: a measure of structural-context similarity", + In KDD'02: Proceedings of the Eighth ACM SIGKDD + International Conference on Knowledge Discovery and Data Mining, + pp. 538--543. ACM Press, 2002. + """ + # This algorithm follows roughly + # + # S = max{C * (A.T * S * A), I} + # + # where C is the importance factor, A is the column normalized + # adjacency matrix, and I is the identity matrix. + import numpy as np + + adjacency_matrix = nx.to_numpy_array(G) + + # column-normalize the ``adjacency_matrix`` + s = np.array(adjacency_matrix.sum(axis=0)) + s[s == 0] = 1 + adjacency_matrix /= s # adjacency_matrix.sum(axis=0) + + newsim = np.eye(len(G), dtype=np.float64) + for its in range(max_iterations): + prevsim = newsim.copy() + newsim = importance_factor * ((adjacency_matrix.T @ prevsim) @ adjacency_matrix) + np.fill_diagonal(newsim, 1.0) + + if np.allclose(prevsim, newsim, atol=tolerance): + break + + if its + 1 == max_iterations: + raise nx.ExceededMaxIterations( + f"simrank did not converge after {max_iterations} iterations." 
+ ) + + if source is not None and target is not None: + return float(newsim[source, target]) + if source is not None: + return newsim[source] + return newsim + + +@nx._dispatchable(edge_attrs="weight") +def panther_similarity( + G, source, k=5, path_length=5, c=0.5, delta=0.1, eps=None, weight="weight" +): + r"""Returns the Panther similarity of nodes in the graph `G` to node ``v``. + + Panther is a similarity metric that says "two objects are considered + to be similar if they frequently appear on the same paths." [1]_. + + Parameters + ---------- + G : NetworkX graph + A NetworkX graph + source : node + Source node for which to find the top `k` similar other nodes + k : int (default = 5) + The number of most similar nodes to return. + path_length : int (default = 5) + How long the randomly generated paths should be (``T`` in [1]_) + c : float (default = 0.5) + A universal positive constant used to scale the number + of sample random paths to generate. + delta : float (default = 0.1) + The probability that the similarity $S$ is not an epsilon-approximation to (R, phi), + where $R$ is the number of random paths and $\phi$ is the probability + that an element sampled from a set $A \subseteq D$, where $D$ is the domain. + eps : float or None (default = None) + The error bound. Per [1]_, a good value is ``sqrt(1/|E|)``. Therefore, + if no value is provided, the recommended computed value will be used. + weight : string or None, optional (default="weight") + The name of an edge attribute that holds the numerical value + used as a weight. If None then each edge has weight 1. + + Returns + ------- + similarity : dictionary + Dictionary of nodes to similarity scores (as floats). Note: + the self-similarity (i.e., ``v``) will not be included in + the returned dictionary. So, for ``k = 5``, a dictionary of + top 4 nodes and their similarity scores will be returned. + + Raises + ------ + NetworkXUnfeasible + If `source` is an isolated node. 
+ + NodeNotFound + If `source` is not in `G`. + + Notes + ----- + The isolated nodes in `G` are ignored. + + Examples + -------- + >>> G = nx.star_graph(10) + >>> sim = nx.panther_similarity(G, 0) + + References + ---------- + .. [1] Zhang, J., Tang, J., Ma, C., Tong, H., Jing, Y., & Li, J. + Panther: Fast top-k similarity search on large networks. + In Proceedings of the ACM SIGKDD International Conference + on Knowledge Discovery and Data Mining (Vol. 2015-August, pp. 1445–1454). + Association for Computing Machinery. https://doi.org/10.1145/2783258.2783267. + """ + import numpy as np + + if source not in G: + raise nx.NodeNotFound(f"Source node {source} not in G") + + isolates = set(nx.isolates(G)) + + if source in isolates: + raise nx.NetworkXUnfeasible( + f"Panther similarity is not defined for the isolated source node {source}." + ) + + G = G.subgraph([node for node in G.nodes if node not in isolates]).copy() + + num_nodes = G.number_of_nodes() + if num_nodes < k: + warnings.warn( + f"Number of nodes is {num_nodes}, but requested k is {k}. " + "Setting k to number of nodes." 
+ ) + k = num_nodes + # According to [1], they empirically determined + # a good value for ``eps`` to be sqrt( 1 / |E| ) + if eps is None: + eps = np.sqrt(1.0 / G.number_of_edges()) + + inv_node_map = {name: index for index, name in enumerate(G.nodes)} + node_map = np.array(G) + + # Calculate the sample size ``R`` for how many paths + # to randomly generate + t_choose_2 = math.comb(path_length, 2) + sample_size = int((c / eps**2) * (np.log2(t_choose_2) + 1 + np.log(1 / delta))) + index_map = {} + _ = list( + generate_random_paths( + G, sample_size, path_length=path_length, index_map=index_map, weight=weight + ) + ) + S = np.zeros(num_nodes) + + inv_sample_size = 1 / sample_size + + source_paths = set(index_map[source]) + + # Calculate the path similarities + # between ``source`` (v) and ``node`` (v_j) + # using our inverted index mapping of + # vertices to paths + for node, paths in index_map.items(): + # Only consider paths where both + # ``node`` and ``source`` are present + common_paths = source_paths.intersection(paths) + S[inv_node_map[node]] = len(common_paths) * inv_sample_size + + # Retrieve top ``k`` similar + # Note: the below performed anywhere from 4-10x faster + # (depending on input sizes) vs the equivalent ``np.argsort(S)[::-1]`` + top_k_unsorted = np.argpartition(S, -k)[-k:] + top_k_sorted = top_k_unsorted[np.argsort(S[top_k_unsorted])][::-1] + + # Add back the similarity scores + top_k_with_val = dict( + zip(node_map[top_k_sorted].tolist(), S[top_k_sorted].tolist()) + ) + + # Remove the self-similarity + top_k_with_val.pop(source, None) + return top_k_with_val + + +@np_random_state(5) +@nx._dispatchable(edge_attrs="weight") +def generate_random_paths( + G, sample_size, path_length=5, index_map=None, weight="weight", seed=None +): + """Randomly generate `sample_size` paths of length `path_length`. + + Parameters + ---------- + G : NetworkX graph + A NetworkX graph + sample_size : integer + The number of paths to generate. This is ``R`` in [1]_. 
+ path_length : integer (default = 5) + The maximum size of the path to randomly generate. + This is ``T`` in [1]_. According to the paper, ``T >= 5`` is + recommended. + index_map : dictionary, optional + If provided, this will be populated with the inverted + index of nodes mapped to the set of generated random path + indices within ``paths``. + weight : string or None, optional (default="weight") + The name of an edge attribute that holds the numerical value + used as a weight. If None then each edge has weight 1. + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness`. + + Returns + ------- + paths : generator of lists + Generator of `sample_size` paths each with length `path_length`. + + Examples + -------- + Note that the return value is the list of paths: + + >>> G = nx.star_graph(3) + >>> random_path = nx.generate_random_paths(G, 2) + + By passing a dictionary into `index_map`, it will build an + inverted index mapping of nodes to the paths in which that node is present: + + >>> G = nx.star_graph(3) + >>> index_map = {} + >>> random_path = nx.generate_random_paths(G, 3, index_map=index_map) + >>> paths_containing_node_0 = [ + ... random_path[path_idx] for path_idx in index_map.get(0, []) + ... ] + + References + ---------- + .. [1] Zhang, J., Tang, J., Ma, C., Tong, H., Jing, Y., & Li, J. + Panther: Fast top-k similarity search on large networks. + In Proceedings of the ACM SIGKDD International Conference + on Knowledge Discovery and Data Mining (Vol. 2015-August, pp. 1445–1454). + Association for Computing Machinery. https://doi.org/10.1145/2783258.2783267. + """ + import numpy as np + + randint_fn = ( + seed.integers if isinstance(seed, np.random.Generator) else seed.randint + ) + + # Calculate transition probabilities between + # every pair of vertices according to Eq. 
(3) + adj_mat = nx.to_numpy_array(G, weight=weight) + inv_row_sums = np.reciprocal(adj_mat.sum(axis=1)).reshape(-1, 1) + transition_probabilities = adj_mat * inv_row_sums + + node_map = list(G) + num_nodes = G.number_of_nodes() + + for path_index in range(sample_size): + # Sample current vertex v = v_i uniformly at random + node_index = randint_fn(num_nodes) + node = node_map[node_index] + + # Add v into p_r and add p_r into the path set + # of v, i.e., P_v + path = [node] + + # Build the inverted index (P_v) of vertices to paths + if index_map is not None: + if node in index_map: + index_map[node].add(path_index) + else: + index_map[node] = {path_index} + + starting_index = node_index + for _ in range(path_length): + # Randomly sample a neighbor (v_j) according + # to transition probabilities from ``node`` (v) to its neighbors + nbr_index = seed.choice( + num_nodes, p=transition_probabilities[starting_index] + ) + + # Set current vertex (v = v_j) + starting_index = nbr_index + + # Add v into p_r + nbr_node = node_map[nbr_index] + path.append(nbr_node) + + # Add p_r into P_v + if index_map is not None: + if nbr_node in index_map: + index_map[nbr_node].add(path_index) + else: + index_map[nbr_node] = {path_index} + + yield path diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/simple_paths.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/simple_paths.py new file mode 100644 index 0000000000000000000000000000000000000000..3605522f04c45e14f407bb98b7968e703f73ca21 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/simple_paths.py @@ -0,0 +1,950 @@ +from heapq import heappop, heappush +from itertools import count + +import networkx as nx +from networkx.algorithms.shortest_paths.weighted import _weight_function +from networkx.utils import not_implemented_for, pairwise + +__all__ = [ + "all_simple_paths", + "is_simple_path", + "shortest_simple_paths", + "all_simple_edge_paths", +] + + +@nx._dispatchable +def 
is_simple_path(G, nodes): + """Returns True if and only if `nodes` form a simple path in `G`. + + A *simple path* in a graph is a nonempty sequence of nodes in which + no node appears more than once in the sequence, and each adjacent + pair of nodes in the sequence is adjacent in the graph. + + Parameters + ---------- + G : graph + A NetworkX graph. + nodes : list + A list of one or more nodes in the graph `G`. + + Returns + ------- + bool + Whether the given list of nodes represents a simple path in `G`. + + Notes + ----- + An empty list of nodes is not a path but a list of one node is a + path. Here's an explanation why. + + This function operates on *node paths*. One could also consider + *edge paths*. There is a bijection between node paths and edge + paths. + + The *length of a path* is the number of edges in the path, so a list + of nodes of length *n* corresponds to a path of length *n* - 1. + Thus the smallest edge path would be a list of zero edges, the empty + path. This corresponds to a list of one node. + + To convert between a node path and an edge path, you can use code + like the following:: + + >>> from networkx.utils import pairwise + >>> nodes = [0, 1, 2, 3] + >>> edges = list(pairwise(nodes)) + >>> edges + [(0, 1), (1, 2), (2, 3)] + >>> nodes = [edges[0][0]] + [v for u, v in edges] + >>> nodes + [0, 1, 2, 3] + + Examples + -------- + >>> G = nx.cycle_graph(4) + >>> nx.is_simple_path(G, [2, 3, 0]) + True + >>> nx.is_simple_path(G, [0, 2]) + False + + """ + # The empty list is not a valid path. Could also return + # NetworkXPointlessConcept here. + if len(nodes) == 0: + return False + + # If the list is a single node, just check that the node is actually + # in the graph. 
+ if len(nodes) == 1: + return nodes[0] in G + + # check that all nodes in the list are in the graph, if at least one + # is not in the graph, then this is not a simple path + if not all(n in G for n in nodes): + return False + + # If the list contains repeated nodes, then it's not a simple path + if len(set(nodes)) != len(nodes): + return False + + # Test that each adjacent pair of nodes is adjacent. + return all(v in G[u] for u, v in pairwise(nodes)) + + +@nx._dispatchable +def all_simple_paths(G, source, target, cutoff=None): + """Generate all simple paths in the graph G from source to target. + + A simple path is a path with no repeated nodes. + + Parameters + ---------- + G : NetworkX graph + + source : node + Starting node for path + + target : nodes + Single node or iterable of nodes at which to end path + + cutoff : integer, optional + Depth to stop the search. Only paths of length <= cutoff are returned. + + Returns + ------- + path_generator: generator + A generator that produces lists of simple paths. If there are no paths + between the source and target within the given cutoff the generator + produces no output. If it is possible to traverse the same sequence of + nodes in multiple ways, namely through parallel edges, then it will be + returned multiple times (once for each viable edge combination). + + Examples + -------- + This iterator generates lists of nodes:: + + >>> G = nx.complete_graph(4) + >>> for path in nx.all_simple_paths(G, source=0, target=3): + ... print(path) + ... 
+ [0, 1, 2, 3] + [0, 1, 3] + [0, 2, 1, 3] + [0, 2, 3] + [0, 3] + + You can generate only those paths that are shorter than a certain + length by using the `cutoff` keyword argument:: + + >>> paths = nx.all_simple_paths(G, source=0, target=3, cutoff=2) + >>> print(list(paths)) + [[0, 1, 3], [0, 2, 3], [0, 3]] + + To get each path as the corresponding list of edges, you can use the + :func:`networkx.utils.pairwise` helper function:: + + >>> paths = nx.all_simple_paths(G, source=0, target=3) + >>> for path in map(nx.utils.pairwise, paths): + ... print(list(path)) + [(0, 1), (1, 2), (2, 3)] + [(0, 1), (1, 3)] + [(0, 2), (2, 1), (1, 3)] + [(0, 2), (2, 3)] + [(0, 3)] + + Pass an iterable of nodes as target to generate all paths ending in any of several nodes:: + + >>> G = nx.complete_graph(4) + >>> for path in nx.all_simple_paths(G, source=0, target=[3, 2]): + ... print(path) + ... + [0, 1, 2] + [0, 1, 2, 3] + [0, 1, 3] + [0, 1, 3, 2] + [0, 2] + [0, 2, 1, 3] + [0, 2, 3] + [0, 3] + [0, 3, 1, 2] + [0, 3, 2] + + The singleton path from ``source`` to itself is considered a simple path and is + included in the results: + + >>> G = nx.empty_graph(5) + >>> list(nx.all_simple_paths(G, source=0, target=0)) + [[0]] + + >>> G = nx.path_graph(3) + >>> list(nx.all_simple_paths(G, source=0, target={0, 1, 2})) + [[0], [0, 1], [0, 1, 2]] + + Iterate over each path from the root nodes to the leaf nodes in a + directed acyclic graph using a functional programming approach:: + + >>> from itertools import chain + >>> from itertools import product + >>> from itertools import starmap + >>> from functools import partial + >>> + >>> chaini = chain.from_iterable + >>> + >>> G = nx.DiGraph([(0, 1), (1, 2), (0, 3), (3, 2)]) + >>> roots = (v for v, d in G.in_degree() if d == 0) + >>> leaves = (v for v, d in G.out_degree() if d == 0) + >>> all_paths = partial(nx.all_simple_paths, G) + >>> list(chaini(starmap(all_paths, product(roots, leaves)))) + [[0, 1, 2], [0, 3, 2]] + + The same list computed 
using an iterative approach:: + + >>> G = nx.DiGraph([(0, 1), (1, 2), (0, 3), (3, 2)]) + >>> roots = (v for v, d in G.in_degree() if d == 0) + >>> leaves = (v for v, d in G.out_degree() if d == 0) + >>> all_paths = [] + >>> for root in roots: + ... for leaf in leaves: + ... paths = nx.all_simple_paths(G, root, leaf) + ... all_paths.extend(paths) + >>> all_paths + [[0, 1, 2], [0, 3, 2]] + + Iterate over each path from the root nodes to the leaf nodes in a + directed acyclic graph passing all leaves together to avoid unnecessary + compute:: + + >>> G = nx.DiGraph([(0, 1), (2, 1), (1, 3), (1, 4)]) + >>> roots = (v for v, d in G.in_degree() if d == 0) + >>> leaves = [v for v, d in G.out_degree() if d == 0] + >>> all_paths = [] + >>> for root in roots: + ... paths = nx.all_simple_paths(G, root, leaves) + ... all_paths.extend(paths) + >>> all_paths + [[0, 1, 3], [0, 1, 4], [2, 1, 3], [2, 1, 4]] + + If parallel edges offer multiple ways to traverse a given sequence of + nodes, this sequence of nodes will be returned multiple times: + + >>> G = nx.MultiDiGraph([(0, 1), (0, 1), (1, 2)]) + >>> list(nx.all_simple_paths(G, 0, 2)) + [[0, 1, 2], [0, 1, 2]] + + Notes + ----- + This algorithm uses a modified depth-first search to generate the + paths [1]_. A single path can be found in $O(V+E)$ time but the + number of simple paths in a graph can be very large, e.g. $O(n!)$ in + the complete graph of order $n$. + + This function does not check that a path exists between `source` and + `target`. For large graphs, this may result in very long runtimes. + Consider using `has_path` to check that a path exists between `source` and + `target` before calling this function on large graphs. + + References + ---------- + .. [1] R. Sedgewick, "Algorithms in C, Part 5: Graph Algorithms", + Addison Wesley Professional, 3rd ed., 2001. 
+ + See Also + -------- + all_shortest_paths, shortest_path, has_path + + """ + for edge_path in all_simple_edge_paths(G, source, target, cutoff): + yield [source] + [edge[1] for edge in edge_path] + + +@nx._dispatchable +def all_simple_edge_paths(G, source, target, cutoff=None): + """Generate lists of edges for all simple paths in G from source to target. + + A simple path is a path with no repeated nodes. + + Parameters + ---------- + G : NetworkX graph + + source : node + Starting node for path + + target : nodes + Single node or iterable of nodes at which to end path + + cutoff : integer, optional + Depth to stop the search. Only paths of length <= cutoff are returned. + + Returns + ------- + path_generator: generator + A generator that produces lists of simple paths. If there are no paths + between the source and target within the given cutoff the generator + produces no output. + For multigraphs, the list of edges have elements of the form `(u,v,k)`. + Where `k` corresponds to the edge key. + + Examples + -------- + + Print the simple path edges of a Graph:: + + >>> g = nx.Graph([(1, 2), (2, 4), (1, 3), (3, 4)]) + >>> for path in sorted(nx.all_simple_edge_paths(g, 1, 4)): + ... print(path) + [(1, 2), (2, 4)] + [(1, 3), (3, 4)] + + Print the simple path edges of a MultiGraph. Returned edges come with + their associated keys:: + + >>> mg = nx.MultiGraph() + >>> mg.add_edge(1, 2, key="k0") + 'k0' + >>> mg.add_edge(1, 2, key="k1") + 'k1' + >>> mg.add_edge(2, 3, key="k0") + 'k0' + >>> for path in sorted(nx.all_simple_edge_paths(mg, 1, 3)): + ... print(path) + [(1, 2, 'k0'), (2, 3, 'k0')] + [(1, 2, 'k1'), (2, 3, 'k0')] + + When ``source`` is one of the targets, the empty path starting and ending at + ``source`` without traversing any edge is considered a valid simple edge path + and is included in the results: + + >>> G = nx.Graph() + >>> G.add_node(0) + >>> paths = list(nx.all_simple_edge_paths(G, 0, 0)) + >>> for path in paths: + ... 
print(path) + [] + >>> len(paths) + 1 + + + Notes + ----- + This algorithm uses a modified depth-first search to generate the + paths [1]_. A single path can be found in $O(V+E)$ time but the + number of simple paths in a graph can be very large, e.g. $O(n!)$ in + the complete graph of order $n$. + + References + ---------- + .. [1] R. Sedgewick, "Algorithms in C, Part 5: Graph Algorithms", + Addison Wesley Professional, 3rd ed., 2001. + + See Also + -------- + all_shortest_paths, shortest_path, all_simple_paths + + """ + if source not in G: + raise nx.NodeNotFound(f"source node {source} not in graph") + + if target in G: + targets = {target} + else: + try: + targets = set(target) + except TypeError as err: + raise nx.NodeNotFound(f"target node {target} not in graph") from err + + cutoff = cutoff if cutoff is not None else len(G) - 1 + + if cutoff >= 0 and targets: + yield from _all_simple_edge_paths(G, source, targets, cutoff) + + +def _all_simple_edge_paths(G, source, targets, cutoff): + # We simulate recursion with a stack, keeping the current path being explored + # and the outgoing edge iterators at each point in the stack. + # To avoid unnecessary checks, the loop is structured in a way such that a path + # is considered for yielding only after a new node/edge is added. + # We bootstrap the search by adding a dummy iterator to the stack that only yields + # a dummy edge to source (so that the trivial path has a chance of being included). + + get_edges = ( + (lambda node: G.edges(node, keys=True)) + if G.is_multigraph() + else (lambda node: G.edges(node)) + ) + + # The current_path is a dictionary that maps nodes in the path to the edge that was + # used to enter that node (instead of a list of edges) because we want both a fast + # membership test for nodes in the path and the preservation of insertion order. + current_path = {None: None} + stack = [iter([(None, source)])] + + while stack: + # 1. Try to extend the current path. 
+ next_edge = next((e for e in stack[-1] if e[1] not in current_path), None) + if next_edge is None: + # All edges of the last node in the current path have been explored. + stack.pop() + current_path.popitem() + continue + previous_node, next_node, *_ = next_edge + + # 2. Check if we've reached a target. + if next_node in targets: + yield (list(current_path.values()) + [next_edge])[2:] # remove dummy edge + + # 3. Only expand the search through the next node if it makes sense. + if len(current_path) - 1 < cutoff and ( + targets - current_path.keys() - {next_node} + ): + current_path[next_node] = next_edge + stack.append(iter(get_edges(next_node))) + + +@not_implemented_for("multigraph") +@nx._dispatchable(edge_attrs="weight") +def shortest_simple_paths(G, source, target, weight=None): + """Generate all simple paths in the graph G from source to target, + starting from shortest ones. + + A simple path is a path with no repeated nodes. + + If a weighted shortest path search is to be used, no negative weights + are allowed. + + Parameters + ---------- + G : NetworkX graph + + source : node + Starting node for path + + target : node + Ending node for path + + weight : string or function + If it is a string, it is the name of the edge attribute to be + used as a weight. + + If it is a function, the weight of an edge is the value returned + by the function. The function must accept exactly three positional + arguments: the two endpoints of an edge and the dictionary of edge + attributes for that edge. The function must return a number or None. + The weight function can be used to hide edges by returning None. + So ``weight = lambda u, v, d: 1 if d['color']=="red" else None`` + will find the shortest red path. + + If None all edges are considered to have unit weight. Default + value None. + + Returns + ------- + path_generator: generator + A generator that produces lists of simple paths, in order from + shortest to longest. 
+ + Raises + ------ + NetworkXNoPath + If no path exists between source and target. + + NetworkXError + If source or target nodes are not in the input graph. + + NetworkXNotImplemented + If the input graph is a Multi[Di]Graph. + + Examples + -------- + + >>> G = nx.cycle_graph(7) + >>> paths = list(nx.shortest_simple_paths(G, 0, 3)) + >>> print(paths) + [[0, 1, 2, 3], [0, 6, 5, 4, 3]] + + You can use this function to efficiently compute the k shortest/best + paths between two nodes. + + >>> from itertools import islice + >>> def k_shortest_paths(G, source, target, k, weight=None): + ... return list( + ... islice(nx.shortest_simple_paths(G, source, target, weight=weight), k) + ... ) + >>> for path in k_shortest_paths(G, 0, 3, 2): + ... print(path) + [0, 1, 2, 3] + [0, 6, 5, 4, 3] + + Notes + ----- + This procedure is based on algorithm by Jin Y. Yen [1]_. Finding + the first $K$ paths requires $O(KN^3)$ operations. + + See Also + -------- + all_shortest_paths + shortest_path + all_simple_paths + + References + ---------- + .. [1] Jin Y. Yen, "Finding the K Shortest Loopless Paths in a + Network", Management Science, Vol. 17, No. 11, Theory Series + (Jul., 1971), pp. 712-716. 
+ + """ + if source not in G: + raise nx.NodeNotFound(f"source node {source} not in graph") + + if target not in G: + raise nx.NodeNotFound(f"target node {target} not in graph") + + if weight is None: + length_func = len + shortest_path_func = _bidirectional_shortest_path + else: + wt = _weight_function(G, weight) + + def length_func(path): + return sum( + wt(u, v, G.get_edge_data(u, v)) for (u, v) in zip(path, path[1:]) + ) + + shortest_path_func = _bidirectional_dijkstra + + listA = [] + listB = PathBuffer() + prev_path = None + while True: + if not prev_path: + length, path = shortest_path_func(G, source, target, weight=weight) + listB.push(length, path) + else: + ignore_nodes = set() + ignore_edges = set() + for i in range(1, len(prev_path)): + root = prev_path[:i] + root_length = length_func(root) + for path in listA: + if path[:i] == root: + ignore_edges.add((path[i - 1], path[i])) + try: + length, spur = shortest_path_func( + G, + root[-1], + target, + ignore_nodes=ignore_nodes, + ignore_edges=ignore_edges, + weight=weight, + ) + path = root[:-1] + spur + listB.push(root_length + length, path) + except nx.NetworkXNoPath: + pass + ignore_nodes.add(root[-1]) + + if listB: + path = listB.pop() + yield path + listA.append(path) + prev_path = path + else: + break + + +class PathBuffer: + def __init__(self): + self.paths = set() + self.sortedpaths = [] + self.counter = count() + + def __len__(self): + return len(self.sortedpaths) + + def push(self, cost, path): + hashable_path = tuple(path) + if hashable_path not in self.paths: + heappush(self.sortedpaths, (cost, next(self.counter), path)) + self.paths.add(hashable_path) + + def pop(self): + (cost, num, path) = heappop(self.sortedpaths) + hashable_path = tuple(path) + self.paths.remove(hashable_path) + return path + + +def _bidirectional_shortest_path( + G, source, target, ignore_nodes=None, ignore_edges=None, weight=None +): + """Returns the shortest path between source and target ignoring + nodes and edges in 
the containers ignore_nodes and ignore_edges. + + This is a custom modification of the standard bidirectional shortest + path implementation at networkx.algorithms.unweighted + + Parameters + ---------- + G : NetworkX graph + + source : node + starting node for path + + target : node + ending node for path + + ignore_nodes : container of nodes + nodes to ignore, optional + + ignore_edges : container of edges + edges to ignore, optional + + weight : None + This function accepts a weight argument for convenience of + shortest_simple_paths function. It will be ignored. + + Returns + ------- + path: list + List of nodes in a path from source to target. + + Raises + ------ + NetworkXNoPath + If no path exists between source and target. + + See Also + -------- + shortest_path + + """ + # call helper to do the real work + results = _bidirectional_pred_succ(G, source, target, ignore_nodes, ignore_edges) + pred, succ, w = results + + # build path from pred+w+succ + path = [] + # from w to target + while w is not None: + path.append(w) + w = succ[w] + # from source to w + w = pred[path[0]] + while w is not None: + path.insert(0, w) + w = pred[w] + + return len(path), path + + +def _bidirectional_pred_succ(G, source, target, ignore_nodes=None, ignore_edges=None): + """Bidirectional shortest path helper. + Returns (pred,succ,w) where + pred is a dictionary of predecessors from w to the source, and + succ is a dictionary of successors from w to the target. 
+ """ + # does BFS from both source and target and meets in the middle + if ignore_nodes and (source in ignore_nodes or target in ignore_nodes): + raise nx.NetworkXNoPath(f"No path between {source} and {target}.") + if target == source: + return ({target: None}, {source: None}, source) + + # handle either directed or undirected + if G.is_directed(): + Gpred = G.predecessors + Gsucc = G.successors + else: + Gpred = G.neighbors + Gsucc = G.neighbors + + # support optional nodes filter + if ignore_nodes: + + def filter_iter(nodes): + def iterate(v): + for w in nodes(v): + if w not in ignore_nodes: + yield w + + return iterate + + Gpred = filter_iter(Gpred) + Gsucc = filter_iter(Gsucc) + + # support optional edges filter + if ignore_edges: + if G.is_directed(): + + def filter_pred_iter(pred_iter): + def iterate(v): + for w in pred_iter(v): + if (w, v) not in ignore_edges: + yield w + + return iterate + + def filter_succ_iter(succ_iter): + def iterate(v): + for w in succ_iter(v): + if (v, w) not in ignore_edges: + yield w + + return iterate + + Gpred = filter_pred_iter(Gpred) + Gsucc = filter_succ_iter(Gsucc) + + else: + + def filter_iter(nodes): + def iterate(v): + for w in nodes(v): + if (v, w) not in ignore_edges and (w, v) not in ignore_edges: + yield w + + return iterate + + Gpred = filter_iter(Gpred) + Gsucc = filter_iter(Gsucc) + + # predecessor and successors in search + pred = {source: None} + succ = {target: None} + + # initialize fringes, start with forward + forward_fringe = [source] + reverse_fringe = [target] + + while forward_fringe and reverse_fringe: + if len(forward_fringe) <= len(reverse_fringe): + this_level = forward_fringe + forward_fringe = [] + for v in this_level: + for w in Gsucc(v): + if w not in pred: + forward_fringe.append(w) + pred[w] = v + if w in succ: + # found path + return pred, succ, w + else: + this_level = reverse_fringe + reverse_fringe = [] + for v in this_level: + for w in Gpred(v): + if w not in succ: + succ[w] = v + 
reverse_fringe.append(w) + if w in pred: + # found path + return pred, succ, w + + raise nx.NetworkXNoPath(f"No path between {source} and {target}.") + + +def _bidirectional_dijkstra( + G, source, target, weight="weight", ignore_nodes=None, ignore_edges=None +): + """Dijkstra's algorithm for shortest paths using bidirectional search. + + This function returns the shortest path between source and target + ignoring nodes and edges in the containers ignore_nodes and + ignore_edges. + + This is a custom modification of the standard Dijkstra bidirectional + shortest path implementation at networkx.algorithms.weighted + + Parameters + ---------- + G : NetworkX graph + + source : node + Starting node. + + target : node + Ending node. + + weight: string, function, optional (default='weight') + Edge data key or weight function corresponding to the edge weight + If this is a function, the weight of an edge is the value + returned by the function. The function must accept exactly three + positional arguments: the two endpoints of an edge and the + dictionary of edge attributes for that edge. The function must + return a number or None to indicate a hidden edge. + + ignore_nodes : container of nodes + nodes to ignore, optional + + ignore_edges : container of edges + edges to ignore, optional + + Returns + ------- + length : number + Shortest path length. + + Returns a tuple of two dictionaries keyed by node. + The first dictionary stores distance from the source. + The second stores the path from the source to that node. + + Raises + ------ + NetworkXNoPath + If no path exists between source and target. + + Notes + ----- + Edge weight attributes must be numerical. + Distances are calculated as sums of weighted edges traversed. + + The weight function can be used to hide edges by returning None. + So ``weight = lambda u, v, d: 1 if d['color']=="red" else None`` + will find the shortest red path. 
+ + In practice bidirectional Dijkstra is much more than twice as fast as + ordinary Dijkstra. + + Ordinary Dijkstra expands nodes in a sphere-like manner from the + source. The radius of this sphere will eventually be the length + of the shortest path. Bidirectional Dijkstra will expand nodes + from both the source and the target, making two spheres of half + this radius. Volume of the first sphere is pi*r*r while the + others are 2*pi*r/2*r/2, making up half the volume. + + This algorithm is not guaranteed to work if edge weights + are negative or are floating point numbers + (overflows and roundoff errors can cause problems). + + See Also + -------- + shortest_path + shortest_path_length + """ + if ignore_nodes and (source in ignore_nodes or target in ignore_nodes): + raise nx.NetworkXNoPath(f"No path between {source} and {target}.") + if source == target: + if source not in G: + raise nx.NodeNotFound(f"Node {source} not in graph") + return (0, [source]) + + # handle either directed or undirected + if G.is_directed(): + Gpred = G.predecessors + Gsucc = G.successors + else: + Gpred = G.neighbors + Gsucc = G.neighbors + + # support optional nodes filter + if ignore_nodes: + + def filter_iter(nodes): + def iterate(v): + for w in nodes(v): + if w not in ignore_nodes: + yield w + + return iterate + + Gpred = filter_iter(Gpred) + Gsucc = filter_iter(Gsucc) + + # support optional edges filter + if ignore_edges: + if G.is_directed(): + + def filter_pred_iter(pred_iter): + def iterate(v): + for w in pred_iter(v): + if (w, v) not in ignore_edges: + yield w + + return iterate + + def filter_succ_iter(succ_iter): + def iterate(v): + for w in succ_iter(v): + if (v, w) not in ignore_edges: + yield w + + return iterate + + Gpred = filter_pred_iter(Gpred) + Gsucc = filter_succ_iter(Gsucc) + + else: + + def filter_iter(nodes): + def iterate(v): + for w in nodes(v): + if (v, w) not in ignore_edges and (w, v) not in ignore_edges: + yield w + + return iterate + + Gpred = 
filter_iter(Gpred) + Gsucc = filter_iter(Gsucc) + + wt = _weight_function(G, weight) + push = heappush + pop = heappop + # Init: Forward Backward + dists = [{}, {}] # dictionary of final distances + paths = [{source: [source]}, {target: [target]}] # dictionary of paths + fringe = [[], []] # heap of (distance, node) tuples for + # extracting next node to expand + seen = [{source: 0}, {target: 0}] # dictionary of distances to + # nodes seen + c = count() + # initialize fringe heap + push(fringe[0], (0, next(c), source)) + push(fringe[1], (0, next(c), target)) + # neighs for extracting correct neighbor information + neighs = [Gsucc, Gpred] + # variables to hold shortest discovered path + # finaldist = 1e30000 + finalpath = [] + dir = 1 + while fringe[0] and fringe[1]: + # choose direction + # dir == 0 is forward direction and dir == 1 is back + dir = 1 - dir + # extract closest to expand + (dist, _, v) = pop(fringe[dir]) + if v in dists[dir]: + # Shortest path to v has already been found + continue + # update distance + dists[dir][v] = dist # equal to seen[dir][v] + if v in dists[1 - dir]: + # if we have scanned v in both directions we are done + # we have now discovered the shortest path + return (finaldist, finalpath) + + for w in neighs[dir](v): + if dir == 0: # forward + minweight = wt(v, w, G.get_edge_data(v, w)) + else: # back, must remember to change v,w->w,v + minweight = wt(w, v, G.get_edge_data(w, v)) + if minweight is None: + continue + vwLength = dists[dir][v] + minweight + + if w in dists[dir]: + if vwLength < dists[dir][w]: + raise ValueError("Contradictory paths found: negative weights?") + elif w not in seen[dir] or vwLength < seen[dir][w]: + # relaxing + seen[dir][w] = vwLength + push(fringe[dir], (vwLength, next(c), w)) + paths[dir][w] = paths[dir][v] + [w] + if w in seen[0] and w in seen[1]: + # see if this path is better than the already + # discovered shortest path + totaldist = seen[0][w] + seen[1][w] + if finalpath == [] or finaldist > 
totaldist: + finaldist = totaldist + revpath = paths[1][w][:] + revpath.reverse() + finalpath = paths[0][w] + revpath[1:] + raise nx.NetworkXNoPath(f"No path between {source} and {target}.") diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/smallworld.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/smallworld.py new file mode 100644 index 0000000000000000000000000000000000000000..456a4ca11c0aa19d1d770bf90e5713ce80e270d8 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/smallworld.py @@ -0,0 +1,404 @@ +"""Functions for estimating the small-world-ness of graphs. + +A small world network is characterized by a small average shortest path length, +and a large clustering coefficient. + +Small-worldness is commonly measured with the coefficient sigma or omega. + +Both coefficients compare the average clustering coefficient and shortest path +length of a given graph against the same quantities for an equivalent random +or lattice graph. + +For more information, see the Wikipedia article on small-world network [1]_. + +.. [1] Small-world network:: https://en.wikipedia.org/wiki/Small-world_network + +""" + +import networkx as nx +from networkx.utils import not_implemented_for, py_random_state + +__all__ = ["random_reference", "lattice_reference", "sigma", "omega"] + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +@py_random_state(3) +@nx._dispatchable(returns_graph=True) +def random_reference(G, niter=1, connectivity=True, seed=None): + """Compute a random graph by swapping edges of a given graph. + + Parameters + ---------- + G : graph + An undirected graph with 4 or more nodes. + + niter : integer (optional, default=1) + An edge is rewired approximately `niter` times. + + connectivity : boolean (optional, default=True) + When True, ensure connectivity for the randomized graph. + + seed : integer, random_state, or None (default) + Indicator of random number generation state. 
+ See :ref:`Randomness`. + + Returns + ------- + G : graph + The randomized graph. + + Raises + ------ + NetworkXError + If there are fewer than 4 nodes or 2 edges in `G` + + Notes + ----- + The implementation is adapted from the algorithm by Maslov and Sneppen + (2002) [1]_. + + References + ---------- + .. [1] Maslov, Sergei, and Kim Sneppen. + "Specificity and stability in topology of protein networks." + Science 296.5569 (2002): 910-913. + """ + if len(G) < 4: + raise nx.NetworkXError("Graph has fewer than four nodes.") + if len(G.edges) < 2: + raise nx.NetworkXError("Graph has fewer that 2 edges") + + from networkx.utils import cumulative_distribution, discrete_sequence + + local_conn = nx.connectivity.local_edge_connectivity + + G = G.copy() + keys, degrees = zip(*G.degree()) # keys, degree + cdf = cumulative_distribution(degrees) # cdf of degree + nnodes = len(G) + nedges = nx.number_of_edges(G) + niter = niter * nedges + ntries = int(nnodes * nedges / (nnodes * (nnodes - 1) / 2)) + swapcount = 0 + + for i in range(niter): + n = 0 + while n < ntries: + # pick two random edges without creating edge list + # choose source node indices from discrete distribution + (ai, ci) = discrete_sequence(2, cdistribution=cdf, seed=seed) + if ai == ci: + continue # same source, skip + a = keys[ai] # convert index to label + c = keys[ci] + # choose target uniformly from neighbors + b = seed.choice(list(G.neighbors(a))) + d = seed.choice(list(G.neighbors(c))) + if b in [a, c, d] or d in [a, b, c]: + continue # all vertices should be different + + # don't create parallel edges + if (d not in G[a]) and (b not in G[c]): + G.add_edge(a, d) + G.add_edge(c, b) + G.remove_edge(a, b) + G.remove_edge(c, d) + + # Check if the graph is still connected + if connectivity and local_conn(G, a, b) == 0: + # Not connected, revert the swap + G.remove_edge(a, d) + G.remove_edge(c, b) + G.add_edge(a, b) + G.add_edge(c, d) + else: + swapcount += 1 + break + n += 1 + return G + + 
+@not_implemented_for("directed") +@not_implemented_for("multigraph") +@py_random_state(4) +@nx._dispatchable(returns_graph=True) +def lattice_reference(G, niter=5, D=None, connectivity=True, seed=None): + """Latticize the given graph by swapping edges. + + Parameters + ---------- + G : graph + An undirected graph. + + niter : integer (optional, default=1) + An edge is rewired approximately niter times. + + D : numpy.array (optional, default=None) + Distance to the diagonal matrix. + + connectivity : boolean (optional, default=True) + Ensure connectivity for the latticized graph when set to True. + + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness`. + + Returns + ------- + G : graph + The latticized graph. + + Raises + ------ + NetworkXError + If there are fewer than 4 nodes or 2 edges in `G` + + Notes + ----- + The implementation is adapted from the algorithm by Sporns et al. [1]_. + which is inspired from the original work by Maslov and Sneppen(2002) [2]_. + + References + ---------- + .. [1] Sporns, Olaf, and Jonathan D. Zwi. + "The small world of the cerebral cortex." + Neuroinformatics 2.2 (2004): 145-162. + .. [2] Maslov, Sergei, and Kim Sneppen. + "Specificity and stability in topology of protein networks." + Science 296.5569 (2002): 910-913. + """ + import numpy as np + + from networkx.utils import cumulative_distribution, discrete_sequence + + local_conn = nx.connectivity.local_edge_connectivity + + if len(G) < 4: + raise nx.NetworkXError("Graph has fewer than four nodes.") + if len(G.edges) < 2: + raise nx.NetworkXError("Graph has fewer that 2 edges") + # Instead of choosing uniformly at random from a generated edge list, + # this algorithm chooses nonuniformly from the set of nodes with + # probability weighted by degree. 
+ G = G.copy() + keys, degrees = zip(*G.degree()) # keys, degree + cdf = cumulative_distribution(degrees) # cdf of degree + + nnodes = len(G) + nedges = nx.number_of_edges(G) + if D is None: + D = np.zeros((nnodes, nnodes)) + un = np.arange(1, nnodes) + um = np.arange(nnodes - 1, 0, -1) + u = np.append((0,), np.where(un < um, un, um)) + + for v in range(int(np.ceil(nnodes / 2))): + D[nnodes - v - 1, :] = np.append(u[v + 1 :], u[: v + 1]) + D[v, :] = D[nnodes - v - 1, :][::-1] + + niter = niter * nedges + # maximal number of rewiring attempts per 'niter' + max_attempts = int(nnodes * nedges / (nnodes * (nnodes - 1) / 2)) + + for _ in range(niter): + n = 0 + while n < max_attempts: + # pick two random edges without creating edge list + # choose source node indices from discrete distribution + (ai, ci) = discrete_sequence(2, cdistribution=cdf, seed=seed) + if ai == ci: + continue # same source, skip + a = keys[ai] # convert index to label + c = keys[ci] + # choose target uniformly from neighbors + b = seed.choice(list(G.neighbors(a))) + d = seed.choice(list(G.neighbors(c))) + bi = keys.index(b) + di = keys.index(d) + + if b in [a, c, d] or d in [a, b, c]: + continue # all vertices should be different + + # don't create parallel edges + if (d not in G[a]) and (b not in G[c]): + if D[ai, bi] + D[ci, di] >= D[ai, ci] + D[bi, di]: + # only swap if we get closer to the diagonal + G.add_edge(a, d) + G.add_edge(c, b) + G.remove_edge(a, b) + G.remove_edge(c, d) + + # Check if the graph is still connected + if connectivity and local_conn(G, a, b) == 0: + # Not connected, revert the swap + G.remove_edge(a, d) + G.remove_edge(c, b) + G.add_edge(a, b) + G.add_edge(c, d) + else: + break + n += 1 + + return G + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +@py_random_state(3) +@nx._dispatchable +def sigma(G, niter=100, nrand=10, seed=None): + """Returns the small-world coefficient (sigma) of the given graph. 
+ + The small-world coefficient is defined as: + sigma = C/Cr / L/Lr + where C and L are respectively the average clustering coefficient and + average shortest path length of G. Cr and Lr are respectively the average + clustering coefficient and average shortest path length of an equivalent + random graph. + + A graph is commonly classified as small-world if sigma>1. + + Parameters + ---------- + G : NetworkX graph + An undirected graph. + niter : integer (optional, default=100) + Approximate number of rewiring per edge to compute the equivalent + random graph. + nrand : integer (optional, default=10) + Number of random graphs generated to compute the average clustering + coefficient (Cr) and average shortest path length (Lr). + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness`. + + Returns + ------- + sigma : float + The small-world coefficient of G. + + Notes + ----- + The implementation is adapted from Humphries et al. [1]_ [2]_. + + References + ---------- + .. [1] The brainstem reticular formation is a small-world, not scale-free, + network M. D. Humphries, K. Gurney and T. J. Prescott, + Proc. Roy. Soc. B 2006 273, 503-511, doi:10.1098/rspb.2005.3354. + .. [2] Humphries and Gurney (2008). + "Network 'Small-World-Ness': A Quantitative Method for Determining + Canonical Network Equivalence". + PLoS One. 3 (4). PMID 18446219. doi:10.1371/journal.pone.0002051. 
+ """ + import numpy as np + + # Compute the mean clustering coefficient and average shortest path length + # for an equivalent random graph + randMetrics = {"C": [], "L": []} + for i in range(nrand): + Gr = random_reference(G, niter=niter, seed=seed) + randMetrics["C"].append(nx.transitivity(Gr)) + randMetrics["L"].append(nx.average_shortest_path_length(Gr)) + + C = nx.transitivity(G) + L = nx.average_shortest_path_length(G) + Cr = np.mean(randMetrics["C"]) + Lr = np.mean(randMetrics["L"]) + + sigma = (C / Cr) / (L / Lr) + + return float(sigma) + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +@py_random_state(3) +@nx._dispatchable +def omega(G, niter=5, nrand=10, seed=None): + """Returns the small-world coefficient (omega) of a graph + + The small-world coefficient of a graph G is: + + omega = Lr/L - C/Cl + + where C and L are respectively the average clustering coefficient and + average shortest path length of G. Lr is the average shortest path length + of an equivalent random graph and Cl is the average clustering coefficient + of an equivalent lattice graph. + + The small-world coefficient (omega) measures how much G is like a lattice + or a random graph. Negative values mean G is similar to a lattice whereas + positive values mean G is a random graph. + Values close to 0 mean that G has small-world characteristics. + + Parameters + ---------- + G : NetworkX graph + An undirected graph. + + niter: integer (optional, default=5) + Approximate number of rewiring per edge to compute the equivalent + random graph. + + nrand: integer (optional, default=10) + Number of random graphs generated to compute the maximal clustering + coefficient (Cr) and average shortest path length (Lr). + + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness`. 
+ + + Returns + ------- + omega : float + The small-world coefficient (omega) + + Notes + ----- + The implementation is adapted from the algorithm by Telesford et al. [1]_. + + References + ---------- + .. [1] Telesford, Joyce, Hayasaka, Burdette, and Laurienti (2011). + "The Ubiquity of Small-World Networks". + Brain Connectivity. 1 (0038): 367-75. PMC 3604768. PMID 22432451. + doi:10.1089/brain.2011.0038. + """ + import numpy as np + + # Compute the mean clustering coefficient and average shortest path length + # for an equivalent random graph + randMetrics = {"C": [], "L": []} + + # Calculate initial average clustering coefficient which potentially will + # get replaced by higher clustering coefficients from generated lattice + # reference graphs + Cl = nx.average_clustering(G) + + niter_lattice_reference = niter + niter_random_reference = niter * 2 + + for _ in range(nrand): + # Generate random graph + Gr = random_reference(G, niter=niter_random_reference, seed=seed) + randMetrics["L"].append(nx.average_shortest_path_length(Gr)) + + # Generate lattice graph + Gl = lattice_reference(G, niter=niter_lattice_reference, seed=seed) + + # Replace old clustering coefficient, if clustering is higher in + # generated lattice reference + Cl_temp = nx.average_clustering(Gl) + if Cl_temp > Cl: + Cl = Cl_temp + + C = nx.average_clustering(G) + L = nx.average_shortest_path_length(G) + Lr = np.mean(randMetrics["L"]) + + omega = (Lr / L) - (C / Cl) + + return float(omega) diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/smetric.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/smetric.py new file mode 100644 index 0000000000000000000000000000000000000000..d985aa805b4fb21300680afe389aae4732793a73 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/smetric.py @@ -0,0 +1,30 @@ +import networkx as nx + +__all__ = ["s_metric"] + + +@nx._dispatchable +def s_metric(G): + """Returns the s-metric [1]_ of graph. 
+ + The s-metric is defined as the sum of the products ``deg(u) * deg(v)`` + for every edge ``(u, v)`` in `G`. + + Parameters + ---------- + G : graph + The graph used to compute the s-metric. + + Returns + ------- + s : float + The s-metric of the graph. + + References + ---------- + .. [1] Lun Li, David Alderson, John C. Doyle, and Walter Willinger, + Towards a Theory of Scale-Free Graphs: + Definition, Properties, and Implications (Extended Version), 2005. + https://arxiv.org/abs/cond-mat/0501169 + """ + return float(sum(G.degree(u) * G.degree(v) for (u, v) in G.edges())) diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/sparsifiers.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/sparsifiers.py new file mode 100644 index 0000000000000000000000000000000000000000..59322372e6c1e06d595d8dff0f8680d1daa8a99e --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/sparsifiers.py @@ -0,0 +1,296 @@ +"""Functions for computing sparsifiers of graphs.""" + +import math + +import networkx as nx +from networkx.utils import not_implemented_for, py_random_state + +__all__ = ["spanner"] + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +@py_random_state(3) +@nx._dispatchable(edge_attrs="weight", returns_graph=True) +def spanner(G, stretch, weight=None, seed=None): + """Returns a spanner of the given graph with the given stretch. + + A spanner of a graph G = (V, E) with stretch t is a subgraph + H = (V, E_S) such that E_S is a subset of E and the distance between + any pair of nodes in H is at most t times the distance between the + nodes in G. + + Parameters + ---------- + G : NetworkX graph + An undirected simple graph. + + stretch : float + The stretch of the spanner. + + weight : object + The edge attribute to use as distance. + + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness`. 
+ + Returns + ------- + NetworkX graph + A spanner of the given graph with the given stretch. + + Raises + ------ + ValueError + If a stretch less than 1 is given. + + Notes + ----- + This function implements the spanner algorithm by Baswana and Sen, + see [1]. + + This algorithm is a randomized las vegas algorithm: The expected + running time is O(km) where k = (stretch + 1) // 2 and m is the + number of edges in G. The returned graph is always a spanner of the + given graph with the specified stretch. For weighted graphs the + number of edges in the spanner is O(k * n^(1 + 1 / k)) where k is + defined as above and n is the number of nodes in G. For unweighted + graphs the number of edges is O(n^(1 + 1 / k) + kn). + + References + ---------- + [1] S. Baswana, S. Sen. A Simple and Linear Time Randomized + Algorithm for Computing Sparse Spanners in Weighted Graphs. + Random Struct. Algorithms 30(4): 532-563 (2007). + """ + if stretch < 1: + raise ValueError("stretch must be at least 1") + + k = (stretch + 1) // 2 + + # initialize spanner H with empty edge set + H = nx.empty_graph() + H.add_nodes_from(G.nodes) + + # phase 1: forming the clusters + # the residual graph has V' from the paper as its node set + # and E' from the paper as its edge set + residual_graph = _setup_residual_graph(G, weight) + # clustering is a dictionary that maps nodes in a cluster to the + # cluster center + clustering = {v: v for v in G.nodes} + sample_prob = math.pow(G.number_of_nodes(), -1 / k) + size_limit = 2 * math.pow(G.number_of_nodes(), 1 + 1 / k) + + i = 0 + while i < k - 1: + # step 1: sample centers + sampled_centers = set() + for center in set(clustering.values()): + if seed.random() < sample_prob: + sampled_centers.add(center) + + # combined loop for steps 2 and 3 + edges_to_add = set() + edges_to_remove = set() + new_clustering = {} + for v in residual_graph.nodes: + if clustering[v] in sampled_centers: + continue + + # step 2: find neighboring (sampled) clusters and + # 
lightest edges to them + lightest_edge_neighbor, lightest_edge_weight = _lightest_edge_dicts( + residual_graph, clustering, v + ) + neighboring_sampled_centers = ( + set(lightest_edge_weight.keys()) & sampled_centers + ) + + # step 3: add edges to spanner + if not neighboring_sampled_centers: + # connect to each neighboring center via lightest edge + for neighbor in lightest_edge_neighbor.values(): + edges_to_add.add((v, neighbor)) + # remove all incident edges + for neighbor in residual_graph.adj[v]: + edges_to_remove.add((v, neighbor)) + + else: # there is a neighboring sampled center + closest_center = min( + neighboring_sampled_centers, key=lightest_edge_weight.get + ) + closest_center_weight = lightest_edge_weight[closest_center] + closest_center_neighbor = lightest_edge_neighbor[closest_center] + + edges_to_add.add((v, closest_center_neighbor)) + new_clustering[v] = closest_center + + # connect to centers with edge weight less than + # closest_center_weight + for center, edge_weight in lightest_edge_weight.items(): + if edge_weight < closest_center_weight: + neighbor = lightest_edge_neighbor[center] + edges_to_add.add((v, neighbor)) + + # remove edges to centers with edge weight less than + # closest_center_weight + for neighbor in residual_graph.adj[v]: + nbr_cluster = clustering[neighbor] + nbr_weight = lightest_edge_weight[nbr_cluster] + if ( + nbr_cluster == closest_center + or nbr_weight < closest_center_weight + ): + edges_to_remove.add((v, neighbor)) + + # check whether iteration added too many edges to spanner, + # if so repeat + if len(edges_to_add) > size_limit: + # an iteration is repeated O(1) times on expectation + continue + + # iteration succeeded + i = i + 1 + + # actually add edges to spanner + for u, v in edges_to_add: + _add_edge_to_spanner(H, residual_graph, u, v, weight) + + # actually delete edges from residual graph + residual_graph.remove_edges_from(edges_to_remove) + + # copy old clustering data to new_clustering + for node, center in 
clustering.items(): + if center in sampled_centers: + new_clustering[node] = center + clustering = new_clustering + + # step 4: remove intra-cluster edges + for u in residual_graph.nodes: + for v in list(residual_graph.adj[u]): + if clustering[u] == clustering[v]: + residual_graph.remove_edge(u, v) + + # update residual graph node set + for v in list(residual_graph.nodes): + if v not in clustering: + residual_graph.remove_node(v) + + # phase 2: vertex-cluster joining + for v in residual_graph.nodes: + lightest_edge_neighbor, _ = _lightest_edge_dicts(residual_graph, clustering, v) + for neighbor in lightest_edge_neighbor.values(): + _add_edge_to_spanner(H, residual_graph, v, neighbor, weight) + + return H + + +def _setup_residual_graph(G, weight): + """Setup residual graph as a copy of G with unique edges weights. + + The node set of the residual graph corresponds to the set V' from + the Baswana-Sen paper and the edge set corresponds to the set E' + from the paper. + + This function associates distinct weights to the edges of the + residual graph (even for unweighted input graphs), as required by + the algorithm. + + Parameters + ---------- + G : NetworkX graph + An undirected simple graph. + + weight : object + The edge attribute to use as distance. + + Returns + ------- + NetworkX graph + The residual graph used for the Baswana-Sen algorithm. + """ + residual_graph = G.copy() + + # establish unique edge weights, even for unweighted graphs + for u, v in G.edges(): + if not weight: + residual_graph[u][v]["weight"] = (id(u), id(v)) + else: + residual_graph[u][v]["weight"] = (G[u][v][weight], id(u), id(v)) + + return residual_graph + + +def _lightest_edge_dicts(residual_graph, clustering, node): + """Find the lightest edge to each cluster. + + Searches for the minimum-weight edge to each cluster adjacent to + the given node. + + Parameters + ---------- + residual_graph : NetworkX graph + The residual graph used by the Baswana-Sen algorithm. 
def _add_edge_to_spanner(H, residual_graph, u, v, weight):
    """Insert the edge {u, v} into the spanner ``H``.

    When `weight` is truthy, the edge's weight in ``H`` is copied from the
    first component of the tuple stored under ``"weight"`` on the same
    edge of the residual graph.

    Parameters
    ----------
    H : NetworkX graph
        The spanner under construction.  It is modified in place.

    residual_graph : NetworkX graph
        The residual graph used by the Baswana-Sen algorithm. The weight
        for the edge is taken from this graph.

    u : node
        One endpoint of the edge.

    v : node
        The other endpoint of the edge.

    weight : object
        The edge attribute to use as distance.  If falsy, the edge is added
        without any weight attribute.
    """
    H.add_edge(u, v)
    if not weight:
        return
    # Element 0 of the residual-graph weight tuple is what gets stored in H.
    original_weight = residual_graph[u][v]["weight"][0]
    H[u][v][weight] = original_weight
@nx._dispatchable(edge_attrs="weight")
def effective_size(G, nodes=None, weight=None):
    r"""Returns the effective size of all nodes in the graph ``G``.

    The *effective size* of a node's ego network is based on the concept
    of redundancy. A person's ego network has redundancy to the extent
    that her contacts are connected to each other as well. The
    nonredundant part of a person's relationships is the effective
    size of her ego network [1]_. Formally, the effective size of a
    node $u$, denoted $e(u)$, is defined by

    .. math::

       e(u) = \sum_{v \in N(u) \setminus \{u\}}
       \left(1 - \sum_{w \in N(v)} p_{uw} m_{vw}\right)

    where $N(u)$ is the set of neighbors of $u$ and $p_{uw}$ is the
    normalized mutual weight of the (directed or undirected) edges
    joining $u$ and $w$ [1]_. And $m_{vw}$ is the mutual weight of $v$
    and $w$ divided by $v$'s highest mutual weight with any of its
    neighbors. The *mutual weight* of $u$ and $v$ is the sum of the
    weights of edges joining them (edge weights are assumed to be one
    if the graph is unweighted).

    For the case of unweighted and undirected graphs, Borgatti proposed
    a simplified formula to compute effective size [2]_

    .. math::

       e(u) = n - \frac{2t}{n}

    where `t` is the number of ties in the ego network (not including
    ties to ego) and `n` is the number of nodes (excluding ego).

    Parameters
    ----------
    G : NetworkX graph
        The graph containing ``v``. Directed graphs are treated like
        undirected graphs when computing neighbors of ``v``.

    nodes : container, optional
        Container of nodes in the graph ``G`` to compute the effective size.
        If None, the effective size of every node is computed.

    weight : None or string, optional
        If None, all edge weights are considered equal.
        Otherwise holds the name of the edge attribute used as weight.

    Returns
    -------
    dict
        Dictionary with nodes as keys and the effective size of the node as values.

    Notes
    -----
    A node ``v`` for which ``G[v]`` is empty is reported as ``nan``.  In
    the simplified (undirected, unweighted) computation, a node whose ego
    network contains no node other than itself (for example a node whose
    only edge is a self-loop) is also reported as ``nan``, because the
    formula divides by the number of nodes excluding ego.

    Burt also defined the related concept of *efficiency* of a node's ego
    network, which is its effective size divided by the degree of that
    node [1]_. So you can easily compute efficiency:

    >>> G = nx.DiGraph()
    >>> G.add_edges_from([(0, 1), (0, 2), (1, 0), (2, 1)])
    >>> esize = nx.effective_size(G)
    >>> efficiency = {n: v / G.degree(n) for n, v in esize.items()}

    See also
    --------
    constraint

    References
    ----------
    .. [1] Burt, Ronald S.
           *Structural Holes: The Social Structure of Competition.*
           Cambridge: Harvard University Press, 1995.

    .. [2] Borgatti, S.
           "Structural Holes: Unpacking Burt's Redundancy Measures"
           CONNECTIONS 20(1):35-38.
           http://www.analytictech.com/connections/v20(1)/holes.htm

    """

    def redundancy(G, u, v, weight=None):
        # One term of the outer sum: 1 - sum_w p_uw * m_vw, with w ranging
        # over the (in- and out-)neighbors of u.
        nmw = normalized_mutual_weight
        r = sum(
            nmw(G, u, w, weight=weight) * nmw(G, v, w, norm=max, weight=weight)
            for w in set(nx.all_neighbors(G, u))
        )
        return 1 - r

    undefined = float("nan")
    result = {}
    if nodes is None:
        nodes = G
    # Use Borgatti's simplified formula for unweighted and undirected graphs
    if not G.is_directed() and weight is None:
        for v in nodes:
            # Effective size is not defined for isolated nodes
            if len(G[v]) == 0:
                result[v] = undefined
                continue
            E = nx.ego_graph(G, v, center=False, undirected=True)
            n_alters = len(E)
            if n_alters == 0:
                # v's only neighbor is v itself (self-loop); with the center
                # removed the ego graph is empty and n - 2t/n would divide
                # by zero.
                result[v] = undefined
                continue
            result[v] = n_alters - (2 * E.size()) / n_alters
    else:
        for v in nodes:
            # Effective size is not defined for isolated nodes
            if len(G[v]) == 0:
                result[v] = undefined
                continue
            result[v] = sum(
                redundancy(G, v, u, weight) for u in set(nx.all_neighbors(G, v))
            )
    return result
@nx._dispatchable(edge_attrs="weight")
def local_constraint(G, u, v, weight=None):
    r"""Returns the local constraint on the node ``u`` with respect to
    the node ``v`` in the graph ``G``.

    Formally, the *local constraint on u with respect to v*, denoted
    $\ell(u, v)$, is defined by

    .. math::

       \ell(u, v) = \left(p_{uv} + \sum_{w \in N(v)} p_{uw} p_{wv}\right)^2,

    where $N(v)$ is the set of neighbors of $v$ and $p_{uv}$ is the
    normalized mutual weight of the (directed or undirected) edges
    joining $u$ and $v$, for each vertex $u$ and $v$ [1]_. The *mutual
    weight* of $u$ and $v$ is the sum of the weights of edges joining
    them (edge weights are assumed to be one if the graph is
    unweighted).

    The implementation accumulates the indirect term over the (in- and
    out-)neighbors ``w`` of ``u``.

    Parameters
    ----------
    G : NetworkX graph
        The graph containing ``u`` and ``v``. This can be either
        directed or undirected.

    u : node
        A node in the graph ``G``.

    v : node
        A node in the graph ``G``.

    weight : None or string, optional
        If None, all edge weights are considered equal.
        Otherwise holds the name of the edge attribute used as weight.

    Returns
    -------
    float
        The local constraint on the node ``u`` with respect to the node
        ``v`` in the graph ``G``.

    See also
    --------
    constraint

    References
    ----------
    .. [1] Burt, Ronald S.
           "Structural holes and good ideas".
           American Journal of Sociology (110): 349–399.

    """
    # Direct investment of u in v.
    p_uv = normalized_mutual_weight(G, u, v, weight=weight)

    # Indirect investment: u -> w -> v for every neighbor w of u.
    via_third_parties = 0
    for w in set(nx.all_neighbors(G, u)):
        p_uw = normalized_mutual_weight(G, u, w, weight=weight)
        p_wv = normalized_mutual_weight(G, w, v, weight=weight)
        via_third_parties += p_uw * p_wv

    return (p_uv + via_third_parties) ** 2
Core graph summarization +techniques are grouping/aggregation, bit-compression, +simplification/sparsification, and influence based. Graph summarization +algorithms often produce either summary graphs in the form of supergraphs or +sparsified graphs, or a list of independent structures. Supergraphs are the +most common product, which consist of supernodes and original nodes and are +connected by edges and superedges, which represent aggregate edges between +nodes and supernodes. + +Grouping/aggregation based techniques compress graphs by representing +close/connected nodes and edges in a graph by a single node/edge in a +supergraph. Nodes can be grouped together into supernodes based on their +structural similarities or proximity within a graph to reduce the total number +of nodes in a graph. Edge-grouping techniques group edges into lossy/lossless +nodes called compressor or virtual nodes to reduce the total number of edges in +a graph. Edge-grouping techniques can be lossless, meaning that they can be +used to re-create the original graph, or techniques can be lossy, requiring +less space to store the summary graph, but at the expense of lower +reconstruction accuracy of the original graph. + +Bit-compression techniques minimize the amount of information needed to +describe the original graph, while revealing structural patterns in the +original graph. The two-part minimum description length (MDL) is often used to +represent the model and the original graph in terms of the model. A key +difference between graph compression and graph summarization is that graph +summarization focuses on finding structural patterns within the original graph, +whereas graph compression focuses on compressing the original graph to be as +small as possible. **NOTE**: Some bit-compression methods exist solely to +compress a graph without creating a summary graph or finding comprehensible +structural patterns. 
@nx._dispatchable(mutates_input={"not copy": 3}, returns_graph=True)
def dedensify(G, threshold, prefix=None, copy=True):
    """Compresses neighborhoods around high-degree nodes

    Reduces the number of edges to high-degree nodes by adding compressor nodes
    that summarize multiple edges of the same type to high-degree nodes (nodes
    with a degree greater than a given threshold). Dedensification also has
    the added benefit of reducing the number of edges around high-degree nodes.
    The implementation currently supports graphs with a single edge type.

    Parameters
    ----------
    G: graph
        A networkx graph
    threshold: int
        Degree threshold: a node is considered a high-degree node when its
        degree (in-degree for directed graphs) is strictly greater than
        this value. The threshold must be greater than or equal to 2.
    prefix: str or None, optional (default: None)
        An optional prefix for denoting compressor nodes
    copy: bool, optional (default: True)
        If True, dedensification is performed on a copy of ``G`` and ``G``
        is left untouched. If False, ``G`` is modified in place.

    Returns
    -------
    dedensified networkx graph : (graph, set)
        2-tuple of the dedensified graph and set of compressor nodes

    Raises
    ------
    NetworkXError
        If `threshold` is less than 2.

    Notes
    -----
    According to the algorithm in [1]_, removes edges in a graph by
    compressing/decompressing the neighborhoods around high degree nodes by
    adding compressor nodes that summarize multiple edges of the same type
    to high-degree nodes. Dedensification will only add a compressor node when
    doing so will reduce the total number of edges in the given graph. This
    implementation currently supports graphs with a single edge type.

    The label of a compressor node is the concatenation of ``str(node)`` for
    each high-degree node it summarizes, optionally preceded by `prefix`.

    Examples
    --------
    Dedensification will only add compressor nodes when doing so would result
    in fewer edges::

        >>> original_graph = nx.DiGraph()
        >>> original_graph.add_nodes_from(
        ...     ["1", "2", "3", "4", "5", "6", "A", "B", "C"]
        ... )
        >>> original_graph.add_edges_from(
        ...     [
        ...         ("1", "C"), ("1", "B"),
        ...         ("2", "C"), ("2", "B"), ("2", "A"),
        ...         ("3", "B"), ("3", "A"), ("3", "6"),
        ...         ("4", "C"), ("4", "B"), ("4", "A"),
        ...         ("5", "B"), ("5", "A"),
        ...         ("6", "5"),
        ...         ("A", "6")
        ...     ]
        ... )
        >>> c_graph, c_nodes = nx.dedensify(original_graph, threshold=2)
        >>> original_graph.number_of_edges()
        15
        >>> c_graph.number_of_edges()
        14

    A dedensified, directed graph can be "densified" to reconstruct the
    original graph::

        >>> original_graph = nx.DiGraph()
        >>> original_graph.add_nodes_from(
        ...     ["1", "2", "3", "4", "5", "6", "A", "B", "C"]
        ... )
        >>> original_graph.add_edges_from(
        ...     [
        ...         ("1", "C"), ("1", "B"),
        ...         ("2", "C"), ("2", "B"), ("2", "A"),
        ...         ("3", "B"), ("3", "A"), ("3", "6"),
        ...         ("4", "C"), ("4", "B"), ("4", "A"),
        ...         ("5", "B"), ("5", "A"),
        ...         ("6", "5"),
        ...         ("A", "6")
        ...     ]
        ... )
        >>> c_graph, c_nodes = nx.dedensify(original_graph, threshold=2)
        >>> # re-densifies the compressed graph into the original graph
        >>> for c_node in c_nodes:
        ...     all_neighbors = set(nx.all_neighbors(c_graph, c_node))
        ...     out_neighbors = set(c_graph.neighbors(c_node))
        ...     for out_neighbor in out_neighbors:
        ...         c_graph.remove_edge(c_node, out_neighbor)
        ...     in_neighbors = all_neighbors - out_neighbors
        ...     for in_neighbor in in_neighbors:
        ...         c_graph.remove_edge(in_neighbor, c_node)
        ...         for out_neighbor in out_neighbors:
        ...             c_graph.add_edge(in_neighbor, out_neighbor)
        ...     c_graph.remove_node(c_node)
        ...
        >>> nx.is_isomorphic(original_graph, c_graph)
        True

    References
    ----------
    .. [1] Maccioni, A., & Abadi, D. J. (2016, August).
       Scalable pattern matching over compressed graphs via dedensification.
       In Proceedings of the 22nd ACM SIGKDD International Conference on
       Knowledge Discovery and Data Mining (pp. 1755-1764).
       http://www.cs.umd.edu/~abadi/papers/graph-dedense.pdf
    """
    if threshold < 2:
        raise nx.NetworkXError("The degree threshold must be >= 2")

    degrees = G.in_degree if G.is_directed() else G.degree
    high_degree_nodes = {n for n, d in degrees if d > threshold}

    # Group nodes by the exact set of high-degree nodes they are adjacent to;
    # nodes with no high-degree neighbor are not candidates for compression.
    auxiliary = {}
    for node in G:
        high_degree_nbrs = frozenset(high_degree_nodes & set(G[node]))
        if high_degree_nbrs:
            auxiliary.setdefault(high_degree_nbrs, set()).add(node)

    if copy:
        G = G.copy()

    compressor_nodes = set()
    for targets, sources in auxiliary.items():
        # Routing through a compressor replaces |sources| * |targets| edges
        # with |sources| + |targets| edges; skip groups where that is no gain.
        old_edges = len(targets) * len(sources)
        new_edges = len(targets) + len(sources)
        if old_edges <= new_edges:
            continue
        compression_node = "".join(str(node) for node in targets)
        if prefix:
            compression_node = str(prefix) + compression_node
        for node in sources:
            for high_node in targets:
                # The edge may already have been removed while processing
                # another group, so check before removing.
                if G.has_edge(node, high_node):
                    G.remove_edge(node, high_node)

            G.add_edge(node, compression_node)
        for node in targets:
            G.add_edge(compression_node, node)
        compressor_nodes.add(compression_node)
    return G, compressor_nodes
This process uses the original graph and the data structures to + create the supernodes with the correct node attributes, and the superedges with the correct + edge attributes + + Parameters + ---------- + G: networkx.Graph + the original graph to be summarized + groups: dict + A dictionary of unique group IDs and their corresponding node groups + node_attributes: iterable + An iterable of the node attributes considered in the summarization process + edge_attributes: iterable + An iterable of the edge attributes considered in the summarization process + neighbor_info: dict + A data structure indicating the number of edges a node has with the + groups in the current summarization of each edge type + edge_types: dict + dictionary of edges in the graph and their corresponding attributes recognized + in the summarization + prefix: string + The prefix to be added to all supernodes + supernode_attribute: str + The node attribute for recording the supernode groupings of nodes + superedge_attribute: str + The edge attribute for recording the edge types represented by superedges + + Returns + ------- + summary graph: Networkx graph + """ + output = G.__class__() + node_label_lookup = {} + for index, group_id in enumerate(groups): + group_set = groups[group_id] + supernode = f"{prefix}{index}" + node_label_lookup[group_id] = supernode + supernode_attributes = { + attr: G.nodes[next(iter(group_set))][attr] for attr in node_attributes + } + supernode_attributes[supernode_attribute] = group_set + output.add_node(supernode, **supernode_attributes) + + for group_id in groups: + group_set = groups[group_id] + source_supernode = node_label_lookup[group_id] + for other_group, group_edge_types in neighbor_info[ + next(iter(group_set)) + ].items(): + if group_edge_types: + target_supernode = node_label_lookup[other_group] + summary_graph_edge = (source_supernode, target_supernode) + + edge_types = [ + dict(zip(edge_attributes, edge_type)) + for edge_type in group_edge_types + ] + + 
has_edge = output.has_edge(*summary_graph_edge) + if output.is_multigraph(): + if not has_edge: + for edge_type in edge_types: + output.add_edge(*summary_graph_edge, **edge_type) + elif not output.is_directed(): + existing_edge_data = output.get_edge_data(*summary_graph_edge) + for edge_type in edge_types: + if edge_type not in existing_edge_data.values(): + output.add_edge(*summary_graph_edge, **edge_type) + else: + superedge_attributes = {superedge_attribute: edge_types} + output.add_edge(*summary_graph_edge, **superedge_attributes) + + return output + + +def _snap_eligible_group(G, groups, group_lookup, edge_types): + """ + Determines if a group is eligible to be split. + + A group is eligible to be split if all nodes in the group have edges of the same type(s) + with the same other groups. + + Parameters + ---------- + G: graph + graph to be summarized + groups: dict + A dictionary of unique group IDs and their corresponding node groups + group_lookup: dict + dictionary of nodes and their current corresponding group ID + edge_types: dict + dictionary of edges in the graph and their corresponding attributes recognized + in the summarization + + Returns + ------- + tuple: group ID to split, and neighbor-groups participation_counts data structure + """ + nbr_info = {node: {gid: Counter() for gid in groups} for node in group_lookup} + for group_id in groups: + current_group = groups[group_id] + + # build nbr_info for nodes in group + for node in current_group: + nbr_info[node] = {group_id: Counter() for group_id in groups} + edges = G.edges(node, keys=True) if G.is_multigraph() else G.edges(node) + for edge in edges: + neighbor = edge[1] + edge_type = edge_types[edge] + neighbor_group_id = group_lookup[neighbor] + nbr_info[node][neighbor_group_id][edge_type] += 1 + + # check if group_id is eligible to be split + group_size = len(current_group) + for other_group_id in groups: + edge_counts = Counter() + for node in current_group: + 
@nx._dispatchable(
    node_attrs="[node_attributes]", edge_attrs="[edge_attributes]", returns_graph=True
)
def snap_aggregation(
    G,
    node_attributes,
    edge_attributes=(),
    prefix="Supernode-",
    supernode_attribute="group",
    superedge_attribute="types",
):
    """Creates a summary graph based on attributes and connectivity.

    This function uses the Summarization by Grouping Nodes on Attributes
    and Pairwise edges (SNAP) algorithm for summarizing a given
    graph by grouping nodes by node attributes and their edge attributes
    into supernodes in a summary graph.  This name SNAP should not be
    confused with the Stanford Network Analysis Project (SNAP).

    Here is a high-level view of how this algorithm works:

    1) Group nodes by node attribute values.

    2) Iteratively split groups until all nodes in each group have edges
    to nodes in the same groups. That is, until all the groups are homogeneous
    in their member nodes' edges to other groups.  For example,
    if all the nodes in group A only have edges to nodes in group B, then the
    group is homogeneous and does not need to be split. If all nodes in group B
    have edges with nodes in groups {A, C}, but some also have edges with other
    nodes in B, then group B is not homogeneous and needs to be split into
    a group of nodes having edges with {A, C} and a group of nodes having
    edges with {A, B, C}.  This way, viewers of the summary graph can
    assume that all nodes in the group have the exact same node attributes and
    the exact same edges.

    3) Build the output summary graph, where the groups are represented by
    super-nodes. Edges represent the edges shared between all the nodes in each
    respective groups.

    A SNAP summary graph can be used to visualize graphs that are too large to display
    or visually analyze, or to efficiently identify sets of similar nodes with similar connectivity
    patterns to other sets of similar nodes based on specified node and/or edge attributes in a graph.

    Parameters
    ----------
    G: graph
        Networkx Graph to be summarized
    node_attributes: iterable, required
        An iterable of the node attributes used to group nodes in the summarization process. Nodes
        with the same values for these attributes will be grouped together in the summary graph.
    edge_attributes: iterable, optional
        An iterable of the edge attributes considered in the summarization process.  If provided, unique
        combinations of the attribute values found in the graph are used to
        determine the edge types in the graph.  If not provided, all edges
        are considered to be of the same type.
    prefix: str
        The prefix used to denote supernodes in the summary graph. Defaults to 'Supernode-'.
    supernode_attribute: str
        The node attribute for recording the supernode groupings of nodes. Defaults to 'group'.
    superedge_attribute: str
        The edge attribute for recording the edge types of multiple edges. Defaults to 'types'.

    Returns
    -------
    networkx.Graph: summary graph

    Examples
    --------
    SNAP aggregation takes a graph and summarizes it in the context of user-provided
    node and edge attributes such that a viewer can more easily extract and
    analyze the information represented by the graph

    >>> nodes = {
    ...     "A": dict(color="Red"),
    ...     "B": dict(color="Red"),
    ...     "C": dict(color="Red"),
    ...     "D": dict(color="Red"),
    ...     "E": dict(color="Blue"),
    ...     "F": dict(color="Blue"),
    ... }
    >>> edges = [
    ...     ("A", "E", "Strong"),
    ...     ("B", "F", "Strong"),
    ...     ("C", "E", "Weak"),
    ...     ("D", "F", "Weak"),
    ... ]
    >>> G = nx.Graph()
    >>> for node in nodes:
    ...     attributes = nodes[node]
    ...     G.add_node(node, **attributes)
    >>> for source, target, type in edges:
    ...     G.add_edge(source, target, type=type)
    >>> node_attributes = ("color",)
    >>> edge_attributes = ("type",)
    >>> summary_graph = nx.snap_aggregation(
    ...     G, node_attributes=node_attributes, edge_attributes=edge_attributes
    ... )

    Notes
    -----
    The summary graph produced is called a maximum Attribute-edge
    compatible (AR-compatible) grouping.  According to [1]_, an
    AR-compatible grouping means that all nodes in each group have the same
    exact node attribute values and the same exact edges and
    edge types to one or more nodes in the same groups.  The maximal
    AR-compatible grouping is the grouping with the minimal cardinality.

    The AR-compatible grouping is the most detailed grouping provided by
    any of the SNAP algorithms.

    References
    ----------
    .. [1] Y. Tian, R. A. Hankins, and J. M. Patel. Efficient aggregation
       for graph summarization. In Proc. 2008 ACM-SIGMOD Int. Conf.
       Management of Data (SIGMOD’08), pages 567–580, Vancouver, Canada,
       June 2008.
    """
    # Both attribute iterables are traversed once per edge/node below, so
    # materialize them to stay correct for one-shot iterators.
    node_attributes = tuple(node_attributes)
    edge_attributes = tuple(edge_attributes)

    edge_types = {
        edge: tuple(attrs.get(attr) for attr in edge_attributes)
        for edge, attrs in G.edges.items()
    }
    if not G.is_directed():
        # Register every undirected edge under its reversed key as well.
        if G.is_multigraph():
            # list is needed to avoid mutating while iterating
            edges = [((v, u, k), etype) for (u, v, k), etype in edge_types.items()]
        else:
            # list is needed to avoid mutating while iterating
            edges = [((v, u), etype) for (u, v), etype in edge_types.items()]
        edge_types.update(edges)

    # Initial grouping: the group id is the tuple of node attribute values.
    group_lookup = {
        node: tuple(attrs[attr] for attr in node_attributes)
        for node, attrs in G.nodes.items()
    }
    groups = defaultdict(set)
    for node, node_type in group_lookup.items():
        groups[node_type].add(node)

    eligible_group_id, nbr_info = _snap_eligible_group(
        G, groups, group_lookup, edge_types
    )
    # Compare against None explicitly: a valid group id can be falsy (the
    # empty tuple when no node attributes are given), and a truthiness test
    # would stop before splitting that group.
    while eligible_group_id is not None:
        groups = _snap_split(groups, nbr_info, group_lookup, eligible_group_id)
        eligible_group_id, nbr_info = _snap_eligible_group(
            G, groups, group_lookup, edge_types
        )
    return _snap_build_graph(
        G,
        groups,
        node_attributes,
        edge_attributes,
        nbr_info,
        edge_types,
        prefix,
        supernode_attribute,
        superedge_attribute,
    )
b/.venv/lib/python3.11/site-packages/networkx/algorithms/swap.py new file mode 100644 index 0000000000000000000000000000000000000000..cb3cc1c0e75c375ae49976e21fcccf2dc6c76231 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/swap.py @@ -0,0 +1,406 @@ +"""Swap edges in a graph.""" + +import math + +import networkx as nx +from networkx.utils import py_random_state + +__all__ = ["double_edge_swap", "connected_double_edge_swap", "directed_edge_swap"] + + +@nx.utils.not_implemented_for("undirected") +@py_random_state(3) +@nx._dispatchable(mutates_input=True, returns_graph=True) +def directed_edge_swap(G, *, nswap=1, max_tries=100, seed=None): + """Swap three edges in a directed graph while keeping the node degrees fixed. + + A directed edge swap swaps three edges such that a -> b -> c -> d becomes + a -> c -> b -> d. This pattern of swapping allows all possible states with the + same in- and out-degree distribution in a directed graph to be reached. + + If the swap would create parallel edges (e.g. if a -> c already existed in the + previous example), another attempt is made to find a suitable trio of edges. + + Parameters + ---------- + G : DiGraph + A directed graph + + nswap : integer (optional, default=1) + Number of three-edge (directed) swaps to perform + + max_tries : integer (optional, default=100) + Maximum number of attempts to swap edges + + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness`. + + Returns + ------- + G : DiGraph + The graph after the edges are swapped. + + Raises + ------ + NetworkXError + If `G` is not directed, or + If nswap > max_tries, or + If there are fewer than 4 nodes or 3 edges in `G`. + NetworkXAlgorithmError + If the number of swap attempts exceeds `max_tries` before `nswap` swaps are made + + Notes + ----- + Does not enforce any connectivity constraints. + + The graph G is modified in place. 
+ + A later swap is allowed to undo a previous swap. + + References + ---------- + .. [1] Erdős, Péter L., et al. “A Simple Havel-Hakimi Type Algorithm to Realize + Graphical Degree Sequences of Directed Graphs.” ArXiv:0905.4913 [Math], + Jan. 2010. https://doi.org/10.48550/arXiv.0905.4913. + Published 2010 in Elec. J. Combinatorics (17(1)). R66. + http://www.combinatorics.org/Volume_17/PDF/v17i1r66.pdf + .. [2] “Combinatorics - Reaching All Possible Simple Directed Graphs with a given + Degree Sequence with 2-Edge Swaps.” Mathematics Stack Exchange, + https://math.stackexchange.com/questions/22272/. Accessed 30 May 2022. + """ + if nswap > max_tries: + raise nx.NetworkXError("Number of swaps > number of tries allowed.") + if len(G) < 4: + raise nx.NetworkXError("DiGraph has fewer than four nodes.") + if len(G.edges) < 3: + raise nx.NetworkXError("DiGraph has fewer than 3 edges") + + # Instead of choosing uniformly at random from a generated edge list, + # this algorithm chooses nonuniformly from the set of nodes with + # probability weighted by degree. + tries = 0 + swapcount = 0 + keys, degrees = zip(*G.degree()) # keys, degree + cdf = nx.utils.cumulative_distribution(degrees) # cdf of degree + discrete_sequence = nx.utils.discrete_sequence + + while swapcount < nswap: + # choose source node index from discrete distribution + start_index = discrete_sequence(1, cdistribution=cdf, seed=seed)[0] + start = keys[start_index] + tries += 1 + + if tries > max_tries: + msg = f"Maximum number of swap attempts ({tries}) exceeded before desired swaps achieved ({nswap})." 
+ raise nx.NetworkXAlgorithmError(msg) + + # If the given node doesn't have any out edges, then there isn't anything to swap + if G.out_degree(start) == 0: + continue + second = seed.choice(list(G.succ[start])) + if start == second: + continue + + if G.out_degree(second) == 0: + continue + third = seed.choice(list(G.succ[second])) + if second == third: + continue + + if G.out_degree(third) == 0: + continue + fourth = seed.choice(list(G.succ[third])) + if third == fourth: + continue + + if ( + third not in G.succ[start] + and fourth not in G.succ[second] + and second not in G.succ[third] + ): + # Swap nodes + G.add_edge(start, third) + G.add_edge(third, second) + G.add_edge(second, fourth) + G.remove_edge(start, second) + G.remove_edge(second, third) + G.remove_edge(third, fourth) + swapcount += 1 + + return G + + +@py_random_state(3) +@nx._dispatchable(mutates_input=True, returns_graph=True) +def double_edge_swap(G, nswap=1, max_tries=100, seed=None): + """Swap two edges in the graph while keeping the node degrees fixed. + + A double-edge swap removes two randomly chosen edges u-v and x-y + and creates the new edges u-x and v-y:: + + u--v u v + becomes | | + x--y x y + + If either the edge u-x or v-y already exist no swap is performed + and another attempt is made to find a suitable edge pair. + + Parameters + ---------- + G : graph + An undirected graph + + nswap : integer (optional, default=1) + Number of double-edge swaps to perform + + max_tries : integer (optional) + Maximum number of attempts to swap edges + + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness`. + + Returns + ------- + G : graph + The graph after double edge swaps. + + Raises + ------ + NetworkXError + If `G` is directed, or + If `nswap` > `max_tries`, or + If there are fewer than 4 nodes or 2 edges in `G`. 
+ NetworkXAlgorithmError + If the number of swap attempts exceeds `max_tries` before `nswap` swaps are made + + Notes + ----- + Does not enforce any connectivity constraints. + + The graph G is modified in place. + """ + if G.is_directed(): + raise nx.NetworkXError( + "double_edge_swap() not defined for directed graphs. Use directed_edge_swap instead." + ) + if nswap > max_tries: + raise nx.NetworkXError("Number of swaps > number of tries allowed.") + if len(G) < 4: + raise nx.NetworkXError("Graph has fewer than four nodes.") + if len(G.edges) < 2: + raise nx.NetworkXError("Graph has fewer than 2 edges") + # Instead of choosing uniformly at random from a generated edge list, + # this algorithm chooses nonuniformly from the set of nodes with + # probability weighted by degree. + n = 0 + swapcount = 0 + keys, degrees = zip(*G.degree()) # keys, degree + cdf = nx.utils.cumulative_distribution(degrees) # cdf of degree + discrete_sequence = nx.utils.discrete_sequence + while swapcount < nswap: + # if random.random() < 0.5: continue # trick to avoid periodicities? + # pick two random edges without creating edge list + # choose source node indices from discrete distribution + (ui, xi) = discrete_sequence(2, cdistribution=cdf, seed=seed) + if ui == xi: + continue # same source, skip + u = keys[ui] # convert index to label + x = keys[xi] + # choose target uniformly from neighbors + v = seed.choice(list(G[u])) + y = seed.choice(list(G[x])) + if v == y: + continue # same target, skip + if (x not in G[u]) and (y not in G[v]): # don't create parallel edges + G.add_edge(u, x) + G.add_edge(v, y) + G.remove_edge(u, v) + G.remove_edge(x, y) + swapcount += 1 + if n >= max_tries: + e = ( + f"Maximum number of swap attempts ({n}) exceeded " + f"before desired swaps achieved ({nswap})." 
+ ) + raise nx.NetworkXAlgorithmError(e) + n += 1 + return G + + +@py_random_state(3) +@nx._dispatchable(mutates_input=True) +def connected_double_edge_swap(G, nswap=1, _window_threshold=3, seed=None): + """Attempts the specified number of double-edge swaps in the graph `G`. + + A double-edge swap removes two randomly chosen edges `(u, v)` and `(x, + y)` and creates the new edges `(u, x)` and `(v, y)`:: + + u--v u v + becomes | | + x--y x y + + If either `(u, x)` or `(v, y)` already exist, then no swap is performed + so the actual number of swapped edges is always *at most* `nswap`. + + Parameters + ---------- + G : graph + An undirected graph + + nswap : integer (optional, default=1) + Number of double-edge swaps to perform + + _window_threshold : integer + + The window size below which connectedness of the graph will be checked + after each swap. + + The "window" in this function is a dynamically updated integer that + represents the number of swap attempts to make before checking if the + graph remains connected. It is an optimization used to decrease the + running time of the algorithm in exchange for increased complexity of + implementation. + + If the window size is below this threshold, then the algorithm checks + after each swap if the graph remains connected by checking if there is a + path joining the two nodes whose edge was just removed. If the window + size is above this threshold, then the algorithm performs do all the + swaps in the window and only then check if the graph is still connected. + + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness`. + + Returns + ------- + int + The number of successful swaps + + Raises + ------ + + NetworkXError + + If the input graph is not connected, or if the graph has fewer than four + nodes. + + Notes + ----- + + The initial graph `G` must be connected, and the resulting graph is + connected. The graph `G` is modified in place. 
+ + References + ---------- + .. [1] C. Gkantsidis and M. Mihail and E. Zegura, + The Markov chain simulation method for generating connected + power law random graphs, 2003. + http://citeseer.ist.psu.edu/gkantsidis03markov.html + """ + if not nx.is_connected(G): + raise nx.NetworkXError("Graph not connected") + if len(G) < 4: + raise nx.NetworkXError("Graph has fewer than four nodes.") + n = 0 + swapcount = 0 + deg = G.degree() + # Label key for nodes + dk = [n for n, d in G.degree()] + cdf = nx.utils.cumulative_distribution([d for n, d in G.degree()]) + discrete_sequence = nx.utils.discrete_sequence + window = 1 + while n < nswap: + wcount = 0 + swapped = [] + # If the window is small, we just check each time whether the graph is + # connected by checking if the nodes that were just separated are still + # connected. + if window < _window_threshold: + # This Boolean keeps track of whether there was a failure or not. + fail = False + while wcount < window and n < nswap: + # Pick two random edges without creating the edge list. Choose + # source nodes from the discrete degree distribution. + (ui, xi) = discrete_sequence(2, cdistribution=cdf, seed=seed) + # If the source nodes are the same, skip this pair. + if ui == xi: + continue + # Convert an index to a node label. + u = dk[ui] + x = dk[xi] + # Choose targets uniformly from neighbors. + v = seed.choice(list(G.neighbors(u))) + y = seed.choice(list(G.neighbors(x))) + # If the target nodes are the same, skip this pair. + if v == y: + continue + if x not in G[u] and y not in G[v]: + G.remove_edge(u, v) + G.remove_edge(x, y) + G.add_edge(u, x) + G.add_edge(v, y) + swapped.append((u, v, x, y)) + swapcount += 1 + n += 1 + # If G remains connected... + if nx.has_path(G, u, v): + wcount += 1 + # Otherwise, undo the changes. + else: + G.add_edge(u, v) + G.add_edge(x, y) + G.remove_edge(u, x) + G.remove_edge(v, y) + swapcount -= 1 + fail = True + # If one of the swaps failed, reduce the window size. 
+ if fail: + window = math.ceil(window / 2) + else: + window += 1 + # If the window is large, then there is a good chance that a bunch of + # swaps will work. It's quicker to do all those swaps first and then + # check if the graph remains connected. + else: + while wcount < window and n < nswap: + # Pick two random edges without creating the edge list. Choose + # source nodes from the discrete degree distribution. + (ui, xi) = discrete_sequence(2, cdistribution=cdf, seed=seed) + # If the source nodes are the same, skip this pair. + if ui == xi: + continue + # Convert an index to a node label. + u = dk[ui] + x = dk[xi] + # Choose targets uniformly from neighbors. + v = seed.choice(list(G.neighbors(u))) + y = seed.choice(list(G.neighbors(x))) + # If the target nodes are the same, skip this pair. + if v == y: + continue + if x not in G[u] and y not in G[v]: + G.remove_edge(u, v) + G.remove_edge(x, y) + G.add_edge(u, x) + G.add_edge(v, y) + swapped.append((u, v, x, y)) + swapcount += 1 + n += 1 + wcount += 1 + # If the graph remains connected, increase the window size. + if nx.is_connected(G): + window += 1 + # Otherwise, undo the changes from the previous window and decrease + # the window size. + else: + while swapped: + (u, v, x, y) = swapped.pop() + G.add_edge(u, v) + G.add_edge(x, y) + G.remove_edge(u, x) + G.remove_edge(v, y) + swapcount -= 1 + window = math.ceil(window / 2) + return swapcount diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/threshold.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/threshold.py new file mode 100644 index 0000000000000000000000000000000000000000..e8fb8efedb589f8ddda28dbe05ac148d01fc32d7 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/threshold.py @@ -0,0 +1,980 @@ +""" +Threshold Graphs - Creation, manipulation and identification. 
+""" + +from math import sqrt + +import networkx as nx +from networkx.utils import py_random_state + +__all__ = ["is_threshold_graph", "find_threshold_graph"] + + +@nx._dispatchable +def is_threshold_graph(G): + """ + Returns `True` if `G` is a threshold graph. + + Parameters + ---------- + G : NetworkX graph instance + An instance of `Graph`, `DiGraph`, `MultiGraph` or `MultiDiGraph` + + Returns + ------- + bool + `True` if `G` is a threshold graph, `False` otherwise. + + Examples + -------- + >>> from networkx.algorithms.threshold import is_threshold_graph + >>> G = nx.path_graph(3) + >>> is_threshold_graph(G) + True + >>> G = nx.barbell_graph(3, 3) + >>> is_threshold_graph(G) + False + + References + ---------- + .. [1] Threshold graphs: https://en.wikipedia.org/wiki/Threshold_graph + """ + return is_threshold_sequence([d for n, d in G.degree()]) + + +def is_threshold_sequence(degree_sequence): + """ + Returns True if the sequence is a threshold degree sequence. + + Uses the property that a threshold graph must be constructed by + adding either dominating or isolated nodes. Thus, it can be + deconstructed iteratively by removing a node of degree zero or a + node that connects to the remaining nodes. If this deconstruction + fails then the sequence is not a threshold sequence. + """ + ds = degree_sequence[:] # get a copy so we don't destroy original + ds.sort() + while ds: + if ds[0] == 0: # if isolated node + ds.pop(0) # remove it + continue + if ds[-1] != len(ds) - 1: # is the largest degree node dominating? + return False # no, not a threshold degree sequence + ds.pop() # yes, largest is the dominating node + ds = [d - 1 for d in ds] # remove it and decrement all degrees + return True + + +def creation_sequence(degree_sequence, with_labels=False, compact=False): + """ + Determines the creation sequence for the given threshold degree sequence. + + The creation sequence is a list of single characters 'd' + or 'i': 'd' for dominating or 'i' for isolated vertices. 
+ Dominating vertices are connected to all vertices present when it + is added. The first node added is by convention 'd'. + This list can be converted to a string if desired using "".join(cs) + + If with_labels==True: + Returns a list of 2-tuples containing the vertex number + and a character 'd' or 'i' which describes the type of vertex. + + If compact==True: + Returns the creation sequence in a compact form that is the number + of 'i's and 'd's alternating. + Examples: + [1,2,2,3] represents d,i,i,d,d,i,i,i + [3,1,2] represents d,d,d,i,d,d + + Notice that the first number is the first vertex to be used for + construction and so is always 'd'. + + with_labels and compact cannot both be True. + + Returns None if the sequence is not a threshold sequence + """ + if with_labels and compact: + raise ValueError("compact sequences cannot be labeled") + + # make an indexed copy + if isinstance(degree_sequence, dict): # labeled degree sequence + ds = [[degree, label] for (label, degree) in degree_sequence.items()] + else: + ds = [[d, i] for i, d in enumerate(degree_sequence)] + ds.sort() + cs = [] # creation sequence + while ds: + if ds[0][0] == 0: # isolated node + (d, v) = ds.pop(0) + if len(ds) > 0: # make sure we start with a d + cs.insert(0, (v, "i")) + else: + cs.insert(0, (v, "d")) + continue + if ds[-1][0] != len(ds) - 1: # Not dominating node + return None # not a threshold degree sequence + (d, v) = ds.pop() + cs.insert(0, (v, "d")) + ds = [[d[0] - 1, d[1]] for d in ds] # decrement due to removing node + + if with_labels: + return cs + if compact: + return make_compact(cs) + return [v[1] for v in cs] # not labeled + + +def make_compact(creation_sequence): + """ + Returns the creation sequence in a compact form + that is the number of 'i's and 'd's alternating. 
+ + Examples + -------- + >>> from networkx.algorithms.threshold import make_compact + >>> make_compact(["d", "i", "i", "d", "d", "i", "i", "i"]) + [1, 2, 2, 3] + >>> make_compact(["d", "d", "d", "i", "d", "d"]) + [3, 1, 2] + + Notice that the first number is the first vertex + to be used for construction and so is always 'd'. + + Labeled creation sequences lose their labels in the + compact representation. + + >>> make_compact([3, 1, 2]) + [3, 1, 2] + """ + first = creation_sequence[0] + if isinstance(first, str): # creation sequence + cs = creation_sequence[:] + elif isinstance(first, tuple): # labeled creation sequence + cs = [s[1] for s in creation_sequence] + elif isinstance(first, int): # compact creation sequence + return creation_sequence + else: + raise TypeError("Not a valid creation sequence type") + + ccs = [] + count = 1 # count the run lengths of d's or i's. + for i in range(1, len(cs)): + if cs[i] == cs[i - 1]: + count += 1 + else: + ccs.append(count) + count = 1 + ccs.append(count) # don't forget the last one + return ccs + + +def uncompact(creation_sequence): + """ + Converts a compact creation sequence for a threshold + graph to a standard creation sequence (unlabeled). + If the creation_sequence is already standard, return it. + See creation_sequence. + """ + first = creation_sequence[0] + if isinstance(first, str): # creation sequence + return creation_sequence + elif isinstance(first, tuple): # labeled creation sequence + return creation_sequence + elif isinstance(first, int): # compact creation sequence + ccscopy = creation_sequence[:] + else: + raise TypeError("Not a valid creation sequence type") + cs = [] + while ccscopy: + cs.extend(ccscopy.pop(0) * ["d"]) + if ccscopy: + cs.extend(ccscopy.pop(0) * ["i"]) + return cs + + +def creation_sequence_to_weights(creation_sequence): + """ + Returns a list of node weights which create the threshold + graph designated by the creation sequence. The weights + are scaled so that the threshold is 1.0. 
The order of the + nodes is the same as that in the creation sequence. + """ + # Turn input sequence into a labeled creation sequence + first = creation_sequence[0] + if isinstance(first, str): # creation sequence + if isinstance(creation_sequence, list): + wseq = creation_sequence[:] + else: + wseq = list(creation_sequence) # string like 'ddidid' + elif isinstance(first, tuple): # labeled creation sequence + wseq = [v[1] for v in creation_sequence] + elif isinstance(first, int): # compact creation sequence + wseq = uncompact(creation_sequence) + else: + raise TypeError("Not a valid creation sequence type") + # pass through twice--first backwards + wseq.reverse() + w = 0 + prev = "i" + for j, s in enumerate(wseq): + if s == "i": + wseq[j] = w + prev = s + elif prev == "i": + prev = s + w += 1 + wseq.reverse() # now pass through forwards + for j, s in enumerate(wseq): + if s == "d": + wseq[j] = w + prev = s + elif prev == "d": + prev = s + w += 1 + # Now scale weights + if prev == "d": + w += 1 + wscale = 1 / w + return [ww * wscale for ww in wseq] + # return wseq + + +def weights_to_creation_sequence( + weights, threshold=1, with_labels=False, compact=False +): + """ + Returns a creation sequence for a threshold graph + determined by the weights and threshold given as input. + If the sum of two node weights is greater than the + threshold value, an edge is created between these nodes. + + The creation sequence is a list of single characters 'd' + or 'i': 'd' for dominating or 'i' for isolated vertices. + Dominating vertices are connected to all vertices present + when it is added. The first node added is by convention 'd'. + + If with_labels==True: + Returns a list of 2-tuples containing the vertex number + and a character 'd' or 'i' which describes the type of vertex. + + If compact==True: + Returns the creation sequence in a compact form that is the number + of 'i's and 'd's alternating. 
+ Examples: + [1,2,2,3] represents d,i,i,d,d,i,i,i + [3,1,2] represents d,d,d,i,d,d + + Notice that the first number is the first vertex to be used for + construction and so is always 'd'. + + with_labels and compact cannot both be True. + """ + if with_labels and compact: + raise ValueError("compact sequences cannot be labeled") + + # make an indexed copy + if isinstance(weights, dict): # labeled weights + wseq = [[w, label] for (label, w) in weights.items()] + else: + wseq = [[w, i] for i, w in enumerate(weights)] + wseq.sort() + cs = [] # creation sequence + cutoff = threshold - wseq[-1][0] + while wseq: + if wseq[0][0] < cutoff: # isolated node + (w, label) = wseq.pop(0) + cs.append((label, "i")) + else: + (w, label) = wseq.pop() + cs.append((label, "d")) + cutoff = threshold - wseq[-1][0] + if len(wseq) == 1: # make sure we start with a d + (w, label) = wseq.pop() + cs.append((label, "d")) + # put in correct order + cs.reverse() + + if with_labels: + return cs + if compact: + return make_compact(cs) + return [v[1] for v in cs] # not labeled + + +# Manipulating NetworkX.Graphs in context of threshold graphs +@nx._dispatchable(graphs=None, returns_graph=True) +def threshold_graph(creation_sequence, create_using=None): + """ + Create a threshold graph from the creation sequence or compact + creation_sequence. + + The input sequence can be a + + creation sequence (e.g. ['d','i','d','d','d','i']) + labeled creation sequence (e.g. [(0,'d'),(2,'d'),(1,'i')]) + compact creation sequence (e.g. [2,1,1,2,0]) + + Use cs=creation_sequence(degree_sequence,labeled=True) + to convert a degree sequence to a creation sequence. 
+ + Returns None if the sequence is not valid + """ + # Turn input sequence into a labeled creation sequence + first = creation_sequence[0] + if isinstance(first, str): # creation sequence + ci = list(enumerate(creation_sequence)) + elif isinstance(first, tuple): # labeled creation sequence + ci = creation_sequence[:] + elif isinstance(first, int): # compact creation sequence + cs = uncompact(creation_sequence) + ci = list(enumerate(cs)) + else: + print("not a valid creation sequence type") + return None + + G = nx.empty_graph(0, create_using) + if G.is_directed(): + raise nx.NetworkXError("Directed Graph not supported") + + G.name = "Threshold Graph" + + # add nodes and edges + # if type is 'i' just add nodea + # if type is a d connect to everything previous + while ci: + (v, node_type) = ci.pop(0) + if node_type == "d": # dominating type, connect to all existing nodes + # We use `for u in list(G):` instead of + # `for u in G:` because we edit the graph `G` in + # the loop. Hence using an iterator will result in + # `RuntimeError: dictionary changed size during iteration` + for u in list(G): + G.add_edge(v, u) + G.add_node(v) + return G + + +@nx._dispatchable +def find_alternating_4_cycle(G): + """ + Returns False if there aren't any alternating 4 cycles. + Otherwise returns the cycle as [a,b,c,d] where (a,b) + and (c,d) are edges and (a,c) and (b,d) are not. + """ + for u, v in G.edges(): + for w in G.nodes(): + if not G.has_edge(u, w) and u != w: + for x in G.neighbors(w): + if not G.has_edge(v, x) and v != x: + return [u, v, w, x] + return False + + +@nx._dispatchable(returns_graph=True) +def find_threshold_graph(G, create_using=None): + """ + Returns a threshold subgraph that is close to largest in `G`. + + The threshold graph will contain the largest degree node in G. 
+ + Parameters + ---------- + G : NetworkX graph instance + An instance of `Graph`, or `MultiDiGraph` + create_using : NetworkX graph class or `None` (default), optional + Type of graph to use when constructing the threshold graph. + If `None`, infer the appropriate graph type from the input. + + Returns + ------- + graph : + A graph instance representing the threshold graph + + Examples + -------- + >>> from networkx.algorithms.threshold import find_threshold_graph + >>> G = nx.barbell_graph(3, 3) + >>> T = find_threshold_graph(G) + >>> T.nodes # may vary + NodeView((7, 8, 5, 6)) + + References + ---------- + .. [1] Threshold graphs: https://en.wikipedia.org/wiki/Threshold_graph + """ + return threshold_graph(find_creation_sequence(G), create_using) + + +@nx._dispatchable +def find_creation_sequence(G): + """ + Find a threshold subgraph that is close to largest in G. + Returns the labeled creation sequence of that threshold graph. + """ + cs = [] + # get a local pointer to the working part of the graph + H = G + while H.order() > 0: + # get new degree sequence on subgraph + dsdict = dict(H.degree()) + ds = [(d, v) for v, d in dsdict.items()] + ds.sort() + # Update threshold graph nodes + if ds[-1][0] == 0: # all are isolated + cs.extend(zip(dsdict, ["i"] * (len(ds) - 1) + ["d"])) + break # Done! + # pull off isolated nodes + while ds[0][0] == 0: + (d, iso) = ds.pop(0) + cs.append((iso, "i")) + # find new biggest node + (d, bigv) = ds.pop() + # add edges of star to t_g + cs.append((bigv, "d")) + # form subgraph of neighbors of big node + H = H.subgraph(H.neighbors(bigv)) + cs.reverse() + return cs + + +# Properties of Threshold Graphs +def triangles(creation_sequence): + """ + Compute number of triangles in the threshold graph with the + given creation sequence. + """ + # shortcut algorithm that doesn't require computing number + # of triangles at each node. 
+ cs = creation_sequence # alias + dr = cs.count("d") # number of d's in sequence + ntri = dr * (dr - 1) * (dr - 2) / 6 # number of triangles in clique of nd d's + # now add dr choose 2 triangles for every 'i' in sequence where + # dr is the number of d's to the right of the current i + for i, typ in enumerate(cs): + if typ == "i": + ntri += dr * (dr - 1) / 2 + else: + dr -= 1 + return ntri + + +def triangle_sequence(creation_sequence): + """ + Return triangle sequence for the given threshold graph creation sequence. + + """ + cs = creation_sequence + seq = [] + dr = cs.count("d") # number of d's to the right of the current pos + dcur = (dr - 1) * (dr - 2) // 2 # number of triangles through a node of clique dr + irun = 0 # number of i's in the last run + drun = 0 # number of d's in the last run + for i, sym in enumerate(cs): + if sym == "d": + drun += 1 + tri = dcur + (dr - 1) * irun # new triangles at this d + else: # cs[i]="i": + if prevsym == "d": # new string of i's + dcur += (dr - 1) * irun # accumulate shared shortest paths + irun = 0 # reset i run counter + dr -= drun # reduce number of d's to right + drun = 0 # reset d run counter + irun += 1 + tri = dr * (dr - 1) // 2 # new triangles at this i + seq.append(tri) + prevsym = sym + return seq + + +def cluster_sequence(creation_sequence): + """ + Return cluster sequence for the given threshold graph creation sequence. 
+ """ + triseq = triangle_sequence(creation_sequence) + degseq = degree_sequence(creation_sequence) + cseq = [] + for i, deg in enumerate(degseq): + tri = triseq[i] + if deg <= 1: # isolated vertex or single pair gets cc 0 + cseq.append(0) + continue + max_size = (deg * (deg - 1)) // 2 + cseq.append(tri / max_size) + return cseq + + +def degree_sequence(creation_sequence): + """ + Return degree sequence for the threshold graph with the given + creation sequence + """ + cs = creation_sequence # alias + seq = [] + rd = cs.count("d") # number of d to the right + for i, sym in enumerate(cs): + if sym == "d": + rd -= 1 + seq.append(rd + i) + else: + seq.append(rd) + return seq + + +def density(creation_sequence): + """ + Return the density of the graph with this creation_sequence. + The density is the fraction of possible edges present. + """ + N = len(creation_sequence) + two_size = sum(degree_sequence(creation_sequence)) + two_possible = N * (N - 1) + den = two_size / two_possible + return den + + +def degree_correlation(creation_sequence): + """ + Return the degree-degree correlation over all edges. 
+ """ + cs = creation_sequence + s1 = 0 # deg_i*deg_j + s2 = 0 # deg_i^2+deg_j^2 + s3 = 0 # deg_i+deg_j + m = 0 # number of edges + rd = cs.count("d") # number of d nodes to the right + rdi = [i for i, sym in enumerate(cs) if sym == "d"] # index of "d"s + ds = degree_sequence(cs) + for i, sym in enumerate(cs): + if sym == "d": + if i != rdi[0]: + print("Logic error in degree_correlation", i, rdi) + raise ValueError + rdi.pop(0) + degi = ds[i] + for dj in rdi: + degj = ds[dj] + s1 += degj * degi + s2 += degi**2 + degj**2 + s3 += degi + degj + m += 1 + denom = 2 * m * s2 - s3 * s3 + numer = 4 * m * s1 - s3 * s3 + if denom == 0: + if numer == 0: + return 1 + raise ValueError(f"Zero Denominator but Numerator is {numer}") + return numer / denom + + +def shortest_path(creation_sequence, u, v): + """ + Find the shortest path between u and v in a + threshold graph G with the given creation_sequence. + + For an unlabeled creation_sequence, the vertices + u and v must be integers in (0,len(sequence)) referring + to the position of the desired vertices in the sequence. + + For a labeled creation_sequence, u and v are labels of vertices. + + Use cs=creation_sequence(degree_sequence,with_labels=True) + to convert a degree sequence to a creation sequence. + + Returns a list of vertices from u to v. 
+ Example: if they are neighbors, it returns [u,v] + """ + # Turn input sequence into a labeled creation sequence + first = creation_sequence[0] + if isinstance(first, str): # creation sequence + cs = [(i, creation_sequence[i]) for i in range(len(creation_sequence))] + elif isinstance(first, tuple): # labeled creation sequence + cs = creation_sequence[:] + elif isinstance(first, int): # compact creation sequence + ci = uncompact(creation_sequence) + cs = [(i, ci[i]) for i in range(len(ci))] + else: + raise TypeError("Not a valid creation sequence type") + + verts = [s[0] for s in cs] + if v not in verts: + raise ValueError(f"Vertex {v} not in graph from creation_sequence") + if u not in verts: + raise ValueError(f"Vertex {u} not in graph from creation_sequence") + # Done checking + if u == v: + return [u] + + uindex = verts.index(u) + vindex = verts.index(v) + bigind = max(uindex, vindex) + if cs[bigind][1] == "d": + return [u, v] + # must be that cs[bigind][1]=='i' + cs = cs[bigind:] + while cs: + vert = cs.pop() + if vert[1] == "d": + return [u, vert[0], v] + # All after u are type 'i' so no connection + return -1 + + +def shortest_path_length(creation_sequence, i): + """ + Return the shortest path length from indicated node to + every other node for the threshold graph with the given + creation sequence. + Node is indicated by index i in creation_sequence unless + creation_sequence is labeled in which case, i is taken to + be the label of the node. + + Paths lengths in threshold graphs are at most 2. + Length to unreachable nodes is set to -1. 
+ """ + # Turn input sequence into a labeled creation sequence + first = creation_sequence[0] + if isinstance(first, str): # creation sequence + if isinstance(creation_sequence, list): + cs = creation_sequence[:] + else: + cs = list(creation_sequence) + elif isinstance(first, tuple): # labeled creation sequence + cs = [v[1] for v in creation_sequence] + i = [v[0] for v in creation_sequence].index(i) + elif isinstance(first, int): # compact creation sequence + cs = uncompact(creation_sequence) + else: + raise TypeError("Not a valid creation sequence type") + + # Compute + N = len(cs) + spl = [2] * N # length 2 to every node + spl[i] = 0 # except self which is 0 + # 1 for all d's to the right + for j in range(i + 1, N): + if cs[j] == "d": + spl[j] = 1 + if cs[i] == "d": # 1 for all nodes to the left + for j in range(i): + spl[j] = 1 + # and -1 for any trailing i to indicate unreachable + for j in range(N - 1, 0, -1): + if cs[j] == "d": + break + spl[j] = -1 + return spl + + +def betweenness_sequence(creation_sequence, normalized=True): + """ + Return betweenness for the threshold graph with the given creation + sequence. The result is unscaled. To scale the values + to the interval [0,1] divide by (n-1)*(n-2). 
+ """ + cs = creation_sequence + seq = [] # betweenness + lastchar = "d" # first node is always a 'd' + dr = float(cs.count("d")) # number of d's to the right of current pos + irun = 0 # number of i's in the last run + drun = 0 # number of d's in the last run + dlast = 0.0 # betweenness of last d + for i, c in enumerate(cs): + if c == "d": # cs[i]=="d": + # betweenness = amt shared with earlier d's and i's + # + new isolated nodes covered + # + new paths to all previous nodes + b = dlast + (irun - 1) * irun / dr + 2 * irun * (i - drun - irun) / dr + drun += 1 # update counter + else: # cs[i]="i": + if lastchar == "d": # if this is a new run of i's + dlast = b # accumulate betweenness + dr -= drun # update number of d's to the right + drun = 0 # reset d counter + irun = 0 # reset i counter + b = 0 # isolated nodes have zero betweenness + irun += 1 # add another i to the run + seq.append(float(b)) + lastchar = c + + # normalize by the number of possible shortest paths + if normalized: + order = len(cs) + scale = 1.0 / ((order - 1) * (order - 2)) + seq = [s * scale for s in seq] + + return seq + + +def eigenvectors(creation_sequence): + """ + Return a 2-tuple of Laplacian eigenvalues and eigenvectors + for the threshold network with creation_sequence. + The first value is a list of eigenvalues. + The second value is a list of eigenvectors. + The lists are in the same order so corresponding eigenvectors + and eigenvalues are in the same position in the two lists. + + Notice that the order of the eigenvalues returned by eigenvalues(cs) + may not correspond to the order of these eigenvectors. 
+ """ + ccs = make_compact(creation_sequence) + N = sum(ccs) + vec = [0] * N + val = vec[:] + # get number of type d nodes to the right (all for first node) + dr = sum(ccs[::2]) + + nn = ccs[0] + vec[0] = [1.0 / sqrt(N)] * N + val[0] = 0 + e = dr + dr -= nn + type_d = True + i = 1 + dd = 1 + while dd < nn: + scale = 1.0 / sqrt(dd * dd + i) + vec[i] = i * [-scale] + [dd * scale] + [0] * (N - i - 1) + val[i] = e + i += 1 + dd += 1 + if len(ccs) == 1: + return (val, vec) + for nn in ccs[1:]: + scale = 1.0 / sqrt(nn * i * (i + nn)) + vec[i] = i * [-nn * scale] + nn * [i * scale] + [0] * (N - i - nn) + # find eigenvalue + type_d = not type_d + if type_d: + e = i + dr + dr -= nn + else: + e = dr + val[i] = e + st = i + i += 1 + dd = 1 + while dd < nn: + scale = 1.0 / sqrt(i - st + dd * dd) + vec[i] = [0] * st + (i - st) * [-scale] + [dd * scale] + [0] * (N - i - 1) + val[i] = e + i += 1 + dd += 1 + return (val, vec) + + +def spectral_projection(u, eigenpairs): + """ + Returns the coefficients of each eigenvector + in a projection of the vector u onto the normalized + eigenvectors which are contained in eigenpairs. + + eigenpairs should be a list of two objects. The + first is a list of eigenvalues and the second a list + of eigenvectors. The eigenvectors should be lists. + + There's not a lot of error checking on lengths of + arrays, etc. so be careful. + """ + coeff = [] + evect = eigenpairs[1] + for ev in evect: + c = sum(evv * uv for (evv, uv) in zip(ev, u)) + coeff.append(c) + return coeff + + +def eigenvalues(creation_sequence): + """ + Return sequence of eigenvalues of the Laplacian of the threshold + graph for the given creation_sequence. + + Based on the Ferrer's diagram method. The spectrum is integral + and is the conjugate of the degree sequence. 
+ + See:: + + @Article{degree-merris-1994, + author = {Russel Merris}, + title = {Degree maximal graphs are Laplacian integral}, + journal = {Linear Algebra Appl.}, + year = {1994}, + volume = {199}, + pages = {381--389}, + } + + """ + degseq = degree_sequence(creation_sequence) + degseq.sort() + eiglist = [] # zero is always one eigenvalue + eig = 0 + row = len(degseq) + bigdeg = degseq.pop() + while row: + if bigdeg < row: + eiglist.append(eig) + row -= 1 + else: + eig += 1 + if degseq: + bigdeg = degseq.pop() + else: + bigdeg = 0 + return eiglist + + +# Threshold graph creation routines + + +@py_random_state(2) +def random_threshold_sequence(n, p, seed=None): + """ + Create a random threshold sequence of size n. + A creation sequence is built by randomly choosing d's with + probability p and i's with probability 1-p. + + s=nx.random_threshold_sequence(10,0.5) + + returns a threshold sequence of length 10 with equal + probably of an i or a d at each position. + + A "random" threshold graph can be built with + + G=nx.threshold_graph(s) + + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness`. + """ + if not (0 <= p <= 1): + raise ValueError("p must be in [0,1]") + + cs = ["d"] # threshold sequences always start with a d + for i in range(1, n): + if seed.random() < p: + cs.append("d") + else: + cs.append("i") + return cs + + +# maybe *_d_threshold_sequence routines should +# be (or be called from) a single routine with a more descriptive name +# and a keyword parameter? +def right_d_threshold_sequence(n, m): + """ + Create a skewed threshold graph with a given number + of vertices (n) and a given number of edges (m). + + The routine returns an unlabeled creation sequence + for the threshold graph. 
+ + FIXME: describe algorithm + + """ + cs = ["d"] + ["i"] * (n - 1) # create sequence with n insolated nodes + + # m n * (n - 1) / 2: + raise ValueError("Too many edges for this many nodes.") + + # connected case m >n-1 + ind = n - 1 + sum = n - 1 + while sum < m: + cs[ind] = "d" + ind -= 1 + sum += ind + ind = m - (sum - ind) + cs[ind] = "d" + return cs + + +def left_d_threshold_sequence(n, m): + """ + Create a skewed threshold graph with a given number + of vertices (n) and a given number of edges (m). + + The routine returns an unlabeled creation sequence + for the threshold graph. + + FIXME: describe algorithm + + """ + cs = ["d"] + ["i"] * (n - 1) # create sequence with n insolated nodes + + # m n * (n - 1) / 2: + raise ValueError("Too many edges for this many nodes.") + + # Connected case when M>N-1 + cs[n - 1] = "d" + sum = n - 1 + ind = 1 + while sum < m: + cs[ind] = "d" + sum += ind + ind += 1 + if sum > m: # be sure not to change the first vertex + cs[sum - m] = "i" + return cs + + +@py_random_state(3) +def swap_d(cs, p_split=1.0, p_combine=1.0, seed=None): + """ + Perform a "swap" operation on a threshold sequence. + + The swap preserves the number of nodes and edges + in the graph for the given sequence. + The resulting sequence is still a threshold sequence. + + Perform one split and one combine operation on the + 'd's of a creation sequence for a threshold graph. + This operation maintains the number of nodes and edges + in the graph, but shifts the edges from node to node + maintaining the threshold quality of the graph. + + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness`. 
+ """ + # preprocess the creation sequence + dlist = [i for (i, node_type) in enumerate(cs[1:-1]) if node_type == "d"] + # split + if seed.random() < p_split: + choice = seed.choice(dlist) + split_to = seed.choice(range(choice)) + flip_side = choice - split_to + if split_to != flip_side and cs[split_to] == "i" and cs[flip_side] == "i": + cs[choice] = "i" + cs[split_to] = "d" + cs[flip_side] = "d" + dlist.remove(choice) + # don't add or combine may reverse this action + # dlist.extend([split_to,flip_side]) + # print >>sys.stderr,"split at %s to %s and %s"%(choice,split_to,flip_side) + # combine + if seed.random() < p_combine and dlist: + first_choice = seed.choice(dlist) + second_choice = seed.choice(dlist) + target = first_choice + second_choice + if target >= len(cs) or cs[target] == "d" or first_choice == second_choice: + return cs + # OK to combine + cs[first_choice] = "i" + cs[second_choice] = "i" + cs[target] = "d" + # print >>sys.stderr,"combine %s and %s to make %s."%(first_choice,second_choice,target) + + return cs diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/time_dependent.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/time_dependent.py new file mode 100644 index 0000000000000000000000000000000000000000..d67cdcf0b8eaecdef8497c77edd3144e96501173 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/time_dependent.py @@ -0,0 +1,142 @@ +"""Time dependent algorithms.""" + +import networkx as nx +from networkx.utils import not_implemented_for + +__all__ = ["cd_index"] + + +@not_implemented_for("undirected") +@not_implemented_for("multigraph") +@nx._dispatchable(node_attrs={"time": None, "weight": 1}) +def cd_index(G, node, time_delta, *, time="time", weight=None): + r"""Compute the CD index for `node` within the graph `G`. 
+ + Calculates the CD index for the given node of the graph, + considering only its predecessors who have the `time` attribute + smaller than or equal to the `time` attribute of the `node` + plus `time_delta`. + + Parameters + ---------- + G : graph + A directed networkx graph whose nodes have `time` attributes and optionally + `weight` attributes (if a weight is not given, it is considered 1). + node : node + The node for which the CD index is calculated. + time_delta : numeric or timedelta + Amount of time after the `time` attribute of the `node`. The value of + `time_delta` must support comparison with the `time` node attribute. For + example, if the `time` attribute of the nodes are `datetime.datetime` + objects, then `time_delta` should be a `datetime.timedelta` object. + time : string (Optional, default is "time") + The name of the node attribute that will be used for the calculations. + weight : string (Optional, default is None) + The name of the node attribute used as weight. + + Returns + ------- + float + The CD index calculated for the node `node` within the graph `G`. + + Raises + ------ + NetworkXError + If not all nodes have a `time` attribute or + `time_delta` and `time` attribute types are not compatible or + `n` equals 0. + + NetworkXNotImplemented + If `G` is a non-directed graph or a multigraph. + + Examples + -------- + >>> from datetime import datetime, timedelta + >>> G = nx.DiGraph() + >>> nodes = { + ... 1: {"time": datetime(2015, 1, 1)}, + ... 2: {"time": datetime(2012, 1, 1), "weight": 4}, + ... 3: {"time": datetime(2010, 1, 1)}, + ... 4: {"time": datetime(2008, 1, 1)}, + ... 5: {"time": datetime(2014, 1, 1)}, + ... 
} + >>> G.add_nodes_from([(n, nodes[n]) for n in nodes]) + >>> edges = [(1, 3), (1, 4), (2, 3), (3, 4), (3, 5)] + >>> G.add_edges_from(edges) + >>> delta = timedelta(days=5 * 365) + >>> nx.cd_index(G, 3, time_delta=delta, time="time") + 0.5 + >>> nx.cd_index(G, 3, time_delta=delta, time="time", weight="weight") + 0.12 + + Integers can also be used for the time values: + >>> node_times = {1: 2015, 2: 2012, 3: 2010, 4: 2008, 5: 2014} + >>> nx.set_node_attributes(G, node_times, "new_time") + >>> nx.cd_index(G, 3, time_delta=4, time="new_time") + 0.5 + >>> nx.cd_index(G, 3, time_delta=4, time="new_time", weight="weight") + 0.12 + + Notes + ----- + This method implements the algorithm for calculating the CD index, + as described in the paper by Funk and Owen-Smith [1]_. The CD index + is used in order to check how consolidating or destabilizing a patent + is, hence the nodes of the graph represent patents and the edges show + the citations between these patents. The mathematical model is given + below: + + .. math:: + CD_{t}=\frac{1}{n_{t}}\sum_{i=1}^{n}\frac{-2f_{it}b_{it}+f_{it}}{w_{it}}, + + where `f_{it}` equals 1 if `i` cites the focal patent else 0, `b_{it}` equals + 1 if `i` cites any of the focal patents successors else 0, `n_{t}` is the number + of forward citations in `i` and `w_{it}` is a matrix of weight for patent `i` + at time `t`. + + The `datetime.timedelta` package can lead to off-by-one issues when converting + from years to days. In the example above `timedelta(days=5 * 365)` looks like + 5 years, but it isn't because of leap year days. So it gives the same result + as `timedelta(days=4 * 365)`. But using `timedelta(days=5 * 365 + 1)` gives + a 5 year delta **for this choice of years** but may not if the 5 year gap has + more than 1 leap year. To avoid these issues, use integers to represent years, + or be very careful when you convert units of time. + + References + ---------- + .. [1] Funk, Russell J., and Jason Owen-Smith. 
+ "A dynamic network measure of technological change." + Management science 63, no. 3 (2017): 791-817. + http://russellfunk.org/cdindex/static/papers/funk_ms_2017.pdf + + """ + if not all(time in G.nodes[n] for n in G): + raise nx.NetworkXError("Not all nodes have a 'time' attribute.") + + try: + # get target_date + target_date = G.nodes[node][time] + time_delta + # keep the predecessors that existed before the target date + pred = {i for i in G.pred[node] if G.nodes[i][time] <= target_date} + except: + raise nx.NetworkXError( + "Addition and comparison are not supported between 'time_delta' " + "and 'time' types." + ) + + # -1 if any edge between node's predecessors and node's successors, else 1 + b = [-1 if any(j in G[i] for j in G[node]) else 1 for i in pred] + + # n is size of the union of the focal node's predecessors and its successors' predecessors + n = len(pred.union(*(G.pred[s].keys() - {node} for s in G[node]))) + if n == 0: + raise nx.NetworkXError("The cd index cannot be defined.") + + # calculate cd index + if weight is None: + return round(sum(bi for bi in b) / n, 2) + else: + # If a node has the specified weight attribute, its weight is used in the calculation + # otherwise, a weight of 1 is assumed for that node + weights = [G.nodes[i].get(weight, 1) for i in pred] + return round(sum(bi / wt for bi, wt in zip(b, weights)) / n, 2) diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/tournament.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/tournament.py new file mode 100644 index 0000000000000000000000000000000000000000..25c1983e6a6d0a5dbba05d28f127d3ef62140117 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/tournament.py @@ -0,0 +1,403 @@ +"""Functions concerning tournament graphs. + +A `tournament graph`_ is a complete oriented graph. In other words, it +is a directed graph in which there is exactly one directed edge joining +each pair of distinct nodes. 
For each function in this module that +accepts a graph as input, you must provide a tournament graph. The +responsibility is on the caller to ensure that the graph is a tournament +graph: + + >>> G = nx.DiGraph([(0, 1), (1, 2), (2, 0)]) + >>> nx.is_tournament(G) + True + +To access the functions in this module, you must access them through the +:mod:`networkx.tournament` module:: + + >>> nx.tournament.is_reachable(G, 0, 1) + True + +.. _tournament graph: https://en.wikipedia.org/wiki/Tournament_%28graph_theory%29 + +""" + +from itertools import combinations + +import networkx as nx +from networkx.algorithms.simple_paths import is_simple_path as is_path +from networkx.utils import arbitrary_element, not_implemented_for, py_random_state + +__all__ = [ + "hamiltonian_path", + "is_reachable", + "is_strongly_connected", + "is_tournament", + "random_tournament", + "score_sequence", +] + + +def index_satisfying(iterable, condition): + """Returns the index of the first element in `iterable` that + satisfies the given condition. + + If no such element is found (that is, when the iterable is + exhausted), this returns the length of the iterable (that is, one + greater than the last index of the iterable). + + `iterable` must not be empty. If `iterable` is empty, this + function raises :exc:`ValueError`. + + """ + # Pre-condition: iterable must not be empty. + for i, x in enumerate(iterable): + if condition(x): + return i + # If we reach the end of the iterable without finding an element + # that satisfies the condition, return the length of the iterable, + # which is one greater than the index of its last element. If the + # iterable was empty, `i` will not be defined, so we raise an + # exception. + try: + return i + 1 + except NameError as err: + raise ValueError("iterable must be non-empty") from err + + +@not_implemented_for("undirected") +@not_implemented_for("multigraph") +@nx._dispatchable +def is_tournament(G): + """Returns True if and only if `G` is a tournament. 
+ + A tournament is a directed graph, with neither self-loops nor + multi-edges, in which there is exactly one directed edge joining + each pair of distinct nodes. + + Parameters + ---------- + G : NetworkX graph + A directed graph representing a tournament. + + Returns + ------- + bool + Whether the given graph is a tournament graph. + + Examples + -------- + >>> G = nx.DiGraph([(0, 1), (1, 2), (2, 0)]) + >>> nx.is_tournament(G) + True + + Notes + ----- + Some definitions require a self-loop on each node, but that is not + the convention used here. + + """ + # In a tournament, there is exactly one directed edge joining each pair. + return ( + all((v in G[u]) ^ (u in G[v]) for u, v in combinations(G, 2)) + and nx.number_of_selfloops(G) == 0 + ) + + +@not_implemented_for("undirected") +@not_implemented_for("multigraph") +@nx._dispatchable +def hamiltonian_path(G): + """Returns a Hamiltonian path in the given tournament graph. + + Each tournament has a Hamiltonian path. If furthermore, the + tournament is strongly connected, then the returned Hamiltonian path + is a Hamiltonian cycle (by joining the endpoints of the path). + + Parameters + ---------- + G : NetworkX graph + A directed graph representing a tournament. + + Returns + ------- + path : list + A list of nodes which form a Hamiltonian path in `G`. + + Examples + -------- + >>> G = nx.DiGraph([(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]) + >>> nx.is_tournament(G) + True + >>> nx.tournament.hamiltonian_path(G) + [0, 1, 2, 3] + + Notes + ----- + This is a recursive implementation with an asymptotic running time + of $O(n^2)$, ignoring multiplicative polylogarithmic factors, where + $n$ is the number of nodes in the graph. 
+ + """ + if len(G) == 0: + return [] + if len(G) == 1: + return [arbitrary_element(G)] + v = arbitrary_element(G) + hampath = hamiltonian_path(G.subgraph(set(G) - {v})) + # Get the index of the first node in the path that does *not* have + # an edge to `v`, then insert `v` before that node. + index = index_satisfying(hampath, lambda u: v not in G[u]) + hampath.insert(index, v) + return hampath + + +@py_random_state(1) +@nx._dispatchable(graphs=None, returns_graph=True) +def random_tournament(n, seed=None): + r"""Returns a random tournament graph on `n` nodes. + + Parameters + ---------- + n : int + The number of nodes in the returned graph. + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness`. + + Returns + ------- + G : DiGraph + A tournament on `n` nodes, with exactly one directed edge joining + each pair of distinct nodes. + + Notes + ----- + This algorithm adds, for each pair of distinct nodes, an edge with + uniformly random orientation. In other words, `\binom{n}{2}` flips + of an unbiased coin decide the orientations of the edges in the + graph. + + """ + # Flip an unbiased coin for each pair of distinct nodes. + coins = (seed.random() for i in range((n * (n - 1)) // 2)) + pairs = combinations(range(n), 2) + edges = ((u, v) if r < 0.5 else (v, u) for (u, v), r in zip(pairs, coins)) + return nx.DiGraph(edges) + + +@not_implemented_for("undirected") +@not_implemented_for("multigraph") +@nx._dispatchable +def score_sequence(G): + """Returns the score sequence for the given tournament graph. + + The score sequence is the sorted list of the out-degrees of the + nodes of the graph. + + Parameters + ---------- + G : NetworkX graph + A directed graph representing a tournament. + + Returns + ------- + list + A sorted list of the out-degrees of the nodes of `G`. 
+ + Examples + -------- + >>> G = nx.DiGraph([(1, 0), (1, 3), (0, 2), (0, 3), (2, 1), (3, 2)]) + >>> nx.is_tournament(G) + True + >>> nx.tournament.score_sequence(G) + [1, 1, 2, 2] + + """ + return sorted(d for v, d in G.out_degree()) + + +@not_implemented_for("undirected") +@not_implemented_for("multigraph") +@nx._dispatchable(preserve_edge_attrs={"G": {"weight": 1}}) +def tournament_matrix(G): + r"""Returns the tournament matrix for the given tournament graph. + + This function requires SciPy. + + The *tournament matrix* of a tournament graph with edge set *E* is + the matrix *T* defined by + + .. math:: + + T_{i j} = + \begin{cases} + +1 & \text{if } (i, j) \in E \\ + -1 & \text{if } (j, i) \in E \\ + 0 & \text{if } i == j. + \end{cases} + + An equivalent definition is `T = A - A^T`, where *A* is the + adjacency matrix of the graph `G`. + + Parameters + ---------- + G : NetworkX graph + A directed graph representing a tournament. + + Returns + ------- + SciPy sparse array + The tournament matrix of the tournament graph `G`. + + Raises + ------ + ImportError + If SciPy is not available. + + """ + A = nx.adjacency_matrix(G) + return A - A.T + + +@not_implemented_for("undirected") +@not_implemented_for("multigraph") +@nx._dispatchable +def is_reachable(G, s, t): + """Decides whether there is a path from `s` to `t` in the + tournament. + + This function is more theoretically efficient than the reachability + checks than the shortest path algorithms in + :mod:`networkx.algorithms.shortest_paths`. + + The given graph **must** be a tournament, otherwise this function's + behavior is undefined. + + Parameters + ---------- + G : NetworkX graph + A directed graph representing a tournament. + + s : node + A node in the graph. + + t : node + A node in the graph. + + Returns + ------- + bool + Whether there is a path from `s` to `t` in `G`. 
+ + Examples + -------- + >>> G = nx.DiGraph([(1, 0), (1, 3), (1, 2), (2, 3), (2, 0), (3, 0)]) + >>> nx.is_tournament(G) + True + >>> nx.tournament.is_reachable(G, 1, 3) + True + >>> nx.tournament.is_reachable(G, 3, 2) + False + + Notes + ----- + Although this function is more theoretically efficient than the + generic shortest path functions, a speedup requires the use of + parallelism. Though it may in the future, the current implementation + does not use parallelism, thus you may not see much of a speedup. + + This algorithm comes from [1]. + + References + ---------- + .. [1] Tantau, Till. + "A note on the complexity of the reachability problem for + tournaments." + *Electronic Colloquium on Computational Complexity*. 2001. + + """ + + def two_neighborhood(G, v): + """Returns the set of nodes at distance at most two from `v`. + + `G` must be a graph and `v` a node in that graph. + + The returned set includes the nodes at distance zero (that is, + the node `v` itself), the nodes at distance one (that is, the + out-neighbors of `v`), and the nodes at distance two. + + """ + return { + x for x in G if x == v or x in G[v] or any(is_path(G, [v, z, x]) for z in G) + } + + def is_closed(G, nodes): + """Decides whether the given set of nodes is closed. + + A set *S* of nodes is *closed* if for each node *u* in the graph + not in *S* and for each node *v* in *S*, there is an edge from + *u* to *v*. + + """ + return all(v in G[u] for u in set(G) - nodes for v in nodes) + + neighborhoods = [two_neighborhood(G, v) for v in G] + return all(not (is_closed(G, S) and s in S and t not in S) for S in neighborhoods) + + +@not_implemented_for("undirected") +@not_implemented_for("multigraph") +@nx._dispatchable(name="tournament_is_strongly_connected") +def is_strongly_connected(G): + """Decides whether the given tournament is strongly connected. + + This function is more theoretically efficient than the + :func:`~networkx.algorithms.components.is_strongly_connected` + function. 
+ + The given graph **must** be a tournament, otherwise this function's + behavior is undefined. + + Parameters + ---------- + G : NetworkX graph + A directed graph representing a tournament. + + Returns + ------- + bool + Whether the tournament is strongly connected. + + Examples + -------- + >>> G = nx.DiGraph([(0, 1), (0, 2), (1, 2), (1, 3), (2, 3), (3, 0)]) + >>> nx.is_tournament(G) + True + >>> nx.tournament.is_strongly_connected(G) + True + >>> G.remove_edge(3, 0) + >>> G.add_edge(0, 3) + >>> nx.is_tournament(G) + True + >>> nx.tournament.is_strongly_connected(G) + False + + Notes + ----- + Although this function is more theoretically efficient than the + generic strong connectivity function, a speedup requires the use of + parallelism. Though it may in the future, the current implementation + does not use parallelism, thus you may not see much of a speedup. + + This algorithm comes from [1]. + + References + ---------- + .. [1] Tantau, Till. + "A note on the complexity of the reachability problem for + tournaments." + *Electronic Colloquium on Computational Complexity*. 2001. 
+ + + """ + return all(is_reachable(G, u, v) for u in G for v in G) diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/triads.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/triads.py new file mode 100644 index 0000000000000000000000000000000000000000..640fc304e2393bb0ee2a004f6bf9dc8d197c01b2 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/triads.py @@ -0,0 +1,604 @@ +# See https://github.com/networkx/networkx/pull/1474 +# Copyright 2011 Reya Group +# Copyright 2011 Alex Levenson +# Copyright 2011 Diederik van Liere +"""Functions for analyzing triads of a graph.""" + +from collections import defaultdict +from itertools import combinations, permutations + +import networkx as nx +from networkx.utils import not_implemented_for, py_random_state + +__all__ = [ + "triadic_census", + "is_triad", + "all_triplets", + "all_triads", + "triads_by_type", + "triad_type", + "random_triad", +] + +#: The integer codes representing each type of triad. +#: +#: Triads that are the same up to symmetry have the same code. +TRICODES = ( + 1, + 2, + 2, + 3, + 2, + 4, + 6, + 8, + 2, + 6, + 5, + 7, + 3, + 8, + 7, + 11, + 2, + 6, + 4, + 8, + 5, + 9, + 9, + 13, + 6, + 10, + 9, + 14, + 7, + 14, + 12, + 15, + 2, + 5, + 6, + 7, + 6, + 9, + 10, + 14, + 4, + 9, + 9, + 12, + 8, + 13, + 14, + 15, + 3, + 7, + 8, + 11, + 7, + 12, + 14, + 15, + 8, + 14, + 13, + 15, + 11, + 15, + 15, + 16, +) + +#: The names of each type of triad. The order of the elements is +#: important: it corresponds to the tricodes given in :data:`TRICODES`. +TRIAD_NAMES = ( + "003", + "012", + "102", + "021D", + "021U", + "021C", + "111D", + "111U", + "030T", + "030C", + "201", + "120D", + "120U", + "120C", + "210", + "300", +) + + +#: A dictionary mapping triad code to triad name. +TRICODE_TO_NAME = {i: TRIAD_NAMES[code - 1] for i, code in enumerate(TRICODES)} + + +def _tricode(G, v, u, w): + """Returns the integer code of the given triad. 
+ + This is some fancy magic that comes from Batagelj and Mrvar's paper. It + treats each edge joining a pair of `v`, `u`, and `w` as a bit in + the binary representation of an integer. + + """ + combos = ((v, u, 1), (u, v, 2), (v, w, 4), (w, v, 8), (u, w, 16), (w, u, 32)) + return sum(x for u, v, x in combos if v in G[u]) + + +@not_implemented_for("undirected") +@nx._dispatchable +def triadic_census(G, nodelist=None): + """Determines the triadic census of a directed graph. + + The triadic census is a count of how many of the 16 possible types of + triads are present in a directed graph. If a list of nodes is passed, then + only those triads are taken into account which have elements of nodelist in them. + + Parameters + ---------- + G : digraph + A NetworkX DiGraph + nodelist : list + List of nodes for which you want to calculate triadic census + + Returns + ------- + census : dict + Dictionary with triad type as keys and number of occurrences as values. + + Examples + -------- + >>> G = nx.DiGraph([(1, 2), (2, 3), (3, 1), (3, 4), (4, 1), (4, 2)]) + >>> triadic_census = nx.triadic_census(G) + >>> for key, value in triadic_census.items(): + ... print(f"{key}: {value}") + 003: 0 + 012: 0 + 102: 0 + 021D: 0 + 021U: 0 + 021C: 0 + 111D: 0 + 111U: 0 + 030T: 2 + 030C: 2 + 201: 0 + 120D: 0 + 120U: 0 + 120C: 0 + 210: 0 + 300: 0 + + Notes + ----- + This algorithm has complexity $O(m)$ where $m$ is the number of edges in + the graph. + + For undirected graphs, the triadic census can be computed by first converting + the graph into a directed graph using the ``G.to_directed()`` method. + After this conversion, only the triad types 003, 102, 201 and 300 will be + present in the undirected scenario. + + Raises + ------ + ValueError + If `nodelist` contains duplicate nodes or nodes not in `G`. + If you want to ignore this you can preprocess with `set(nodelist) & G.nodes` + + See also + -------- + triad_graph + + References + ---------- + .. 
[1] Vladimir Batagelj and Andrej Mrvar, A subquadratic triad census + algorithm for large sparse networks with small maximum degree, + University of Ljubljana, + http://vlado.fmf.uni-lj.si/pub/networks/doc/triads/triads.pdf + + """ + nodeset = set(G.nbunch_iter(nodelist)) + if nodelist is not None and len(nodelist) != len(nodeset): + raise ValueError("nodelist includes duplicate nodes or nodes not in G") + + N = len(G) + Nnot = N - len(nodeset) # can signal special counting for subset of nodes + + # create an ordering of nodes with nodeset nodes first + m = {n: i for i, n in enumerate(nodeset)} + if Nnot: + # add non-nodeset nodes later in the ordering + not_nodeset = G.nodes - nodeset + m.update((n, i + N) for i, n in enumerate(not_nodeset)) + + # build all_neighbor dicts for easy counting + # After Python 3.8 can leave off these keys(). Speedup also using G._pred + # nbrs = {n: G._pred[n].keys() | G._succ[n].keys() for n in G} + nbrs = {n: G.pred[n].keys() | G.succ[n].keys() for n in G} + dbl_nbrs = {n: G.pred[n].keys() & G.succ[n].keys() for n in G} + + if Nnot: + sgl_nbrs = {n: G.pred[n].keys() ^ G.succ[n].keys() for n in not_nodeset} + # find number of edges not incident to nodes in nodeset + sgl = sum(1 for n in not_nodeset for nbr in sgl_nbrs[n] if nbr not in nodeset) + sgl_edges_outside = sgl // 2 + dbl = sum(1 for n in not_nodeset for nbr in dbl_nbrs[n] if nbr not in nodeset) + dbl_edges_outside = dbl // 2 + + # Initialize the count for each triad to be zero. + census = {name: 0 for name in TRIAD_NAMES} + # Main loop over nodes + for v in nodeset: + vnbrs = nbrs[v] + dbl_vnbrs = dbl_nbrs[v] + if Nnot: + # set up counts of edges attached to v. + sgl_unbrs_bdy = sgl_unbrs_out = dbl_unbrs_bdy = dbl_unbrs_out = 0 + for u in vnbrs: + if m[u] <= m[v]: + continue + unbrs = nbrs[u] + neighbors = (vnbrs | unbrs) - {u, v} + # Count connected triads. 
+ for w in neighbors: + if m[u] < m[w] or (m[v] < m[w] < m[u] and v not in nbrs[w]): + code = _tricode(G, v, u, w) + census[TRICODE_TO_NAME[code]] += 1 + + # Use a formula for dyadic triads with edge incident to v + if u in dbl_vnbrs: + census["102"] += N - len(neighbors) - 2 + else: + census["012"] += N - len(neighbors) - 2 + + # Count edges attached to v. Subtract later to get triads with v isolated + # _out are (u,unbr) for unbrs outside boundary of nodeset + # _bdy are (u,unbr) for unbrs on boundary of nodeset (get double counted) + if Nnot and u not in nodeset: + sgl_unbrs = sgl_nbrs[u] + sgl_unbrs_bdy += len(sgl_unbrs & vnbrs - nodeset) + sgl_unbrs_out += len(sgl_unbrs - vnbrs - nodeset) + dbl_unbrs = dbl_nbrs[u] + dbl_unbrs_bdy += len(dbl_unbrs & vnbrs - nodeset) + dbl_unbrs_out += len(dbl_unbrs - vnbrs - nodeset) + # if nodeset == G.nodes, skip this b/c we will find the edge later. + if Nnot: + # Count edges outside nodeset not connected with v (v isolated triads) + census["012"] += sgl_edges_outside - (sgl_unbrs_out + sgl_unbrs_bdy // 2) + census["102"] += dbl_edges_outside - (dbl_unbrs_out + dbl_unbrs_bdy // 2) + + # calculate null triads: "003" + # null triads = total number of possible triads - all found triads + total_triangles = (N * (N - 1) * (N - 2)) // 6 + triangles_without_nodeset = (Nnot * (Nnot - 1) * (Nnot - 2)) // 6 + total_census = total_triangles - triangles_without_nodeset + census["003"] = total_census - sum(census.values()) + + return census + + +@nx._dispatchable +def is_triad(G): + """Returns True if the graph G is a triad, else False. 
+ + Parameters + ---------- + G : graph + A NetworkX Graph + + Returns + ------- + istriad : boolean + Whether G is a valid triad + + Examples + -------- + >>> G = nx.DiGraph([(1, 2), (2, 3), (3, 1)]) + >>> nx.is_triad(G) + True + >>> G.add_edge(0, 1) + >>> nx.is_triad(G) + False + """ + if isinstance(G, nx.Graph): + if G.order() == 3 and nx.is_directed(G): + if not any((n, n) in G.edges() for n in G.nodes()): + return True + return False + + +@not_implemented_for("undirected") +@nx._dispatchable +def all_triplets(G): + """Returns a generator of all possible sets of 3 nodes in a DiGraph. + + .. deprecated:: 3.3 + + all_triplets is deprecated and will be removed in NetworkX version 3.5. + Use `itertools.combinations` instead:: + + all_triplets = itertools.combinations(G, 3) + + Parameters + ---------- + G : digraph + A NetworkX DiGraph + + Returns + ------- + triplets : generator of 3-tuples + Generator of tuples of 3 nodes + + Examples + -------- + >>> G = nx.DiGraph([(1, 2), (2, 3), (3, 4)]) + >>> list(nx.all_triplets(G)) + [(1, 2, 3), (1, 2, 4), (1, 3, 4), (2, 3, 4)] + + """ + import warnings + + warnings.warn( + ( + "\n\nall_triplets is deprecated and will be removed in v3.5.\n" + "Use `itertools.combinations(G, 3)` instead." + ), + category=DeprecationWarning, + stacklevel=4, + ) + triplets = combinations(G.nodes(), 3) + return triplets + + +@not_implemented_for("undirected") +@nx._dispatchable(returns_graph=True) +def all_triads(G): + """A generator of all possible triads in G. + + Parameters + ---------- + G : digraph + A NetworkX DiGraph + + Returns + ------- + all_triads : generator of DiGraphs + Generator of triads (order-3 DiGraphs) + + Examples + -------- + >>> G = nx.DiGraph([(1, 2), (2, 3), (3, 1), (3, 4), (4, 1), (4, 2)]) + >>> for triad in nx.all_triads(G): + ... 
print(triad.edges) + [(1, 2), (2, 3), (3, 1)] + [(1, 2), (4, 1), (4, 2)] + [(3, 1), (3, 4), (4, 1)] + [(2, 3), (3, 4), (4, 2)] + + """ + triplets = combinations(G.nodes(), 3) + for triplet in triplets: + yield G.subgraph(triplet).copy() + + +@not_implemented_for("undirected") +@nx._dispatchable +def triads_by_type(G): + """Returns a list of all triads for each triad type in a directed graph. + There are exactly 16 different types of triads possible. Suppose 1, 2, 3 are three + nodes, they will be classified as a particular triad type if their connections + are as follows: + + - 003: 1, 2, 3 + - 012: 1 -> 2, 3 + - 102: 1 <-> 2, 3 + - 021D: 1 <- 2 -> 3 + - 021U: 1 -> 2 <- 3 + - 021C: 1 -> 2 -> 3 + - 111D: 1 <-> 2 <- 3 + - 111U: 1 <-> 2 -> 3 + - 030T: 1 -> 2 -> 3, 1 -> 3 + - 030C: 1 <- 2 <- 3, 1 -> 3 + - 201: 1 <-> 2 <-> 3 + - 120D: 1 <- 2 -> 3, 1 <-> 3 + - 120U: 1 -> 2 <- 3, 1 <-> 3 + - 120C: 1 -> 2 -> 3, 1 <-> 3 + - 210: 1 -> 2 <-> 3, 1 <-> 3 + - 300: 1 <-> 2 <-> 3, 1 <-> 3 + + Refer to the :doc:`example gallery ` + for visual examples of the triad types. + + Parameters + ---------- + G : digraph + A NetworkX DiGraph + + Returns + ------- + tri_by_type : dict + Dictionary with triad types as keys and lists of triads as values. + + Examples + -------- + >>> G = nx.DiGraph([(1, 2), (1, 3), (2, 3), (3, 1), (5, 6), (5, 4), (6, 7)]) + >>> dict = nx.triads_by_type(G) + >>> dict["120C"][0].edges() + OutEdgeView([(1, 2), (1, 3), (2, 3), (3, 1)]) + >>> dict["012"][0].edges() + OutEdgeView([(1, 2)]) + + References + ---------- + .. [1] Snijders, T. (2012). "Transitivity and triads." University of + Oxford. 
+ https://web.archive.org/web/20170830032057/http://www.stats.ox.ac.uk/~snijders/Trans_Triads_ha.pdf + """ + # num_triads = o * (o - 1) * (o - 2) // 6 + # if num_triads > TRIAD_LIMIT: print(WARNING) + all_tri = all_triads(G) + tri_by_type = defaultdict(list) + for triad in all_tri: + name = triad_type(triad) + tri_by_type[name].append(triad) + return tri_by_type + + +@not_implemented_for("undirected") +@nx._dispatchable +def triad_type(G): + """Returns the sociological triad type for a triad. + + Parameters + ---------- + G : digraph + A NetworkX DiGraph with 3 nodes + + Returns + ------- + triad_type : str + A string identifying the triad type + + Examples + -------- + >>> G = nx.DiGraph([(1, 2), (2, 3), (3, 1)]) + >>> nx.triad_type(G) + '030C' + >>> G.add_edge(1, 3) + >>> nx.triad_type(G) + '120C' + + Notes + ----- + There can be 6 unique edges in a triad (order-3 DiGraph) (so 2^^6=64 unique + triads given 3 nodes). These 64 triads each display exactly 1 of 16 + topologies of triads (topologies can be permuted). These topologies are + identified by the following notation: + + {m}{a}{n}{type} (for example: 111D, 210, 102) + + Here: + + {m} = number of mutual ties (takes 0, 1, 2, 3); a mutual tie is (0,1) + AND (1,0) + {a} = number of asymmetric ties (takes 0, 1, 2, 3); an asymmetric tie + is (0,1) BUT NOT (1,0) or vice versa + {n} = number of null ties (takes 0, 1, 2, 3); a null tie is NEITHER + (0,1) NOR (1,0) + {type} = a letter (takes U, D, C, T) corresponding to up, down, cyclical + and transitive. This is only used for topologies that can have + more than one form (eg: 021D and 021U). + + References + ---------- + .. [1] Snijders, T. (2012). "Transitivity and triads." University of + Oxford. 
+ https://web.archive.org/web/20170830032057/http://www.stats.ox.ac.uk/~snijders/Trans_Triads_ha.pdf + """ + if not is_triad(G): + raise nx.NetworkXAlgorithmError("G is not a triad (order-3 DiGraph)") + num_edges = len(G.edges()) + if num_edges == 0: + return "003" + elif num_edges == 1: + return "012" + elif num_edges == 2: + e1, e2 = G.edges() + if set(e1) == set(e2): + return "102" + elif e1[0] == e2[0]: + return "021D" + elif e1[1] == e2[1]: + return "021U" + elif e1[1] == e2[0] or e2[1] == e1[0]: + return "021C" + elif num_edges == 3: + for e1, e2, e3 in permutations(G.edges(), 3): + if set(e1) == set(e2): + if e3[0] in e1: + return "111U" + # e3[1] in e1: + return "111D" + elif set(e1).symmetric_difference(set(e2)) == set(e3): + if {e1[0], e2[0], e3[0]} == {e1[0], e2[0], e3[0]} == set(G.nodes()): + return "030C" + # e3 == (e1[0], e2[1]) and e2 == (e1[1], e3[1]): + return "030T" + elif num_edges == 4: + for e1, e2, e3, e4 in permutations(G.edges(), 4): + if set(e1) == set(e2): + # identify pair of symmetric edges (which necessarily exists) + if set(e3) == set(e4): + return "201" + if {e3[0]} == {e4[0]} == set(e3).intersection(set(e4)): + return "120D" + if {e3[1]} == {e4[1]} == set(e3).intersection(set(e4)): + return "120U" + if e3[1] == e4[0]: + return "120C" + elif num_edges == 5: + return "210" + elif num_edges == 6: + return "300" + + +@not_implemented_for("undirected") +@py_random_state(1) +@nx._dispatchable(preserve_all_attrs=True, returns_graph=True) +def random_triad(G, seed=None): + """Returns a random triad from a directed graph. + + .. deprecated:: 3.3 + + random_triad is deprecated and will be removed in version 3.5. + Use random sampling directly instead:: + + G.subgraph(random.sample(list(G), 3)) + + Parameters + ---------- + G : digraph + A NetworkX DiGraph + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness`. 
+ + Returns + ------- + G2 : subgraph + A randomly selected triad (order-3 NetworkX DiGraph) + + Raises + ------ + NetworkXError + If the input Graph has less than 3 nodes. + + Examples + -------- + >>> G = nx.DiGraph([(1, 2), (1, 3), (2, 3), (3, 1), (5, 6), (5, 4), (6, 7)]) + >>> triad = nx.random_triad(G, seed=1) + >>> triad.edges + OutEdgeView([(1, 2)]) + + """ + import warnings + + warnings.warn( + ( + "\n\nrandom_triad is deprecated and will be removed in NetworkX v3.5.\n" + "Use random.sample instead, e.g.::\n\n" + "\tG.subgraph(random.sample(list(G), 3))\n" + ), + category=DeprecationWarning, + stacklevel=5, + ) + if len(G) < 3: + raise nx.NetworkXError( + f"G needs at least 3 nodes to form a triad; (it has {len(G)} nodes)" + ) + nodes = seed.sample(list(G.nodes()), 3) + G2 = G.subgraph(nodes) + return G2 diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/vitality.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/vitality.py new file mode 100644 index 0000000000000000000000000000000000000000..bf4b016e78dc7429810bb48f948f40212e542eca --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/vitality.py @@ -0,0 +1,76 @@ +""" +Vitality measures. +""" + +from functools import partial + +import networkx as nx + +__all__ = ["closeness_vitality"] + + +@nx._dispatchable(edge_attrs="weight") +def closeness_vitality(G, node=None, weight=None, wiener_index=None): + """Returns the closeness vitality for nodes in the graph. + + The *closeness vitality* of a node, defined in Section 3.6.2 of [1], + is the change in the sum of distances between all node pairs when + excluding that node. + + Parameters + ---------- + G : NetworkX graph + A strongly-connected graph. + + weight : string + The name of the edge attribute used as weight. This is passed + directly to the :func:`~networkx.wiener_index` function. + + node : object + If specified, only the closeness vitality for this node will be + returned. 
Otherwise, a dictionary mapping each node to its + closeness vitality will be returned. + + Other parameters + ---------------- + wiener_index : number + If you have already computed the Wiener index of the graph + `G`, you can provide that value here. Otherwise, it will be + computed for you. + + Returns + ------- + dictionary or float + If `node` is None, this function returns a dictionary + with nodes as keys and closeness vitality as the + value. Otherwise, it returns only the closeness vitality for the + specified `node`. + + The closeness vitality of a node may be negative infinity if + removing that node would disconnect the graph. + + Examples + -------- + >>> G = nx.cycle_graph(3) + >>> nx.closeness_vitality(G) + {0: 2.0, 1: 2.0, 2: 2.0} + + See Also + -------- + closeness_centrality + + References + ---------- + .. [1] Ulrik Brandes, Thomas Erlebach (eds.). + *Network Analysis: Methodological Foundations*. + Springer, 2005. + + + """ + if wiener_index is None: + wiener_index = nx.wiener_index(G, weight=weight) + if node is not None: + after = nx.wiener_index(G.subgraph(set(G) - {node}), weight=weight) + return wiener_index - after + vitality = partial(closeness_vitality, G, weight=weight, wiener_index=wiener_index) + return {v: vitality(node=v) for v in G} diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/voronoi.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/voronoi.py new file mode 100644 index 0000000000000000000000000000000000000000..609a68deff89620e0e022020c33863107decced4 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/voronoi.py @@ -0,0 +1,86 @@ +"""Functions for computing the Voronoi cells of a graph.""" + +import networkx as nx +from networkx.utils import groups + +__all__ = ["voronoi_cells"] + + +@nx._dispatchable(edge_attrs="weight") +def voronoi_cells(G, center_nodes, weight="weight"): + """Returns the Voronoi cells centered at `center_nodes` with respect + to the shortest-path distance 
metric. + + If $C$ is a set of nodes in the graph and $c$ is an element of $C$, + the *Voronoi cell* centered at a node $c$ is the set of all nodes + $v$ that are closer to $c$ than to any other center node in $C$ with + respect to the shortest-path distance metric. [1]_ + + For directed graphs, this will compute the "outward" Voronoi cells, + as defined in [1]_, in which distance is measured from the center + nodes to the target node. For the "inward" Voronoi cells, use the + :meth:`DiGraph.reverse` method to reverse the orientation of the + edges before invoking this function on the directed graph. + + Parameters + ---------- + G : NetworkX graph + + center_nodes : set + A nonempty set of nodes in the graph `G` that represent the + center of the Voronoi cells. + + weight : string or function + The edge attribute (or an arbitrary function) representing the + weight of an edge. This keyword argument is as described in the + documentation for :func:`~networkx.multi_source_dijkstra_path`, + for example. + + Returns + ------- + dictionary + A mapping from center node to set of all nodes in the graph + closer to that center node than to any other center node. The + keys of the dictionary are the element of `center_nodes`, and + the values of the dictionary form a partition of the nodes of + `G`. + + Examples + -------- + To get only the partition of the graph induced by the Voronoi cells, + take the collection of all values in the returned dictionary:: + + >>> G = nx.path_graph(6) + >>> center_nodes = {0, 3} + >>> cells = nx.voronoi_cells(G, center_nodes) + >>> partition = set(map(frozenset, cells.values())) + >>> sorted(map(sorted, partition)) + [[0, 1], [2, 3, 4, 5]] + + Raises + ------ + ValueError + If `center_nodes` is empty. + + References + ---------- + .. [1] Erwig, Martin. (2000),"The graph Voronoi diagram with applications." + *Networks*, 36: 156--163. 
+ https://doi.org/10.1002/1097-0037(200010)36:3<156::AID-NET2>3.0.CO;2-L + + """ + # Determine the shortest paths from any one of the center nodes to + # every node in the graph. + # + # This raises `ValueError` if `center_nodes` is an empty set. + paths = nx.multi_source_dijkstra_path(G, center_nodes, weight=weight) + # Determine the center node from which the shortest path originates. + nearest = {v: p[0] for v, p in paths.items()} + # Get the mapping from center node to all nodes closer to it than to + # any other center node. + cells = groups(nearest) + # We collect all unreachable nodes under a special key, if there are any. + unreachable = set(G) - set(nearest) + if unreachable: + cells["unreachable"] = unreachable + return cells diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/walks.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/walks.py new file mode 100644 index 0000000000000000000000000000000000000000..0ef9dac121805ef2c4e4538a97f275a05dff92cb --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/walks.py @@ -0,0 +1,79 @@ +"""Function for computing walks in a graph.""" + +import networkx as nx + +__all__ = ["number_of_walks"] + + +@nx._dispatchable +def number_of_walks(G, walk_length): + """Returns the number of walks connecting each pair of nodes in `G` + + A *walk* is a sequence of nodes in which each adjacent pair of nodes + in the sequence is adjacent in the graph. A walk can repeat the same + edge and go in the opposite direction just as people can walk on a + set of paths, but standing still is not counted as part of the walk. + + This function only counts the walks with `walk_length` edges. Note that + the number of nodes in the walk sequence is one more than `walk_length`. + The number of walks can grow very quickly on a larger graph + and with a larger walk length. + + Parameters + ---------- + G : NetworkX graph + + walk_length : int + A nonnegative integer representing the length of a walk. 
+ + Returns + ------- + dict + A dictionary of dictionaries in which outer keys are source + nodes, inner keys are target nodes, and inner values are the + number of walks of length `walk_length` connecting those nodes. + + Raises + ------ + ValueError + If `walk_length` is negative + + Examples + -------- + + >>> G = nx.Graph([(0, 1), (1, 2)]) + >>> walks = nx.number_of_walks(G, 2) + >>> walks + {0: {0: 1, 1: 0, 2: 1}, 1: {0: 0, 1: 2, 2: 0}, 2: {0: 1, 1: 0, 2: 1}} + >>> total_walks = sum(sum(tgts.values()) for _, tgts in walks.items()) + + You can also get the number of walks from a specific source node using the + returned dictionary. For example, number of walks of length 1 from node 0 + can be found as follows: + + >>> walks = nx.number_of_walks(G, 1) + >>> walks[0] + {0: 0, 1: 1, 2: 0} + >>> sum(walks[0].values()) # walks from 0 of length 1 + 1 + + Similarly, a target node can also be specified: + + >>> walks[0][1] + 1 + + """ + import numpy as np + + if walk_length < 0: + raise ValueError(f"`walk_length` cannot be negative: {walk_length}") + + A = nx.adjacency_matrix(G, weight=None) + # TODO: Use matrix_power from scipy.sparse when available + # power = sp.sparse.linalg.matrix_power(A, walk_length) + power = np.linalg.matrix_power(A.toarray(), walk_length) + result = { + u: {v: power.item(u_idx, v_idx) for v_idx, v in enumerate(G)} + for u_idx, u in enumerate(G) + } + return result diff --git a/.venv/lib/python3.11/site-packages/networkx/algorithms/wiener.py b/.venv/lib/python3.11/site-packages/networkx/algorithms/wiener.py new file mode 100644 index 0000000000000000000000000000000000000000..ac3abe4ac12b267c1d5403b2653e6ed0b7e489c3 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/networkx/algorithms/wiener.py @@ -0,0 +1,226 @@ +"""Functions related to the Wiener Index of a graph. + +The Wiener Index is a topological measure of a graph +related to the distance between nodes and their degree. +The Schultz Index and Gutman Index are similar measures. 
+They are used categorize molecules via the network of +atoms connected by chemical bonds. The indices are +correlated with functional aspects of the molecules. + +References +---------- +.. [1] `Wikipedia: Wiener Index `_ +.. [2] M.V. Diudeaa and I. Gutman, Wiener-Type Topological Indices, + Croatica Chemica Acta, 71 (1998), 21-51. + https://hrcak.srce.hr/132323 +""" + +import itertools as it + +import networkx as nx + +__all__ = ["wiener_index", "schultz_index", "gutman_index"] + + +@nx._dispatchable(edge_attrs="weight") +def wiener_index(G, weight=None): + """Returns the Wiener index of the given graph. + + The *Wiener index* of a graph is the sum of the shortest-path + (weighted) distances between each pair of reachable nodes. + For pairs of nodes in undirected graphs, only one orientation + of the pair is counted. + + Parameters + ---------- + G : NetworkX graph + + weight : string or None, optional (default: None) + If None, every edge has weight 1. + If a string, use this edge attribute as the edge weight. + Any edge attribute not present defaults to 1. + The edge weights are used to computing shortest-path distances. + + Returns + ------- + number + The Wiener index of the graph `G`. + + Raises + ------ + NetworkXError + If the graph `G` is not connected. + + Notes + ----- + If a pair of nodes is not reachable, the distance is assumed to be + infinity. This means that for graphs that are not + strongly-connected, this function returns ``inf``. + + The Wiener index is not usually defined for directed graphs, however + this function uses the natural generalization of the Wiener index to + directed graphs. 
+ + Examples + -------- + The Wiener index of the (unweighted) complete graph on *n* nodes + equals the number of pairs of the *n* nodes, since each pair of + nodes is at distance one:: + + >>> n = 10 + >>> G = nx.complete_graph(n) + >>> nx.wiener_index(G) == n * (n - 1) / 2 + True + + Graphs that are not strongly-connected have infinite Wiener index:: + + >>> G = nx.empty_graph(2) + >>> nx.wiener_index(G) + inf + + References + ---------- + .. [1] `Wikipedia: Wiener Index `_ + """ + connected = nx.is_strongly_connected(G) if G.is_directed() else nx.is_connected(G) + if not connected: + return float("inf") + + spl = nx.shortest_path_length(G, weight=weight) + total = sum(it.chain.from_iterable(nbrs.values() for node, nbrs in spl)) + # Need to account for double counting pairs of nodes in undirected graphs. + return total if G.is_directed() else total / 2 + + +@nx.utils.not_implemented_for("directed") +@nx.utils.not_implemented_for("multigraph") +@nx._dispatchable(edge_attrs="weight") +def schultz_index(G, weight=None): + r"""Returns the Schultz Index (of the first kind) of `G` + + The *Schultz Index* [3]_ of a graph is the sum over all node pairs of + distances times the sum of degrees. Consider an undirected graph `G`. + For each node pair ``(u, v)`` compute ``dist(u, v) * (deg(u) + deg(v)`` + where ``dist`` is the shortest path length between two nodes and ``deg`` + is the degree of a node. + + The Schultz Index is the sum of these quantities over all (unordered) + pairs of nodes. + + Parameters + ---------- + G : NetworkX graph + The undirected graph of interest. + weight : string or None, optional (default: None) + If None, every edge has weight 1. + If a string, use this edge attribute as the edge weight. + Any edge attribute not present defaults to 1. + The edge weights are used to computing shortest-path distances. + + Returns + ------- + number + The first kind of Schultz Index of the graph `G`. 
+ + Examples + -------- + The Schultz Index of the (unweighted) complete graph on *n* nodes + equals the number of pairs of the *n* nodes times ``2 * (n - 1)``, + since each pair of nodes is at distance one and the sum of degree + of two nodes is ``2 * (n - 1)``. + + >>> n = 10 + >>> G = nx.complete_graph(n) + >>> nx.schultz_index(G) == (n * (n - 1) / 2) * (2 * (n - 1)) + True + + Graph that is disconnected + + >>> nx.schultz_index(nx.empty_graph(2)) + inf + + References + ---------- + .. [1] I. Gutman, Selected properties of the Schultz molecular topological index, + J. Chem. Inf. Comput. Sci. 34 (1994), 1087–1089. + https://doi.org/10.1021/ci00021a009 + .. [2] M.V. Diudeaa and I. Gutman, Wiener-Type Topological Indices, + Croatica Chemica Acta, 71 (1998), 21-51. + https://hrcak.srce.hr/132323 + .. [3] H. P. Schultz, Topological organic chemistry. 1. + Graph theory and topological indices of alkanes,i + J. Chem. Inf. Comput. Sci. 29 (1989), 239–257. + + """ + if not nx.is_connected(G): + return float("inf") + + spl = nx.shortest_path_length(G, weight=weight) + d = dict(G.degree, weight=weight) + return sum(dist * (d[u] + d[v]) for u, info in spl for v, dist in info.items()) / 2 + + +@nx.utils.not_implemented_for("directed") +@nx.utils.not_implemented_for("multigraph") +@nx._dispatchable(edge_attrs="weight") +def gutman_index(G, weight=None): + r"""Returns the Gutman Index for the graph `G`. + + The *Gutman Index* measures the topology of networks, especially for molecule + networks of atoms connected by bonds [1]_. It is also called the Schultz Index + of the second kind [2]_. + + Consider an undirected graph `G` with node set ``V``. 
+ The Gutman Index of a graph is the sum over all (unordered) pairs of nodes + of nodes ``(u, v)``, with distance ``dist(u, v)`` and degrees ``deg(u)`` + and ``deg(v)``, of ``dist(u, v) * deg(u) * deg(v)`` + + Parameters + ---------- + G : NetworkX graph + + weight : string or None, optional (default: None) + If None, every edge has weight 1. + If a string, use this edge attribute as the edge weight. + Any edge attribute not present defaults to 1. + The edge weights are used to computing shortest-path distances. + + Returns + ------- + number + The Gutman Index of the graph `G`. + + Examples + -------- + The Gutman Index of the (unweighted) complete graph on *n* nodes + equals the number of pairs of the *n* nodes times ``(n - 1) * (n - 1)``, + since each pair of nodes is at distance one and the product of degree of two + vertices is ``(n - 1) * (n - 1)``. + + >>> n = 10 + >>> G = nx.complete_graph(n) + >>> nx.gutman_index(G) == (n * (n - 1) / 2) * ((n - 1) * (n - 1)) + True + + Graphs that are disconnected + + >>> G = nx.empty_graph(2) + >>> nx.gutman_index(G) + inf + + References + ---------- + .. [1] M.V. Diudeaa and I. Gutman, Wiener-Type Topological Indices, + Croatica Chemica Acta, 71 (1998), 21-51. + https://hrcak.srce.hr/132323 + .. [2] I. Gutman, Selected properties of the Schultz molecular topological index, + J. Chem. Inf. Comput. Sci. 34 (1994), 1087–1089. 
+ https://doi.org/10.1021/ci00021a009 + + """ + if not nx.is_connected(G): + return float("inf") + + spl = nx.shortest_path_length(G, weight=weight) + d = dict(G.degree, weight=weight) + return sum(dist * d[u] * d[v] for u, vinfo in spl for v, dist in vinfo.items()) / 2 diff --git a/.venv/lib/python3.11/site-packages/networkx/classes/tests/__init__.py b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e9e64cc2e7dc628e94aa9a9e9e9ba6d8f52159a0 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/dispatch_interface.cpython-311.pyc b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/dispatch_interface.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..106c5c1f44ff45317f97979ad77f105e06a76eee Binary files /dev/null and b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/dispatch_interface.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/historical_tests.cpython-311.pyc b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/historical_tests.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8b5412492e0482d4621dea93e167045a0ae00b88 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/historical_tests.cpython-311.pyc differ diff --git 
a/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_coreviews.cpython-311.pyc b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_coreviews.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6f65aa7282e422faf59b69b573805d451727285a Binary files /dev/null and b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_coreviews.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_digraph.cpython-311.pyc b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_digraph.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..272e913dd2dac4dc07dbd1ef1654c9f5c7f8f690 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_digraph.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_digraph_historical.cpython-311.pyc b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_digraph_historical.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..15df5fce8c047a9bbe8ed955ebb1d87a83581056 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_digraph_historical.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_filters.cpython-311.pyc b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_filters.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..69ad26c2dbedfab355356a227991bc3562709d53 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_filters.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_function.cpython-311.pyc 
b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_function.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..25028fd0a2108fd35b275d5141ba080ff4ad545e Binary files /dev/null and b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_function.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_graph.cpython-311.pyc b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_graph.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5dfdd265d33f0f0bd63b61d2808b0062d3af080b Binary files /dev/null and b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_graph.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_graph_historical.cpython-311.pyc b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_graph_historical.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..10aeaf9174ba75500d2aff00bb78f3e00884e768 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_graph_historical.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_graphviews.cpython-311.pyc b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_graphviews.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d295bb74a84ba6c11b5900893da07a6b2c4415f1 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_graphviews.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_multidigraph.cpython-311.pyc b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_multidigraph.cpython-311.pyc new file mode 100644 
index 0000000000000000000000000000000000000000..3947da6e6b634d14626cfa22b7bdf3ca5793f8cc Binary files /dev/null and b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_multidigraph.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_multigraph.cpython-311.pyc b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_multigraph.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8d6293351f189ff654a14bd2ae72fcdceb0f5522 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_multigraph.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_reportviews.cpython-311.pyc b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_reportviews.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1dd8c9a49ca9ffef994ce007b75ba71a6676b625 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_reportviews.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_special.cpython-311.pyc b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_special.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..81dd28c77c919b6d4af7383c53afb86db2170e71 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_special.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_subgraphviews.cpython-311.pyc b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_subgraphviews.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7269da9b24506c1e65fe12dc953e22c35ae94a65 Binary files /dev/null and 
b/.venv/lib/python3.11/site-packages/networkx/classes/tests/__pycache__/test_subgraphviews.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/torchaudio/lib/_torchaudio_sox.so b/.venv/lib/python3.11/site-packages/torchaudio/lib/_torchaudio_sox.so new file mode 100644 index 0000000000000000000000000000000000000000..99834fa7dfc6ab157a1f0268d6226c3bf8d7b88c --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchaudio/lib/_torchaudio_sox.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29146cb3e986188309eef6aaf85b6beb4df24bd882507c72efdc2f31bea439d3 +size 265672 diff --git a/.venv/lib/python3.11/site-packages/torchaudio/transforms/__pycache__/_transforms.cpython-311.pyc b/.venv/lib/python3.11/site-packages/torchaudio/transforms/__pycache__/_transforms.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d9de3dc2b9eb688518c875abe22530ee7673e899 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/torchaudio/transforms/__pycache__/_transforms.cpython-311.pyc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:471cf9b38d0d919acd5f9febcd981be4cac42d43c4f041f27c53759b6701ce99 +size 108918